diff --git a/.env.example b/.env.example index b04078e3..a19fd2d7 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,16 @@ APP_URL=http://localhost:3000 PORT=3000 +# --- Security / reverse proxy --- +# The app runs with Fastify `trustProxy` ENABLED, so it derives the client IP +# (req.ip) from the `X-Forwarded-For` header. That header is client-forgeable. +# Deploy this app behind a trusted reverse proxy that SETS/OVERWRITES (not +# appends) `X-Forwarded-For` with the real client IP. Without such a proxy, any +# per-IP throttling — including the /mcp Basic brute-force limiter — can be +# bypassed by an attacker who simply spoofs `X-Forwarded-For` to rotate IPs. +# (The /mcp limiter keeps a global per-email key as an IP-independent backstop, +# but the per-IP and per-IP+email keys rely on a trustworthy X-Forwarded-For.) + # minimum of 32 characters. Generate one with: openssl rand -hex 32 APP_SECRET=REPLACE_WITH_LONG_SECRET diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 5959983e..2d81467c 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -18,7 +18,12 @@ env: IMAGE: ghcr.io/vvzvlad/gitmost jobs: + # Run the reusable test suite first so a failing test blocks the image build. + test: + uses: ./.github/workflows/test.yml + build: + needs: test runs-on: ubuntu-latest steps: - name: Checkout diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7137d953..694df01b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,12 @@ env: IMAGE: ghcr.io/vvzvlad/gitmost jobs: + # Run the reusable test suite first so a failing test blocks the image build. 
+ test: + uses: ./.github/workflows/test.yml + build: + needs: test strategy: matrix: include: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..955b0ac2 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,40 @@ +name: Test + +on: + pull_request: + workflow_call: + workflow_dispatch: + +concurrency: + group: test-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + # Required for the client suite, which resolves @docmost/editor-ext via its + # dist build (the server suite also rebuilds it through its own pretest). + - name: Build editor-ext + run: pnpm --filter @docmost/editor-ext build + + - name: Run tests + run: pnpm -r test diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index 25ff2530..414e75b8 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -391,6 +391,13 @@ "Toggle block": "Сворачиваемый блок", "Callout": "Выноска", "Insert callout notice.": "Вставить выноску с сообщением.", + "Footnote": "Сноска", + "Insert a footnote reference.": "Вставить ссылку на сноску.", + "Footnotes": "Примечания", + "Footnote {{number}}": "Сноска {{number}}", + "Go to footnote": "Перейти к сноске", + "Back to reference": "Вернуться к ссылке", + "Empty footnote": "Пустая сноска", "Math inline": "Строчная формула", "Insert inline math equation.": "Вставить математическое выражение в строку.", "Math block": "Блок формулы", diff --git a/apps/client/src/features/ai-chat/components/message-item.tsx 
b/apps/client/src/features/ai-chat/components/message-item.tsx index 4ba1d934..e8709d5c 100644 --- a/apps/client/src/features/ai-chat/components/message-item.tsx +++ b/apps/client/src/features/ai-chat/components/message-item.tsx @@ -10,6 +10,18 @@ import classes from "@/features/ai-chat/components/ai-chat.module.css"; interface MessageItemProps { message: UIMessage; + /** + * Forwarded to ToolCallCard: whether tool cards render page citation links. + * Defaults to true (internal chat). The public share passes false. + */ + showCitations?: boolean; + /** + * Neutralize internal/relative markdown links in the rendered answer (drop + * their href so they become inert text). Defaults to false (internal chat, + * links stay clickable). The anonymous public share passes true so internal + * UUIDs/routes in the assistant's markdown don't leak as clickable links. + */ + neutralizeInternalLinks?: boolean; } /** @@ -24,7 +36,11 @@ interface MessageItemProps { * `message` prop identity (and its `parts`) changes each tick. Re-rendering the * text parts on each delta is what makes the answer stream in progressively. */ -export default function MessageItem({ message }: MessageItemProps) { +export default function MessageItem({ + message, + showCitations = true, + neutralizeInternalLinks = false, +}: MessageItemProps) { const { t } = useTranslation(); const isUser = message.role === "user"; @@ -53,7 +69,9 @@ export default function MessageItem({ message }: MessageItemProps) { // starts with an empty text part before the first token arrives); the // typing indicator covers that gap until real content streams in. if (!part.text.trim()) return null; - const html = renderChatMarkdown(part.text); + const html = renderChatMarkdown(part.text, { + neutralizeInternalLinks, + }); if (html) { return (
; + return ( +stale
"; + renderRawHtml(container, ""); + expect(container.innerHTML).toBe(""); + }); + + it("clears prior content first on a re-render with new source", () => { + const win = dom.window as unknown as RecordHello
World
`; + + const json = htmlToJson(html); + expect(hasHtmlEmbedNode(json)).toBe(true); + expect(hasHtmlEmbedNode(stripHtmlEmbedNodes(json))).toBe(false); + }); + + it('is still DETECTED even when the data-source is NOT valid base64', async () => { + // A naive raw inline source (HTML-escaped, not base64) still parses as an + // htmlEmbed NODE — the decoder just yields an empty source. Detection (and + // therefore stripping) does not depend on the source being well-formed, so + // the bypass cannot be hidden by sending a malformed data-source. + const md = ``; + const html = await markdownToHtml(md); + const json = htmlToJson(html); + expect(hasHtmlEmbedNode(json)).toBe(true); + expect(hasHtmlEmbedNode(stripHtmlEmbedNodes(json))).toBe(false); + }); +}); diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts index 6b07ec0b..28a59ea3 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts @@ -92,6 +92,102 @@ describe('stripHtmlEmbedNodes', () => { const result = stripHtmlEmbedNodes(doc); expect(result).toEqual(doc); }); + + it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => { + // htmlEmbed sits as a sibling of a paragraph, nested four containers deep. + const doc = { + type: 'doc', + content: [ + { + type: 'callout', + content: [ + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { + type: 'paragraph', + content: [{ type: 'text', text: 'deep keep' }], + }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], + }, + ], + }, + ], + }, + ], + }; + + const result = stripHtmlEmbedNodes(doc); + expect(hasHtmlEmbedNode(result)).toBe(false); + const col = findFirstChild(result, 'column'); + // Sibling paragraph survives; only the embed is removed. 
+ expect(col.content).toHaveLength(1); + expect(col.content[0].type).toBe('paragraph'); + expect(col.content[0].content[0].text).toBe('deep keep'); + }); + + it('returns non-object / null / array-without-content nodes unchanged', () => { + // Non-object inputs are returned as-is (callers persist what they got). + expect(stripHtmlEmbedNodes(null as any)).toBeNull(); + expect(stripHtmlEmbedNodes(undefined as any)).toBeUndefined(); + expect(stripHtmlEmbedNodes('not-a-node' as any)).toBe('not-a-node'); + expect(stripHtmlEmbedNodes(42 as any)).toBe(42); + + // An object node with no `content` array is returned shallow-cloned, equal. + const leaf = { type: 'paragraph', attrs: { id: 'x' } }; + const out = stripHtmlEmbedNodes(leaf); + expect(out).toEqual(leaf); + expect(out).not.toBe(leaf); // new object, input not mutated + }); + + it('yields empty content (not null/undefined) for a doc whose only child is an htmlEmbed', () => { + const doc = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source: 'only' } }], + }; + const result = stripHtmlEmbedNodes(doc) as any; + expect(Array.isArray(result.content)).toBe(true); + expect(result.content).toHaveLength(0); + expect(result.content).not.toBeNull(); + expect(result.content).not.toBeUndefined(); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); +}); + +describe('hasHtmlEmbedNode (root/odd-shape detection)', () => { + it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => { + const rootEmbed = { type: 'htmlEmbed', attrs: { source: '' } }; + expect(hasHtmlEmbedNode(rootEmbed)).toBe(true); + }); + + it('returns false for a doc with embed-like TEXT but no htmlEmbed node', () => { + // The literal string "htmlEmbed" appears only as text content, not as a + // node type, so it must NOT be detected. + const doc = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [ + { type: 'text', text: 'type: htmlEmbed:" with no trailing space', () => {
+ // `${code}: ${undefined ?? ''}`.trim() collapses to just "503:" (code + colon).
+ expect(describeProviderError({ statusCode: 503 })).toBe('503:');
+ // The trailing space after the colon is trimmed away.
+ expect(describeProviderError({ statusCode: 503 }).endsWith(': ')).toBe(false);
+ });
+
+ it('object with neither message nor statusCode nor body => fallback', () => {
+ expect(describeProviderError({}, 'AI stream error')).toBe('AI stream error');
+ // An object carrying only unrelated keys is still treated as message-less.
+ expect(describeProviderError({ foo: 'bar' } as never)).toBe('Unknown error');
+ });
});
diff --git a/apps/server/src/integrations/ai/ai.service.spec.ts b/apps/server/src/integrations/ai/ai.service.spec.ts
index 7bedc23a..ef44a59d 100644
--- a/apps/server/src/integrations/ai/ai.service.spec.ts
+++ b/apps/server/src/integrations/ai/ai.service.spec.ts
@@ -171,4 +171,117 @@ describe('AiService.getChatModel role model override', () => {
expect(aiProviderCredentialsRepo.find).not.toHaveBeenCalled();
expect(secretBox.decryptSecret).not.toHaveBeenCalled();
});
+
+  /**
+   * Test factory for an AiService whose workspace settings resolve to ollama:
+   * no apiKey, and a baseUrl (overridable through `over.baseUrl`). Hands back
+   * the service plus all three stubs so callers can assert on their calls.
+   */
+  function makeOllamaService(over: { baseUrl?: string } = {}) {
+    // What the settings stub resolves to: ollama, keyless, with an endpoint.
+    const workspaceConfig = {
+      driver: 'ollama',
+      chatModel: 'llama3',
+      apiKey: undefined,
+      baseUrl: over.baseUrl ?? 'http://localhost:11434/v1',
+    };
+    const aiSettings = { resolve: jest.fn().mockResolvedValue(workspaceConfig) };
+    const aiProviderCredentialsRepo = { find: jest.fn() };
+    const secretBox = { decryptSecret: jest.fn() };
+    const service = new AiService(
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      aiSettings as any,
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      aiProviderCredentialsRepo as any,
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      secretBox as any,
+    );
+    return { service, aiSettings, aiProviderCredentialsRepo, secretBox };
+  }
+
+  it('same-driver ollama override (workspace driver=ollama): reuses the workspace ollama baseUrl, no creds lookup/decrypt', async () => {
+    // The role override names ollama and the workspace already runs ollama, so
+    // this is the same-driver path: the workspace endpoint is reused as-is. It
+    // must stay clear of the cross-driver 503 branch, the ai_provider_credentials
+    // lookup and any secret decryption (ollama is keyless).
+    const stubs = makeOllamaService();
+
+    const chatModel = await stubs.service.getChatModel('ws-1', {
+      driver: 'ollama',
+      chatModel: 'llama3.1',
+      roleName: 'Local',
+    });
+
+    expect(chatModel).toBeDefined();
+    expect(stubs.secretBox.decryptSecret).not.toHaveBeenCalled();
+    expect(stubs.aiProviderCredentialsRepo.find).not.toHaveBeenCalled();
+  });
+
+  it('chatModel-only override on an ollama workspace: reuses the workspace ollama baseUrl, no creds lookup', async () => {
+    // Model-only override (no override.driver): the ollama workspace keeps its
+    // own driver and baseUrl, so neither the creds repo nor the secret box is
+    // touched — the cheap public-share path against an ollama workspace.
+    const stubs = makeOllamaService();
+
+    const chatModel = await stubs.service.getChatModel('ws-1', { chatModel: 'mistral' });
+
+    expect(chatModel).toBeDefined();
+    expect(stubs.secretBox.decryptSecret).not.toHaveBeenCalled();
+    expect(stubs.aiProviderCredentialsRepo.find).not.toHaveBeenCalled();
+  });
+
+  it('blank chatModel guard: workspace has a driver but a blank chatModel and no override chatModel => AiNotConfiguredException', async () => {
+    // The workspace config below has a driver (openai), so the driver guard is
+    // satisfied, but its chatModel is blank and the override used further down
+    // supplies none either, leaving no effective chatModel.
+    const blankModelConfig = {
+      driver: 'openai',
+      chatModel: '',
+      apiKey: 'workspace-key',
+      baseUrl: undefined,
+    };
+    const aiSettings = { resolve: jest.fn().mockResolvedValue(blankModelConfig) };
+    const aiProviderCredentialsRepo = { find: jest.fn() };
+    const secretBox = { decryptSecret: jest.fn() };
+    const service = new AiService(
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      aiSettings as any,
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      aiProviderCredentialsRepo as any,
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      secretBox as any,
+    );
+
+    // A roleName-only override: nothing here can fill the blank chatModel.
+    const attempt = service.getChatModel('ws-1', { roleName: 'Writer' });
+
+    await expect(attempt).rejects.toBeInstanceOf(AiNotConfiguredException);
+  });
+
+  it('non-ollama driver with a missing apiKey => AiNotConfiguredException', async () => {
+    // openai workspace (so not ollama) that has a chat model configured but no
+    // apiKey at all: the combined `driver !== ollama && !apiKey` guard is the
+    // one expected to reject with the 503-style not-configured exception.
+    const keylessOpenAiConfig = {
+      driver: 'openai',
+      chatModel: 'gpt-4o-mini',
+      apiKey: undefined,
+      baseUrl: undefined,
+    };
+    const aiSettings = { resolve: jest.fn().mockResolvedValue(keylessOpenAiConfig) };
+    const aiProviderCredentialsRepo = { find: jest.fn() };
+    const secretBox = { decryptSecret: jest.fn() };
+    const service = new AiService(
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      aiSettings as any,
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      aiProviderCredentialsRepo as any,
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      secretBox as any,
+    );
+
+    const attempt = service.getChatModel('ws-1');
+
+    await expect(attempt).rejects.toBeInstanceOf(AiNotConfiguredException);
+  });
});
diff --git a/apps/server/src/integrations/mcp/mcp-auth.helpers.ts b/apps/server/src/integrations/mcp/mcp-auth.helpers.ts
index 4a0b5be1..c3aa3292 100644
--- a/apps/server/src/integrations/mcp/mcp-auth.helpers.ts
+++ b/apps/server/src/integrations/mcp/mcp-auth.helpers.ts
@@ -5,6 +5,7 @@
// the Authorization header.
import { UnauthorizedException } from '@nestjs/common';
import { timingSafeEqual } from 'node:crypto';
+import { isInitializeRequest } from '@modelcontextprotocol/sdk/types.js';
import { JwtType } from '../../core/auth/dto/jwt-payload';
import { CREDENTIALS_MISMATCH_MESSAGE } from '../../core/auth/auth.constants';
@@ -291,6 +292,14 @@ export interface BearerVerifyDeps {
workspaceId?: string;
sessionId?: string;
}>;
+ // The workspace id of THIS MCP instance, when the caller can resolve it (the
+ // community build is single-workspace, so McpService passes its default
+ // workspace's id). When provided, the token's `workspaceId` claim MUST equal
+ // it, mirroring JwtStrategy's `req.raw.workspaceId !== payload.workspaceId`
+ // guard so a valid ACCESS token from a DIFFERENT workspace cannot be replayed
+ // against this instance in a multi-workspace deployment. Optional so callers /
+ // tests that genuinely cannot resolve an instance workspace are unchanged.
+ expectedWorkspaceId?: string;
// Load the user (or undefined) for the disabled check.
findUser: (
sub: string,
@@ -321,6 +330,19 @@ export async function verifyBearerAccess(
throw new UnauthorizedException(generic);
}
+ // Bind the token to THIS instance's workspace (mirrors JwtStrategy). When the
+ // caller resolved an instance workspace id, a token whose `workspaceId` claim
+ // points at another workspace is rejected, so a valid ACCESS token minted in
+ // workspace B cannot be replayed against an MCP instance serving workspace A.
+ // In the single-workspace community build expectedWorkspaceId equals the only
+ // workspace, so this is a no-op there; it only bites a multi-workspace deploy.
+ if (
+ deps.expectedWorkspaceId &&
+ payload.workspaceId !== deps.expectedWorkspaceId
+ ) {
+ throw new UnauthorizedException(generic);
+ }
+
const user = await deps.findUser(payload.sub, payload.workspaceId);
if (!user || user.deactivatedAt || user.deletedAt) {
throw new UnauthorizedException(generic);
@@ -342,21 +364,129 @@ export async function verifyBearerAccess(
/**
* Detect a genuine JSON-RPC `initialize` request from an already-parsed body.
- * Mirrors the @modelcontextprotocol/sdk `isInitializeRequest` signal that
- * packages/mcp/src/http.ts uses to decide whether to mint a session, but
- * framework/SDK-free so it is unit-testable and usable from the CommonJS
- * McpService. An initialize request is a single JSON-RPC object whose `method`
- * is exactly 'initialize'; a batch (array) body is never an initialize request.
+ * Delegates to the @modelcontextprotocol/sdk `isInitializeRequest` predicate —
+ * the SAME predicate packages/mcp/src/http.ts uses to decide whether to mint a
+ * session — so the login side (this server, which runs login() on init) and the
+ * MCP-session side (http.ts) agree EXACTLY on what is an initialize request. The SDK
+ * predicate validates the full InitializeRequest shape (jsonrpc, id, method ===
+ * 'initialize', params incl. protocolVersion); a bare `{ method: 'initialize' }`
+ * with no params, a batch (array) body, etc. are NOT initialize requests.
*
* This is the second half of the session-INIT decision: `isSessionInit` is
- * (no `mcp-session-id` header) AND `isInitializeRequestBody(body)`. Using it
- * ensures the side-effecting login() (user_sessions insert + USER_LOGIN audit +
- * lastLoginAt) only runs for a real initialize, never for an arbitrary
- * header-less request that http.ts will subsequently 400.
+ * (no `mcp-session-id` header) AND `isInitializeRequestBody(body)`. Matching the
+ * SDK predicate exactly ensures the side-effecting login() (user_sessions insert
+ * + USER_LOGIN audit + lastLoginAt) only runs for a request http.ts will also
+ * accept as an initialize — never for an arbitrary header-less request that
+ * http.ts would subsequently 400 (which would otherwise spam the audit log /
+ * grow user_sessions without ever creating an MCP session).
*/
export function isInitializeRequestBody(body: unknown): boolean {
- if (!body || typeof body !== 'object' || Array.isArray(body)) return false;
- return (body as { method?: unknown }).method === 'initialize';
+ return isInitializeRequest(body);
+}
+
+/**
+ * What McpService.handle should do once its pre-hijack checks have run,
+ * expressed as plain data: `respond` carries a fixed HTTP status and a JSON
+ * `{ error }` body to send; `hijack` means take over the response and hand the
+ * request to the MCP transport. Being a pure value (no FastifyReply, no
+ * res.hijack) keeps the status/body mapping unit-testable, and the error text
+ * is limited to whatever string the mapper places in `body.error`.
+ * (Union members are order-insensitive; `kind` is the discriminant.)
+ */
+export type McpHandleDecision =
+  | { kind: 'hijack' }
+  | { kind: 'respond'; status: number; body: { error: string } };
+
+/**
+ * Turns the outcome of McpService.handle's auth/enablement checks into a
+ * McpHandleDecision, without touching the reply. Checked in this order:
+ *   1. `sharedTokenOk` false            -> 401 {error:'Unauthorized'}.
+ *   2. `enabled` false                  -> 403 {error:'MCP is disabled for this workspace'}.
+ *   3. `error` set (anything but undefined):
+ *        - UnauthorizedException        -> 401 carrying err.message verbatim.
+ *        - anything else                -> 500 {error:'Internal server error'}.
+ *   4. none of the above                -> {kind:'hijack'} (delegate to transport).
+ * Only fixed strings or the exception's message ever reach the body.
+ */
+export function mapAuthResultToResponse(input: {
+  sharedTokenOk: boolean;
+  enabled: boolean;
+  error?: unknown;
+}): McpHandleDecision {
+  // Every non-hijack outcome has the same shape; build it in one place.
+  const respond = (status: number, error: string): McpHandleDecision => ({
+    kind: 'respond',
+    status,
+    body: { error },
+  });
+  const { sharedTokenOk, enabled, error } = input;
+
+  // 1. Shared X-MCP-Token gate.
+  if (!sharedTokenOk) {
+    return respond(401, 'Unauthorized');
+  }
+
+  // 2. Workspace-level MCP switch.
+  if (!enabled) {
+    return respond(403, 'MCP is disabled for this workspace');
+  }
+
+  // 4. Nothing was thrown while resolving the session config: go ahead.
+  if (error === undefined) {
+    return { kind: 'hijack' };
+  }
+
+  // 3. Something was thrown: only an UnauthorizedException may surface its own
+  //    message; every other error collapses to a generic 500.
+  return error instanceof UnauthorizedException
+    ? respond(401, error.message)
+    : respond(500, 'Internal server error');
+}
+
+// Shape of the EE MFA requirement check as consumed by the Basic gate. MFA only
+// blocks the password login when at least one of the two flags is true.
+export interface BasicGateMfaResult {
+  requiresMfaSetup?: boolean;
+  userHasMfa?: boolean;
+}
+
+/**
+ * Framework-free decision for the /mcp HTTP-Basic pre-token gate. It applies the
+ * same SSO/MFA rules AuthController.login is described as enforcing before a
+ * token is issued, so Basic cannot be used to sidestep them. No ModuleRef and no
+ * on-disk EE MFA module are involved, which is what makes it unit-testable.
+ *
+ * Inputs and outcomes:
+ *   - `ssoEnforced` true: throws Unauthorized (the "enforced SSO" message);
+ *     password logins are refused on an SSO-enforced workspace.
+ *   - `mfa` (the EE MFA requirement result; undefined when no EE MFA module is
+ *     bundled, e.g. a community/fork build): if it is present and either
+ *     `userHasMfa` or `requiresMfaSetup` is truthy, throws Unauthorized telling
+ *     the caller to use a Bearer access token, since Basic cannot complete MFA.
+ *   - otherwise: returns normally and the Basic login may proceed.
+ *
+ * McpService.enforceBasicLoginGate supplies the concrete `validateSsoEnforcement`
+ * outcome and the lazily-loaded MFA result. Rejections are UnauthorizedExceptions
+ * (surfaced as a 401); nothing here logs or echoes the password.
+ */
+export function decideBasicGate(input: {
+  ssoEnforced: boolean;
+  mfa?: BasicGateMfaResult;
+}): void {
+  const { ssoEnforced, mfa } = input;
+  if (ssoEnforced) {
+    throw new UnauthorizedException(
+      'This workspace has enforced SSO login. Use SSO; MCP HTTP Basic is not allowed.',
+    );
+  }
+
+  // Absent result, or both flags falsy => MFA does not block the login.
+  if (!mfa || !(mfa.userHasMfa || mfa.requiresMfaSetup)) return;
+  throw new UnauthorizedException(
+    'This account requires multi-factor authentication. MCP HTTP Basic ' +
+      'cannot complete MFA — log in normally and use a Bearer access token ' +
+      'instead.',
+  );
}
/** Extract a Bearer token from an Authorization header (case-insensitive). */
diff --git a/apps/server/src/integrations/mcp/mcp-basic-login-gate.spec.ts b/apps/server/src/integrations/mcp/mcp-basic-login-gate.spec.ts
new file mode 100644
index 00000000..351b467b
--- /dev/null
+++ b/apps/server/src/integrations/mcp/mcp-basic-login-gate.spec.ts
@@ -0,0 +1,259 @@
+import { UnauthorizedException } from '@nestjs/common';
+
+// ---------------------------------------------------------------------------
+// These tests exercise the REAL McpService.enforceBasicLoginGate (the pre-token
+// SSO/MFA gate on the /mcp HTTP-Basic path). Unlike the resolveMcpSessionConfig
+// tests in mcp.service.spec.ts — which STUB the gate and only assert it runs
+// before login()/verifyCredentials — here the gate logic is instantiated for
+// real and only its LEAF dependencies are mocked:
+// - the workspace object (plain object with/without enforceSso),
+// - the user credentials (plain object),
+// - the lazily-required EE MFA module (jest.mock with { virtual: true } so we
+// can simulate BOTH "bundled" and "not bundled" community-build states),
+// - the injected MfaService instance (via a stub moduleRef).
+//
+// McpService cannot normally be imported under jest because it imports
+// AuthService, which drags in the React email-template graph
+// (@docmost/transactional/emails/*) that the jest moduleNameMapper does not
+// resolve. We therefore mock the heavy collaborator modules (auth.service,
+// token.service, the @docmost/db repos and mcp-auth.helpers) at the module
+// level so importing mcp.service.ts succeeds. None of those are touched by the
+// gate itself, so the gate runs unmodified against the real code path.
+// ---------------------------------------------------------------------------
+
+// The EE MFA module specifier the jest.mock below intercepts MUST be
+// byte-for-byte the specifier that mcp.service.ts lazily require()s
+// ('./../../ee/mfa/services/mfa.service'). jest.mock is hoisted above all
+// non-hoisted code, so the path is inlined as a literal in the call below
+// rather than referenced through a const (which would not yet be initialised).
+// `{ virtual: true }` is required because the EE module does not exist in this
+// OSS build (there is no src/ee directory) — without it jest cannot register a
+// mock for a path it cannot resolve on disk.
+
+// Mutable handle the virtual mock factory reads when it runs, so a test can pick
+// "bundled" (factory returns a MfaService class) or "not bundled" (factory throws,
+// like require() failing on a community build). jest.mock is hoisted, hence lazy.
+// NOTE(review): a returned factory result may stay cached for the file — confirm test order.
+let mfaModuleState: { bundled: boolean; checkMfaRequirements?: jest.Mock } = {
+ bundled: false,
+};
+
+jest.mock(
+ './../../ee/mfa/services/mfa.service',
+ () => {
+ if (!mfaModuleState.bundled) {
+ // Simulate a community/fork build with no EE MFA module: the real
+ // require() throws, which the gate catches as the "no MFA gate" path.
+ throw new Error('Cannot find module (EE MFA not bundled)');
+ }
+ // "Bundled" build: expose a MfaService class token. The actual instance the
+ // gate calls is resolved through moduleRef.get(MfaModule.MfaService), which
+ // our stub moduleRef returns regardless of the token identity.
+ class MfaService {}
+ return { MfaService };
+ },
+ { virtual: true },
+);
+
+// --- Mock the heavy collaborator modules so importing mcp.service succeeds. ---
+// The gate never calls into these; they exist only to satisfy the import graph.
+jest.mock('../../core/auth/services/auth.service', () => ({
+ AuthService: class AuthService {},
+}));
+jest.mock('../../core/auth/services/token.service', () => ({
+ TokenService: class TokenService {},
+}));
+jest.mock('@docmost/db/repos/workspace/workspace.repo', () => ({
+ WorkspaceRepo: class WorkspaceRepo {},
+}));
+jest.mock('@docmost/db/repos/user/user.repo', () => ({
+ UserRepo: class UserRepo {},
+}));
+jest.mock('@docmost/db/repos/session/user-session.repo', () => ({
+ UserSessionRepo: class UserSessionRepo {},
+}));
+// mcp-auth.helpers exports runtime values the gate relies on (decideBasicGate,
+// mapAuthResultToResponse, etc.). Keep the REAL helpers so the gate exercises
+// real logic; only stub FailedLoginLimiter so its constructor runs without a
+// real sweep timer. The module is framework-free and loads cleanly under jest
+// (mcp.service.spec.ts already imports it directly), so requireActual is safe.
+jest.mock('./mcp-auth.helpers', () => {
+ const actual = jest.requireActual('./mcp-auth.helpers');
+ return {
+ ...actual,
+ FailedLoginLimiter: class FailedLoginLimiter {
+ sweep() {}
+ },
+ };
+});
+
+// Import AFTER the mocks are registered.
+// eslint-disable-next-line @typescript-eslint/no-require-imports
+import { McpService } from './mcp.service';
+
+type GateCreds = { email: string; password: string };
+
+// Test factory: builds an McpService whose constructor collaborators are all
+// `undefined` placeholders except moduleRef, whose get() hands back an object
+// exposing checkMfaRequirements. Returns the service and its bound private gate.
+function makeService(opts: {
+  checkMfaRequirements?: jest.Mock;
+}): { service: McpService; gate: (ws: unknown, creds: GateCreds) => Promise<void> } {
+  // Stub moduleRef.get -> returns an object whose checkMfaRequirements is the
+  // provided mock (or a default resolving undefined when none is supplied).
+  const moduleRef = {
+    get: jest.fn().mockReturnValue({
+      checkMfaRequirements:
+        opts.checkMfaRequirements ?? jest.fn().mockResolvedValue(undefined),
+    }),
+  };
+
+  const service = new McpService(
+    undefined as never, // environmentService
+    undefined as never, // workspaceRepo
+    undefined as never, // authService
+    undefined as never, // tokenService
+    undefined as never, // userRepo
+    undefined as never, // userSessionRepo
+    moduleRef as never, // moduleRef (read by the MFA branch)
+  );
+  // Stop the constructor's unref'd sweep timer leaking across tests.
+  service.onModuleDestroy();
+
+  // enforceBasicLoginGate is private; reach it through a structural cast and
+  // bind it, so the REAL method (not a stub) runs with the right `this`.
+  const gate = (
+    service as unknown as {
+      enforceBasicLoginGate: (ws: unknown, creds: GateCreds) => Promise<void>;
+    }
+  ).enforceBasicLoginGate.bind(service);
+
+  return { service, gate };
+}
+
+const CREDS: GateCreds = { email: 'user@example.com', password: 'pw' };
+
+describe('McpService.enforceBasicLoginGate (REAL gate, leaf deps mocked)', () => {
+ beforeEach(() => {
+ // Reset to the community-build default (no EE module) before each test.
+ mfaModuleState = { bundled: false };
+ jest.clearAllMocks();
+ });
+
+ describe('SSO enforcement (validateSsoEnforcement)', () => {
+ it('rejects with Unauthorized when the workspace enforces SSO, before any MFA/login', async () => {
+ const { gate } = makeService({});
+ const workspace = { id: 'ws-1', enforceSso: true };
+
+ await expect(gate(workspace, CREDS)).rejects.toBeInstanceOf(
+ UnauthorizedException,
+ );
+ // The /mcp 401 surfaces an SSO-specific message (not a generic MCP error).
+ await expect(gate(workspace, CREDS)).rejects.toThrow(/enforced SSO/i);
+ });
+
+ it('does NOT consult the MFA module when SSO is enforced (gate short-circuits)', async () => {
+ // Even if the EE module WERE bundled, the SSO branch throws first, so the
+ // moduleRef MFA lookup must never run.
+ mfaModuleState = {
+ bundled: true,
+ checkMfaRequirements: jest.fn(),
+ };
+ const { service, gate } = makeService({
+ checkMfaRequirements: mfaModuleState.checkMfaRequirements,
+ });
+ const moduleRefGet = (
+ service as unknown as { moduleRef: { get: jest.Mock } }
+ ).moduleRef.get;
+
+ await expect(
+ gate({ id: 'ws-1', enforceSso: true }, CREDS),
+ ).rejects.toThrow(/enforced SSO/i);
+ // The SSO branch fired before the MFA require/lookup.
+ expect(moduleRefGet).not.toHaveBeenCalled();
+ expect(mfaModuleState.checkMfaRequirements).not.toHaveBeenCalled();
+ });
+ });
+
+ describe('community build: EE MFA module NOT bundled', () => {
+ it('passes (no throw) when SSO is not enforced and the lazy require fails (no MFA gate)', async () => {
+ // mfaModuleState.bundled === false -> the virtual mock factory throws,
+ // exactly like require() of a missing EE module on a community build.
+ const { service, gate } = makeService({});
+ const moduleRefGet = (
+ service as unknown as { moduleRef: { get: jest.Mock } }
+ ).moduleRef.get;
+
+ await expect(
+ gate({ id: 'ws-1', enforceSso: false }, CREDS),
+ ).resolves.toBeUndefined();
+ // The require() failed, so the gate returned before touching moduleRef.
+ expect(moduleRefGet).not.toHaveBeenCalled();
+ });
+ });
+
+ describe('EE MFA module bundled', () => {
+ it('rejects with a "use a Bearer token" signal when the user has MFA enabled', async () => {
+ const check = jest.fn().mockResolvedValue({
+ userHasMfa: true,
+ requiresMfaSetup: false,
+ });
+ mfaModuleState = { bundled: true, checkMfaRequirements: check };
+ const { gate } = makeService({ checkMfaRequirements: check });
+
+ const promise = gate({ id: 'ws-1', enforceSso: false }, CREDS);
+ await expect(promise).rejects.toBeInstanceOf(UnauthorizedException);
+ await expect(
+ gate({ id: 'ws-1', enforceSso: false }, CREDS),
+ ).rejects.toThrow(/Bearer access token/i);
+ // The (mocked) requirement check got the creds, the workspace and undefined.
+ expect(check).toHaveBeenCalledWith(
+ CREDS,
+ { id: 'ws-1', enforceSso: false },
+ undefined,
+ );
+ });
+
+ it('rejects when the workspace enforces MFA (requiresMfaSetup)', async () => {
+ // requiresMfaSetup === true models a workspace that enforces MFA for a
+ // user who has not set it up yet; the Basic path cannot complete it.
+ const check = jest.fn().mockResolvedValue({
+ userHasMfa: false,
+ requiresMfaSetup: true,
+ });
+ mfaModuleState = { bundled: true, checkMfaRequirements: check };
+ const { gate } = makeService({ checkMfaRequirements: check });
+
+ await expect(
+ gate({ id: 'ws-1', enforceSso: false }, CREDS),
+ ).rejects.toThrow(/Bearer access token/i);
+ });
+
+ it('passes when the user has no MFA and the workspace does not enforce it', async () => {
+ const check = jest.fn().mockResolvedValue({
+ userHasMfa: false,
+ requiresMfaSetup: false,
+ });
+ mfaModuleState = { bundled: true, checkMfaRequirements: check };
+ const { gate } = makeService({ checkMfaRequirements: check });
+
+ await expect(
+ gate({ id: 'ws-1', enforceSso: false }, CREDS),
+ ).resolves.toBeUndefined();
+ // The bundled module's requirement check WAS consulted (proving we took
+ // the bundled branch, not the community no-op branch).
+ expect(check).toHaveBeenCalledTimes(1);
+ });
+
+ it('passes when checkMfaRequirements returns a falsy result (no requirement flags)', async () => {
+ // Defensive: a check that resolves to undefined (no flags at all) must not reject.
+ const check = jest.fn().mockResolvedValue(undefined);
+ mfaModuleState = { bundled: true, checkMfaRequirements: check };
+ const { gate } = makeService({ checkMfaRequirements: check });
+
+ await expect(
+ gate({ id: 'ws-1', enforceSso: false }, CREDS),
+ ).resolves.toBeUndefined();
+ });
+ });
+});
diff --git a/apps/server/src/integrations/mcp/mcp.service.spec.ts b/apps/server/src/integrations/mcp/mcp.service.spec.ts
index bf4c8a24..cfa8472d 100644
--- a/apps/server/src/integrations/mcp/mcp.service.spec.ts
+++ b/apps/server/src/integrations/mcp/mcp.service.spec.ts
@@ -9,6 +9,9 @@ import {
sharedTokenMatches,
clientIp,
bindAccessJwtVerifier,
+ extractBearer,
+ decideBasicGate,
+ mapAuthResultToResponse,
McpAuthDeps,
} from './mcp-auth.helpers';
import { JwtType } from '../../core/auth/dto/jwt-payload';
@@ -79,6 +82,26 @@ describe('parseBasicAuth', () => {
});
});
+describe('extractBearer', () => {
+ it('extracts the token from a "Bearer " header', () => {
+ expect(extractBearer('Bearer abc.def.ghi')).toBe('abc.def.ghi');
+ });
+
+ it('is case-insensitive on the scheme (lowercase + uppercase)', () => {
+ // The scheme is matched regardless of case; the token comes back unchanged.
+ expect(extractBearer('bearer abc')).toBe('abc');
+ expect(extractBearer('BEARER abc')).toBe('abc');
+ });
+
+ it('returns undefined for a non-Bearer scheme (e.g. Basic)', () => {
+ expect(extractBearer('Basic abc')).toBeUndefined();
+ });
+
+ it('returns undefined for an undefined header', () => {
+ expect(extractBearer(undefined)).toBeUndefined();
+ });
+});
+
describe('isCredentialsFailure', () => {
it('is true for the credentials-mismatch UnauthorizedException', () => {
expect(
@@ -185,6 +208,43 @@ describe('FailedLoginLimiter', () => {
expect(lim.isBlocked(k, 0)).toBe(true);
expect(lim.isBlocked(k, 1000)).toBe(false);
});
+
+ describe('sweep (expired-bucket eviction, injectable clock)', () => {
+ // sweep(now) is expected to evict every bucket at least windowMs old
+ // (now - windowStart >= windowMs) so never-revisited keys cannot pile up.
+ // The clock is passed in as `now`, so no fake timers are needed here.
+ it('drops a bucket strictly older than windowMs', () => {
+ const lim = new FailedLoginLimiter(5, 1000);
+ // Seed a bucket at t=0 (windowStart=0).
+ lim.recordFailure('stale', 0);
+ // Sweep well past the window: now - windowStart = 5000 >= 1000 -> dropped.
+ lim.sweep(5000);
+ // One fresh failure after the sweep must leave the key unblocked.
+ // NOTE(review): with limit 5 this also passes if sweep() is a no-op (2 < 5).
+ lim.recordFailure('stale', 5001);
+ expect(lim.isBlocked('stale', 5001)).toBe(false);
+ });
+
+ it('drops a bucket exactly at the windowMs boundary (>= is inclusive)', () => {
+ const lim = new FailedLoginLimiter(1, 1000);
+ lim.recordFailure('boundary', 0); // windowStart=0, blocked at threshold 1
+ expect(lim.isBlocked('boundary', 0)).toBe(true);
+ // now - windowStart = 1000 == windowMs -> the >= check evicts it.
+ lim.sweep(1000);
+ // After eviction the key must no longer be blocked at the same instant.
+ // NOTE(review): confirm isBlocked() does not expire the window on its own.
+ expect(lim.isBlocked('boundary', 1000)).toBe(false);
+ });
+
+ it('retains a fresh bucket still within the window', () => {
+ const lim = new FailedLoginLimiter(1, 1000);
+ lim.recordFailure('fresh', 0); // windowStart=0
+ // now - windowStart = 999 < 1000 -> the bucket survives the sweep.
+ lim.sweep(999);
+ // Still blocked because the bucket (and its count) was retained.
+ expect(lim.isBlocked('fresh', 999)).toBe(true);
+ });
+ });
});
describe('verifyBearerAccess (Bearer revocation/disabled checks)', () => {
@@ -264,6 +324,31 @@ describe('verifyBearerAccess (Bearer revocation/disabled checks)', () => {
),
).rejects.toThrow('jwt expired');
});
+
+ // Item 3: bind the Bearer token to THIS instance's workspace (mirrors
+ // JwtStrategy). A token whose workspaceId claim differs from the instance
+ // workspace must be rejected; matching/absent expectedWorkspaceId is allowed.
+ it('rejects a token from a DIFFERENT workspace when expectedWorkspaceId is set', async () => {
+ await expect(
+ verifyBearerAccess('t', {
+ ...bearerDeps(),
+ expectedWorkspaceId: 'ws-OTHER',
+ }),
+ ).rejects.toThrow(UnauthorizedException);
+ });
+
+ it('accepts a token whose workspace matches expectedWorkspaceId', async () => {
+ const res = await verifyBearerAccess('t', {
+ ...bearerDeps(),
+ expectedWorkspaceId: 'ws-1',
+ });
+ expect(res).toEqual({ sub: 'user-1', email: 'u@e.com' });
+ });
+
+ it('does NOT enforce a workspace when expectedWorkspaceId is undefined (single-workspace no-op)', async () => {
+ const res = await verifyBearerAccess('t', bearerDeps());
+ expect(res).toEqual({ sub: 'user-1', email: 'u@e.com' });
+ });
});
describe('resolveMcpSessionConfig', () => {
@@ -587,23 +672,48 @@ describe('resolveMcpSessionConfig', () => {
});
});
-describe('isInitializeRequestBody (session-INIT detection)', () => {
- it('true only for a single JSON-RPC object with method === "initialize"', () => {
- expect(isInitializeRequestBody({ jsonrpc: '2.0', method: 'initialize' })).toBe(
- true,
- );
+// A full, valid JSON-RPC InitializeRequest as the @modelcontextprotocol/sdk
+// `isInitializeRequest` predicate (which isInitializeRequestBody now delegates
+// to) requires: jsonrpc + id + method === 'initialize' + params.protocolVersion.
+const fullInitializeRequest = {
+ jsonrpc: '2.0',
+ id: 1,
+ method: 'initialize',
+ params: {
+ protocolVersion: '2024-11-05',
+ capabilities: {},
+ clientInfo: { name: 'test-client', version: '1.0.0' },
+ },
+};
+
+describe('isInitializeRequestBody (session-INIT detection, matches SDK predicate)', () => {
+ it('true for a FULL valid InitializeRequest (the SDK predicate signal)', () => {
+ expect(isInitializeRequestBody(fullInitializeRequest)).toBe(true);
+ });
+
+ it('false for a bare { method: "initialize" } with no id/params (item 1)', () => {
+ // Item 1: this previously returned true (method-only check) and let an
+ // authenticated client POST a params-less body with no mcp-session-id, which
+ // ran the side-effecting login() before http.ts 400'd it. The SDK predicate
+ // rejects it (no id, no params.protocolVersion), so it no longer mints a
+ // session / audit row.
+ expect(isInitializeRequestBody({ method: 'initialize' })).toBe(false);
+ expect(
+ isInitializeRequestBody({ jsonrpc: '2.0', method: 'initialize' }),
+ ).toBe(false);
+ expect(
+ isInitializeRequestBody({ jsonrpc: '2.0', id: 1, method: 'initialize', params: {} }),
+ ).toBe(false);
});
it('false for a non-initialize method (e.g. tools/call)', () => {
expect(
- isInitializeRequestBody({ jsonrpc: '2.0', method: 'tools/call' }),
+ isInitializeRequestBody({ ...fullInitializeRequest, method: 'tools/call' }),
).toBe(false);
});
it('false for a batch (array) body, null/undefined, or a non-object', () => {
- expect(
- isInitializeRequestBody([{ jsonrpc: '2.0', method: 'initialize' }]),
- ).toBe(false);
+ expect(isInitializeRequestBody([fullInitializeRequest])).toBe(false);
expect(isInitializeRequestBody(undefined)).toBe(false);
expect(isInitializeRequestBody(null)).toBe(false);
expect(isInitializeRequestBody('initialize')).toBe(false);
@@ -618,8 +728,14 @@ describe('isSessionInit decision (no mcp-session-id AND initialize body)', () =>
const decide = (sessionId: string | undefined, body: unknown): boolean =>
!sessionId && isInitializeRequestBody(body);
- it('no header + initialize body -> init', () => {
- expect(decide(undefined, { method: 'initialize' })).toBe(true);
+ it('no header + full initialize body -> init', () => {
+ expect(decide(undefined, fullInitializeRequest)).toBe(true);
+ });
+
+ it('no header + bare params-less initialize body -> NOT init (item 1)', () => {
+ // A header-less { method: 'initialize' } with no params is no longer treated
+ // as an init by the SDK predicate, so it does not mint a session via login().
+ expect(decide(undefined, { method: 'initialize' })).toBe(false);
});
it('no header + non-initialize body -> NOT init (verifyCredentials path)', () => {
@@ -627,7 +743,7 @@ describe('isSessionInit decision (no mcp-session-id AND initialize body)', () =>
});
it('has session-id -> never init regardless of body', () => {
- expect(decide('sess-1', { method: 'initialize' })).toBe(false);
+ expect(decide('sess-1', fullInitializeRequest)).toBe(false);
});
});
@@ -769,3 +885,138 @@ describe('bindAccessJwtVerifier enforces JwtType.ACCESS (item 3)', () => {
expect(res).toEqual({ sub: 'user-1', email: undefined });
});
});
+
+describe('decideBasicGate (pure SSO/MFA pre-token gate, refactor R1)', () => {
+ // The pure decision extracted out of McpService.enforceBasicLoginGate. It is
+ // tested WITHOUT ModuleRef and WITHOUT an on-disk EE MFA module: the SSO verdict
+ // and the MFA requirement result are passed in as plain values.
+
+ it('SSO enforced -> throws Unauthorized ("enforced SSO")', () => {
+ expect(() => decideBasicGate({ ssoEnforced: true })).toThrow(
+ UnauthorizedException,
+ );
+ expect(() => decideBasicGate({ ssoEnforced: true })).toThrow(/enforced SSO/);
+ // SSO takes precedence even if MFA flags are also set.
+ expect(() =>
+ decideBasicGate({ ssoEnforced: true, mfa: { userHasMfa: true } }),
+ ).toThrow(/enforced SSO/);
+ });
+
+ it('no SSO + no MFA module (mfa undefined) -> resolves (Basic allowed)', () => {
+ // A community/fork build with no EE MFA module passes mfa: undefined and the
+ // gate must allow the password login (same as the controller with no MFA).
+ expect(() => decideBasicGate({ ssoEnforced: false })).not.toThrow();
+ expect(() =>
+ decideBasicGate({ ssoEnforced: false, mfa: undefined }),
+ ).not.toThrow();
+ });
+
+ it('MFA present + userHasMfa -> rejects ("use a Bearer access token")', () => {
+ expect(() =>
+ decideBasicGate({ ssoEnforced: false, mfa: { userHasMfa: true } }),
+ ).toThrow(/use a Bearer access token/);
+ expect(() =>
+ decideBasicGate({ ssoEnforced: false, mfa: { userHasMfa: true } }),
+ ).toThrow(UnauthorizedException);
+ });
+
+ it('MFA present + requiresMfaSetup -> rejects', () => {
+ expect(() =>
+ decideBasicGate({ ssoEnforced: false, mfa: { requiresMfaSetup: true } }),
+ ).toThrow(/use a Bearer access token/);
+ });
+
+ it('MFA present but none required (both flags false) -> resolves', () => {
+ expect(() =>
+ decideBasicGate({
+ ssoEnforced: false,
+ mfa: { userHasMfa: false, requiresMfaSetup: false },
+ }),
+ ).not.toThrow();
+ });
+});
+
+describe('mapAuthResultToResponse (handle status/body mapping, refactor R2)', () => {
+ // The pure response decision extracted out of McpService.handle. It maps the
+ // pre-hijack gauntlet (shared token, enablement, auth error) to either a fixed
+ // JSON error response or the hijack path — never leaking the password/header.
+
+ it('wrong X-MCP-Token -> 401 {error:"Unauthorized"} and NOT the hijack path', () => {
+ const d = mapAuthResultToResponse({ sharedTokenOk: false, enabled: true });
+ expect(d).toEqual({
+ kind: 'respond',
+ status: 401,
+ body: { error: 'Unauthorized' },
+ });
+ });
+
+ it('workspace MCP disabled -> 403', () => {
+ const d = mapAuthResultToResponse({ sharedTokenOk: true, enabled: false });
+ expect(d.kind).toBe('respond');
+ if (d.kind === 'respond') {
+ expect(d.status).toBe(403);
+ expect(d.body).toEqual({ error: 'MCP is disabled for this workspace' });
+ }
+ });
+
+ it('an UnauthorizedException -> 401 with err.message; no password/header leaked', () => {
+ // The exception message is the specific auth reason that should be surfaced.
+ const err = new UnauthorizedException('Email or password does not match');
+ const d = mapAuthResultToResponse({
+ sharedTokenOk: true,
+ enabled: true,
+ error: err,
+ });
+ expect(d).toEqual({
+ kind: 'respond',
+ status: 401,
+ body: { error: 'Email or password does not match' },
+ });
+ // Body carries only the message. NOTE(review): no secret is passed in here.
+ if (d.kind === 'respond') {
+ const serialized = JSON.stringify(d.body);
+ expect(serialized).not.toContain('password=');
+ expect(serialized).not.toContain('Authorization');
+ expect(serialized).not.toContain('Basic ');
+ expect(serialized).not.toContain('Bearer ');
+ }
+ });
+
+ it('a non-Unauthorized error -> 500 generic (no error detail surfaced)', () => {
+ const err = new Error('db blew up: connection string secret');
+ const d = mapAuthResultToResponse({
+ sharedTokenOk: true,
+ enabled: true,
+ error: err,
+ });
+ expect(d).toEqual({
+ kind: 'respond',
+ status: 500,
+ body: { error: 'Internal server error' },
+ });
+ // The generic body must NOT echo the underlying error message.
+ if (d.kind === 'respond') {
+ expect(d.body.error).not.toContain('secret');
+ }
+ });
+
+ it('happy path (auth resolved, no error) -> hijack', () => {
+ const d = mapAuthResultToResponse({ sharedTokenOk: true, enabled: true });
+ expect(d).toEqual({ kind: 'hijack' });
+ });
+
+ it('shared-token failure takes precedence over disabled/error', () => {
+ // Even with a disabled workspace and an error, a bad shared token is the
+ // first gate, so the response is the uniform 401 Unauthorized.
+ const d = mapAuthResultToResponse({
+ sharedTokenOk: false,
+ enabled: false,
+ error: new UnauthorizedException('should not surface'),
+ });
+ expect(d).toEqual({
+ kind: 'respond',
+ status: 401,
+ body: { error: 'Unauthorized' },
+ });
+ });
+});
diff --git a/apps/server/src/integrations/mcp/mcp.service.ts b/apps/server/src/integrations/mcp/mcp.service.ts
index 7ac16fb6..cb746b90 100644
--- a/apps/server/src/integrations/mcp/mcp.service.ts
+++ b/apps/server/src/integrations/mcp/mcp.service.ts
@@ -25,6 +25,8 @@ import {
sharedTokenMatches,
clientIp,
bindAccessJwtVerifier,
+ decideBasicGate,
+ mapAuthResultToResponse,
DocmostMcpConfig,
ResolvedMcpAuth,
} from './mcp-auth.helpers';
@@ -154,6 +156,15 @@ export class McpService implements OnModuleDestroy {
private async verifyMcpBearer(
token: string,
): Promise<{ sub?: string; email?: string }> {
+ // Resolve THIS instance's workspace so verifyBearerAccess can bind the
+ // token's `workspaceId` claim to it (mirrors JwtStrategy). The community
+ // build is single-workspace (findFirst), so this is the default workspace
+ // and the check is a no-op here; it only rejects a foreign-workspace token
+ // in a multi-workspace deployment. Undefined (no workspace configured) means
+ // no check — the credentials path would already have failed with no
+ // workspace, and an undefined here keeps the helper a no-op rather than
+ // rejecting every token.
+ const instanceWorkspace = await this.workspaceRepo.findFirst();
// The revocation/disabled decision logic lives in the framework-free
// verifyBearerAccess helper (unit-testable without the heavy auth graph);
// this method only wires in the concrete TokenService + repos.
@@ -163,6 +174,7 @@ export class McpService implements OnModuleDestroy {
verifyJwt: bindAccessJwtVerifier(this.tokenService) as (
t: string,
) => Promise,
+ expectedWorkspaceId: instanceWorkspace?.id,
findUser: (sub, workspaceId) =>
this.userRepo.findById(sub, workspaceId),
findActiveSession: (sessionId) =>
@@ -231,49 +243,54 @@ export class McpService implements OnModuleDestroy {
workspace: Workspace,
creds: { email: string; password: string },
): Promise {
- // 1) SSO enforcement. validateSsoEnforcement throws BadRequestException; we
- // re-surface it as Unauthorized so the /mcp 401 path is consistent and a
- // token is never issued.
+ // 1) SSO enforcement. validateSsoEnforcement throws when the workspace
+ // enforces SSO; we only need the boolean verdict for the pure decision.
+ let ssoEnforced = false;
try {
validateSsoEnforcement(workspace);
} catch {
- throw new UnauthorizedException(
- 'This workspace has enforced SSO login. Use SSO; MCP HTTP Basic is not allowed.',
- );
+ ssoEnforced = true;
}
// 2) MFA gate — lazy-require the EE module exactly like AuthController.login.
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- let MfaModule: any;
- try {
- // eslint-disable-next-line @typescript-eslint/no-require-imports
- MfaModule = require('./../../ee/mfa/services/mfa.service');
- } catch {
- // No EE MFA module bundled in this build: same as the controller -> no
- // MFA gate. (A community/fork build has no MFA, so Basic is allowed.)
- return;
+ // On a fork WITHOUT the EE module bundled, mfaResult stays undefined and the
+ // pure gate behaves exactly like the controller (no MFA module -> no MFA
+ // gate). We only LOAD the module + read the requirement flags here; the
+ // accept/reject decision lives in the framework-free decideBasicGate so the
+ // SSO/MFA logic is unit-testable without ModuleRef or the on-disk EE module.
+ let mfaResult: { userHasMfa?: boolean; requiresMfaSetup?: boolean } | undefined;
+ // Only consult the MFA module when SSO has not already disqualified the
+ // request (SSO short-circuits, and skipping the load avoids a needless
+ // require on the SSO-reject path).
+ if (!ssoEnforced) {
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ let MfaModule: any;
+ try {
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
+ MfaModule = require('./../../ee/mfa/services/mfa.service');
+ } catch {
+ // No EE MFA module bundled in this build: same as the controller -> no
+ // MFA gate. (A community/fork build has no MFA, so Basic is allowed.)
+ MfaModule = undefined;
+ }
+
+ if (MfaModule) {
+ const mfaService = this.moduleRef.get(MfaModule.MfaService, {
+ strict: false,
+ });
+ // Same requirement check the controller uses. We pass NO FastifyReply
+ // (the controller passes `res` only to set a cookie on the no-MFA happy
+ // path, which we never take here): we only read the requirement flags.
+ mfaResult = await mfaService.checkMfaRequirements(
+ creds,
+ workspace,
+ undefined,
+ );
+ }
}
- const mfaService = this.moduleRef.get(MfaModule.MfaService, {
- strict: false,
- });
- // Use the same requirement check the controller uses. We pass NO FastifyReply
- // (the controller passes `res` only to set a cookie on the no-MFA happy path,
- // which we never take here): we only read the requirement flags. Be tolerant
- // of either a (loginInput, workspace) or (loginInput, workspace, res) shape.
- const mfaResult = await mfaService.checkMfaRequirements(
- creds,
- workspace,
- undefined,
- );
-
- if (mfaResult && (mfaResult.userHasMfa || mfaResult.requiresMfaSetup)) {
- throw new UnauthorizedException(
- 'This account requires multi-factor authentication. MCP HTTP Basic ' +
- 'cannot complete MFA — log in normally and use a Bearer access token ' +
- 'instead.',
- );
- }
+ // Pure accept/reject decision (throws UnauthorizedException on rejection).
+ decideBasicGate({ ssoEnforced, mfa: mfaResult });
}
// Lazily create the HTTP handler exactly once. The import is indirected so
@@ -333,52 +350,61 @@ export class McpService implements OnModuleDestroy {
// matching `X-MCP-Token` header. It now lives in its OWN header so it never
// collides with `Authorization`, which carries the per-user credentials.
const sharedToken = process.env.MCP_TOKEN;
- if (sharedToken) {
- const provided = req.headers['x-mcp-token'];
- if (!sharedTokenMatches(sharedToken, provided)) {
- res.status(401).send({ error: 'Unauthorized' });
- return;
- }
- }
+ const sharedTokenOk = sharedToken
+ ? sharedTokenMatches(sharedToken, req.headers['x-mcp-token'])
+ : true;
- if (!(await this.isEnabled())) {
- res.status(403).send({ error: 'MCP is disabled for this workspace' });
- return;
- }
+ // Short-circuit checks (shared token, enablement) that do not need the auth
+ // resolution. Compute them up front so the response mapping is a single pure
+ // decision (mapAuthResultToResponse) that cannot leak the password/header.
+ const enabled = sharedTokenOk ? await this.isEnabled() : false;
// Resolve + validate the per-session identity BEFORE hijacking the response
// so bad credentials surface as a clean 401 JSON (never a torn response and
// never a generic "MCP error"). The resolved config/identity is stashed on
// the raw request for the package's resolver + identify hook to read back.
- let resolved: ResolvedMcpAuth;
- try {
- resolved = await this.resolveSessionConfig(req);
- } catch (err) {
- if (err instanceof UnauthorizedException) {
- // Warn once if the only thing missing is the service account, to keep
- // the original operator hint.
- if (
- !this.credsConfigured() &&
- !req.headers['authorization'] &&
- !this.warnedMissingCreds
- ) {
- this.warnedMissingCreds = true;
- this.logger.warn(
- 'MCP is enabled but received a request with no credentials and no ' +
- 'MCP_DOCMOST_EMAIL/MCP_DOCMOST_PASSWORD service account configured.',
- );
+ let resolved: ResolvedMcpAuth | undefined;
+ let authError: unknown;
+ if (sharedTokenOk && enabled) {
+ try {
+ resolved = await this.resolveSessionConfig(req);
+ } catch (err) {
+ authError = err;
+ if (err instanceof UnauthorizedException) {
+ // Warn once if the only thing missing is the service account, to keep
+ // the original operator hint.
+ if (
+ !this.credsConfigured() &&
+ !req.headers['authorization'] &&
+ !this.warnedMissingCreds
+ ) {
+ this.warnedMissingCreds = true;
+ this.logger.warn(
+ 'MCP is enabled but received a request with no credentials and no ' +
+ 'MCP_DOCMOST_EMAIL/MCP_DOCMOST_PASSWORD service account configured.',
+ );
+ }
+ } else {
+ this.logger.error('MCP auth resolution failed', err as Error);
}
- res.status(401).send({ error: err.message });
- return;
}
- this.logger.error('MCP auth resolution failed', err as Error);
- res.status(500).send({ error: 'Internal server error' });
+ }
+
+ // Pure status/body mapping for the whole pre-hijack gauntlet.
+ const decision = mapAuthResultToResponse({
+ sharedTokenOk,
+ enabled,
+ error: authError,
+ });
+ if (decision.kind === 'respond') {
+ res.status(decision.status).send(decision.body);
return;
}
// Stash the resolved auth on the raw request so the package's resolver +
// identify hook (wired in getHandler) read it back instead of re-parsing.
- (req.raw as unknown as Record)[MCP_RESOLVED] = resolved;
+ (req.raw as unknown as Record)[MCP_RESOLVED] =
+ resolved as ResolvedMcpAuth;
// Hand the raw Node req/res to the MCP transport. hijack() tells Fastify
// to stop managing this response so the transport can write to it directly.
diff --git a/apps/server/src/ws/listeners/page-ws.listener.spec.ts b/apps/server/src/ws/listeners/page-ws.listener.spec.ts
index 734e8228..3282d318 100644
--- a/apps/server/src/ws/listeners/page-ws.listener.spec.ts
+++ b/apps/server/src/ws/listeners/page-ws.listener.spec.ts
@@ -3,6 +3,7 @@ import { PageWsListener } from './page-ws.listener';
import { WsTreeService } from '../ws-tree.service';
import {
PageEvent,
+ PageMovedEvent,
TreeNodeSnapshot,
} from '../../database/listeners/page.listener';
@@ -93,3 +94,139 @@ describe('PageWsListener.onPageCreated', () => {
expect(wsTree.broadcastRefetchRoot).not.toHaveBeenCalled();
});
});
+
+describe('PageWsListener delete/move/restore handlers', () => {
+ let listener: PageWsListener;
+ let wsTree: {
+ broadcastPageCreated: jest.Mock;
+ broadcastPageDeleted: jest.Mock;
+ broadcastPageMoved: jest.Mock;
+ broadcastRefetchRoot: jest.Mock;
+ };
+ let warnSpy: jest.SpyInstance;
+
+ const secondSnapshot: TreeNodeSnapshot = {
+ id: 'page-2',
+ slugId: 'slug-2',
+ title: 'World',
+ icon: '📁',
+ position: 'a2',
+ spaceId: 'space-1',
+ parentPageId: null,
+ };
+
+ beforeEach(async () => {
+ wsTree = {
+ broadcastPageCreated: jest.fn().mockResolvedValue(undefined),
+ broadcastPageDeleted: jest.fn().mockResolvedValue(undefined),
+ broadcastPageMoved: jest.fn().mockResolvedValue(undefined),
+ broadcastRefetchRoot: jest.fn().mockResolvedValue(undefined),
+ };
+
+ const module: TestingModule = await Test.createTestingModule({
+ providers: [
+ PageWsListener,
+ { provide: WsTreeService, useValue: wsTree },
+ ],
+ }).compile();
+
+ listener = module.get(PageWsListener);
+ // The PAGE_RESTORED-without-spaceId branch logs a warning; silence + assert.
+ warnSpy = jest
+ .spyOn(listener['logger'], 'warn')
+ .mockImplementation(() => undefined);
+ });
+
+ afterEach(() => {
+ warnSpy.mockRestore();
+ });
+
+ // --- onPageDeleted (PAGE_SOFT_DELETED / PAGE_DELETED) ---
+
+ it('onPageDeleted with N `pages`: one broadcastPageDeleted per page', async () => {
+ const event: PageEvent = {
+ pageIds: ['page-1', 'page-2'],
+ workspaceId: 'ws-1',
+ pages: [snapshot, secondSnapshot],
+ };
+
+ await listener.onPageDeleted(event);
+
+ expect(wsTree.broadcastPageDeleted).toHaveBeenCalledTimes(2);
+ expect(wsTree.broadcastPageDeleted).toHaveBeenNthCalledWith(1, snapshot);
+ expect(wsTree.broadcastPageDeleted).toHaveBeenNthCalledWith(
+ 2,
+ secondSnapshot,
+ );
+ });
+
+ it('onPageDeleted with an EMPTY `pages` array: no broadcast', async () => {
+ const event: PageEvent = {
+ pageIds: ['page-1'],
+ workspaceId: 'ws-1',
+ pages: [],
+ };
+
+ await listener.onPageDeleted(event);
+
+ expect(wsTree.broadcastPageDeleted).not.toHaveBeenCalled();
+ });
+
+ it('onPageDeleted with UNDEFINED `pages`: no broadcast (no crash)', async () => {
+ const event: PageEvent = {
+ pageIds: ['page-1'],
+ workspaceId: 'ws-1',
+ };
+
+ await listener.onPageDeleted(event);
+
+ expect(wsTree.broadcastPageDeleted).not.toHaveBeenCalled();
+ });
+
+ // --- onPageMoved (PAGE_MOVED) ---
+
+ it('onPageMoved: forwards the whole event to a single broadcastPageMoved', async () => {
+ const event: PageMovedEvent = {
+ workspaceId: 'ws-1',
+ oldParentId: 'old-parent',
+ hasChildren: false,
+ node: { ...snapshot, parentPageId: 'new-parent', position: 'a5' },
+ };
+
+ await listener.onPageMoved(event);
+
+ expect(wsTree.broadcastPageMoved).toHaveBeenCalledTimes(1);
+ expect(wsTree.broadcastPageMoved).toHaveBeenCalledWith(event);
+ });
+
+ // --- onPageRestored (PAGE_RESTORED) ---
+
+ it('onPageRestored WITHOUT spaceId: warns and does NOT refetch', async () => {
+ const event: PageEvent = {
+ pageIds: ['page-1'],
+ workspaceId: 'ws-1',
+ };
+
+ await listener.onPageRestored(event);
+
+ expect(warnSpy).toHaveBeenCalledTimes(1);
+ expect(warnSpy).toHaveBeenCalledWith(
+ expect.stringContaining('PAGE_RESTORED'),
+ );
+ expect(wsTree.broadcastRefetchRoot).not.toHaveBeenCalled();
+ });
+
+ it('onPageRestored WITH spaceId: one broadcastRefetchRoot scoped to the space', async () => {
+ const event: PageEvent = {
+ pageIds: ['page-1'],
+ workspaceId: 'ws-1',
+ spaceId: 'space-9',
+ };
+
+ await listener.onPageRestored(event);
+
+ expect(warnSpy).not.toHaveBeenCalled();
+ expect(wsTree.broadcastRefetchRoot).toHaveBeenCalledTimes(1);
+ expect(wsTree.broadcastRefetchRoot).toHaveBeenCalledWith('space-9');
+ });
+});
diff --git a/apps/server/src/ws/ws-service.spec.ts b/apps/server/src/ws/ws-service.spec.ts
new file mode 100644
index 00000000..c87d1493
--- /dev/null
+++ b/apps/server/src/ws/ws-service.spec.ts
@@ -0,0 +1,259 @@
+import { Test, TestingModule } from '@nestjs/testing';
+import { CACHE_MANAGER } from '@nestjs/cache-manager';
+import { WsService } from './ws.service';
+import { PagePermissionRepo } from '@docmost/db/repos/page/page-permission.repo';
+import {
+ getSpaceRoomName,
+ WS_SPACE_RESTRICTION_CACHE_PREFIX,
+ WS_CACHE_TTL_MS,
+} from './ws.utils';
+
+/**
+ * WsService server-side unit tests (M7 item 2):
+ * - spaceHasRestrictions cache lifecycle (miss -> read+set with TTL; hit ->
+ * no re-read; documents the stale-false window).
+ * - broadcastToAuthorizedUsers fan-out (authorized-only delivery, multi-socket
+ * fan-out per user, sockets with no userId skipped).
+ *
+ * Both private methods are exercised through their public entry points:
+ * spaceHasRestrictions via emitTreeEvent, broadcastToAuthorizedUsers via
+ * emitToAuthorizedUsers. WsService is constructed with mocked cache + repo and a
+ * mocked socket.io server, so no live infra is needed.
+ */
+
+describe('WsService.spaceHasRestrictions (cache lifecycle, via emitTreeEvent)', () => {
+ let service: WsService;
+ let pagePermissionRepo: {
+ hasRestrictedPagesInSpace: jest.Mock;
+ hasRestrictedAncestor: jest.Mock;
+ getUserIdsWithPageAccess: jest.Mock;
+ };
+ let cache: { get: jest.Mock; set: jest.Mock; del: jest.Mock };
+ let roomEmit: jest.Mock;
+
+ beforeEach(async () => {
+ pagePermissionRepo = {
+ hasRestrictedPagesInSpace: jest.fn(),
+ hasRestrictedAncestor: jest.fn(),
+ getUserIdsWithPageAccess: jest.fn(),
+ };
+ cache = {
+ get: jest.fn().mockResolvedValue(null),
+ set: jest.fn().mockResolvedValue(undefined),
+ del: jest.fn().mockResolvedValue(undefined),
+ };
+
+ const module: TestingModule = await Test.createTestingModule({
+ providers: [
+ WsService,
+ { provide: PagePermissionRepo, useValue: pagePermissionRepo },
+ { provide: CACHE_MANAGER, useValue: cache },
+ ],
+ }).compile();
+
+ service = module.get(WsService);
+
+ roomEmit = jest.fn();
+ const server = {
+ to: jest.fn().mockReturnValue({ emit: roomEmit }),
+ in: jest.fn().mockReturnValue({ fetchSockets: jest.fn() }),
+ };
+ service.setServer(server as never);
+ });
+
+ const cacheKey = (spaceId: string): string =>
+ `${WS_SPACE_RESTRICTION_CACHE_PREFIX}${spaceId}`;
+
+ it('first call MISSES the cache -> reads the repo and sets it with WS_CACHE_TTL_MS', async () => {
+ cache.get.mockResolvedValue(null); // miss
+ pagePermissionRepo.hasRestrictedPagesInSpace.mockResolvedValue(true);
+ pagePermissionRepo.hasRestrictedAncestor.mockResolvedValue(false);
+
+ await service.emitTreeEvent('space-1', 'page-1', { op: 'x' });
+
+ expect(cache.get).toHaveBeenCalledWith(cacheKey('space-1'));
+ expect(pagePermissionRepo.hasRestrictedPagesInSpace).toHaveBeenCalledTimes(1);
+ expect(pagePermissionRepo.hasRestrictedPagesInSpace).toHaveBeenCalledWith(
+ 'space-1',
+ );
+ // The freshly-read verdict is stored under the same key with WS_CACHE_TTL_MS.
+ expect(cache.set).toHaveBeenCalledWith(
+ cacheKey('space-1'),
+ true,
+ WS_CACHE_TTL_MS,
+ );
+ });
+
+ it('second call HITS the cache -> the repo is NOT re-read', async () => {
+ // Cache hit returns false (no restrictions) -> open-space fast path.
+ cache.get.mockResolvedValue(false);
+
+ await service.emitTreeEvent('space-1', 'page-1', { op: 'x' });
+
+ expect(cache.get).toHaveBeenCalledWith(cacheKey('space-1'));
+ // The whole point of the cache: no repo read on a hit.
+ expect(pagePermissionRepo.hasRestrictedPagesInSpace).not.toHaveBeenCalled();
+ expect(cache.set).not.toHaveBeenCalled();
+ // false verdict -> broadcast to the whole room (open-space fast path).
+ expect(roomEmit).toHaveBeenCalledWith('message', { op: 'x' });
+ });
+
+ it('a cached `false` is returned even when restrictions now exist (the stale window)', async () => {
+ // The cache says "no restrictions" (false) but the repo, if asked, would now
+ // say true. spaceHasRestrictions trusts the cached false and never re-reads —
+ // this documents the up-to-TTL stale window the production comment warns about
+ // (a payload can fan out room-wide until the cache is invalidated/expires).
+ cache.get.mockResolvedValue(false);
+ pagePermissionRepo.hasRestrictedPagesInSpace.mockResolvedValue(true);
+
+ await service.emitTreeEvent('space-1', 'page-1', { op: 'stale' });
+
+ expect(pagePermissionRepo.hasRestrictedPagesInSpace).not.toHaveBeenCalled();
+ // Treated as open -> the event is broadcast to the WHOLE room.
+ expect(roomEmit).toHaveBeenCalledWith('message', { op: 'stale' });
+ });
+
+ it('caches a `false` verdict too (so the next emit hits, not re-reads)', async () => {
+ cache.get.mockResolvedValueOnce(null); // first call: miss
+ pagePermissionRepo.hasRestrictedPagesInSpace.mockResolvedValue(false);
+
+ await service.emitTreeEvent('space-2', 'page-9', { op: 'y' });
+
+ expect(cache.set).toHaveBeenCalledWith(
+ cacheKey('space-2'),
+ false,
+ WS_CACHE_TTL_MS,
+ );
+ });
+});
+
+describe('WsService.broadcastToAuthorizedUsers fan-out (via emitToAuthorizedUsers)', () => {
+ let service: WsService;
+ let pagePermissionRepo: {
+ hasRestrictedPagesInSpace: jest.Mock;
+ hasRestrictedAncestor: jest.Mock;
+ getUserIdsWithPageAccess: jest.Mock;
+ };
+ let cache: { get: jest.Mock; set: jest.Mock; del: jest.Mock };
+ let fetchSockets: jest.Mock;
+ let serverIn: jest.Mock;
+
+ beforeEach(async () => {
+ pagePermissionRepo = {
+ hasRestrictedPagesInSpace: jest.fn(),
+ hasRestrictedAncestor: jest.fn(),
+ getUserIdsWithPageAccess: jest.fn(),
+ };
+ cache = {
+ get: jest.fn().mockResolvedValue(null),
+ set: jest.fn().mockResolvedValue(undefined),
+ del: jest.fn().mockResolvedValue(undefined),
+ };
+
+ const module: TestingModule = await Test.createTestingModule({
+ providers: [
+ WsService,
+ { provide: PagePermissionRepo, useValue: pagePermissionRepo },
+ { provide: CACHE_MANAGER, useValue: cache },
+ ],
+ }).compile();
+
+ service = module.get(WsService);
+
+ fetchSockets = jest.fn();
+ serverIn = jest.fn().mockReturnValue({ fetchSockets });
+ const server = {
+ to: jest.fn().mockReturnValue({ emit: jest.fn() }),
+ in: serverIn,
+ };
+ service.setServer(server as never);
+ });
+
+ it('only sockets whose userId is in getUserIdsWithPageAccess receive the event', async () => {
+ pagePermissionRepo.getUserIdsWithPageAccess.mockResolvedValue(['user-ok']);
+
+ const okEmit = jest.fn();
+ const noEmit = jest.fn();
+ fetchSockets.mockResolvedValue([
+ { id: 's1', data: { userId: 'user-ok' }, emit: okEmit },
+ { id: 's2', data: { userId: 'user-no' }, emit: noEmit },
+ ]);
+
+ const data = { operation: 'moveTreeNode' };
+ await service.emitToAuthorizedUsers('space-1', 'page-1', data);
+
+ // The authorized set is resolved from the candidate userIds present on the
+ // sockets (deduped), then only those users' sockets get the event.
+ expect(pagePermissionRepo.getUserIdsWithPageAccess).toHaveBeenCalledWith(
+ 'page-1',
+ expect.arrayContaining(['user-ok', 'user-no']),
+ );
+ expect(okEmit).toHaveBeenCalledWith('message', data);
+ expect(noEmit).not.toHaveBeenCalled();
+ });
+
+ it('a user with TWO sockets receives the event on BOTH (userSocketMap fan-out)', async () => {
+ pagePermissionRepo.getUserIdsWithPageAccess.mockResolvedValue(['user-ok']);
+
+ const tab1 = jest.fn();
+ const tab2 = jest.fn();
+ fetchSockets.mockResolvedValue([
+ { id: 's1', data: { userId: 'user-ok' }, emit: tab1 },
+ { id: 's2', data: { userId: 'user-ok' }, emit: tab2 },
+ ]);
+
+ const data = { operation: 'moveTreeNode' };
+ await service.emitToAuthorizedUsers('space-1', 'page-1', data);
+
+ // Both of the authorized user's sockets (e.g. two browser tabs) receive it.
+ expect(tab1).toHaveBeenCalledWith('message', data);
+ expect(tab2).toHaveBeenCalledWith('message', data);
+ // The candidate set is deduped to a single userId even with two sockets.
+ expect(pagePermissionRepo.getUserIdsWithPageAccess).toHaveBeenCalledWith(
+ 'page-1',
+ ['user-ok'],
+ );
+ });
+
+ it('a socket with NO userId is skipped (not a candidate, never emitted to)', async () => {
+ pagePermissionRepo.getUserIdsWithPageAccess.mockResolvedValue(['user-ok']);
+
+ const okEmit = jest.fn();
+ const anonEmit = jest.fn();
+ fetchSockets.mockResolvedValue([
+ { id: 's1', data: { userId: 'user-ok' }, emit: okEmit },
+ // Unauthenticated socket: no userId -> excluded from the candidate map.
+ { id: 's2', data: {}, emit: anonEmit },
+ ]);
+
+ const data = { operation: 'moveTreeNode' };
+ await service.emitToAuthorizedUsers('space-1', 'page-1', data);
+
+ expect(okEmit).toHaveBeenCalledWith('message', data);
+ expect(anonEmit).not.toHaveBeenCalled();
+ // The no-userId socket is not even offered as a candidate to the repo.
+ expect(pagePermissionRepo.getUserIdsWithPageAccess).toHaveBeenCalledWith(
+ 'page-1',
+ ['user-ok'],
+ );
+ });
+
+ it('no sockets in the room -> no repo lookup, no emit', async () => {
+ fetchSockets.mockResolvedValue([]);
+
+ await service.emitToAuthorizedUsers('space-1', 'page-1', { op: 'x' });
+
+ expect(pagePermissionRepo.getUserIdsWithPageAccess).not.toHaveBeenCalled();
+ });
+
+ it('routes through the space room name', async () => {
+ pagePermissionRepo.getUserIdsWithPageAccess.mockResolvedValue([]);
+ fetchSockets.mockResolvedValue([
+ { id: 's1', data: { userId: 'u' }, emit: jest.fn() },
+ ]);
+
+ await service.emitToAuthorizedUsers('space-7', 'page-1', { op: 'x' });
+
+ expect(serverIn).toHaveBeenCalledWith(getSpaceRoomName('space-7'));
+ });
+});
diff --git a/apps/server/src/ws/ws-tree.service.spec.ts b/apps/server/src/ws/ws-tree.service.spec.ts
index 0c511223..973e6b00 100644
--- a/apps/server/src/ws/ws-tree.service.spec.ts
+++ b/apps/server/src/ws/ws-tree.service.spec.ts
@@ -329,3 +329,109 @@ describe('WsService.emitTreeEvent', () => {
expect(anonEmit).toHaveBeenCalledWith('message', data);
});
});
+
+describe('move-into-restricted disjointness contract (WsTreeService + real WsService)', () => {
+ // CONTRACT: a move under a restricted ancestor PARTITIONS the room. The
+ // authorized set (gets the moveTreeNode via emitToAuthorizedUsers) and its
+ // complement (gets the deleteTreeNode via emitDeleteToUnauthorized) are
+ // disjoint and together cover every socket — and an anonymous (no-userId)
+ // socket lands in the delete set. We wire a REAL WsService (only its repo,
+ // cache and socket server mocked) so both broadcasts run against the SAME fixed
+ // socket set, the way they do in production.
+ let treeService: WsTreeService;
+ let pagePermissionRepo: {
+ hasRestrictedPagesInSpace: jest.Mock;
+ hasRestrictedAncestor: jest.Mock;
+ getUserIdsWithPageAccess: jest.Mock;
+ };
+
+ // Fixed room: two authorized users (one with two sockets), one unauthorized
+ // user, one anonymous socket.
+ const moveSeen: string[] = [];
+ const deleteSeen: string[] = [];
+
+ const mkSocket = (id: string, userId: string | undefined) => ({
+ id,
+ data: userId ? { userId } : {},
+ emit: jest.fn((_event: string, payload: { operation: string }) => {
+ if (payload.operation === 'moveTreeNode') moveSeen.push(id);
+ if (payload.operation === 'deleteTreeNode') deleteSeen.push(id);
+ }),
+ });
+
+ const sockets = [
+ mkSocket('s-ok-1', 'user-ok'), // authorized, tab 1
+ mkSocket('s-ok-2', 'user-ok'), // authorized, tab 2 (fan-out)
+ mkSocket('s-no', 'user-no'), // unauthorized
+ mkSocket('s-anon', undefined), // anonymous (no userId)
+ ];
+
+ beforeEach(async () => {
+ moveSeen.length = 0;
+ deleteSeen.length = 0;
+
+ pagePermissionRepo = {
+ hasRestrictedPagesInSpace: jest.fn().mockResolvedValue(true),
+ // The move destination IS under a restricted ancestor.
+ hasRestrictedAncestor: jest.fn().mockResolvedValue(true),
+ // Only user-ok is authorized to see the page.
+ getUserIdsWithPageAccess: jest.fn().mockResolvedValue(['user-ok']),
+ };
+ const cache = {
+ get: jest.fn().mockResolvedValue(null),
+ set: jest.fn().mockResolvedValue(undefined),
+ del: jest.fn().mockResolvedValue(undefined),
+ };
+
+ const module: TestingModule = await Test.createTestingModule({
+ providers: [
+ WsTreeService,
+ WsService,
+ { provide: PagePermissionRepo, useValue: pagePermissionRepo },
+ { provide: CACHE_MANAGER, useValue: cache },
+ ],
+ }).compile();
+
+ const wsService = module.get(WsService);
+ const server = {
+ to: jest.fn().mockReturnValue({ emit: jest.fn() }),
+ in: jest.fn().mockReturnValue({
+ fetchSockets: jest.fn().mockResolvedValue(sockets),
+ }),
+ };
+ wsService.setServer(server as never);
+
+ treeService = module.get(WsTreeService);
+ });
+
+ it('authorized set (move) and complement (delete) partition the room; anon is in delete', async () => {
+ const event: PageMovedEvent = {
+ workspaceId: 'ws-1',
+ oldParentId: 'old-parent',
+ hasChildren: false,
+ node: { ...snapshot, parentPageId: 'restricted-parent', position: 'a5' },
+ };
+
+ await treeService.broadcastPageMoved(event);
+
+ const moveSet = new Set(moveSeen);
+ const deleteSet = new Set(deleteSeen);
+
+ // Authorized user's BOTH sockets got the move; nobody else did.
+ expect(moveSet).toEqual(new Set(['s-ok-1', 's-ok-2']));
+ // Everyone else (unauthorized + anonymous) got the delete.
+ expect(deleteSet).toEqual(new Set(['s-no', 's-anon']));
+
+ // DISJOINT: no socket received both a move and a delete.
+ const intersection = [...moveSet].filter((id) => deleteSet.has(id));
+ expect(intersection).toEqual([]);
+
+ // PARTITION: the two sets together cover every socket in the room exactly.
+ const union = new Set([...moveSet, ...deleteSet]);
+ expect(union).toEqual(new Set(sockets.map((s) => s.id)));
+
+ // The anonymous socket specifically lands in the DELETE set, never the move.
+ expect(deleteSet.has('s-anon')).toBe(true);
+ expect(moveSet.has('s-anon')).toBe(false);
+ });
+});
diff --git a/docs/backlog/feature-test-coverage-deferred.md b/docs/backlog/feature-test-coverage-deferred.md
new file mode 100644
index 00000000..410357a4
--- /dev/null
+++ b/docs/backlog/feature-test-coverage-deferred.md
@@ -0,0 +1,93 @@
+# Отложенные тесты по фичам с коммита 053a9c0d (хвост от PR #49)
+
+## Контекст
+
+PR #49 («test: cover features since 053a9c0d + repair test tooling») закрыл
+основную массу покрытия новых фич gitmost (+~330 тестов: server/Jest,
+client/Vitest, editor-ext/Vitest, packages/mcp/node:test) и починил
+тест-инструментарий (FIX-0 сломанные спеки transclusion, BUILD-0 сборка
+editor-ext перед серверными тестами, INFRA-0 резолв `.tsx` email-шаблонов).
+
+Часть тестов из принятого тест-плана **намеренно отложена** — им нужен
+тестовый Postgres, реальный Redis или HTTP/e2e-харнес, которых в проекте
+сейчас нет, либо инвазивный рефактор продакшн-кода. Ниже — что осталось и
+почему, чтобы не потерять.
+
+---
+
+## 1. Интеграционные тесты против БД (нужен тестовый Postgres)
+
+Сейчас все repo-зависимые проверки делаются на моках; SQL-уровень не
+исполняется. Чтобы покрыть это честно, нужен поднимаемый в CI Postgres
+(testcontainers или сервис в pipeline) + хелпер миграций.
+
+- **`AiAgentRoleRepo` — изоляция и индексы.**
+ `apps/server/src/database/repos/ai-agent-roles/ai-agent-roles.repo.ts`.
+ Проверить против реальной БД: `findById`/`listByWorkspace` исключают
+ soft-deleted строки; `findById` для roleId из ЧУЖОГО workspace → undefined
+ (tenant-изоляция); дубль имени в одном workspace → 23505; то же имя
+ переиспользуемо после softDelete (partial unique index
+ `WHERE deleted_at IS NULL`, миграция `20260620T120000-ai-agent-roles.ts`);
+ одинаковое имя в разных workspace разрешено. Это «хребет» безопасности —
+ сейчас только предполагается unit-моками.
+
+- **`AiChatRepo.findByCreator` — join role-badge.**
+ `apps/server/src/database/repos/ai-chat/ai-chat.repo.ts` (~:27-70).
+ Чат с enabled-ролью → roleName/roleEmoji заполнены; с soft-deleted ролью →
+ бейдж NULL; с DISABLED ролью → бейдж NULL (должно совпадать с
+ `resolveRoleForRequest`); ORDER BY квалифицирован `aiChats.*` (нет
+ ambiguous column после join). Не проверяемо чистым unit-ом.
+
+- **`WorkspaceService.update` / `WorkspaceRepo.updateSetting` — jsonb-merge.**
+ `apps/server/src/core/workspace/services/workspace.service.ts` (~:514),
+ `apps/server/src/database/repos/workspace/workspace.repo.ts` (~:275).
+ Сейчас покрыта только форма вызова сервиса
+ (`workspace-html-embed.spec.ts`). Не покрыто (нужна БД): `htmlEmbed:true`
+ персистится через jsonb-merge **не затирая** соседние настройки (ai,
+ sharing). Это и есть «kill-switch пишется» — критично, что write-половина
+ тоггла не ломает остальной settings-namespace.
+
+- **FK `page_template_references` onDelete('cascade').**
+ Миграция `20260620T131000-page-template-references.ts`. Проверить, что
+ удаление source/reference-страницы каскадит строки ссылок.
+
+## 2. HTTP / e2e-харнес (его нет в apps/server)
+
+- **Public-share ассистент: обход per-IP throttle ротацией XFF, но
+ per-workspace cap держит.**
+ Контроллер использует стоковый `@UseGuards(ThrottlerGuard)`
+ (`apps/server/src/core/ai-chat/public-share-chat.controller.ts`), IP берётся
+ из Fastify `trustProxy` → `X-Forwarded-For`. Единственный оправданный e2e
+ (named journey «аноним спамит ассистента»): ротация XFF обходит per-IP
+ лимит 5/min, но per-workspace cost-cap всё равно отдаёт 429. Требует
+ поднятого HTTP-слоя Nest + trusted-proxy конфигурации.
+
+- **Достоверность Lua-окна cost-cap против реального Redis.**
+ `apps/server/src/core/ai-chat/public-share-workspace-limiter.ts`
+ (`SLIDING_WINDOW_LUA`). Сейчас cap тестируется против TS-реализации
+ `FakeRedis` в `public-share-chat.spec.ts` — баг в самой Lua-строке
+ (`>=` vs `>`, неверный PEXPIRE) не поймается. Нужен интеграционный тест
+ против реального/testcontainers Redis.
+
+## 3. Полная интеграция `AiChatService.stream` (рефактор R1-stream)
+
+`apps/server/src/core/ai-chat/ai-chat.service.ts`. В PR #49 извлечён и
+покрыт только чистый `buildErrorAssistantRecord`. Полные интеграционные
+сценарии — **запись чата, упавшего на первом ходу** (onError), жизненный
+цикл external-MCP клиентов (закрытие при throw/onFinish), и
+**история восстанавливается из БД, а не из `body.messages`** (анти-tamper) —
+требуют сидирования SDK `streamText` (инъекция/seam колбэков `onError`/
+`onFinish`/`onAbort` + `res.hijack`). Отложено, чтобы не дестабилизировать
+287-строчный `stream()`; делать вместе с выносом testable turn-pipeline.
+
+---
+
+## Сопутствующие НЕ-тестовые находки
+
+Вынесены в отдельные issues (всплыли во время написания тестов):
+
+- #52 — ai-roles: нет серверной валидации модели роли + дрейф enum драйверов.
+- #53 — ws: `invalidateSpaceRestrictionCache` без вызывающих (30с stale-окно).
+- #54 — page-embed: серверный guard глубины/циклов раскрытия.
+- #55 — transclusion: cycle-guard в `collectPageEmbedsFromPmJson`.
+- #56 — test-infra: jest DI + lib0 ESM (16 падающих сьютов).
diff --git a/packages/editor-ext/src/index.ts b/packages/editor-ext/src/index.ts
index 86ecc4a6..08888ddf 100644
--- a/packages/editor-ext/src/index.ts
+++ b/packages/editor-ext/src/index.ts
@@ -35,4 +35,5 @@ export * from "./lib/status";
export * from "./lib/pdf";
export * from "./lib/page-break";
export * from "./lib/resizable-nodeview";
+export * from "./lib/footnote";
diff --git a/packages/editor-ext/src/lib/footnote/footnote-definition.ts b/packages/editor-ext/src/lib/footnote/footnote-definition.ts
new file mode 100644
index 00000000..819adb70
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/footnote-definition.ts
@@ -0,0 +1,72 @@
+import { mergeAttributes, Node } from "@tiptap/core";
+import { ReactNodeViewRenderer } from "@tiptap/react";
+import { FOOTNOTE_DEFINITION_NAME } from "./footnote-util";
+
+ export interface FootnoteDefinitionOptions {
+   HTMLAttributes: Record<string, any>; // merged into the rendered <div> attributes
+   view: any; // React node-view component; when falsy no node view is registered
+ }
+
+/**
+ * A single footnote definition: an editable block (paragraphs only, no nested
+ * footnotes) keyed by `id` to its reference. Lives only inside `footnotesList`.
+ */
+export const FootnoteDefinition = Node.create({
+ name: FOOTNOTE_DEFINITION_NAME,
+
+ // paragraph+ keeps definitions simple. Note this does NOT block nested
+ // footnote references on its own: a footnoteReference is inline and the
+ // paragraphs here accept inline content, so the schema would permit one.
+ // Nested references are instead prevented by the setFootnote command and the
+ // sync plugin (which refuse to create/keep a reference inside a definition).
+ content: "paragraph+",
+ defining: true,
+ isolating: true,
+ selectable: false,
+
+ addOptions() {
+ return {
+ HTMLAttributes: {},
+ view: null,
+ };
+ },
+
+ addAttributes() {
+ return {
+ id: {
+ default: null,
+ parseHTML: (element) => element.getAttribute("data-id"),
+ renderHTML: (attributes) => {
+ if (!attributes.id) return {};
+ return { "data-id": attributes.id };
+ },
+ },
+ };
+ },
+
+ parseHTML() {
+ return [
+ {
+ tag: "div[data-footnote-def]",
+ },
+ ];
+ },
+
+ renderHTML({ HTMLAttributes }) {
+ return [
+ "div",
+ mergeAttributes(
+ { "data-footnote-def": "", class: "footnote-def" },
+ this.options.HTMLAttributes,
+ HTMLAttributes,
+ ),
+ 0,
+ ];
+ },
+
+ addNodeView() {
+ if (!this.options.view) return null;
+ this.editor.isInitialized = true;
+ return ReactNodeViewRenderer(this.options.view);
+ },
+});
diff --git a/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts b/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts
new file mode 100644
index 00000000..844134f6
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts
@@ -0,0 +1,140 @@
+import { describe, it, expect } from "vitest";
+import { htmlToMarkdown } from "../markdown/utils/turndown.utils";
+import { markdownToHtml } from "../markdown/utils/marked.utils";
+import { extractFootnoteDefinitions } from "../markdown/utils/footnote.marked";
+
+ // HTML the editor-ext nodes render (sup[data-footnote-ref], section/div). NOTE(review): markup reconstructed — confirm reference placement.
+ const HTML =
+   `<p>Water<sup data-footnote-ref data-id="fn1"></sup> and clay<sup data-footnote-ref data-id="fn2"></sup>.</p>` +
+   `<section data-footnotes>` +
+   `<div data-footnote-def data-id="fn1"><p>First note.</p></div>` +
+   `<div data-footnote-def data-id="fn2"><p>Second note.</p></div>` +
+   `</section>`;
+
+describe("footnote markdown round-trip", () => {
+ it("HTML -> Markdown produces pandoc footnote syntax", () => {
+ const md = htmlToMarkdown(HTML);
+ expect(md).toContain("[^fn1]");
+ expect(md).toContain("[^fn2]");
+ expect(md).toContain("[^fn1]: First note.");
+ expect(md).toContain("[^fn2]: Second note.");
+ });
+
+ it("Markdown -> HTML rebuilds the footnote nodes' HTML", async () => {
+ const md = htmlToMarkdown(HTML);
+ const html = await markdownToHtml(md);
+ expect(html).toContain('data-footnote-ref data-id="fn1"');
+ expect(html).toContain('data-footnote-ref data-id="fn2"');
+ expect(html).toContain("data-footnotes");
+ expect(html).toContain('data-footnote-def data-id="fn1"');
+ expect(html).toContain("First note.");
+ expect(html).toContain("Second note.");
+ });
+
+ it("preserves a [^id]: line shown inside a fenced code block (not a definition)", async () => {
+ // A document that DOCUMENTS footnote syntax inside a code fence. The
+ // `[^demo]: ...` line is example text, not a real definition, and must
+ // survive the Markdown -> HTML conversion verbatim.
+ const md = [
+ "Here is how footnotes look:",
+ "",
+ "```markdown",
+ "Some text[^demo]",
+ "",
+ "[^demo]: this is the definition",
+ "```",
+ "",
+ "End of doc.",
+ ].join("\n");
+
+ const html = await markdownToHtml(md);
+ // The example definition line is kept inside the rendered code block.
+ expect(html).toContain("[^demo]: this is the definition");
+ // It did NOT get pulled out into a real footnotes section.
+ expect(html).not.toContain("data-footnotes");
+ expect(html).not.toContain("data-footnote-def");
+ });
+
+ it("extractFootnoteDefinitions de-duplicates colliding ids and rewrites markers", () => {
+ // Two definitions share id `d`, and the body has two `[^d]` markers. The
+ // output must keep BOTH definitions with DISTINCT ids and rewrite the second
+ // marker so the (reference, definition) pairing stays 1:1.
+ const md = [
+ "See here[^d] and there[^d].",
+ "",
+ "[^d]: first",
+ "[^d]: second",
+ ].join("\n");
+
+ const { body, section } = extractFootnoteDefinitions(md);
+
+ // Pull out the def ids from the section in order.
+ const defIds = Array.from(
+ section.matchAll(/data-footnote-def data-id="([^"]+)"/g),
+ ).map((m) => m[1]);
+ expect(defIds.length).toBe(2);
+ expect(new Set(defIds).size).toBe(2); // distinct
+ expect(defIds[0]).toBe("d"); // first definition keeps the id
+
+ // Both definition texts survive.
+ expect(section).toContain("first");
+ expect(section).toContain("second");
+
+ // The body still has two markers, now pointing at the two distinct ids.
+ const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map(
+ (m) => m[1],
+ );
+ expect(refIds.length).toBe(2);
+ expect(refIds.sort()).toEqual(defIds.sort());
+ });
+
+ it("extractFootnoteDefinitions dedups DETERMINISTICALLY (same input -> same ids)", () => {
+ // The derived id must be a pure function of the input markdown so importing
+ // the same source twice (or via the editor and the MCP mirror) yields
+ // identical ids — never random/time-based.
+ const md = [
+ "See[^d] one[^d] two[^d].",
+ "",
+ "[^d]: first",
+ "[^d]: second",
+ "[^d]: third",
+ ].join("\n");
+
+ const run = () => {
+ const { body, section } = extractFootnoteDefinitions(md);
+ const defIds = Array.from(
+ section.matchAll(/data-footnote-def data-id="([^"]+)"/g),
+ ).map((m) => m[1]);
+ const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map(
+ (m) => m[1],
+ );
+ return { defIds, refIds };
+ };
+
+ const a = run();
+ const b = run();
+ // Identical across runs (this is what would FAIL on the random-id version).
+ expect(a.defIds).toEqual(b.defIds);
+ expect(a.refIds).toEqual(b.refIds);
+ // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
+ expect(a.defIds).toEqual(["d", "d__2", "d__3"]);
+ expect(a.refIds.sort()).toEqual(a.defIds.sort());
+ });
+
+ it("markdownToHtml with duplicate ids renders two distinct footnote defs", async () => {
+ const md = [
+ "See here[^d] and there[^d].",
+ "",
+ "[^d]: first",
+ "[^d]: second",
+ ].join("\n");
+ const html = await markdownToHtml(md);
+ const defIds = Array.from(
+ html.matchAll(/data-footnote-def data-id="([^"]+)"/g),
+ ).map((m) => m[1]);
+ expect(defIds.length).toBe(2);
+ expect(new Set(defIds).size).toBe(2);
+ expect(html).toContain("first");
+ expect(html).toContain("second");
+ });
+});
diff --git a/packages/editor-ext/src/lib/footnote/footnote-numbering.ts b/packages/editor-ext/src/lib/footnote/footnote-numbering.ts
new file mode 100644
index 00000000..8a487b1f
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/footnote-numbering.ts
@@ -0,0 +1,119 @@
+import { EditorState, Plugin, PluginKey } from "@tiptap/pm/state";
+import { Decoration, DecorationSet } from "@tiptap/pm/view";
+import { Node as ProseMirrorNode } from "@tiptap/pm/model";
+import {
+ FOOTNOTE_DEFINITION_NAME,
+ FOOTNOTE_REFERENCE_NAME,
+ computeFootnoteNumbers,
+} from "./footnote-util";
+
+ // Key under which the numbering plugin's state is stored and read back.
+ export const footnoteNumberingPluginKey =
+   new PluginKey<FootnoteNumberingState>("footnoteNumbering");
+
+/**
+ * Cached state of the numbering plugin. Both the displayed-number map and the
+ * decoration set are computed ONCE per doc-changing transaction (in `apply`) and
+ * cached here, so NodeViews can read a footnote's number by id without walking
+ * the whole document on every React render (which was O(n^2) per keystroke in
+ * large docs).
+ */
+ interface FootnoteNumberingState {
+   /** referenceId -> 1-based display number, for the current doc. */
+   numbers: Map<string, number>;
+   /** Decorations rendering those numbers (refs + definitions). */
+   decorations: DecorationSet;
+ }
+
+/**
+ * Build the decoration set for footnote numbers. Pure function of the document:
+ * walk references in document order, assign 1-based numbers, then attach a
+ * node decoration (carrying the number via a CSS variable + data attribute) to
+ * every reference and to every matching definition. Because it is deterministic
+ * from the document alone, all collaborating clients compute identical numbers
+ * with no document mutation.
+ */
+export function buildFootnoteDecorations(doc: ProseMirrorNode): DecorationSet {
+ return buildFootnoteNumberingState(doc).decorations;
+}
+
+/**
+ * Compute both the number map AND the decorations for `doc` in a single walk.
+ * The plugin caches the result so NodeViews can read numbers without
+ * recomputing.
+ */
+ function buildFootnoteNumberingState(
+   doc: ProseMirrorNode,
+ ): FootnoteNumberingState {
+   // Node types that display a footnote number: references and definitions.
+   const numberedTypes = [FOOTNOTE_REFERENCE_NAME, FOOTNOTE_DEFINITION_NAME];
+   const numbers = computeFootnoteNumbers(doc);
+   const numberDecorations: Decoration[] = [];
+
+   // Single pass over the document, visiting nodes in document order.
+   doc.descendants((node, pos) => {
+     // Anything that is not a footnote node is left undecorated.
+     if (!numberedTypes.includes(node.type.name)) return;
+
+     // Ids absent from the number map get no decoration at all.
+     const displayNumber = numbers.get(node.attrs.id);
+     if (displayNumber == null) return;
+
+     // The number travels as a data attribute and as a CSS custom property.
+     const label = String(displayNumber);
+     numberDecorations.push(
+       Decoration.node(pos, pos + node.nodeSize, {
+         "data-footnote-number": label,
+         style: `--footnote-number: "${displayNumber}";`,
+       }),
+     );
+   });
+
+   // Return the map together with the decorations built from it.
+   return {
+     numbers,
+     decorations: DecorationSet.create(doc, numberDecorations),
+   };
+ }
+
+/**
+ * Read the cached footnote number for `id` from the numbering plugin's state.
+ * This is the source NodeViews should use instead of calling
+ * computeFootnoteNumbers() on every render (that walked the whole doc per
+ * NodeView per render = O(n^2) per keystroke). Returns undefined if the plugin
+ * is not installed or the id has no number yet.
+ */
+export function getFootnoteNumber(
+ state: EditorState,
+ id: string,
+): number | undefined {
+ return footnoteNumberingPluginKey.getState(state)?.numbers.get(id);
+}
+
+/**
+ * ProseMirror plugin that renders footnote numbers as decorations. It never
+ * mutates the document (safe in read-only / share and in collaboration) — it
+ * only recomputes decorations from the current doc on each transaction.
+ */
+export function footnoteNumberingPlugin(): Plugin {
+ return new Plugin({
+ key: footnoteNumberingPluginKey,
+ state: {
+ init(_, { doc }) {
+ return buildFootnoteNumberingState(doc);
+ },
+ apply(tr, old) {
+ // Recompute (and re-cache) only when the document actually changed, so
+ // the number map NodeViews read stays current on every edit while
+ // non-doc transactions (selection, etc.) reuse the cache for free.
+ if (!tr.docChanged) return old;
+ return buildFootnoteNumberingState(tr.doc);
+ },
+ },
+ props: {
+ decorations(state) {
+ return footnoteNumberingPluginKey.getState(state)?.decorations;
+ },
+ },
+ });
+}
diff --git a/packages/editor-ext/src/lib/footnote/footnote-reference.ts b/packages/editor-ext/src/lib/footnote/footnote-reference.ts
new file mode 100644
index 00000000..7b47617d
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/footnote-reference.ts
@@ -0,0 +1,331 @@
+import { mergeAttributes, Node } from "@tiptap/core";
+import { TextSelection, Transaction } from "@tiptap/pm/state";
+import { ReactNodeViewRenderer } from "@tiptap/react";
+import {
+ FOOTNOTE_DEFINITION_NAME,
+ FOOTNOTE_REFERENCE_NAME,
+ FOOTNOTES_LIST_NAME,
+ generateFootnoteId,
+} from "./footnote-util";
+import { footnoteNumberingPlugin } from "./footnote-numbering";
+import { footnoteSyncPlugin, footnotePastePlugin } from "./footnote-sync";
+
+ export interface FootnoteReferenceOptions {
+   HTMLAttributes: Record<string, any>; // merged into the rendered <sup> attributes
+   view: any; // React node-view component; when falsy no node view is registered
+   /**
+    * Optional predicate identifying remote/collaboration transactions so the
+    * sync plugin skips them (orphan cleanup must run only on local changes).
+    */
+   isRemoteTransaction?: (tr: Transaction) => boolean;
+   /**
+    * When false, the footnote sync/integrity plugin is fully disabled — it never
+    * appends a transaction. Numbering decorations stay active. Set this in
+    * read-only / share editors so a viewer's doc is decorated (numbered) but
+    * never mutated (e.g. by a programmatic setContent). Defaults to true.
+    */
+   enableSync?: boolean;
+ }
+
+ declare module "@tiptap/core" {
+   interface Commands<ReturnType> {
+     footnote: {
+       /**
+        * Insert a footnote reference at the cursor and create the matching
+        * (empty) definition in the bottom footnotes list, in one transaction.
+        */
+       setFootnote: () => ReturnType;
+       /**
+        * Remove a footnote reference and cascade-delete its definition (one
+        * transaction so a single undo restores both).
+        */
+       removeFootnote: (id: string) => ReturnType;
+       /** Scroll to (and focus) a footnote definition by id. */
+       scrollToFootnote: (id: string) => ReturnType;
+       /** Scroll to (and select) a footnote reference by id. */
+       scrollToReference: (id: string) => ReturnType;
+     };
+   }
+ }
+
+/**
+ * Inline atom that marks a footnote reference in the body text. It holds only
+ * an `id` linking it to its `footnoteDefinition`; the visible number is NOT
+ * stored — it is rendered by the numbering plugin as a decoration (see
+ * footnote-numbering.ts). Modeled on mention.ts (inline atom).
+ *
+ * The reference is forbidden inside code blocks and inside footnote definitions
+ * (no nested footnotes); those restrictions are enforced by the `setFootnote`
+ * command and the sync plugin rather than by schema content expressions, since
+ * an inline group node cannot express "not inside X" declaratively.
+ */
+export const FootnoteReference = Node.create({
+ name: FOOTNOTE_REFERENCE_NAME,
+
+ // Higher than the default (100) so its parse rule is considered before the
+ // Superscript mark's rule.
+ priority: 101,
+
+ group: "inline",
+ inline: true,
+ atom: true,
+ selectable: true,
+ draggable: false,
+
+ addOptions() {
+ return {
+ HTMLAttributes: {},
+ view: null,
+ isRemoteTransaction: undefined,
+ enableSync: true,
+ };
+ },
+
+ addProseMirrorPlugins() {
+ const plugins = [footnoteNumberingPlugin()];
+ // Numbering always runs (decoration-only). The sync/integrity plugin is
+ // skipped entirely when sync is disabled (read-only / share) so the viewer's
+ // doc is never mutated.
+ if (this.options.enableSync !== false) {
+ plugins.push(footnoteSyncPlugin(this.options.isRemoteTransaction));
+ // Regenerate colliding footnote ids on paste so a pasted reference+
+ // definition pair never clobbers/merges with an existing footnote.
+ plugins.push(footnotePastePlugin());
+ }
+ return plugins;
+ },
+
+ addAttributes() {
+ return {
+ id: {
+ default: null,
+ parseHTML: (element) => element.getAttribute("data-id"),
+ renderHTML: (attributes) => {
+ if (!attributes.id) return {};
+ return { "data-id": attributes.id };
+ },
+ },
+ };
+ },
+
+   parseHTML() {
+     return [
+       {
+         // High priority so the Superscript mark (which also matches <sup>) does
+         // not claim a footnote reference and drop it as empty content.
+         tag: "sup[data-footnote-ref]",
+         priority: 100,
+       },
+     ];
+   },
+
+ renderHTML({ HTMLAttributes }) {
+ return [
+ "sup",
+ mergeAttributes(
+ { "data-footnote-ref": "", class: "footnote-ref" },
+ this.options.HTMLAttributes,
+ HTMLAttributes,
+ ),
+ ];
+ },
+
+ // Plain-text representation (used by generateText / markdown text fallbacks).
+ renderText({ node }) {
+ return `[^${node.attrs.id ?? ""}]`;
+ },
+
+ addNodeView() {
+ if (!this.options.view) return null;
+ // Force the react node view to render immediately using flush sync.
+ this.editor.isInitialized = true;
+ return ReactNodeViewRenderer(this.options.view);
+ },
+
+ addCommands() {
+ return {
+ setFootnote:
+ () =>
+ ({ state, tr, dispatch, editor }) => {
+ const { schema, selection } = state;
+ const refType = schema.nodes[FOOTNOTE_REFERENCE_NAME];
+ const listType = schema.nodes[FOOTNOTES_LIST_NAME];
+ const defType = schema.nodes[FOOTNOTE_DEFINITION_NAME];
+ if (!refType || !listType || !defType) return false;
+
+ const { $from } = selection;
+
+ // Forbid references inside code blocks and inside footnote definitions
+ // (no nested footnotes).
+ for (let depth = $from.depth; depth > 0; depth--) {
+ const node = $from.node(depth);
+ if (
+ node.type.spec.code ||
+ node.type.name === FOOTNOTE_DEFINITION_NAME ||
+ node.type.name === FOOTNOTES_LIST_NAME
+ ) {
+ return false;
+ }
+ }
+
+ // Make sure the parent accepts an inline atom here.
+ const insertPos = selection.from;
+ if (!$from.parent.type.spec.content?.includes("inline") &&
+ !$from.parent.isTextblock) {
+ return false;
+ }
+
+ const id = generateFootnoteId();
+
+ // 1) Count references that occur strictly before the insertion point;
+ // the new definition goes at that index in the bottom list so the
+ // list order matches reference order.
+ let refsBefore = 0;
+ state.doc.nodesBetween(0, insertPos, (node) => {
+ if (node.type.name === FOOTNOTE_REFERENCE_NAME) refsBefore++;
+ });
+
+ // 2) Insert the reference at the cursor.
+ tr.insert(insertPos, refType.create({ id }));
+
+ // 3) Locate (or create) the footnotes list, then insert the new
+ // definition at index `refsBefore`.
+ const emptyParagraph = schema.nodes.paragraph.create();
+ const definition = defType.create({ id }, emptyParagraph);
+
+ // Find existing list (always the last top-level child if present).
+ let listPos: number | null = null;
+ let listNode: any = null;
+ tr.doc.forEach((child, offset) => {
+ if (child.type.name === FOOTNOTES_LIST_NAME) {
+ listPos = offset;
+ listNode = child;
+ }
+ });
+
+ let defInsidePos: number | null = null;
+ if (listNode == null) {
+ // Create a new list at the very end of the document.
+ const list = listType.create(null, definition);
+ const end = tr.doc.content.size;
+ tr.insert(end, list);
+ // Cursor target: inside the new definition's first paragraph.
+ // end -> list open, +1 definition open, +1 paragraph open.
+ defInsidePos = end + 3;
+ } else {
+ // Insert at the right index within the existing list.
+ const listStart = listPos! + 1; // position of the first definition
+ let pos = listStart;
+ let index = 0;
+ listNode.forEach((defChild: any, defOffset: number) => {
+ if (index < refsBefore) {
+ pos = listStart + defOffset + defChild.nodeSize;
+ index++;
+ }
+ });
+ tr.insert(pos, definition);
+ defInsidePos = pos + 2; // +1 enter definition, +1 enter paragraph
+ }
+
+ if (dispatch) {
+ // Move the cursor into the new definition's paragraph so the user
+ // can immediately type the footnote text.
+ try {
+ const resolved = tr.doc.resolve(
+ Math.min(defInsidePos!, tr.doc.content.size),
+ );
+ tr.setSelection(TextSelection.near(resolved));
+ } catch {
+ // Selection placement is best-effort; ignore failures.
+ }
+ tr.scrollIntoView();
+ dispatch(tr);
+ }
+
+ return true;
+ },
+
+       // Removes every reference carrying `id` and its definition in one
+       // transaction. Returns false when `id` is empty or nothing matches.
+       removeFootnote:
+         (id: string) =>
+         ({ state, tr, dispatch }) => {
+           if (!id) return false;
+
+           type Range = { from: number; to: number };
+           type ListRange = Range & { count: number };
+           // Collected in one object so callback writes are not narrowed away.
+           const found: {
+             refs: Range[];
+             def: Range | null;
+             defList: ListRange | null;
+           } = { refs: [], def: null, defList: null };
+           // Most recently visited footnotes list: parent of the defs after it.
+           let currentList: ListRange | null = null;
+
+           state.doc.descendants((node, pos, parent) => {
+             const range = { from: pos, to: pos + node.nodeSize };
+             if (node.type.name === FOOTNOTES_LIST_NAME) {
+               currentList = { ...range, count: node.childCount };
+             }
+             // Everything below only concerns nodes carrying the requested id.
+             if (node.attrs.id !== id) return;
+             if (node.type.name === FOOTNOTE_REFERENCE_NAME) {
+               found.refs.push(range);
+             }
+             if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
+               found.def = range;
+               // Remember the list that actually CONTAINS this definition.
+               const inList = parent?.type.name === FOOTNOTES_LIST_NAME;
+               found.defList = inList ? currentList : null;
+             }
+           });
+
+           if (found.refs.length === 0 && !found.def) return false;
+
+           // Build the list of ranges to delete. If removing this definition
+           // would empty its list (it is that list's only child), delete the
+           // entire list instead — an empty footnotesList is invalid schema and
+           // a leftover empty list would be ugly. Only the list holding the
+           // definition is considered, so an unrelated list is never removed.
+           const ranges: Range[] = [...found.refs];
+           if (found.def) {
+             const { defList } = found;
+             ranges.push(defList && defList.count <= 1 ? defList : found.def);
+           }
+
+           // Delete from the end so earlier positions stay valid.
+           ranges
+             .sort((a, b) => b.from - a.from)
+             .forEach(({ from, to }) => tr.delete(from, to));
+
+           if (dispatch) dispatch(tr);
+           return true;
+         },
+
+       scrollToFootnote:
+         (id: string) =>
+         ({ editor }) => {
+           if (!id) return false;
+           // Escape `\` and `"` so the id cannot break out of the quoted value.
+           const safeId = id.replace(/["\\]/g, "\\$&");
+           const selector = `[data-footnote-def][data-id="${safeId}"]`;
+           const dom = editor.view.dom.querySelector<HTMLElement>(selector);
+           dom?.scrollIntoView({ behavior: "smooth", block: "center" });
+           return dom !== null;
+         },
+
+       scrollToReference:
+         (id: string) =>
+         ({ editor }) => {
+           if (!id) return false;
+           // Escape `\` and `"` so the id cannot break out of the quoted value.
+           const safeId = id.replace(/["\\]/g, "\\$&");
+           const selector = `sup[data-footnote-ref][data-id="${safeId}"]`;
+           const dom = editor.view.dom.querySelector<HTMLElement>(selector);
+           dom?.scrollIntoView({ behavior: "smooth", block: "center" });
+           return dom !== null;
+         },
+ };
+ },
+});
diff --git a/packages/editor-ext/src/lib/footnote/footnote-sync.ts b/packages/editor-ext/src/lib/footnote/footnote-sync.ts
new file mode 100644
index 00000000..505a60d0
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/footnote-sync.ts
@@ -0,0 +1,634 @@
+import { Plugin, PluginKey, Transaction } from "@tiptap/pm/state";
+import { Node as ProseMirrorNode, Fragment, Slice } from "@tiptap/pm/model";
+import {
+ FOOTNOTE_DEFINITION_NAME,
+ FOOTNOTE_REFERENCE_NAME,
+ FOOTNOTES_LIST_NAME,
+ deriveFootnoteId,
+} from "./footnote-util";
+
+ export const footnoteSyncPluginKey = new PluginKey("footnoteSync"); // key under which footnoteSyncPlugin() registers itself
+
+ const SYNC_META = "footnoteSyncApplied"; // meta flag set on the sync plugin's own transaction so appendTransaction can skip it
+
+ interface RefOccurrence { // one footnote reference node recorded by scan()
+ /** Position of the reference node in the document, as reported by `doc.descendants`. */
+ pos: number;
+ /** The id the reference currently carries (always truthy: scan() skips references without an id). */
+ id: string;
+ node: ProseMirrorNode; // the reference node itself
+ }
+
+ interface DefOccurrence { // one footnote definition node recorded by scan()
+ /** Position of the definition node in the document, as reported by `doc.descendants`. */
+ pos: number;
+ /** The id the definition currently carries (always truthy: scan() skips definitions without an id). */
+ id: string;
+ node: ProseMirrorNode; // the definition node itself
+ }
+
+ interface FootnoteScan { // result of a single scan() pass over the document
+ /**
+ * Every reference occurrence in document order (NOT de-duplicated). Keeping
+ * duplicates visible is what lets resolveCollisions() detect references that
+ * share an id and re-id them instead of silently collapsing them.
+ */
+ refOccurrences: RefOccurrence[];
+ /**
+ * Every definition occurrence in document order (NOT de-duplicated). A
+ * last-wins id -> node map would collapse two definitions sharing an id into
+ * one, i.e. silently lose content, so occurrences are kept as a plain list.
+ */
+ defOccurrences: DefOccurrence[];
+ /** Every footnotesList node found by `doc.descendants`, in document order (the scan is not restricted to top-level nodes). */
+ lists: Array<{ pos: number; node: ProseMirrorNode }>;
+ }
+
+ /**
+  * Walk the whole document once and record, in document order, every footnote
+  * reference, every footnote definition and every footnotes list.
+  *
+  * References and definitions whose `id` attr is falsy are ignored; lists are
+  * always recorded. Nothing is de-duplicated here.
+  */
+ function scan(doc: ProseMirrorNode): FootnoteScan {
+   const found: FootnoteScan = {
+     refOccurrences: [],
+     defOccurrences: [],
+     lists: [],
+   };
+
+   doc.descendants((node, pos) => {
+     switch (node.type.name) {
+       case FOOTNOTE_REFERENCE_NAME:
+         if (node.attrs.id) {
+           found.refOccurrences.push({ pos, id: node.attrs.id, node });
+         }
+         break;
+       case FOOTNOTE_DEFINITION_NAME:
+         if (node.attrs.id) {
+           found.defOccurrences.push({ pos, id: node.attrs.id, node });
+         }
+         break;
+       case FOOTNOTES_LIST_NAME:
+         found.lists.push({ pos, node });
+         break;
+     }
+   });
+
+   return found;
+ }
+
+/**
+ * Result of resolving id collisions: a 1:1, de-duplicated pairing plan plus the
+ * concrete reference re-id edits that must be applied to the body so the doc no
+ * longer contains two footnotes sharing a single id.
+ *
+ * The overriding invariant is that NO definition is ever dropped here: every
+ * definition occurrence ends up with a unique id and therefore survives the
+ * canonical rebuild. Duplicate references are likewise re-id'd (and paired with
+ * a duplicate definition when one exists) so importing/pasting `[^d]` twice with
+ * two `[^d]:` definitions yields TWO distinct footnotes rather than one.
+ */
+interface CollisionPlan {
+ /**
+ * Reference ids in document order, de-duplicated AFTER re-id. This is the
+ * source of truth for definition order/numbering, exactly as before — only
+ * now collisions have been resolved so it no longer hides duplicates.
+ */
+ referenceIds: string[];
+ /** id -> definition node, after duplicates were re-id'd. One entry per id. */
+ definitions: Map;
+ /**
+ * Body reference re-id edits to apply (position of a reference node -> the
+ * fresh id it must carry). Empty when there are no colliding references.
+ */
+ refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>;
+ /** True when any collision required a re-id (refs and/or defs). */
+ changed: boolean;
+}
+
+/**
+ * Resolve duplicate-id collisions among references and definitions WITHOUT ever
+ * dropping a definition.
+ *
+ * Strategy:
+ * - Walk references in document order. The FIRST reference for an id keeps it.
+ * Any later reference sharing that id is a duplicate and gets a fresh unique
+ * id; if a still-unclaimed duplicate definition with the original id exists,
+ * it is re-id'd to the SAME fresh id so the (ref, def) pair stays matched.
+ * - Walk definitions in document order. The FIRST definition for an id keeps
+ * it; later duplicates that were not already claimed by a duplicate reference
+ * get their own fresh unique id (surviving as a distinct footnote/orphan).
+ *
+ * Re-id determinism: every fresh id is DERIVED from document state via
+ * deriveFootnoteId (e.g. `X__2`, `X__3`, collision-bumped against the set of ids
+ * already present) — NEVER random/time-based. Because the sync plugin runs
+ * identically on every collaborating client, a deterministic re-id is the only
+ * way they can converge on the SAME ids; a random id (the previous
+ * implementation) made two clients editing the same duplicate-id document mint
+ * DIFFERENT ids for the same duplicate, causing permanent Yjs divergence.
+ */
+function resolveCollisions(scan: FootnoteScan): CollisionPlan {
+ const definitions = new Map();
+ const refReids: Array<{
+ pos: number;
+ node: ProseMirrorNode;
+ newId: string;
+ }> = [];
+ const referenceIds: string[] = [];
+ const seenRefIds = new Set();
+ let changed = false;
+
+ // `taken` is the set of every id that must be avoided when minting a derived
+ // id: all original reference + definition ids in the document PLUS every id we
+ // mint during this pass. It is pure document state, so the derivation stays
+ // deterministic across clients. Per-original occurrence counters make the k-th
+ // duplicate of `X` deterministically become `X__2`, `X__3`, ...
+ const taken = new Set();
+ for (const occ of scan.refOccurrences) taken.add(occ.id);
+ for (const occ of scan.defOccurrences) taken.add(occ.id);
+ const occurrenceOf = new Map();
+ // Mint a deterministic unique id for a duplicate of `originalId`. The first
+ // duplicate is occurrence 2 (the keeper is occurrence 1), then 3, 4, ...
+ const mintId = (originalId: string): string => {
+ const next = (occurrenceOf.get(originalId) ?? 1) + 1;
+ occurrenceOf.set(originalId, next);
+ const id = deriveFootnoteId(originalId, next, taken);
+ taken.add(id);
+ return id;
+ };
+
+ // Bucket definition occurrences by their original id so a duplicate reference
+ // can claim a matching (as-yet-unclaimed) duplicate definition and re-id the
+ // pair together. defByOriginalId[id] is consumed front-to-back.
+ const defByOriginalId = new Map();
+ for (const occ of scan.defOccurrences) {
+ const arr = defByOriginalId.get(occ.id);
+ if (arr) arr.push(occ);
+ else defByOriginalId.set(occ.id, [occ]);
+ }
+ // The FIRST definition for each id is the canonical keeper of that id.
+ const claimed = new Set();
+
+ for (const ref of scan.refOccurrences) {
+ if (!seenRefIds.has(ref.id)) {
+ // First reference with this id keeps it.
+ seenRefIds.add(ref.id);
+ referenceIds.push(ref.id);
+ continue;
+ }
+ // Duplicate reference: assign a deterministic derived id. Pair it with the
+ // next unclaimed duplicate definition (NOT the first keeper) carrying the
+ // same original id, if one exists, so the (ref, def) pairing is preserved
+ // 1:1.
+ const newId = mintId(ref.id);
+ refReids.push({ pos: ref.pos, node: ref.node, newId });
+ seenRefIds.add(newId);
+ referenceIds.push(newId);
+ changed = true;
+
+ const candidates = defByOriginalId.get(ref.id) ?? [];
+ // Skip the first occurrence (it keeps the original id); pick the first
+ // duplicate not already claimed.
+ for (let i = 1; i < candidates.length; i++) {
+ const cand = candidates[i];
+ if (!claimed.has(cand)) {
+ claimed.add(cand);
+ definitions.set(newId, cand.node);
+ break;
+ }
+ }
+ }
+
+ // Now place every definition under a unique id. The first occurrence of each
+ // original id keeps it; remaining duplicates either were paired with a
+ // duplicate reference above (already placed) or get a fresh standalone id.
+ const seenDefIds = new Set();
+ for (const occ of scan.defOccurrences) {
+ if (claimed.has(occ)) continue; // already placed against a duplicate ref id
+ if (!seenDefIds.has(occ.id)) {
+ seenDefIds.add(occ.id);
+ definitions.set(occ.id, occ.node);
+ } else {
+ // Duplicate definition with no duplicate reference to pair with: keep it
+ // with a deterministic derived id so it is NEVER silently dropped. (It
+ // becomes an orphan and is then subject to the normal orphan policy — but
+ // only ever because it has no matching reference, never because it
+ // collided.)
+ const newId = mintId(occ.id);
+ definitions.set(newId, occ.node);
+ changed = true;
+ }
+ }
+
+ return { referenceIds, definitions, refReids, changed };
+}
+
+/**
+ * Idempotent integrity pass for footnotes. Runs only on LOCAL document changes
+ * (skips remote/collaboration steps and — crucially — its own appended meta) so
+ * the plugin can never re-trigger itself, guaranteeing termination.
+ *
+ * Everything is computed against the CURRENT document in a SINGLE invocation and
+ * emitted as AT MOST ONE transaction, always tagged with SYNC_META (and
+ * addToHistory:false). The strategy is "rebuild the canonical footnotes section
+ * from the desired end-state" rather than running several self-triggering
+ * passes:
+ *
+ * 1. Collect every footnote reference id in document order (the source of
+ * truth for which definitions must exist and in what order).
+ * 2. Compute the desired list of definitions: one per referenced id, in
+ * reference order, reusing the existing definition node when present or
+ * creating an empty one when missing. Orphan definitions (no matching
+ * reference) are dropped.
+ * 3. Compare against the actual footnotesList state:
+ * - no references -> there must be NO list (remove any);
+ * - references present -> there must be exactly ONE list, holding
+ * exactly the desired definitions, and it
+ * must sit after all real body content.
+ * 4. If the document already matches the desired end-state, return null (no
+ * transaction) — this idempotence is what stops oscillation.
+ *
+ * Placement note: the list is considered correctly placed when nothing but
+ * EMPTY paragraphs follow it. This is deliberate so the plugin coexists with a
+ * trailing-node plugin (which keeps an empty paragraph at the very end of the
+ * doc): the footnote list does not need to be the literal last child, only the
+ * last block of meaningful content. Without this, the two plugins would
+ * ping-pong forever (list moved to end -> trailing paragraph appended -> list
+ * no longer last -> moved again ...).
+ *
+ * Duplicate-id collisions (two references and/or two definitions sharing one
+ * id — produced by importing `[^d]: a` / `[^d]: b`, or by pasting/duplicating a
+ * reference+definition pair) are resolved up front by resolveCollisions(): the
+ * duplicates are re-id'd to fresh unique ids so BOTH survive as distinct
+ * footnotes. This guarantees the overriding invariant — no footnoteDefinition is
+ * ever silently deleted by this automatic (addToHistory:false) transaction. A
+ * definition is only ever removed when it has NO matching reference (orphan
+ * policy), never because its id collided with another.
+ */
+export function footnoteSyncPlugin(
+ isRemoteTransaction?: (tr: Transaction) => boolean,
+): Plugin {
+ return new Plugin({
+ key: footnoteSyncPluginKey,
+ appendTransaction(transactions, _oldState, newState) {
+ // Only react to document changes.
+ if (!transactions.some((t) => t.docChanged)) return null;
+ // Skip our OWN appended transaction. This is the guard that makes the
+ // plugin loop-safe: the transaction we emit carries SYNC_META, so when
+ // ProseMirror feeds it back to appendTransaction we bail out immediately
+ // and never produce a follow-up. (Termination invariant.)
+ if (transactions.some((t) => t.getMeta(SYNC_META))) return null;
+ // Skip remote/collab steps (orphan cleanup must run only on local edits).
+ if (
+ isRemoteTransaction &&
+ transactions.some((t) => isRemoteTransaction(t))
+ ) {
+ return null;
+ }
+
+ const { doc, schema } = newState;
+ const defType = schema.nodes[FOOTNOTE_DEFINITION_NAME];
+ const listType = schema.nodes[FOOTNOTES_LIST_NAME];
+ const paragraphType = schema.nodes.paragraph;
+ if (!defType || !listType || !paragraphType) return null;
+
+ const info = scan(doc);
+
+ // 0) Resolve duplicate-id collisions (two references and/or two
+ // definitions sharing one id) by re-id'ing duplicates to fresh unique
+ // ids. This is the critical defense: the old last-wins Map silently
+ // dropped all but the last definition for a shared id; here EVERY
+ // definition survives with a unique id, and duplicate references are
+ // paired with duplicate definitions so two same-id imports/pastes yield
+ // two distinct footnotes instead of one.
+ const plan = resolveCollisions(info);
+ const referenceIds = plan.referenceIds;
+
+ // The set of ids that must have a definition, in reference order (after
+ // collision re-id). De-duplicated already by resolveCollisions.
+ const referenceIdSet = new Set(referenceIds);
+
+ // 1) For each definition occurrence, compute the id it should END UP with
+ // (which differs from its current id only when collision resolution
+ // re-id'd it). plan.definitions maps a FINAL id -> the chosen node, so
+ // we invert it by node identity to recover each occurrence's target id.
+ const finalIdByNode = new Map();
+ for (const [id, node] of plan.definitions) finalIdByNode.set(node, id);
+
+ const isEmptyParagraph = (node: ProseMirrorNode) =>
+ node.type === paragraphType && node.content.size === 0;
+
+ // 2) Classify every existing definition occurrence:
+ // - reId: keep the node in place, only change its id attr (collision).
+ // - orphan: delete it (its final id has no matching reference).
+ // A definition that already carries the right id and is referenced is
+ // left COMPLETELY untouched (its Yjs subtree is preserved). This is the
+ // core of the data-loss fix: a pure reference reorder produces NO
+ // mutation of any definition subtree.
+ interface DefReid {
+ pos: number;
+ node: ProseMirrorNode;
+ newId: string;
+ }
+ const defReids: DefReid[] = [];
+ const orphanDefs: DefOccurrence[] = [];
+ // Track which referenced ids already have a surviving (non-orphan)
+ // definition, so we can synthesize the genuinely missing ones.
+ const satisfiedIds = new Set();
+ // Choose a "primary" list to receive inserts/migrated defs: the LAST list
+ // whose placement is canonical (only empty paragraphs follow it), else the
+ // last list, else none. New defs and consolidated defs land here.
+ for (const occ of info.defOccurrences) {
+ const finalId = finalIdByNode.get(occ.node) ?? occ.id;
+ if (!referenceIdSet.has(finalId)) {
+ orphanDefs.push(occ);
+ continue;
+ }
+ if (occ.id !== finalId) {
+ defReids.push({ pos: occ.pos, node: occ.node, newId: finalId });
+ }
+ satisfiedIds.add(finalId);
+ }
+
+ // 3) Referenced ids with no surviving definition need a fresh empty one.
+ const missingIds = referenceIds.filter((id) => !satisfiedIds.has(id));
+
+ // 4) Determine list topology.
+ const hasRefs = referenceIds.length > 0;
+
+ // Pick the primary list: prefer the last canonically-placed list.
+ const listIsTrailing = (listPos: number, listNode: ProseMirrorNode) => {
+ const listEnd = listPos + listNode.nodeSize;
+ let ok = true;
+ doc.nodesBetween(listEnd, doc.content.size, (child, childPos) => {
+ if (childPos >= listEnd && child !== listNode) {
+ if (!isEmptyParagraph(child)) ok = false;
+ }
+ return false; // do not descend
+ });
+ return ok;
+ };
+ let primaryList: { pos: number; node: ProseMirrorNode } | null = null;
+ for (let i = info.lists.length - 1; i >= 0; i--) {
+ if (listIsTrailing(info.lists[i].pos, info.lists[i].node)) {
+ primaryList = info.lists[i];
+ break;
+ }
+ }
+ if (!primaryList && info.lists.length > 0) {
+ primaryList = info.lists[info.lists.length - 1];
+ }
+ // Extra lists (everything except the primary) must be consolidated away.
+ const extraLists = info.lists.filter((l) => l !== primaryList);
+ const inExtraList = (pos: number) =>
+ extraLists.some((l) => pos > l.pos && pos < l.pos + l.node.nodeSize);
+
+ // Definitions inside an extra list are migrated (recreated with the right
+ // id) into the primary list, so drop their in-place re-id markups — the
+ // whole extra list is deleted below and the markup would be wasted.
+ const defReidsToApply = defReids.filter((r) => !inExtraList(r.pos));
+
+ // 5) Decide whether anything must change. The document is canonical when:
+ // - no collisions were resolved (refs or defs), AND
+ // - no orphan definitions, AND
+ // - no missing definitions, AND
+ // - exactly the right number of lists (0 when no refs, else 1) AND the
+ // single list is canonically placed (trailing).
+ const noChangeNeeded =
+ !plan.changed &&
+ defReids.length === 0 &&
+ orphanDefs.length === 0 &&
+ missingIds.length === 0 &&
+ extraLists.length === 0 &&
+ (hasRefs
+ ? info.lists.length === 1 && primaryList !== null
+ : info.lists.length === 0);
+
+ if (noChangeNeeded) return null;
+
+ // 6) Apply the targeted, minimal mutations in ONE transaction. We never
+ // delete-and-recreate an unchanged definition subtree; we only:
+ // (a) re-id specific colliding references and definitions (attr-only),
+ // (b) delete genuine orphan definitions and extra/empty lists,
+ // (c) insert genuinely-missing empty definitions and migrate defs out
+ // of extra lists into the primary list,
+ // (d) create the primary list if references exist but none does yet.
+ const tr = newState.tr;
+
+ // 6a) Re-id colliding references (inline atoms: attr-only, size-stable).
+ for (const reid of plan.refReids) {
+ tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, {
+ ...reid.node.attrs,
+ id: reid.newId,
+ });
+ }
+ // 6b) Re-id colliding definitions IN PLACE (attr-only). This preserves the
+ // definition's content subtree — never delete+recreate it.
+ for (const reid of defReidsToApply) {
+ tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, {
+ ...reid.node.attrs,
+ id: reid.newId,
+ });
+ }
+
+ // 6c) Migrate non-orphan definitions out of every extra list into the
+ // primary list (or, if there is no primary list, into a new one we
+ // build), then delete the extra (now drained) lists. This is the only
+ // path that moves a definition subtree, and it runs ONLY in the
+ // abnormal multi-list case (paste/collab merge) — never on a plain
+ // reorder, which keeps a single list untouched.
+ const migrated: ProseMirrorNode[] = [];
+ for (const extra of extraLists) {
+ extra.node.forEach((defChild) => {
+ if (defChild.type !== defType) return;
+ const finalId = finalIdByNode.get(defChild) ?? defChild.attrs.id;
+ if (!referenceIdSet.has(finalId)) return; // orphan: drop it
+ migrated.push(
+ defChild.attrs.id === finalId
+ ? defChild
+ : defType.create({ id: finalId }, defChild.content),
+ );
+ });
+ }
+
+ // 6c-bis) The definitions to INSERT into the primary list: migrated defs
+ // from extra lists + freshly synthesized empty defs for references
+ // that have no definition at all. Computed before deletions so we can
+ // decide whether the primary list would be left empty.
+ const toInsert: ProseMirrorNode[] = [
+ ...migrated,
+ ...missingIds.map((id) =>
+ defType.create({ id }, paragraphType.create()),
+ ),
+ ];
+
+ // Does the primary list keep at least one definition after we strip its
+ // orphans AND counting the defs we are about to insert? If it ends up
+ // empty (an empty footnotesList is invalid schema), delete the WHOLE list
+ // instead of leaving a hollow shell. Only the primary list can receive
+ // inserts; extra lists are always deleted wholesale.
+ let primarySurvivors = 0;
+ if (primaryList) {
+ primaryList.node.forEach((defChild) => {
+ if (defChild.type !== defType) return;
+ const finalId = finalIdByNode.get(defChild) ?? defChild.attrs.id;
+ if (referenceIdSet.has(finalId)) primarySurvivors += 1;
+ });
+ }
+ const primaryWillBeEmpty =
+ !!primaryList && primarySurvivors === 0 && toInsert.length === 0;
+
+ // 6d) Delete orphan definitions, extra lists, and any list that would be
+ // left empty. Sort deletions from the end so earlier positions stay
+ // valid; map through tr.mapping to account for the (size-stable) re-id
+ // markups and earlier deletions.
+ const deletions: Array<{ from: number; to: number }> = [];
+ const wholeListDeletes = new Set(extraLists);
+ if (primaryWillBeEmpty && primaryList) wholeListDeletes.add(primaryList);
+
+ for (const occ of orphanDefs) {
+ // Skip orphans inside a list that is being deleted wholesale.
+ const inWholeDeleted = [...wholeListDeletes].some(
+ (l) => occ.pos > l.pos && occ.pos < l.pos + l.node.nodeSize,
+ );
+ if (inWholeDeleted) continue;
+ deletions.push({ from: occ.pos, to: occ.pos + occ.node.nodeSize });
+ }
+ for (const l of wholeListDeletes) {
+ deletions.push({ from: l.pos, to: l.pos + l.node.nodeSize });
+ }
+ deletions
+ .sort((a, b) => b.from - a.from)
+ .forEach(({ from, to }) => {
+ tr.delete(tr.mapping.map(from), tr.mapping.map(to));
+ });
+
+ // If we deleted the primary list wholesale, it can no longer receive the
+ // inserts below — null it out so a fresh list is created when needed.
+ if (primaryWillBeEmpty) primaryList = null;
+
+ // 6e) Insert the migrated + synthesized definitions.
+ if (hasRefs) {
+ if (primaryList) {
+ if (toInsert.length > 0) {
+ // Append at the end of the (mapped) primary list, just before its
+ // closing token, so its existing definition subtrees are untouched.
+ // We only changed attrs (size-stable) and deleted OTHER nodes, so
+ // mapping the original list-end position forward lands at the same
+ // boundary; -1 puts us just inside the list's closing token.
+ const insertAt =
+ tr.mapping.map(primaryList.pos + primaryList.node.nodeSize) - 1;
+ tr.insert(insertAt, Fragment.fromArray(toInsert));
+ }
+ } else {
+ // No usable list exists yet but references do — create one holding the
+ // migrated + synthesized definitions, placed after the last meaningful
+ // (non-empty-paragraph) top-level block so it sits before any trailing
+ // empty paragraph the trailing-node plugin maintains.
+ const mappedDoc = tr.doc;
+ let insertPos = mappedDoc.content.size;
+ for (let i = mappedDoc.childCount - 1; i >= 0; i--) {
+ const child = mappedDoc.child(i);
+ if (isEmptyParagraph(child)) insertPos -= child.nodeSize;
+ else break;
+ }
+ const list = listType.create(null, Fragment.fromArray(toInsert));
+ tr.insert(insertPos, list);
+ }
+ }
+
+ if (!tr.docChanged) return null;
+
+ tr.setMeta(SYNC_META, true);
+ tr.setMeta("addToHistory", false);
+ return tr;
+ },
+ });
+}
+
+ export const footnotePastePluginKey = new PluginKey("footnotePaste"); // key under which footnotePastePlugin() registers itself
+
+/**
+ * Paste id-collision guard. When pasted content carries footnote reference or
+ * definition ids that ALREADY EXIST in the current document, regenerate those
+ * ids (consistently across the pasted slice, so a pasted reference and its
+ * definition keep pointing at each other) BEFORE the slice is inserted.
+ *
+ * Without this, pasting a reference+definition pair copied from elsewhere — or
+ * duplicating one in place — would merge with (or clobber) the existing footnote
+ * of the same id. The schema-sync plugin already guarantees no definition is
+ * ever silently deleted after the fact (it re-id's collisions), but regenerating
+ * at paste time keeps the pasted footnote cleanly separate from the start and
+ * avoids any transient merge.
+ *
+ * Only COLLIDING ids are remapped: a self-paste of a lone reference whose id is
+ * not present elsewhere is left untouched (so it still resolves to its existing
+ * definition).
+ */
+export function footnotePastePlugin(): Plugin {
+ return new Plugin({
+ key: footnotePastePluginKey,
+ props: {
+ transformPasted(slice, view) {
+ // Collect ids already present in the current document.
+ const existing = new Set();
+ view.state.doc.descendants((node) => {
+ if (
+ node.type.name === FOOTNOTE_REFERENCE_NAME ||
+ node.type.name === FOOTNOTE_DEFINITION_NAME
+ ) {
+ const id = node.attrs.id;
+ if (id) existing.add(id);
+ }
+ });
+ if (existing.size === 0) return slice;
+
+ // Build a remap (old id -> fresh id) for every COLLIDING id found in the
+ // pasted slice, shared by references and definitions so a pasted pair
+ // stays matched. A paste is a distinct local user action (not a
+ // shared-state convergence point), so determinism is not strictly
+ // required here — but we derive the new id deterministically anyway
+ // (deriveFootnoteId against the current doc's id set) for consistency
+ // with the sync/import paths and to keep Math.random off this code path.
+ const remap = new Map();
+ const collectColliding = (node: ProseMirrorNode) => {
+ if (
+ node.type.name === FOOTNOTE_REFERENCE_NAME ||
+ node.type.name === FOOTNOTE_DEFINITION_NAME
+ ) {
+ const id = node.attrs.id;
+ if (id && existing.has(id) && !remap.has(id)) {
+ const newId = deriveFootnoteId(id, 2, existing);
+ remap.set(id, newId);
+ // Reserve it so a second colliding id deriving to the same base
+ // bumps instead of clashing.
+ existing.add(newId);
+ }
+ }
+ node.descendants(collectColliding);
+ };
+ slice.content.descendants(collectColliding);
+ if (remap.size === 0) return slice;
+
+ // Rewrite the colliding ids throughout the slice.
+ const rewrite = (fragment: Fragment): Fragment => {
+ const nodes: ProseMirrorNode[] = [];
+ fragment.forEach((node) => {
+ const isFootnote =
+ node.type.name === FOOTNOTE_REFERENCE_NAME ||
+ node.type.name === FOOTNOTE_DEFINITION_NAME;
+ const newId = isFootnote ? remap.get(node.attrs.id) : undefined;
+ const newContent = node.content.size
+ ? rewrite(node.content)
+ : node.content;
+ if (newId) {
+ nodes.push(
+ node.type.create(
+ { ...node.attrs, id: newId },
+ newContent,
+ node.marks,
+ ),
+ );
+ } else if (newContent !== node.content) {
+ nodes.push(node.copy(newContent));
+ } else {
+ nodes.push(node);
+ }
+ });
+ return Fragment.fromArray(nodes);
+ };
+
+ return new Slice(rewrite(slice.content), slice.openStart, slice.openEnd);
+ },
+ },
+ });
+}
diff --git a/packages/editor-ext/src/lib/footnote/footnote-util.ts b/packages/editor-ext/src/lib/footnote/footnote-util.ts
new file mode 100644
index 00000000..7896595d
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/footnote-util.ts
@@ -0,0 +1,132 @@
+import { Node as ProseMirrorNode } from "@tiptap/pm/model";
+
+/**
+ * Node type names for the footnote feature. Centralized so every part of the
+ * feature (nodes, plugins, commands) references the same string.
+ */
+export const FOOTNOTE_REFERENCE_NAME = "footnoteReference";
+export const FOOTNOTES_LIST_NAME = "footnotesList";
+export const FOOTNOTE_DEFINITION_NAME = "footnoteDefinition";
+
+/**
+ * Generate a uuidv7-style id (time-ordered). Implemented locally so editor-ext
+ * does not need a runtime dependency on the `uuid` package; matches the
+ * lexicographically-sortable layout uuidv7 produces.
+ */
+export function generateFootnoteId(): string {
+ const now = Date.now();
+ const timeHex = now.toString(16).padStart(12, "0");
+
+ const rand = (length: number) => {
+ let out = "";
+ for (let i = 0; i < length; i++) {
+ out += Math.floor(Math.random() * 16).toString(16);
+ }
+ return out;
+ };
+
+ // version 7 nibble, then variant (8..b) nibble.
+ const versioned = "7" + rand(3);
+ const variantNibble = (8 + Math.floor(Math.random() * 4)).toString(16);
+ const variant = variantNibble + rand(3);
+
+ return (
+ timeHex.slice(0, 8) +
+ "-" +
+ timeHex.slice(8, 12) +
+ "-" +
+ versioned +
+ "-" +
+ variant +
+ "-" +
+ rand(12)
+ );
+}
+
+/**
+ * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
+ * an original id `X` during collision resolution. The result is a pure function
+ * of (`originalId`, `occurrence`, `taken`) so that every collaborating client —
+ * and every import path — computes the SAME new id for the same input document.
+ *
+ * CRITICAL: this MUST NOT use Math.random()/Date.now()/uuid. Two clients that
+ * each make a local edit on the same duplicate-id document have to converge on
+ * identical ids; a random id would diverge permanently over Yjs.
+ *
+ * Scheme: the base candidate is `${originalId}__${occurrence}` (e.g. `X__2`,
+ * `X__3`). If that candidate already exists in `taken` (an existing footnote id,
+ * or one we already minted in this pass), a stable alphabetic suffix is appended
+ * and bumped — `X__2b`, `X__2c`, ... — until the candidate is unique. `taken` is
+ * itself part of the document state, so the whole walk stays deterministic.
+ *
+ * `taken` is consulted but NOT mutated here; the caller adds the returned id to
+ * its own seen-set before requesting the next derived id.
+ *
+ * NOTE: this implementation is intentionally duplicated in
+ * packages/mcp/src/lib/collaboration.ts (deriveFootnoteId)
+ * and MUST stay in sync with it so markdown imported through either path yields
+ * identical ids.
+ */
+export function deriveFootnoteId(
+ originalId: string,
+ occurrence: number,
+ taken: Set | ReadonlySet,
+): string {
+ let candidate = `${originalId}__${occurrence}`;
+ // Deterministic suffix bump: b, c, d, ... then aa, ab, ... if ever exhausted.
+ let n = 0;
+ while (taken.has(candidate)) {
+ n += 1;
+ candidate = `${originalId}__${occurrence}${suffix(n)}`;
+ }
+ return candidate;
+}
+
+/**
+ * Map 1 -> "b", 2 -> "c", ... 25 -> "z", 26 -> "ba", ... (base-25 over b..z,
+ * skipping "a" so the first bump is visibly distinct from the un-bumped base).
+ * Purely deterministic.
+ */
+function suffix(n: number): string {
+ let out = "";
+ let x = n;
+ while (x > 0) {
+ const rem = (x - 1) % 25;
+ out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
+ x = Math.floor((x - 1) / 25);
+ }
+ return out;
+}
+
+/**
+ * Collect every `footnoteReference` id in document order. This is the single
+ * source of truth for numbering and ordering — a pure function of the document
+ * so every collaborating client computes the same result.
+ */
+export function collectReferenceIds(doc: ProseMirrorNode): string[] {
+ const ids: string[] = [];
+ doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_REFERENCE_NAME) {
+ const id = node.attrs.id;
+ if (id) ids.push(id);
+ }
+ });
+ return ids;
+}
+
+/**
+ * Build a map of `referenceId -> displayNumber` (1-based) from document order.
+ * Pure function — the basis for the numbering decorations and any test.
+ */
+export function computeFootnoteNumbers(
+ doc: ProseMirrorNode,
+): Map {
+ const numbers = new Map();
+ let n = 0;
+ for (const id of collectReferenceIds(doc)) {
+ if (!numbers.has(id)) {
+ numbers.set(id, ++n);
+ }
+ }
+ return numbers;
+}
diff --git a/packages/editor-ext/src/lib/footnote/footnote.test.ts b/packages/editor-ext/src/lib/footnote/footnote.test.ts
new file mode 100644
index 00000000..9ecf9a55
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/footnote.test.ts
@@ -0,0 +1,948 @@
+import { describe, it, expect } from "vitest";
+import { Editor, Extension, getSchema } from "@tiptap/core";
+import { Document } from "@tiptap/extension-document";
+import { Paragraph } from "@tiptap/extension-paragraph";
+import { Text } from "@tiptap/extension-text";
+import { Superscript } from "@tiptap/extension-superscript";
+import { Plugin, PluginKey } from "@tiptap/pm/state";
+import { Node as PMNode } from "@tiptap/pm/model";
+import { EditorState } from "@tiptap/pm/state";
+import { FootnoteReference } from "./footnote-reference";
+import { FootnotesList } from "./footnotes-list";
+import { FootnoteDefinition } from "./footnote-definition";
+import { TrailingNode } from "../trailing-node";
+import { footnoteSyncPlugin } from "./footnote-sync";
+import { getFootnoteNumber } from "./footnote-numbering";
+import {
+ computeFootnoteNumbers,
+ collectReferenceIds,
+ FOOTNOTE_REFERENCE_NAME,
+ FOOTNOTES_LIST_NAME,
+ FOOTNOTE_DEFINITION_NAME,
+} from "./footnote-util";
+
+const extensions = [
+ Document,
+ Paragraph,
+ Text,
+ FootnoteReference,
+ FootnotesList,
+ FootnoteDefinition,
+];
+
+// Headless editor over the shared `extensions`; when no content is given the
+// document starts as a single empty paragraph.
+function makeEditor(content?: any) {
+  const emptyDoc = { type: "doc", content: [{ type: "paragraph" }] };
+  return new Editor({ extensions, content: content ?? emptyDoc });
+}
+
+// Number of descendant nodes of `doc` whose type name equals `name`.
+function countType(doc: PMNode, name: string): number {
+  let matches = 0;
+  doc.descendants((child) => { matches += child.type.name === name ? 1 : 0; });
+  return matches;
+}
+
+
+describe("footnote numbering (pure function)", () => {
+ it("numbers references in document order", () => {
+ const schema = getSchema(extensions);
+ const doc = PMNode.fromJSON(schema, {
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "a" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "x" } },
+ { type: "text", text: "b" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "y" } },
+ ],
+ },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "x" },
+ content: [{ type: "paragraph" }],
+ },
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "y" },
+ content: [{ type: "paragraph" }],
+ },
+ ],
+ },
+ ],
+ });
+
+ expect(collectReferenceIds(doc)).toEqual(["x", "y"]);
+ const numbers = computeFootnoteNumbers(doc);
+ expect(numbers.get("x")).toBe(1);
+ expect(numbers.get("y")).toBe(2);
+ });
+});
+
+describe("setFootnote command", () => {
+ it("inserts a reference and a matching definition in the footnotes list", () => {
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "Hello" }] },
+ ],
+ });
+ // Cursor at end of the word.
+ editor.commands.setTextSelection(6);
+ const ok = editor.commands.setFootnote();
+ expect(ok).toBe(true);
+
+ const doc = editor.state.doc;
+ expect(countType(doc, FOOTNOTE_REFERENCE_NAME)).toBe(1);
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(1);
+
+ // The reference id and the definition id match.
+ let refId: string | null = null;
+ let defId: string | null = null;
+ doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_REFERENCE_NAME) refId = node.attrs.id;
+ if (node.type.name === FOOTNOTE_DEFINITION_NAME) defId = node.attrs.id;
+ });
+ expect(refId).toBeTruthy();
+ expect(refId).toBe(defId);
+ editor.destroy();
+ });
+
+ it("inserts the definition at the correct position matching reference order", () => {
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "AAAA" }] },
+ { type: "paragraph", content: [{ type: "text", text: "BBBB" }] },
+ ],
+ });
+
+ // First footnote: place inside the SECOND paragraph (after "BBBB").
+ editor.commands.setTextSelection(11); // end of BBBB
+ editor.commands.setFootnote();
+
+ // Second footnote: place inside the FIRST paragraph (after "AAAA"),
+ // which is BEFORE the first reference in document order.
+ editor.commands.setTextSelection(5); // end of AAAA
+ editor.commands.setFootnote();
+
+ const doc = editor.state.doc;
+ // Reference order in document.
+ const refOrder = collectReferenceIds(doc);
+ // Definition order in the list.
+ const defOrder: string[] = [];
+ doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
+ defOrder.push(node.attrs.id);
+ }
+ });
+
+ expect(defOrder).toEqual(refOrder);
+ expect(defOrder.length).toBe(2);
+ editor.destroy();
+ });
+});
+
+describe("removeFootnote command (cascade)", () => {
+ it("removes both the reference and its definition, and drops the empty list", () => {
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "Hello" }] },
+ ],
+ });
+ editor.commands.setTextSelection(6);
+ editor.commands.setFootnote();
+
+ let id: string | null = null;
+ editor.state.doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_REFERENCE_NAME) id = node.attrs.id;
+ });
+ expect(id).toBeTruthy();
+
+ editor.commands.removeFootnote(id!);
+
+ const doc = editor.state.doc;
+ expect(countType(doc, FOOTNOTE_REFERENCE_NAME)).toBe(0);
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(0);
+ // empty list removed
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(0);
+ editor.destroy();
+ });
+});
+
+describe("footnote sync plugin (orphans)", () => {
+ it("creates an empty definition for a reference pasted without one", () => {
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "x" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "orphan-ref" } },
+ ],
+ },
+ ],
+ });
+ // Trigger a doc change so appendTransaction runs.
+ editor.commands.insertContentAt(1, " ");
+
+ const doc = editor.state.doc;
+ let defFound = false;
+ doc.descendants((node) => {
+ if (
+ node.type.name === FOOTNOTE_DEFINITION_NAME &&
+ node.attrs.id === "orphan-ref"
+ ) {
+ defFound = true;
+ }
+ });
+ expect(defFound).toBe(true);
+ editor.destroy();
+ });
+
+ it("merges multiple footnotesList nodes into one, preserving all definitions, as the last child", () => {
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "a" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "x" } },
+ { type: "text", text: "b" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "y" } },
+ ],
+ },
+ // First (stray) footnotes list, e.g. from a paste/collab merge.
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "x" },
+ content: [{ type: "paragraph", content: [{ type: "text", text: "X note" }] }],
+ },
+ ],
+ },
+ { type: "paragraph", content: [{ type: "text", text: "tail" }] },
+ // Second footnotes list (the "real" trailing one).
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "y" },
+ content: [{ type: "paragraph", content: [{ type: "text", text: "Y note" }] }],
+ },
+ ],
+ },
+ ],
+ });
+ // Trigger a local doc change so appendTransaction runs.
+ editor.commands.insertContentAt(1, " ");
+
+ const doc = editor.state.doc;
+ // Converged to exactly ONE list.
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
+ // Both definitions preserved (no tracking lost).
+ const defIds: string[] = [];
+ doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_DEFINITION_NAME) defIds.push(node.attrs.id);
+ });
+ expect(defIds.sort()).toEqual(["x", "y"]);
+ // The single list is the LAST child of the document.
+ const lastChild = doc.child(doc.childCount - 1);
+ expect(lastChild.type.name).toBe(FOOTNOTES_LIST_NAME);
+ editor.destroy();
+ });
+
+ it("leaves a correct doc (single trailing list) unchanged — no merge loop", () => {
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "a" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "x" } },
+ ],
+ },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "x" },
+ content: [{ type: "paragraph", content: [{ type: "text", text: "X note" }] }],
+ },
+ ],
+ },
+ ],
+ });
+ const before = editor.state.doc.toJSON();
+ // A change that doesn't touch footnote structure.
+ editor.commands.insertContentAt(1, "z");
+ const doc = editor.state.doc;
+ // Still exactly one list, still last, definition preserved.
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
+ const lastChild = doc.child(doc.childCount - 1);
+ expect(lastChild.type.name).toBe(FOOTNOTES_LIST_NAME);
+ // The footnotes list subtree is identical to before (no spurious rewrite).
+ const beforeList = before.content.find(
+ (n: any) => n.type === FOOTNOTES_LIST_NAME,
+ );
+ const afterList = doc
+ .toJSON()
+ .content.find((n: any) => n.type === FOOTNOTES_LIST_NAME);
+ expect(afterList).toEqual(beforeList);
+ editor.destroy();
+ });
+
+ it("two definitions sharing an id (with two matching references) BOTH survive the first edit (no data loss)", () => {
+ // Reproduces the verified data-loss bug: two footnoteDefinition nodes share
+ // id "d", and there are two references with id "d". The OLD code built the
+ // definitions Map last-wins and emitted exactly one definition for the
+ // de-duplicated reference, so the very first keystroke's sync transaction
+ // deleted the whole list and rebuilt it from one definition — silently
+ // destroying "first" and keeping only "second".
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "a" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
+ { type: "text", text: "b" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
+ ],
+ },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "d" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "first" }] },
+ ],
+ },
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "d" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "second" }] },
+ ],
+ },
+ ],
+ },
+ ],
+ });
+ // The first local keystroke fires the sync plugin's appendTransaction.
+ editor.commands.insertContentAt(1, " ");
+
+ const doc = editor.state.doc;
+ // BOTH definitions survive.
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2);
+ const defTexts: string[] = [];
+ const defIds: string[] = [];
+ doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_DEFINITION_NAME) {
+ defIds.push(node.attrs.id);
+ defTexts.push(node.textContent);
+ }
+ });
+ // No content was lost: both "first" and "second" are still present.
+ expect(defTexts.sort()).toEqual(["first", "second"]);
+ // The colliding ids were made distinct.
+ expect(new Set(defIds).size).toBe(2);
+ // Each definition's id matches exactly one reference (1:1 pairing).
+ const refIds: string[] = [];
+ doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id);
+ });
+ expect(refIds.sort()).toEqual(defIds.sort());
+ editor.destroy();
+ });
+
+ it("re-ids colliding duplicates DETERMINISTICALLY (two clients converge to identical ids)", () => {
+ // Cross-client determinism guard. Two collaborating clients each see the
+ // SAME duplicate-id document and each make a local edit. The sync plugin
+ // runs identically on every client, so it MUST mint the SAME new ids on both
+ // — otherwise the two clients diverge permanently over Yjs (duplicated
+ // footnotes). This is exactly the blocker the previous random-id
+ // (generateFootnoteId / Math.random) implementation caused: it would mint
+ // DIFFERENT ids on each client and this assertion would fail.
+ const duplicateDoc = {
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "a" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
+ { type: "text", text: "b" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
+ { type: "text", text: "c" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "d" } },
+ ],
+ },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "d" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "one" }] },
+ ],
+ },
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "d" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "two" }] },
+ ],
+ },
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "d" },
+ content: [
+ {
+ type: "paragraph",
+ content: [{ type: "text", text: "three" }],
+ },
+ ],
+ },
+ ],
+ },
+ ],
+ };
+
+ const idsAfterLocalEdit = () => {
+ // A fresh editor instance = an independent "client" running the same
+ // plugin pipeline on the same starting document.
+ const editor = makeEditor(structuredClone(duplicateDoc));
+ editor.commands.insertContentAt(1, " "); // local keystroke -> sync runs
+ const refIds: string[] = [];
+ const defIds: string[] = [];
+ editor.state.doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_REFERENCE_NAME)
+ refIds.push(node.attrs.id);
+ if (node.type.name === FOOTNOTE_DEFINITION_NAME)
+ defIds.push(node.attrs.id);
+ });
+ editor.destroy();
+ return { refIds, defIds };
+ };
+
+ const clientA = idsAfterLocalEdit();
+ const clientB = idsAfterLocalEdit();
+
+ // Both clients computed IDENTICAL ids (the property that makes Yjs converge).
+ expect(clientA.refIds).toEqual(clientB.refIds);
+ expect(clientA.defIds).toEqual(clientB.defIds);
+
+ // And the ids are deterministic-derived (not random uuid-style): the keeper
+ // keeps "d", the duplicates become "d__2", "d__3".
+ expect(new Set(clientA.refIds)).toEqual(new Set(["d", "d__2", "d__3"]));
+ // Every definition survived with a unique id, 1:1 with the references.
+ expect(clientA.defIds.length).toBe(3);
+ expect(new Set(clientA.defIds).size).toBe(3);
+ expect([...clientA.refIds].sort()).toEqual([...clientA.defIds].sort());
+ });
+
+ it("removes an orphan definition with no matching reference", () => {
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "x" }] },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "orphan-def" },
+ content: [{ type: "paragraph" }],
+ },
+ ],
+ },
+ ],
+ });
+ editor.commands.insertContentAt(1, "y");
+
+ const doc = editor.state.doc;
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(0);
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(0);
+ editor.destroy();
+ });
+});
+
+/**
+ * Live-editor regression tests for the sync-plugin infinite loop (the hard
+ * freeze when activating /footnote). These drive a REAL Tiptap editor through
+ * the same plugin pipeline the browser uses — including the TrailingNode plugin,
+ * which is what turned the "move list to the end" pass into an infinite
+ * ping-pong (list moved last -> trailing paragraph appended after it -> list no
+ * longer last -> moved again -> ...).
+ *
+ * If the loop regresses, ProseMirror's appendTransaction round loop never
+ * terminates and these tests HANG (the vitest timeout fails them). The
+ * transaction counter additionally fails fast with a bounded iteration cap, so
+ * a regression surfaces as an explicit error instead of only a slow timeout.
+ */
+describe("footnote sync plugin (no infinite loop — live editor)", () => {
+ // Hard cap on how many doc-changing appendTransaction rounds we tolerate for a
+ // single user action. Convergence takes a couple of rounds at most; anything
+ // approaching this means the plugins are oscillating.
+ const MAX_ROUNDS = 50;
+
+ // The production editor wires FootnoteReference alongside TrailingNode and
+ // Superscript; both participate in the loop the bug exhibited, so we mirror
+ // that here.
+ function makeLiveEditor(content?: any) {
+ let rounds = 0;
+ // A guard plugin that counts doc-changing appendTransaction rounds and
+ // throws if they exceed the cap, converting a would-be infinite loop into a
+ // deterministic failure instead of a wall-clock hang.
+ const LoopGuard = Extension.create({
+ name: "footnoteLoopGuard",
+ // Run last so it observes every other plugin's appended transaction.
+ priority: -1000,
+ addProseMirrorPlugins() {
+ return [
+ new Plugin({
+ key: new PluginKey("footnoteLoopGuard"),
+ appendTransaction(transactions) {
+ if (transactions.some((t) => t.docChanged)) {
+ rounds += 1;
+ if (rounds > MAX_ROUNDS) {
+ throw new Error(
+ `footnote sync did not converge: exceeded ${MAX_ROUNDS} appendTransaction rounds (infinite loop)`,
+ );
+ }
+ }
+ return null;
+ },
+ }),
+ ];
+ },
+ });
+
+ const editor = new Editor({
+ extensions: [
+ Document,
+ Paragraph,
+ Text,
+ Superscript,
+ TrailingNode,
+ LoopGuard,
+ FootnoteReference,
+ FootnotesList,
+ FootnoteDefinition,
+ ],
+ content: content ?? { type: "doc", content: [{ type: "paragraph" }] },
+ });
+ return { editor, getRounds: () => rounds, resetRounds: () => (rounds = 0) };
+ }
+
+ function lastFootnotesListIsTrailing(doc: PMNode): boolean {
+   // Canonical placement: scanning the top-level blocks from the end, the first
+   // block that is not an empty paragraph (i.e. not the trailing-node) must be
+   // the footnotes list.
+   for (let i = doc.childCount - 1; i >= 0; i--) {
+     const block = doc.child(i);
+     // Reaching a list means only empty paragraphs followed the last one.
+     if (block.type.name === FOOTNOTES_LIST_NAME) return true;
+     const isEmptyParagraph =
+       block.type.name === "paragraph" && block.content.size === 0;
+     // Anything else sitting after the last list: the list is not trailing.
+     if (!isEmptyParagraph) return false;
+   }
+   // No footnotes list among the top-level children.
+   return false;
+ }
+
+ it("setFootnote() RETURNS (no hang) and produces one ref + one def in a trailing list", () => {
+ const { editor } = makeLiveEditor({
+ type: "doc",
+ content: [{ type: "paragraph", content: [{ type: "text", text: "Hi" }] }],
+ });
+ editor.commands.setTextSelection(3);
+ const ok = editor.commands.setFootnote();
+ expect(ok).toBe(true);
+
+ const doc = editor.state.doc;
+ expect(countType(doc, FOOTNOTE_REFERENCE_NAME)).toBe(1);
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(1);
+ expect(lastFootnotesListIsTrailing(doc)).toBe(true);
+ editor.destroy();
+ });
+
+ it("a second setFootnote() does not hang: two refs + two defs in one list", () => {
+ const { editor } = makeLiveEditor({
+ type: "doc",
+ content: [{ type: "paragraph", content: [{ type: "text", text: "Hi" }] }],
+ });
+ editor.commands.setTextSelection(3);
+ editor.commands.setFootnote();
+ editor.commands.setTextSelection(3);
+ editor.commands.setFootnote();
+
+ const doc = editor.state.doc;
+ expect(countType(doc, FOOTNOTE_REFERENCE_NAME)).toBe(2);
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2);
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
+ expect(lastFootnotesListIsTrailing(doc)).toBe(true);
+ editor.destroy();
+ });
+
+ it("converges and stabilizes: an unrelated edit does not keep producing transactions", () => {
+ const { editor, getRounds, resetRounds } = makeLiveEditor({
+ type: "doc",
+ content: [{ type: "paragraph", content: [{ type: "text", text: "Hi" }] }],
+ });
+ editor.commands.setTextSelection(3);
+ editor.commands.setFootnote();
+
+ // Now the doc is canonical. Dispatch an unrelated edit (insert text) and
+ // assert the sync plugin converges in a bounded number of rounds and the
+ // document is stable (one ref/def/list, list trailing).
+ resetRounds();
+ editor.commands.insertContentAt(1, "Z");
+ const afterFirst = editor.state.doc.toJSON();
+ const roundsAfterEdit = getRounds();
+ expect(roundsAfterEdit).toBeLessThan(MAX_ROUNDS);
+
+ // A follow-up no-op-ish edit must not re-trigger structural rewrites: the
+ // footnotes section is identical before and after a further unrelated edit.
+ editor.commands.insertContentAt(2, "Y");
+ const afterSecond = editor.state.doc.toJSON();
+
+ const listOf = (json: any) =>
+ json.content.find((n: any) => n.type === FOOTNOTES_LIST_NAME);
+ expect(listOf(afterSecond)).toEqual(listOf(afterFirst));
+ expect(countType(editor.state.doc, FOOTNOTES_LIST_NAME)).toBe(1);
+ editor.destroy();
+ });
+
+ it("two footnotesList nodes converge to one (merge) without looping", () => {
+ const { editor } = makeLiveEditor({
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "a" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "x" } },
+ { type: "text", text: "b" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "y" } },
+ ],
+ },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "x" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "X" }] },
+ ],
+ },
+ ],
+ },
+ { type: "paragraph", content: [{ type: "text", text: "tail" }] },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "y" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "Y" }] },
+ ],
+ },
+ ],
+ },
+ ],
+ });
+ // Trigger a local doc change so appendTransaction runs (must not hang).
+ editor.commands.insertContentAt(1, " ");
+
+ const doc = editor.state.doc;
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
+ const defIds: string[] = [];
+ doc.descendants((node) => {
+ if (node.type.name === FOOTNOTE_DEFINITION_NAME)
+ defIds.push(node.attrs.id);
+ });
+ expect(defIds.sort()).toEqual(["x", "y"]);
+ expect(lastFootnotesListIsTrailing(doc)).toBe(true);
+ editor.destroy();
+ });
+});
+
+/**
+ * Data-loss-window regression guard (Fix 1). A pure reference REORDER must not
+ * cause the sync plugin to delete-and-recreate any definition subtree — doing so
+ * (the previous behaviour) would, through Yjs, replace the CRDT subtree of every
+ * definition and could lose a collaborator's in-flight characters on merge.
+ *
+ * Numbering is decoration-only (footnote-numbering.ts derives numbers from
+ * reference order), so the bottom list's PHYSICAL order need not match reference
+ * order for the displayed numbers to be correct. We therefore assert: the
+ * existing definition NODE INSTANCES are preserved (identity-equal) after the
+ * sync pass, AND the derived numbers follow the new reference order.
+ */
+describe("footnote sync plugin (no rebuild on reorder — data-loss guard)", () => {
+ function reorderedDoc() {
+ // The "out of order" end-state of a reorder: references occur as [b, a] but
+ // the bottom list still physically holds definitions in [a, b] order. This
+ // is exactly the situation a reference reorder produces (decoration-only
+ // numbering keeps the displayed numbers correct without physically moving
+ // the definition subtrees). The sync plugin must leave the definitions
+ // ALONE here — no delete/recreate of any definition subtree.
+ return {
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "p" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "b" } },
+ { type: "text", text: "q" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "a" } },
+ ],
+ },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "a" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "A" }] },
+ ],
+ },
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "b" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "B" }] },
+ ],
+ },
+ ],
+ },
+ ],
+ };
+ }
+
+ function getDefNodesById(doc: PMNode): Map<string, PMNode> {
+   const byId = new Map<string, PMNode>(); // definition id -> node instance
+   doc.descendants((node) => {
+     if (node.type.name === FOOTNOTE_DEFINITION_NAME) byId.set(node.attrs.id, node);
+   });
+   return byId;
+ }
+
+ it("does NOT delete/recreate existing definition subtrees for an out-of-order list (numbers still correct)", () => {
+ const editor = makeEditor(reorderedDoc());
+
+ // Capture the exact definition NODE INSTANCES before any sync pass.
+ const before = getDefNodesById(editor.state.doc);
+ // Sanity: both carry their content right now.
+ expect(before.get("a")!.textContent).toBe("A");
+ expect(before.get("b")!.textContent).toBe("B");
+
+ // Trigger a local edit elsewhere in the body so the sync plugin runs.
+ editor.commands.insertContentAt(1, "z");
+
+ const doc = editor.state.doc;
+
+ // Reference order is [b, a]; the displayed numbers follow reference order
+ // (decoration-only numbering): b -> 1, a -> 2 — regardless of physical list
+ // order.
+ expect(collectReferenceIds(doc)).toEqual(["b", "a"]);
+ const numbers = computeFootnoteNumbers(doc);
+ expect(numbers.get("b")).toBe(1);
+ expect(numbers.get("a")).toBe(2);
+
+ // CRITICAL regression guard: both definitions still exist and are the SAME
+ // node instances as before the edit — the plugin did NOT delete/recreate the
+ // list (which would replace every definition's CRDT subtree and open the
+ // concurrent-edit data-loss window). Identity equality proves the subtree
+ // was preserved verbatim.
+ const after = getDefNodesById(doc);
+ expect(after.get("a")).toBe(before.get("a"));
+ expect(after.get("b")).toBe(before.get("b"));
+ // Content intact, exactly one list, both definitions present.
+ expect(after.get("a")!.textContent).toBe("A");
+ expect(after.get("b")!.textContent).toBe("B");
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(1);
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2);
+
+ editor.destroy();
+ });
+});
+
+/**
+ * Sync-plugin guard paths that are awkward to exercise through a live editor:
+ * the remote-transaction skip and the enableSync:false (read-only) mode.
+ */
+describe("footnote sync plugin (guards)", () => {
+ // Build a non-canonical document (an orphan reference with no definition) so a
+ // sync pass would normally append a transaction.
+ function nonCanonicalState() {
+ const schema = getSchema(extensions);
+ const doc = PMNode.fromJSON(schema, {
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "x" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "orphan" } },
+ ],
+ },
+ ],
+ });
+ return EditorState.create({ schema, doc });
+ }
+
+ it("isRemoteTransaction => true: appendTransaction returns null (no rebuild on remote txns)", () => {
+ // The sync plugin must SKIP remote/collab transactions so orphan cleanup and
+ // structural rewrites only ever run on local edits.
+ const plugin = footnoteSyncPlugin(() => true);
+ const state = nonCanonicalState();
+
+ // Produce a doc-changing transaction (insert a space) and feed it to the
+ // plugin's appendTransaction exactly as ProseMirror would.
+ const tr = state.tr.insertText(" ", 1);
+ const newState = state.apply(tr);
+ const result = plugin.spec.appendTransaction!(
+ [tr],
+ state,
+ newState,
+ );
+ expect(result).toBeNull();
+ });
+
+ it("isRemoteTransaction => false: appendTransaction DOES rebuild (sanity)", () => {
+ // Control: with a local (non-remote) transaction the same non-canonical doc
+ // triggers a sync transaction, proving the null above is the remote guard
+ // and not a no-op everywhere.
+ const plugin = footnoteSyncPlugin(() => false);
+ const state = nonCanonicalState();
+ const tr = state.tr.insertText(" ", 1);
+ const newState = state.apply(tr);
+ const result = plugin.spec.appendTransaction!([tr], state, newState);
+ expect(result).not.toBeNull();
+ expect(result!.docChanged).toBe(true);
+ });
+
+ it("enableSync:false: the plugin never mutates the doc (read-only viewer)", () => {
+ // Build an editor with sync disabled. An orphan reference (no definition)
+ // must NOT trigger a definition insertion — the document is left untouched.
+ const editor = new Editor({
+ extensions: [
+ Document,
+ Paragraph,
+ Text,
+ FootnoteReference.configure({ enableSync: false }),
+ FootnotesList,
+ FootnoteDefinition,
+ ],
+ content: {
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "x" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "orphan" } },
+ ],
+ },
+ ],
+ },
+ });
+ // A local edit that would normally trigger orphan-definition synthesis.
+ editor.commands.insertContentAt(1, "y");
+
+ const doc = editor.state.doc;
+ // No definition (and no list) was ever created — sync is disabled.
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(0);
+ expect(countType(doc, FOOTNOTES_LIST_NAME)).toBe(0);
+ // Numbering decorations still work: the reference is numbered 1.
+ expect(getFootnoteNumber(editor.state, "orphan")).toBe(1);
+ editor.destroy();
+ });
+});
+
+/**
+ * Numbering cache (Fix 2). NodeViews must read footnote numbers from the
+ * numbering plugin's cached map (updated once per doc change) rather than
+ * recomputing the whole map per render. We assert the cache exists, is correct,
+ * and stays current across edits.
+ */
+describe("footnote numbering cache", () => {
+ it("exposes correct numbers via getFootnoteNumber and updates on edits", () => {
+ const editor = makeEditor({
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "a" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "x" } },
+ { type: "text", text: "b" },
+ { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "y" } },
+ ],
+ },
+ {
+ type: FOOTNOTES_LIST_NAME,
+ content: [
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "x" },
+ content: [{ type: "paragraph" }],
+ },
+ {
+ type: FOOTNOTE_DEFINITION_NAME,
+ attrs: { id: "y" },
+ content: [{ type: "paragraph" }],
+ },
+ ],
+ },
+ ],
+ });
+
+ // The cache mirrors computeFootnoteNumbers — but is read in O(1) per id.
+ expect(getFootnoteNumber(editor.state, "x")).toBe(1);
+ expect(getFootnoteNumber(editor.state, "y")).toBe(2);
+ // The cached map is the SAME values a fresh full computation would yield.
+ const fresh = computeFootnoteNumbers(editor.state.doc);
+ expect(getFootnoteNumber(editor.state, "x")).toBe(fresh.get("x"));
+ expect(getFootnoteNumber(editor.state, "y")).toBe(fresh.get("y"));
+
+ // After inserting a new earlier reference, the cache updates so the numbers
+ // shift (decoration-only numbering follows reference order).
+ editor.commands.insertContentAt(1, {
+ type: FOOTNOTE_REFERENCE_NAME,
+ attrs: { id: "z" },
+ });
+ expect(getFootnoteNumber(editor.state, "z")).toBe(1);
+ expect(getFootnoteNumber(editor.state, "x")).toBe(2);
+ expect(getFootnoteNumber(editor.state, "y")).toBe(3);
+ editor.destroy();
+ });
+});
diff --git a/packages/editor-ext/src/lib/footnote/footnotes-list.ts b/packages/editor-ext/src/lib/footnote/footnotes-list.ts
new file mode 100644
index 00000000..516fcf45
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/footnotes-list.ts
@@ -0,0 +1,56 @@
+import { mergeAttributes, Node } from "@tiptap/core";
+import { ReactNodeViewRenderer } from "@tiptap/react";
+import { FOOTNOTES_LIST_NAME } from "./footnote-util";
+
+export interface FootnotesListOptions {
+  HTMLAttributes: Record<string, any>; // extra attributes merged into the rendered <section>
+  view: any; // component handed to ReactNodeViewRenderer; when falsy no node view is registered
+}
+
+/**
+ * Block container that holds all footnote definitions. There is a single
+ * instance per document, kept at the end of the doc by the sync plugin (only
+ * empty trailing paragraphs may follow it). Modeled on the callout block node.
+ */
+export const FootnotesList = Node.create<FootnotesListOptions>({
+  name: FOOTNOTES_LIST_NAME,
+
+  group: "block",
+  content: "footnoteDefinition+",
+  isolating: true,
+  selectable: false,
+  defining: true,
+
+  addOptions() {
+    return {
+      HTMLAttributes: {},
+      view: null,
+    };
+  },
+
+  // Parsed from the same <section data-footnotes> element that renderHTML emits.
+  parseHTML() {
+    return [{ tag: "section[data-footnotes]" }];
+  },
+
+  // Emits <section data-footnotes class="footnotes">; the trailing 0 is the content hole.
+  renderHTML({ HTMLAttributes }) {
+    return [
+      "section",
+      mergeAttributes(
+        { "data-footnotes": "", class: "footnotes" },
+        this.options.HTMLAttributes,
+        HTMLAttributes,
+      ),
+      0,
+    ];
+  },
+
+  addNodeView() {
+    if (!this.options.view) return null;
+    // NOTE(review): flips an editor-wide flag from inside a node-view factory;
+    // presumably the same React node-view workaround the callout node uses — confirm.
+    this.editor.isInitialized = true;
+    return ReactNodeViewRenderer(this.options.view);
+  },
+});
diff --git a/packages/editor-ext/src/lib/footnote/index.ts b/packages/editor-ext/src/lib/footnote/index.ts
new file mode 100644
index 00000000..02defff1
--- /dev/null
+++ b/packages/editor-ext/src/lib/footnote/index.ts
@@ -0,0 +1,6 @@
+export * from "./footnote-util";
+export * from "./footnote-reference";
+export * from "./footnotes-list";
+export * from "./footnote-definition";
+export * from "./footnote-numbering";
+export * from "./footnote-sync";
diff --git a/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts b/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts
new file mode 100644
index 00000000..fbee45d2
--- /dev/null
+++ b/packages/editor-ext/src/lib/html-embed/html-embed-codec.spec.ts
@@ -0,0 +1,116 @@
+import { afterEach, describe, expect, it } from "vitest";
+import {
+ encodeHtmlEmbedSource,
+ decodeHtmlEmbedSource,
+} from "./html-embed";
+
+// Unit coverage for the base64 codec used by the htmlEmbed node's
+// data-source attribute (html-embed.ts). The codec has two branches:
+// - the BROWSER branch: btoa(encodeURIComponent(s)) / decodeURIComponent(atob(s));
+// - the NODE fallback: Buffer.from(..).toString("base64") / Buffer.from(s,"base64").
+// Server-side schema parsing (htmlToJson with no global btoa/atob) hits the
+// fallback, so both branches must round-trip identically; otherwise an embed
+// encoded in the browser would decode wrong on the server (or vice versa).
+//
+// We force the fallback by temporarily DELETING globalThis.btoa/atob (jsdom
+// provides them in this env), restoring them after each test so the suite stays
+// hermetic.
+
+const realBtoa = globalThis.btoa;
+const realAtob = globalThis.atob;
+
+/**
+ * Removes `btoa` and `atob` from `globalThis` so the codec takes its Node
+ * (Buffer) fallback. Nothing is restored here; the file-level afterEach hook
+ * puts the captured originals back.
+ */
+function deleteBase64Globals(): void {
+ // @ts-expect-error — intentionally removing the globals to exercise the
+ // `typeof btoa !== "function"` Node fallback branch in the codec.
+ delete globalThis.btoa;
+ // @ts-expect-error — see above.
+ delete globalThis.atob;
+}
+
+// File-level hook: runs after every test in this spec, including the ones that
+// never called deleteBase64Globals().
+afterEach(() => {
+ // Always restore so one test's stubbing never leaks into another. The values
+ // written back are the ones captured into realBtoa/realAtob at module load.
+ globalThis.btoa = realBtoa;
+ globalThis.atob = realAtob;
+});
+
+// Globals are left intact in this group, so encode/decode go through the
+// btoa/atob branch.
+describe("html-embed codec — browser btoa/atob branch", () => {
+ it("round-trips ASCII source", () => {
+ // NOTE(review): `src` is an empty string in this view, yet the next
+ // assertion expects a non-empty encoding while the fallback group asserts
+ // encode("") === "". Presumably an HTML literal was stripped in extraction —
+ // confirm against the real file.
+ const src = "";
+ const enc = encodeHtmlEmbedSource(src);
+ expect(enc).not.toBe("");
+ // base64 of the encodeURIComponent form never contains a raw '<'.
+ expect(enc).not.toContain("<");
+ expect(decodeHtmlEmbedSource(enc)).toBe(src);
+ });
+
+ it("round-trips UTF-8 / non-Latin1 source (the reason for encodeURIComponent)", () => {
+ // NOTE(review): this single-quoted literal is split across two lines in this
+ // view (not valid as written) — presumably markup inside it was lost; confirm.
+ const src = 'héllo → 世界 𝕏
';
+ const enc = encodeHtmlEmbedSource(src);
+ expect(decodeHtmlEmbedSource(enc)).toBe(src);
+ });
+});
+
+// Each test in this group calls deleteBase64Globals() itself; restoring the
+// globals is left to the file-level afterEach hook.
+describe("html-embed codec — Node Buffer fallback branch", () => {
+ it("encode uses the Buffer fallback when btoa is unavailable and still round-trips (UTF-8)", () => {
+ const src = 'héllo → 世界 𝕏';
+
+ deleteBase64Globals();
+ // With the globals gone, encode must take the Buffer path...
+ const encFallback = encodeHtmlEmbedSource(src);
+ expect(encFallback).not.toBe("");
+ // ...and decode (also via Buffer) must recover the exact source.
+ expect(decodeHtmlEmbedSource(encFallback)).toBe(src);
+ });
+
+ it("the Buffer fallback produces the SAME bytes the browser branch does (cross-env parity)", () => {
+ const src = 'café — 日本語';
+
+ // Browser branch (globals intact).
+ const encBrowser = encodeHtmlEmbedSource(src);
+
+ // Fallback branch.
+ deleteBase64Globals();
+ const encFallback = encodeHtmlEmbedSource(src);
+
+ // Identical base64 => an embed encoded in either environment decodes
+ // identically in the other (server <-> client losslessness).
+ expect(encFallback).toBe(encBrowser);
+
+ // And the fallback can decode what the browser produced (the globals are
+ // still deleted at this point, so this decode runs on the fallback path).
+ expect(decodeHtmlEmbedSource(encBrowser)).toBe(src);
+ });
+
+ it("empty string -> '' on both encode and decode in the fallback (early return, branch never reached)", () => {
+ deleteBase64Globals();
+ expect(encodeHtmlEmbedSource("")).toBe("");
+ expect(decodeHtmlEmbedSource("")).toBe("");
+ });
+
+ it("decode of malformed base64 -> '' via the catch branch (fallback)", () => {
+ // In the Buffer fallback, Buffer.from(..,'base64') is lenient and never
+ // throws, so to hit the catch we need a payload whose DECODED bytes are an
+ // invalid percent-escape, which makes decodeURIComponent throw. base64 of a
+ // lone '%' decodes back to '%', and decodeURIComponent('%') is a URIError.
+ const badBase64 = Buffer.from("%", "utf-8").toString("base64"); // "JQ=="
+
+ deleteBase64Globals();
+ // Sanity: the raw decode really does throw, so we're exercising the catch.
+ expect(() =>
+ decodeURIComponent(Buffer.from(badBase64, "base64").toString("utf-8")),
+ ).toThrow();
+ // The codec swallows it and returns "" rather than propagating.
+ expect(decodeHtmlEmbedSource(badBase64)).toBe("");
+ });
+});
+
+// deleteBase64Globals() is not called in this group, so decode runs with the
+// btoa/atob globals present.
+describe("html-embed codec — decode of malformed input (browser branch)", () => {
+ it("returns '' for input atob rejects (catch branch)", () => {
+ // atob throws on characters outside the base64 alphabet; the codec catches
+ // it and returns "" instead of throwing.
+ expect(decodeHtmlEmbedSource("@@not-base64@@")).toBe("");
+ });
+
+ it("empty string short-circuits to '' (never calls atob)", () => {
+ expect(decodeHtmlEmbedSource("")).toBe("");
+ });
+});
diff --git a/packages/editor-ext/src/lib/markdown/html-embed-marked.spec.ts b/packages/editor-ext/src/lib/markdown/html-embed-marked.spec.ts
new file mode 100644
index 00000000..7904f063
--- /dev/null
+++ b/packages/editor-ext/src/lib/markdown/html-embed-marked.spec.ts
@@ -0,0 +1,105 @@
+import { describe, expect, it } from "vitest";
+import { htmlEmbedExtension } from "./utils/html-embed.marked";
+import { markdownToHtml } from "./index";
+import { encodeHtmlEmbedSource } from "../html-embed/html-embed";
+
+// CONTRACT tests for the marked block tokenizer that rebuilds an htmlEmbed node
+// from the `` marker (html-embed.marked.ts), plus the
+// observable round-trip through markdownToHtml.
+//
+// These pin the REAL tokenizer behaviour the import path depends on:
+// - the tokenizer rule is anchored (^) and only accepts the base64 alphabet
+// [A-Za-z0-9+/=], so a marker with non-base64 chars is NOT tokenized and
+// survives as a literal HTML comment (not silently turned into something the
+// server's strip no longer recognizes);
+// - start() reports the correct index of the next marker so marked invokes the
+// tokenizer at the right offset when a marker sits mid-document / after text;
+// - a marker with surrounding text on the SAME line is split out into its own
+// embed div while the surrounding text becomes ordinary paragraphs.
+//
+// The contract is asserted against the actual exported extension and pipeline —
+// no behaviour is invented; the expectations were read off the real tokenizer.
+
+const SAMPLE = "x";
+const ENC = encodeHtmlEmbedSource(SAMPLE);
+
+describe("htmlEmbed marked tokenizer — start()", () => {
+ it("returns the index of a marker that sits mid-document", () => {
+ const src = `hello world `;
+ expect(htmlEmbedExtension.start(src)).toBe(src.indexOf("`)).toBe(0);
+ });
+
+ it("returns -1 when there is no marker", () => {
+ expect(htmlEmbedExtension.start("no marker here")).toBe(-1);
+ });
+});
+
+describe("htmlEmbed marked tokenizer — tokenizer()", () => {
+ it("tokenizes a marker at the start of the input, capturing the base64 payload", () => {
+ const token = htmlEmbedExtension.tokenizer(``);
+ expect(token).toBeTruthy();
+ expect(token!.type).toBe("htmlEmbed");
+ expect(token!.raw).toBe(``);
+ expect(token!.encoded).toBe(ENC);
+ });
+
+ it("tokenizes an EMPTY marker (the [A-Za-z0-9+/=]* class allows zero chars)", () => {
+ const token = htmlEmbedExtension.tokenizer("");
+ expect(token).toBeTruthy();
+ expect(token!.encoded).toBe("");
+ expect(token!.raw).toBe("");
+ });
+
+ it("does NOT tokenize when text precedes the marker (rule is anchored ^)", () => {
+ // marked relies on start() to advance to the marker; the tokenizer itself
+ // only matches at offset 0, so a non-anchored call returns undefined.
+ expect(
+ htmlEmbedExtension.tokenizer(`hello `),
+ ).toBeUndefined();
+ });
+
+ it("does NOT tokenize a marker containing a non-base64 char ('$')", () => {
+ expect(
+ htmlEmbedExtension.tokenizer(""),
+ ).toBeUndefined();
+ });
+
+ it("does NOT tokenize a marker containing a space", () => {
+ expect(
+ htmlEmbedExtension.tokenizer(""),
+ ).toBeUndefined();
+ });
+
+ it("renderer emits the embed div the node's parseHTML recognizes", () => {
+ const token = htmlEmbedExtension.tokenizer(``)!;
+ const html = htmlEmbedExtension.renderer(token as any);
+ expect(html).toBe(
+ ``,
+ );
+ });
+});
+
+describe("htmlEmbed marked tokenizer — markdownToHtml round-trip", () => {
+ it("splits a marker out of surrounding same-line text into its own embed div", async () => {
+ const html = await markdownToHtml(`before after`);
+ // The marker became the embed div...
+ expect(html).toContain(
+ ``,
+ );
+ // ...and the surrounding text survived as ordinary paragraph content.
+ expect(html).toContain("before");
+ expect(html).toContain("after");
+ });
+
+ it("leaves a marker with non-base64 chars as a literal comment (NOT an embed div)", async () => {
+ const html = await markdownToHtml("");
+ // It is NOT tokenized into an embed div the server would strip...
+ expect(html).not.toContain('data-type="htmlEmbed"');
+ // ...it passes through unchanged as a literal HTML comment.
+ expect(html).toContain("");
+ });
+});
diff --git a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts
new file mode 100644
index 00000000..b47cf4a4
--- /dev/null
+++ b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts
@@ -0,0 +1,170 @@
+import { marked } from "marked";
+import { deriveFootnoteId } from "../../footnote/footnote-util";
+
+/**
+ * Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline.
+ *
+ * Two pieces:
+ * - an INLINE tokenizer for `[^id]` references -> `<sup data-footnote-ref data-id="id">` (matches the editor-ext FootnoteReference renderHTML);
+ * - a regex preprocessing step (marked's `preprocess`/`walkTokens` hooks are
+ * awkward for collecting + removing definitions, so they are not used) that
+ * pulls every `[^id]: text` definition line out of the body and appends a
+ * single `<section data-footnotes>` with one `<div data-footnote-def data-id="id">` per
+ * definition, so the round-trip rebuilds footnotesList + footnoteDefinition.
+ *
+ * Every extracted definition is emitted, whether or not a matching reference
+ * exists; the sync plugin fills any gaps on the editor side.
+ */
+
+// A whole line of the form `[^id]: text`. Group 1 is the id (no `]` and no
+// whitespace); group 2 is the rest of the line after optional spaces/tabs.
+const DEFINITION_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
+// A `[^id]` marker anywhere in the input (unanchored); group 1 is the id.
+const REFERENCE_RE = /\[\^([^\]\s]+)\]/;
+
+/** Token produced by the inline `[^id]` tokenizer below. */
+interface FootnoteRefToken {
+ type: "footnoteRef";
+ // The exact matched source text, i.e. the full `[^id]` marker.
+ raw: string;
+ // The id captured between `[^` and `]`.
+ id: string;
+}
+
+/**
+ * marked inline extension that turns a `[^id]` marker into a footnote
+ * reference element.
+ */
+export const footnoteReferenceExtension = {
+ name: "footnoteRef",
+ level: "inline" as const,
+ // Index of the first `[^` in the remaining source, or -1 when there is none.
+ start(src: string) {
+ return src.match(/\[\^/)?.index ?? -1;
+ },
+ tokenizer(src: string): FootnoteRefToken | undefined {
+ // REFERENCE_RE is unanchored, so the match position is checked explicitly.
+ const match = REFERENCE_RE.exec(src);
+ // Only match at the very start of the remaining inline source.
+ if (match && match.index === 0) {
+ return {
+ type: "footnoteRef",
+ raw: match[0],
+ id: match[1],
+ };
+ }
+ return undefined;
+ },
+ renderer(token: FootnoteRefToken) {
+ // NOTE(review): the template literal is empty in this view — presumably the
+ // `<sup data-footnote-ref data-id=…>` markup was stripped in extraction;
+ // confirm against the real file.
+ return ``;
+ },
+};
+
+/**
+ * Escapes `&` and `"` in a value that is placed inside a double-quoted HTML
+ * attribute.
+ *
+ * NOTE(review): the replacement strings below read `"&"` and `"""` in this
+ * view, which is not valid as written — presumably the `&amp;` / `&quot;`
+ * entities were decoded in extraction; confirm against the real file.
+ */
+function escapeAttr(value: string): string {
+ return String(value).replace(/&/g, "&").replace(/"/g, """);
+}
+
+/**
+ * Backslash-escapes the regex metacharacters in `value` so it can be embedded
+ * in a RegExp source and matched literally.
+ */
+function escapeRegExp(value: string): string {
+ return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+
+/**
+ * Extract `[^id]: text` definition lines from the markdown body, returning the
+ * cleaned body plus a rendered `<section data-footnotes>` (empty string when no
+ * definitions). Call this BEFORE marked.parse and append the section to the
+ * resulting HTML.
+ *
+ * When no definition is found, `body` is the input string itself, unchanged.
+ * Otherwise `body` is the input without the definition lines, with `[^id]`
+ * markers rewritten for de-duplicated ids.
+ */
+export function extractFootnoteDefinitions(markdown: string): {
+ body: string;
+ section: string;
+} {
+ const lines = markdown.split("\n");
+ const bodyLines: string[] = [];
+ const definitions: Array<{ id: string; text: string }> = [];
+
+ // Track fenced-code state so a `[^id]: ...` line that merely SHOWS footnote
+ // syntax inside a ``` / ~~~ code block is left in the body verbatim and not
+ // mistaken for a real definition.
+ let fence: string | null = null;
+
+ for (const line of lines) {
+ const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
+ if (fenceMatch) {
+ // NOTE(review): only the delimiter CHARACTER is remembered and compared,
+ // not the fence length, so inside a longer fence a shorter run of the same
+ // character closes it here — confirm this is acceptable vs. CommonMark.
+ const marker = fenceMatch[2][0];
+ if (fence === null) {
+ fence = marker; // opening fence
+ } else if (marker === fence) {
+ fence = null; // closing fence (matching delimiter type)
+ }
+ bodyLines.push(line);
+ continue;
+ }
+
+ const m = fence === null ? DEFINITION_RE.exec(line) : null;
+ if (m) {
+ definitions.push({ id: m[1], text: m[2] });
+ } else {
+ bodyLines.push(line);
+ }
+ }
+
+ if (definitions.length === 0) {
+ return { body: markdown, section: "" };
+ }
+
+ // De-duplicate colliding definition ids. Two definitions sharing an id (e.g.
+ // `[^d]: first` / `[^d]: second`) would otherwise collapse into one footnote
+ // downstream (the editor's last-wins sync). Rename each colliding id to a
+ // DETERMINISTIC derived one AND rewrite the corresponding `[^id]` reference
+ // marker so the (reference, definition) pairing stays 1:1. The FIRST
+ // definition keeps the id and pairs with the FIRST `[^id]` marker; the Nth
+ // duplicate gets the derived id `${id}__${N}` and rewrites the Nth `[^id]`
+ // marker. If there are fewer markers than definitions, the surplus definition
+ // keeps a derived (orphan) id so it is never silently merged away.
+ //
+ // The id is derived (deriveFootnoteId), NOT random: importing the same
+ // markdown through two paths (here and the MCP mirror) must yield identical
+ // ids, and re-importing the same markdown twice must be stable.
+ let dedupedBody = bodyLines.join("\n");
+ // Every original definition id is reserved up front so a derived id can never
+ // collide with an unrelated original id present in the document.
+ const taken = new Set(definitions.map((d) => d.id));
+ const seenDefIds = new Map(); // original id -> how many seen
+ for (const def of definitions) {
+ const originalId = def.id;
+ const count = seenDefIds.get(originalId) ?? 0;
+ seenDefIds.set(originalId, count + 1);
+ if (count === 0) continue; // first definition keeps its id
+
+ // count is the 0-based number of PRIOR occurrences; this is occurrence
+ // (count + 1), i.e. 2 for the first duplicate, 3 for the next, ...
+ const newId = deriveFootnoteId(originalId, count + 1, taken);
+ taken.add(newId);
+ def.id = newId;
+
+ // Rewrite the NEXT still-unrewritten `[^originalId]` marker that does not
+ // belong to the keeper definition. After a prior duplicate rewrote its
+ // marker (to `[^someNewId]`), it no longer matches `[^originalId]`, so the
+ // remaining matches are: index 0 = the keeper's marker (left alone), index 1
+ // = this duplicate's marker. Rewrite index 1.
+ //
+ // NOTE(review): the replace runs over the whole joined body, which still
+ // contains the fenced code lines skipped above, so a `[^id]` shown inside a
+ // code block also counts as an occurrence (and can be rewritten) — confirm.
+ let occurrence = 0;
+ let rewritten = false;
+ const re = new RegExp(`\\[\\^${escapeRegExp(originalId)}\\]`, "g");
+ dedupedBody = dedupedBody.replace(re, (match) => {
+ const idx = occurrence++;
+ if (!rewritten && idx === 1) {
+ rewritten = true;
+ return `[^${newId}]`;
+ }
+ return match;
+ });
+ // If there was no second marker (more definitions than references), the
+ // duplicate simply survives as an orphan with its fresh id — no body change.
+ }
+
+ const defsHtml = definitions
+ .map((d) => {
+ // Render the definition text as inline markdown so emphasis/links inside
+ // a footnote survive the round-trip; wrap in a paragraph (the node's
+ // content is paragraph+).
+ // NOTE(review): the parseInline result is interpolated directly; if marked
+ // is ever configured async this would be a Promise — confirm.
+ const inner = marked.parseInline(d.text || "");
+ // NOTE(review): the wrapper markup of this template (and of `section`
+ // below) is missing in this view — presumably stripped in extraction.
+ return `${inner}
`;
+ })
+ .join("");
+
+ return {
+ body: dedupedBody,
+ section: `${defsHtml} `,
+ };
+}
diff --git a/packages/editor-ext/src/lib/markdown/utils/marked.utils.ts b/packages/editor-ext/src/lib/markdown/utils/marked.utils.ts
index 58bb83f9..240e0d0e 100644
--- a/packages/editor-ext/src/lib/markdown/utils/marked.utils.ts
+++ b/packages/editor-ext/src/lib/markdown/utils/marked.utils.ts
@@ -2,6 +2,10 @@ import { marked } from "marked";
import { calloutExtension } from "./callout.marked";
import { mathBlockExtension } from "./math-block.marked";
import { mathInlineExtension } from "./math-inline.marked";
+import {
+ footnoteReferenceExtension,
+ extractFootnoteDefinitions,
+} from "./footnote.marked";
import { htmlEmbedExtension } from "./html-embed.marked";
marked.use({
@@ -39,6 +43,7 @@ marked.use({
calloutExtension,
mathBlockExtension,
mathInlineExtension,
+ footnoteReferenceExtension,
htmlEmbedExtension,
],
});
@@ -54,5 +59,16 @@ export function markdownToHtml(
.replace(YAML_FONT_MATTER_REGEX, "")
.trimStart();
- return marked.parse(markdown).toString();
+ // Pull `[^id]: ...` definition lines out of the body, render the body, then
+ // append a single so the round-trip rebuilds the
+ // footnotesList + footnoteDefinition nodes.
+ const { body, section } = extractFootnoteDefinitions(markdown);
+
+ const parsed = marked.parse(body);
+ if (!section) return parsed;
+
+ if (typeof parsed === "string") {
+ return parsed + section;
+ }
+ return parsed.then((html) => html + section);
}
diff --git a/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts b/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts
index 449868f7..172786a3 100644
--- a/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts
+++ b/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts
@@ -12,12 +12,44 @@ function sanitizeMdLinkText(value: string): string {
.replace(/[\r\n]+/g, ' ');
}
+// Tags turndown treats as void (self-closing). Footnote references render as an
+// empty `<sup data-footnote-ref>` whose meaning lives entirely in its data-id;
+// without marking it void, turndown's blank-node removal drops it before our
+// rule runs, losing the `[^id]` marker. Mirrors turndown's built-in list.
+const TURNDOWN_VOID_ELEMENTS = [
+ 'AREA', 'BASE', 'BR', 'COL', 'COMMAND', 'EMBED', 'HR', 'IMG', 'INPUT',
+ 'KEYGEN', 'LINK', 'META', 'PARAM', 'SOURCE', 'TRACK', 'WBR',
+];
+
+/**
+ * Returns true when `node` should be treated as a void element: a `SUP`
+ * carrying `data-footnote-ref`, or any tag listed in TURNDOWN_VOID_ELEMENTS.
+ * Returns false when the node has no usable nodeName.
+ *
+ * NOTE(review): this is passed to TurndownService as an `isVoid` option, but
+ * the comment on fillEmptyFootnoteRefs says turndown's module-level isVoid
+ * cannot be overridden through options — presumably the option is a no-op;
+ * confirm and remove this function if it is dead.
+ */
+function isVoidNode(node: any): boolean {
+ // Optional chaining tolerates null nodes and nodes without a string nodeName.
+ const name = node?.nodeName?.toUpperCase?.();
+ if (!name) return false;
+ if (name === 'SUP' && node.hasAttribute?.('data-footnote-ref')) {
+ return true;
+ }
+ return TURNDOWN_VOID_ELEMENTS.indexOf(name) !== -1;
+}
+
+/**
+ * An empty `<sup data-footnote-ref>` is "blank" to turndown, which removes blank
+ * inline nodes (RootNode/Node use a module-level isVoid the options cannot
+ * override). To survive, inject the id as text content so the node is non-blank;
+ * the footnoteReference rule then reads data-id and emits `[^id]`.
+ *
+ * NOTE(review): the regex literal and the replacement template below are
+ * incomplete in this view (the opening of the pattern and the whole template
+ * body are missing) — presumably markup was stripped in extraction; confirm
+ * against the real file before relying on this text.
+ */
+function fillEmptyFootnoteRefs(html: string): string {
+ return html.replace(
+ /]*\bdata-footnote-ref\b[^>]*)>\s*<\/sup>/gi,
+ (_m, attrs) => ``,
+ );
+}
+
export function htmlToMarkdown(html: string): string {
const turndownService = new TurndownService({
headingStyle: 'atx',
codeBlockStyle: 'fenced',
hr: '---',
bulletListMarker: '-',
+ isVoid: isVoidNode,
});
turndownService.use([
@@ -35,8 +67,12 @@ export function htmlToMarkdown(html: string): string {
htmlEmbed,
image,
video,
+ footnoteReference,
+ footnotesList,
]);
- return turndownService.turndown(html).replaceAll('
', ' ');
+ return turndownService
+ .turndown(fillEmptyFootnoteRefs(html))
+ .replaceAll('
', ' ');
}
/**
@@ -230,6 +266,57 @@ function image(turndownService: _TurndownService) {
});
}
+/**
+ * Footnote reference (inline atom) -> pandoc/GFM marker `[^id]`.
+ * The visible number is derived (not stored), so the id is the stable anchor.
+ *
+ * Registers the `footnoteReference` turndown rule on the given service.
+ */
+function footnoteReference(turndownService: _TurndownService) {
+ turndownService.addRule('footnoteReference', {
+ // Matches any SUP element that carries the data-footnote-ref attribute.
+ filter: function (node: HTMLInputElement) {
+ return (
+ node.nodeName === 'SUP' && node.hasAttribute('data-footnote-ref')
+ );
+ },
+ // The element's content is ignored; only data-id is used. A reference with
+ // a missing or empty data-id produces no output at all.
+ replacement: function (_content: string, node: HTMLInputElement) {
+ const id = node.getAttribute('data-id') || '';
+ return id ? `[^${id}]` : '';
+ },
+ });
+}
+
+/**
+ * Footnotes container -> the list of `[^id]: text` definitions at the end of
+ * the document (one per line). Each footnoteDefinition inside emits its own
+ * `[^id]: ...` line; turndown joins them with the surrounding block spacing.
+ *
+ * Registers two turndown rules: `footnoteDefinition` (a DIV with
+ * data-footnote-def) and `footnotesList` (a SECTION with data-footnotes).
+ */
+function footnotesList(turndownService: _TurndownService) {
+ turndownService.addRule('footnoteDefinition', {
+ filter: function (node: HTMLInputElement) {
+ return (
+ node.nodeName === 'DIV' && node.hasAttribute('data-footnote-def')
+ );
+ },
+ replacement: function (content: string, node: HTMLInputElement) {
+ const id = node.getAttribute('data-id') || '';
+ // Collapse internal newlines so the definition stays a single MD line;
+ // continuation lines are a v2 refinement.
+ const text = content.replace(/\s*\n+\s*/g, ' ').trim();
+ // A definition without a data-id is dropped entirely, text included.
+ return id ? `\n[^${id}]: ${text}\n` : '';
+ },
+ });
+
+ turndownService.addRule('footnotesList', {
+ filter: function (node: HTMLInputElement) {
+ return (
+ node.nodeName === 'SECTION' && node.hasAttribute('data-footnotes')
+ );
+ },
+ // Emits the already-converted definitions, trimmed, after a blank line.
+ replacement: function (content: string) {
+ return `\n\n${content.trim()}\n`;
+ },
+ });
+}
+
function video(turndownService: _TurndownService) {
turndownService.addRule('video', {
filter: function (node: HTMLInputElement) {
diff --git a/packages/editor-ext/src/lib/page-embed/page-embed.spec.ts b/packages/editor-ext/src/lib/page-embed/page-embed.spec.ts
new file mode 100644
index 00000000..95638090
--- /dev/null
+++ b/packages/editor-ext/src/lib/page-embed/page-embed.spec.ts
@@ -0,0 +1,88 @@
+import { describe, expect, it } from "vitest";
+import { getSchema } from "@tiptap/core";
+import { generateHTML, generateJSON } from "@tiptap/html";
+import { Document } from "@tiptap/extension-document";
+import { Paragraph } from "@tiptap/extension-paragraph";
+import { Text } from "@tiptap/extension-text";
+import { PageEmbed } from "./page-embed";
+
+// CONTRACT tests for the PageEmbed node's parse/render round-trip
+// (page-embed.ts). The whole-page live embed stores ONLY a `sourcePageId`
+// reference; renderHTML must serialize it as `data-source-page-id` and parseHTML
+// must recover it. If this attribute mapping drifts, an embed saved to HTML loses
+// its target page on reload (the node view would have nothing to fetch).
+//
+// We assert at the editor-ext schema level using the same Tiptap utilities the
+// other editor-ext tests use (getSchema + @tiptap/html generateHTML/generateJSON
+// over a jsdom DOM), driving a real HTML -> node JSON -> HTML round-trip through
+// the node's actual addAttributes()/parseHTML()/renderHTML().
+
+// Minimal schema: a doc of blocks, plus the PageEmbed block node under test.
+const extensions = [Document, Paragraph, Text, PageEmbed];
+
+describe("PageEmbed schema", () => {
+ it("registers the pageEmbed node in the schema", () => {
+ const schema = getSchema(extensions);
+ expect(schema.nodes.pageEmbed).toBeTruthy();
+ });
+});
+
+describe("PageEmbed parse/render round-trip", () => {
+ it("recovers sourcePageId from data-source-page-id on parse (HTML -> JSON)", () => {
+ const html = ``;
+ const json = generateJSON(html, extensions);
+
+ const node = json.content?.[0];
+ expect(node?.type).toBe("pageEmbed");
+ expect(node?.attrs?.sourcePageId).toBe("pg-123");
+ });
+
+ it("emits data-source-page-id on render (JSON -> HTML)", () => {
+ const json = {
+ type: "doc",
+ content: [{ type: "pageEmbed", attrs: { sourcePageId: "pg-456" } }],
+ };
+ const html = generateHTML(json, extensions);
+
+ expect(html).toContain('data-type="pageEmbed"');
+ expect(html).toContain('data-source-page-id="pg-456"');
+ });
+
+ it("survives a full HTML -> node -> HTML round-trip (attribute preserved)", () => {
+ const start = ``;
+
+ // HTML -> node JSON -> HTML.
+ const json = generateJSON(start, extensions);
+ const html = generateHTML(json, extensions);
+
+ // The id survived the round-trip in the serialized HTML...
+ expect(html).toContain('data-source-page-id="pg-789"');
+
+ // ...and re-parsing the round-tripped HTML yields the same id (stable across
+ // an extra pass — no loss, no duplication).
+ const json2 = generateJSON(html, extensions);
+ expect(json2.content?.[0]?.attrs?.sourcePageId).toBe("pg-789");
+ });
+
+ it("omits data-source-page-id entirely when sourcePageId is null (renderHTML guard)", () => {
+ // The renderHTML maps a null/empty id to {} (no attribute), so an embed
+ // without a target page does not emit a stray empty attribute.
+ const json = {
+ type: "doc",
+ content: [{ type: "pageEmbed", attrs: { sourcePageId: null } }],
+ };
+ const html = generateHTML(json, extensions);
+
+ expect(html).toContain('data-type="pageEmbed"');
+ expect(html).not.toContain("data-source-page-id");
+ });
+
+ it("parses a div without the attribute to a null sourcePageId (default)", () => {
+ const html = ``;
+ const json = generateJSON(html, extensions);
+
+ expect(json.content?.[0]?.type).toBe("pageEmbed");
+ // getAttribute returns null when absent; parseHTML returns it verbatim.
+ expect(json.content?.[0]?.attrs?.sourcePageId).toBeNull();
+ });
+});
diff --git a/packages/editor-ext/vitest.config.ts b/packages/editor-ext/vitest.config.ts
index 783f61d8..617c62d3 100644
--- a/packages/editor-ext/vitest.config.ts
+++ b/packages/editor-ext/vitest.config.ts
@@ -1,12 +1,8 @@
import { defineConfig } from "vitest/config";
-// Minimal vitest setup for @docmost/editor-ext (mirrors apps/client's config,
-// trimmed to what the markdown/html-embed round-trip tests need). The markdown
-// utils run in plain Node (marked + turndown), so no jsdom/react plugin is
-// required here.
export default defineConfig({
test: {
- environment: "node",
+ environment: "jsdom",
globals: true,
include: ["src/**/*.{test,spec}.ts"],
},
diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js
index 7b47b9e9..5140acee 100644
--- a/packages/mcp/build/lib/collaboration.js
+++ b/packages/mcp/build/lib/collaboration.js
@@ -263,10 +263,145 @@ function bridgeTaskLists(html) {
}
return document.body.innerHTML;
}
+// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
+// marker becomes `<sup data-footnote-ref data-id="id">`, and `[^id]: text`
+// definition lines are collected into a single `<section data-footnotes>`.
+const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
+const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
+// Escapes `&` and `"` for use inside a double-quoted HTML attribute.
+// NOTE(review): the replacement strings read `"&"` and `"""` in this view,
+// which is not valid as written — presumably `&amp;` / `&quot;` were decoded in
+// extraction; confirm against the real file.
+function escapeFootnoteAttr(value) {
+ return String(value).replace(/&/g, "&").replace(/"/g, """);
+}
+// Backslash-escapes regex metacharacters so `value` matches literally when
+// embedded in a RegExp source.
+function escapeFootnoteRegExp(value) {
+ return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+}
+/**
+ * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
+ * an original id `X` during definition dedup.
+ *
+ * EXACT MIRROR of editor-ext `deriveFootnoteId`
+ * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
+ * STAY IN SYNC: the same markdown imported through the editor and through this
+ * MCP path has to produce identical ids, and the sync plugin (which re-ids on
+ * every collaborating client) relies on the same scheme to converge. NEVER use
+ * Math.random()/Date.now()/uuid here — a random id would diverge across clients.
+ *
+ * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
+ * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
+ * `taken` (the set of ids already present / already minted — pure doc state).
+ *
+ * `taken` is only queried with `has` here and is never mutated by this
+ * function; the caller is responsible for adding the returned id to it.
+ */
+function deriveFootnoteId(originalId, occurrence, taken) {
+ let candidate = `${originalId}__${occurrence}`;
+ // Number of suffix bumps tried so far; 0 means the bare base candidate.
+ let n = 0;
+ while (taken.has(candidate)) {
+ n += 1;
+ candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
+ }
+ return candidate;
+}
+/**
+ * Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`).
+ * Uses the 25 letters b..z as digits with no zero digit, so 25 -> "z" and
+ * 26 -> "bb". Returns "" for n <= 0 because the loop body never runs.
+ */
+function footnoteSuffix(n) {
+ let out = "";
+ let x = n;
+ while (x > 0) {
+ // Least-significant letter first; it is prepended so the result reads
+ // most-significant first.
+ const rem = (x - 1) % 25;
+ out = String.fromCharCode(98 + rem) + out; // 98 = 'b'
+ x = Math.floor((x - 1) / 25);
+ }
+ return out;
+}
+// marked inline extension for `[^id]` reference markers. Registered on the
+// shared `marked` instance by the marked.use call directly below.
+const footnoteRefMarkedExtension = {
+ name: "footnoteRef",
+ level: "inline",
+ // Index of the first `[^` in the remaining source, or -1 when there is none.
+ start(src) {
+ return src.match(/\[\^/)?.index ?? -1;
+ },
+ tokenizer(src) {
+ // FOOTNOTE_REF_RE is unanchored, so only a match at offset 0 is accepted.
+ const match = FOOTNOTE_REF_RE.exec(src);
+ if (match && match.index === 0) {
+ return { type: "footnoteRef", raw: match[0], id: match[1] };
+ }
+ return undefined;
+ },
+ renderer(token) {
+ // NOTE(review): the template literal is empty in this view — presumably the
+ // `<sup data-footnote-ref data-id=…>` markup was stripped in extraction;
+ // confirm against the real file.
+ return ``;
+ },
+};
+marked.use({ extensions: [footnoteRefMarkedExtension] });
+/**
+ * Pull `[^id]: text` definition lines out of the body and render a single
+ * `<section data-footnotes>` for them (or "" when there are none).
+ *
+ * Returns `{ body, section }`. When no definition is found, `body` is the
+ * input string itself; otherwise it is the input without the definition lines
+ * and with `[^id]` markers rewritten for de-duplicated ids.
+ */
+function extractFootnotes(markdown) {
+ const lines = markdown.split("\n");
+ const bodyLines = [];
+ const defs = [];
+ // Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
+ // block is preserved verbatim and not treated as a footnote definition.
+ // NOTE(review): only the fence character is compared, not its length, so a
+ // shorter run of the same character closes a longer fence here — confirm.
+ let fence = null;
+ for (const line of lines) {
+ const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
+ if (fenceMatch) {
+ const marker = fenceMatch[2][0];
+ if (fence === null)
+ fence = marker;
+ else if (marker === fence)
+ fence = null;
+ bodyLines.push(line);
+ continue;
+ }
+ const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
+ if (m)
+ defs.push({ id: m[1], text: m[2] });
+ else
+ bodyLines.push(line);
+ }
+ if (defs.length === 0)
+ return { body: markdown, section: "" };
+ // De-duplicate colliding definition ids (mirror of editor-ext
+ // extractFootnoteDefinitions). Two definitions sharing an id would otherwise
+ // collapse into one footnote downstream; rename each colliding id to a
+ // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
+ // marker so the (reference, definition) pairing stays 1:1. Determinism lets
+ // the same markdown imported here and via the editor produce identical ids.
+ let dedupedBody = bodyLines.join("\n");
+ // All original ids are reserved first so a derived id cannot collide with one.
+ const taken = new Set(defs.map((d) => d.id));
+ // original id -> number of definitions with that id seen so far.
+ const seenDefIds = new Map();
+ for (const def of defs) {
+ const originalId = def.id;
+ const count = seenDefIds.get(originalId) ?? 0;
+ seenDefIds.set(originalId, count + 1);
+ if (count === 0)
+ continue; // first definition keeps its id
+ const newId = deriveFootnoteId(originalId, count + 1, taken);
+ taken.add(newId);
+ def.id = newId;
+ // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
+ // index 1 = this duplicate's marker. Rewrite index 1.
+ // NOTE(review): the replace covers the whole joined body, fenced code lines
+ // included, so a `[^id]` inside a code block also counts — confirm.
+ let occurrence = 0;
+ let rewritten = false;
+ const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
+ dedupedBody = dedupedBody.replace(re, (match) => {
+ const idx = occurrence++;
+ if (!rewritten && idx === 1) {
+ rewritten = true;
+ return `[^${newId}]`;
+ }
+ return match;
+ });
+ }
+ // NOTE(review): the wrapper markup in the two templates below is missing in
+ // this view — presumably stripped in extraction; confirm against the real file.
+ const inner = defs
+ .map((d) => `${marked.parseInline(d.text || "")}
`)
+ .join("");
+ return {
+ body: dedupedBody,
+ section: `${inner} `,
+ };
+}
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
export async function markdownToProseMirror(markdownContent) {
const withCallouts = await preprocessCallouts(markdownContent);
- const html = await marked.parse(withCallouts);
+ // Footnote definitions are pulled out after callout preprocessing; the
+ // rendered footnotes section is appended after the body HTML, so the
+ // task-list bridging below sees both.
+ const { body, section } = extractFootnotes(withCallouts);
+ const html = (await marked.parse(body)) + section;
const bridged = bridgeTaskLists(html);
return generateJSON(bridged, docmostExtensions);
}
diff --git a/packages/mcp/build/lib/diff.js b/packages/mcp/build/lib/diff.js
index f5e7ab44..516a3c81 100644
--- a/packages/mcp/build/lib/diff.js
+++ b/packages/mcp/build/lib/diff.js
@@ -79,10 +79,26 @@ function countUniqueLinks(doc) {
visit(doc);
return hrefs.size;
}
+/**
+ * Count footnoteReference nodes anywhere under a node (reading order).
+ * The node itself is counted when its type is "footnoteReference"; the rest
+ * comes from recursing into `content` when it is an array. Returns 0 for
+ * null/undefined and for anything that is not an object.
+ */
+function countFootnoteRefs(node) {
+ if (!node || typeof node !== "object")
+ return 0;
+ let n = node.type === "footnoteReference" ? 1 : 0;
+ if (Array.isArray(node.content)) {
+ for (const child of node.content)
+ n += countFootnoteRefs(child);
+ }
+ return n;
+}
/**
- * Parse the ordered list of integers from `[N]` footnote markers found in the
- * BODY only (every top-level block before the first "Примечания..." notes
- * heading; if no such heading, the whole doc). Returned in reading order.
+ * Ordered list of footnote marker numbers found in the BODY only (every
+ * top-level block before the first "Примечания..." notes heading; if no such
+ * heading, the whole doc), in reading order.
+ *
+ * Supports BOTH representations:
+ * - real `footnoteReference` nodes (the current footnote feature) — numbered
+ * 1..n by reading position, since their visible number is derived;
+ * - legacy `[N]` text markers (older translated docs) — the literal N.
*/
function footnoteMarkers(doc, notesHeading) {
const top = Array.isArray(doc?.content) ? doc.content : [];
@@ -90,6 +106,15 @@ function footnoteMarkers(doc, notesHeading) {
n.type === "heading" &&
plainText(n).trim() === notesHeading);
const bodyBlocks = notesIdx >= 0 ? top.slice(0, notesIdx) : top;
+ // Real footnoteReference nodes take precedence: when present, number them by
+ // reading position (their displayed number is not stored).
+ let refCount = 0;
+ for (const block of bodyBlocks)
+ refCount += countFootnoteRefs(block);
+ if (refCount > 0) {
+ return Array.from({ length: refCount }, (_, i) => i + 1);
+ }
+ // Fallback: legacy `[N]` text markers.
const markers = [];
const re = /\[(\d+)\]/g;
for (const block of bodyBlocks) {
diff --git a/packages/mcp/build/lib/docmost-schema.js b/packages/mcp/build/lib/docmost-schema.js
index 97cdcafd..e89ed5a0 100644
--- a/packages/mcp/build/lib/docmost-schema.js
+++ b/packages/mcp/build/lib/docmost-schema.js
@@ -342,6 +342,78 @@ const Mention = Node.create({
return ["span", { "data-type": "mention", ...HTMLAttributes }, 0];
},
});
+/**
+ * Footnote feature (mirror of packages/editor-ext/src/lib/footnote). Three
+ * nodes connected by `id`:
+ *   - FootnoteReference: inline atom marker in the body
+ *     (`<sup data-footnote-ref data-id="…">`);
+ *   - FootnotesList: a single bottom container (`<section data-footnotes>`);
+ *   - FootnoteDefinition: one editable note keyed by id
+ *     (`<div data-footnote-def data-id="…">`).
+ * The visible number is not stored; it is derived from reference order.
+ *
+ * priority 101 so this node's `sup[data-footnote-ref]` parse rule beats the
+ * Superscript mark's `<sup>` rule (otherwise an empty reference is parsed as
+ * an empty superscript mark and dropped). Keep in sync with editor-ext.
+ */
+const FootnoteReference = Node.create({
+    name: "footnoteReference",
+    group: "inline",
+    inline: true,
+    atom: true,
+    draggable: false,
+    selectable: true,
+    // Raised so this node's parse rule is preferred over the Superscript mark's.
+    priority: 101,
+    addAttributes: () => ({
+        id: {
+            default: null,
+            // The id lives in the DOM as `data-id`; it is omitted when unset.
+            parseHTML: (element) => element.getAttribute("data-id"),
+            renderHTML: ({ id }) => (id ? { "data-id": id } : {}),
+        },
+    }),
+    parseHTML: () => [{ tag: "sup[data-footnote-ref]", priority: 100 }],
+    // Leaf node: the spec has no content hole.
+    renderHTML: ({ HTMLAttributes }) => [
+        "sup",
+        { "data-footnote-ref": "", ...HTMLAttributes },
+    ],
+});
+// Block container for the notes; it must hold at least one footnoteDefinition
+// and round-trips through `<section data-footnotes>`.
+const FootnotesList = Node.create({
+    name: "footnotesList",
+    content: "footnoteDefinition+",
+    group: "block",
+    defining: true,
+    isolating: true,
+    selectable: false,
+    parseHTML: () => [{ tag: "section[data-footnotes]" }],
+    // The trailing 0 is the content hole where the definitions are rendered.
+    renderHTML: ({ HTMLAttributes }) => [
+        "section",
+        { "data-footnotes": "", ...HTMLAttributes },
+        0,
+    ],
+});
+// One note body (one or more paragraphs) keyed by `id`. It declares no
+// `group`; FootnotesList refers to it by name in its content expression.
+const FootnoteDefinition = Node.create({
+    name: "footnoteDefinition",
+    content: "paragraph+",
+    isolating: true,
+    defining: true,
+    selectable: false,
+    addAttributes: () => ({
+        id: {
+            default: null,
+            // The id lives in the DOM as `data-id`; it is omitted when unset.
+            parseHTML: (element) => element.getAttribute("data-id"),
+            renderHTML: ({ id }) => (id ? { "data-id": id } : {}),
+        },
+    }),
+    parseHTML: () => [{ tag: "div[data-footnote-def]" }],
+    // The trailing 0 is the content hole where the paragraphs are rendered.
+    renderHTML: ({ HTMLAttributes }) => [
+        "div",
+        { "data-footnote-def": "", ...HTMLAttributes },
+        0,
+    ],
+});
/** Inline KaTeX expression. Carries the LaTeX source in `text`. */
const MathInline = Node.create({
name: "mathInline",
@@ -978,6 +1050,9 @@ export const docmostExtensions = [
TableCell,
TableHeader,
Mention,
+ FootnoteReference,
+ FootnotesList,
+ FootnoteDefinition,
MathInline,
MathBlock,
Details,
diff --git a/packages/mcp/build/lib/markdown-converter.js b/packages/mcp/build/lib/markdown-converter.js
index 477dee5d..d5d47400 100644
--- a/packages/mcp/build/lib/markdown-converter.js
+++ b/packages/mcp/build/lib/markdown-converter.js
@@ -388,6 +388,27 @@ export function convertProseMirrorToMarkdown(content) {
// carry the real values), so escape it for the text context, not attrs.
return `@${escapeHtmlText(mentionLabel)}`;
}
+ case "footnoteReference": {
+ // Pandoc/GFM inline marker. The number is derived (not stored), so the
+ // id is the stable anchor.
+ const fnId = node.attrs?.id || "";
+ return fnId ? `[^${fnId}]` : "";
+ }
+ case "footnotesList":
+ // The container renders its definitions, each on its own `[^id]: ...`
+ // line. A blank line separates the body from the notes block.
+ return nodeContent.map(processNode).join("\n");
+ case "footnoteDefinition": {
+ const defId = node.attrs?.id || "";
+ // Collapse the definition's paragraphs into a single line; multi-line
+ // footnotes are a v2 refinement.
+ const defText = nodeContent
+ .map(processNode)
+ .join(" ")
+ .replace(/\s*\n+\s*/g, " ")
+ .trim();
+ return defId ? `[^${defId}]: ${defText}` : "";
+ }
case "attachment": {
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
// the schema stores name/url (plus mime/size/attachmentId). Emit the
diff --git a/packages/mcp/build/lib/transforms.js b/packages/mcp/build/lib/transforms.js
index 98079f72..2fc5d37b 100644
--- a/packages/mcp/build/lib/transforms.js
+++ b/packages/mcp/build/lib/transforms.js
@@ -223,6 +223,59 @@ export function noteItem(inlineNodes) {
],
};
}
+/**
+ * Build a `footnoteDefinition` node for footnote `id` whose single paragraph
+ * holds a copy of `inlineNodes`:
+ *   { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", content }] }
+ * (same shape as the editor-ext / docmost-schema FootnoteDefinition node).
+ *
+ * @param id Footnote id shared with the matching footnoteReference.
+ * @param inlineNodes Inline nodes for the note text; a non-array yields an
+ *   empty paragraph. The input is cloned, never aliased.
+ * @returns The new node; its paragraph gets a fresh block id.
+ */
+export function footnoteDefinition(id, inlineNodes) {
+    let paragraphContent = [];
+    if (Array.isArray(inlineNodes)) {
+        paragraphContent = clone(inlineNodes);
+    }
+    const paragraph = {
+        type: "paragraph",
+        attrs: { id: freshId() },
+        content: paragraphContent,
+    };
+    return { type: "footnoteDefinition", attrs: { id }, content: [paragraph] };
+}
+/**
+ * Swap every `[N]` body marker and `\u0000FN<i>\u0000` comment placeholder
+ * found in the text nodes of `inline` for a `footnoteReference` node, keeping
+ * reading order. The text around a marker is split into separate text nodes
+ * that carry the original node's fields and marks.
+ *
+ * @param inline Inline content array; MUTATED in place.
+ * @param onMarker Called once per marker, left to right, with `{ oldNum }` for
+ *   a `[N]` marker or `{ phIdx }` for a placeholder. Its return value becomes
+ *   the `id` of the inserted reference node.
+ */
+function replaceMarkersWithReferences(inline, onMarker) {
+    const markerRe = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
+    let pos = 0;
+    while (pos < inline.length) {
+        const node = inline[pos];
+        pos += 1; // default: step past this node
+        if (!isObject(node) || node.type !== "text" || typeof node.text !== "string") {
+            continue;
+        }
+        const source = node.text;
+        const marks = Array.isArray(node.marks) ? node.marks : [];
+        const textRun = (from, to) => ({ ...node, text: source.slice(from, to), marks: [...marks] });
+        const pieces = [];
+        let cursor = 0;
+        // exec() leaves lastIndex at 0 once it returns null, so the shared
+        // global regex is ready for the next text node.
+        for (let hit = markerRe.exec(source); hit !== null; hit = markerRe.exec(source)) {
+            if (hit.index > cursor) {
+                pieces.push(textRun(cursor, hit.index));
+            }
+            const oldNum = hit[1] != null ? Number(hit[1]) : undefined;
+            const phIdx = hit[2] != null ? Number(hit[2]) : undefined;
+            const fnId = onMarker({ oldNum, phIdx });
+            pieces.push({ type: "footnoteReference", attrs: { id: fnId } });
+            cursor = hit.index + hit[0].length;
+        }
+        if (pieces.length === 0) {
+            continue; // no marker in this text node
+        }
+        if (cursor < source.length) {
+            pieces.push(textRun(cursor, source.length));
+        }
+        // Drop any zero-length text runs the slicing may have produced.
+        const kept = pieces.filter((p) => p.type !== "text" || (typeof p.text === "string" && p.text.length > 0));
+        inline.splice(pos - 1, 1, ...kept);
+        // Resume after the nodes just inserted so they are not rescanned.
+        pos += kept.length - 1;
+    }
+}
/**
* Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
* ProseMirror nodes.
@@ -321,85 +374,100 @@ export function commentsToFootnotes(doc, comments, opts = {}) {
throw new Error("notes orderedList not found");
}
const consumed = [];
- const noteByPh = new Map();
+ const noteInlineByPh = new Map();
(Array.isArray(comments) ? comments : []).forEach((c, i) => {
if (!c || !c.selection)
return;
// Collision-proof sentinel delimited by NUL control chars, which never occur
- // in real Docmost prose — so the renumber regex below cannot mistake any body
- // text (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
- // transient: the placeholder round-trips within this function (insertMarkerAfter
- // inserts it, the renumber pass replaces it with "[N]"), so it never persists
- // in a returned/pushed document.
+ // in real Docmost prose - so the marker regex cannot mistake any body text
+ // (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
+ // transient: the placeholder is inserted here and replaced by a
+ // footnoteReference node below; it never persists in a returned document.
const ph = `\u0000FN${i}\u0000`;
- // insertMarkerAfter returns a NEW cloned doc; reassign `working` and refresh
- // the `top` / `notesList` references that point into it.
+ // insertMarkerAfter returns a NEW cloned doc; reassign `working`.
const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
beforeBlock: notesIdx,
});
if (!r.inserted)
return;
working = r.doc;
- noteByPh.set(ph, noteItem(mdToInlineNodes(c.content)));
+ noteInlineByPh.set(ph, mdToInlineNodes(c.content));
consumed.push(c.id);
});
// Re-resolve references into the (possibly re-cloned) working doc.
const top2 = Array.isArray(working.content) ? working.content : [];
- const notesList2 = top2
- .slice(notesIdx)
- .find((n) => isObject(n) && n.type === "orderedList");
+ const notesIdx2 = top2.findIndex((n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading);
+ const oldListIndex = top2.findIndex((n) => isObject(n) && n.type === "orderedList");
+ const notesList2 = oldListIndex >= 0 ? top2[oldListIndex] : null;
if (!notesList2) {
throw new Error("notes orderedList not found");
}
- const oldNotes = Array.isArray(notesList2.content)
+ // Inline content of each existing note (listItem -> paragraph -> inline).
+ const oldNoteInline = (Array.isArray(notesList2.content)
? notesList2.content
- : [];
- const newNotes = [];
- let seq = 0;
- // Match either an existing "[N]" marker or a NUL-delimited "\u0000FN\u0000"
- // placeholder, in reading order across the body (blocks before the notes heading).
- const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
- // Same range regex setCalloutRange uses to detect the disclaimer callout's
- // "[1]…[K]" range; used here to decide whether a top-level callout is the
- // disclaimer (skip) or an ordinary callout (renumber normally).
+ : []).map((item) => {
+ const para = isObject(item) && Array.isArray(item.content)
+ ? item.content.find((c) => isObject(c) && c.type === "paragraph")
+ : null;
+ return para && Array.isArray(para.content) ? para.content : [];
+ });
+ // Walk the body in reading order, turning each "[N]" / placeholder marker into
+ // a real footnoteReference node and collecting its definition inline content.
+ const definitions = [];
const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;
- for (let i = 0; i < notesIdx; i++) {
- // Skip ONLY the disclaimer callout: its "[1]…[K]" range is NOT a footnote
- // marker and is synced separately by setCalloutRange. Renumbering it here
- // would consume note slots and corrupt the sequence. Other top-level
- // callouts may carry legitimate "[N]" body markers and are renumbered.
+ // Recursively visit inline arrays inside a block (paragraph, heading, callout
+ // child paragraphs, table cells, ...), preserving document reading order.
+ const visitInlineArrays = (container) => {
+ if (!isObject(container) || !Array.isArray(container.content))
+ return;
+ const hasText = container.content.some((n) => isObject(n) && n.type === "text");
+ if (hasText) {
+ replaceMarkersWithReferences(container.content, ({ oldNum, phIdx }) => {
+ const fnId = freshId();
+ if (oldNum != null) {
+ const inline = oldNoteInline[oldNum - 1];
+ // Every existing body marker MUST map to a real note. An out-of-range
+ // marker means the document is internally inconsistent; fail loudly.
+ if (inline === undefined) {
+ throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`);
+ }
+ definitions.push(footnoteDefinition(fnId, inline));
+ }
+ else {
+ const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
+ definitions.push(footnoteDefinition(fnId, inline));
+ }
+ return fnId;
+ });
+ }
+ else {
+ for (const child of container.content)
+ visitInlineArrays(child);
+ }
+ };
+ const notesBoundary = notesIdx2 >= 0 ? notesIdx2 : oldListIndex;
+ for (let i = 0; i < notesBoundary; i++) {
+ // Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a footnote
+ // marker and is synced separately by setCalloutRange.
if (isObject(top2[i]) &&
top2[i].type === "callout" &&
disclaimerRangeRe.test(blockText(top2[i]))) {
continue;
}
- walk(top2[i], (node) => {
- if (node.type !== "text" || typeof node.text !== "string")
- return;
- node.text = node.text.replace(re, (_m, oldNum, phIdx) => {
- if (oldNum != null) {
- const note = oldNotes[Number(oldNum) - 1];
- // Every existing body marker MUST map to a real note. An out-of-range
- // marker means the document is internally inconsistent; fail loudly
- // rather than silently dropping the note and desyncing the callout.
- if (note === undefined) {
- throw new Error(`footnote [${oldNum}] has no matching note (notes list has ${oldNotes.length} items); document is inconsistent`);
- }
- newNotes.push(note);
- }
- else {
- newNotes.push(noteByPh.get(`\u0000FN${phIdx}\u0000`));
- }
- return `[${++seq}]`;
- });
- });
+ visitInlineArrays(top2[i]);
}
- // Reorder the notes list IN PLACE on `working` first, THEN sync the callout
- // range. setCalloutRange clones `working`, so the reordered notes (mutated
- // before the clone) are carried into its result automatically. No null-filter
- // here: marker count and note count must stay exactly equal (the out-of-range
- // guard above guarantees no undefined entry is ever pushed).
- notesList2.content = newNotes;
- const synced = setCalloutRange(working, notesList2.content.length);
+ // Replace the old orderedList with a real footnotesList of the collected
+ // definitions (reading order). If there are no definitions, drop the list.
+ if (definitions.length > 0) {
+ top2[oldListIndex] = {
+ type: "footnotesList",
+ content: definitions,
+ };
+ }
+ else {
+ top2.splice(oldListIndex, 1);
+ }
+ // Sync the disclaimer callout range to the new note count.
+ const synced = setCalloutRange(working, definitions.length);
return { doc: synced.doc, consumed };
}
diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts
index ca2114d9..6f0ad011 100644
--- a/packages/mcp/src/lib/collaboration.ts
+++ b/packages/mcp/src/lib/collaboration.ts
@@ -296,12 +296,165 @@ function bridgeTaskLists(html: string): string {
return document.body.innerHTML;
}
+// Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline
+// marker becomes `<sup data-footnote-ref data-id="id"></sup>`, and `[^id]: text`
+// definition lines are collected into a single `<section data-footnotes>`.
+// Whole-line definition `[^id]: text`. Group 1 is the id (no `]` or
+// whitespace); group 2 is the text after the colon and any spaces/tabs.
+const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/;
+// Inline `[^id]` marker; group 1 is the id. Unanchored and not global.
+const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/;
+
+/**
+ * Escape `value` for use inside a double-quoted HTML attribute.
+ *
+ * `&` is replaced first so the `&` introduced by `&quot;` is not escaped a
+ * second time.
+ *
+ * @param value Raw attribute text (e.g. a footnote id).
+ * @returns The text with `&` -> `&amp;` and `"` -> `&quot;`.
+ */
+function escapeFootnoteAttr(value: string): string {
+  return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
+}
+
+/**
+ * Backslash-escape every RegExp metacharacter in `value` so the result can be
+ * embedded in a `new RegExp(...)` source and match `value` literally.
+ */
+function escapeFootnoteRegExp(value: string): string {
+  const metaChars = /[.*+?^${}()|[\]\\]/g;
+  return value.replace(metaChars, (ch) => `\\${ch}`);
+}
+
+/**
+ * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of
+ * an original id `X` during definition dedup.
+ *
+ * EXACT MIRROR of editor-ext `deriveFootnoteId`
+ * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST
+ * STAY IN SYNC: the same markdown imported through the editor and through this
+ * MCP path has to produce identical ids, and the sync plugin (which re-ids on
+ * every collaborating client) relies on the same scheme to converge. NEVER use
+ * Math.random()/Date.now()/uuid here — a random id would diverge across clients.
+ *
+ * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped
+ * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in
+ * `taken` (the set of ids already present / already minted — pure doc state).
+ */
+function deriveFootnoteId(
+  originalId: string,
+  occurrence: number,
+  // Only read here; the caller records the returned id itself.
+  taken: ReadonlySet<string>,
+): string {
+  // Base candidate, e.g. `X__2`.
+  let candidate = `${originalId}__${occurrence}`;
+  let n = 0;
+  // On a collision, append the next stable suffix (`X__2b`, `X__2c`, ...).
+  while (taken.has(candidate)) {
+    n += 1;
+    candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`;
+  }
+  return candidate;
+}
+
+/**
+ * Map 1 -> "b", 2 -> "c", ..., 25 -> "z", 26 -> "bb", ... (mirror of
+ * editor-ext `suffix`). Only the 25 letters b..z are used; `n <= 0` gives "".
+ */
+function footnoteSuffix(n: number): string {
+  const letters: string[] = [];
+  for (let rest = n; rest > 0; rest = Math.floor((rest - 1) / 25)) {
+    // 98 is the char code of 'b'; the least significant letter comes first.
+    letters.unshift(String.fromCharCode(98 + ((rest - 1) % 25)));
+  }
+  return letters.join("");
+}
+
+/**
+ * marked inline extension that turns a `[^id]` marker into the
+ * `<sup data-footnote-ref data-id="id"></sup>` element that the
+ * FootnoteReference node's parse rule (`sup[data-footnote-ref]`) picks up.
+ */
+const footnoteRefMarkedExtension = {
+  name: "footnoteRef",
+  level: "inline" as const,
+  /** Index of the next possible marker in `src`, or -1 when there is none. */
+  start(src: string) {
+    return src.indexOf("[^");
+  },
+  /** Emit a token only when a marker sits at the very start of `src`. */
+  tokenizer(src: string) {
+    const match = FOOTNOTE_REF_RE.exec(src);
+    if (match && match.index === 0) {
+      return { type: "footnoteRef", raw: match[0], id: match[1] };
+    }
+    return undefined;
+  },
+  /** Render the token; the id is escaped because it lands in an attribute. */
+  renderer(token: any) {
+    return `<sup data-footnote-ref data-id="${escapeFootnoteAttr(token.id)}"></sup>`;
+  },
+};
+
+marked.use({ extensions: [footnoteRefMarkedExtension] });
+
+/**
+ * Pull `[^id]: text` definition lines out of the body and render a single
+ * `<section data-footnotes>` for them (or "" when there are none).
+ *
+ * Lines inside ``` / ~~~ fences are left untouched. Definitions that reuse an
+ * id are renamed with `deriveFootnoteId`, and the matching `[^id]` marker in
+ * the body is rewritten so each reference still pairs with one definition.
+ *
+ * @param markdown Markdown source.
+ * @returns `body`: the markdown without its definition lines (the input
+ *   itself, unchanged, when no definition was found); `section`: the HTML for
+ *   the definitions, meant to be appended after the rendered body.
+ */
+function extractFootnotes(markdown: string): {
+  body: string;
+  section: string;
+} {
+  const lines = markdown.split("\n");
+  const bodyLines: string[] = [];
+  const defs: Array<{ id: string; text: string }> = [];
+  // Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code
+  // block is preserved verbatim and not treated as a footnote definition.
+  let fence: string | null = null;
+  for (const line of lines) {
+    const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line);
+    if (fenceMatch) {
+      // Only the fence character is compared: a fence opened with ``` is
+      // closed by the next line that starts with ```.
+      const marker = fenceMatch[2][0];
+      if (fence === null) fence = marker;
+      else if (marker === fence) fence = null;
+      bodyLines.push(line);
+      continue;
+    }
+    const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null;
+    if (m) defs.push({ id: m[1], text: m[2] });
+    else bodyLines.push(line);
+  }
+  if (defs.length === 0) return { body: markdown, section: "" };
+
+  // De-duplicate colliding definition ids (mirror of editor-ext
+  // extractFootnoteDefinitions). Two definitions sharing an id would otherwise
+  // collapse into one footnote downstream; rename each colliding id to a
+  // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]`
+  // marker so the (reference, definition) pairing stays 1:1. Determinism lets
+  // the same markdown imported here and via the editor produce identical ids.
+  let dedupedBody = bodyLines.join("\n");
+  const taken = new Set<string>(defs.map((d) => d.id));
+  const seenDefIds = new Map<string, number>();
+  for (const def of defs) {
+    const originalId = def.id;
+    const count = seenDefIds.get(originalId) ?? 0;
+    seenDefIds.set(originalId, count + 1);
+    if (count === 0) continue; // first definition keeps its id
+    const newId = deriveFootnoteId(originalId, count + 1, taken);
+    taken.add(newId);
+    def.id = newId;
+    // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone),
+    // index 1 = this duplicate's marker. Rewrite index 1.
+    let occurrence = 0;
+    let rewritten = false;
+    const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g");
+    dedupedBody = dedupedBody.replace(re, (match) => {
+      const idx = occurrence++;
+      if (!rewritten && idx === 1) {
+        rewritten = true;
+        return `[^${newId}]`;
+      }
+      return match;
+    });
+  }
+
+  // One `<div data-footnote-def>` per definition, its text wrapped in the
+  // single paragraph the FootnoteDefinition node (`paragraph+`) requires.
+  const inner = defs
+    .map(
+      (d) =>
+        `<div data-footnote-def data-id="${escapeFootnoteAttr(d.id)}"><p>${marked.parseInline(d.text || "")}</p></div>`,
+    )
+    .join("");
+  return {
+    body: dedupedBody,
+    section: `<section data-footnotes>${inner}</section>`,
+  };
+}
+
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
export async function markdownToProseMirror(
markdownContent: string,
): Promise {
const withCallouts = await preprocessCallouts(markdownContent);
- const html = await marked.parse(withCallouts);
+ const { body, section } = extractFootnotes(withCallouts);
+ const html = (await marked.parse(body)) + section;
const bridged = bridgeTaskLists(html);
return generateJSON(bridged, docmostExtensions);
}
diff --git a/packages/mcp/src/lib/diff.ts b/packages/mcp/src/lib/diff.ts
index befe047c..d0848997 100644
--- a/packages/mcp/src/lib/diff.ts
+++ b/packages/mcp/src/lib/diff.ts
@@ -101,10 +101,25 @@ function countUniqueLinks(doc: any): number {
return hrefs.size;
}
+/**
+ * Tally the `footnoteReference` nodes in the subtree rooted at `node`
+ * (the root itself included).
+ *
+ * @param node A ProseMirror-style JSON node. Anything that is not a non-null
+ *   object contributes 0.
+ * @returns How many nodes in the subtree have `type === "footnoteReference"`.
+ */
+function countFootnoteRefs(node: any): number {
+  if (typeof node !== "object" || node === null) return 0;
+  // The node counts itself before its children are folded in.
+  const own = node.type === "footnoteReference" ? 1 : 0;
+  if (!Array.isArray(node.content)) return own;
+  return node.content.reduce(
+    (total: number, child: any) => total + countFootnoteRefs(child),
+    own,
+  );
+}
+
/**
- * Parse the ordered list of integers from `[N]` footnote markers found in the
- * BODY only (every top-level block before the first "Примечания..." notes
- * heading; if no such heading, the whole doc). Returned in reading order.
+ * Ordered list of footnote marker numbers found in the BODY only (every
+ * top-level block before the first "Примечания..." notes heading; if no such
+ * heading, the whole doc), in reading order.
+ *
+ * Supports BOTH representations:
+ * - real `footnoteReference` nodes (the current footnote feature) — numbered
+ * 1..n by reading position, since their visible number is derived;
+ * - legacy `[N]` text markers (older translated docs) — the literal N.
*/
function footnoteMarkers(doc: any, notesHeading: string): number[] {
const top: any[] = Array.isArray(doc?.content) ? doc.content : [];
@@ -115,6 +130,16 @@ function footnoteMarkers(doc: any, notesHeading: string): number[] {
plainText(n).trim() === notesHeading,
);
const bodyBlocks = notesIdx >= 0 ? top.slice(0, notesIdx) : top;
+
+ // Real footnoteReference nodes take precedence: when present, number them by
+ // reading position (their displayed number is not stored).
+ let refCount = 0;
+ for (const block of bodyBlocks) refCount += countFootnoteRefs(block);
+ if (refCount > 0) {
+ return Array.from({ length: refCount }, (_, i) => i + 1);
+ }
+
+ // Fallback: legacy `[N]` text markers.
const markers: number[] = [];
const re = /\[(\d+)\]/g;
for (const block of bodyBlocks) {
diff --git a/packages/mcp/src/lib/docmost-schema.ts b/packages/mcp/src/lib/docmost-schema.ts
index c45c275a..3d8d25d7 100644
--- a/packages/mcp/src/lib/docmost-schema.ts
+++ b/packages/mcp/src/lib/docmost-schema.ts
@@ -378,6 +378,83 @@ const Mention = Node.create({
},
});
+/**
+ * Footnote feature (mirror of packages/editor-ext/src/lib/footnote). Three
+ * nodes connected by `id`:
+ *   - FootnoteReference: inline atom marker in the body
+ *     (`<sup data-footnote-ref data-id="…">`);
+ *   - FootnotesList: a single bottom container (`<section data-footnotes>`);
+ *   - FootnoteDefinition: one editable note keyed by id
+ *     (`<div data-footnote-def data-id="…">`).
+ * The visible number is not stored; it is derived from reference order.
+ *
+ * priority 101 so this node's `sup[data-footnote-ref]` parse rule beats the
+ * Superscript mark's `<sup>` rule (otherwise an empty reference is parsed as
+ * an empty superscript mark and dropped). Keep in sync with editor-ext.
+ */
+const FootnoteReference = Node.create({
+  name: "footnoteReference",
+  // Raised so this node's parse rule is preferred over the Superscript mark's.
+  priority: 101,
+  group: "inline",
+  inline: true,
+  atom: true,
+  selectable: true,
+  draggable: false,
+  addAttributes() {
+    return {
+      id: {
+        default: null,
+        // The id lives in the DOM as `data-id`; it is omitted when unset.
+        parseHTML: (el: HTMLElement) => el.getAttribute("data-id"),
+        renderHTML: (attrs: Record<string, unknown>) =>
+          attrs.id ? { "data-id": attrs.id } : {},
+      },
+    };
+  },
+  parseHTML() {
+    return [{ tag: "sup[data-footnote-ref]", priority: 100 }];
+  },
+  // Leaf node: the spec has no content hole.
+  renderHTML({ HTMLAttributes }) {
+    return ["sup", { "data-footnote-ref": "", ...HTMLAttributes }];
+  },
+});
+
+// Block container for the notes; it must hold at least one footnoteDefinition
+// and round-trips through `<section data-footnotes>`.
+const FootnotesList = Node.create({
+  name: "footnotesList",
+  content: "footnoteDefinition+",
+  group: "block",
+  defining: true,
+  isolating: true,
+  selectable: false,
+  parseHTML: () => [{ tag: "section[data-footnotes]" }],
+  // The trailing 0 is the content hole where the definitions are rendered.
+  renderHTML: ({ HTMLAttributes }) => [
+    "section",
+    { "data-footnotes": "", ...HTMLAttributes },
+    0,
+  ],
+});
+
+// One note body (one or more paragraphs) keyed by `id`. It declares no
+// `group`; FootnotesList refers to it by name in its content expression.
+const FootnoteDefinition = Node.create({
+  name: "footnoteDefinition",
+  content: "paragraph+",
+  defining: true,
+  isolating: true,
+  selectable: false,
+  addAttributes() {
+    return {
+      id: {
+        default: null,
+        // The id lives in the DOM as `data-id`; it is omitted when unset.
+        parseHTML: (el: HTMLElement) => el.getAttribute("data-id"),
+        renderHTML: (attrs: Record<string, unknown>) =>
+          attrs.id ? { "data-id": attrs.id } : {},
+      },
+    };
+  },
+  parseHTML() {
+    return [{ tag: "div[data-footnote-def]" }];
+  },
+  // The trailing 0 is the content hole where the paragraphs are rendered.
+  renderHTML({ HTMLAttributes }) {
+    return ["div", { "data-footnote-def": "", ...HTMLAttributes }, 0];
+  },
+});
+
/** Inline KaTeX expression. Carries the LaTeX source in `text`. */
const MathInline = Node.create({
name: "mathInline",
@@ -1069,6 +1146,9 @@ export const docmostExtensions = [
TableCell,
TableHeader,
Mention,
+ FootnoteReference,
+ FootnotesList,
+ FootnoteDefinition,
MathInline,
MathBlock,
Details,
diff --git a/packages/mcp/src/lib/markdown-converter.ts b/packages/mcp/src/lib/markdown-converter.ts
index cbaa7042..4e35c995 100644
--- a/packages/mcp/src/lib/markdown-converter.ts
+++ b/packages/mcp/src/lib/markdown-converter.ts
@@ -430,6 +430,30 @@ export function convertProseMirrorToMarkdown(content: any): string {
return `@${escapeHtmlText(mentionLabel)}`;
}
+ case "footnoteReference": {
+ // Pandoc/GFM inline marker. The number is derived (not stored), so the
+ // id is the stable anchor.
+ const fnId = node.attrs?.id || "";
+ return fnId ? `[^${fnId}]` : "";
+ }
+
+ case "footnotesList":
+ // The container renders its definitions, each on its own `[^id]: ...`
+ // line. A blank line separates the body from the notes block.
+ return nodeContent.map(processNode).join("\n");
+
+ case "footnoteDefinition": {
+ const defId = node.attrs?.id || "";
+ // Collapse the definition's paragraphs into a single line; multi-line
+ // footnotes are a v2 refinement.
+ const defText = nodeContent
+ .map(processNode)
+ .join(" ")
+ .replace(/\s*\n+\s*/g, " ")
+ .trim();
+ return defId ? `[^${defId}]: ${defText}` : "";
+ }
+
case "attachment": {
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
// the schema stores name/url (plus mime/size/attachmentId). Emit the
diff --git a/packages/mcp/src/lib/transforms.ts b/packages/mcp/src/lib/transforms.ts
index d8fba091..98269aff 100644
--- a/packages/mcp/src/lib/transforms.ts
+++ b/packages/mcp/src/lib/transforms.ts
@@ -264,6 +264,66 @@ export function noteItem(inlineNodes: any[]): any {
};
}
+/**
+ * Build a `footnoteDefinition` node for footnote `id` whose single paragraph
+ * holds a copy of `inlineNodes`:
+ *   { type:"footnoteDefinition", attrs:{id}, content:[{ type:"paragraph", content }] }
+ * (same shape as the editor-ext / docmost-schema FootnoteDefinition node).
+ *
+ * @param id Footnote id shared with the matching footnoteReference.
+ * @param inlineNodes Inline nodes for the note text; a non-array yields an
+ *   empty paragraph. The input is cloned, never aliased.
+ * @returns The new node; its paragraph gets a fresh block id.
+ */
+export function footnoteDefinition(id: string, inlineNodes: any[]): any {
+  let paragraphContent: any[] = [];
+  if (Array.isArray(inlineNodes)) {
+    paragraphContent = clone(inlineNodes);
+  }
+  const paragraph = {
+    type: "paragraph",
+    attrs: { id: freshId() },
+    content: paragraphContent,
+  };
+  return { type: "footnoteDefinition", attrs: { id }, content: [paragraph] };
+}
+
+/**
+ * Swap every `[N]` body marker and `\u0000FN<i>\u0000` comment placeholder
+ * found in the text nodes of `inline` for a `footnoteReference` node, keeping
+ * reading order. The text around a marker is split into separate text nodes
+ * that carry the original node's fields and marks.
+ *
+ * @param inline Inline content array; MUTATED in place.
+ * @param onMarker Called once per marker, left to right, with `{ oldNum }` for
+ *   a `[N]` marker or `{ phIdx }` for a placeholder. Its return value becomes
+ *   the `id` of the inserted reference node.
+ */
+function replaceMarkersWithReferences(
+  inline: any[],
+  onMarker: (info: { oldNum?: number; phIdx?: number }) => string,
+): void {
+  const markerRe = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
+  let pos = 0;
+  while (pos < inline.length) {
+    const node = inline[pos];
+    pos += 1; // default: step past this node
+    if (!isObject(node) || node.type !== "text" || typeof node.text !== "string") {
+      continue;
+    }
+
+    const source: string = node.text;
+    const marks: any[] = Array.isArray(node.marks) ? node.marks : [];
+    const textRun = (from: number, to: number): any => ({
+      ...node,
+      text: source.slice(from, to),
+      marks: [...marks],
+    });
+
+    const pieces: any[] = [];
+    let cursor = 0;
+    // exec() leaves lastIndex at 0 once it returns null, so the shared global
+    // regex is ready for the next text node.
+    for (
+      let hit = markerRe.exec(source);
+      hit !== null;
+      hit = markerRe.exec(source)
+    ) {
+      if (hit.index > cursor) {
+        pieces.push(textRun(cursor, hit.index));
+      }
+      const oldNum = hit[1] != null ? Number(hit[1]) : undefined;
+      const phIdx = hit[2] != null ? Number(hit[2]) : undefined;
+      const fnId = onMarker({ oldNum, phIdx });
+      pieces.push({ type: "footnoteReference", attrs: { id: fnId } });
+      cursor = hit.index + hit[0].length;
+    }
+    if (pieces.length === 0) continue; // no marker in this text node
+
+    if (cursor < source.length) {
+      pieces.push(textRun(cursor, source.length));
+    }
+    // Drop any zero-length text runs the slicing may have produced.
+    const kept = pieces.filter(
+      (p) => p.type !== "text" || (typeof p.text === "string" && p.text.length > 0),
+    );
+    inline.splice(pos - 1, 1, ...kept);
+    // Resume after the nodes just inserted so they are not rescanned.
+    pos += kept.length - 1;
+  }
+}
+
/**
* Convert a comment's markdown (e.g. `**Lead.** body...`) into inline
* ProseMirror nodes.
@@ -388,54 +448,91 @@ export function commentsToFootnotes(
}
const consumed: string[] = [];
- const noteByPh = new Map();
+ const noteInlineByPh = new Map();
(Array.isArray(comments) ? comments : []).forEach((c, i) => {
if (!c || !c.selection) return;
// Collision-proof sentinel delimited by NUL control chars, which never occur
- // in real Docmost prose — so the renumber regex below cannot mistake any body
- // text (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
- // transient: the placeholder round-trips within this function (insertMarkerAfter
- // inserts it, the renumber pass replaces it with "[N]"), so it never persists
- // in a returned/pushed document.
+ // in real Docmost prose - so the marker regex cannot mistake any body text
+ // (e.g. "Press F1 for help", model "FN2") for a placeholder. The NUL is
+ // transient: the placeholder is inserted here and replaced by a
+ // footnoteReference node below; it never persists in a returned document.
const ph = `\u0000FN${i}\u0000`;
- // insertMarkerAfter returns a NEW cloned doc; reassign `working` and refresh
- // the `top` / `notesList` references that point into it.
+ // insertMarkerAfter returns a NEW cloned doc; reassign `working`.
const r = insertMarkerAfter(working, c.selection.trimEnd(), ph, {
beforeBlock: notesIdx,
});
if (!r.inserted) return;
working = r.doc;
- noteByPh.set(ph, noteItem(mdToInlineNodes(c.content)));
+ noteInlineByPh.set(ph, mdToInlineNodes(c.content));
consumed.push(c.id);
});
// Re-resolve references into the (possibly re-cloned) working doc.
const top2: any[] = Array.isArray(working.content) ? working.content : [];
- const notesList2 = top2
- .slice(notesIdx)
- .find((n) => isObject(n) && n.type === "orderedList");
+ const notesIdx2 = top2.findIndex(
+ (n) => isObject(n) && n.type === "heading" && blockText(n).trim() === notesHeading,
+ );
+ const oldListIndex = top2.findIndex(
+ (n) => isObject(n) && n.type === "orderedList",
+ );
+ const notesList2 = oldListIndex >= 0 ? top2[oldListIndex] : null;
if (!notesList2) {
throw new Error("notes orderedList not found");
}
- const oldNotes: any[] = Array.isArray(notesList2.content)
+ // Inline content of each existing note (listItem -> paragraph -> inline).
+ const oldNoteInline = (Array.isArray(notesList2.content)
? notesList2.content
- : [];
- const newNotes: any[] = [];
- let seq = 0;
- // Match either an existing "[N]" marker or a NUL-delimited "\u0000FN\u0000"
- // placeholder, in reading order across the body (blocks before the notes heading).
- const re = /\[(\d+)\]|\u0000FN(\d+)\u0000/g;
- // Same range regex setCalloutRange uses to detect the disclaimer callout's
- // "[1]…[K]" range; used here to decide whether a top-level callout is the
- // disclaimer (skip) or an ordinary callout (renumber normally).
+ : []
+ ).map((item: any) => {
+ const para =
+ isObject(item) && Array.isArray(item.content)
+ ? item.content.find((c: any) => isObject(c) && c.type === "paragraph")
+ : null;
+ return para && Array.isArray(para.content) ? para.content : [];
+ });
+
+ // Walk the body in reading order, turning each "[N]" / placeholder marker into
+ // a real footnoteReference node and collecting its definition inline content.
+ const definitions: any[] = [];
const disclaimerRangeRe = /(\[1\]\s*(?:…|\.\.\.)\s*\[)\d+(\])/;
- for (let i = 0; i < notesIdx; i++) {
- // Skip ONLY the disclaimer callout: its "[1]…[K]" range is NOT a footnote
- // marker and is synced separately by setCalloutRange. Renumbering it here
- // would consume note slots and corrupt the sequence. Other top-level
- // callouts may carry legitimate "[N]" body markers and are renumbered.
+
+ // Recursively visit inline arrays inside a block (paragraph, heading, callout
+ // child paragraphs, table cells, ...), preserving document reading order.
+ const visitInlineArrays = (container: any): void => {
+ if (!isObject(container) || !Array.isArray(container.content)) return;
+ const hasText = container.content.some(
+ (n: any) => isObject(n) && n.type === "text",
+ );
+ if (hasText) {
+ replaceMarkersWithReferences(container.content, ({ oldNum, phIdx }) => {
+ const fnId = freshId();
+ if (oldNum != null) {
+ const inline = oldNoteInline[oldNum - 1];
+ // Every existing body marker MUST map to a real note. An out-of-range
+ // marker means the document is internally inconsistent; fail loudly.
+ if (inline === undefined) {
+ throw new Error(
+ `footnote [${oldNum}] has no matching note (notes list has ${oldNoteInline.length} items); document is inconsistent`,
+ );
+ }
+ definitions.push(footnoteDefinition(fnId, inline));
+ } else {
+ const inline = noteInlineByPh.get(`\u0000FN${phIdx}\u0000`) || [];
+ definitions.push(footnoteDefinition(fnId, inline));
+ }
+ return fnId;
+ });
+ } else {
+ for (const child of container.content) visitInlineArrays(child);
+ }
+ };
+
+ const notesBoundary = notesIdx2 >= 0 ? notesIdx2 : oldListIndex;
+ for (let i = 0; i < notesBoundary; i++) {
+ // Skip ONLY the disclaimer callout: its "[1]...[K]" range is NOT a footnote
+ // marker and is synced separately by setCalloutRange.
if (
isObject(top2[i]) &&
top2[i].type === "callout" &&
@@ -443,35 +540,22 @@ export function commentsToFootnotes(
) {
continue;
}
- walk(top2[i], (node) => {
- if (node.type !== "text" || typeof node.text !== "string") return;
- node.text = node.text.replace(re, (_m: string, oldNum: string, phIdx: string) => {
- if (oldNum != null) {
- const note = oldNotes[Number(oldNum) - 1];
- // Every existing body marker MUST map to a real note. An out-of-range
- // marker means the document is internally inconsistent; fail loudly
- // rather than silently dropping the note and desyncing the callout.
- if (note === undefined) {
- throw new Error(
- `footnote [${oldNum}] has no matching note (notes list has ${oldNotes.length} items); document is inconsistent`,
- );
- }
- newNotes.push(note);
- } else {
- newNotes.push(noteByPh.get(`\u0000FN${phIdx}\u0000`));
- }
- return `[${++seq}]`;
- });
- });
+ visitInlineArrays(top2[i]);
}
- // Reorder the notes list IN PLACE on `working` first, THEN sync the callout
- // range. setCalloutRange clones `working`, so the reordered notes (mutated
- // before the clone) are carried into its result automatically. No null-filter
- // here: marker count and note count must stay exactly equal (the out-of-range
- // guard above guarantees no undefined entry is ever pushed).
- notesList2.content = newNotes;
- const synced = setCalloutRange(working, notesList2.content.length);
+ // Replace the old orderedList with a real footnotesList of the collected
+ // definitions (reading order). If there are no definitions, drop the list.
+ if (definitions.length > 0) {
+ top2[oldListIndex] = {
+ type: "footnotesList",
+ content: definitions,
+ };
+ } else {
+ top2.splice(oldListIndex, 1);
+ }
+
+ // Sync the disclaimer callout range to the new note count.
+ const synced = setCalloutRange(working, definitions.length);
return { doc: synced.doc, consumed };
}
diff --git a/packages/mcp/test/unit/footnotes.test.mjs b/packages/mcp/test/unit/footnotes.test.mjs
new file mode 100644
index 00000000..df45a7b9
--- /dev/null
+++ b/packages/mcp/test/unit/footnotes.test.mjs
@@ -0,0 +1,153 @@
+import { test } from "node:test";
+import assert from "node:assert/strict";
+
+import { convertProseMirrorToMarkdown } from "../../build/lib/markdown-converter.js";
+import { markdownToProseMirror } from "../../build/lib/collaboration.js";
+
+/** Collect `node` and every descendant whose `type` matches, pre-order (parent before children), into `acc`; returns `acc`. Non-object input is ignored. */
+function findAll(node, type, acc = []) {
+ if (!node || typeof node !== "object") return acc;
+ if (node.type === type) acc.push(node);
+ if (Array.isArray(node.content)) {
+ for (const c of node.content) findAll(c, type, acc);
+ }
+ return acc;
+}
+
+const footnoteDoc = {
+ type: "doc",
+ content: [
+ {
+ type: "paragraph",
+ content: [
+ { type: "text", text: "Water" },
+ { type: "footnoteReference", attrs: { id: "fn1" } },
+ { type: "text", text: " and clay" },
+ { type: "footnoteReference", attrs: { id: "fn2" } },
+ { type: "text", text: "." },
+ ],
+ },
+ {
+ type: "footnotesList",
+ content: [
+ {
+ type: "footnoteDefinition",
+ attrs: { id: "fn1" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "First note." }] },
+ ],
+ },
+ {
+ type: "footnoteDefinition",
+ attrs: { id: "fn2" },
+ content: [
+ { type: "paragraph", content: [{ type: "text", text: "Second note." }] },
+ ],
+ },
+ ],
+ },
+ ],
+};
+
+test("JSON -> Markdown emits pandoc footnote syntax", () => {
+ const md = convertProseMirrorToMarkdown(footnoteDoc);
+ assert.match(md, /\[\^fn1\]/);
+ assert.match(md, /\[\^fn2\]/);
+ assert.match(md, /\[\^fn1\]: First note\./);
+ assert.match(md, /\[\^fn2\]: Second note\./);
+});
+
+test("Markdown -> JSON rebuilds footnote nodes", async () => {
+ const md = convertProseMirrorToMarkdown(footnoteDoc);
+ const json = await markdownToProseMirror(md);
+
+ const refs = findAll(json, "footnoteReference");
+ const list = findAll(json, "footnotesList");
+ const defs = findAll(json, "footnoteDefinition");
+
+ assert.equal(refs.length, 2);
+ assert.deepEqual(
+ refs.map((r) => r.attrs.id),
+ ["fn1", "fn2"],
+ );
+ assert.equal(list.length, 1);
+ assert.equal(defs.length, 2);
+ assert.deepEqual(
+ defs.map((d) => d.attrs.id),
+ ["fn1", "fn2"],
+ );
+});
+
+test("JSON -> MD -> JSON preserves footnote ids and text", async () => {
+ const md = convertProseMirrorToMarkdown(footnoteDoc);
+ const json = await markdownToProseMirror(md);
+ const md2 = convertProseMirrorToMarkdown(json);
+
+ // The second markdown serialization carries the same markers + definitions.
+ assert.match(md2, /\[\^fn1\]/);
+ assert.match(md2, /\[\^fn2\]/);
+ assert.match(md2, /\[\^fn1\]: First note\./);
+ assert.match(md2, /\[\^fn2\]: Second note\./);
+});
+
+test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", async () => {
+ // The MCP import must derive duplicate ids deterministically (NOT random) so
+ // the same markdown imported here and via the editor produces identical ids,
+ // and re-importing is stable. This is the test that would FAIL on the old
+ // Math.random()/Date.now() implementation.
+ const md = [
+ "See[^d] one[^d] two[^d].",
+ "",
+ "[^d]: first",
+ "[^d]: second",
+ "[^d]: third",
+ ].join("\n");
+
+ const idsOf = async () => {
+ const json = await markdownToProseMirror(md);
+ const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id);
+ const defs = findAll(json, "footnoteDefinition").map((d) => d.attrs.id);
+ return { refs, defs };
+ };
+
+ const a = await idsOf();
+ const b = await idsOf();
+
+ // Identical across runs.
+ assert.deepEqual(a.refs, b.refs);
+ assert.deepEqual(a.defs, b.defs);
+ // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3".
+ assert.deepEqual([...a.defs].sort(), ["d", "d__2", "d__3"]);
+ // 1:1 reference <-> definition pairing, all distinct.
+ assert.equal(new Set(a.defs).size, 3);
+ assert.deepEqual([...a.refs].sort(), [...a.defs].sort());
+});
+
+test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => {
+  // Markdown that DOCUMENTS footnote syntax inside a code fence. The example
+  // definition line must be preserved verbatim inside the code block and not
+  // pulled out into a real footnotesList / footnoteDefinition.
+  const md = [
+    "Intro text.",
+    "",
+    "```markdown",
+    "Body[^demo]",
+    "",
+    "[^demo]: example definition",
+    "```",
+    "",
+    "Outro.",
+  ].join("\n");
+
+  const json = await markdownToProseMirror(md);
+
+  // No real footnote nodes were extracted from the code block.
+  assert.equal(findAll(json, "footnotesList").length, 0);
+  assert.equal(findAll(json, "footnoteDefinition").length, 0);
+
+  // The definition line must survive INSIDE a codeBlock node, not merely somewhere in the doc.
+  const codeBlocks = findAll(json, "codeBlock");
+  assert.ok(codeBlocks.length >= 1, "code block present");
+  const codeText = JSON.stringify(codeBlocks);
+  assert.match(codeText, /\[\^demo\]: example definition/);
+});
diff --git a/packages/mcp/test/unit/http-idle-eviction.test.mjs b/packages/mcp/test/unit/http-idle-eviction.test.mjs
new file mode 100644
index 00000000..6521f268
--- /dev/null
+++ b/packages/mcp/test/unit/http-idle-eviction.test.mjs
@@ -0,0 +1,273 @@
+// Unit tests for createMcpHttpHandler's idle-session eviction (http.ts).
+//
+// http.ts keeps one transport per MCP session alive between requests, keyed by
+// the mcp-session-id header, and runs a periodic sweep (setInterval, every 5
+// min) that closes any transport idle longer than the idle TTL
+// (MCP_SESSION_IDLE_MS, default 30 min) and drops its lastSeen + sessionIdentity
+// bookkeeping. Routing a request to an existing transport refreshes its
+// lastSeen.
+//
+// We drive this DETERMINISTICALLY rather than waiting wall-clock: the env knob
+// MCP_SESSION_IDLE_MS is read ONCE when the handler is created, so we set it
+// small; and node:test's mock.timers lets us mock both `setInterval` (the sweep)
+// and `Date` (the lastSeen comparison clock) so ticking advances the clock and
+// fires the sweep on demand.
+//
+// IMPORTANT mock.timers semantics: when a tick spans MULTIPLE timer fires (or
+// overshoots a fire), the callbacks all observe Date.now() == the FINAL ticked
+// time, not their individual scheduled times. So to make the sweep's
+// `now - lastSeen` comparison meaningful we tick EXACTLY to a sweep boundary
+// (a multiple of the sweep interval): then Date.now() inside the sweep equals
+// that boundary. The mocked clock starts at 0, so sweeps fire at SWEEP, 2*SWEEP,
+// ... We pin each session's lastSeen by establishing/touching it at a known
+// pre-boundary clock, then tick the remaining delta to land exactly on the
+// boundary.
+//
+// Sessions are established over a real loopback http server (so the SDK's
+// StreamableHTTPServerTransport gets genuine Node req/res and a real
+// mcp-session-id), exactly like http-resolver.test.mjs, and the server is closed
+// in a finally.
+//
+// Eviction is asserted via its OBSERVABLE effect: once a session is evicted its
+// transport is gone from the handler's internal map, so a subsequent non-init
+// request replaying that session id is treated as unknown (400 "no valid
+// session ID") — the same response an id that was never established would get.
+// An active (recently-seen) session is retained and its subsequent request is
+// NOT a 400.
+import { test, mock } from "node:test";
+import assert from "node:assert/strict";
+
+const INIT_BODY = {
+ jsonrpc: "2.0",
+ id: 1,
+ method: "initialize",
+ params: {
+ protocolVersion: "2025-03-26",
+ capabilities: {},
+ clientInfo: { name: "test", version: "0.0.0" },
+ },
+};
+
+const SWEEP_MS = 5 * 60 * 1000; // setInterval cadence in http.ts.
+
+// Spin a loopback http server bridging every request into the MCP handler with its JSON body
+// parsed, mirroring the embedding host. Returns { call, close }; call rejects on socket errors.
+async function startLoopback(handler) {
+  const http = await import("node:http");
+  const server = http.createServer((req, res) => {
+    let raw = "";
+    req.on("data", (c) => (raw += c));
+    req.on("end", () => {
+      // Parse inside the async chain: bad JSON or a sync throw yields a 500, not a crash.
+      const dispatch = async () => handler.handleRequest(req, res, raw ? JSON.parse(raw) : undefined);
+      dispatch().catch(() => {
+        if (!res.headersSent) {
+          res.statusCode = 500;
+          res.end();
+        }
+      });
+    });
+  });
+  await new Promise((r) => server.listen(0, "127.0.0.1", r));
+  const { port } = server.address();
+
+  const call = (headers, body) =>
+    new Promise((resolve, reject) => {
+      const r = http.request(
+        {
+          host: "127.0.0.1",
+          port,
+          method: "POST",
+          path: "/mcp",
+          headers: {
+            "Content-Type": "application/json",
+            Accept: "application/json, text/event-stream",
+            ...headers,
+          },
+        },
+        (resp) => {
+          let data = "";
+          resp.on("data", (c) => (data += c));
+          resp.on("error", reject);
+          resp.on("end", () => {
+            const sessionId = resp.headers["mcp-session-id"];
+            resolve({ statusCode: resp.statusCode, sessionId, body: data });
+          });
+        },
+      );
+      r.on("error", reject);
+      r.end(JSON.stringify(body));
+    });
+
+  return { call, close: () => new Promise((r) => server.close(r)) };
+}
+
+// The sweep closes transports asynchronously (void transport.close()), whose
+// onclose then removes the entry from the internal map. Yield to the event loop
+// so those microtasks settle before we assert the observable effect.
+const settle = () => new Promise((r) => setImmediate(r));
+
+// Set the idle TTL env knob (read once at handler creation) and enable mocked
+// setInterval + Date BEFORE creating the handler, so the sweep interval and
+// every Date.now() (lastSeen at init, lastSeen on routing, and the sweep's
+// comparison) all run on the same mocked clock. Returns restore() to undo it.
+function withMockedTimers(idleMs) {
+ const prevIdle = process.env.MCP_SESSION_IDLE_MS;
+ process.env.MCP_SESSION_IDLE_MS = String(idleMs);
+ mock.timers.enable({ apis: ["setInterval", "Date"] });
+ return () => {
+ mock.timers.reset();
+ if (prevIdle === undefined) delete process.env.MCP_SESSION_IDLE_MS;
+ else process.env.MCP_SESSION_IDLE_MS = prevIdle;
+ };
+}
+
+test("idle session is evicted by the sweep; an active session is retained", async () => {
+ // A small TTL: idle longer than 1s triggers eviction. Both sessions start at
+ // clock 0; we keep one fresh (touch it just before the sweep) and leave the
+ // other idle, then fire ONE sweep exactly on its boundary.
+ const idleMs = 1000;
+ const restore = withMockedTimers(idleMs);
+
+ const { createMcpHttpHandler } = await import("../../build/http.js");
+ const handler = createMcpHttpHandler(() => ({
+ apiUrl: "http://127.0.0.1:3000/api",
+ getToken: async () => "t",
+ }));
+
+ const lb = await startLoopback(handler);
+ try {
+ // T0 (clock 0): establish both sessions; lastSeen(A) = lastSeen(B) = 0.
+ const a = await lb.call({}, INIT_BODY);
+ const b = await lb.call({}, INIT_BODY);
+ assert.ok(a.sessionId, "session A must get an mcp-session-id");
+ assert.ok(b.sessionId, "session B must get an mcp-session-id");
+ assert.notEqual(a.sessionId, b.sessionId, "distinct sessions");
+
+ // Advance to just before the first sweep boundary (SWEEP - 1ms): no sweep
+ // fires yet (boundary not reached). lastSeen(A) stays 0.
+ mock.timers.tick(SWEEP_MS - 1);
+ // Touch ONLY B here, refreshing lastSeen(B) to SWEEP-1 (active); A is left
+ // idle since clock 0.
+ const touchB = await lb.call(
+ { "mcp-session-id": b.sessionId },
+ { jsonrpc: "2.0", method: "ping", id: 5 },
+ );
+ assert.notEqual(touchB.statusCode, 400, "B alive right before the sweep");
+
+ // Land EXACTLY on the sweep boundary (clock = SWEEP). Inside the sweep
+ // Date.now() == SWEEP, so:
+ // idle(A) = SWEEP - 0 = SWEEP > TTL(1s) -> A EVICTED
+ // idle(B) = SWEEP - (SWEEP-1) = 1ms < TTL(1s) -> B RETAINED
+ mock.timers.tick(1);
+ await settle();
+
+ // OBSERVABLE EFFECT 1 — A evicted: replaying its session id on a non-init
+ // request is now treated as unknown (400, no valid session).
+ const aAfter = await lb.call(
+ { "mcp-session-id": a.sessionId },
+ { jsonrpc: "2.0", method: "ping", id: 10 },
+ );
+ assert.equal(aAfter.statusCode, 400, "evicted session id is unknown -> 400");
+ assert.match(aAfter.body, /no valid session ID/);
+
+ // OBSERVABLE EFFECT 2 — B retained: a subsequent request on its session id
+ // is routed to the live transport, NOT rejected as an unknown session.
+ const bAfter = await lb.call(
+ { "mcp-session-id": b.sessionId },
+ { jsonrpc: "2.0", method: "ping", id: 11 },
+ );
+ assert.notEqual(
+ bAfter.statusCode,
+ 400,
+ "active session must survive the sweep (not 400)",
+ );
+ } finally {
+ await lb.close();
+ restore();
+ }
+});
+
+test("a session left idle past the TTL is dropped so its id becomes unknown", async () => {
+ // Simplest single-session eviction: establish a session, let it go idle past
+ // the TTL, fire the sweep on its boundary, and confirm its id is now unknown
+ // (400). Pins the core "lastSeen older than TTL -> closed and dropped" path.
+ const idleMs = 1000;
+ const restore = withMockedTimers(idleMs);
+
+ const { createMcpHttpHandler } = await import("../../build/http.js");
+ const handler = createMcpHttpHandler(() => ({
+ apiUrl: "http://127.0.0.1:3000/api",
+ getToken: async () => "t",
+ }));
+
+ const lb = await startLoopback(handler);
+ try {
+ const s = await lb.call({}, INIT_BODY);
+ assert.ok(s.sessionId, "session must get an mcp-session-id");
+
+ // Fire the first sweep exactly on its boundary: Date.now() == SWEEP, idle =
+ // SWEEP - 0 = SWEEP > TTL, so the untouched session is evicted.
+ mock.timers.tick(SWEEP_MS);
+ await settle();
+
+ const after = await lb.call(
+ { "mcp-session-id": s.sessionId },
+ { jsonrpc: "2.0", method: "ping", id: 30 },
+ );
+ assert.equal(after.statusCode, 400, "idle session id is unknown -> 400");
+ assert.match(after.body, /no valid session ID/);
+ } finally {
+ await lb.close();
+ restore();
+ }
+});
+
+test("activity refreshes lastSeen so a busy session is never evicted", async () => {
+ // A session kept busy (a request just before the sweep) refreshes its
+ // lastSeen, so even though it was created long ago the sweep must not evict
+ // it. Pins the "routing to an existing transport refreshes its idle
+ // timestamp" branch of http.ts.
+ const idleMs = 1000;
+ const restore = withMockedTimers(idleMs);
+
+ const { createMcpHttpHandler } = await import("../../build/http.js");
+ const handler = createMcpHttpHandler(() => ({
+ apiUrl: "http://127.0.0.1:3000/api",
+ getToken: async () => "t",
+ }));
+
+ const lb = await startLoopback(handler);
+ try {
+ const s = await lb.call({}, INIT_BODY);
+ assert.ok(s.sessionId, "session must get an mcp-session-id");
+
+ // Age to just before the sweep boundary, then touch the session so its
+ // lastSeen is refreshed to SWEEP-1 (well within the TTL of the imminent
+ // sweep).
+ mock.timers.tick(SWEEP_MS - 1);
+ const touch = await lb.call(
+ { "mcp-session-id": s.sessionId },
+ { jsonrpc: "2.0", method: "ping", id: 40 },
+ );
+ assert.notEqual(touch.statusCode, 400, "session still alive before sweep");
+
+ // Land exactly on the sweep boundary: idle = SWEEP - (SWEEP-1) = 1ms < TTL,
+ // so the busy session is retained.
+ mock.timers.tick(1);
+ await settle();
+
+ const after = await lb.call(
+ { "mcp-session-id": s.sessionId },
+ { jsonrpc: "2.0", method: "ping", id: 41 },
+ );
+ assert.notEqual(
+ after.statusCode,
+ 400,
+ "a session touched just before the sweep must not be evicted",
+ );
+ } finally {
+ await lb.close();
+ restore();
+ }
+});
diff --git a/packages/mcp/test/unit/transforms.test.mjs b/packages/mcp/test/unit/transforms.test.mjs
index 3f66593c..f7999113 100644
--- a/packages/mcp/test/unit/transforms.test.mjs
+++ b/packages/mcp/test/unit/transforms.test.mjs
@@ -34,6 +34,18 @@ const li = (text) => ({
const doc = (...children) => ({ type: "doc", content: children });
const snapshot = (v) => JSON.parse(JSON.stringify(v));
+// Collect every footnoteReference id under a node, in reading order.
+const collectRefIds = (node, acc = []) => {
+ if (!node || typeof node !== "object") return acc;
+ if (node.type === "footnoteReference") acc.push(node.attrs?.id);
+ if (Array.isArray(node.content)) {
+ for (const c of node.content) collectRefIds(c, acc);
+ }
+ return acc;
+};
+// Plain text of a footnoteDefinition.
+const defText = (def) => blockText(def);
+
// ---------------------------------------------------------------------------
// blockText / walk / getList
// ---------------------------------------------------------------------------
@@ -173,21 +185,30 @@ test("commentsToFootnotes anchors comments and renumbers by position", () => {
const { doc: out, consumed } = commentsToFootnotes(d, comments);
assert.deepEqual(consumed.sort(), ["cA", "cB"]);
- // Markers in reading order: p1 "apple"->[1], p2 existing->[2], p3 "banana"->[3]
- assert.match(blockText(out.content[1]), /\[1\]/);
- assert.match(blockText(out.content[2]), /\[2\]/);
- assert.match(blockText(out.content[3]), /\[3\]/);
+ // Real footnoteReference nodes were inserted at p1 (apple), p2 (existing),
+ // p3 (banana), in reading order — the old `[N]` text markers are gone.
+ const refIds = collectRefIds(out);
+ assert.equal(refIds.length, 3);
+ // Body paragraphs p1..p3 no longer carry literal [N] text markers.
+ assert.doesNotMatch(blockText(out.content[1]), /\[\d+\]/);
+ assert.doesNotMatch(blockText(out.content[2]), /\[\d+\]/);
+ assert.doesNotMatch(blockText(out.content[3]), /\[\d+\]/);
- // No stray placeholders remain.
- const allText = blockText(out);
- assert.doesNotMatch(allText, / F\d+ /);
+ // No stray NUL placeholders remain.
+ assert.doesNotMatch(blockText(out), /\u0000/);
- // Notes list reordered to [apple, existing, banana] (reading order).
- const list = out.content.find((n) => n.type === "orderedList");
+ // The bottom footnotesList holds the definitions in reading order, each keyed
+ // by the matching reference id.
+ const list = out.content.find((n) => n.type === "footnotesList");
+ assert.ok(list, "footnotesList present");
assert.equal(list.content.length, 3);
- assert.equal(blockText(list.content[0]), "apple note");
- assert.equal(blockText(list.content[1]), "existing note one");
- assert.equal(blockText(list.content[2]), "banana note");
+ assert.deepEqual(
+ list.content.map((d) => d.attrs.id),
+ refIds,
+ );
+ assert.equal(defText(list.content[0]), "apple note");
+ assert.equal(defText(list.content[1]), "existing note one");
+ assert.equal(defText(list.content[2]), "banana note");
// Callout range synced to 3 notes.
assert.match(blockText(out.content[0]), /\[1\]…\[3\]/);
@@ -224,15 +245,16 @@ test("commentsToFootnotes leaves literal 'F1'/'FN2'/'F12' body text untouched",
// The literal "F1"/"FN2"/"F12" prose is preserved verbatim (no bogus
// footnotes, no eaten spaces around them).
assert.match(bodyText, /Press F1 for help, model FN2 and F12 for tools/);
- // Exactly one real footnote marker was produced, at the anchored word.
- const markerCount = (bodyText.match(/\[\d+\]/g) || []).length;
- assert.equal(markerCount, 1);
- assert.match(bodyText, /apple \[1\]/);
+ // Exactly one real footnoteReference node was produced, at the anchored word.
+ const refIds = collectRefIds(out);
+ assert.equal(refIds.length, 1);
// Exactly one note in the list — "F1"/"FN2"/"F12" did not spawn extra notes.
- const list = out.content.find((n) => n.type === "orderedList");
+ const list = out.content.find((n) => n.type === "footnotesList");
+ assert.ok(list, "footnotesList present");
assert.equal(list.content.length, 1);
- assert.equal(blockText(list.content[0]), "apple note");
+ assert.equal(list.content[0].attrs.id, refIds[0]);
+ assert.equal(defText(list.content[0]), "apple note");
// No stray placeholder sentinel remains anywhere: the NUL-delimited sentinel
// is fully consumed by the renumber pass, so no raw NUL control char persists
@@ -287,17 +309,25 @@ test("commentsToFootnotes renumbers body callouts but skips the disclaimer range
assert.deepEqual(consumed, []);
// The disclaimer's "[1]…[K]" range is NOT treated as body markers: it stays
- // a range and is synced to the note count (2), not renumbered into [1],[2].
+ // a range and is synced to the note count (2), not turned into references.
assert.match(blockText(out.content[0]), /\[1\]…\[2\]/);
- // The body callout's [1] is renumbered as a real reading-order marker.
- assert.match(blockText(out.content[1]), /noted \[1\] above/);
- // The following paragraph's [2] keeps reading order.
- assert.match(blockText(out.content[2]), /with \[2\] too/);
+ // The body callout's [1] and the paragraph's [2] became footnoteReference
+ // nodes in reading order (the literal text markers are gone).
+ const refIds = collectRefIds(out);
+ assert.equal(refIds.length, 2);
+ assert.match(blockText(out.content[1]), /noted +above/); // [1] -> node, no text
+ assert.match(blockText(out.content[2]), /with +too/); // [2] -> node, no text
- // Notes list still has the two original notes in order.
- const list = out.content.find((n) => n.type === "orderedList");
+ // The footnotesList holds the two original notes in reading order, keyed to
+ // the new reference ids.
+ const list = out.content.find((n) => n.type === "footnotesList");
+ assert.ok(list, "footnotesList present");
assert.equal(list.content.length, 2);
- assert.equal(blockText(list.content[0]), "first note");
- assert.equal(blockText(list.content[1]), "second note");
+ assert.deepEqual(
+ list.content.map((d) => d.attrs.id),
+ refIds,
+ );
+ assert.equal(defText(list.content[0]), "first note");
+ assert.equal(defText(list.content[1]), "second note");
});