From f86b8b69a06469cdfae76e7a1dd9599efb443516 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 05:56:05 +0300 Subject: [PATCH 01/43] fix(mcp): structural-diff write-back so agent edits don't jump the cursor (#152) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mutatePageContent wrote agent edits back by DELETING the whole Yjs fragment and re-applying a fresh Y.Doc. Yjs is a CRDT — the editor anchors its selection to node ids — so wiping every id made an open editor's cursor lose its anchor and snap to the end of the document on every agent write. It was most visible on comment anchoring (issue #152): a comment changes no text, yet the cursor jumped. (Before commit 4201f0a3 the anchoring silently no-op'd, so the destructive write never ran for comments — hence the regression.) Fix: write via `updateYFragment` (y-prosemirror) — the same routine the editor uses to sync its own edits into Yjs. It structurally diffs the new doc against the live fragment and touches only changed nodes, preserving the ids of unchanged ones, so the cursor stays put. This improves ALL agent write tools (text edits, node ops, comments, replace) — minimal diff instead of full replace: less collab noise, stable block-ids, other users' cursors no longer disrupted. - collaboration.ts: new `applyDocToFragment` (sanitize -> PMNode.fromJSON against a memoized docmost schema -> updateYFragment in one transact), keeping the `findUnstorableAttr` encode diagnostic; swap the destructive write-back for it. - package.json: `y-prosemirror` promoted to a direct dependency (was transitive). - test: comment-cursor-stability.test.mjs — a Yjs RelativePosition (the cursor anchor) survives both a sibling edit and a comment-mark anchoring (the old full-replace tombstoned it -> null). 292 package tests green. 
Co-Authored-By: Claude Opus 4.8 --- packages/mcp/build/lib/collaboration.js | 55 +++++++++++--- packages/mcp/node_modules/y-prosemirror | 1 + packages/mcp/package.json | 1 + packages/mcp/src/lib/collaboration.ts | 58 +++++++++++--- .../unit/comment-cursor-stability.test.mjs | 75 +++++++++++++++++++ pnpm-lock.yaml | 3 + 6 files changed, 173 insertions(+), 20 deletions(-) create mode 120000 packages/mcp/node_modules/y-prosemirror create mode 100644 packages/mcp/test/unit/comment-cursor-stability.test.mjs diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index 5140acee..35bd7a13 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -4,11 +4,18 @@ import * as Y from "yjs"; import WebSocket from "ws"; import { marked } from "marked"; import { generateJSON } from "@tiptap/html"; +import { getSchema } from "@tiptap/core"; +import { Node as PMNode } from "@tiptap/pm/model"; +import { updateYFragment } from "y-prosemirror"; import { JSDOM } from "jsdom"; import { docmostExtensions } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; import { summarizeChange } from "./diff.js"; +// The ProseMirror schema for the docmost editor, built once (mirrors diff.ts). +// `updateYFragment` needs a real PM Node, so we re-hydrate the transformed JSON +// against this schema before diffing it into the live Yjs fragment. +const docmostSchema = getSchema(docmostExtensions); // Setup DOM environment for Tiptap HTML parsing in Node.js const dom = new JSDOM(""); global.window = dom.window; @@ -450,6 +457,40 @@ export function buildYDoc(doc) { throw new Error(`Failed to encode document to Yjs (toYdoc): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. 
undefined)."}`); } } +/** + * Write a new ProseMirror doc into the live Yjs fragment by STRUCTURAL DIFF, + * preserving the Yjs identity of unchanged nodes (issue #152). + * + * The previous approach deleted the whole fragment and re-applied a fresh Y.Doc, + * which discarded every Yjs node id. y-prosemirror anchors the editor selection + * to those ids, so an open editor's cursor lost its anchor and snapped to the + * end of the document on every agent write (most visibly on comment anchoring, + * which changes no text at all). `updateYFragment` is exactly the routine the + * editor itself uses to sync ProseMirror edits into Yjs: it diffs the new node + * against the current fragment and touches only the changed children, so + * unchanged nodes keep their ids and the live cursor stays put. + * + * Must run inside a single `transact` so the diff applies atomically (no remote + * update interleaves). Keeps `buildYDoc`'s `findUnstorableAttr` diagnostic for + * the opaque "Unexpected content type" encode failure. + */ +export function applyDocToFragment(ydoc, newDoc) { + const safe = sanitizeForYjs(newDoc); + const fragment = ydoc.getXmlFragment("default"); + try { + const pmNode = PMNode.fromJSON(docmostSchema, safe); + ydoc.transact(() => { + updateYFragment(ydoc, fragment, pmNode, { + mapping: new Map(), + isOMark: new Map(), + }); + }); + } + catch (e) { + const bad = findUnstorableAttr(safe); + throw new Error(`Failed to encode document to Yjs (updateYFragment): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`); + } +} /** * Validate that a doc is Yjs-encodable by building (and discarding) a Y.Doc. * Throws the same descriptive error as the apply path when it is not. 
Used by @@ -649,16 +690,10 @@ export async function mutatePageContent(pageId, collabToken, baseUrl, transform) finish(null, mutationResult); return; } - const tempDoc = buildYDoc(newDoc); - // Fetch the fragment immediately before the transact that mutates - // it, rather than reusing a handle grabbed across the transform. - const fragment = ydoc.getXmlFragment("default"); - ydoc.transact(() => { - if (fragment.length > 0) { - fragment.delete(0, fragment.length); - } - Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc)); - }); + // Structural diff into the live fragment (issue #152): preserves + // the Yjs ids of unchanged nodes, so an open editor's cursor is not + // yanked to the end of the document on every agent write. + applyDocToFragment(ydoc, newDoc); } catch (e) { // Includes errors thrown by transform (e.g. "afterText not found", diff --git a/packages/mcp/node_modules/y-prosemirror b/packages/mcp/node_modules/y-prosemirror new file mode 120000 index 00000000..16997d1b --- /dev/null +++ b/packages/mcp/node_modules/y-prosemirror @@ -0,0 +1 @@ +../../../node_modules/.pnpm/y-prosemirror@1.3.7_prosemirror-model@1.25.1_prosemirror-state@1.4.3_prosemirror-view@1_0ad6648b7e1f6d6f3287a40e0e62139b/node_modules/y-prosemirror \ No newline at end of file diff --git a/packages/mcp/package.json b/packages/mcp/package.json index 2b1074fb..3edc1902 100644 --- a/packages/mcp/package.json +++ b/packages/mcp/package.json @@ -52,6 +52,7 @@ "jsdom": "^27.4.0", "marked": "^17.0.1", "ws": "^8.19.0", + "y-prosemirror": "1.3.7", "yjs": "^13.6.29", "zod": "^3.22.0" }, diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index 6f0ad011..cb84f410 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -4,12 +4,20 @@ import * as Y from "yjs"; import WebSocket from "ws"; import { marked } from "marked"; import { generateJSON } from "@tiptap/html"; +import { getSchema } from "@tiptap/core"; +import { Node as 
PMNode } from "@tiptap/pm/model"; +import { updateYFragment } from "y-prosemirror"; import { JSDOM } from "jsdom"; import { docmostExtensions } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; import { summarizeChange, VerifyReport } from "./diff.js"; +// The ProseMirror schema for the docmost editor, built once (mirrors diff.ts). +// `updateYFragment` needs a real PM Node, so we re-hydrate the transformed JSON +// against this schema before diffing it into the live Yjs fragment. +const docmostSchema = getSchema(docmostExtensions); + /** * The resolved value of every content-mutating collab write: the document that * was written (or the live doc when the transform aborted) plus a verifiable @@ -506,6 +514,42 @@ export function buildYDoc(doc: any): Y.Doc { } } +/** + * Write a new ProseMirror doc into the live Yjs fragment by STRUCTURAL DIFF, + * preserving the Yjs identity of unchanged nodes (issue #152). + * + * The previous approach deleted the whole fragment and re-applied a fresh Y.Doc, + * which discarded every Yjs node id. y-prosemirror anchors the editor selection + * to those ids, so an open editor's cursor lost its anchor and snapped to the + * end of the document on every agent write (most visibly on comment anchoring, + * which changes no text at all). `updateYFragment` is exactly the routine the + * editor itself uses to sync ProseMirror edits into Yjs: it diffs the new node + * against the current fragment and touches only the changed children, so + * unchanged nodes keep their ids and the live cursor stays put. + * + * Must run inside a single `transact` so the diff applies atomically (no remote + * update interleaves). Keeps `buildYDoc`'s `findUnstorableAttr` diagnostic for + * the opaque "Unexpected content type" encode failure. 
+ */ +export function applyDocToFragment(ydoc: Y.Doc, newDoc: any): void { + const safe = sanitizeForYjs(newDoc); + const fragment = ydoc.getXmlFragment("default"); + try { + const pmNode = PMNode.fromJSON(docmostSchema, safe); + ydoc.transact(() => { + updateYFragment(ydoc, fragment, pmNode, { + mapping: new Map(), + isOMark: new Map(), + }); + }); + } catch (e) { + const bad = findUnstorableAttr(safe); + throw new Error( + `Failed to encode document to Yjs (updateYFragment): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`, + ); + } +} + /** * Validate that a doc is Yjs-encodable by building (and discarding) a Y.Doc. * Throws the same descriptive error as the apply path when it is not. Used by @@ -727,16 +771,10 @@ export async function mutatePageContent( return; } - const tempDoc = buildYDoc(newDoc); - // Fetch the fragment immediately before the transact that mutates - // it, rather than reusing a handle grabbed across the transform. - const fragment = ydoc.getXmlFragment("default"); - ydoc.transact(() => { - if (fragment.length > 0) { - fragment.delete(0, fragment.length); - } - Y.applyUpdate(ydoc, Y.encodeStateAsUpdate(tempDoc)); - }); + // Structural diff into the live fragment (issue #152): preserves + // the Yjs ids of unchanged nodes, so an open editor's cursor is not + // yanked to the end of the document on every agent write. + applyDocToFragment(ydoc, newDoc); } catch (e) { // Includes errors thrown by transform (e.g. "afterText not found", // "text not found"): propagate them verbatim to the caller. 
diff --git a/packages/mcp/test/unit/comment-cursor-stability.test.mjs b/packages/mcp/test/unit/comment-cursor-stability.test.mjs new file mode 100644 index 00000000..e494d131 --- /dev/null +++ b/packages/mcp/test/unit/comment-cursor-stability.test.mjs @@ -0,0 +1,75 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; +import * as Y from "yjs"; +import { applyDocToFragment } from "../../build/lib/collaboration.js"; + +// Regression for issue #152: agent writes (comment anchoring especially) must +// NOT yank the open editor's cursor to the end of the document. The cursor is a +// Yjs RelativePosition anchored to node ids; the old write-back deleted the whole +// fragment and rebuilt it, destroying every id, so the position no longer +// resolved. `applyDocToFragment` uses `updateYFragment` (the editor's own diff), +// which keeps unchanged nodes' ids — so a RelativePosition still resolves. + +const para = (text, marks) => ({ + type: "paragraph", + content: [{ type: "text", text, ...(marks ? { marks } : {}) }], +}); +const doc = (...paras) => ({ type: "doc", content: paras }); + +/** The XmlText of the Nth paragraph in the live fragment. */ +function paragraphText(ydoc, n) { + const el = ydoc.getXmlFragment("default").get(n); // XmlElement + return el.get(0); // its XmlText child +} + +test("an UNCHANGED node keeps its Yjs identity across an edit (cursor survives)", () => { + const ydoc = new Y.Doc(); + applyDocToFragment(ydoc, doc(para("Hello world"), para("Second"))); + + // Anchor a cursor at offset 5 inside the FIRST (soon-to-be-unchanged) paragraph. + const relPos = Y.createRelativePositionFromTypeIndex(paragraphText(ydoc, 0), 5); + + // Edit only the SECOND paragraph; the first is untouched. 
+ applyDocToFragment(ydoc, doc(para("Hello world"), para("Second edited"))); + + const abs = Y.createAbsolutePositionFromRelativePosition(relPos, ydoc); + assert.notEqual(abs, null, "the cursor's relative position must still resolve"); + assert.equal(abs.index, 5, "the cursor must stay at the same offset"); + // And the edit actually landed. + assert.equal(paragraphText(ydoc, 1).toString(), "Second edited"); +}); + +test("anchoring a comment mark keeps the cursor in the marked text (issue #152)", () => { + const ydoc = new Y.Doc(); + applyDocToFragment(ydoc, doc(para("Hello world"))); + + // The user's cursor sits inside the text that is about to be commented. + const relPos = Y.createRelativePositionFromTypeIndex(paragraphText(ydoc, 0), 3); + + // Agent anchors a comment over "Hello" — text is identical, only a mark added. + applyDocToFragment( + ydoc, + doc({ + type: "paragraph", + content: [ + { + type: "text", + text: "Hello", + marks: [ + { type: "comment", attrs: { commentId: "c1", resolved: false } }, + ], + }, + { type: "text", text: " world" }, + ], + }), + ); + + // The text is intact (the mark splits "Hello" / " world" but reads the same). + const para0 = ydoc.getXmlFragment("default").get(0); + assert.equal(para0.toString().replace(/<[^>]*>/g, ""), "Hello world"); + + // ...and the cursor anchored before the write still resolves (did not jump to + // the document end as it did with the destructive full-replace). 
+ const abs = Y.createAbsolutePositionFromRelativePosition(relPos, ydoc); + assert.notEqual(abs, null, "comment anchoring must not destroy the cursor anchor"); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d6af709c..4a55e7a0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -946,6 +946,9 @@ importers: ws: specifier: 8.20.1 version: 8.20.1 + y-prosemirror: + specifier: 1.3.7 + version: 1.3.7(prosemirror-model@1.25.1)(prosemirror-state@1.4.3)(prosemirror-view@1.40.0)(y-protocols@1.0.6(yjs@13.6.30(patch_hash=1ceeb66dba1f86545c98a3ff7f5152aff9b35caf409091cef9caedb5e65c8810)))(yjs@13.6.30(patch_hash=1ceeb66dba1f86545c98a3ff7f5152aff9b35caf409091cef9caedb5e65c8810)) yjs: specifier: ^13.6.29 version: 13.6.30(patch_hash=1ceeb66dba1f86545c98a3ff7f5152aff9b35caf409091cef9caedb5e65c8810) From c7c0c28e381cf035d145cfa3432ec691532f2dbc Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 12:56:23 +0300 Subject: [PATCH 02/43] refactor(mcp): single docmostSchema + shared encode-error helper + catch test (#152 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Review of #154 (Request changes) — all clean follow-ups, no defect in the fix: 1. Single source of the ProseMirror schema: export `docmostSchema` from docmost-schema.ts (next to docmostExtensions); diff.ts and collaboration.ts import it instead of each calling getSchema(docmostExtensions) — the schema can no longer drift between call sites. Removed both local builds + the now unused getSchema imports. 2. Doc fix: assertYjsEncodable's docstring and the client.ts comment no longer claim "the same encoder as apply" — apply uses updateYFragment, the dry-run uses toYdoc; both reject the same unstorable attrs but are NOT byte-identical. Reworded to "independent encodability gate". 3+4+5. 
Extracted `unstorableYjsError(safe, label, e)` — buildYDoc and applyDocToFragment now share one message template (label kept for diagnostics: toYdoc vs updateYFragment), so the wording can't drift between dry-run/apply. 6. Test for applyDocToFragment's catch branch: an unknown node type makes the schema-validated PMNode.fromJSON throw, and the function must re-throw it wrapped with the (updateYFragment) diagnostic. build/ rebuilt for the three changed lib modules; 293 package tests green. (Left build/client.js untouched: rebuilding it would pull in a pre-existing, unrelated src/build drift — a listSidebarPages slugId fix never rebuilt on develop — and my client.ts change there is comment-only.) Co-Authored-By: Claude Opus 4.8 --- packages/mcp/build/lib/collaboration.js | 36 ++++++++++------ packages/mcp/build/lib/diff.js | 9 ++-- packages/mcp/build/lib/docmost-schema.js | 9 +++- packages/mcp/src/client.ts | 6 +-- packages/mcp/src/lib/collaboration.ts | 42 +++++++++++-------- packages/mcp/src/lib/diff.ts | 9 ++-- packages/mcp/src/lib/docmost-schema.ts | 10 ++++- .../unit/comment-cursor-stability.test.mjs | 18 ++++++++ 8 files changed, 92 insertions(+), 47 deletions(-) diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index 35bd7a13..1fc4c1d6 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -4,18 +4,25 @@ import * as Y from "yjs"; import WebSocket from "ws"; import { marked } from "marked"; import { generateJSON } from "@tiptap/html"; -import { getSchema } from "@tiptap/core"; import { Node as PMNode } from "@tiptap/pm/model"; import { updateYFragment } from "y-prosemirror"; import { JSDOM } from "jsdom"; -import { docmostExtensions } from "./docmost-schema.js"; +import { docmostExtensions, docmostSchema } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; import { summarizeChange 
} from "./diff.js"; -// The ProseMirror schema for the docmost editor, built once (mirrors diff.ts). -// `updateYFragment` needs a real PM Node, so we re-hydrate the transformed JSON -// against this schema before diffing it into the live Yjs fragment. -const docmostSchema = getSchema(docmostExtensions); +/** + * Build the descriptive error for an opaque Yjs encode failure ("Unexpected + * content type"), shared by both encode paths (`buildYDoc` -> `toYdoc` and + * `applyDocToFragment` -> `updateYFragment`) so the message wording stays in one + * place. `label` names the stage that failed (diagnostic). `sanitizeForYjs` + * already stripped `undefined` attrs, so a remaining failure is pinpointed via + * `findUnstorableAttr`. + */ +function unstorableYjsError(safe, label, e) { + const bad = findUnstorableAttr(safe); + return new Error(`Failed to encode document to Yjs (${label}): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`); +} // Setup DOM environment for Tiptap HTML parsing in Node.js const dom = new JSDOM(""); global.window = dom.window; @@ -453,8 +460,7 @@ export function buildYDoc(doc) { return TiptapTransformer.toYdoc(safe, "default", docmostExtensions); } catch (e) { - const bad = findUnstorableAttr(safe); - throw new Error(`Failed to encode document to Yjs (toYdoc): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`); + throw unstorableYjsError(safe, "toYdoc", e); } } /** @@ -487,14 +493,18 @@ export function applyDocToFragment(ydoc, newDoc) { }); } catch (e) { - const bad = findUnstorableAttr(safe); - throw new Error(`Failed to encode document to Yjs (updateYFragment): ${e instanceof Error ? e.message : String(e)}.${bad ? 
` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`); + throw unstorableYjsError(safe, "updateYFragment", e); } } /** - * Validate that a doc is Yjs-encodable by building (and discarding) a Y.Doc. - * Throws the same descriptive error as the apply path when it is not. Used by - * the dry-run preview so it fails identically to apply. + * Run an independent Yjs-encodability check (the same `sanitizeForYjs` + schema + * the apply path uses) and throw the same descriptive error when the doc cannot + * be stored. Used by the dry-run preview. + * + * Note: it does NOT run `updateYFragment` against the live fragment, so it is an + * encodability GATE, not a byte-for-byte rehearsal of apply — `buildYDoc` + * (`toYdoc`) and `applyDocToFragment` (`updateYFragment`) are two different + * encoders that nonetheless reject the same unstorable attributes. */ export function assertYjsEncodable(doc) { buildYDoc(doc); diff --git a/packages/mcp/build/lib/diff.js b/packages/mcp/build/lib/diff.js index 516a3c81..c19ff9a9 100644 --- a/packages/mcp/build/lib/diff.js +++ b/packages/mcp/build/lib/diff.js @@ -16,13 +16,10 @@ * If recreateTransform / the changeset throws on a pathological document pair, * we fall back to a coarse block-level text diff so the tool never hard-fails. */ -import { getSchema } from "@tiptap/core"; import { Node } from "@tiptap/pm/model"; import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset"; import { recreateTransform } from "@fellow/prosemirror-recreate-transform"; -import { docmostExtensions } from "./docmost-schema.js"; -/** Build the schema once; it is pure and reused across calls. */ -const schema = getSchema(docmostExtensions); +import { docmostSchema } from "./docmost-schema.js"; /** Recursively concatenate the plain text of a JSON node. 
*/ function plainText(node) { if (!node || typeof node !== "object") @@ -242,8 +239,8 @@ export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примеча let fellBack = false; const changedBlocks = new Set(); try { - const oldNode = Node.fromJSON(schema, oldDocJson); - const newNode = Node.fromJSON(schema, newDocJson); + const oldNode = Node.fromJSON(docmostSchema, oldDocJson); + const newNode = Node.fromJSON(docmostSchema, newDocJson); const tr = recreateTransform(oldNode, newNode, { complexSteps: false, wordDiffs: true, diff --git a/packages/mcp/build/lib/docmost-schema.js b/packages/mcp/build/lib/docmost-schema.js index 976e2d7f..6b6c221d 100644 --- a/packages/mcp/build/lib/docmost-schema.js +++ b/packages/mcp/build/lib/docmost-schema.js @@ -14,7 +14,7 @@ import TaskItem from "@tiptap/extension-task-item"; import Highlight from "@tiptap/extension-highlight"; import Subscript from "@tiptap/extension-subscript"; import Superscript from "@tiptap/extension-superscript"; -import { Node, Extension, Mark } from "@tiptap/core"; +import { Node, Extension, Mark, getSchema } from "@tiptap/core"; // Inlined from @tiptap/core's getStyleProperty (added after 3.20.x) so this // package can stay on the same @tiptap/core version as the editor and avoid a // duplicate-tiptap version split in the monorepo. Reads a single declaration @@ -1126,3 +1126,10 @@ export const docmostExtensions = [ PageBreak, DocmostAttributes, ]; +/** + * The ProseMirror schema for the docmost editor, built ONCE from + * `docmostExtensions`. Pure and reused by every consumer (diff, collaboration + * write-back) so the schema can never drift between call sites — it lives next + * to the extension list it is derived from. 
+ */ +export const docmostSchema = getSchema(docmostExtensions); diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 9873d119..bd891fc9 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -2995,9 +2995,9 @@ export class DocmostClient { const raw = await this.getPageRaw(pageId); const current = raw.content || { type: "doc", content: [] }; runTransform(current); - // Exercise the same Yjs encoder the apply path uses, so the preview - // fails with the SAME descriptive error when the doc is not encodable - // instead of returning a misleadingly-green diff. + // Run an independent Yjs-encodability check (same sanitize + schema as the + // apply path), so the preview fails with the same descriptive error when + // the doc is not encodable instead of returning a misleadingly-green diff. assertYjsEncodable(newDoc); return { pushed: false, diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index cb84f410..7d3fdc0e 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -4,19 +4,28 @@ import * as Y from "yjs"; import WebSocket from "ws"; import { marked } from "marked"; import { generateJSON } from "@tiptap/html"; -import { getSchema } from "@tiptap/core"; import { Node as PMNode } from "@tiptap/pm/model"; import { updateYFragment } from "y-prosemirror"; import { JSDOM } from "jsdom"; -import { docmostExtensions } from "./docmost-schema.js"; +import { docmostExtensions, docmostSchema } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; import { summarizeChange, VerifyReport } from "./diff.js"; -// The ProseMirror schema for the docmost editor, built once (mirrors diff.ts). -// `updateYFragment` needs a real PM Node, so we re-hydrate the transformed JSON -// against this schema before diffing it into the live Yjs fragment. 
-const docmostSchema = getSchema(docmostExtensions); +/** + * Build the descriptive error for an opaque Yjs encode failure ("Unexpected + * content type"), shared by both encode paths (`buildYDoc` -> `toYdoc` and + * `applyDocToFragment` -> `updateYFragment`) so the message wording stays in one + * place. `label` names the stage that failed (diagnostic). `sanitizeForYjs` + * already stripped `undefined` attrs, so a remaining failure is pinpointed via + * `findUnstorableAttr`. + */ +function unstorableYjsError(safe: any, label: string, e: unknown): Error { + const bad = findUnstorableAttr(safe); + return new Error( + `Failed to encode document to Yjs (${label}): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`, + ); +} /** * The resolved value of every content-mutating collab write: the document that @@ -507,10 +516,7 @@ export function buildYDoc(doc: any): Y.Doc { try { return TiptapTransformer.toYdoc(safe, "default", docmostExtensions); } catch (e) { - const bad = findUnstorableAttr(safe); - throw new Error( - `Failed to encode document to Yjs (toYdoc): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`, - ); + throw unstorableYjsError(safe, "toYdoc", e); } } @@ -543,17 +549,19 @@ export function applyDocToFragment(ydoc: Y.Doc, newDoc: any): void { }); }); } catch (e) { - const bad = findUnstorableAttr(safe); - throw new Error( - `Failed to encode document to Yjs (updateYFragment): ${e instanceof Error ? e.message : String(e)}.${bad ? ` Offending attribute: ${bad}.` : " A node/mark attribute likely holds a value Yjs cannot store (e.g. undefined)."}`, - ); + throw unstorableYjsError(safe, "updateYFragment", e); } } /** - * Validate that a doc is Yjs-encodable by building (and discarding) a Y.Doc. 
- * Throws the same descriptive error as the apply path when it is not. Used by - * the dry-run preview so it fails identically to apply. + * Run an independent Yjs-encodability check (the same `sanitizeForYjs` + schema + * the apply path uses) and throw the same descriptive error when the doc cannot + * be stored. Used by the dry-run preview. + * + * Note: it does NOT run `updateYFragment` against the live fragment, so it is an + * encodability GATE, not a byte-for-byte rehearsal of apply — `buildYDoc` + * (`toYdoc`) and `applyDocToFragment` (`updateYFragment`) are two different + * encoders that nonetheless reject the same unstorable attributes. */ export function assertYjsEncodable(doc: any): void { buildYDoc(doc); diff --git a/packages/mcp/src/lib/diff.ts b/packages/mcp/src/lib/diff.ts index d0848997..ba216df4 100644 --- a/packages/mcp/src/lib/diff.ts +++ b/packages/mcp/src/lib/diff.ts @@ -17,11 +17,10 @@ * we fall back to a coarse block-level text diff so the tool never hard-fails. */ -import { getSchema } from "@tiptap/core"; import { Node } from "@tiptap/pm/model"; import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset"; import { recreateTransform } from "@fellow/prosemirror-recreate-transform"; -import { docmostExtensions } from "./docmost-schema.js"; +import { docmostSchema } from "./docmost-schema.js"; /** A single inserted/deleted change with its containing-block context. */ export interface DiffChange { @@ -49,8 +48,6 @@ export interface DiffResult { markdown: string; } -/** Build the schema once; it is pure and reused across calls. */ -const schema = getSchema(docmostExtensions); /** Recursively concatenate the plain text of a JSON node. 
*/ function plainText(node: any): string { @@ -288,8 +285,8 @@ export function diffDocs( const changedBlocks = new Set(); try { - const oldNode = Node.fromJSON(schema, oldDocJson); - const newNode = Node.fromJSON(schema, newDocJson); + const oldNode = Node.fromJSON(docmostSchema, oldDocJson); + const newNode = Node.fromJSON(docmostSchema, newDocJson); const tr = recreateTransform(oldNode, newNode, { complexSteps: false, wordDiffs: true, diff --git a/packages/mcp/src/lib/docmost-schema.ts b/packages/mcp/src/lib/docmost-schema.ts index 63bef5c2..546b9844 100644 --- a/packages/mcp/src/lib/docmost-schema.ts +++ b/packages/mcp/src/lib/docmost-schema.ts @@ -14,7 +14,7 @@ import TaskItem from "@tiptap/extension-task-item"; import Highlight from "@tiptap/extension-highlight"; import Subscript from "@tiptap/extension-subscript"; import Superscript from "@tiptap/extension-superscript"; -import { Node, Extension, Mark } from "@tiptap/core"; +import { Node, Extension, Mark, getSchema } from "@tiptap/core"; // Inlined from @tiptap/core's getStyleProperty (added after 3.20.x) so this // package can stay on the same @tiptap/core version as the editor and avoid a @@ -1223,3 +1223,11 @@ export const docmostExtensions = [ PageBreak, DocmostAttributes, ]; + +/** + * The ProseMirror schema for the docmost editor, built ONCE from + * `docmostExtensions`. Pure and reused by every consumer (diff, collaboration + * write-back) so the schema can never drift between call sites — it lives next + * to the extension list it is derived from. 
+ */ +export const docmostSchema = getSchema(docmostExtensions); diff --git a/packages/mcp/test/unit/comment-cursor-stability.test.mjs b/packages/mcp/test/unit/comment-cursor-stability.test.mjs index e494d131..517d65e5 100644 --- a/packages/mcp/test/unit/comment-cursor-stability.test.mjs +++ b/packages/mcp/test/unit/comment-cursor-stability.test.mjs @@ -73,3 +73,21 @@ test("anchoring a comment mark keeps the cursor in the marked text (issue #152)" const abs = Y.createAbsolutePositionFromRelativePosition(relPos, ydoc); assert.notEqual(abs, null, "comment anchoring must not destroy the cursor anchor"); }); + +// The diagnostic catch branch of applyDocToFragment (#154 review): a doc that +// cannot be hydrated/encoded must be re-thrown wrapped with the stage label, not +// leak the raw ProseMirror/Yjs error. An unknown node type makes +// PMNode.fromJSON (against the docmost schema) throw — a reliable trigger +// (sanitizeForYjs only strips `undefined`, so an undefined attr would be removed +// before it could fail). +test("applyDocToFragment wraps an encode/build failure with the (updateYFragment) diagnostic", () => { + const ydoc = new Y.Doc(); + const bad = { + type: "doc", + content: [{ type: "totally_unknown_node_xyz_12345" }], + }; + assert.throws( + () => applyDocToFragment(ydoc, bad), + /Failed to encode document to Yjs \(updateYFragment\)/, + ); +}); From aca075108cc25243c3faad5d26e0e0426884d11d Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 14:48:59 +0300 Subject: [PATCH 03/43] refactor(mcp): accurate encode-failure labels + diff edge-case tests (#154 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the approve-with-comments review on PR #154: - applyDocToFragment: hydrate PMNode.fromJSON in its OWN try so a hydration failure (e.g. an unknown node type) is labelled "fromJSON" — the stage that actually threw — instead of the misleading "updateYFragment". 
The diagnostic comment on unstorableYjsError ("label names the stage that failed") is now truthful. - assertYjsEncodable: also rehearse PMNode.fromJSON(docmostSchema, …) so a doc that would only fail in apply's hydration step is rejected at preview time too, narrowing the preview/apply gap (review suggestion B). Still cheap — no live fragment, no updateYFragment. - Tests: relabel the diagnostic test to (fromJSON); add structural-diff edge cases — neighbour deletion keeps the unchanged node's cursor anchor, doc->empty clears the fragment without throwing, top-level node-type change diffs in place — plus a preview-gate test for the new fromJSON rehearsal. 297/297 green. build/ rebuilt for the changed lib module only (build/client.js left untouched to avoid pulling in pre-existing unrelated src/build drift). Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/mcp/build/lib/collaboration.js | 24 +++++- packages/mcp/src/lib/collaboration.ts | 22 +++++- .../unit/comment-cursor-stability.test.mjs | 79 ++++++++++++++++++- 3 files changed, 117 insertions(+), 8 deletions(-) diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index 1fc4c1d6..fc72bbf3 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -483,8 +483,17 @@ export function buildYDoc(doc) { export function applyDocToFragment(ydoc, newDoc) { const safe = sanitizeForYjs(newDoc); const fragment = ydoc.getXmlFragment("default"); + // Hydrate the ProseMirror node in its OWN try so a failure here (e.g. an + // unknown node type) is labelled "fromJSON" — the stage that actually threw — + // instead of being misattributed to the Yjs write stage (#154 review). 
+ let pmNode; + try { + pmNode = PMNode.fromJSON(docmostSchema, safe); + } + catch (e) { + throw unstorableYjsError(safe, "fromJSON", e); + } try { - const pmNode = PMNode.fromJSON(docmostSchema, safe); ydoc.transact(() => { updateYFragment(ydoc, fragment, pmNode, { mapping: new Map(), @@ -504,10 +513,21 @@ export function applyDocToFragment(ydoc, newDoc) { * Note: it does NOT run `updateYFragment` against the live fragment, so it is an * encodability GATE, not a byte-for-byte rehearsal of apply — `buildYDoc` * (`toYdoc`) and `applyDocToFragment` (`updateYFragment`) are two different - * encoders that nonetheless reject the same unstorable attributes. + * encoders that nonetheless reject the same unstorable attributes. To narrow the + * preview/apply gap it ALSO rehearses the apply path's `PMNode.fromJSON` + * hydration, so a doc that would only fail there (e.g. an unknown node type) is + * rejected at preview time too (#154 review). Still cheap: no live fragment, no + * `updateYFragment`. */ export function assertYjsEncodable(doc) { buildYDoc(doc); + const safe = sanitizeForYjs(doc); + try { + PMNode.fromJSON(docmostSchema, safe); + } + catch (e) { + throw unstorableYjsError(safe, "fromJSON", e); + } } /** Time we wait for the initial handshake/sync before giving up. */ const CONNECT_TIMEOUT_MS = 25000; diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index 7d3fdc0e..efc7bf17 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -540,8 +540,16 @@ export function buildYDoc(doc: any): Y.Doc { export function applyDocToFragment(ydoc: Y.Doc, newDoc: any): void { const safe = sanitizeForYjs(newDoc); const fragment = ydoc.getXmlFragment("default"); + // Hydrate the ProseMirror node in its OWN try so a failure here (e.g. an + // unknown node type) is labelled "fromJSON" — the stage that actually threw — + // instead of being misattributed to the Yjs write stage (#154 review). 
+ let pmNode: PMNode; + try { + pmNode = PMNode.fromJSON(docmostSchema, safe); + } catch (e) { + throw unstorableYjsError(safe, "fromJSON", e); + } try { - const pmNode = PMNode.fromJSON(docmostSchema, safe); ydoc.transact(() => { updateYFragment(ydoc, fragment, pmNode, { mapping: new Map(), @@ -561,10 +569,20 @@ export function applyDocToFragment(ydoc: Y.Doc, newDoc: any): void { * Note: it does NOT run `updateYFragment` against the live fragment, so it is an * encodability GATE, not a byte-for-byte rehearsal of apply — `buildYDoc` * (`toYdoc`) and `applyDocToFragment` (`updateYFragment`) are two different - * encoders that nonetheless reject the same unstorable attributes. + * encoders that nonetheless reject the same unstorable attributes. To narrow the + * preview/apply gap it ALSO rehearses the apply path's `PMNode.fromJSON` + * hydration, so a doc that would only fail there (e.g. an unknown node type) is + * rejected at preview time too (#154 review). Still cheap: no live fragment, no + * `updateYFragment`. */ export function assertYjsEncodable(doc: any): void { buildYDoc(doc); + const safe = sanitizeForYjs(doc); + try { + PMNode.fromJSON(docmostSchema, safe); + } catch (e) { + throw unstorableYjsError(safe, "fromJSON", e); + } } /** Time we wait for the initial handshake/sync before giving up. 
*/ diff --git a/packages/mcp/test/unit/comment-cursor-stability.test.mjs b/packages/mcp/test/unit/comment-cursor-stability.test.mjs index 517d65e5..23614fb9 100644 --- a/packages/mcp/test/unit/comment-cursor-stability.test.mjs +++ b/packages/mcp/test/unit/comment-cursor-stability.test.mjs @@ -1,7 +1,10 @@ import { test } from "node:test"; import assert from "node:assert/strict"; import * as Y from "yjs"; -import { applyDocToFragment } from "../../build/lib/collaboration.js"; +import { + applyDocToFragment, + assertYjsEncodable, +} from "../../build/lib/collaboration.js"; // Regression for issue #152: agent writes (comment anchoring especially) must // NOT yank the open editor's cursor to the end of the document. The cursor is a @@ -79,8 +82,9 @@ test("anchoring a comment mark keeps the cursor in the marked text (issue #152)" // leak the raw ProseMirror/Yjs error. An unknown node type makes // PMNode.fromJSON (against the docmost schema) throw — a reliable trigger // (sanitizeForYjs only strips `undefined`, so an undefined attr would be removed -// before it could fail). -test("applyDocToFragment wraps an encode/build failure with the (updateYFragment) diagnostic", () => { +// before it could fail). The hydration now has its OWN try, so the label is the +// accurate stage `fromJSON` (the earlier `updateYFragment` label was misleading). +test("applyDocToFragment wraps a hydration failure with the (fromJSON) diagnostic", () => { const ydoc = new Y.Doc(); const bad = { type: "doc", @@ -88,6 +92,73 @@ test("applyDocToFragment wraps an encode/build failure with the (updateYFragment }; assert.throws( () => applyDocToFragment(ydoc, bad), - /Failed to encode document to Yjs \(updateYFragment\)/, + /Failed to encode document to Yjs \(fromJSON\)/, + ); +}); + +// #154 review (suggestion 2): structural-diff edge cases the cursor-survival +// path must handle without losing the unchanged node's id or throwing. 
+ +test("deleting a NEIGHBOUR keeps the unchanged node's cursor anchor (diff path)", () => { + const ydoc = new Y.Doc(); + applyDocToFragment(ydoc, doc(para("Keep me"), para("Delete me"))); + + // Anchor inside the first paragraph, which survives the deletion unchanged. + const relPos = Y.createRelativePositionFromTypeIndex(paragraphText(ydoc, 0), 4); + + // Remove the second paragraph entirely; the first must keep its Yjs identity. + applyDocToFragment(ydoc, doc(para("Keep me"))); + + const abs = Y.createAbsolutePositionFromRelativePosition(relPos, ydoc); + assert.notEqual(abs, null, "the surviving node's cursor anchor must still resolve"); + assert.equal(abs.index, 4, "the cursor must stay at the same offset"); + assert.equal(ydoc.getXmlFragment("default").length, 1, "neighbour was deleted"); + assert.equal(paragraphText(ydoc, 0).toString(), "Keep me"); +}); + +test("writing an EMPTY document clears the fragment without throwing", () => { + const ydoc = new Y.Doc(); + applyDocToFragment(ydoc, doc(para("Something"), para("Else"))); + assert.equal(ydoc.getXmlFragment("default").length, 2); + + assert.doesNotThrow(() => + applyDocToFragment(ydoc, { type: "doc", content: [] }), + ); + assert.equal( + ydoc.getXmlFragment("default").length, + 0, + "the fragment is emptied (doc -> empty)", + ); +}); + +test("changing a top-level node TYPE diffs in place (paragraph -> heading)", () => { + const ydoc = new Y.Doc(); + applyDocToFragment(ydoc, doc(para("Title text"), para("Body"))); + + // Replace the first paragraph with a heading carrying the same text. 
+ applyDocToFragment( + ydoc, + doc( + { type: "heading", attrs: { level: 2 }, content: [{ type: "text", text: "Title text" }] }, + para("Body"), + ), + ); + + const first = ydoc.getXmlFragment("default").get(0); + assert.equal(first.nodeName, "heading", "the top-level node type changed"); + assert.equal(first.toString().replace(/<[^>]*>/g, ""), "Title text"); +}); + +// #154 review (suggestion B / architecture B): the dry-run gate now also +// rehearses PMNode.fromJSON, so a doc that fails ONLY in hydration (not in +// toYdoc) is rejected at preview time, with the accurate `fromJSON` label. +test("assertYjsEncodable rejects an un-hydratable doc at preview time (fromJSON gate)", () => { + const bad = { + type: "doc", + content: [{ type: "totally_unknown_node_xyz_67890" }], + }; + assert.throws( + () => assertYjsEncodable(bad), + /Failed to encode document to Yjs/, ); }); From 13cac155c118b75a3eafcd1d6387c42c813c48ad Mon Sep 17 00:00:00 2001 From: claude_code Date: Wed, 24 Jun 2026 15:14:29 +0300 Subject: [PATCH 04/43] chore(ai-chat): add temporary Safari stream-drop diagnostics Investigate the Safari-only "Lost connection to the AI provider" mid-stream disconnect (Chrome unaffected). Pure instrumentation, no behavior change: the 15s heartbeat interval and all stream callbacks are unchanged. - sse-resilience.ts: startSseHeartbeat() gains an optional onBeat hook fired after each successfully written ping (beat counter). - ai-chat.service.ts: track stream start, first-chunk latency, model-silent gap and heartbeat count; log them on finish/error/abort to classify the drop (idle-gap vs hard wall-clock cap vs slow first chunk). - ai-chat.controller.ts: append elapsed-since-request to the disconnect warn. All blocks tagged "DIAGNOSTIC ... temporary" for easy removal once the Safari failure mode is identified. 
Co-Authored-By: Claude Opus 4.8 --- .../src/core/ai-chat/ai-chat.controller.ts | 6 ++- .../src/core/ai-chat/ai-chat.service.ts | 46 ++++++++++++++++++- .../server/src/core/ai-chat/sse-resilience.ts | 9 ++++ 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/apps/server/src/core/ai-chat/ai-chat.controller.ts b/apps/server/src/core/ai-chat/ai-chat.controller.ts index 0870969e..a8ddccb1 100644 --- a/apps/server/src/core/ai-chat/ai-chat.controller.ts +++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts @@ -159,6 +159,9 @@ export class AiChatController { // we also drop it on response `finish` so it never lingers after the stream // completes normally (the AI SDK pipes the response fire-and-forget, so we // cannot simply remove it once `stream()` returns). + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: wall-clock at + // which a Safari disconnect is observed, measured from request receipt. + const reqStartedAt = Date.now(); const controller = new AbortController(); const onClose = (): void => { // A genuine disconnect leaves the response unfinished (unlike a normal @@ -167,7 +170,8 @@ export class AiChatController { // so log it here before aborting the agent loop. if (!res.raw.writableEnded) { this.logger.warn( - 'AI chat stream: client disconnected before completion; aborting turn', + `AI chat stream: client disconnected before completion; aborting turn ` + + `(elapsed=${Date.now() - reqStartedAt}ms since request received)`, ); controller.abort(); } diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 91cb64af..1cce9cf3 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -380,6 +380,15 @@ export class AiChatService { const capturedSteps: StepLike[] = []; let inProgressText = ''; + // DIAGNOSTIC (Safari stream-drop investigation) — temporary. 
Measure + // first-chunk latency, the model-silent gap right before a disconnect, and + // how many SSE heartbeats were written, so a Safari drop can be classified + // (idle-gap vs hard wall-clock cap vs slow first chunk). + const streamStartedAt = Date.now(); + let firstModelChunkAt: number | undefined; + let lastModelChunkAt = streamStartedAt; + let heartbeatsSent = 0; + // NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous // failure here (or in pipe below) would skip the terminal callbacks, so the // catch releases the leased external clients to avoid a connection leak. @@ -404,6 +413,12 @@ export class AiChatService { prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system), abortSignal: signal, onChunk: ({ chunk }) => { + // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model + // output chunk means the stream is actively emitting bytes; track first + // + most-recent activity timestamps. + const now = Date.now(); + firstModelChunkAt ??= now; + lastModelChunkAt = now; // 'text-delta' is the assistant's prose; tool-call args are separate chunk // types — so this mirrors exactly what streams to the client. if (chunk.type === 'text-delta') inProgressText += chunk.text; @@ -415,6 +430,14 @@ export class AiChatService { inProgressText = ''; }, onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => { + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success + // baseline for Safari comparison. + const diagNow = Date.now(); + this.logger.log( + `AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` + + `firstChunkLatency=${firstModelChunkAt ? 
firstModelChunkAt - streamStartedAt : 'none'}ms ` + + `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`, + ); await persistAssistant({ text, toolCalls: serializeSteps(steps), @@ -464,6 +487,14 @@ export class AiChatService { const e = error as { stack?: string }; const errorText = describeProviderError(error, String(error)); this.logger.error(`AI chat stream error: ${errorText}`, e?.stack); + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of + // an error-terminated stream. + const diagNow = Date.now(); + this.logger.warn( + `AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` + + `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + + `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`, + ); // Persist the PARTIAL answer streamed before the failure (text + any // finished tool steps) WITH the error in metadata, so the turn shows what // the user already saw plus the cause — not just a bare error. @@ -488,6 +519,15 @@ export class AiChatService { `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` + `step(s), ${partialChars} chars partial text; persisting partial turn.`, ); + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key + // line — classifies the Safari drop. + const diagNow = Date.now(); + this.logger.warn( + `AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` + + `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + + `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` + + `steps=${steps.length}`, + ); await persistAssistant( buildPartialAssistantRecord(capturedSteps, inProgressText, 'aborted'), ); @@ -566,7 +606,11 @@ export class AiChatService { // headers are sent, and is guarded for response-likes that lack it. 
res.raw.flushHeaders?.(); // Heartbeat: keep the SSE stream progressing during silent tool/think gaps (Safari/proxy idle timeout). - startSseHeartbeat(res.raw); + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: count beats so a disconnect log can show + // how many pings were written before Safari dropped. + startSseHeartbeat(res.raw, 15_000, () => { + heartbeatsSent += 1; + }); } catch (err) { // Synchronous failure before/while wiring the stream: the terminal // callbacks will not run, so release the leased external clients here and diff --git a/apps/server/src/core/ai-chat/sse-resilience.ts b/apps/server/src/core/ai-chat/sse-resilience.ts index dbf3d8e4..826aff9d 100644 --- a/apps/server/src/core/ai-chat/sse-resilience.ts +++ b/apps/server/src/core/ai-chat/sse-resilience.ts @@ -28,15 +28,24 @@ import type { ServerResponse } from 'node:http'; * the response finishes or the socket closes. The interval is unref()'d so it * never keeps the process alive, and writes are guarded so we never write to an * already-ended/destroyed socket. + * + * `onBeat` is an OPTIONAL diagnostic hook invoked once after each heartbeat that + * was actually written (only when the write did not throw). It is purely for + * telemetry/counters and never affects the heartbeat behavior. */ export function startSseHeartbeat( res: ServerResponse, intervalMs = 15_000, + onBeat?: () => void, ): () => void { const timer = setInterval(() => { if (res.writableEnded || res.destroyed) return; try { res.write(': ping\n\n'); + // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Notify the + // optional hook only after a successful write, so beat counters reflect + // pings that actually reached the socket. + onBeat?.(); } catch { // Socket vanished between the guard and the write; nothing to do. 
} From 17e683a3113a1d4c8f06cb7a8a1ad453e8667155 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 15:34:41 +0300 Subject: [PATCH 05/43] feat(footnotes): reuse semantics + import diagnostics (#166) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Footnotes were strict 1:1: a repeated `[^a]` reference was treated as a collision and re-id'd to `a__2`, and a reference with no definition synthesized its own empty one — so an agent-authored article with reused labels produced dozens of empty `kowiki__N` footnotes. Move to Pandoc REUSE semantics and add non-fatal import diagnostics. Reuse (core): - resolveCollisions (footnote-sync): repeated references sharing an id are REUSE (recorded once in document order, never re-id'd) — one number, one shared definition. Only a duplicate DEFINITION is re-id'd deterministically and, with no matching reference, dropped by the existing orphan policy (first-wins). CollisionPlan.refReids is now always empty (harmless no-op downstream). - extractFootnoteDefinitions (marked) and extractFootnotes (MCP): duplicate definition ids are FIRST-WINS (keep first, drop rest); reference markers are never rewritten. Removed the marker-rewriting and the now-dead deriveFootnoteId mirror + helpers from the MCP path. Import diagnostics: - New analyzeFootnotes() (MCP): fence-aware pure scan reporting dangling references, empty/duplicate definitions and `[^id]` markers inside table rows. - createPage / updatePage / importPageMarkdown now attach `footnoteWarnings` (only when non-empty) so an agent can fix its markup; the page is still created. Paste-reuse: - footnotePastePlugin remaps only ids the pasted slice DEFINES (a colliding definition); a pasted lone reference to an existing id keeps it (reuse). 
Tests: reuse/first-wins rewrites of footnote.test, footnote-markdown.test, footnote.marked.orphan.test and the MCP footnotes.test; new footnote-paste.test (editor-ext) and footnote-analyze.test (MCP). Deleted derive-id-parity.test.mjs (the MCP no longer derives ids; editor-ext's deriveFootnoteId keeps its own golden test). editor-ext 128, MCP 299, server roundtrip 2, client views 3, client+server tsc clean. Two review suggestions applied: corrected a stale "duplicated in MCP" comment and the dangling-reference warning wording. Note: the multi-backlink editor UI (a reused definition linking back to each of its references) is deferred to a follow-up — this PR delivers the data-integrity core (reuse + warnings + paste-reuse). Forward links and numbering already reuse correctly; the backlink currently targets the first reference. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../lib/footnote/footnote-markdown.test.ts | 47 ++--- .../src/lib/footnote/footnote-paste.test.ts | 162 +++++++++++++++ .../src/lib/footnote/footnote-sync.ts | 184 ++++++++---------- .../src/lib/footnote/footnote-util.ts | 9 +- .../src/lib/footnote/footnote.test.ts | 63 +++--- .../utils/footnote.marked.orphan.test.ts | 55 +++--- .../src/lib/markdown/utils/footnote.marked.ts | 71 ++----- packages/mcp/build/client.js | 25 ++- packages/mcp/build/lib/collaboration.js | 83 ++------ packages/mcp/build/lib/footnote-analyze.js | 115 +++++++++++ packages/mcp/src/client.ts | 14 +- packages/mcp/src/lib/collaboration.ts | 92 ++------- packages/mcp/src/lib/footnote-analyze.ts | 138 +++++++++++++ .../mcp/test/unit/derive-id-parity.test.mjs | 134 ------------- .../mcp/test/unit/footnote-analyze.test.mjs | 106 ++++++++++ packages/mcp/test/unit/footnotes.test.mjs | 34 ++-- 16 files changed, 774 insertions(+), 558 deletions(-) create mode 100644 packages/editor-ext/src/lib/footnote/footnote-paste.test.ts create mode 100644 packages/mcp/build/lib/footnote-analyze.js create mode 100644 
packages/mcp/src/lib/footnote-analyze.ts delete mode 100644 packages/mcp/test/unit/derive-id-parity.test.mjs create mode 100644 packages/mcp/test/unit/footnote-analyze.test.mjs diff --git a/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts b/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts index 844134f6..6c87f2d6 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-markdown.test.ts @@ -55,10 +55,11 @@ describe("footnote markdown round-trip", () => { expect(html).not.toContain("data-footnote-def"); }); - it("extractFootnoteDefinitions de-duplicates colliding ids and rewrites markers", () => { - // Two definitions share id `d`, and the body has two `[^d]` markers. The - // output must keep BOTH definitions with DISTINCT ids and rewrite the second - // marker so the (reference, definition) pairing stays 1:1. + it("extractFootnoteDefinitions keeps the FIRST duplicate definition and reuses markers", () => { + // Two definitions share id `d`, and the body has two `[^d]` markers. Under + // the import model (#166) duplicate definition ids are FIRST-WINS: only the + // first definition is kept; markers are NEVER rewritten, so the two `[^d]` + // references reuse the single footnote. const md = [ "See here[^d] and there[^d].", "", @@ -68,30 +69,23 @@ describe("footnote markdown round-trip", () => { const { body, section } = extractFootnoteDefinitions(md); - // Pull out the def ids from the section in order. const defIds = Array.from( section.matchAll(/data-footnote-def data-id="([^"]+)"/g), ).map((m) => m[1]); - expect(defIds.length).toBe(2); - expect(new Set(defIds).size).toBe(2); // distinct - expect(defIds[0]).toBe("d"); // first definition keeps the id - - // Both definition texts survive. 
+ expect(defIds).toEqual(["d"]); // first-wins: one definition expect(section).toContain("first"); - expect(section).toContain("second"); + expect(section).not.toContain("second"); // duplicate dropped - // The body still has two markers, now pointing at the two distinct ids. + // Both markers stay `[^d]` (reuse) — no `d__2` minting. const refIds = Array.from(body.matchAll(/\[\^([^\]\s]+)\]/g)).map( (m) => m[1], ); - expect(refIds.length).toBe(2); - expect(refIds.sort()).toEqual(defIds.sort()); + expect(refIds).toEqual(["d", "d"]); }); - it("extractFootnoteDefinitions dedups DETERMINISTICALLY (same input -> same ids)", () => { - // The derived id must be a pure function of the input markdown so importing - // the same source twice (or via the editor and the MCP mirror) yields - // identical ids — never random/time-based. + it("extractFootnoteDefinitions is DETERMINISTIC and stable (same input -> same output)", () => { + // The output must be a pure function of the input markdown so importing the + // same source twice (or via the editor and the MCP mirror) is identical. const md = [ "See[^d] one[^d] two[^d].", "", @@ -113,15 +107,13 @@ describe("footnote markdown round-trip", () => { const a = run(); const b = run(); - // Identical across runs (this is what would FAIL on the random-id version). - expect(a.defIds).toEqual(b.defIds); - expect(a.refIds).toEqual(b.refIds); - // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3". - expect(a.defIds).toEqual(["d", "d__2", "d__3"]); - expect(a.refIds.sort()).toEqual(a.defIds.sort()); + expect(a).toEqual(b); + // First-wins: one kept definition `d`; all three reuse markers stay `d`. 
+ expect(a.defIds).toEqual(["d"]); + expect(a.refIds).toEqual(["d", "d", "d"]); }); - it("markdownToHtml with duplicate ids renders two distinct footnote defs", async () => { + it("markdownToHtml with a reused id renders ONE shared footnote def", async () => { const md = [ "See here[^d] and there[^d].", "", @@ -132,9 +124,8 @@ describe("footnote markdown round-trip", () => { const defIds = Array.from( html.matchAll(/data-footnote-def data-id="([^"]+)"/g), ).map((m) => m[1]); - expect(defIds.length).toBe(2); - expect(new Set(defIds).size).toBe(2); + expect(defIds).toEqual(["d"]); // one shared definition expect(html).toContain("first"); - expect(html).toContain("second"); + expect(html).not.toContain("second"); }); }); diff --git a/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts b/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts new file mode 100644 index 00000000..5790faf8 --- /dev/null +++ b/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts @@ -0,0 +1,162 @@ +import { describe, it, expect } from "vitest"; +import { Editor } from "@tiptap/core"; +import { Document } from "@tiptap/extension-document"; +import { Paragraph } from "@tiptap/extension-paragraph"; +import { Text } from "@tiptap/extension-text"; +import { Node as PMNode, Fragment, Slice } from "@tiptap/pm/model"; +import { FootnoteReference } from "./footnote-reference"; +import { FootnotesList } from "./footnotes-list"; +import { FootnoteDefinition } from "./footnote-definition"; +import { footnotePastePlugin } from "./footnote-sync"; +import { + FOOTNOTE_REFERENCE_NAME, + FOOTNOTE_DEFINITION_NAME, + FOOTNOTES_LIST_NAME, +} from "./footnote-util"; + +// transformPasted reuse semantics (#166): a pasted reference to an id that +// already exists must KEEP the id (reuse → resolves to the existing footnote); +// only a pasted DEFINITION that collides is re-id'd (it would otherwise clobber +// the existing definition's text), and its paired references follow it. 
+ +const extensions = [ + Document, + Paragraph, + Text, + FootnoteReference, + FootnotesList, + FootnoteDefinition, +]; + +/** An editor whose doc already contains footnote "a" (ref + definition). */ +function makeEditorWithFootnoteA() { + return new Editor({ + extensions, + content: { + type: "doc", + content: [ + { + type: "paragraph", + content: [ + { type: "text", text: "x" }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "a" } }, + ], + }, + { + type: FOOTNOTES_LIST_NAME, + content: [ + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "a" }, + content: [ + { type: "paragraph", content: [{ type: "text", text: "note A" }] }, + ], + }, + ], + }, + ], + }, + }); +} + +/** Run footnotePastePlugin's transformPasted against the editor's current doc. */ +function paste(editor: Editor, slice: Slice): Slice { + const plugin = footnotePastePlugin(); + return plugin.props!.transformPasted!(slice, editor.view); +} + +/** Collect the ids of footnote refs/defs in a slice, in order (single DFS). */ +function sliceFootnoteIds(slice: Slice): Array<{ kind: string; id: string }> { + const out: Array<{ kind: string; id: string }> = []; + const walk = (frag: Fragment) => { + frag.forEach((node: PMNode) => { + if (node.type.name === FOOTNOTE_REFERENCE_NAME) + out.push({ kind: "ref", id: node.attrs.id }); + if (node.type.name === FOOTNOTE_DEFINITION_NAME) + out.push({ kind: "def", id: node.attrs.id }); + walk(node.content); + }); + }; + walk(slice.content); + return out; +} + +describe("footnotePastePlugin — reuse-aware id remap", () => { + it("keeps a pasted lone reference to an existing id (reuse, no remap)", () => { + const editor = makeEditorWithFootnoteA(); + const { schema } = editor; + // Paste: a paragraph containing only a reference to the existing id "a". 
+ const slice = new Slice( + Fragment.from( + schema.nodes.paragraph.create(null, [ + schema.text("see "), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + ]), + ), + 0, + 0, + ); + const out = paste(editor, slice); + // The reference keeps id "a" so it reuses the existing footnote. + expect(sliceFootnoteIds(out)).toEqual([{ kind: "ref", id: "a" }]); + editor.destroy(); + }); + + it("re-ids a pasted DEFINITION (and its paired reference) that collides", () => { + const editor = makeEditorWithFootnoteA(); + const { schema } = editor; + // Paste: a reference AND a definition both carrying the existing id "a". The + // definition would clobber the existing one, so both are remapped together. + const slice = new Slice( + Fragment.fromArray([ + schema.nodes.paragraph.create(null, [ + schema.text("dup "), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + ]), + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [ + schema.nodes.paragraph.create(null, [schema.text("pasted note")]), + ]), + ]), + ]), + 0, + 0, + ); + const out = paste(editor, slice); + const ids = sliceFootnoteIds(out); + // Both the pasted ref and def were remapped to the SAME fresh id (paired), + // and it is the deterministic derived id (not "a"). + const remappedIds = new Set(ids.map((x) => x.id)); + expect(remappedIds.size).toBe(1); + expect(remappedIds.has("a")).toBe(false); + expect([...remappedIds][0]).toBe("a__2"); + editor.destroy(); + }); + + it("leaves the slice untouched when no pasted definition collides", () => { + const editor = makeEditorWithFootnoteA(); + const { schema } = editor; + // A pasted reference+definition for a BRAND-NEW id "b" — no collision. 
+ const slice = new Slice( + Fragment.fromArray([ + schema.nodes.paragraph.create(null, [ + schema.text("new "), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "b" }), + ]), + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [ + schema.nodes.paragraph.create(null, [schema.text("note B")]), + ]), + ]), + ]), + 0, + 0, + ); + const out = paste(editor, slice); + expect(sliceFootnoteIds(out)).toEqual([ + { kind: "ref", id: "b" }, + { kind: "def", id: "b" }, + ]); + editor.destroy(); + }); +}); diff --git a/packages/editor-ext/src/lib/footnote/footnote-sync.ts b/packages/editor-ext/src/lib/footnote/footnote-sync.ts index 505a60d0..e861ed0e 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-sync.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-sync.ts @@ -73,51 +73,58 @@ function scan(doc: ProseMirrorNode): FootnoteScan { * * The overriding invariant is that NO definition is ever dropped here: every * definition occurrence ends up with a unique id and therefore survives the - * canonical rebuild. Duplicate references are likewise re-id'd (and paired with - * a duplicate definition when one exists) so importing/pasting `[^d]` twice with - * two `[^d]:` definitions yields TWO distinct footnotes rather than one. + * canonical rebuild. Repeated references that share an id are REUSE (one + * footnote) and are left untouched; only duplicate DEFINITIONS are re-id'd, so a + * pasted/merged second `[^d]:` survives as its own (then orphaned) footnote. */ interface CollisionPlan { /** - * Reference ids in document order, de-duplicated AFTER re-id. This is the - * source of truth for definition order/numbering, exactly as before — only - * now collisions have been resolved so it no longer hides duplicates. + * Distinct reference ids in document order (first appearance). Repeated ids + * are reuse and collapse to a single entry. Source of truth for definition + * order/numbering. 
*/ referenceIds: string[]; - /** id -> definition node, after duplicates were re-id'd. One entry per id. */ + /** id -> definition node, after duplicate definitions were re-id'd. One per id. */ definitions: Map; /** - * Body reference re-id edits to apply (position of a reference node -> the - * fresh id it must carry). Empty when there are no colliding references. + * Body reference re-id edits. ALWAYS EMPTY under reuse semantics (references + * are never re-id'd); retained so the downstream consumer stays a harmless + * no-op rather than needing removal. */ refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>; - /** True when any collision required a re-id (refs and/or defs). */ + /** True when a duplicate definition required a re-id. */ changed: boolean; } /** - * Resolve duplicate-id collisions among references and definitions WITHOUT ever - * dropping a definition. + * Resolve the footnote id topology WITHOUT ever dropping a definition. * - * Strategy: - * - Walk references in document order. The FIRST reference for an id keeps it. - * Any later reference sharing that id is a duplicate and gets a fresh unique - * id; if a still-unclaimed duplicate definition with the original id exists, - * it is re-id'd to the SAME fresh id so the (ref, def) pair stays matched. - * - Walk definitions in document order. The FIRST definition for an id keeps - * it; later duplicates that were not already claimed by a duplicate reference - * get their own fresh unique id (surviving as a distinct footnote/orphan). + * Reference REUSE (Pandoc semantics, #166): repeated `[^a]` references that share + * an id are the SAME footnote — they get one number and one definition and are + * NEVER re-id'd. So the reference walk only records the FIRST occurrence of each + * id (de-duplicating in document order); later occurrences are reuse and produce + * no mutation at all. * - * Re-id determinism: every fresh id is DERIVED from document state via - * deriveFootnoteId (e.g. 
`X__2`, `X__3`, collision-bumped against the set of ids - * already present) — NEVER random/time-based. Because the sync plugin runs - * identically on every collaborating client, a deterministic re-id is the only - * way they can converge on the SAME ids; a random id (the previous - * implementation) made two clients editing the same duplicate-id document mint - * DIFFERENT ids for the same duplicate, causing permanent Yjs divergence. + * Duplicate DEFINITIONS (two `[^d]:` nodes sharing an id reaching the LIVE editor + * via paste/collab merge) keep the never-lose policy: the first keeps the id, and + * each later duplicate is re-id'd to a DETERMINISTIC fresh id (deriveFootnoteId: + * `X__2`, `X__3`, collision-bumped) so it survives as a distinct footnote — which, + * having no matching reference, then falls under the normal orphan policy. It is + * only ever dropped for lacking a reference, never for colliding. The IMPORT + * paths (footnote.marked.ts / MCP extractFootnotes) instead apply first-wins + + * drop + warn for duplicate definitions; that divergence is intentional — import + * is an agent-authored artifact we sanitize, the editor is live user data we must + * not lose. + * + * Re-id determinism: every fresh id is DERIVED from document state, NEVER + * random/time-based, because the sync plugin runs identically on every + * collaborating client and a random id would make two clients mint DIFFERENT ids + * for the same duplicate, causing permanent Yjs divergence. */ function resolveCollisions(scan: FootnoteScan): CollisionPlan { const definitions = new Map(); + // References are never re-id'd under reuse semantics, so this stays empty; it + // is retained so the CollisionPlan shape (and its no-op consumer) is unchanged. 
const refReids: Array<{ pos: number; node: ProseMirrorNode; @@ -127,17 +134,14 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan { const seenRefIds = new Set(); let changed = false; - // `taken` is the set of every id that must be avoided when minting a derived - // id: all original reference + definition ids in the document PLUS every id we - // mint during this pass. It is pure document state, so the derivation stays - // deterministic across clients. Per-original occurrence counters make the k-th - // duplicate of `X` deterministically become `X__2`, `X__3`, ... + // `taken` is the set of every id to avoid when minting a derived id for a + // duplicate definition: all original reference + definition ids PLUS every id + // minted in this pass. Pure document state, so the derivation is deterministic + // across clients. const taken = new Set(); for (const occ of scan.refOccurrences) taken.add(occ.id); for (const occ of scan.defOccurrences) taken.add(occ.id); const occurrenceOf = new Map(); - // Mint a deterministic unique id for a duplicate of `originalId`. The first - // duplicate is occurrence 2 (the keeper is occurrence 1), then 3, 4, ... const mintId = (originalId: string): string => { const next = (occurrenceOf.get(originalId) ?? 1) + 1; occurrenceOf.set(originalId, next); @@ -146,63 +150,23 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan { return id; }; - // Bucket definition occurrences by their original id so a duplicate reference - // can claim a matching (as-yet-unclaimed) duplicate definition and re-id the - // pair together. defByOriginalId[id] is consumed front-to-back. - const defByOriginalId = new Map(); - for (const occ of scan.defOccurrences) { - const arr = defByOriginalId.get(occ.id); - if (arr) arr.push(occ); - else defByOriginalId.set(occ.id, [occ]); - } - // The FIRST definition for each id is the canonical keeper of that id. 
- const claimed = new Set(); - + // References: record each DISTINCT id once, in first-appearance order. Repeated + // ids are reuse — nothing to mint, nothing to re-id. for (const ref of scan.refOccurrences) { if (!seenRefIds.has(ref.id)) { - // First reference with this id keeps it. seenRefIds.add(ref.id); referenceIds.push(ref.id); - continue; - } - // Duplicate reference: assign a deterministic derived id. Pair it with the - // next unclaimed duplicate definition (NOT the first keeper) carrying the - // same original id, if one exists, so the (ref, def) pairing is preserved - // 1:1. - const newId = mintId(ref.id); - refReids.push({ pos: ref.pos, node: ref.node, newId }); - seenRefIds.add(newId); - referenceIds.push(newId); - changed = true; - - const candidates = defByOriginalId.get(ref.id) ?? []; - // Skip the first occurrence (it keeps the original id); pick the first - // duplicate not already claimed. - for (let i = 1; i < candidates.length; i++) { - const cand = candidates[i]; - if (!claimed.has(cand)) { - claimed.add(cand); - definitions.set(newId, cand.node); - break; - } } } - // Now place every definition under a unique id. The first occurrence of each - // original id keeps it; remaining duplicates either were paired with a - // duplicate reference above (already placed) or get a fresh standalone id. + // Definitions: the first occurrence of each id keeps it; a later duplicate is + // re-id'd deterministically so it is never silently dropped (never-lose). const seenDefIds = new Set(); for (const occ of scan.defOccurrences) { - if (claimed.has(occ)) continue; // already placed against a duplicate ref id if (!seenDefIds.has(occ.id)) { seenDefIds.add(occ.id); definitions.set(occ.id, occ.node); } else { - // Duplicate definition with no duplicate reference to pair with: keep it - // with a deterministic derived id so it is NEVER silently dropped. 
(It - // becomes an orphan and is then subject to the normal orphan policy — but - // only ever because it has no matching reference, never because it - // collided.) const newId = mintId(occ.id); definitions.set(newId, occ.node); changed = true; @@ -546,13 +510,17 @@ export const footnotePastePluginKey = new PluginKey("footnotePaste"); * Without this, pasting a reference+definition pair copied from elsewhere — or * duplicating one in place — would merge with (or clobber) the existing footnote * of the same id. The schema-sync plugin already guarantees no definition is - * ever silently deleted after the fact (it re-id's collisions), but regenerating - * at paste time keeps the pasted footnote cleanly separate from the start and - * avoids any transient merge. + * ever silently deleted after the fact (it re-id's duplicate definitions), but + * regenerating at paste time keeps the pasted footnote cleanly separate from the + * start and avoids any transient merge. * - * Only COLLIDING ids are remapped: a self-paste of a lone reference whose id is - * not present elsewhere is left untouched (so it still resolves to its existing - * definition). + * REUSE-aware (#166): only a colliding DEFINITION forces a remap. Pasting a lone + * reference whose id already exists is REUSE — it must keep the id so it resolves + * to the existing footnote (one number, shared definition). So we remap an id + * only when the pasted slice itself carries a `footnoteDefinition` for it (which + * would otherwise clobber the existing definition's text); the matching pasted + * references are remapped along with it to stay paired. A self-paste of just a + * reference is left untouched. 
*/ export function footnotePastePlugin(): Plugin { return new Plugin({ @@ -572,31 +540,35 @@ export function footnotePastePlugin(): Plugin { }); if (existing.size === 0) return slice; - // Build a remap (old id -> fresh id) for every COLLIDING id found in the - // pasted slice, shared by references and definitions so a pasted pair - // stays matched. A paste is a distinct local user action (not a - // shared-state convergence point), so determinism is not strictly - // required here — but we derive the new id deterministically anyway - // (deriveFootnoteId against the current doc's id set) for consistency - // with the sync/import paths and to keep Math.random off this code path. - const remap = new Map(); - const collectColliding = (node: ProseMirrorNode) => { - if ( - node.type.name === FOOTNOTE_REFERENCE_NAME || - node.type.name === FOOTNOTE_DEFINITION_NAME - ) { + // Ids the pasted slice DEFINES (carries a footnoteDefinition for). Only + // these can clobber an existing footnote's text, so only these force a + // remap; a pasted reference to an already-existing id is reuse and keeps + // its id. + const sliceDefIds = new Set(); + const collectDefIds = (node: ProseMirrorNode) => { + if (node.type.name === FOOTNOTE_DEFINITION_NAME) { const id = node.attrs.id; - if (id && existing.has(id) && !remap.has(id)) { - const newId = deriveFootnoteId(id, 2, existing); - remap.set(id, newId); - // Reserve it so a second colliding id deriving to the same base - // bumps instead of clashing. - existing.add(newId); - } + if (id) sliceDefIds.add(id); } - node.descendants(collectColliding); + node.descendants(collectDefIds); }; - slice.content.descendants(collectColliding); + slice.content.descendants(collectDefIds); + + // Build a remap (old id -> fresh id) for every colliding id the slice + // DEFINES, shared by references and definitions so a pasted pair stays + // matched. 
The new id is derived deterministically (deriveFootnoteId + // against the current doc's id set) for consistency with the sync/import + // paths and to keep Math.random off this code path. + const remap = new Map(); + for (const id of sliceDefIds) { + if (existing.has(id) && !remap.has(id)) { + const newId = deriveFootnoteId(id, 2, existing); + remap.set(id, newId); + // Reserve it so a second colliding id deriving to the same base + // bumps instead of clashing. + existing.add(newId); + } + } if (remap.size === 0) return slice; // Rewrite the colliding ids throughout the slice. diff --git a/packages/editor-ext/src/lib/footnote/footnote-util.ts b/packages/editor-ext/src/lib/footnote/footnote-util.ts index 7896595d..56813288 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-util.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-util.ts @@ -62,10 +62,11 @@ export function generateFootnoteId(): string { * `taken` is consulted but NOT mutated here; the caller adds the returned id to * its own seen-set before requesting the next derived id. * - * NOTE: this implementation is intentionally duplicated in - * packages/mcp/src/lib/collaboration.ts (deriveFootnoteId) - * and MUST stay in sync with it so markdown imported through either path yields - * identical ids. + * Used only inside editor-ext now (resolveCollisions for a re-id'd duplicate + * DEFINITION, and footnotePastePlugin). The MCP/marked import paths no longer + * derive ids — duplicate definitions there are first-wins-dropped (#166) — so + * there is no cross-package copy to keep in sync. The golden table in + * footnote-util.derive-id.test.ts pins the scheme. 
*/ export function deriveFootnoteId( originalId: string, diff --git a/packages/editor-ext/src/lib/footnote/footnote.test.ts b/packages/editor-ext/src/lib/footnote/footnote.test.ts index 9ecf9a55..ff4e1625 100644 --- a/packages/editor-ext/src/lib/footnote/footnote.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote.test.ts @@ -307,13 +307,12 @@ describe("footnote sync plugin (orphans)", () => { editor.destroy(); }); - it("two definitions sharing an id (with two matching references) BOTH survive the first edit (no data loss)", () => { - // Reproduces the verified data-loss bug: two footnoteDefinition nodes share - // id "d", and there are two references with id "d". The OLD code built the - // definitions Map last-wins and emitted exactly one definition for the - // de-duplicated reference, so the very first keystroke's sync transaction - // deleted the whole list and rebuilt it from one definition — silently - // destroying "first" and keeping only "second". + it("repeated references REUSE one footnote; a duplicate definition is dropped (first-wins)", () => { + // Reuse semantics (#166): two references with id "d" are the SAME footnote + // (one number, shared definition) — they are NEVER re-id'd. Two definitions + // sharing id "d" are first-wins: the first keeps "d", the second is re-id'd + // to a deterministic orphan id and then dropped by the orphan policy (it has + // no matching reference). So the result is ONE reused footnote on "first". const editor = makeEditor({ type: "doc", content: [ @@ -351,8 +350,8 @@ describe("footnote sync plugin (orphans)", () => { editor.commands.insertContentAt(1, " "); const doc = editor.state.doc; - // BOTH definitions survive. - expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(2); + // One shared definition survives (first-wins); the duplicate is dropped. 
+ expect(countType(doc, FOOTNOTE_DEFINITION_NAME)).toBe(1); const defTexts: string[] = []; const defIds: string[] = []; doc.descendants((node) => { @@ -361,27 +360,23 @@ describe("footnote sync plugin (orphans)", () => { defTexts.push(node.textContent); } }); - // No content was lost: both "first" and "second" are still present. - expect(defTexts.sort()).toEqual(["first", "second"]); - // The colliding ids were made distinct. - expect(new Set(defIds).size).toBe(2); - // Each definition's id matches exactly one reference (1:1 pairing). + expect(defTexts).toEqual(["first"]); + expect(defIds).toEqual(["d"]); + // Both references keep id "d" (reuse — not re-id'd). const refIds: string[] = []; doc.descendants((node) => { if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id); }); - expect(refIds.sort()).toEqual(defIds.sort()); + expect(refIds).toEqual(["d", "d"]); editor.destroy(); }); - it("re-ids colliding duplicates DETERMINISTICALLY (two clients converge to identical ids)", () => { + it("reuse outcome is DETERMINISTIC across clients (Yjs convergence)", () => { // Cross-client determinism guard. Two collaborating clients each see the - // SAME duplicate-id document and each make a local edit. The sync plugin - // runs identically on every client, so it MUST mint the SAME new ids on both - // — otherwise the two clients diverge permanently over Yjs (duplicated - // footnotes). This is exactly the blocker the previous random-id - // (generateFootnoteId / Math.random) implementation caused: it would mint - // DIFFERENT ids on each client and this assertion would fail. + // SAME document and make a local edit; the sync plugin runs identically, so + // the resolved state MUST be identical (else they diverge over Yjs). Under + // reuse the three "d" references collapse to one footnote and the duplicate + // definitions are dropped (first-wins) — deterministically on every client. 
const duplicateDoc = { type: "doc", content: [ @@ -435,30 +430,28 @@ describe("footnote sync plugin (orphans)", () => { editor.commands.insertContentAt(1, " "); // local keystroke -> sync runs const refIds: string[] = []; const defIds: string[] = []; + const defTexts: string[] = []; editor.state.doc.descendants((node) => { if (node.type.name === FOOTNOTE_REFERENCE_NAME) refIds.push(node.attrs.id); - if (node.type.name === FOOTNOTE_DEFINITION_NAME) + if (node.type.name === FOOTNOTE_DEFINITION_NAME) { defIds.push(node.attrs.id); + defTexts.push(node.textContent); + } }); editor.destroy(); - return { refIds, defIds }; + return { refIds, defIds, defTexts }; }; const clientA = idsAfterLocalEdit(); const clientB = idsAfterLocalEdit(); - // Both clients computed IDENTICAL ids (the property that makes Yjs converge). - expect(clientA.refIds).toEqual(clientB.refIds); - expect(clientA.defIds).toEqual(clientB.defIds); - - // And the ids are deterministic-derived (not random uuid-style): the keeper - // keeps "d", the duplicates become "d__2", "d__3". - expect(new Set(clientA.refIds)).toEqual(new Set(["d", "d__2", "d__3"])); - // Every definition survived with a unique id, 1:1 with the references. - expect(clientA.defIds.length).toBe(3); - expect(new Set(clientA.defIds).size).toBe(3); - expect([...clientA.refIds].sort()).toEqual([...clientA.defIds].sort()); + // Both clients resolved to IDENTICAL state (the Yjs-convergence property). + expect(clientA).toEqual(clientB); + // Reuse: the three references stay "d"; one definition survives (first-wins). 
+ expect(clientA.refIds).toEqual(["d", "d", "d"]); + expect(clientA.defIds).toEqual(["d"]); + expect(clientA.defTexts).toEqual(["one"]); }); it("removes an orphan definition with no matching reference", () => { diff --git a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts index be955793..5834c1d5 100644 --- a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts +++ b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.orphan.test.ts @@ -13,36 +13,33 @@ function bodyMarkers(body: string): string[] { return [...body.matchAll(/\[\^([^\]\s]+)\]/g)].map((m) => m[1]); } -describe("extractFootnoteDefinitions: more definitions than markers (orphans)", () => { - // Body has ONE `[^d]` reference marker but THREE `[^d]:` definitions. The - // surplus definitions have no marker to pair with — they must NOT be silently - // merged into one footnote (the editor's last-wins sync would otherwise drop - // two of them). The dedup gives each colliding definition a deterministic - // derived id so all three survive as distinct footnoteDefinition nodes. +describe("extractFootnoteDefinitions: duplicate definition ids (first-wins)", () => { + // Body has ONE `[^d]` reference but THREE `[^d]:` definitions. Under the + // import model (#166) a duplicate definition id is FIRST-WINS: only the first + // definition is kept; the rest are DROPPED (and surfaced by analyzeFootnotes, + // not silently re-id'd into orphan footnotes as before). Reference markers are + // never rewritten, so repeated references would reuse the single footnote. 
const md = ["See[^d].", "", "[^d]: a", "[^d]: b", "[^d]: c"].join("\n"); - it("emits 3 DISTINCT definition ids: d, d__2, d__3 (derived scheme, in order)", () => { + it("keeps only the FIRST definition for the id (first-wins)", () => { const { section } = extractFootnoteDefinitions(md); const ids = defIds(section); - expect(ids).toEqual(["d", "d__2", "d__3"]); - // All distinct: nothing was merged away. - expect(new Set(ids).size).toBe(3); + expect(ids).toEqual(["d"]); }); - it("preserves each definition's text against its (possibly derived) id", () => { + it("keeps the first definition's text and drops the duplicates", () => { const { section } = extractFootnoteDefinitions(md); - // First definition keeps the original id and its text. expect(section).toContain('data-footnote-def data-id="d">

a

'); - // The two surplus definitions survive as orphans with derived ids. - expect(section).toContain('data-footnote-def data-id="d__2">

b

'); - expect(section).toContain('data-footnote-def data-id="d__3">

c

'); + // No derived `d__2` / `d__3` ids are emitted anymore. + expect(section).not.toContain("d__2"); + expect(section).not.toContain("d__3"); + // The dropped duplicate texts are not in the section. + expect(section).not.toContain("

b

"); + expect(section).not.toContain("

c

"); }); - it("leaves the SINGLE body marker as [^d] (no surplus marker to rewrite)", () => { + it("leaves the SINGLE body marker as [^d] (markers are never rewritten)", () => { const { body } = extractFootnoteDefinitions(md); - // There is exactly one reference marker and it is untouched: the keeper - // definition pairs with it. The orphan defs have no marker, so the body is - // unchanged except for the stripped definition lines. expect(bodyMarkers(body)).toEqual(["d"]); expect(body).toContain("See[^d]."); // The definition lines themselves were pulled OUT of the body. @@ -55,9 +52,21 @@ describe("extractFootnoteDefinitions: more definitions than markers (orphans)", const { section } = extractFootnoteDefinitions(md); expect(section.startsWith("
")).toBe(true); expect(section.endsWith("
")).toBe(true); - // Exactly three definition divs. - expect( - [...section.matchAll(/
{ + // Pandoc semantics: many `[^a]` references + one `[^a]:` definition = one + // footnote, shared. Markers are left intact so the editor numbers them as one. + const md = ["A[^a] B[^a] C[^a].", "", "[^a]: shared note"].join("\n"); + + it("emits exactly one definition and leaves every reference marker as [^a]", () => { + const { section, body } = extractFootnoteDefinitions(md); + expect(defIds(section)).toEqual(["a"]); + expect(section).toContain('data-footnote-def data-id="a">

shared note

'); + // All three reference markers stay `a` (no `a__2`/`a__3` minting). + expect(bodyMarkers(body)).toEqual(["a", "a", "a"]); }); }); diff --git a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts index b47cf4a4..58dd27d7 100644 --- a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts +++ b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts @@ -1,5 +1,4 @@ import { marked } from "marked"; -import { deriveFootnoteId } from "../../footnote/footnote-util"; /** * Pandoc/GFM footnote support for the marked (Markdown -> HTML) pipeline. @@ -53,10 +52,6 @@ function escapeAttr(value: string): string { return String(value).replace(/&/g, "&").replace(/"/g, """); } -function escapeRegExp(value: string): string { - return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} - /** * Extract `[^id]: text` definition lines from the markdown body, returning the * cleaned body plus a rendered
(empty string when no @@ -101,70 +96,32 @@ export function extractFootnoteDefinitions(markdown: string): { return { body: markdown, section: "" }; } - // De-duplicate colliding definition ids. Two definitions sharing an id (e.g. - // `[^d]: first` / `[^d]: second`) would otherwise collapse into one footnote - // downstream (the editor's last-wins sync). Rename each colliding id to a - // DETERMINISTIC derived one AND rewrite the corresponding `[^id]` reference - // marker so the (reference, definition) pairing stays 1:1. The FIRST - // definition keeps the id and pairs with the FIRST `[^id]` marker; the Nth - // duplicate gets the derived id `${id}__${N}` and rewrites the Nth `[^id]` - // marker. If there are fewer markers than definitions, the surplus definition - // keeps a derived (orphan) id so it is never silently merged away. - // - // The id is derived (deriveFootnoteId), NOT random: importing the same - // markdown through two paths (here and the MCP mirror) must yield identical - // ids, and re-importing the same markdown twice must be stable. - let dedupedBody = bodyLines.join("\n"); - // Every original definition id is reserved up front so a derived id can never - // collide with an unrelated original id present in the document. - const taken = new Set(definitions.map((d) => d.id)); - const seenDefIds = new Map(); // original id -> how many seen + // Duplicate definition ids (e.g. `[^d]: first` / `[^d]: second`): FIRST WINS, + // the rest are DROPPED. Reference markers are left UNTOUCHED so repeated `[^a]` + // references reuse the single footnote (Pandoc semantics, #166). This differs + // from the live editor's never-lose policy (resolveCollisions re-ids a + // duplicate definition into an orphan) on purpose: an import is an + // agent-authored artifact we sanitize, and the dropped duplicate is surfaced + // to the caller via analyzeFootnotes' `duplicateDefinitions` warning instead. 
+ const firstById = new Map(); // id -> first definition text for (const def of definitions) { - const originalId = def.id; - const count = seenDefIds.get(originalId) ?? 0; - seenDefIds.set(originalId, count + 1); - if (count === 0) continue; // first definition keeps its id - - // count is the 0-based number of PRIOR occurrences; this is occurrence - // (count + 1), i.e. 2 for the first duplicate, 3 for the next, ... - const newId = deriveFootnoteId(originalId, count + 1, taken); - taken.add(newId); - def.id = newId; - - // Rewrite the NEXT still-unrewritten `[^originalId]` marker that does not - // belong to the keeper definition. After a prior duplicate rewrote its - // marker (to `[^someNewId]`), it no longer matches `[^originalId]`, so the - // remaining matches are: index 0 = the keeper's marker (left alone), index 1 - // = this duplicate's marker. Rewrite index 1. - let occurrence = 0; - let rewritten = false; - const re = new RegExp(`\\[\\^${escapeRegExp(originalId)}\\]`, "g"); - dedupedBody = dedupedBody.replace(re, (match) => { - const idx = occurrence++; - if (!rewritten && idx === 1) { - rewritten = true; - return `[^${newId}]`; - } - return match; - }); - // If there was no second marker (more definitions than references), the - // duplicate simply survives as an orphan with its fresh id — no body change. + if (!firstById.has(def.id)) firstById.set(def.id, def.text); } - const defsHtml = definitions - .map((d) => { + const defsHtml = [...firstById.entries()] + .map(([id, text]) => { // Render the definition text as inline markdown so emphasis/links inside // a footnote survive the round-trip; wrap in a paragraph (the node's // content is paragraph+). - const inner = marked.parseInline(d.text || ""); + const inner = marked.parseInline(text || ""); return `

${inner}

`; }) .join(""); return { - body: dedupedBody, + body: bodyLines.join("\n"), section: `
${defsHtml}
`, }; } diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index a825dd03..28e5438e 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -9,6 +9,7 @@ import WebSocket from "ws"; import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js"; import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, } from "./lib/collaboration.js"; import { docmostExtensions } from "./lib/docmost-schema.js"; +import { analyzeFootnotes } from "./lib/footnote-analyze.js"; import { buildPageTree } from "./lib/tree.js"; import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js"; import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js"; @@ -566,7 +567,9 @@ export class DocmostClient { // Always fetch subpages to provide context to the agent let subpages = []; try { - subpages = await this.listSidebarPages(resultData.spaceId, pageId); + // `pageId` may be a slugId, but the sidebar-pages endpoint requires the + // UUID; `resultData.id` holds the resolved UUID returned by getPageRaw. + subpages = await this.listSidebarPages(resultData.spaceId, resultData.id); } catch (e) { console.warn("Failed to fetch subpages:", e); @@ -814,7 +817,11 @@ export class DocmostClient { if (title) { await this.client.post("/pages/update", { pageId: newPageId, title }); } - return this.getPage(newPageId); + const page = await this.getPage(newPageId); + // Surface non-fatal footnote problems (dangling refs, empty/duplicate + // definitions, markers in tables) so the agent can fix its markup (#166). + const { warnings } = analyzeFootnotes(content); + return warnings.length > 0 ? { ...page, footnoteWarnings: warnings } : page; } /** * Update a page's content from markdown and optionally its title. 
@@ -844,12 +851,15 @@ export class DocmostClient { } throw new Error(`Failed to update page content: ${error.message}`); } + const { warnings } = analyzeFootnotes(content); return { success: true, modified: true, message: "Page updated successfully.", pageId: pageId, verify: mutation.verify, + // Non-fatal footnote diagnostics (#166); omitted when there are none. + ...(warnings.length > 0 ? { footnoteWarnings: warnings } : {}), }; } /** @@ -1119,6 +1129,11 @@ export class DocmostClient { if (meta?.pageId && meta.pageId !== pageId) { result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`; } + // Non-fatal footnote diagnostics (#166), analyzed on the body (definitions + // and references live there, not in the front-matter/comments sections). + const { warnings } = analyzeFootnotes(body); + if (warnings.length > 0) + result.footnoteWarnings = warnings; return result; } /** @@ -2422,9 +2437,9 @@ export class DocmostClient { const raw = await this.getPageRaw(pageId); const current = raw.content || { type: "doc", content: [] }; runTransform(current); - // Exercise the same Yjs encoder the apply path uses, so the preview - // fails with the SAME descriptive error when the doc is not encodable - // instead of returning a misleadingly-green diff. + // Run an independent Yjs-encodability check (same sanitize + schema as the + // apply path), so the preview fails with the same descriptive error when + // the doc is not encodable instead of returning a misleadingly-green diff. 
assertYjsEncodable(newDoc); return { pushed: false, diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index fc72bbf3..dc4ef79c 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -285,44 +285,6 @@ const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/; function escapeFootnoteAttr(value) { return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;"); } -function escapeFootnoteRegExp(value) { - return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} -/** - * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of - * an original id `X` during definition dedup. - * - * EXACT MIRROR of editor-ext `deriveFootnoteId` - * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST - * STAY IN SYNC: the same markdown imported through the editor and through this - * MCP path has to produce identical ids, and the sync plugin (which re-ids on - * every collaborating client) relies on the same scheme to converge. NEVER use - * Math.random()/Date.now()/uuid here — a random id would diverge across clients. - * - * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped - * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in - * `taken` (the set of ids already present / already minted — pure doc state). - */ -function deriveFootnoteId(originalId, occurrence, taken) { - let candidate = `${originalId}__${occurrence}`; - let n = 0; - while (taken.has(candidate)) { - n += 1; - candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`; - } - return candidate; -} -/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). 
*/ -function footnoteSuffix(n) { - let out = ""; - let x = n; - while (x > 0) { - const rem = (x - 1) % 25; - out = String.fromCharCode(98 + rem) + out; // 98 = 'b' - x = Math.floor((x - 1) / 25); - } - return out; -} const footnoteRefMarkedExtension = { name: "footnoteRef", level: "inline", @@ -371,43 +333,22 @@ function extractFootnotes(markdown) { } if (defs.length === 0) return { body: markdown, section: "" }; - // De-duplicate colliding definition ids (mirror of editor-ext - // extractFootnoteDefinitions). Two definitions sharing an id would otherwise - // collapse into one footnote downstream; rename each colliding id to a - // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]` - // marker so the (reference, definition) pairing stays 1:1. Determinism lets - // the same markdown imported here and via the editor produce identical ids. - let dedupedBody = bodyLines.join("\n"); - const taken = new Set(defs.map((d) => d.id)); - const seenDefIds = new Map(); + // Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of + // editor-ext extractFootnoteDefinitions). Reference markers are left untouched + // so repeated `[^a]` references reuse the single footnote (Pandoc semantics, + // #166). The dropped duplicate is surfaced to the caller via analyzeFootnotes + // (`duplicateDefinitions`), not silently lost. MUST stay in sync with the + // editor-ext mirror. + const firstById = new Map(); // id -> first definition text for (const def of defs) { - const originalId = def.id; - const count = seenDefIds.get(originalId) ?? 0; - seenDefIds.set(originalId, count + 1); - if (count === 0) - continue; // first definition keeps its id - const newId = deriveFootnoteId(originalId, count + 1, taken); - taken.add(newId); - def.id = newId; - // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone), - // index 1 = this duplicate's marker. Rewrite index 1. 
- let occurrence = 0; - let rewritten = false; - const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g"); - dedupedBody = dedupedBody.replace(re, (match) => { - const idx = occurrence++; - if (!rewritten && idx === 1) { - rewritten = true; - return `[^${newId}]`; - } - return match; - }); + if (!firstById.has(def.id)) + firstById.set(def.id, def.text); } - const inner = defs - .map((d) => `

${marked.parseInline(d.text || "")}

`) + const inner = [...firstById.entries()] + .map(([id, text]) => `

${marked.parseInline(text || "")}

`) .join(""); return { - body: dedupedBody, + body: bodyLines.join("\n"), section: `
${inner}
`, }; } diff --git a/packages/mcp/build/lib/footnote-analyze.js b/packages/mcp/build/lib/footnote-analyze.js new file mode 100644 index 00000000..919674b3 --- /dev/null +++ b/packages/mcp/build/lib/footnote-analyze.js @@ -0,0 +1,115 @@ +/** + * Footnote diagnostics for imported Markdown (issue #166). + * + * A PURE, fence-aware text scan (independent of the Markdown->ProseMirror + * conversion path, so it reports the same problems for `create_page`, + * `update_page` and `import_page_markdown`). It never changes the document — the + * importer still creates the page; this only surfaces footnote problems to the + * caller so an agent can fix its own markup instead of shipping broken footnotes. + * + * Detected problems: + * - danglingReferences: a `[^id]` reference with no `[^id]:` definition. + * - emptyDefinitions: a `[^id]:` whose (kept) text is empty/whitespace. + * - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the + * first is kept on import — first-wins; see extractFootnotes). + * - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic: + * the line, trimmed, starts with `|`) — footnotes in table cells often do not + * render as expected. + */ +/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */ +const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */ +const REF_RE_G = /\[\^([^\]\s]+)\]/g; +/** Opening/closing fence marker (``` or ~~~). */ +const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; +/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ +function forEachReference(line, onRef) { + REF_RE_G.lastIndex = 0; + let m; + while ((m = REF_RE_G.exec(line)) !== null) + onRef(m[1]); +} +/** + * Analyze the footnotes in a Markdown string. Pure; safe to call on any body. 
+ */ +export function analyzeFootnotes(markdown) { + const lines = markdown.split("\n"); + // Distinct reference ids in first-appearance order, plus the set of ids seen + // inside a table row. + const refIds = []; + const refIdSet = new Set(); + const referencesInTables = new Set(); + const addRef = (id, inTable) => { + if (!refIdSet.has(id)) { + refIdSet.add(id); + refIds.push(id); + } + if (inTable) + referencesInTables.add(id); + }; + // Definition texts per id, in first-appearance order of the id. + const defTextsById = new Map(); + let fence = null; + for (const line of lines) { + const fenceMatch = FENCE_RE.exec(line); + if (fenceMatch) { + const marker = fenceMatch[2][0]; + if (fence === null) + fence = marker; + else if (marker === fence) + fence = null; + continue; + } + // Footnote syntax shown inside a code fence is not real markup. + if (fence !== null) + continue; + const defM = DEF_RE.exec(line); + if (defM) { + const id = defM[1]; + const text = defM[2]; + const arr = defTextsById.get(id); + if (arr) + arr.push(text); + else + defTextsById.set(id, [text]); + // A definition's TEXT can itself reference another footnote (`[^a]: see + // [^b]`); count those so such a `[^b]` is not falsely reported dangling. + forEachReference(text, (rid) => addRef(rid, false)); + continue; + } + const inTable = line.trimStart().startsWith("|"); + forEachReference(line, (id) => addRef(id, inTable)); + } + const danglingReferences = refIds.filter((id) => !defTextsById.has(id)); + const duplicateDefinitions = []; + const emptyDefinitions = []; + for (const [id, texts] of defTextsById) { + if (texts.length >= 2) + duplicateDefinitions.push(id); + // First-wins: the kept definition is the first one; flag it if it is blank. + if ((texts[0] ?? 
"").trim().length === 0) + emptyDefinitions.push(id); + } + const tableRefs = [...referencesInTables]; + const warnings = []; + const list = (ids) => ids.map((id) => `[^${id}]`).join(", "); + if (danglingReferences.length > 0) { + warnings.push(`Footnote reference(s) with no matching definition: ${list(danglingReferences)} (each will render as an empty footnote in the editor).`); + } + if (emptyDefinitions.length > 0) { + warnings.push(`Footnote definition(s) with empty text: ${list(emptyDefinitions)}.`); + } + if (duplicateDefinitions.length > 0) { + warnings.push(`Footnote id(s) defined more than once (only the first definition was kept): ${list(duplicateDefinitions)}.`); + } + if (tableRefs.length > 0) { + warnings.push(`Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${list(tableRefs)}.`); + } + return { + danglingReferences, + emptyDefinitions, + duplicateDefinitions, + referencesInTables: tableRefs, + warnings, + }; +} diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index bd891fc9..36ee85b6 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -23,6 +23,7 @@ import { MutationResult, } from "./lib/collaboration.js"; import { docmostExtensions } from "./lib/docmost-schema.js"; +import { analyzeFootnotes } from "./lib/footnote-analyze.js"; import { buildPageTree } from "./lib/tree.js"; import { serializeDocmostMarkdown, @@ -1054,7 +1055,11 @@ export class DocmostClient { await this.client.post("/pages/update", { pageId: newPageId, title }); } - return this.getPage(newPageId); + const page = await this.getPage(newPageId); + // Surface non-fatal footnote problems (dangling refs, empty/duplicate + // definitions, markers in tables) so the agent can fix its markup (#166). + const { warnings } = analyzeFootnotes(content); + return warnings.length > 0 ? 
{ ...page, footnoteWarnings: warnings } : page; } /** @@ -1095,12 +1100,15 @@ export class DocmostClient { throw new Error(`Failed to update page content: ${error.message}`); } + const { warnings } = analyzeFootnotes(content); return { success: true, modified: true, message: "Page updated successfully.", pageId: pageId, verify: mutation.verify, + // Non-fatal footnote diagnostics (#166); omitted when there are none. + ...(warnings.length > 0 ? { footnoteWarnings: warnings } : {}), }; } @@ -1416,6 +1424,10 @@ export class DocmostClient { if (meta?.pageId && meta.pageId !== pageId) { result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`; } + // Non-fatal footnote diagnostics (#166), analyzed on the body (definitions + // and references live there, not in the front-matter/comments sections). + const { warnings } = analyzeFootnotes(body); + if (warnings.length > 0) result.footnoteWarnings = warnings; return result; } diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index efc7bf17..178ff71b 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -323,51 +323,6 @@ function escapeFootnoteAttr(value: string): string { return String(value).replace(/&/g, "&amp;").replace(/"/g, "&quot;"); } -function escapeFootnoteRegExp(value: string): string { - return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); -} - -/** - * Derive a DETERMINISTIC unique footnote id for the k-th (k >= 2) occurrence of - * an original id `X` during definition dedup. - * - * EXACT MIRROR of editor-ext `deriveFootnoteId` - * (packages/editor-ext/src/lib/footnote/footnote-util.ts). These two copies MUST - * STAY IN SYNC: the same markdown imported through the editor and through this - * MCP path has to produce identical ids, and the sync plugin (which re-ids on - * every collaborating client) relies on the same scheme to converge. 
NEVER use - * Math.random()/Date.now()/uuid here — a random id would diverge across clients. - * - * Scheme: base candidate `${originalId}__${occurrence}` (e.g. `X__2`), bumped - * with a stable alphabetic suffix (`X__2b`, `X__2c`, ...) until it is not in - * `taken` (the set of ids already present / already minted — pure doc state). - */ -function deriveFootnoteId( - originalId: string, - occurrence: number, - taken: Set, -): string { - let candidate = `${originalId}__${occurrence}`; - let n = 0; - while (taken.has(candidate)) { - n += 1; - candidate = `${originalId}__${occurrence}${footnoteSuffix(n)}`; - } - return candidate; -} - -/** Map 1 -> "b", 2 -> "c", ... (mirror of editor-ext `suffix`). */ -function footnoteSuffix(n: number): string { - let out = ""; - let x = n; - while (x > 0) { - const rem = (x - 1) % 25; - out = String.fromCharCode(98 + rem) + out; // 98 = 'b' - x = Math.floor((x - 1) / 25); - } - return out; -} - const footnoteRefMarkedExtension = { name: "footnoteRef", level: "inline" as const, @@ -419,48 +374,27 @@ function extractFootnotes(markdown: string): { } if (defs.length === 0) return { body: markdown, section: "" }; - // De-duplicate colliding definition ids (mirror of editor-ext - // extractFootnoteDefinitions). Two definitions sharing an id would otherwise - // collapse into one footnote downstream; rename each colliding id to a - // DETERMINISTIC derived one (NOT random) and rewrite the corresponding `[^id]` - // marker so the (reference, definition) pairing stays 1:1. Determinism lets - // the same markdown imported here and via the editor produce identical ids. - let dedupedBody = bodyLines.join("\n"); - const taken = new Set(defs.map((d) => d.id)); - const seenDefIds = new Map(); + // Duplicate definition ids: FIRST WINS, the rest are DROPPED (mirror of + // editor-ext extractFootnoteDefinitions). Reference markers are left untouched + // so repeated `[^a]` references reuse the single footnote (Pandoc semantics, + // #166). 
The dropped duplicate is surfaced to the caller via analyzeFootnotes + // (`duplicateDefinitions`), not silently lost. MUST stay in sync with the + // editor-ext mirror. + const firstById = new Map(); // id -> first definition text for (const def of defs) { - const originalId = def.id; - const count = seenDefIds.get(originalId) ?? 0; - seenDefIds.set(originalId, count + 1); - if (count === 0) continue; // first definition keeps its id - const newId = deriveFootnoteId(originalId, count + 1, taken); - taken.add(newId); - def.id = newId; - // Remaining `[^originalId]` matches: index 0 = keeper's marker (left alone), - // index 1 = this duplicate's marker. Rewrite index 1. - let occurrence = 0; - let rewritten = false; - const re = new RegExp(`\\[\\^${escapeFootnoteRegExp(originalId)}\\]`, "g"); - dedupedBody = dedupedBody.replace(re, (match) => { - const idx = occurrence++; - if (!rewritten && idx === 1) { - rewritten = true; - return `[^${newId}]`; - } - return match; - }); + if (!firstById.has(def.id)) firstById.set(def.id, def.text); } - const inner = defs + const inner = [...firstById.entries()] .map( - (d) => + ([id, text]) => `

${marked.parseInline(d.text || "")}

`, + id, + )}">

${marked.parseInline(text || "")}

`, ) .join(""); return { - body: dedupedBody, + body: bodyLines.join("\n"), section: `
${inner}
`, }; } diff --git a/packages/mcp/src/lib/footnote-analyze.ts b/packages/mcp/src/lib/footnote-analyze.ts new file mode 100644 index 00000000..97264dbc --- /dev/null +++ b/packages/mcp/src/lib/footnote-analyze.ts @@ -0,0 +1,138 @@ +/** + * Footnote diagnostics for imported Markdown (issue #166). + * + * A PURE, fence-aware text scan (independent of the Markdown->ProseMirror + * conversion path, so it reports the same problems for `create_page`, + * `update_page` and `import_page_markdown`). It never changes the document — the + * importer still creates the page; this only surfaces footnote problems to the + * caller so an agent can fix its own markup instead of shipping broken footnotes. + * + * Detected problems: + * - danglingReferences: a `[^id]` reference with no `[^id]:` definition. + * - emptyDefinitions: a `[^id]:` whose (kept) text is empty/whitespace. + * - duplicateDefinitions: an id defined by two or more `[^id]:` lines (only the + * first is kept on import — first-wins; see extractFootnotes). + * - referencesInTables: a `[^id]` marker found in a GFM table row (heuristic: + * the line, trimmed, starts with `|`) — footnotes in table cells often do not + * render as expected. + */ + +/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */ +const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */ +const REF_RE_G = /\[\^([^\]\s]+)\]/g; +/** Opening/closing fence marker (``` or ~~~). */ +const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; + +export interface FootnoteDiagnostics { + /** Reference ids (distinct, document order) with no matching definition. */ + danglingReferences: string[]; + /** Definition ids whose first (kept) text is empty/whitespace. */ + emptyDefinitions: string[]; + /** Ids defined by two or more `[^id]:` lines (only the first is kept). */ + duplicateDefinitions: string[]; + /** Reference ids found inside a GFM table row (heuristic). 
*/ + referencesInTables: string[]; + /** Human-readable warning lines for the tool result (one per problem class). */ + warnings: string[]; +} + +/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ +function forEachReference(line: string, onRef: (id: string) => void): void { + REF_RE_G.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = REF_RE_G.exec(line)) !== null) onRef(m[1]); +} + +/** + * Analyze the footnotes in a Markdown string. Pure; safe to call on any body. + */ +export function analyzeFootnotes(markdown: string): FootnoteDiagnostics { + const lines = markdown.split("\n"); + + // Distinct reference ids in first-appearance order, plus the set of ids seen + // inside a table row. + const refIds: string[] = []; + const refIdSet = new Set(); + const referencesInTables = new Set(); + const addRef = (id: string, inTable: boolean) => { + if (!refIdSet.has(id)) { + refIdSet.add(id); + refIds.push(id); + } + if (inTable) referencesInTables.add(id); + }; + + // Definition texts per id, in first-appearance order of the id. + const defTextsById = new Map(); + + let fence: string | null = null; + for (const line of lines) { + const fenceMatch = FENCE_RE.exec(line); + if (fenceMatch) { + const marker = fenceMatch[2][0]; + if (fence === null) fence = marker; + else if (marker === fence) fence = null; + continue; + } + // Footnote syntax shown inside a code fence is not real markup. + if (fence !== null) continue; + + const defM = DEF_RE.exec(line); + if (defM) { + const id = defM[1]; + const text = defM[2]; + const arr = defTextsById.get(id); + if (arr) arr.push(text); + else defTextsById.set(id, [text]); + // A definition's TEXT can itself reference another footnote (`[^a]: see + // [^b]`); count those so such a `[^b]` is not falsely reported dangling. 
+ forEachReference(text, (rid) => addRef(rid, false)); + continue; + } + + const inTable = line.trimStart().startsWith("|"); + forEachReference(line, (id) => addRef(id, inTable)); + } + + const danglingReferences = refIds.filter((id) => !defTextsById.has(id)); + const duplicateDefinitions: string[] = []; + const emptyDefinitions: string[] = []; + for (const [id, texts] of defTextsById) { + if (texts.length >= 2) duplicateDefinitions.push(id); + // First-wins: the kept definition is the first one; flag it if it is blank. + if ((texts[0] ?? "").trim().length === 0) emptyDefinitions.push(id); + } + const tableRefs = [...referencesInTables]; + + const warnings: string[] = []; + const list = (ids: string[]) => ids.map((id) => `[^${id}]`).join(", "); + if (danglingReferences.length > 0) { + warnings.push( + `Footnote reference(s) with no matching definition: ${list(danglingReferences)} (each will render as an empty footnote in the editor).`, + ); + } + if (emptyDefinitions.length > 0) { + warnings.push( + `Footnote definition(s) with empty text: ${list(emptyDefinitions)}.`, + ); + } + if (duplicateDefinitions.length > 0) { + warnings.push( + `Footnote id(s) defined more than once (only the first definition was kept): ${list(duplicateDefinitions)}.`, + ); + } + if (tableRefs.length > 0) { + warnings.push( + `Footnote marker(s) inside a table row (footnotes in table cells may not render as expected): ${list(tableRefs)}.`, + ); + } + + return { + danglingReferences, + emptyDefinitions, + duplicateDefinitions, + referencesInTables: tableRefs, + warnings, + }; +} diff --git a/packages/mcp/test/unit/derive-id-parity.test.mjs b/packages/mcp/test/unit/derive-id-parity.test.mjs deleted file mode 100644 index cb74bc6f..00000000 --- a/packages/mcp/test/unit/derive-id-parity.test.mjs +++ /dev/null @@ -1,134 +0,0 @@ -import { test } from "node:test"; -import assert from "node:assert/strict"; - -import { markdownToProseMirror } from "../../build/lib/collaboration.js"; - -/** - * 
CROSS-PACKAGE DRIFT GUARD for the footnote id derivation scheme. - * - * `deriveFootnoteId` is duplicated in two places that MUST behave identically: - * - packages/editor-ext/src/lib/footnote/footnote-util.ts (exported) - * - packages/mcp/src/lib/collaboration.ts (internal helper) - * so the same markdown imported through the editor and through the MCP path - * derives identical footnote ids. - * - * The mcp copy is NOT exported from the compiled build (it is an internal helper - * of collaboration.js), and production source must not be modified to export it. - * So this test exercises the REAL compiled `deriveFootnoteId` *indirectly*, the - * same way production does: through `markdownToProseMirror`, which runs - * extractFootnotes -> deriveFootnoteId during duplicate-id dedup. We craft the - * `taken` set via literal pre-existing definition ids and read back the derived - * footnoteDefinition ids. - * - * GOLDEN below mirrors DERIVE_GOLDEN in - * packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts - * (asserted there by a DIRECT call). Same (originalId, occurrence, taken) -> - * same expected id. If the two copies drift, one of the two suites goes red. - */ - -/** The 25 single-letter suffixes the scheme uses (n=1..25): b, c, ..., z. */ -function singleLetterSuffixes() { - return Array.from({ length: 25 }, (_, i) => String.fromCharCode(98 + i)); -} - -// Identical matrix + expected values to the editor-ext golden table. 
-const GOLDEN = [ - { originalId: "d", occurrence: 2, taken: [], expected: "d__2" }, - { originalId: "d", occurrence: 3, taken: [], expected: "d__3" }, - { originalId: "d", occurrence: 2, taken: ["d__2"], expected: "d__2b" }, - { originalId: "d", occurrence: 2, taken: ["d__2", "d__2b"], expected: "d__2c" }, - { - originalId: "d", - occurrence: 2, - taken: ["d__2", "d__2b", "d__2c", "d__2d"], - expected: "d__2e", - }, - { - originalId: "d", - occurrence: 2, - taken: ["d__2", ...singleLetterSuffixes().map((s) => `d__2${s}`)], - expected: "d__2bb", - }, -]; - -/** Recursively collect every node of `type`. */ -function findAll(node, type, acc = []) { - if (!node || typeof node !== "object") return acc; - if (node.type === type) acc.push(node); - if (Array.isArray(node.content)) for (const c of node.content) findAll(c, type, acc); - return acc; -} - -/** - * Build markdown that drives the real `deriveFootnoteId(originalId, occurrence, - * taken)`: - * - `occurrence` duplicate definitions of `[^originalId]` so the dedup walk - * reaches the requested occurrence (occurrence=2 -> 1 keeper + 1 duplicate; - * occurrence=3 -> keeper + 2 duplicates, of which the LAST is the one whose - * id we read); - * - one literal pre-existing definition for every id in `taken`, each with its - * own reference marker so it is a real (non-orphan) definition. Those ids are - * reserved up-front in the dedup `taken` set, exactly forcing the bump. - * - * Returns the derived id of the FINAL duplicate of `originalId`. - */ -async function deriveViaMarkdown(originalId, occurrence, takenIds) { - // References: one [^originalId] per definition (keeper + duplicates) so each - // duplicate has a marker to pair with, plus one marker per taken id. 
- const dupCount = occurrence; // keeper + (occurrence-1) duplicates = `occurrence` defs - const refMarkers = []; - for (let i = 0; i < dupCount; i++) refMarkers.push(`[^${originalId}]`); - for (const id of takenIds) refMarkers.push(`[^${id}]`); - const refLine = `Body ${refMarkers.join(" ")}.`; - - // Definitions: `occurrence` copies of [^originalId]: ... then the taken ids. - const defLines = []; - for (let i = 0; i < dupCount; i++) { - defLines.push(`[^${originalId}]: copy ${i}`); - } - for (const id of takenIds) { - defLines.push(`[^${id}]: reserved ${id}`); - } - - const md = [refLine, "", ...defLines].join("\n"); - const json = await markdownToProseMirror(md); - const defIds = findAll(json, "footnoteDefinition").map((d) => d.attrs.id); - - // The derived id we want is the one that is neither the keeper (originalId), - // nor any reserved taken id, nor a lower-occurrence derived id. For - // occurrence=2 that is the single bumped id; for occurrence=3 it is the - // highest `${originalId}__3...` id. Compute it generically: among the def ids - // that start with `${originalId}__${occurrence}`, the expected one is present. - return { defIds, json }; -} - -for (const row of GOLDEN) { - test(`parity: derive("${row.originalId}", ${row.occurrence}, {${row.taken.join(",")}}) -> "${row.expected}"`, async () => { - const { defIds } = await deriveViaMarkdown( - row.originalId, - row.occurrence, - row.taken, - ); - // The real compiled deriveFootnoteId must have minted exactly the golden id. - assert.ok( - defIds.includes(row.expected), - `expected derived id "${row.expected}" among def ids ${JSON.stringify(defIds)}`, - ); - // And every id is distinct: nothing collapsed. - assert.equal(new Set(defIds).size, defIds.length, "all def ids distinct"); - }); -} - -test("parity: the simple keeper+two-duplicate case mints d, d__2, d__3", async () => { - // The canonical no-collision path, asserted as a whole set for clarity. 
- const md = [ - "See[^d] one[^d] two[^d].", - "", - "[^d]: first", - "[^d]: second", - "[^d]: third", - ].join("\n"); - const json = await markdownToProseMirror(md); - const defIds = findAll(json, "footnoteDefinition").map((d) => d.attrs.id); - assert.deepEqual([...defIds].sort(), ["d", "d__2", "d__3"]); -}); diff --git a/packages/mcp/test/unit/footnote-analyze.test.mjs b/packages/mcp/test/unit/footnote-analyze.test.mjs new file mode 100644 index 00000000..b2de1787 --- /dev/null +++ b/packages/mcp/test/unit/footnote-analyze.test.mjs @@ -0,0 +1,106 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { analyzeFootnotes } from "../../build/lib/footnote-analyze.js"; + +test("clean footnotes produce no diagnostics", () => { + const md = ["A[^a] and B[^b].", "", "[^a]: first", "[^b]: second"].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.danglingReferences, []); + assert.deepEqual(d.emptyDefinitions, []); + assert.deepEqual(d.duplicateDefinitions, []); + assert.deepEqual(d.referencesInTables, []); + assert.deepEqual(d.warnings, []); +}); + +test("reuse (repeated references to one definition) is NOT a warning", () => { + const md = ["A[^a] B[^a] C[^a].", "", "[^a]: shared"].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.danglingReferences, []); + assert.deepEqual(d.warnings, []); +}); + +test("dangling reference (no definition) is reported", () => { + const md = ["See[^missing] and[^a].", "", "[^a]: defined"].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.danglingReferences, ["missing"]); + assert.equal(d.warnings.length, 1); + assert.match(d.warnings[0], /no matching definition/); + assert.match(d.warnings[0], /\[\^missing\]/); +}); + +test("empty definition text is reported", () => { + const md = ["See[^a].", "", "[^a]: "].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.emptyDefinitions, ["a"]); + assert.match(d.warnings.join("\n"), /empty text/); +}); 
+ +test("duplicate definition id is reported (first-wins)", () => { + const md = ["See[^d].", "", "[^d]: first", "[^d]: second"].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.duplicateDefinitions, ["d"]); + assert.match(d.warnings.join("\n"), /defined more than once/); +}); + +test("reference inside a GFM table row is reported (heuristic)", () => { + const md = [ + "| Col |", + "| --- |", + "| cell[^t] |", + "", + "[^t]: table note", + ].join("\n"); + const d = analyzeFootnotes(md); + assert.deepEqual(d.referencesInTables, ["t"]); + assert.match(d.warnings.join("\n"), /table/); + // It is defined, so it is NOT also dangling. + assert.deepEqual(d.danglingReferences, []); +}); + +test("footnote syntax inside a code fence is ignored", () => { + const md = [ + "Intro.", + "", + "```", + "Example[^demo]", + "[^demo]: not a real definition", + "```", + "", + "Outro[^a].", + "", + "[^a]: real", + ].join("\n"); + const d = analyzeFootnotes(md); + // `[^demo]` lives only in the fenced block, so it is neither a reference nor a + // dangling one, and `[^demo]:` is not counted as a definition. + assert.deepEqual(d.danglingReferences, []); + assert.deepEqual(d.duplicateDefinitions, []); + assert.deepEqual(d.warnings, []); +}); + +test("a reference that only appears inside a definition's text is not dangling", () => { + // `[^b]` is referenced from within [^a]'s text and has its own definition. + const md = ["See[^a].", "", "[^a]: see also [^b]", "[^b]: the other"].join( + "\n", + ); + const d = analyzeFootnotes(md); + assert.deepEqual(d.danglingReferences, []); +}); + +test("multiple problem classes accumulate distinct warnings", () => { + const md = [ + "Ref[^x] and[^dup].", + "", + "[^dup]: one", + "[^dup]: two", + "[^empty]:", + ].join("\n"); + const d = analyzeFootnotes(md); + // x has no definition; dup is defined twice; empty is empty AND has no ref. 
+ assert.ok(d.danglingReferences.includes("x")); + assert.deepEqual(d.duplicateDefinitions, ["dup"]); + assert.deepEqual(d.emptyDefinitions, ["empty"]); + // One warning line per problem class present. + assert.ok(d.warnings.length >= 3); +}); diff --git a/packages/mcp/test/unit/footnotes.test.mjs b/packages/mcp/test/unit/footnotes.test.mjs index df45a7b9..67ec9bc5 100644 --- a/packages/mcp/test/unit/footnotes.test.mjs +++ b/packages/mcp/test/unit/footnotes.test.mjs @@ -90,11 +90,10 @@ test("JSON -> MD -> JSON preserves footnote ids and text", async () => { assert.match(md2, /\[\^fn2\]: Second note\./); }); -test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", async () => { - // The MCP import must derive duplicate ids deterministically (NOT random) so - // the same markdown imported here and via the editor produces identical ids, - // and re-importing is stable. This is the test that would FAIL on the old - // Math.random()/Date.now() implementation. +test("repeated references REUSE one footnote; duplicate definitions are first-wins (#166)", async () => { + // Reuse semantics: many `[^d]` references + several `[^d]:` definitions import + // as ONE footnote — the references all keep id "d" (reuse), and only the FIRST + // definition is kept (first-wins). Deterministic and stable across re-imports. 
const md = [ "See[^d] one[^d] two[^d].", "", @@ -106,21 +105,26 @@ test("duplicate-id markdown dedups DETERMINISTICALLY (same input -> same ids)", const idsOf = async () => { const json = await markdownToProseMirror(md); const refs = findAll(json, "footnoteReference").map((r) => r.attrs.id); - const defs = findAll(json, "footnoteDefinition").map((d) => d.attrs.id); - return { refs, defs }; + const defs = findAll(json, "footnoteDefinition"); + return { + refs, + defIds: defs.map((d) => d.attrs.id), + defText: defs + .map((d) => JSON.stringify(d).match(/"text":"([^"]*)"/)?.[1]) + .join("|"), + }; }; const a = await idsOf(); const b = await idsOf(); - // Identical across runs. - assert.deepEqual(a.refs, b.refs); - assert.deepEqual(a.defs, b.defs); - // Deterministic derived scheme: keeper "d", duplicates "d__2", "d__3". - assert.deepEqual([...a.defs].sort(), ["d", "d__2", "d__3"]); - // 1:1 reference <-> definition pairing, all distinct. - assert.equal(new Set(a.defs).size, 3); - assert.deepEqual([...a.refs].sort(), [...a.defs].sort()); + // Stable across runs. + assert.deepEqual(a, b); + // Reuse: all three reference markers stay "d". + assert.deepEqual(a.refs, ["d", "d", "d"]); + // First-wins: a single definition "d" with the FIRST text. + assert.deepEqual(a.defIds, ["d"]); + assert.equal(a.defText, "first"); }); test("a [^id]: line inside a fenced code block is NOT treated as a definition", async () => { From a0cc625dfe38fa1ed14b04beffa0bb60c2538c4d Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 16:16:30 +0300 Subject: [PATCH 06/43] refactor(footnotes): address PR #169 review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - footnote-sync: remove the now-dead `refReids` (CollisionPlan field, local, return, the 6a consumer loop) — references are never re-id'd under reuse, so it was dead structure on the hot reconciliation path. 
Rewrite the stale comments (plugin header, step 0, refOccurrences field) that still described the old "duplicates re-id'd so both survive" model to the reuse model. - Shared footnote lexer: new packages/mcp/src/lib/footnote-lex.ts (lexFootnoteLines + forEachFootnoteReference). extractFootnotes (collaboration) and analyzeFootnotes now consume the SAME fence-aware lexer, so "the analyzer sees exactly what the importer keeps/strips" is structural, not comment-kept. Removed the duplicated DEF_RE/fence machine from both consumers. - Tests: new mock test for the footnoteWarnings plumbing on createPage (problems -> field present; clean -> omitted); new paste-reuse case for TWO colliding pasted definitions (reservation -> distinct ids). Updated the derive-id golden test header (no MCP copy / parity test anymore). - CHANGELOG: [Unreleased] entries for footnote reuse (Changed, supersedes 0.93.0) and footnoteWarnings (Added). editor-ext 129, MCP 301, server roundtrip 2; client+server tsc clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 13 +++ .../src/lib/footnote/footnote-paste.test.ts | 64 ++++++++++ .../src/lib/footnote/footnote-sync.ts | 76 +++++------- .../footnote/footnote-util.derive-id.test.ts | 16 +-- packages/mcp/build/lib/collaboration.js | 31 ++--- packages/mcp/build/lib/footnote-analyze.js | 44 ++----- packages/mcp/build/lib/footnote-lex.js | 55 +++++++++ packages/mcp/src/lib/collaboration.ts | 27 ++--- packages/mcp/src/lib/footnote-analyze.ts | 48 +++----- packages/mcp/src/lib/footnote-lex.ts | 71 +++++++++++ .../mcp/test/mock/footnote-warnings.test.mjs | 110 ++++++++++++++++++ 11 files changed, 389 insertions(+), 166 deletions(-) create mode 100644 packages/mcp/build/lib/footnote-lex.js create mode 100644 packages/mcp/src/lib/footnote-lex.ts create mode 100644 packages/mcp/test/mock/footnote-warnings.test.mjs diff --git a/CHANGELOG.md b/CHANGELOG.md index efb96a72..9ab0ca99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,9 +20,22 @@ and 
this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `UPDATE users SET is_agent = true WHERE email = ''`. Never flag a human or shared account, or its normal edits get mis-attributed as AI. See the AI-agent block in `.env.example`. (#143) +- **Footnote import diagnostics.** The MCP page-write tools (`create_page`, + `update_page`, `import_page_markdown`) now return a `footnoteWarnings` array + flagging dangling references, empty or duplicate definitions, and `[^id]` + markers inside table rows, so an agent can fix its own markup. The page is + still created; the field is omitted when there are no problems. (#166) ### Changed +- **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the + same id are ONE footnote — one number, one definition, several back-references + — instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are + first-wins on import (the rest are dropped and reported via `footnoteWarnings`), + and a reference with no definition yields a single empty footnote rather than + one per occurrence. This supersedes the 0.93.0 "survive duplicate-id + definitions" behavior for the import path. (#166) + - **Public share AI: default per-workspace hourly assistant cap lowered 300 → 100.** The limiter falls back to this default whenever `SHARE_AI_WORKSPACE_MAX_PER_HOUR` is unset, so a `0.93.0` deployment that diff --git a/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts b/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts index 5790faf8..bd4057f9 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-paste.test.ts @@ -133,6 +133,70 @@ describe("footnotePastePlugin — reuse-aware id remap", () => { editor.destroy(); }); + it("re-ids TWO colliding pasted definitions to DISTINCT ids (reservation works)", () => { + // Existing doc has footnotes "a" and "b". 
Paste a slice that defines BOTH — + // each must get its own fresh id; the reservation (existing.add(newId)) keeps + // the second from deriving onto the first's new id. + const editor = new Editor({ + extensions, + content: { + type: "doc", + content: [ + { + type: "paragraph", + content: [ + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "a" } }, + { type: FOOTNOTE_REFERENCE_NAME, attrs: { id: "b" } }, + ], + }, + { + type: FOOTNOTES_LIST_NAME, + content: [ + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "a" }, + content: [{ type: "paragraph", content: [{ type: "text", text: "A" }] }], + }, + { + type: FOOTNOTE_DEFINITION_NAME, + attrs: { id: "b" }, + content: [{ type: "paragraph", content: [{ type: "text", text: "B" }] }], + }, + ], + }, + ], + }, + }); + const { schema } = editor; + const slice = new Slice( + Fragment.fromArray([ + schema.nodes.paragraph.create(null, [ + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "a" }), + schema.nodes[FOOTNOTE_REFERENCE_NAME].create({ id: "b" }), + ]), + schema.nodes[FOOTNOTES_LIST_NAME].create(null, [ + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "a" }, [ + schema.nodes.paragraph.create(null, [schema.text("pasted A")]), + ]), + schema.nodes[FOOTNOTE_DEFINITION_NAME].create({ id: "b" }, [ + schema.nodes.paragraph.create(null, [schema.text("pasted B")]), + ]), + ]), + ]), + 0, + 0, + ); + const out = paste(editor, slice); + const ids = sliceFootnoteIds(out); + const distinct = new Set(ids.map((x) => x.id)); + // Two ids, both remapped off the originals, and distinct from each other. 
+ expect(distinct.size).toBe(2); + expect(distinct.has("a")).toBe(false); + expect(distinct.has("b")).toBe(false); + expect([...distinct].sort()).toEqual(["a__2", "b__2"]); + editor.destroy(); + }); + it("leaves the slice untouched when no pasted definition collides", () => { const editor = makeEditorWithFootnoteA(); const { schema } = editor; diff --git a/packages/editor-ext/src/lib/footnote/footnote-sync.ts b/packages/editor-ext/src/lib/footnote/footnote-sync.ts index e861ed0e..d0891e1a 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-sync.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-sync.ts @@ -29,9 +29,9 @@ interface DefOccurrence { interface FootnoteScan { /** - * Every reference occurrence in document order (NOT de-duplicated). Needed so - * that duplicate ids — which would otherwise be silently collapsed — can be - * detected and (together with their definitions) re-id'd instead of dropped. + * Every reference occurrence in document order (NOT de-duplicated). Repeated + * ids are kept so the FIRST appearance fixes definition order; later repeats + * are reuse (same footnote) and are never re-id'd. */ refOccurrences: RefOccurrence[]; /** @@ -67,15 +67,13 @@ function scan(doc: ProseMirrorNode): FootnoteScan { } /** - * Result of resolving id collisions: a 1:1, de-duplicated pairing plan plus the - * concrete reference re-id edits that must be applied to the body so the doc no - * longer contains two footnotes sharing a single id. + * Result of resolving the footnote id topology: the distinct reference order and + * one definition node per id. * - * The overriding invariant is that NO definition is ever dropped here: every - * definition occurrence ends up with a unique id and therefore survives the - * canonical rebuild. Repeated references that share an id are REUSE (one - * footnote) and are left untouched; only duplicate DEFINITIONS are re-id'd, so a - * pasted/merged second `[^d]:` survives as its own (then orphaned) footnote. 
+ * References are NEVER re-id'd here — repeated ids are REUSE (one footnote). Only + * duplicate DEFINITIONS are re-id'd; lacking a matching reference, a re-id'd + * duplicate is then dropped by the orphan policy. No definition is ever dropped + * for COLLIDING — only for being an orphan. */ interface CollisionPlan { /** @@ -86,12 +84,6 @@ interface CollisionPlan { referenceIds: string[]; /** id -> definition node, after duplicate definitions were re-id'd. One per id. */ definitions: Map; - /** - * Body reference re-id edits. ALWAYS EMPTY under reuse semantics (references - * are never re-id'd); retained so the downstream consumer stays a harmless - * no-op rather than needing removal. - */ - refReids: Array<{ pos: number; node: ProseMirrorNode; newId: string }>; /** True when a duplicate definition required a re-id. */ changed: boolean; } @@ -123,13 +115,6 @@ interface CollisionPlan { */ function resolveCollisions(scan: FootnoteScan): CollisionPlan { const definitions = new Map(); - // References are never re-id'd under reuse semantics, so this stays empty; it - // is retained so the CollisionPlan shape (and its no-op consumer) is unchanged. - const refReids: Array<{ - pos: number; - node: ProseMirrorNode; - newId: string; - }> = []; const referenceIds: string[] = []; const seenRefIds = new Set(); let changed = false; @@ -173,7 +158,7 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan { } } - return { referenceIds, definitions, refReids, changed }; + return { referenceIds, definitions, changed }; } /** @@ -209,14 +194,13 @@ function resolveCollisions(scan: FootnoteScan): CollisionPlan { * ping-pong forever (list moved to end -> trailing paragraph appended -> list * no longer last -> moved again ...). 
* - * Duplicate-id collisions (two references and/or two definitions sharing one - * id — produced by importing `[^d]: a` / `[^d]: b`, or by pasting/duplicating a - * reference+definition pair) are resolved up front by resolveCollisions(): the - * duplicates are re-id'd to fresh unique ids so BOTH survive as distinct - * footnotes. This guarantees the overriding invariant — no footnoteDefinition is - * ever silently deleted by this automatic (addToHistory:false) transaction. A + * The id topology is resolved up front by resolveCollisions() (#166): repeated + * references sharing an id are REUSE — one footnote, never re-id'd — while a + * duplicate DEFINITION (from pasting/duplicating a definition, or a collab merge) + * is re-id'd to a fresh unique id. No footnoteDefinition is ever silently deleted + * by this automatic (addToHistory:false) transaction because of a COLLISION; a * definition is only ever removed when it has NO matching reference (orphan - * policy), never because its id collided with another. + * policy) — which is also what then drops a re-id'd duplicate definition. */ export function footnoteSyncPlugin( isRemoteTransaction?: (tr: Transaction) => boolean, @@ -247,18 +231,16 @@ export function footnoteSyncPlugin( const info = scan(doc); - // 0) Resolve duplicate-id collisions (two references and/or two - // definitions sharing one id) by re-id'ing duplicates to fresh unique - // ids. This is the critical defense: the old last-wins Map silently - // dropped all but the last definition for a shared id; here EVERY - // definition survives with a unique id, and duplicate references are - // paired with duplicate definitions so two same-id imports/pastes yield - // two distinct footnotes instead of one. 
+ // 0) Resolve the id topology (#166): repeated references that share an id + // are REUSE — collapsed to one entry in `referenceIds`, never re-id'd — + // while a duplicate DEFINITION is re-id'd to a fresh deterministic id + // (and, lacking a matching reference, removed by the orphan policy + // below). No definition is dropped for COLLIDING, only for being orphan. const plan = resolveCollisions(info); const referenceIds = plan.referenceIds; - // The set of ids that must have a definition, in reference order (after - // collision re-id). De-duplicated already by resolveCollisions. + // The set of ids that must have a definition, in reference order. + // De-duplicated already by resolveCollisions. const referenceIdSet = new Set(referenceIds); // 1) For each definition occurrence, compute the id it should END UP with @@ -361,21 +343,15 @@ export function footnoteSyncPlugin( // 6) Apply the targeted, minimal mutations in ONE transaction. We never // delete-and-recreate an unchanged definition subtree; we only: - // (a) re-id specific colliding references and definitions (attr-only), + // (a) re-id colliding definitions (attr-only), // (b) delete genuine orphan definitions and extra/empty lists, // (c) insert genuinely-missing empty definitions and migrate defs out // of extra lists into the primary list, // (d) create the primary list if references exist but none does yet. + // References are never re-id'd (reuse), so there is no reference edit. const tr = newState.tr; - // 6a) Re-id colliding references (inline atoms: attr-only, size-stable). - for (const reid of plan.refReids) { - tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, { - ...reid.node.attrs, - id: reid.newId, - }); - } - // 6b) Re-id colliding definitions IN PLACE (attr-only). This preserves the + // 6a) Re-id colliding definitions IN PLACE (attr-only). This preserves the // definition's content subtree — never delete+recreate it. 
for (const reid of defReidsToApply) { tr.setNodeMarkup(tr.mapping.map(reid.pos), undefined, { diff --git a/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts b/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts index 279c2b8c..07acab01 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts @@ -4,16 +4,12 @@ import { deriveFootnoteId } from "./footnote-util"; /** * GOLDEN TABLE for `deriveFootnoteId` (and its private alphabetic `suffix`). * - * deriveFootnoteId is DELIBERATELY duplicated in - * packages/mcp/src/lib/collaboration.ts - * and the two copies MUST stay byte-for-byte equivalent in behavior so the same - * markdown imported through the editor and through the MCP path yields identical - * footnote ids. This table is the SHARED contract: the parity test - * packages/mcp/test/unit/derive-id-parity.test.mjs - * pins the exact SAME (input -> expected) pairs against the COMPILED mcp build. - * If either copy drifts, one of the two tests goes red. - * - * Keep this constant in sync with GOLDEN in the mcp parity test. + * `deriveFootnoteId` lives ONLY in editor-ext now — it is used by + * `resolveCollisions` (re-id of a duplicate definition) and `footnotePastePlugin` + * (re-id of a pasted colliding definition). The MCP/marked import paths no longer + * derive ids (duplicate definitions there are first-wins-dropped, #166), so there + * is no cross-package copy and no parity test to keep in sync. This table pins the + * deterministic scheme so a future change to it is a conscious one. 
*/ export const DERIVE_GOLDEN: Array<{ originalId: string; diff --git a/packages/mcp/build/lib/collaboration.js b/packages/mcp/build/lib/collaboration.js index dc4ef79c..87f0ef8a 100644 --- a/packages/mcp/build/lib/collaboration.js +++ b/packages/mcp/build/lib/collaboration.js @@ -10,6 +10,7 @@ import { JSDOM } from "jsdom"; import { docmostExtensions, docmostSchema } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; +import { lexFootnoteLines } from "./footnote-lex.js"; import { summarizeChange } from "./diff.js"; /** * Build the descriptive error for an opaque Yjs encode failure ("Unexpected @@ -280,7 +281,8 @@ function bridgeTaskLists(html) { // Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline // marker becomes , and `[^id]: text` // definition lines are collected into a single
. -const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +// Definition detection + fence handling are shared with analyzeFootnotes via +// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's. const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/; function escapeFootnoteAttr(value) { return String(value).replace(/&/g, "&").replace(/"/g, """); @@ -308,28 +310,17 @@ marked.use({ extensions: [footnoteRefMarkedExtension] }); *
for them (or "" when there are none). */ function extractFootnotes(markdown) { - const lines = markdown.split("\n"); const bodyLines = []; const defs = []; - // Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code - // block is preserved verbatim and not treated as a footnote definition. - let fence = null; - for (const line of lines) { - const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line); - if (fenceMatch) { - const marker = fenceMatch[2][0]; - if (fence === null) - fence = marker; - else if (marker === fence) - fence = null; - bodyLines.push(line); - continue; - } - const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null; - if (m) - defs.push({ id: m[1], text: m[2] }); + // Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code + // block is inert and stays in the body verbatim; only real definition lines + // are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics + // match exactly what import keeps/strips (#166). + for (const tok of lexFootnoteLines(markdown)) { + if (!tok.inFence && tok.definition) + defs.push(tok.definition); else - bodyLines.push(line); + bodyLines.push(tok.line); } if (defs.length === 0) return { body: markdown, section: "" }; diff --git a/packages/mcp/build/lib/footnote-analyze.js b/packages/mcp/build/lib/footnote-analyze.js index 919674b3..598148cd 100644 --- a/packages/mcp/build/lib/footnote-analyze.js +++ b/packages/mcp/build/lib/footnote-analyze.js @@ -16,24 +16,11 @@ * the line, trimmed, starts with `|`) — footnotes in table cells often do not * render as expected. */ -/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). */ -const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; -/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */ -const REF_RE_G = /\[\^([^\]\s]+)\]/g; -/** Opening/closing fence marker (``` or ~~~). 
*/ -const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; -/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ -function forEachReference(line, onRef) { - REF_RE_G.lastIndex = 0; - let m; - while ((m = REF_RE_G.exec(line)) !== null) - onRef(m[1]); -} +import { lexFootnoteLines, forEachFootnoteReference, } from "./footnote-lex.js"; /** * Analyze the footnotes in a Markdown string. Pure; safe to call on any body. */ export function analyzeFootnotes(markdown) { - const lines = markdown.split("\n"); // Distinct reference ids in first-appearance order, plus the set of ids seen // inside a table row. const refIds = []; @@ -49,24 +36,13 @@ export function analyzeFootnotes(markdown) { }; // Definition texts per id, in first-appearance order of the id. const defTextsById = new Map(); - let fence = null; - for (const line of lines) { - const fenceMatch = FENCE_RE.exec(line); - if (fenceMatch) { - const marker = fenceMatch[2][0]; - if (fence === null) - fence = marker; - else if (marker === fence) - fence = null; + // Same lexer the importer uses, so the analysis matches exactly what import + // keeps/strips (#166): fenced lines are inert, definition lines are pulled. + for (const tok of lexFootnoteLines(markdown)) { + if (tok.inFence) continue; - } - // Footnote syntax shown inside a code fence is not real markup. - if (fence !== null) - continue; - const defM = DEF_RE.exec(line); - if (defM) { - const id = defM[1]; - const text = defM[2]; + if (tok.definition) { + const { id, text } = tok.definition; const arr = defTextsById.get(id); if (arr) arr.push(text); @@ -74,11 +50,11 @@ export function analyzeFootnotes(markdown) { defTextsById.set(id, [text]); // A definition's TEXT can itself reference another footnote (`[^a]: see // [^b]`); count those so such a `[^b]` is not falsely reported dangling. 
- forEachReference(text, (rid) => addRef(rid, false)); + forEachFootnoteReference(text, (rid) => addRef(rid, false)); continue; } - const inTable = line.trimStart().startsWith("|"); - forEachReference(line, (id) => addRef(id, inTable)); + const inTable = tok.line.trimStart().startsWith("|"); + forEachFootnoteReference(tok.line, (id) => addRef(id, inTable)); } const danglingReferences = refIds.filter((id) => !defTextsById.has(id)); const duplicateDefinitions = []; diff --git a/packages/mcp/build/lib/footnote-lex.js b/packages/mcp/build/lib/footnote-lex.js new file mode 100644 index 00000000..3c22d149 --- /dev/null +++ b/packages/mcp/build/lib/footnote-lex.js @@ -0,0 +1,55 @@ +/** + * Shared, fence-aware line lexer for footnote markdown (MCP-internal). + * + * Both the importer (`extractFootnotes` in collaboration.ts, which strips + * definition lines and rebuilds a footnotes section) and the diagnostics + * (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines + * are definitions and which lines are inert (inside a code fence). Sharing one + * lexer makes "the analyzer sees what the importer leaves" a structural property + * instead of two hand-kept copies that can drift (#166 review). + * + * NOTE: this is deliberately NOT shared with editor-ext's + * `extractFootnoteDefinitions` — that lives in a different package and the + * decoupling between the editor and the MCP mirror is intentional. + */ +/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */ +export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +/** Every footnote REFERENCE `[^id]` in a line (global; id captured). */ +export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g; +/** Opening/closing code fence marker (``` or ~~~). */ +const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; +/** Classify every line of `markdown`, tracking fenced-code state. Pure. 
*/ +export function lexFootnoteLines(markdown) { + const out = []; + let fence = null; + for (const line of markdown.split("\n")) { + const fenceMatch = FENCE_RE.exec(line); + if (fenceMatch) { + const marker = fenceMatch[2][0]; + if (fence === null) + fence = marker; // opening fence + else if (marker === fence) + fence = null; // matching closing fence + out.push({ line, inFence: true, definition: null }); + continue; + } + if (fence !== null) { + out.push({ line, inFence: true, definition: null }); + continue; + } + const m = FOOTNOTE_DEF_RE.exec(line); + out.push({ + line, + inFence: false, + definition: m ? { id: m[1], text: m[2] } : null, + }); + } + return out; +} +/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ +export function forEachFootnoteReference(line, onRef) { + FOOTNOTE_REF_RE_G.lastIndex = 0; + let m; + while ((m = FOOTNOTE_REF_RE_G.exec(line)) !== null) + onRef(m[1]); +} diff --git a/packages/mcp/src/lib/collaboration.ts b/packages/mcp/src/lib/collaboration.ts index 178ff71b..aec82aa1 100644 --- a/packages/mcp/src/lib/collaboration.ts +++ b/packages/mcp/src/lib/collaboration.ts @@ -10,6 +10,7 @@ import { JSDOM } from "jsdom"; import { docmostExtensions, docmostSchema } from "./docmost-schema.js"; import { withPageLock } from "./page-lock.js"; import { sanitizeForYjs, findUnstorableAttr } from "./node-ops.js"; +import { lexFootnoteLines } from "./footnote-lex.js"; import { summarizeChange, VerifyReport } from "./diff.js"; /** @@ -316,7 +317,8 @@ function bridgeTaskLists(html: string): string { // Mirror of packages/editor-ext footnote markdown handling. A `[^id]` inline // marker becomes , and `[^id]: text` // definition lines are collected into a single
. -const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +// Definition detection + fence handling are shared with analyzeFootnotes via +// lexFootnoteLines (footnote-lex.js). FOOTNOTE_REF_RE is the inline tokenizer's. const FOOTNOTE_REF_RE = /\[\^([^\]\s]+)\]/; function escapeFootnoteAttr(value: string): string { @@ -353,24 +355,15 @@ function extractFootnotes(markdown: string): { body: string; section: string; } { - const lines = markdown.split("\n"); const bodyLines: string[] = []; const defs: Array<{ id: string; text: string }> = []; - // Track fenced-code state so a `[^id]: ...` line shown inside a ``` / ~~~ code - // block is preserved verbatim and not treated as a footnote definition. - let fence: string | null = null; - for (const line of lines) { - const fenceMatch = /^(\s*)(`{3,}|~{3,})/.exec(line); - if (fenceMatch) { - const marker = fenceMatch[2][0]; - if (fence === null) fence = marker; - else if (marker === fence) fence = null; - bodyLines.push(line); - continue; - } - const m = fence === null ? FOOTNOTE_DEF_RE.exec(line) : null; - if (m) defs.push({ id: m[1], text: m[2] }); - else bodyLines.push(line); + // Shared lexer (footnote-lex): a `[^id]: ...` line inside a ``` / ~~~ code + // block is inert and stays in the body verbatim; only real definition lines + // are pulled out. analyzeFootnotes() consumes the SAME lexer so its diagnostics + // match exactly what import keeps/strips (#166). + for (const tok of lexFootnoteLines(markdown)) { + if (!tok.inFence && tok.definition) defs.push(tok.definition); + else bodyLines.push(tok.line); } if (defs.length === 0) return { body: markdown, section: "" }; diff --git a/packages/mcp/src/lib/footnote-analyze.ts b/packages/mcp/src/lib/footnote-analyze.ts index 97264dbc..e6e0d2b9 100644 --- a/packages/mcp/src/lib/footnote-analyze.ts +++ b/packages/mcp/src/lib/footnote-analyze.ts @@ -17,12 +17,10 @@ * render as expected. */ -/** Matches a footnote DEFINITION line: `[^id]: text` (id + text captured). 
*/ -const DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; -/** Matches every footnote REFERENCE `[^id]` in a line (global; id captured). */ -const REF_RE_G = /\[\^([^\]\s]+)\]/g; -/** Opening/closing fence marker (``` or ~~~). */ -const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; +import { + lexFootnoteLines, + forEachFootnoteReference, +} from "./footnote-lex.js"; export interface FootnoteDiagnostics { /** Reference ids (distinct, document order) with no matching definition. */ @@ -37,19 +35,10 @@ export interface FootnoteDiagnostics { warnings: string[]; } -/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. */ -function forEachReference(line: string, onRef: (id: string) => void): void { - REF_RE_G.lastIndex = 0; - let m: RegExpExecArray | null; - while ((m = REF_RE_G.exec(line)) !== null) onRef(m[1]); -} - /** * Analyze the footnotes in a Markdown string. Pure; safe to call on any body. */ export function analyzeFootnotes(markdown: string): FootnoteDiagnostics { - const lines = markdown.split("\n"); - // Distinct reference ids in first-appearance order, plus the set of ids seen // inside a table row. const refIds: string[] = []; @@ -66,33 +55,22 @@ export function analyzeFootnotes(markdown: string): FootnoteDiagnostics { // Definition texts per id, in first-appearance order of the id. const defTextsById = new Map(); - let fence: string | null = null; - for (const line of lines) { - const fenceMatch = FENCE_RE.exec(line); - if (fenceMatch) { - const marker = fenceMatch[2][0]; - if (fence === null) fence = marker; - else if (marker === fence) fence = null; - continue; - } - // Footnote syntax shown inside a code fence is not real markup. - if (fence !== null) continue; - - const defM = DEF_RE.exec(line); - if (defM) { - const id = defM[1]; - const text = defM[2]; + // Same lexer the importer uses, so the analysis matches exactly what import + // keeps/strips (#166): fenced lines are inert, definition lines are pulled. 
+ for (const tok of lexFootnoteLines(markdown)) { + if (tok.inFence) continue; + if (tok.definition) { + const { id, text } = tok.definition; const arr = defTextsById.get(id); if (arr) arr.push(text); else defTextsById.set(id, [text]); // A definition's TEXT can itself reference another footnote (`[^a]: see // [^b]`); count those so such a `[^b]` is not falsely reported dangling. - forEachReference(text, (rid) => addRef(rid, false)); + forEachFootnoteReference(text, (rid) => addRef(rid, false)); continue; } - - const inTable = line.trimStart().startsWith("|"); - forEachReference(line, (id) => addRef(id, inTable)); + const inTable = tok.line.trimStart().startsWith("|"); + forEachFootnoteReference(tok.line, (id) => addRef(id, inTable)); } const danglingReferences = refIds.filter((id) => !defTextsById.has(id)); diff --git a/packages/mcp/src/lib/footnote-lex.ts b/packages/mcp/src/lib/footnote-lex.ts new file mode 100644 index 00000000..30da676b --- /dev/null +++ b/packages/mcp/src/lib/footnote-lex.ts @@ -0,0 +1,71 @@ +/** + * Shared, fence-aware line lexer for footnote markdown (MCP-internal). + * + * Both the importer (`extractFootnotes` in collaboration.ts, which strips + * definition lines and rebuilds a footnotes section) and the diagnostics + * (`analyzeFootnotes` in footnote-analyze.ts) must agree EXACTLY on which lines + * are definitions and which lines are inert (inside a code fence). Sharing one + * lexer makes "the analyzer sees what the importer leaves" a structural property + * instead of two hand-kept copies that can drift (#166 review). + * + * NOTE: this is deliberately NOT shared with editor-ext's + * `extractFootnoteDefinitions` — that lives in a different package and the + * decoupling between the editor and the MCP mirror is intentional. + */ + +/** A footnote DEFINITION line: `[^id]: text` (id + text captured). */ +export const FOOTNOTE_DEF_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; +/** Every footnote REFERENCE `[^id]` in a line (global; id captured). 
*/ +export const FOOTNOTE_REF_RE_G = /\[\^([^\]\s]+)\]/g; +/** Opening/closing code fence marker (``` or ~~~). */ +const FENCE_RE = /^(\s*)(`{3,}|~{3,})/; + +export interface FootnoteLine { + /** The raw line, verbatim. */ + line: string; + /** + * True for a code-fence marker line AND every line inside a fence — footnote + * syntax on such lines is inert (example text, not real markup). The importer + * keeps these in the body; the analyzer skips them. + */ + inFence: boolean; + /** The parsed definition, when this is a `[^id]: text` line OUTSIDE any fence. */ + definition: { id: string; text: string } | null; +} + +/** Classify every line of `markdown`, tracking fenced-code state. Pure. */ +export function lexFootnoteLines(markdown: string): FootnoteLine[] { + const out: FootnoteLine[] = []; + let fence: string | null = null; + for (const line of markdown.split("\n")) { + const fenceMatch = FENCE_RE.exec(line); + if (fenceMatch) { + const marker = fenceMatch[2][0]; + if (fence === null) fence = marker; // opening fence + else if (marker === fence) fence = null; // matching closing fence + out.push({ line, inFence: true, definition: null }); + continue; + } + if (fence !== null) { + out.push({ line, inFence: true, definition: null }); + continue; + } + const m = FOOTNOTE_DEF_RE.exec(line); + out.push({ + line, + inFence: false, + definition: m ? { id: m[1], text: m[2] } : null, + }); + } + return out; +} + +/** Scan a line for every `[^id]` reference, invoking `onRef(id)` for each. 
*/ +export function forEachFootnoteReference( + line: string, + onRef: (id: string) => void, +): void { + FOOTNOTE_REF_RE_G.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = FOOTNOTE_REF_RE_G.exec(line)) !== null) onRef(m[1]); +} diff --git a/packages/mcp/test/mock/footnote-warnings.test.mjs b/packages/mcp/test/mock/footnote-warnings.test.mjs new file mode 100644 index 00000000..2f8e0b7d --- /dev/null +++ b/packages/mcp/test/mock/footnote-warnings.test.mjs @@ -0,0 +1,110 @@ +// Mock-HTTP test for the footnoteWarnings plumbing (#166). createPage is the +// representative path that is fully plain-HTTP (import + getPage) and so is +// mockable here; updatePage / importPageMarkdown attach footnoteWarnings with the +// IDENTICAL wiring (`analyzeFootnotes(...)` + spread-when-non-empty) but run their +// mutation over the Hocuspocus collab WebSocket, which this plain-HTTP harness +// does not stand up. The analyzer itself is unit-tested in footnote-analyze.test. +import { test, after } from "node:test"; +import assert from "node:assert/strict"; +import http from "node:http"; +import { DocmostClient } from "../../build/client.js"; + +function readBody(req) { + return new Promise((resolve) => { + let raw = ""; + req.on("data", (c) => (raw += c)); + req.on("end", () => resolve(raw)); + }); +} + +function sendJson(res, status, obj, extraHeaders = {}) { + res.writeHead(status, { "Content-Type": "application/json", ...extraHeaders }); + res.end(JSON.stringify(obj)); +} + +const openServers = []; +function spawn(handler) { + return new Promise((resolve) => { + const server = http.createServer(handler); + openServers.push(server); + server.listen(0, "127.0.0.1", () => { + const { port } = server.address(); + resolve(`http://127.0.0.1:${port}/api`); + }); + }); +} + +after(async () => { + await Promise.all( + openServers.map((s) => new Promise((r) => s.close(r))), + ); +}); + +// A handler that imports a page, lets getPage read it back, and 404s everything +// else 
(listSidebarPages fails gracefully inside getPage). +function pageHandler() { + return async (req, res) => { + await readBody(req); + if (req.url === "/api/auth/login") { + sendJson(res, 200, { success: true }, { + "Set-Cookie": "authToken=t; Path=/; HttpOnly", + }); + return; + } + if (req.url === "/api/pages/import") { + sendJson(res, 200, { data: { id: "new-1" } }); + return; + } + if (req.url === "/api/pages/update") { + // The title-restore step after import. + sendJson(res, 200, { data: { id: "new-1" } }); + return; + } + if (req.url === "/api/pages/info") { + sendJson(res, 200, { + data: { + id: "new-1", + slugId: "slug-1", + title: "T", + spaceId: "sp-1", + content: { type: "doc", content: [] }, + }, + }); + return; + } + sendJson(res, 404, { message: "not found" }); + }; +} + +test("createPage attaches footnoteWarnings when the content has footnote problems", async () => { + const baseURL = await spawn(pageHandler()); + const client = new DocmostClient(baseURL, "user@example.com", "pw"); + // A dangling reference + a duplicate definition + a table marker. + const content = [ + "Intro[^missing] and| cell[^t] |.", + "", + "[^d]: one", + "[^d]: two", + "[^t]: in table", + ].join("\n"); + const result = await client.createPage("T", content, "sp-1"); + assert.ok(Array.isArray(result.footnoteWarnings), "footnoteWarnings present"); + const joined = result.footnoteWarnings.join("\n"); + assert.match(joined, /no matching definition/); // dangling [^missing] + assert.match(joined, /defined more than once/); // duplicate [^d] + // The page itself is still returned. 
+ assert.equal(result.success, true); +}); + +test("createPage omits footnoteWarnings when the content is clean", async () => { + const baseURL = await spawn(pageHandler()); + const client = new DocmostClient(baseURL, "user@example.com", "pw"); + const content = ["A[^a] and reuse[^a].", "", "[^a]: fine"].join("\n"); + const result = await client.createPage("T", content, "sp-1"); + assert.equal( + "footnoteWarnings" in result, + false, + "no footnoteWarnings field on clean input", + ); + assert.equal(result.success, true); +}); From 0e8af1312208cbdc3574118b1457637b6824720c Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 16:44:53 +0300 Subject: [PATCH 07/43] test(footnotes): cover footnoteWarnings import plumbing + doc fixes (#169 second review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to the merged #166/#169. Addresses the second review pass (comment 1227): - footnoteWarnings plumbing: extract a single `footnoteWarningsField(markdown)` helper (footnote-analyze) and use it at all three call sites (create_page, update_page, import_page_markdown) so the field is attached identically. - New unit test footnote-warnings-import.test.mjs pins the contract that was uncovered: the field is present on problems / omitted on clean input, and the IMPORT path analyzes the BODY after the docmost:meta / docmost:comments blocks (a footnote-like token inside those JSON blocks must NOT warn; a real body marker must). Tested via the same pure composition the importer uses (footnoteWarningsField(parseDocmostMarkdown(full).body)) — no collab socket needed; a regression that analyzed fullMarkdown or skipped the body split would now go red. - footnote.marked.ts: correct the stale module header — it claimed "only definitions that have a matching reference are emitted", which was never true (orphan defs are emitted; the editor sync plugin reconciles). Now describes first-wins + reuse + sync reconciliation. 
- derive-id golden test: rename the describe from "(cross-package drift guard)" to "(deterministic-scheme pin)" — there is no second package to drift against. editor-ext 129, MCP 304 (+3), client+server tsc clean. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../footnote/footnote-util.derive-id.test.ts | 2 +- .../src/lib/markdown/utils/footnote.marked.ts | 8 ++- packages/mcp/build/client.js | 18 +++--- packages/mcp/build/lib/footnote-analyze.js | 10 +++ packages/mcp/src/client.ts | 17 +++-- packages/mcp/src/lib/footnote-analyze.ts | 13 ++++ .../unit/footnote-warnings-import.test.mjs | 63 +++++++++++++++++++ 7 files changed, 109 insertions(+), 22 deletions(-) create mode 100644 packages/mcp/test/unit/footnote-warnings-import.test.mjs diff --git a/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts b/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts index 07acab01..96d448ae 100644 --- a/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts +++ b/packages/editor-ext/src/lib/footnote/footnote-util.derive-id.test.ts @@ -52,7 +52,7 @@ function singleLetterSuffixes(): string[] { return Array.from({ length: 25 }, (_, i) => String.fromCharCode(98 + i)); } -describe("deriveFootnoteId golden table (cross-package drift guard)", () => { +describe("deriveFootnoteId golden table (deterministic-scheme pin)", () => { for (const row of DERIVE_GOLDEN) { it(`derive("${row.originalId}", ${row.occurrence}, {${row.taken.join(",")}}) === "${row.expected}" — ${row.why}`, () => { const got = deriveFootnoteId( diff --git a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts index 58dd27d7..6ad09ece 100644 --- a/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts +++ b/packages/editor-ext/src/lib/markdown/utils/footnote.marked.ts @@ -12,8 +12,12 @@ import { marked } from "marked"; * single
with one
per * definition, so the round-trip rebuilds footnotesList + footnoteDefinition. * - * Only definitions that have a matching reference are emitted (and vice-versa - * the sync plugin fills any gaps on the editor side), keeping the output valid. + * Every FIRST definition line is emitted — duplicate ids are first-wins (the + * rest are dropped, and surfaced via analyzeFootnotes), and reference markers are + * left untouched so repeated `[^a]` references reuse the one footnote (#166). + * Orphan definitions (no matching reference) are still emitted here; the editor's + * sync plugin reconciles the final reference/definition set (drops orphans, + * synthesizes a single empty definition for a reference that lacks one). */ const DEFINITION_RE = /^\[\^([^\]\s]+)\]:[ \t]*(.*)$/; diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index 28e5438e..302d2a15 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -9,7 +9,7 @@ import WebSocket from "ws"; import { convertProseMirrorToMarkdown } from "./lib/markdown-converter.js"; import { updatePageContentRealtime, replacePageContent, markdownToProseMirror, mutatePageContent, buildCollabWsUrl, assertYjsEncodable, } from "./lib/collaboration.js"; import { docmostExtensions } from "./lib/docmost-schema.js"; -import { analyzeFootnotes } from "./lib/footnote-analyze.js"; +import { footnoteWarningsField } from "./lib/footnote-analyze.js"; import { buildPageTree } from "./lib/tree.js"; import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js"; import { replaceNodeById, deleteNodeById, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js"; @@ -820,8 +820,7 @@ export class DocmostClient { const page = await this.getPage(newPageId); // Surface non-fatal footnote problems (dangling refs, empty/duplicate // definitions, markers in tables) so the agent can fix its markup (#166). 
- const { warnings } = analyzeFootnotes(content); - return warnings.length > 0 ? { ...page, footnoteWarnings: warnings } : page; + return { ...page, ...footnoteWarningsField(content) }; } /** * Update a page's content from markdown and optionally its title. @@ -851,7 +850,6 @@ export class DocmostClient { } throw new Error(`Failed to update page content: ${error.message}`); } - const { warnings } = analyzeFootnotes(content); return { success: true, modified: true, @@ -859,7 +857,7 @@ export class DocmostClient { pageId: pageId, verify: mutation.verify, // Non-fatal footnote diagnostics (#166); omitted when there are none. - ...(warnings.length > 0 ? { footnoteWarnings: warnings } : {}), + ...footnoteWarningsField(content), }; } /** @@ -1129,11 +1127,11 @@ export class DocmostClient { if (meta?.pageId && meta.pageId !== pageId) { result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`; } - // Non-fatal footnote diagnostics (#166), analyzed on the body (definitions - // and references live there, not in the front-matter/comments sections). - const { warnings } = analyzeFootnotes(body); - if (warnings.length > 0) - result.footnoteWarnings = warnings; + // Non-fatal footnote diagnostics (#166), analyzed on the BODY (the part after + // the docmost:meta / docmost:comments blocks) — so a `[^x]`-like token inside + // those JSON blocks never produces a false warning, while real markers in the + // body do. `body` comes from parseDocmostMarkdown(fullMarkdown) above. 
+ Object.assign(result, footnoteWarningsField(body)); return result; } /** diff --git a/packages/mcp/build/lib/footnote-analyze.js b/packages/mcp/build/lib/footnote-analyze.js index 598148cd..0bae93c7 100644 --- a/packages/mcp/build/lib/footnote-analyze.js +++ b/packages/mcp/build/lib/footnote-analyze.js @@ -89,3 +89,13 @@ export function analyzeFootnotes(markdown) { warnings, }; } +/** + * The optional `footnoteWarnings` field for a page-write tool result: present + * (with the warning lines) only when `markdown` has footnote problems, omitted + * otherwise. One helper so all three call sites (create/update/import) attach the + * field identically. Spread into the result: `{ ...result, ...footnoteWarningsField(text) }`. + */ +export function footnoteWarningsField(markdown) { + const { warnings } = analyzeFootnotes(markdown); + return warnings.length > 0 ? { footnoteWarnings: warnings } : {}; +} diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 36ee85b6..5a8aaaf7 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -23,7 +23,7 @@ import { MutationResult, } from "./lib/collaboration.js"; import { docmostExtensions } from "./lib/docmost-schema.js"; -import { analyzeFootnotes } from "./lib/footnote-analyze.js"; +import { footnoteWarningsField } from "./lib/footnote-analyze.js"; import { buildPageTree } from "./lib/tree.js"; import { serializeDocmostMarkdown, @@ -1058,8 +1058,7 @@ export class DocmostClient { const page = await this.getPage(newPageId); // Surface non-fatal footnote problems (dangling refs, empty/duplicate // definitions, markers in tables) so the agent can fix its markup (#166). - const { warnings } = analyzeFootnotes(content); - return warnings.length > 0 ? 
{ ...page, footnoteWarnings: warnings } : page; + return { ...page, ...footnoteWarningsField(content) }; } /** @@ -1100,7 +1099,6 @@ export class DocmostClient { throw new Error(`Failed to update page content: ${error.message}`); } - const { warnings } = analyzeFootnotes(content); return { success: true, modified: true, @@ -1108,7 +1106,7 @@ export class DocmostClient { pageId: pageId, verify: mutation.verify, // Non-fatal footnote diagnostics (#166); omitted when there are none. - ...(warnings.length > 0 ? { footnoteWarnings: warnings } : {}), + ...footnoteWarningsField(content), }; } @@ -1424,10 +1422,11 @@ export class DocmostClient { if (meta?.pageId && meta.pageId !== pageId) { result.warning = `File was exported from page ${meta.pageId} but is being imported into ${pageId}.`; } - // Non-fatal footnote diagnostics (#166), analyzed on the body (definitions - // and references live there, not in the front-matter/comments sections). - const { warnings } = analyzeFootnotes(body); - if (warnings.length > 0) result.footnoteWarnings = warnings; + // Non-fatal footnote diagnostics (#166), analyzed on the BODY (the part after + // the docmost:meta / docmost:comments blocks) — so a `[^x]`-like token inside + // those JSON blocks never produces a false warning, while real markers in the + // body do. `body` comes from parseDocmostMarkdown(fullMarkdown) above. + Object.assign(result, footnoteWarningsField(body)); return result; } diff --git a/packages/mcp/src/lib/footnote-analyze.ts b/packages/mcp/src/lib/footnote-analyze.ts index e6e0d2b9..b259ea00 100644 --- a/packages/mcp/src/lib/footnote-analyze.ts +++ b/packages/mcp/src/lib/footnote-analyze.ts @@ -114,3 +114,16 @@ export function analyzeFootnotes(markdown: string): FootnoteDiagnostics { warnings, }; } + +/** + * The optional `footnoteWarnings` field for a page-write tool result: present + * (with the warning lines) only when `markdown` has footnote problems, omitted + * otherwise. 
One helper so all three call sites (create/update/import) attach the + * field identically. Spread into the result: `{ ...result, ...footnoteWarningsField(text) }`. + */ +export function footnoteWarningsField(markdown: string): { + footnoteWarnings?: string[]; +} { + const { warnings } = analyzeFootnotes(markdown); + return warnings.length > 0 ? { footnoteWarnings: warnings } : {}; +} diff --git a/packages/mcp/test/unit/footnote-warnings-import.test.mjs b/packages/mcp/test/unit/footnote-warnings-import.test.mjs new file mode 100644 index 00000000..e9abab52 --- /dev/null +++ b/packages/mcp/test/unit/footnote-warnings-import.test.mjs @@ -0,0 +1,63 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { + analyzeFootnotes, + footnoteWarningsField, +} from "../../build/lib/footnote-analyze.js"; +import { + serializeDocmostMarkdown, + parseDocmostMarkdown, +} from "../../build/lib/markdown-document.js"; + +// Pins the footnoteWarnings PLUMBING contract (#169 review): the field is +// present only on problems and omitted on clean input, AND `import_page_markdown` +// analyzes the BODY (after the docmost:meta / docmost:comments blocks) — so a +// footnote-like token inside those JSON blocks never warns, while a real marker +// in the body does. importPageMarkdown does exactly +// `footnoteWarningsField(parseDocmostMarkdown(full).body)` over a collab socket +// this harness does not stand up, so we test the same pure composition directly. 
+ +test("footnoteWarningsField is present on problems and omitted on clean input", () => { + const problem = footnoteWarningsField("See[^missing].\n\n[^a]: defined"); + assert.ok(Array.isArray(problem.footnoteWarnings)); + assert.match(problem.footnoteWarnings.join("\n"), /no matching definition/); + + const clean = footnoteWarningsField("A[^a] and reuse[^a].\n\n[^a]: fine"); + assert.deepEqual(clean, {}); // no key at all on clean input +}); + +test("import analyzes the BODY only — tokens inside meta/comments never warn", () => { + // meta + comments JSON carry `[^metaonly]` / `[^commentonly]`-looking text; the + // BODY has a genuinely dangling `[^bodyref]`. + const full = serializeDocmostMarkdown( + { pageId: "p1", note: "front-matter mentions [^metaonly] in text" }, + "Body with a dangling[^bodyref] marker.", + [{ id: "c1", content: "a comment that says [^commentonly]" }], + ); + + const { body } = parseDocmostMarkdown(full); + // Sanity: the meta/comments markers are NOT in the parsed body. + assert.ok(!body.includes("[^metaonly]")); + assert.ok(!body.includes("[^commentonly]")); + + const field = footnoteWarningsField(body); + const joined = (field.footnoteWarnings ?? []).join("\n"); + // ONLY the body's dangling reference is flagged. + assert.match(joined, /\[\^bodyref\]/); + assert.ok(!joined.includes("metaonly")); + assert.ok(!joined.includes("commentonly")); + + // Cross-check against analyzeFootnotes directly (same composition the importer uses). 
+ assert.deepEqual(analyzeFootnotes(body).danglingReferences, ["bodyref"]); +}); + +test("import on a clean body yields no footnoteWarnings field", () => { + const full = serializeDocmostMarkdown( + { pageId: "p1" }, + "Clean body[^a] reusing[^a].\n\n[^a]: ok", + [], + ); + const { body } = parseDocmostMarkdown(full); + assert.deepEqual(footnoteWarningsField(body), {}); +}); From 255bc06883efa59cb8b283b6fd1638a8cda1e02b Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 17:11:50 +0300 Subject: [PATCH 08/43] fix(mcp): tool allowlist stored/read as jsonb string, not array MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opening the edit form for an MCP server that has a saved tool allowlist crashed the whole settings page (`TypeError: Ke.map is not a function` in Mantine) — and, worse, the allowlist was silently NOT enforced. Both stem from one root cause: the `tool_allowlist` jsonb column round-trips as a JSON STRING, not an array. Root cause: `jsonbArray` bound `JSON.stringify(value)` (already a JSON string) straight to a `::jsonb` cast. node-postgres infers the param type as jsonb and JSON-stringifies it a SECOND time, so the column stored a jsonb STRING SCALAR (`"[\"a\"]"`, jsonb_typeof = string) instead of an array. On read the driver hands back the JS string `'["a"]'`. Then: - the edit form's TagsInput called `.map` on a string -> page crash; - mcp-clients did `Array.isArray(allow)` -> false for a string -> fell through to "no restriction" and exposed ALL of the server's tools. Fix (both verified on the stand): - Write: `jsonbArray` casts `::text::jsonb` so the param is bound as text (sent verbatim) and parsed into a real jsonb array. New rows now store jsonb_typeof=array. - Read: `normalizeRow` runs every fetched row through `parseToolAllowlist`, which returns `string[] | null` for both shapes (already-array passes through; a JSON string is parsed; null/invalid -> null). 
This REPAIRS existing double-encoded rows on read, so the UI and the allowlist enforcement work without a data migration. Applied in findById / listByWorkspace / listEnabled. - Client: defensive `Array.isArray(...) ? ... : []` guard in the form so a bad shape can never take the settings page down again. Tests: ai-mcp-server.repo.spec (8 cases for parseToolAllowlist — array, the JSON-string read, null, empty, non-array json, unparseable, non-string elements, non-string primitive). mcp-servers-to-view + mcp-namespacing still green. Verified live: an old double-encoded row now reads as an array; a newly created server stores jsonb_typeof=array. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../components/ai-mcp-server-form.tsx | 8 ++- .../repos/ai-chat/ai-mcp-server.repo.spec.ts | 48 +++++++++++++++++ .../repos/ai-chat/ai-mcp-server.repo.ts | 51 +++++++++++++++++-- 3 files changed, 102 insertions(+), 5 deletions(-) create mode 100644 apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.spec.ts diff --git a/apps/client/src/features/workspace/components/settings/components/ai-mcp-server-form.tsx b/apps/client/src/features/workspace/components/settings/components/ai-mcp-server-form.tsx index da823ec6..a3d07a94 100644 --- a/apps/client/src/features/workspace/components/settings/components/ai-mcp-server-form.tsx +++ b/apps/client/src/features/workspace/components/settings/components/ai-mcp-server-form.tsx @@ -56,7 +56,13 @@ function buildInitialValues(server?: IAiMcpServer): FormValues { transport: server?.transport ?? "http", url: server?.url ?? "", authHeader: "", - toolAllowlist: server?.toolAllowlist ?? [], + // Defensive: TagsInput calls `.map`, so a non-array here (e.g. an API that + // returns the jsonb column as a JSON string) would crash the whole page. The + // server normalizes this now, but guard anyway so a bad shape can never take + // the settings UI down. + toolAllowlist: Array.isArray(server?.toolAllowlist) + ? 
server.toolAllowlist + : [], enabled: server?.enabled ?? true, }; } diff --git a/apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.spec.ts b/apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.spec.ts new file mode 100644 index 00000000..a04b77aa --- /dev/null +++ b/apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.spec.ts @@ -0,0 +1,48 @@ +import { parseToolAllowlist } from './ai-mcp-server.repo'; + +/** + * The `tool_allowlist` jsonb column historically round-trips as a JSON STRING + * (rows written by the old double-encoding `jsonbArray`), so the driver hands + * back `'["a","b"]'` instead of an array. `parseToolAllowlist` normalizes both + * shapes to the `string[] | null` the entity type promises — fixing the settings + * UI crash (TagsInput `.map` on a string) and the tool-allowlist enforcement + * (which did `Array.isArray(allow)` and silently allowed ALL tools for a string). + */ +describe('parseToolAllowlist', () => { + it('passes a real string array through unchanged', () => { + expect(parseToolAllowlist(['search', 'crawl'])).toEqual(['search', 'crawl']); + }); + + it('parses a JSON-string array (the double-encoded read) into an array', () => { + // This is exactly what the DB returns for an old row: a jsonb string scalar. 
+ expect(parseToolAllowlist('["alpha","beta"]')).toEqual(['alpha', 'beta']); + }); + + it('returns null for null / undefined (unrestricted)', () => { + expect(parseToolAllowlist(null)).toBeNull(); + expect(parseToolAllowlist(undefined)).toBeNull(); + }); + + it('returns [] for an empty array (no items, but a present allowlist)', () => { + expect(parseToolAllowlist([])).toEqual([]); + }); + + it('returns null for a JSON string that is not an array', () => { + expect(parseToolAllowlist('"justastring"')).toBeNull(); + expect(parseToolAllowlist('{"a":1}')).toBeNull(); + }); + + it('returns null for an unparseable string', () => { + expect(parseToolAllowlist('not json at all')).toBeNull(); + }); + + it('returns null when elements are not all strings (defensive)', () => { + expect(parseToolAllowlist([1, 2, 3] as unknown)).toBeNull(); + expect(parseToolAllowlist('[1,2,3]')).toBeNull(); + }); + + it('returns null for a non-string, non-array primitive', () => { + expect(parseToolAllowlist(42 as unknown)).toBeNull(); + expect(parseToolAllowlist(true as unknown)).toBeNull(); + }); +}); diff --git a/apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.ts b/apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.ts index f91f4af5..a0f2da50 100644 --- a/apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.ts +++ b/apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.ts @@ -21,32 +21,35 @@ export class AiMcpServerRepo { id: string, workspaceId: string, ): Promise { - return this.db + const row = await this.db .selectFrom('aiMcpServers') .selectAll('aiMcpServers') .where('id', '=', id) .where('workspaceId', '=', workspaceId) .executeTakeFirst(); + return row ? 
normalizeRow(row) : row; } async listByWorkspace(workspaceId: string): Promise { - return this.db + const rows = await this.db .selectFrom('aiMcpServers') .selectAll('aiMcpServers') .where('workspaceId', '=', workspaceId) .orderBy('createdAt', 'asc') .execute(); + return rows.map(normalizeRow); } /** Enabled servers only — used by the agent loop to build the toolset. */ async listEnabled(workspaceId: string): Promise { - return this.db + const rows = await this.db .selectFrom('aiMcpServers') .selectAll('aiMcpServers') .where('workspaceId', '=', workspaceId) .where('enabled', '=', true) .orderBy('createdAt', 'asc') .execute(); + return rows.map(normalizeRow); } async insert( @@ -130,6 +133,14 @@ export class AiMcpServerRepo { * Encode a string[] as a jsonb bind for the `tool_allowlist` column. Passing a * plain JS array to the postgres driver would serialize it as a Postgres array * literal (incompatible with jsonb), so we bind the JSON text and cast it. + * + * The cast is `::text::jsonb`, NOT `::jsonb`: if the parameter is bound straight + * to a jsonb cast, node-postgres infers its type as jsonb and JSON-stringifies + * the (already-JSON) string a SECOND time, so the column ends up holding a jsonb + * STRING SCALAR (`"[\"a\"]"`) instead of a jsonb ARRAY. Forcing the param through + * `::text` first binds it as text (sent verbatim), and `::jsonb` then parses it + * into a real array. (`normalizeRow` below repairs rows written the old way.) + * * Returns null for null/empty arrays (an empty allowlist means "no restriction" * is not intended — callers pass null to clear; an empty array is normalized to * null here so it never round-trips as `[]`). @@ -139,5 +150,37 @@ function jsonbArray(value: string[] | null | undefined) { return null; } // Typed as string[] so it is assignable to the toolAllowlist column. 
- return sql`${JSON.stringify(value)}::jsonb`; + return sql`${JSON.stringify(value)}::text::jsonb`; +} + +/** + * Parse the `toolAllowlist` value read from the DB into the `string[] | null` + * the entity type promises. The jsonb column historically round-trips as a JSON + * STRING (rows written by the old double-encoding `jsonbArray`, see above), so + * the driver hands back a string like `'["a","b"]'` rather than an array. Be + * tolerant: an already-parsed array passes through; a JSON string is parsed; null + * / a non-array / unparseable value becomes null (unrestricted). + */ +export function parseToolAllowlist(value: unknown): string[] | null { + if (value == null) return null; + if (Array.isArray(value)) { + return value.every((v) => typeof v === 'string') ? (value as string[]) : null; + } + if (typeof value === 'string') { + try { + const parsed = JSON.parse(value); + return Array.isArray(parsed) && + parsed.every((v) => typeof v === 'string') + ? (parsed as string[]) + : null; + } catch { + return null; + } + } + return null; +} + +/** Normalize a DB row so `toolAllowlist` is always `string[] | null`. */ +function normalizeRow(row: AiMcpServer): AiMcpServer { + return { ...row, toolAllowlist: parseToolAllowlist(row.toolAllowlist) }; } From 4cc8df836f1bb88e45bca6252a5f7a0b4bd7176e Mon Sep 17 00:00:00 2001 From: claude_code Date: Wed, 24 Jun 2026 21:24:05 +0300 Subject: [PATCH 09/43] chore(ai): passive z.ai provider HTTP telemetry (#175) Investigate the intermittent (~20-30%) long-turn failure "Lost connection to the AI provider" = AI_RetryError / read ECONNRESET on the gitmost->z.ai link (browser-agnostic, mid-turn). Pure instrumentation, no behavior change: - ai-http-diagnostics.ts: a passive fetch wrapper injected into the OpenAI-compatible (z.ai) client. Per provider HTTP call it logs time-to-headers/status on success, and on a pre-response rejection the latency, error code/cause, request-body size and idle-gap since the previous call. 
The Response is returned untouched (streaming intact), errors rethrown unchanged; no retry/timeout/dispatcher. - ai.service.ts: wire the instrumented fetch into the openai case only. Lets us classify the reset as connection-phase vs mid-stream before choosing a fix, without repeating the reverted RetryAgent (#140). Co-Authored-By: Claude Opus 4.8 --- .../integrations/ai/ai-http-diagnostics.ts | 75 +++++++++++++++++++ apps/server/src/integrations/ai/ai.service.ts | 17 ++++- 2 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 apps/server/src/integrations/ai/ai-http-diagnostics.ts diff --git a/apps/server/src/integrations/ai/ai-http-diagnostics.ts b/apps/server/src/integrations/ai/ai-http-diagnostics.ts new file mode 100644 index 00000000..eb9beeb2 --- /dev/null +++ b/apps/server/src/integrations/ai/ai-http-diagnostics.ts @@ -0,0 +1,75 @@ +import { Logger } from '@nestjs/common'; + +/** + * DIAGNOSTIC (provider ECONNRESET investigation) — temporary. + * + * A PASSIVE, behavior-neutral wrapper around the global `fetch`, injected into + * the OpenAI-compatible provider client (`createOpenAI({ fetch })`, the z.ai + * path). Per provider HTTP call it logs: time-to-response-headers + status + + * request-body size on success; and on a pre-response rejection the failure + * latency + error code/cause + request-body size + the idle gap since the + * previous provider call. It NEVER retries, times out, swaps the dispatcher, or + * reads/clones the response body — the Response is returned untouched (streaming + * unaffected) and any error is rethrown unchanged. + * + * How to read the result (a long agentic turn makes one provider call per step): + * - a failed turn whose last provider line is "PRE-RESPONSE FAILED ... 
ECONNRESET" + * => the reset is in the CONNECTION phase of a step's request (the provider + * never replied) — usually a poisoned keep-alive socket or the provider/middle + * box resetting that request (large body / idle gap are the suspects, hence + * reqBytes + idleSincePrevCall below). + * - the last line is "OK status=200" and the turn still errors with NO + * "PRE-RESPONSE FAILED" => the cut happened MID-STREAM (after headers), a + * different failure mode. + * + * The seq/last-call timestamps are module-level, so under concurrent turns the + * idle-gap figure is approximate (fine for single-user reproduction). + */ +export function createDiagnosticFetch(context: string): typeof fetch { + const logger = new Logger(context); + let callSeq = 0; + let lastCallStartedAt: number | undefined; + + return async (input: Parameters[0], init?: Parameters[1]): Promise => { + const callId = ++callSeq; + const startedAt = Date.now(); + const idleSincePrev = + lastCallStartedAt === undefined ? undefined : startedAt - lastCallStartedAt; + lastCallStartedAt = startedAt; + // Request body size: the chat payload is a JSON string. Used to test whether + // failures correlate with the large accumulated context on later agent steps. + const body = init?.body as unknown; + const bodyBytes = + typeof body === 'string' + ? body.length + : body instanceof Uint8Array + ? body.byteLength + : undefined; + try { + // Delegate to global fetch; return the Response UNTOUCHED (never read/clone + // the body) so the streamed SSE response is unaffected. + const res = await fetch(input, init); + logger.log( + `provider HTTP DIAGNOSTIC: call#${callId} OK ` + + `headersAfter=${Date.now() - startedAt}ms status=${res.status} ` + + `reqBytes=${bodyBytes ?? 'n/a'} idleSincePrevCall=${idleSincePrev ?? 'n/a'}ms`, + ); + return res; + } catch (err) { + // fetch() rejected => PRE-RESPONSE failure (no headers/body received yet): + // the connection/request phase. Log it and rethrow the SAME error. 
+ const e = err as { + name?: string; + message?: string; + cause?: { code?: string; message?: string }; + }; + logger.warn( + `provider HTTP DIAGNOSTIC: call#${callId} PRE-RESPONSE FAILED ` + + `after=${Date.now() - startedAt}ms code=${e?.cause?.code ?? 'none'} ` + + `name=${e?.name ?? 'Error'} cause=${e?.cause?.message ?? e?.message ?? 'unknown'} ` + + `reqBytes=${bodyBytes ?? 'n/a'} idleSincePrevCall=${idleSincePrev ?? 'n/a'}ms`, + ); + throw err; + } + }; +} diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts index 078de791..4f72d23b 100644 --- a/apps/server/src/integrations/ai/ai.service.ts +++ b/apps/server/src/integrations/ai/ai.service.ts @@ -14,6 +14,8 @@ import { AiNotConfiguredException } from './ai-not-configured.exception'; import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception'; import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception'; import { describeProviderError } from './ai-error.util'; +// DIAGNOSTIC (provider ECONNRESET investigation) — temporary. +import { createDiagnosticFetch } from './ai-http-diagnostics'; import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo'; import { SecretBoxService } from '../crypto/secret-box'; import { AiDriver } from './ai.types'; @@ -43,6 +45,13 @@ export interface ChatModelOverride { export class AiService { private readonly logger = new Logger(AiService.name); + // DIAGNOSTIC (provider ECONNRESET investigation) — temporary: passive + // instrumentation of the OpenAI-compatible provider HTTP calls (z.ai). + // Logs call timing/outcome only — no behavior change. 
+ private readonly aiDiagnosticFetch = createDiagnosticFetch( + 'AiService:provider-http', + ); + constructor( private readonly aiSettings: AiSettingsService, private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo, @@ -140,7 +149,13 @@ export class AiService { // Responses API (/responses), which OpenAI-compatible gateways // (OpenRouter, etc.) reject on multi-turn requests (history with // assistant messages) → 400. - return createOpenAI({ apiKey, baseURL: baseUrl }).chat(chatModel); + // DIAGNOSTIC (provider ECONNRESET investigation) — temporary: pass the + // passive instrumented fetch (logging only; no behavior change). + return createOpenAI({ + apiKey, + baseURL: baseUrl, + fetch: this.aiDiagnosticFetch, + }).chat(chatModel); case 'gemini': return createGoogleGenerativeAI({ apiKey })(chatModel); case 'ollama': From a14560c7c9c092d5b9032d6eb76b790ba1a868e8 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 21:50:41 +0300 Subject: [PATCH 10/43] fix(ai-chat): raise undici's 300s stream timeout for long agent turns (#175) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Long research turns failed mid-task with "Lost connection to the AI provider". Node's global fetch (undici) defaults BOTH headersTimeout and bodyTimeout to 300_000ms, and the chat provider + the external-MCP dispatcher both ran on it with no override, so: - the z.ai chat stream dropped when a late step's huge accumulated context pushed the model's time-to-first-token past 5 min (the model reasons server-side with NO streamed reasoning, so the connection is silent until the first answer token — reproduced: even a trivial glm-5.2 query has a ~4-8s first-chunk gap; a long run reaches 400k+-token steps), or a reasoning model paused >5 min between chunks (bodyTimeout); - the crawl4ai SSE transport, held open across the whole turn, dropped when it idled >5 min between tool calls. 
Fix: a dedicated undici dispatcher whose stream timeouts are raised to a generous-but-FINITE silence timeout (default 15 min, AI_STREAM_TIMEOUT_MS) on each path. NOT disabled (0): that would let a genuinely hung provider — with the client still connected — hang forever, since the turn's abortSignal only fires on client disconnect. The timeout bounds SILENCE (time-to-first-byte and the gap BETWEEN chunks), NOT total turn duration, so an arbitrarily long turn that keeps streaming is never cut; only a stream quiet for >15 min is treated as a hang. - ai-streaming-fetch.ts: createStreamingFetch() + streamTimeoutMs() / streamingDispatcherOptions() (the shared, configurable timeout). - ai.service: the chat provider fetch is createStreamingFetch(), wrapped by the existing passive ECONNRESET telemetry (createDiagnosticFetch gained an optional baseFetch) so the telemetry observes the SAME transport. - mcp-clients: the SSRF-pinned Agent uses streamingDispatcherOptions(). Investigation: reproduced the transport mechanism against the real z.ai endpoint (a 1ms headersTimeout throws UND_ERR_HEADERS_TIMEOUT — the exact drop) and ran the actual research agent to a ~428k-token context. Verified the fixed path streams cleanly live (glm-5.2 turns finish; telemetry confirms the streaming fetch is in use). Tests: ai-streaming-fetch.spec (default 15m + env override + invalid fallback + both-timeouts + streams a delayed response); ai-http-diagnostics + ai/mcp specs green. server tsc clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../external-mcp/mcp-clients.service.ts | 11 +++ .../integrations/ai/ai-http-diagnostics.ts | 14 +++- .../ai/ai-streaming-fetch.spec.ts | 78 +++++++++++++++++++ .../src/integrations/ai/ai-streaming-fetch.ts | 58 ++++++++++++++ apps/server/src/integrations/ai/ai.service.ts | 10 ++- 5 files changed, 164 insertions(+), 7 deletions(-) create mode 100644 apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts create mode 100644 apps/server/src/integrations/ai/ai-streaming-fetch.ts diff --git a/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts b/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts index 30e94dc0..fe83801b 100644 --- a/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts +++ b/apps/server/src/core/ai-chat/external-mcp/mcp-clients.service.ts @@ -6,6 +6,7 @@ import { createMCPClient } from '@ai-sdk/mcp'; import { Agent, type Dispatcher } from 'undici'; import { AiMcpServerRepo } from '@docmost/db/repos/ai-chat/ai-mcp-server.repo'; import { AiMcpServer } from '@docmost/db/types/entity.types'; +import { streamingDispatcherOptions } from '../../../integrations/ai/ai-streaming-fetch'; import { SecretBoxService } from '../../../integrations/crypto/secret-box'; import { isUrlAllowed, isIpAllowed } from './ssrf-guard'; @@ -400,6 +401,16 @@ export function validateResolvedAddresses( */ function buildPinnedDispatcher(): Agent { return new Agent({ + // Raise undici's default 300s headers/body timeouts on external MCP traffic + // to the same generous-but-finite silence timeout the chat fetch uses (#175). + // A long agent turn keeps an SSE transport (e.g. crawl4ai's /mcp/sse) open + // across the whole turn; that connection can idle BETWEEN tool calls longer + // than 5 min, and undici's bodyTimeout would otherwise sever it mid-task — a + // tool-call failure that aborts the streamed turn and shows the user "Lost + // connection to the AI provider". 
A slow single tool call (a crawl) can + // likewise exceed headersTimeout. The timeout stays FINITE so a genuinely + // hung server is still broken eventually. + ...streamingDispatcherOptions(), connect: { lookup: (hostname, _options, callback) => { // Always resolve ALL addresses ourselves; do not trust the caller's diff --git a/apps/server/src/integrations/ai/ai-http-diagnostics.ts b/apps/server/src/integrations/ai/ai-http-diagnostics.ts index eb9beeb2..0761d050 100644 --- a/apps/server/src/integrations/ai/ai-http-diagnostics.ts +++ b/apps/server/src/integrations/ai/ai-http-diagnostics.ts @@ -25,7 +25,13 @@ import { Logger } from '@nestjs/common'; * The seq/last-call timestamps are module-level, so under concurrent turns the * idle-gap figure is approximate (fine for single-user reproduction). */ -export function createDiagnosticFetch(context: string): typeof fetch { +export function createDiagnosticFetch( + context: string, + // The underlying fetch to instrument. Defaults to the global fetch; the chat + // provider passes a streaming fetch (disabled undici stream timeouts, #175) so + // the telemetry observes the SAME transport the long agent turn actually uses. + baseFetch: typeof fetch = fetch, +): typeof fetch { const logger = new Logger(context); let callSeq = 0; let lastCallStartedAt: number | undefined; @@ -46,9 +52,9 @@ export function createDiagnosticFetch(context: string): typeof fetch { ? body.byteLength : undefined; try { - // Delegate to global fetch; return the Response UNTOUCHED (never read/clone - // the body) so the streamed SSE response is unaffected. - const res = await fetch(input, init); + // Delegate to the base fetch; return the Response UNTOUCHED (never read/ + // clone the body) so the streamed SSE response is unaffected. 
+ const res = await baseFetch(input, init); logger.log( `provider HTTP DIAGNOSTIC: call#${callId} OK ` + `headersAfter=${Date.now() - startedAt}ms status=${res.status} ` + diff --git a/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts b/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts new file mode 100644 index 00000000..df6a16d5 --- /dev/null +++ b/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts @@ -0,0 +1,78 @@ +import * as http from 'node:http'; +import { + createStreamingFetch, + streamTimeoutMs, + streamingDispatcherOptions, +} from './ai-streaming-fetch'; + +/** + * #175: undici's default 300s headers/body timeouts severed long agent turns. + * The streaming fetch raises them to a generous-but-FINITE silence timeout (not + * 0 — a true hang must still break). We pin: the configured value + env override, + * that both dispatcher timeouts use it, and that a delayed response streams. + */ +describe('streamTimeoutMs', () => { + const ORIG = process.env.AI_STREAM_TIMEOUT_MS; + afterEach(() => { + if (ORIG === undefined) delete process.env.AI_STREAM_TIMEOUT_MS; + else process.env.AI_STREAM_TIMEOUT_MS = ORIG; + }); + + it('defaults to a generous-but-finite 15 minutes', () => { + delete process.env.AI_STREAM_TIMEOUT_MS; + expect(streamTimeoutMs()).toBe(900_000); + // Finite — NOT disabled (0 would let a hung provider leak forever). 
+ expect(streamTimeoutMs()).toBeGreaterThan(0); + expect(Number.isFinite(streamTimeoutMs())).toBe(true); + }); + + it('honours a positive AI_STREAM_TIMEOUT_MS override', () => { + process.env.AI_STREAM_TIMEOUT_MS = '120000'; + expect(streamTimeoutMs()).toBe(120000); + }); + + it('ignores an invalid / non-positive override (falls back to default)', () => { + for (const bad of ['0', '-5', 'abc', '']) { + process.env.AI_STREAM_TIMEOUT_MS = bad; + expect(streamTimeoutMs()).toBe(900_000); + } + }); + + it('applies the timeout to BOTH undici stream timeouts', () => { + delete process.env.AI_STREAM_TIMEOUT_MS; + expect(streamingDispatcherOptions()).toEqual({ + headersTimeout: 900_000, + bodyTimeout: 900_000, + }); + }); +}); + +describe('createStreamingFetch — against a delayed server', () => { + let server: http.Server; + let url: string; + // The server waits before sending ANY byte (a long time-to-first-token). + const DELAY = 400; + + beforeAll(async () => { + server = http.createServer((_req, res) => { + setTimeout(() => { + res.writeHead(200, { 'Content-Type': 'text/plain' }); + res.end('ok'); + }, DELAY); + }); + await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve)); + const addr = server.address() as import('node:net').AddressInfo; + url = `http://127.0.0.1:${addr.port}/`; + }); + + afterAll(async () => { + await new Promise((resolve) => server.close(() => resolve())); + }); + + it('streams the delayed response instead of timing out', async () => { + const streamingFetch = createStreamingFetch(); + const res = await streamingFetch(url); + expect(res.status).toBe(200); + expect(await res.text()).toBe('ok'); + }); +}); diff --git a/apps/server/src/integrations/ai/ai-streaming-fetch.ts b/apps/server/src/integrations/ai/ai-streaming-fetch.ts new file mode 100644 index 00000000..f257fe4e --- /dev/null +++ b/apps/server/src/integrations/ai/ai-streaming-fetch.ts @@ -0,0 +1,58 @@ +import { Agent } from 'undici'; + +/** + * Default SILENCE timeout for 
streaming AI calls (15 min). Generous, but FINITE. + * + * Node's global fetch (undici) defaults headersTimeout and bodyTimeout to + * 300_000ms, which severed legitimate long agent turns mid-stream — surfacing as + * "Lost connection to the AI provider" (#175): a late step with a huge context + * pushes the model's time-to-first-token past 5 min, or a reasoning model pauses + * >5 min between chunks. We do NOT disable the timeout (0) — that would let a + * genuinely hung provider, with the client still connected, hang forever + * (abortSignal only fires on client disconnect). Instead we raise it well above + * any realistic gap while keeping it finite so a true hang is eventually broken. + * + * This bounds SILENCE (time-to-first-byte and the gap BETWEEN chunks), NOT total + * turn duration — so an arbitrarily long turn that keeps streaming bytes is never + * cut; only a stream that goes quiet for longer than this is treated as a hang. + */ +const DEFAULT_STREAM_TIMEOUT_MS = 900_000; + +/** + * The configured silence timeout (ms). Override with `AI_STREAM_TIMEOUT_MS`; a + * missing/invalid/non-positive value falls back to {@link DEFAULT_STREAM_TIMEOUT_MS}. + */ +export function streamTimeoutMs(): number { + const raw = Number(process.env.AI_STREAM_TIMEOUT_MS); + return Number.isFinite(raw) && raw > 0 ? raw : DEFAULT_STREAM_TIMEOUT_MS; +} + +/** + * undici `Agent` timeout options for streaming AI traffic — both stream timeouts + * set to the (generous, finite) silence timeout. Shared by the chat provider + * fetch and the external-MCP dispatcher so they behave identically (#175). 
+ */ +export function streamingDispatcherOptions(): { + headersTimeout: number; + bodyTimeout: number; +} { + const t = streamTimeoutMs(); + return { headersTimeout: t, bodyTimeout: t }; +} + +/** + * Build a `fetch` for long-lived streaming AI calls (the agent chat turn) backed + * by a dedicated undici dispatcher whose stream timeouts are the generous-but- + * finite silence timeout above (#175). A single shared dispatcher is returned + * (callers hold it for the service lifetime) so its connection pool is reused. + */ +export function createStreamingFetch(): typeof fetch { + const dispatcher = new Agent(streamingDispatcherOptions()); + return ((input: Parameters[0], init?: RequestInit) => + fetch(input, { + ...(init ?? {}), + // `dispatcher` is an undici-specific init field (not in the DOM RequestInit + // type); Node's global fetch reads it. Cast to satisfy the type. + dispatcher, + } as RequestInit & { dispatcher: Agent })) as typeof fetch; +} diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts index 4f72d23b..65bdda5c 100644 --- a/apps/server/src/integrations/ai/ai.service.ts +++ b/apps/server/src/integrations/ai/ai.service.ts @@ -16,6 +16,7 @@ import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception'; import { describeProviderError } from './ai-error.util'; // DIAGNOSTIC (provider ECONNRESET investigation) — temporary. 
import { createDiagnosticFetch } from './ai-http-diagnostics'; +import { createStreamingFetch } from './ai-streaming-fetch'; import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo'; import { SecretBoxService } from '../crypto/secret-box'; import { AiDriver } from './ai.types'; @@ -45,11 +46,14 @@ export interface ChatModelOverride { export class AiService { private readonly logger = new Logger(AiService.name); - // DIAGNOSTIC (provider ECONNRESET investigation) — temporary: passive - // instrumentation of the OpenAI-compatible provider HTTP calls (z.ai). - // Logs call timing/outcome only — no behavior change. + // Provider HTTP fetch for the chat path: a streaming fetch that DISABLES + // undici's 300s headers/body timeouts (#175 — long agent turns were severed + // mid-stream), wrapped with passive ECONNRESET-investigation telemetry so the + // logs observe the exact transport the turn uses. Held for the service + // lifetime to reuse the streaming dispatcher's connection pool. private readonly aiDiagnosticFetch = createDiagnosticFetch( 'AiService:provider-http', + createStreamingFetch(), ); constructor( From da15b55786d55bc381aafa3debc49f821626ffd5 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 22:31:58 +0300 Subject: [PATCH 11/43] =?UTF-8?q?refactor(ai):=20address=20PR=20#176=20rev?= =?UTF-8?q?iew=20=E2=80=94=20finite-timeout=20wording,=20env=20doc,=20test?= =?UTF-8?q?s,=20permanent=20provider-http=20module?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Wording: every comment now says the stream timeouts are RAISED to a generous-but-finite ~15-min silence timeout, not "disabled (0)" (the stale comments contradicted the code, which uses AI_STREAM_TIMEOUT_MS, default 900000ms). 
- Architecture (the load-bearing-temporary trap): the streaming fetch reached the chat provider only by riding the "temporary DIAGNOSTIC" telemetry, so deleting the telemetry by its own label would silently revert the timeout fix. Legitimize it: rename ai-http-diagnostics.ts -> ai-provider-http.ts, createDiagnosticFetch -> createInstrumentedFetch, field aiDiagnosticFetch -> aiProviderFetch, drop the "temporary" labels, and document the chat transport (streaming fetch + instrumentation) as one intentional construct. - Docs: AI_STREAM_TIMEOUT_MS added to .env.example next to AI_EMBEDDING_TIMEOUT_MS. - Tests: - ai-provider-http.spec: createInstrumentedFetch delegates to the injected baseFetch with the same input/init, returns the Response untouched, rethrows the error, and defaults to global fetch — covering the baseFetch seam. - ai-streaming-fetch.spec: the delayed-server test is now LOAD-BEARING — with AI_STREAM_TIMEOUT_MS set below the 1.5s server delay the call actually rejects (a lost dispatcher -> global 300s default would NOT), proving the configured dispatcher is wired; plus the default-timeout happy path. server tsc clean; ai-streaming-fetch / ai-provider-http / ai.service / mcp-servers / ai-error specs green (41). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .env.example | 6 +++ .../integrations/ai/ai-provider-http.spec.ts | 40 +++++++++++++++++++ ...ttp-diagnostics.ts => ai-provider-http.ts} | 36 ++++++++++------- .../ai/ai-streaming-fetch.spec.ts | 40 +++++++++++++++++-- apps/server/src/integrations/ai/ai.service.ts | 22 +++++----- 5 files changed, 114 insertions(+), 30 deletions(-) create mode 100644 apps/server/src/integrations/ai/ai-provider-http.spec.ts rename apps/server/src/integrations/ai/{ai-http-diagnostics.ts => ai-provider-http.ts} (65%) diff --git a/.env.example b/.env.example index fa886282..4726805b 100644 --- a/.env.example +++ b/.env.example @@ -136,6 +136,12 @@ MCP_DOCMOST_PASSWORD= # A slow/hung embeddings endpoint fails after this and the batch continues. # AI_EMBEDDING_TIMEOUT_MS=120000 +# Silence timeout (ms) for streaming chat/agent AI calls AND external-MCP traffic. +# Bounds time-to-first-byte and the gap BETWEEN chunks (NOT the total turn length), +# so an arbitrarily long turn that keeps streaming is never cut. Finite so a hung +# provider is eventually broken instead of leaking forever. Default 900000 (15 min). +# AI_STREAM_TIMEOUT_MS=900000 + # --- Anonymous public-share AI assistant --- # Opt-in per workspace (AI settings -> "public share assistant"; off by default). # When enabled, anonymous visitors of a published share can ask an AI about that diff --git a/apps/server/src/integrations/ai/ai-provider-http.spec.ts b/apps/server/src/integrations/ai/ai-provider-http.spec.ts new file mode 100644 index 00000000..7ccb744c --- /dev/null +++ b/apps/server/src/integrations/ai/ai-provider-http.spec.ts @@ -0,0 +1,40 @@ +import { createInstrumentedFetch } from './ai-provider-http'; + +/** + * createInstrumentedFetch must be behavior-neutral: it delegates to the supplied + * baseFetch with the SAME input/init, returns the Response object untouched (so + * the streamed SSE body is never read/cloned), and rethrows the same error. 
The + * baseFetch injection is the seam that carries the streaming fetch (#175) onto + * the chat provider, so it is tested directly. + */ +describe('createInstrumentedFetch', () => { + it('delegates to the injected baseFetch with the same input/init', async () => { + const fakeResponse = new Response('ok', { status: 200 }); + const baseFetch = jest.fn().mockResolvedValue(fakeResponse); + const instrumented = createInstrumentedFetch('test', baseFetch as never); + + const init = { method: 'POST', body: '{"q":1}' }; + const res = await instrumented('https://example.com/v1/chat', init); + + expect(baseFetch).toHaveBeenCalledTimes(1); + expect(baseFetch).toHaveBeenCalledWith('https://example.com/v1/chat', init); + // The Response is returned UNTOUCHED (same reference — never read/cloned). + expect(res).toBe(fakeResponse); + }); + + it('rethrows the base fetch error unchanged (pre-response failure)', async () => { + const err = Object.assign(new TypeError('fetch failed'), { + cause: { code: 'ECONNRESET' }, + }); + const baseFetch = jest.fn().mockRejectedValue(err); + const instrumented = createInstrumentedFetch('test', baseFetch as never); + + await expect(instrumented('https://example.com/')).rejects.toBe(err); + }); + + it('defaults to the global fetch when no baseFetch is given', () => { + // Constructing without a baseFetch must not throw — it simply wraps global + // fetch (the non-chat default). 
+ expect(() => createInstrumentedFetch('test')).not.toThrow(); + }); +}); diff --git a/apps/server/src/integrations/ai/ai-http-diagnostics.ts b/apps/server/src/integrations/ai/ai-provider-http.ts similarity index 65% rename from apps/server/src/integrations/ai/ai-http-diagnostics.ts rename to apps/server/src/integrations/ai/ai-provider-http.ts index 0761d050..22ef2f44 100644 --- a/apps/server/src/integrations/ai/ai-http-diagnostics.ts +++ b/apps/server/src/integrations/ai/ai-provider-http.ts @@ -1,16 +1,22 @@ import { Logger } from '@nestjs/common'; /** - * DIAGNOSTIC (provider ECONNRESET investigation) — temporary. + * The provider HTTP fetch used by the chat path: a thin, behavior-neutral + * instrumentation wrapper around a supplied `fetch`. * - * A PASSIVE, behavior-neutral wrapper around the global `fetch`, injected into - * the OpenAI-compatible provider client (`createOpenAI({ fetch })`, the z.ai - * path). Per provider HTTP call it logs: time-to-response-headers + status + - * request-body size on success; and on a pre-response rejection the failure - * latency + error code/cause + request-body size + the idle gap since the - * previous provider call. It NEVER retries, times out, swaps the dispatcher, or - * reads/clones the response body — the Response is returned untouched (streaming - * unaffected) and any error is rethrown unchanged. + * It defaults to the global `fetch`, but the chat provider passes the streaming + * fetch (which RAISES undici's 300s stream timeouts to a generous-but-finite + * silence timeout so a long agent turn is not severed mid-stream — #175). So this + * wrapper observes the EXACT transport a turn uses. It NEVER retries, times out, + * swaps the dispatcher, or reads/clones the response body — the Response is + * returned untouched (streaming unaffected) and any error is rethrown unchanged. 
+ * + * Per provider HTTP call it logs: time-to-response-headers + status + request + * body size on success; and on a pre-response rejection the failure latency + + * error code/cause + request body size + the idle gap since the previous call. + * This telemetry is intentional and kept (it diagnoses provider connection + * resets / mid-stream cuts), and it is load-bearing: the streaming fetch reaches + * the chat provider THROUGH this wrapper, so the two are one construct. * * How to read the result (a long agentic turn makes one provider call per step): * - a failed turn whose last provider line is "PRE-RESPONSE FAILED ... ECONNRESET" @@ -23,13 +29,13 @@ import { Logger } from '@nestjs/common'; * different failure mode. * * The seq/last-call timestamps are module-level, so under concurrent turns the - * idle-gap figure is approximate (fine for single-user reproduction). + * idle-gap figure is approximate (fine for single-user diagnosis). */ -export function createDiagnosticFetch( +export function createInstrumentedFetch( context: string, // The underlying fetch to instrument. Defaults to the global fetch; the chat - // provider passes a streaming fetch (disabled undici stream timeouts, #175) so - // the telemetry observes the SAME transport the long agent turn actually uses. + // provider passes the streaming fetch (raised, finite undici stream timeouts, + // #175) so the telemetry observes the SAME transport the long agent turn uses. baseFetch: typeof fetch = fetch, ): typeof fetch { const logger = new Logger(context); @@ -56,7 +62,7 @@ export function createDiagnosticFetch( // clone the body) so the streamed SSE response is unaffected. const res = await baseFetch(input, init); logger.log( - `provider HTTP DIAGNOSTIC: call#${callId} OK ` + + `provider HTTP: call#${callId} OK ` + `headersAfter=${Date.now() - startedAt}ms status=${res.status} ` + `reqBytes=${bodyBytes ?? 'n/a'} idleSincePrevCall=${idleSincePrev ?? 
'n/a'}ms`, ); @@ -70,7 +76,7 @@ export function createDiagnosticFetch( cause?: { code?: string; message?: string }; }; logger.warn( - `provider HTTP DIAGNOSTIC: call#${callId} PRE-RESPONSE FAILED ` + + `provider HTTP: call#${callId} PRE-RESPONSE FAILED ` + `after=${Date.now() - startedAt}ms code=${e?.cause?.code ?? 'none'} ` + `name=${e?.name ?? 'Error'} cause=${e?.cause?.message ?? e?.message ?? 'unknown'} ` + `reqBytes=${bodyBytes ?? 'n/a'} idleSincePrevCall=${idleSincePrev ?? 'n/a'}ms`, diff --git a/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts b/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts index df6a16d5..b28ecf51 100644 --- a/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts +++ b/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts @@ -48,10 +48,13 @@ describe('streamTimeoutMs', () => { }); describe('createStreamingFetch — against a delayed server', () => { + const ORIG = process.env.AI_STREAM_TIMEOUT_MS; let server: http.Server; let url: string; - // The server waits before sending ANY byte (a long time-to-first-token). - const DELAY = 400; + // The server waits before sending ANY byte (a long time-to-first-token). It is + // > undici's ~1s timeout-timer granularity so a sub-second configured timeout + // fires deterministically in the load-bearing test below. 
+ const DELAY = 1500; beforeAll(async () => { server = http.createServer((_req, res) => { @@ -69,10 +72,41 @@ describe('createStreamingFetch — against a delayed server', () => { await new Promise((resolve) => server.close(() => resolve())); }); - it('streams the delayed response instead of timing out', async () => { + afterEach(() => { + if (ORIG === undefined) delete process.env.AI_STREAM_TIMEOUT_MS; + else process.env.AI_STREAM_TIMEOUT_MS = ORIG; + }); + + it('streams the delayed response at the default (generous) timeout', async () => { + delete process.env.AI_STREAM_TIMEOUT_MS; // default 15 min >> DELAY const streamingFetch = createStreamingFetch(); const res = await streamingFetch(url); expect(res.status).toBe(200); expect(await res.text()).toBe('ok'); }); + + it('LOAD-BEARING: a sub-DELAY AI_STREAM_TIMEOUT_MS actually severs the response', async () => { + // Proves the configured dispatcher is wired into the fetch: with the timeout + // set below DELAY the call must reject with undici's headers-timeout. If the + // dispatcher were lost (fallback to global fetch's 300s default), the 1.5s + // response would slip through and this would NOT throw. + process.env.AI_STREAM_TIMEOUT_MS = '500'; + const streamingFetch = createStreamingFetch(); + let caught: unknown; + const startedAt = Date.now(); + try { + await streamingFetch(url).then((r) => r.text()); + } catch (e) { + caught = e; + } + // It rejected (a lost dispatcher -> global 300s default would NOT reject on a + // 1.5s response) and it did so BEFORE the response would have arrived (DELAY). + // Use `.name` (realm-safe) — undici's TypeError fails cross-realm instanceof. + expect(caught).toBeDefined(); + expect((caught as Error)?.name).toBe('TypeError'); + expect(Date.now() - startedAt).toBeLessThan(DELAY); + // When present, the undici cause is the headers timeout. 
+ const code = (caught as { cause?: { code?: string } })?.cause?.code; + if (code) expect(code).toBe('UND_ERR_HEADERS_TIMEOUT'); + }); }); diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts index 65bdda5c..2a524f2c 100644 --- a/apps/server/src/integrations/ai/ai.service.ts +++ b/apps/server/src/integrations/ai/ai.service.ts @@ -14,8 +14,7 @@ import { AiNotConfiguredException } from './ai-not-configured.exception'; import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured.exception'; import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception'; import { describeProviderError } from './ai-error.util'; -// DIAGNOSTIC (provider ECONNRESET investigation) — temporary. -import { createDiagnosticFetch } from './ai-http-diagnostics'; +import { createInstrumentedFetch } from './ai-provider-http'; import { createStreamingFetch } from './ai-streaming-fetch'; import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo'; import { SecretBoxService } from '../crypto/secret-box'; @@ -46,12 +45,12 @@ export interface ChatModelOverride { export class AiService { private readonly logger = new Logger(AiService.name); - // Provider HTTP fetch for the chat path: a streaming fetch that DISABLES - // undici's 300s headers/body timeouts (#175 — long agent turns were severed - // mid-stream), wrapped with passive ECONNRESET-investigation telemetry so the - // logs observe the exact transport the turn uses. Held for the service - // lifetime to reuse the streaming dispatcher's connection pool. 
- private readonly aiDiagnosticFetch = createDiagnosticFetch( + // Provider HTTP fetch for the chat path: the streaming fetch — which RAISES + // undici's 300s headers/body timeouts to a generous-but-finite silence timeout + // so a long agent turn is not severed mid-stream (#175) — wrapped with the + // provider-HTTP instrumentation so the logs observe that exact transport. Held + // for the service lifetime to reuse the streaming dispatcher's connection pool. + private readonly aiProviderFetch = createInstrumentedFetch( 'AiService:provider-http', createStreamingFetch(), ); @@ -152,13 +151,12 @@ export class AiService { // endpoint. The default callable createOpenAI(...)(model) targets the // Responses API (/responses), which OpenAI-compatible gateways // (OpenRouter, etc.) reject on multi-turn requests (history with - // assistant messages) → 400. - // DIAGNOSTIC (provider ECONNRESET investigation) — temporary: pass the - // passive instrumented fetch (logging only; no behavior change). + // assistant messages) → 400. The provider fetch is the instrumented + // streaming fetch (finite-but-generous stream timeouts, #175). return createOpenAI({ apiKey, baseURL: baseUrl, - fetch: this.aiDiagnosticFetch, + fetch: this.aiProviderFetch, }).chat(chatModel); case 'gemini': return createGoogleGenerativeAI({ apiKey })(chatModel); From 59190148db2aa6c4e0b43f35b1c3ab00ecb44d38 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 22:58:15 +0300 Subject: [PATCH 12/43] feat(ai-chat): explicit chatApiStyle selector to surface reasoning (#175) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebuilt on develop (after #176) and reworked per review: instead of inferring the provider from baseUrl (`if (baseUrl)`), the admin picks the chat provider EXPLICITLY via a new `chatApiStyle` ('openai-compatible' | 'openai'), mirroring the existing sttApiStyle. 
A custom baseURL can front real OpenAI too, so the heuristic was fragile. Why reasoning was missing: glm-5.2 (and DeepSeek etc.) stream their thinking as `reasoning_content`, but the official @ai-sdk/openai provider does not map that field. 'openai-compatible' uses @ai-sdk/openai-compatible, which does — so reasoning parts now stream (verified live: reasoning-start/delta/end appear, and disappear when set to 'openai'). - Default (unset) = 'openai-compatible', so existing openai+baseUrl workspaces surface reasoning with no admin action. No DB migration (field lives in the settings.ai.provider JSON blob). - includeUsage: true on the openai-compatible model — without it the provider omits streamed usage, zeroing the live token counter / reasoning-token metadata. The official provider always sent it; this keeps parity. (Confirmed live: usage.totalTokens present.) - openai-compatible has no default endpoint, so with no baseURL (real OpenAI, or a role's cross-driver override that cleared it) it falls back to the official provider. Plumbing: ai.types (ChatApiStyle / CHAT_API_STYLES + AiProviderSettings / MaskedAiSettings), update DTO (@IsIn), ai-settings.service (resolve / getMasked / update allowlist), workspace.repo updateAiProviderSettings ALLOWED (the second, SQL-level allowlist the review missed — without it the field never persisted), ai.service selector. Client: ai-settings-service types + a Protocol + {/* Anonymous public-share assistant: a single master toggle + an optional cheaper model id. Reuses this card's driver/URL/key. 
*/} diff --git a/apps/client/src/features/workspace/services/ai-settings-service.ts b/apps/client/src/features/workspace/services/ai-settings-service.ts index 1814acd5..189589b0 100644 --- a/apps/client/src/features/workspace/services/ai-settings-service.ts +++ b/apps/client/src/features/workspace/services/ai-settings-service.ts @@ -9,6 +9,12 @@ export type AiDriver = "openai" | "gemini" | "ollama"; // - 'json' -> JSON body with base64-encoded audio (OpenRouter) export type SttApiStyle = "multipart" | "json"; +// Chat provider implementation for the `openai` driver (chosen explicitly): +// - 'openai-compatible' -> maps streamed reasoning_content to reasoning parts +// (z.ai/GLM, DeepSeek, OpenRouter, ...). Default. +// - 'openai' -> official provider; real-OpenAI reasoning-model shaping. +export type ChatApiStyle = "openai-compatible" | "openai"; + // Masked AI provider settings returned by the server. // No API key is ever returned; only `hasApiKey` / `hasEmbeddingApiKey` indicate // whether one is stored. `embeddingBaseUrl` is the RAW stored value (empty means @@ -16,6 +22,7 @@ export type SttApiStyle = "multipart" | "json"; export interface IAiSettings { driver?: AiDriver; chatModel?: string; + chatApiStyle?: ChatApiStyle; // Cheap model id for the anonymous public-share assistant; empty = chatModel. publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts; empty = @@ -49,6 +56,7 @@ export interface IAiSettings { export interface IAiSettingsUpdate { driver?: AiDriver; chatModel?: string; + chatApiStyle?: ChatApiStyle; publicShareChatModel?: string; // Agent-role id whose persona the public-share assistant adopts; empty = // built-in locked persona. 
diff --git a/apps/server/src/database/repos/workspace/workspace.repo.ts b/apps/server/src/database/repos/workspace/workspace.repo.ts index 182a45f2..95e33aa9 100644 --- a/apps/server/src/database/repos/workspace/workspace.repo.ts +++ b/apps/server/src/database/repos/workspace/workspace.repo.ts @@ -239,7 +239,7 @@ export class WorkspaceRepo { // is a real jsonb object, never a double-encoded string. The CASE self-heals // workspaces whose settings.ai.provider was previously corrupted into an // array/string. - const ALLOWED = ['driver', 'chatModel', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'sttApiStyle', 'sttLanguage', 'systemPrompt', 'publicShareChatModel', 'publicShareAssistantRoleId']; + const ALLOWED = ['driver', 'chatModel', 'chatApiStyle', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'sttApiStyle', 'sttLanguage', 'systemPrompt', 'publicShareChatModel', 'publicShareAssistantRoleId']; const entries = Object.entries(provider).filter( ([k, v]) => v !== undefined && ALLOWED.includes(k), ); diff --git a/apps/server/src/integrations/ai/ai-settings.service.ts b/apps/server/src/integrations/ai/ai-settings.service.ts index e556c0d0..0717c3c4 100644 --- a/apps/server/src/integrations/ai/ai-settings.service.ts +++ b/apps/server/src/integrations/ai/ai-settings.service.ts @@ -14,6 +14,7 @@ import { MaskedAiSettings, ResolvedAiConfig, SttApiStyle, + ChatApiStyle, } from './ai.types'; /** @@ -24,6 +25,7 @@ import { export interface UpdateAiSettingsInput { driver?: AiDriver; chatModel?: string; + chatApiStyle?: ChatApiStyle; embeddingModel?: string; baseUrl?: string; embeddingBaseUrl?: string; @@ -157,6 +159,8 @@ export class AiSettingsService { const config: ResolvedAiConfig = { driver: provider.driver, chatModel: provider.chatModel, + // Plain passthrough; getChatModel defaults unset to 'openai-compatible'. 
+ chatApiStyle: provider.chatApiStyle, // Cheap model id for the anonymous public-share assistant; reuses the chat // driver/baseUrl/apiKey. Empty/unset → callers fall back to chatModel. publicShareChatModel: provider.publicShareChatModel, @@ -238,6 +242,7 @@ export class AiSettingsService { return { driver: provider.driver, chatModel: provider.chatModel, + chatApiStyle: provider.chatApiStyle, embeddingModel: provider.embeddingModel, baseUrl: provider.baseUrl, embeddingBaseUrl: provider.embeddingBaseUrl, @@ -278,6 +283,7 @@ export class AiSettingsService { for (const key of [ 'driver', 'chatModel', + 'chatApiStyle', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', diff --git a/apps/server/src/integrations/ai/ai.service.spec.ts b/apps/server/src/integrations/ai/ai.service.spec.ts index ef44a59d..b3c7f6f0 100644 --- a/apps/server/src/integrations/ai/ai.service.spec.ts +++ b/apps/server/src/integrations/ai/ai.service.spec.ts @@ -285,3 +285,64 @@ describe('AiService.getChatModel role model override', () => { ); }); }); + +/** + * Chat provider selection by the EXPLICIT `chatApiStyle` (NOT inferred from + * baseUrl): 'openai-compatible' (default) uses @ai-sdk/openai-compatible, which + * maps streamed reasoning_content to reasoning parts; 'openai' uses the official + * provider; and openai-compatible without a baseURL safely falls back to the + * official provider (it has no default endpoint). Asserted via `.provider`. 
+ */ +describe('AiService.getChatModel chatApiStyle provider selection', () => { + function serviceWith(opts: { + baseUrl?: string; + chatApiStyle?: 'openai-compatible' | 'openai'; + }) { + const aiSettings = { + resolve: jest.fn().mockResolvedValue({ + driver: 'openai', + chatModel: 'glm-5.2', + apiKey: 'key', + baseUrl: opts.baseUrl, + chatApiStyle: opts.chatApiStyle, + }), + }; + return new AiService( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + aiSettings as any, + { find: jest.fn() } as never, + { decryptSecret: jest.fn() } as never, + ); + } + + const providerOf = async (svc: AiService) => + ( + (await svc.getChatModel('ws-1')) as { provider: string } + ).provider; + + it("'openai-compatible' + baseURL -> openai-compatible provider", async () => { + expect( + await providerOf( + serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai-compatible' }), + ), + ).toContain('openai-compatible'); + }); + + it("'openai' + baseURL -> official openai provider", async () => { + expect( + await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4', chatApiStyle: 'openai' })), + ).toBe('openai.chat'); + }); + + it('unset + baseURL -> defaults to openai-compatible', async () => { + expect( + await providerOf(serviceWith({ baseUrl: 'https://api.z.ai/v4' })), + ).toContain('openai-compatible'); + }); + + it("'openai-compatible' WITHOUT baseURL -> safe fallback to official openai", async () => { + expect( + await providerOf(serviceWith({ chatApiStyle: 'openai-compatible' })), + ).toBe('openai.chat'); + }); +}); diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts index 2a524f2c..18f15b5d 100644 --- a/apps/server/src/integrations/ai/ai.service.ts +++ b/apps/server/src/integrations/ai/ai.service.ts @@ -7,6 +7,7 @@ import { type LanguageModel, } from 'ai'; import { createOpenAI } from '@ai-sdk/openai'; +import { createOpenAICompatible } from '@ai-sdk/openai-compatible'; import { 
createGoogleGenerativeAI } from '@ai-sdk/google'; import { createOllama } from 'ai-sdk-ollama'; import { AiSettingsService } from './ai-settings.service'; @@ -95,6 +96,10 @@ export class AiService { let apiKey = cfg.apiKey; let baseUrl = cfg.baseUrl; + // Chat provider implementation, chosen EXPLICITLY by the admin (not inferred + // from baseUrl). Unset → 'openai-compatible' so reasoning is surfaced by + // default for this fork's openai+baseUrl setups. + const chatApiStyle = cfg.chatApiStyle ?? 'openai-compatible'; // A driver override that differs from the workspace driver needs that // driver's own creds (the workspace driver's key would be wrong/absent). @@ -145,19 +150,41 @@ export class AiService { } switch (driver) { - case 'openai': - // baseURL (when set) covers openai-compatible endpoints. Use Chat - // Completions (/chat/completions) — the portable OpenAI-compatible - // endpoint. The default callable createOpenAI(...)(model) targets the - // Responses API (/responses), which OpenAI-compatible gateways - // (OpenRouter, etc.) reject on multi-turn requests (history with - // assistant messages) → 400. The provider fetch is the instrumented - // streaming fetch (finite-but-generous stream timeouts, #175). + case 'openai': { + // The provider implementation is chosen by the admin's `chatApiStyle` + // (NOT inferred from baseUrl — a custom URL can front real OpenAI too). + // Both branches hit Chat Completions (/chat/completions); the provider + // fetch is the instrumented streaming fetch (finite-but-generous stream + // timeouts, #175). + // + // 'openai-compatible' (default) maps the third-party provider's streamed + // `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, ...) — the + // point of #175. It has no default endpoint, so it requires a baseURL; + // when there is none (real OpenAI, or a role's cross-driver override that + // cleared baseUrl) we fall back to the official provider. 
+ if (chatApiStyle === 'openai-compatible' && baseUrl) { + return createOpenAICompatible({ + name: 'openai-compatible', + apiKey, + baseURL: baseUrl, + // Keep streamed token usage (stream_options.include_usage): without + // it @ai-sdk/openai-compatible omits usage, zeroing the live token + // counter and reasoning-token metadata. The official provider always + // sent it, so this preserves parity. + includeUsage: true, + fetch: this.aiProviderFetch, + })(chatModel); + } + // Official @ai-sdk/openai: real-OpenAI reasoning-model request shaping; + // `.chat()` targets Chat Completions (the default callable targets the + // Responses API, which openai-compatible gateways 400 on multi-turn + // history). In this fork baseUrl is normally set; undefined = real OpenAI. return createOpenAI({ apiKey, baseURL: baseUrl, fetch: this.aiProviderFetch, }).chat(chatModel); + } case 'gemini': return createGoogleGenerativeAI({ apiKey })(chatModel); case 'ollama': diff --git a/apps/server/src/integrations/ai/ai.types.ts b/apps/server/src/integrations/ai/ai.types.ts index 0a3d925e..5cdb6d1d 100644 --- a/apps/server/src/integrations/ai/ai.types.ts +++ b/apps/server/src/integrations/ai/ai.types.ts @@ -16,6 +16,15 @@ export const AI_DRIVERS: AiDriver[] = ['openai', 'gemini', 'ollama']; export type SttApiStyle = 'multipart' | 'json'; export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json']; +// Chat provider implementation for the `openai` driver. Chosen explicitly by the +// admin (NOT inferred from baseUrl — a custom URL can front real OpenAI too). +// 'openai-compatible' = @ai-sdk/openai-compatible: maps streamed +// `reasoning_content` to reasoning parts (z.ai/GLM, DeepSeek, OpenRouter, ...). +// 'openai' = official @ai-sdk/openai: real-OpenAI reasoning-model request shaping +// (max_completion_tokens, the 'developer' role), no third-party reasoning map. 
+export type ChatApiStyle = 'openai-compatible' | 'openai'; +export const CHAT_API_STYLES: ChatApiStyle[] = ['openai-compatible', 'openai']; + /** * Non-secret provider settings persisted under `settings.ai.provider`. * The API key is intentionally absent here. @@ -23,6 +32,9 @@ export const STT_API_STYLES: SttApiStyle[] = ['multipart', 'json']; export interface AiProviderSettings { driver: AiDriver; chatModel: string; + // Chat provider implementation for the `openai` driver. Unset → defaults to + // 'openai-compatible' (so reasoning is surfaced by default). See ChatApiStyle. + chatApiStyle?: ChatApiStyle; embeddingModel?: string; baseUrl?: string; // Embedding-specific base URL. Falls back to `baseUrl` when empty/unset. @@ -76,6 +88,7 @@ export interface ResolvedAiConfig extends Partial { export interface MaskedAiSettings { driver?: AiDriver; chatModel?: string; + chatApiStyle?: ChatApiStyle; embeddingModel?: string; baseUrl?: string; embeddingBaseUrl?: string; diff --git a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts index 37fe8143..53aa8220 100644 --- a/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts +++ b/apps/server/src/integrations/ai/dto/update-ai-settings.dto.ts @@ -1,5 +1,12 @@ import { IsIn, IsOptional, IsString } from 'class-validator'; -import { AI_DRIVERS, AiDriver, STT_API_STYLES, SttApiStyle } from '../ai.types'; +import { + AI_DRIVERS, + AiDriver, + CHAT_API_STYLES, + ChatApiStyle, + STT_API_STYLES, + SttApiStyle, +} from '../ai.types'; /** * Admin update payload for the workspace AI provider settings. 
@@ -18,6 +25,10 @@ export class UpdateAiSettingsDto { @IsString() chatModel?: string; + @IsOptional() + @IsIn(CHAT_API_STYLES) + chatApiStyle?: ChatApiStyle; + @IsOptional() @IsString() embeddingModel?: string; From 6edbbab43b509fc982d67ffbeae0f85169800fed Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 23:18:31 +0300 Subject: [PATCH 13/43] refactor(ai): unify provider-settings allowlist + stronger chatApiStyle tests (#177 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the second #177 review: - Architecture (the silent allowlist drift): the writable provider-setting keys were maintained by hand in two TS-uncheckable places — the key-loop in ai-settings.service and the SQL ALLOWED list in the generic workspace repo (a miss there silently dropped a field on persist, exactly what bit chatApiStyle). Introduce one typed source of truth PROVIDER_SETTINGS_KEYS in ai.types (`satisfies readonly (keyof AiProviderSettings)[]`), have the service consume it, and keep the repo's own copy (it can't import AI types) guarded by a parity test so any future drift fails in CI. - Tests: - ai.service.include-usage.spec: mocks @ai-sdk/openai-compatible and asserts the factory is called with { includeUsage: true, baseURL, apiKey, fetch, name } — `.provider` alone could not catch a dropped includeUsage (the token-usage zeroing regression); also asserts the 'openai' style does NOT use it. - ai-provider-settings-keys.spec: the allowlist parity check + DTO validation for chatApiStyle (@IsIn accepts both values, rejects garbage, optional). - CHANGELOG: [Unreleased] entries for the new "Protocol" / chatApiStyle setting and the default provider change (openai -> openai-compatible). (#175, #177) server + client tsc clean; 42 ai/settings specs green. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 13 +++++ .../repos/workspace/workspace.repo.ts | 26 ++++++++- .../ai/ai-provider-settings-keys.spec.ts | 43 ++++++++++++++ .../integrations/ai/ai-settings.service.ts | 18 +----- .../ai/ai.service.include-usage.spec.ts | 58 +++++++++++++++++++ apps/server/src/integrations/ai/ai.types.ts | 28 +++++++++ 6 files changed, 169 insertions(+), 17 deletions(-) create mode 100644 apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts create mode 100644 apps/server/src/integrations/ai/ai.service.include-usage.spec.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ab0ca99..26adb3f9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,9 +25,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 flagging dangling references, empty or duplicate definitions, and `[^id]` markers inside table rows, so an agent can fix its own markup. The page is still created; the field is omitted when there are no problems. (#166) +- **AI chat "Protocol" setting (`chatApiStyle`).** A new admin choice in AI + settings for the `openai` driver: `openai-compatible` (default) routes chat + through `@ai-sdk/openai-compatible`, which surfaces a provider's streamed + reasoning (`reasoning_content` → reasoning parts) for z.ai/GLM, DeepSeek, + OpenRouter, etc.; `openai` uses the official provider (real-OpenAI + reasoning-model request shaping). Chosen explicitly rather than inferred from + the base URL, since a custom URL can front real OpenAI too. (#175, #177) ### Changed +- **AI chat default provider is now `openai-compatible` (reasoning surfaced).** + For the `openai` driver the chat provider defaults to the openai-compatible + implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the + model's reasoning out of the box. An endpoint that is real OpenAI behind a + custom base URL should set the new `chatApiStyle` "Protocol" to `openai`. 
(#177) + - **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the same id are ONE footnote — one number, one definition, several back-references — instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are diff --git a/apps/server/src/database/repos/workspace/workspace.repo.ts b/apps/server/src/database/repos/workspace/workspace.repo.ts index 95e33aa9..60e0a66e 100644 --- a/apps/server/src/database/repos/workspace/workspace.repo.ts +++ b/apps/server/src/database/repos/workspace/workspace.repo.ts @@ -10,6 +10,29 @@ import { import { ExpressionBuilder, sql } from 'kysely'; import { DB, Workspaces } from '@docmost/db/types/db'; +/** + * Writable `settings.ai.provider` keys, enforced at this generic SQL layer. This + * repo cannot import AI-feature types, so this list is its own copy; a parity + * test (ai-provider-settings-keys.spec.ts) asserts it equals + * PROVIDER_SETTINGS_KEYS in ai.types so a future drift fails in CI rather than + * silently dropping a field at this boundary. + */ +export const AI_PROVIDER_SETTINGS_ALLOWED: readonly string[] = [ + 'driver', + 'chatModel', + 'chatApiStyle', + 'embeddingModel', + 'baseUrl', + 'embeddingBaseUrl', + 'sttModel', + 'sttBaseUrl', + 'sttApiStyle', + 'sttLanguage', + 'systemPrompt', + 'publicShareChatModel', + 'publicShareAssistantRoleId', +]; + @Injectable() export class WorkspaceRepo { public baseFields: Array = [ @@ -239,9 +262,8 @@ export class WorkspaceRepo { // is a real jsonb object, never a double-encoded string. The CASE self-heals // workspaces whose settings.ai.provider was previously corrupted into an // array/string. 
- const ALLOWED = ['driver', 'chatModel', 'chatApiStyle', 'embeddingModel', 'baseUrl', 'embeddingBaseUrl', 'sttModel', 'sttBaseUrl', 'sttApiStyle', 'sttLanguage', 'systemPrompt', 'publicShareChatModel', 'publicShareAssistantRoleId']; const entries = Object.entries(provider).filter( - ([k, v]) => v !== undefined && ALLOWED.includes(k), + ([k, v]) => v !== undefined && AI_PROVIDER_SETTINGS_ALLOWED.includes(k), ); const patch = entries.length ? sql`jsonb_build_object(${sql.join( diff --git a/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts b/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts new file mode 100644 index 00000000..64a4dbea --- /dev/null +++ b/apps/server/src/integrations/ai/ai-provider-settings-keys.spec.ts @@ -0,0 +1,43 @@ +import { validate } from 'class-validator'; +import { plainToInstance } from 'class-transformer'; +import { PROVIDER_SETTINGS_KEYS } from './ai.types'; +import { AI_PROVIDER_SETTINGS_ALLOWED } from '@docmost/db/repos/workspace/workspace.repo'; +import { UpdateAiSettingsDto } from './dto/update-ai-settings.dto'; + +/** + * Drift guard: the writable provider-settings keys are maintained in two layers + * that TypeScript cannot cross-check — PROVIDER_SETTINGS_KEYS (ai.types, used by + * the settings service) and AI_PROVIDER_SETTINGS_ALLOWED (the generic workspace + * repo's SQL boundary). A key missing from the repo copy silently drops the field + * on persist (exactly what happened to chatApiStyle), so this asserts they match. + */ +describe('provider-settings key allowlist parity', () => { + it('the repo SQL allowlist equals PROVIDER_SETTINGS_KEYS', () => { + expect([...AI_PROVIDER_SETTINGS_ALLOWED].sort()).toEqual( + [...PROVIDER_SETTINGS_KEYS].sort(), + ); + }); +}); + +/** DTO validation for the new chatApiStyle field (@IsIn(CHAT_API_STYLES)). 
*/ +describe('UpdateAiSettingsDto.chatApiStyle', () => { + const errorsFor = async (chatApiStyle: unknown) => + validate(plainToInstance(UpdateAiSettingsDto, { chatApiStyle })); + + it('accepts both valid values', async () => { + for (const v of ['openai-compatible', 'openai']) { + const errs = await errorsFor(v); + expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined(); + } + }); + + it('rejects an unknown value', async () => { + const errs = await errorsFor('definitely-not-a-style'); + expect(errs.find((e) => e.property === 'chatApiStyle')).toBeDefined(); + }); + + it('accepts the field being omitted (optional)', async () => { + const errs = await validate(plainToInstance(UpdateAiSettingsDto, {})); + expect(errs.find((e) => e.property === 'chatApiStyle')).toBeUndefined(); + }); +}); diff --git a/apps/server/src/integrations/ai/ai-settings.service.ts b/apps/server/src/integrations/ai/ai-settings.service.ts index 0717c3c4..05020fa9 100644 --- a/apps/server/src/integrations/ai/ai-settings.service.ts +++ b/apps/server/src/integrations/ai/ai-settings.service.ts @@ -15,6 +15,7 @@ import { ResolvedAiConfig, SttApiStyle, ChatApiStyle, + PROVIDER_SETTINGS_KEYS, } from './ai.types'; /** @@ -280,21 +281,8 @@ export class AiSettingsService { // Persist non-secret provider fields (only those present in the partial). const providerPatch: Partial = {}; - for (const key of [ - 'driver', - 'chatModel', - 'chatApiStyle', - 'embeddingModel', - 'baseUrl', - 'embeddingBaseUrl', - 'sttModel', - 'sttBaseUrl', - 'sttApiStyle', - 'sttLanguage', - 'systemPrompt', - 'publicShareChatModel', - 'publicShareAssistantRoleId', - ] as const) { + // Single source of truth for the writable provider keys (see ai.types). 
+ for (const key of PROVIDER_SETTINGS_KEYS) { if (nonSecret[key] !== undefined) { (providerPatch as Record)[key] = nonSecret[key]; } diff --git a/apps/server/src/integrations/ai/ai.service.include-usage.spec.ts b/apps/server/src/integrations/ai/ai.service.include-usage.spec.ts new file mode 100644 index 00000000..7eb86749 --- /dev/null +++ b/apps/server/src/integrations/ai/ai.service.include-usage.spec.ts @@ -0,0 +1,58 @@ +// `.provider` alone cannot prove the openai-compatible factory was called with +// `includeUsage: true` — a regression dropping it (which zeroes streamed token +// usage / reasoning-token metadata) would still pass. So mock the factory and +// assert the exact args. jest.mock is module-scoped, hence a dedicated file. + +const mockCompatibleModel = { provider: 'openai-compatible.chat', modelId: 'm' }; +// jest allows `mock`-prefixed vars inside a jest.mock factory. +const mockCreateOpenAICompatible = jest.fn( + (_settings: unknown) => () => mockCompatibleModel, +); + +jest.mock('@ai-sdk/openai-compatible', () => ({ + createOpenAICompatible: (settings: unknown) => + mockCreateOpenAICompatible(settings), +})); + +import { AiService } from './ai.service'; + +describe('AiService.getChatModel openai-compatible factory args', () => { + function serviceWith(chatApiStyle?: 'openai-compatible' | 'openai') { + const aiSettings = { + resolve: jest.fn().mockResolvedValue({ + driver: 'openai', + chatModel: 'glm-5.2', + apiKey: 'the-key', + baseUrl: 'https://api.z.ai/v4', + chatApiStyle, + }), + }; + return new AiService( + // eslint-disable-next-line @typescript-eslint/no-explicit-any + aiSettings as any, + { find: jest.fn() } as never, + { decryptSecret: jest.fn() } as never, + ); + } + + beforeEach(() => mockCreateOpenAICompatible.mockClear()); + + it('passes includeUsage:true plus baseURL/apiKey/fetch (default style)', async () => { + await serviceWith().getChatModel('ws-1'); // unset -> openai-compatible + 
expect(mockCreateOpenAICompatible).toHaveBeenCalledTimes(1); + expect(mockCreateOpenAICompatible).toHaveBeenCalledWith( + expect.objectContaining({ + name: 'openai-compatible', + baseURL: 'https://api.z.ai/v4', + apiKey: 'the-key', + includeUsage: true, + fetch: expect.any(Function), + }), + ); + }); + + it("does NOT use the openai-compatible factory for chatApiStyle 'openai'", async () => { + await serviceWith('openai').getChatModel('ws-1'); + expect(mockCreateOpenAICompatible).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/server/src/integrations/ai/ai.types.ts b/apps/server/src/integrations/ai/ai.types.ts index 5cdb6d1d..29c8d6f2 100644 --- a/apps/server/src/integrations/ai/ai.types.ts +++ b/apps/server/src/integrations/ai/ai.types.ts @@ -57,6 +57,34 @@ export interface AiProviderSettings { publicShareAssistantRoleId?: string; } +/** + * The persisted, non-secret provider setting keys — the SINGLE source of truth + * for which fields a settings update may write through to `settings.ai.provider`. + * `satisfies readonly (keyof AiProviderSettings)[]` makes the compiler reject a + * typo or a key that is not a real provider setting. + * + * The settings service consumes this directly. The generic workspace repo cannot + * import AI types, so it keeps its own copy of the same keys, guarded by a parity + * test against this constant (so any future drift fails in CI, not silently in + * prod — a missing key there validates fine, passes the service, and is then + * dropped at the SQL boundary with no error). + */ +export const PROVIDER_SETTINGS_KEYS = [ + 'driver', + 'chatModel', + 'chatApiStyle', + 'embeddingModel', + 'baseUrl', + 'embeddingBaseUrl', + 'sttModel', + 'sttBaseUrl', + 'sttApiStyle', + 'sttLanguage', + 'systemPrompt', + 'publicShareChatModel', + 'publicShareAssistantRoleId', +] as const satisfies readonly (keyof AiProviderSettings)[]; + /** * Fully resolved provider config, including the decrypted API key for the * stored driver. 
Returned by `AiSettingsService.resolve`. The keys are held in From b0faa2fe321b7d028f67671f71b347645b4b06e6 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 23:51:17 +0300 Subject: [PATCH 14/43] fix(ai-chat): recycle keep-alive sockets + retry pre-response resets (#175) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The real cause of the long-task "Lost connection to the AI provider" — the earlier 300s-timeout fix (#176) was the wrong layer. The provider-HTTP telemetry on the user's deploy shows the failures are PRE-RESPONSE `read ECONNRESET` ~500ms in (not a 300s/15min timeout), correlated with idleSincePrevCall ~42s and large bodies; and crucially a retry of the SAME request often succeeds. A direct probe to the real z.ai endpoint does NOT reset (113KB bodies and a 45s-idle keep-alive reuse both succeed), and another agent (opencode) runs fine from the same infra — so the provider is healthy and the egress network is usable. The difference is the transport: undici's keep-alive pool REUSES a socket that the deployment's egress (NAT / firewall / conntrack) silently dropped during a long idle gap, so the next request resets pre-response. Fix (brings gitmost in line with clients that don't reuse stale sockets): - Keep-alive recycling: the streaming dispatcher (chat fetch AND the external-MCP dispatcher, via the shared streamingDispatcherOptions) now sets keepAliveTimeout + keepAliveMaxTimeout to a 10s recycle window (AI_STREAM_KEEPALIVE_MS), so a connection idle longer than that is closed instead of reused — a long-gap step opens a fresh connection. keepAliveMaxTimeout also caps a server-advertised keep-alive so the provider can't widen the window. - Pre-response connection retry: createStreamingFetch retries a connection-level reset (ECONNRESET / UND_ERR_SOCKET / ECONNREFUSED / EPIPE / *_TIMEOUT) on a fresh connection up to 2 times. 
This is SAFE because fetch() only rejects before the Response resolves — a started stream is never replayed; an abort (client disconnect) is never retried. Tests: ai-streaming-fetch.spec — keep-alive options, streamKeepAliveMs env, isRetryableConnectError, and a server that resets the first connection so the retry must land on a fresh one (+ aborted requests are not retried). Verified on the stand that a normal turn still streams (reasoning + text + finish) through the new transport. server tsc + ai/mcp specs green. Note: root cause is the deployment's egress dropping idle connections (Traefik is inbound-only); this makes the app resilient to it. AI_STREAM_KEEPALIVE_MS can be lowered if the egress drops faster than ~10s. Co-Authored-By: Claude Opus 4.8 (1M context) --- .env.example | 7 ++ .../ai/ai-streaming-fetch.spec.ts | 96 ++++++++++++++- .../src/integrations/ai/ai-streaming-fetch.ts | 112 +++++++++++++++--- 3 files changed, 198 insertions(+), 17 deletions(-) diff --git a/.env.example b/.env.example index 4726805b..97e8dba8 100644 --- a/.env.example +++ b/.env.example @@ -142,6 +142,13 @@ MCP_DOCMOST_PASSWORD= # provider is eventually broken instead of leaking forever. Default 900000 (15 min). # AI_STREAM_TIMEOUT_MS=900000 +# Keep-alive recycle window (ms) for streaming chat/agent AI + external-MCP calls. +# A pooled connection idle longer than this is closed instead of reused, so a +# NAT / egress firewall / reverse proxy that silently drops idle connections +# cannot poison a reused socket into a PRE-RESPONSE `read ECONNRESET`. Lower it if +# your egress drops idle connections faster than ~10s. Default 10000 (10 s). +# AI_STREAM_KEEPALIVE_MS=10000 + # --- Anonymous public-share AI assistant --- # Opt-in per workspace (AI settings -> "public share assistant"; off by default). 
# When enabled, anonymous visitors of a published share can ask an AI about that diff --git a/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts b/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts index b28ecf51..1af56a26 100644 --- a/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts +++ b/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts @@ -2,7 +2,9 @@ import * as http from 'node:http'; import { createStreamingFetch, streamTimeoutMs, + streamKeepAliveMs, streamingDispatcherOptions, + isRetryableConnectError, } from './ai-streaming-fetch'; /** @@ -38,15 +40,54 @@ describe('streamTimeoutMs', () => { } }); - it('applies the timeout to BOTH undici stream timeouts', () => { + it('applies the silence timeout + keep-alive recycle window to the dispatcher', () => { delete process.env.AI_STREAM_TIMEOUT_MS; + delete process.env.AI_STREAM_KEEPALIVE_MS; expect(streamingDispatcherOptions()).toEqual({ headersTimeout: 900_000, bodyTimeout: 900_000, + keepAliveTimeout: 10_000, + keepAliveMaxTimeout: 10_000, }); }); }); +describe('streamKeepAliveMs', () => { + const ORIG = process.env.AI_STREAM_KEEPALIVE_MS; + afterEach(() => { + if (ORIG === undefined) delete process.env.AI_STREAM_KEEPALIVE_MS; + else process.env.AI_STREAM_KEEPALIVE_MS = ORIG; + }); + + it('defaults to 10s (recycle idle sockets so a NAT/proxy drop cannot poison reuse)', () => { + delete process.env.AI_STREAM_KEEPALIVE_MS; + expect(streamKeepAliveMs()).toBe(10_000); + }); + + it('honours a positive override and ignores invalid/non-positive', () => { + process.env.AI_STREAM_KEEPALIVE_MS = '4000'; + expect(streamKeepAliveMs()).toBe(4000); + for (const bad of ['0', '-1', 'x', '']) { + process.env.AI_STREAM_KEEPALIVE_MS = bad; + expect(streamKeepAliveMs()).toBe(10_000); + } + }); +}); + +describe('isRetryableConnectError', () => { + it('matches connection-level codes on the error or its cause', () => { + expect(isRetryableConnectError({ cause: { code: 'ECONNRESET' } })).toBe(true); 
+ expect(isRetryableConnectError({ cause: { code: 'UND_ERR_SOCKET' } })).toBe(true); + expect(isRetryableConnectError({ code: 'ECONNREFUSED' })).toBe(true); + }); + it('does NOT match aborts / unrelated errors', () => { + expect(isRetryableConnectError({ name: 'AbortError', cause: { code: 'ABORT_ERR' } })).toBe(false); + expect(isRetryableConnectError({ cause: { code: 'UND_ERR_HEADERS_TIMEOUT' } })).toBe(false); + expect(isRetryableConnectError(new Error('plain'))).toBe(false); + expect(isRetryableConnectError(undefined)).toBe(false); + }); +}); + describe('createStreamingFetch — against a delayed server', () => { const ORIG = process.env.AI_STREAM_TIMEOUT_MS; let server: http.Server; @@ -110,3 +151,56 @@ describe('createStreamingFetch — against a delayed server', () => { if (code) expect(code).toBe('UND_ERR_HEADERS_TIMEOUT'); }); }); + +describe('createStreamingFetch — pre-response connection retry', () => { + let server: http.Server; + let url: string; + let requests = 0; + + beforeAll(async () => { + server = http.createServer((req, res) => { + requests += 1; + if (requests === 1) { + // Reset the FIRST connection before any response byte (a poisoned/stale + // keep-alive socket). The retry must open a fresh connection. 
+ const sock = req.socket as import('node:net').Socket & { + resetAndDestroy?: () => void; + }; + if (typeof sock.resetAndDestroy === 'function') sock.resetAndDestroy(); + else sock.destroy(); + return; + } + res.writeHead(200, { 'Content-Type': 'text/plain' }); + res.end('ok'); + }); + await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve)); + const addr = server.address() as import('node:net').AddressInfo; + url = `http://127.0.0.1:${addr.port}/`; + }); + + afterAll(async () => { + await new Promise((resolve) => server.close(() => resolve())); + }); + + beforeEach(() => { + requests = 0; + }); + + it('retries a pre-response reset on a fresh connection and succeeds', async () => { + const streamingFetch = createStreamingFetch(); + const res = await streamingFetch(url); + expect(res.status).toBe(200); + expect(await res.text()).toBe('ok'); + // first request reset -> retry -> second request served. + expect(requests).toBeGreaterThanOrEqual(2); + }); + + it('does NOT retry an aborted request (no retry storm)', async () => { + const ctrl = new AbortController(); + ctrl.abort(); + const streamingFetch = createStreamingFetch(); + await expect(streamingFetch(url, { signal: ctrl.signal })).rejects.toBeDefined(); + // Pre-aborted: the request never reached the server, so nothing was retried. + expect(requests).toBe(0); + }); +}); diff --git a/apps/server/src/integrations/ai/ai-streaming-fetch.ts b/apps/server/src/integrations/ai/ai-streaming-fetch.ts index f257fe4e..75a3770d 100644 --- a/apps/server/src/integrations/ai/ai-streaming-fetch.ts +++ b/apps/server/src/integrations/ai/ai-streaming-fetch.ts @@ -18,41 +18,121 @@ import { Agent } from 'undici'; */ const DEFAULT_STREAM_TIMEOUT_MS = 900_000; +/** + * Default keep-alive recycle window (10s). A pooled connection idle longer than + * this is CLOSED rather than reused. + * + * Long agent turns leave gaps of tens of seconds between provider calls (one + * call per step; a crawl/search tool runs in between). 
A NAT / reverse proxy / + * conntrack in front of the deployment silently drops an idle connection after + * its own timeout; undici, not knowing, then reuses that dead socket and the + * next request fails PRE-RESPONSE with `read ECONNRESET` (#175 prod telemetry: + * the resets correlate with idleSincePrevCall ~42s, while a direct path to the + * provider does NOT reset). Recycling idle sockets well below such a drop window + * means a long-gap call opens a fresh connection instead of reusing a stale one. + * `keepAliveMaxTimeout` also caps a server-advertised keep-alive so the provider + * cannot push the reuse window back up. + */ +const DEFAULT_STREAM_KEEPALIVE_MS = 10_000; + +/** + * How many times to retry a PRE-RESPONSE connection failure (a reset/timeout + * before ANY response byte) on a fresh connection. Safe because `fetch()` only + * rejects before the Response resolves — a started stream is never replayed. + */ +const PRE_RESPONSE_CONNECT_RETRIES = 2; + +/** undici cause codes for a connection-level failure that occurred PRE-RESPONSE. */ +const RETRYABLE_CONNECT_CODES = new Set([ + 'ECONNRESET', + 'ECONNREFUSED', + 'EPIPE', + 'ETIMEDOUT', + 'UND_ERR_SOCKET', + 'UND_ERR_CONNECT_TIMEOUT', +]); + +function positiveEnv(name: string, fallback: number): number { + const raw = Number(process.env[name]); + return Number.isFinite(raw) && raw > 0 ? raw : fallback; +} + /** * The configured silence timeout (ms). Override with `AI_STREAM_TIMEOUT_MS`; a * missing/invalid/non-positive value falls back to {@link DEFAULT_STREAM_TIMEOUT_MS}. */ export function streamTimeoutMs(): number { - const raw = Number(process.env.AI_STREAM_TIMEOUT_MS); - return Number.isFinite(raw) && raw > 0 ? raw : DEFAULT_STREAM_TIMEOUT_MS; + return positiveEnv('AI_STREAM_TIMEOUT_MS', DEFAULT_STREAM_TIMEOUT_MS); +} + +/** Keep-alive recycle window (ms). Override with `AI_STREAM_KEEPALIVE_MS`. 
*/ +export function streamKeepAliveMs(): number { + return positiveEnv('AI_STREAM_KEEPALIVE_MS', DEFAULT_STREAM_KEEPALIVE_MS); } /** - * undici `Agent` timeout options for streaming AI traffic — both stream timeouts - * set to the (generous, finite) silence timeout. Shared by the chat provider - * fetch and the external-MCP dispatcher so they behave identically (#175). + * undici `Agent` options for streaming AI traffic — the (generous, finite) + * silence timeouts plus the keep-alive recycle window. Shared by the chat + * provider fetch and the external-MCP dispatcher so they behave identically. */ export function streamingDispatcherOptions(): { headersTimeout: number; bodyTimeout: number; + keepAliveTimeout: number; + keepAliveMaxTimeout: number; } { const t = streamTimeoutMs(); - return { headersTimeout: t, bodyTimeout: t }; + const ka = streamKeepAliveMs(); + return { + headersTimeout: t, + bodyTimeout: t, + keepAliveTimeout: ka, + keepAliveMaxTimeout: ka, + }; +} + +/** True for a connection-level error worth retrying on a fresh connection. */ +export function isRetryableConnectError(err: unknown): boolean { + const e = err as { code?: string; cause?: { code?: string } } | undefined; + const code = e?.cause?.code ?? e?.code; + return typeof code === 'string' && RETRYABLE_CONNECT_CODES.has(code); } /** * Build a `fetch` for long-lived streaming AI calls (the agent chat turn) backed - * by a dedicated undici dispatcher whose stream timeouts are the generous-but- - * finite silence timeout above (#175). A single shared dispatcher is returned - * (callers hold it for the service lifetime) so its connection pool is reused. + * by a dedicated undici dispatcher (finite silence timeouts + keep-alive + * recycling, #175). A single shared dispatcher is returned (callers hold it for + * the service lifetime) so its connection pool is reused. 
+ * + * On a PRE-RESPONSE connection reset (`fetch()` rejects before the Response + * resolves — so nothing has streamed) it retries a few times on a fresh + * connection. A poisoned keep-alive socket is destroyed by undici on the reset, + * so the retry lands on a new connection. An abort (client disconnect) is never + * retried. */ export function createStreamingFetch(): typeof fetch { const dispatcher = new Agent(streamingDispatcherOptions()); - return ((input: Parameters[0], init?: RequestInit) => - fetch(input, { - ...(init ?? {}), - // `dispatcher` is an undici-specific init field (not in the DOM RequestInit - // type); Node's global fetch reads it. Cast to satisfy the type. - dispatcher, - } as RequestInit & { dispatcher: Agent })) as typeof fetch; + return (async (input: Parameters[0], init?: RequestInit) => { + for (let attempt = 0; ; attempt++) { + try { + return await fetch(input, { + ...(init ?? {}), + // `dispatcher` is an undici-specific init field (not in the DOM + // RequestInit type); Node's global fetch reads it. Cast to satisfy it. + dispatcher, + } as RequestInit & { dispatcher: Agent }); + } catch (err) { + const aborted = init?.signal?.aborted === true; + if ( + aborted || + attempt >= PRE_RESPONSE_CONNECT_RETRIES || + !isRetryableConnectError(err) + ) { + throw err; + } + // Brief backoff before the fresh-connection retry. + await new Promise((resolve) => setTimeout(resolve, 150 * (attempt + 1))); + } + } + }) as typeof fetch; } From 91e7335d54f622b1eee3efa89d8e0ba4a9edd4ca Mon Sep 17 00:00:00 2001 From: claude_code Date: Thu, 25 Jun 2026 00:02:44 +0300 Subject: [PATCH 15/43] refactor(ai-chat): drop thinking-token text from typing indicator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The live typing placeholder now shows only the bouncing dots; the "Thinking… · N tokens" line is removed. 
Clean up the dead plumbing: - typing-indicator: remove thinkingTokens prop, thinkingLine and the line; keep the animated dots and the dimmed name label - message-list: remove tailThinkingTokens helper, the thinkingTokens prop pass-through, and the now-unused liveTurnTokens import - delete tail-thinking-tokens.test.ts (tested the removed helper) Co-Authored-By: Claude Opus 4.8 --- .../ai-chat/components/message-list.tsx | 15 ------ .../components/tail-thinking-tokens.test.ts | 50 ------------------- .../ai-chat/components/typing-indicator.tsx | 21 ++------ 3 files changed, 3 insertions(+), 83 deletions(-) delete mode 100644 apps/client/src/features/ai-chat/components/tail-thinking-tokens.test.ts diff --git a/apps/client/src/features/ai-chat/components/message-list.tsx b/apps/client/src/features/ai-chat/components/message-list.tsx index d9995cda..f04ca2ab 100644 --- a/apps/client/src/features/ai-chat/components/message-list.tsx +++ b/apps/client/src/features/ai-chat/components/message-list.tsx @@ -6,7 +6,6 @@ import MessageItem from "@/features/ai-chat/components/message-item.tsx"; import TypingIndicator from "@/features/ai-chat/components/typing-indicator.tsx"; import { isToolPart, toolRunState, ToolUiPart } from "@/features/ai-chat/utils/tool-parts.tsx"; import { assistantMessageHasVisibleContent } from "@/features/ai-chat/utils/message-content.ts"; -import { liveTurnTokens } from "@/features/ai-chat/utils/count-stream-tokens.ts"; import classes from "@/features/ai-chat/components/ai-chat.module.css"; interface MessageListProps { @@ -95,19 +94,6 @@ export function typingIndicatorShowsName(messages: UIMessage[]): boolean { return !assistantMessageHasVisibleContent(last); } -/** - * The live thinking-token count to show on the standalone typing indicator. It - * is the reasoning split of the tail assistant message (estimate while streaming, - * authoritative once the server attaches usage at a step/turn boundary). 
Returns - * 0 when the turn has produced no reasoning yet — the indicator then shows the - * plain "Thinking…" line. - */ -export function tailThinkingTokens(messages: UIMessage[]): number { - const last = messages[messages.length - 1]; - if (!last || last.role !== "assistant") return 0; - return liveTurnTokens(last).reasoning; -} - /** * Scrollable transcript. Auto-scrolls to the newest message as it streams in, * but only while the user is pinned to the bottom — if they scrolled up to read @@ -208,7 +194,6 @@ export default function MessageList({ )} diff --git a/apps/client/src/features/ai-chat/components/tail-thinking-tokens.test.ts b/apps/client/src/features/ai-chat/components/tail-thinking-tokens.test.ts deleted file mode 100644 index 5f421aec..00000000 --- a/apps/client/src/features/ai-chat/components/tail-thinking-tokens.test.ts +++ /dev/null @@ -1,50 +0,0 @@ -import { describe, expect, it } from "vitest"; -import type { UIMessage } from "@ai-sdk/react"; -import { tailThinkingTokens } from "@/features/ai-chat/components/message-list.tsx"; - -/** - * Pure-helper tests for `tailThinkingTokens`: the live thinking-token count the - * standalone typing indicator shows. It is the reasoning split of the tail - * assistant message (estimate while streaming, authoritative once usage arrives). 
- */ -const msg = ( - role: "user" | "assistant", - parts: unknown[], - metadata?: unknown, -): UIMessage => - ({ id: Math.random().toString(), role, parts, metadata }) as UIMessage; - -describe("tailThinkingTokens", () => { - it("is 0 when there are no messages", () => { - expect(tailThinkingTokens([])).toBe(0); - }); - - it("is 0 when the tail message is the user's", () => { - expect(tailThinkingTokens([msg("user", [{ type: "text", text: "q" }])])).toBe(0); - }); - - it("is 0 when the assistant has produced no reasoning yet", () => { - expect( - tailThinkingTokens([msg("assistant", [{ type: "text", text: "answer" }])]), - ).toBe(0); - }); - - it("estimates reasoning tokens from streamed reasoning text", () => { - // 8 chars -> 2 tokens. - expect( - tailThinkingTokens([ - msg("assistant", [{ type: "reasoning", text: "12345678" }]), - ]), - ).toBe(2); - }); - - it("uses authoritative usage.reasoningTokens once the server attaches it", () => { - expect( - tailThinkingTokens([ - msg("assistant", [{ type: "reasoning", text: "x" }], { - usage: { outputTokens: 100, reasoningTokens: 42 }, - }), - ]), - ).toBe(42); - }); -}); diff --git a/apps/client/src/features/ai-chat/components/typing-indicator.tsx b/apps/client/src/features/ai-chat/components/typing-indicator.tsx index 72ac3179..a3e9f937 100644 --- a/apps/client/src/features/ai-chat/components/typing-indicator.tsx +++ b/apps/client/src/features/ai-chat/components/typing-indicator.tsx @@ -16,12 +16,6 @@ interface TypingIndicatorProps { * assistant row above already shows the same name, to avoid a duplicate label. */ showName?: boolean; - /** - * Live thinking/reasoning token count for the in-flight turn. When > 0 the - * typing line becomes `Thinking… · {count} tokens` (like Claude Code). Omitted - * / 0 keeps the plain `Thinking…` line. 
- */ - thinkingTokens?: number; } /** @@ -32,18 +26,12 @@ interface TypingIndicatorProps { * * Mirrors the assistant row layout in MessageItem (the dimmed label), so it reads * as the assistant's bubble taking shape. The dimmed label uses the configured - * identity name when provided (otherwise the generic "AI agent"), while the - * typing line is always the generic "Thinking…" (it never includes the - * role/identity name). + * identity name when provided (otherwise the generic "AI agent"); below it the + * animated dots stand in for the nascent bubble until content arrives. */ -export default function TypingIndicator({ assistantName, showName = true, thinkingTokens }: TypingIndicatorProps) { +export default function TypingIndicator({ assistantName, showName = true }: TypingIndicatorProps) { const { t } = useTranslation(); const name = resolveAssistantName(assistantName); - // Show the running thinking-token count only once there is something to count. - const thinkingLine = - thinkingTokens && thinkingTokens > 0 - ? t("Thinking… · {{count}} tokens", { count: thinkingTokens }) - : t("Thinking…"); return ( @@ -58,9 +46,6 @@ export default function TypingIndicator({ assistantName, showName = true, thinki - - {thinkingLine} - ); From c065e26d14c36cc3d11eb7cc5ae73935cd9d11b4 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Thu, 25 Jun 2026 00:10:40 +0300 Subject: [PATCH 16/43] refactor(ai): retry outside instrumentation + retry-exhaustion test (#179 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Invert the transport layers so the pre-response retry is OUTERMOST and the provider-HTTP instrumentation is INNER. Before, the retry lived inside createStreamingFetch (under the instrumentation), so a reset the retry recovered from logged only a clean "OK status=200" — the "PRE-RESPONSE FAILED ... ECONNRESET ... 
idleSincePrevCall" signal went blind exactly when the fix works, and AI_STREAM_KEEPALIVE_MS couldn't be tuned from prod data. Now createStreamingFetch is the dispatcher-bound BASE (no retry) and a new withPreResponseRetry() wraps it; ai.service composes withPreResponseRetry(createInstrumentedFetch('AiService:provider-http', createStreamingFetch())), so every attempt — including recovered resets — flows through the instrumentation. (Also expresses the keepAlive-config vs retry- behavior boundary structurally, per review #3.) - Add the retry-exhaustion test: a server that resets EVERY connection, asserting the call rejects with a retryable connection error AND exactly PRE_RESPONSE_CONNECT_RETRIES + 1 (= 3) requests reached the server — pinning the bound and that the final error propagates (guards an off-by-one / infinite loop / swallowed error). Existing happy-retry + abort tests moved onto withPreResponseRetry. Verified on the stand: a normal turn still streams (reasoning + finish) and the provider-HTTP telemetry still logs. server tsc + ai/mcp specs green (30). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ai/ai-streaming-fetch.spec.ts | 47 +++++++++++++++---- .../src/integrations/ai/ai-streaming-fetch.ts | 40 +++++++++++----- apps/server/src/integrations/ai/ai.service.ts | 22 +++++---- 3 files changed, 80 insertions(+), 29 deletions(-) diff --git a/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts b/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts index 1af56a26..07c8ec40 100644 --- a/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts +++ b/apps/server/src/integrations/ai/ai-streaming-fetch.spec.ts @@ -1,6 +1,7 @@ import * as http from 'node:http'; import { createStreamingFetch, + withPreResponseRetry, streamTimeoutMs, streamKeepAliveMs, streamingDispatcherOptions, @@ -152,17 +153,25 @@ describe('createStreamingFetch — against a delayed server', () => { }); }); -describe('createStreamingFetch — pre-response connection retry', () => { +describe('withPreResponseRetry', () => { + // The retry is the OUTERMOST layer (over the dispatcher-bound streaming fetch), + // matching ai.service's withPreResponseRetry(instrument(createStreamingFetch())). + // PRE_RESPONSE_CONNECT_RETRIES is 2 -> at most 3 total attempts. + const MAX_ATTEMPTS = 3; let server: http.Server; let url: string; let requests = 0; + // 'first' resets only the first connection; 'all' resets every connection. + let resetMode: 'first' | 'all' = 'first'; + + const retryingFetch = () => withPreResponseRetry(createStreamingFetch()); beforeAll(async () => { server = http.createServer((req, res) => { requests += 1; - if (requests === 1) { - // Reset the FIRST connection before any response byte (a poisoned/stale - // keep-alive socket). The retry must open a fresh connection. + const shouldReset = resetMode === 'all' || requests === 1; + if (shouldReset) { + // Reset before any response byte (a poisoned/stale keep-alive socket). 
const sock = req.socket as import('node:net').Socket & { resetAndDestroy?: () => void; }; @@ -184,22 +193,42 @@ describe('createStreamingFetch — pre-response connection retry', () => { beforeEach(() => { requests = 0; + resetMode = 'first'; }); it('retries a pre-response reset on a fresh connection and succeeds', async () => { - const streamingFetch = createStreamingFetch(); - const res = await streamingFetch(url); + resetMode = 'first'; + const res = await retryingFetch()(url); expect(res.status).toBe(200); expect(await res.text()).toBe('ok'); // first request reset -> retry -> second request served. - expect(requests).toBeGreaterThanOrEqual(2); + expect(requests).toBe(2); + }); + + it('gives up after the retry bound and rethrows the original reset', async () => { + resetMode = 'all'; // every attempt resets -> retries exhaust + let caught: unknown; + try { + await retryingFetch()(url); + } catch (e) { + caught = e; + } + expect(caught).toBeDefined(); + // A retryable connection error reached the caller (not swallowed). + expect(isRetryableConnectError(caught)).toBe(true); + // Bounded: exactly PRE_RESPONSE_CONNECT_RETRIES + 1 attempts hit the server + // (pins both the limit and that the final error propagates — guards an + // off-by-one or an infinite loop). + expect(requests).toBe(MAX_ATTEMPTS); }); it('does NOT retry an aborted request (no retry storm)', async () => { + resetMode = 'all'; const ctrl = new AbortController(); ctrl.abort(); - const streamingFetch = createStreamingFetch(); - await expect(streamingFetch(url, { signal: ctrl.signal })).rejects.toBeDefined(); + await expect( + retryingFetch()(url, { signal: ctrl.signal }), + ).rejects.toBeDefined(); // Pre-aborted: the request never reached the server, so nothing was retried. 
expect(requests).toBe(0); }); diff --git a/apps/server/src/integrations/ai/ai-streaming-fetch.ts b/apps/server/src/integrations/ai/ai-streaming-fetch.ts index 75a3770d..b781df9a 100644 --- a/apps/server/src/integrations/ai/ai-streaming-fetch.ts +++ b/apps/server/src/integrations/ai/ai-streaming-fetch.ts @@ -104,23 +104,41 @@ export function isRetryableConnectError(err: unknown): boolean { * recycling, #175). A single shared dispatcher is returned (callers hold it for * the service lifetime) so its connection pool is reused. * - * On a PRE-RESPONSE connection reset (`fetch()` rejects before the Response - * resolves — so nothing has streamed) it retries a few times on a fresh - * connection. A poisoned keep-alive socket is destroyed by undici on the reset, - * so the retry lands on a new connection. An abort (client disconnect) is never - * retried. + * This is the BASE transport — no retry. The chat path wraps it as + * `withPreResponseRetry(createInstrumentedFetch(ctx, createStreamingFetch()))` + * so the retry is the OUTERMOST layer and the instrumentation observes EVERY + * attempt (a recovered reset is still logged — see withPreResponseRetry). */ export function createStreamingFetch(): typeof fetch { const dispatcher = new Agent(streamingDispatcherOptions()); + return ((input: Parameters[0], init?: RequestInit) => + fetch(input, { + ...(init ?? {}), + // `dispatcher` is an undici-specific init field (not in the DOM + // RequestInit type); Node's global fetch reads it. Cast to satisfy it. + dispatcher, + } as RequestInit & { dispatcher: Agent })) as typeof fetch; +} + +/** + * Wrap a fetch so a PRE-RESPONSE connection reset (`baseFetch` rejects before the + * Response resolves — so nothing has streamed) is retried a few times on a fresh + * connection (#175). A poisoned keep-alive socket is destroyed by undici on the + * reset, so the retry lands on a new connection. An abort (client disconnect) is + * never retried. 
+ * + * This is the OUTERMOST transport layer by design: composing it as + * `withPreResponseRetry(instrumentedFetch)` means every attempt — including the + * resets that the retry recovers from — flows through the instrumentation, so the + * "PRE-RESPONSE FAILED ... ECONNRESET ... idleSincePrevCall" telemetry stays + * visible precisely when the fix is working (and AI_STREAM_KEEPALIVE_MS can be + * tuned from real data). A retry INSIDE the transport would hide it. + */ +export function withPreResponseRetry(baseFetch: typeof fetch): typeof fetch { return (async (input: Parameters[0], init?: RequestInit) => { for (let attempt = 0; ; attempt++) { try { - return await fetch(input, { - ...(init ?? {}), - // `dispatcher` is an undici-specific init field (not in the DOM - // RequestInit type); Node's global fetch reads it. Cast to satisfy it. - dispatcher, - } as RequestInit & { dispatcher: Agent }); + return await baseFetch(input, init); } catch (err) { const aborted = init?.signal?.aborted === true; if ( diff --git a/apps/server/src/integrations/ai/ai.service.ts b/apps/server/src/integrations/ai/ai.service.ts index 18f15b5d..16aa6997 100644 --- a/apps/server/src/integrations/ai/ai.service.ts +++ b/apps/server/src/integrations/ai/ai.service.ts @@ -16,7 +16,10 @@ import { AiEmbeddingNotConfiguredException } from './ai-embedding-not-configured import { AiSttNotConfiguredException } from './ai-stt-not-configured.exception'; import { describeProviderError } from './ai-error.util'; import { createInstrumentedFetch } from './ai-provider-http'; -import { createStreamingFetch } from './ai-streaming-fetch'; +import { + createStreamingFetch, + withPreResponseRetry, +} from './ai-streaming-fetch'; import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo'; import { SecretBoxService } from '../crypto/secret-box'; import { AiDriver } from './ai.types'; @@ -46,14 +49,15 @@ export interface ChatModelOverride { export class AiService { private 
readonly logger = new Logger(AiService.name); - // Provider HTTP fetch for the chat path: the streaming fetch — which RAISES - // undici's 300s headers/body timeouts to a generous-but-finite silence timeout - // so a long agent turn is not severed mid-stream (#175) — wrapped with the - // provider-HTTP instrumentation so the logs observe that exact transport. Held - // for the service lifetime to reuse the streaming dispatcher's connection pool. - private readonly aiProviderFetch = createInstrumentedFetch( - 'AiService:provider-http', - createStreamingFetch(), + // Provider HTTP fetch for the chat path, layered so each transport concern is + // observed (#175). Inside-out: the streaming fetch (finite silence timeouts + + // keep-alive recycling) → provider-HTTP instrumentation (logs every attempt) → + // pre-response connection-reset retry as the OUTERMOST layer. Retry-outer means + // a reset the retry recovers from is still logged with its idle-gap, instead of + // collapsing into a clean "OK". Held for the service lifetime to reuse the + // streaming dispatcher's connection pool. + private readonly aiProviderFetch = withPreResponseRetry( + createInstrumentedFetch('AiService:provider-http', createStreamingFetch()), ); constructor( From bf2ebb9d4739d565581da5041aab80ddc2a6ed52 Mon Sep 17 00:00:00 2001 From: claude_code Date: Thu, 25 Jun 2026 00:21:53 +0300 Subject: [PATCH 17/43] fix(ai-chat): increase bottom margin for typing indicator name The name label was crowding the bouncing dots when displayed. Adding extra bottom margin (mb={8}) gives the dots room and improves readability. The change only applies when the name is shown. 
--- .../src/features/ai-chat/components/typing-indicator.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/client/src/features/ai-chat/components/typing-indicator.tsx b/apps/client/src/features/ai-chat/components/typing-indicator.tsx index a3e9f937..706f20e5 100644 --- a/apps/client/src/features/ai-chat/components/typing-indicator.tsx +++ b/apps/client/src/features/ai-chat/components/typing-indicator.tsx @@ -36,7 +36,10 @@ export default function TypingIndicator({ assistantName, showName = true }: Typi return ( {showName !== false && ( - + // Extra bottom gap (vs MessageItem's mb={4}) gives the small bouncing + // dots room below the name label; without it they crowd the label. Only + // applies when the name is shown — the nameless case spaces fine on its own. + {name ?? t("AI agent")} )} From 5aa199660d42a8ac27029f04e125d81a13805b28 Mon Sep 17 00:00:00 2001 From: claude_code Date: Thu, 25 Jun 2026 00:34:22 +0300 Subject: [PATCH 18/43] fix(ai-chat): keep thinking dots visible between streamed steps showTypingIndicator hid the standalone thinking dots for any non-empty trailing text part, so during the pause after the model finished an intermediate narration and before its next step (e.g. a tool call) the UI looked frozen. Suppress the dots only while the text part is still streaming: a finalized ("done") trailing text part on an in-flight turn now shows the dots again, matching the function's documented intent. 
- message-list: guard the text branch with state !== "done" (AI SDK v6 TextUIPart.state); stateless parts keep their previous behavior - show-typing-indicator.test: add done -> shown and streaming -> hidden cases Co-Authored-By: Claude Opus 4.8 --- .../ai-chat/components/message-list.tsx | 18 ++++++++++++++++-- .../components/show-typing-indicator.test.ts | 10 ++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/apps/client/src/features/ai-chat/components/message-list.tsx b/apps/client/src/features/ai-chat/components/message-list.tsx index f04ca2ab..fda2a87f 100644 --- a/apps/client/src/features/ai-chat/components/message-list.tsx +++ b/apps/client/src/features/ai-chat/components/message-list.tsx @@ -50,7 +50,9 @@ const BOTTOM_THRESHOLD = 40; * assistant message's LAST part is not live output: * - the last message is still the user's (assistant hasn't started a row), or * - the assistant row has no parts yet, or - * - its last part is an empty/whitespace text part, or + * - its last part is an empty/whitespace text part, or a finished ("done") + * text part while the turn continues (the model paused after some narration + * and is thinking about its next step), or * - its last part is a finished/errored tool (the model is thinking about the * next step between tool calls). * It hides only while output is actively rendering: a non-empty streaming text @@ -64,7 +66,19 @@ export function showTypingIndicator(messages: UIMessage[], isStreaming: boolean) const lastPart = last.parts[last.parts.length - 1]; if (!lastPart) return true; // assistant row exists but has no parts yet. // The answer text is actively streaming in -> MessageItem renders it; no dots. - if (lastPart.type === "text" && lastPart.text.trim().length > 0) return false; + // Only while it is STILL streaming, though: once a non-empty text part is + // finalized ("done") but the turn is still in flight, the model has paused + // after some narration and is working on its next step (e.g. 
about to call a + // tool) — nothing is visibly progressing, so the dots must show. A text part + // without a `state` is treated as still-rendering (kept suppressed); this + // branch only runs while streaming, where live parts always carry a state. + if ( + lastPart.type === "text" && + lastPart.text.trim().length > 0 && + (lastPart as { state?: "streaming" | "done" }).state !== "done" + ) { + return false; + } // A tool still in flight shows its own Loader in ToolCallCard -> no dots. if ( isToolPart(lastPart.type) && diff --git a/apps/client/src/features/ai-chat/components/show-typing-indicator.test.ts b/apps/client/src/features/ai-chat/components/show-typing-indicator.test.ts index 0c18431b..34364b55 100644 --- a/apps/client/src/features/ai-chat/components/show-typing-indicator.test.ts +++ b/apps/client/src/features/ai-chat/components/show-typing-indicator.test.ts @@ -82,4 +82,14 @@ describe("showTypingIndicator", () => { showTypingIndicator([msg("assistant", [doneTool, text])], true), ).toBe(false); }); + + it("shows while streaming after a text part is finalized (paused before the next step)", () => { + const doneText = { type: "text", text: "Now creating the page in", state: "done" } as unknown as UIMessage["parts"][number]; + expect(showTypingIndicator([msg("assistant", [doneText])], true)).toBe(true); + }); + + it("hides while a text part is actively streaming (state: streaming)", () => { + const streamingText = { type: "text", text: "Now writ", state: "streaming" } as unknown as UIMessage["parts"][number]; + expect(showTypingIndicator([msg("assistant", [streamingText])], true)).toBe(false); + }); }); From 4597183a1e58a8ebf75def9eea228ca6fb5f75de Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Wed, 24 Jun 2026 15:00:37 +0300 Subject: [PATCH 19/43] fix(ai-chat): WYSIWYG Copy chat export keeps the on-screen partial reply (#160) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "Copy chat" built the Markdown 
from persisted rows plus a live tail that was only included while isStreaming. When a turn was interrupted (dropped stream / "Lost connection" banner) isStreaming flipped false, the live tail was dropped, and the partial assistant reply visible on screen — whose row often never persisted — vanished from the export, leaving only the user messages. - buildChatMarkdown is now live-first: the on-screen `live` messages ARE the document. Each is matched to a persisted row by id to enrich it with token usage / error / timestamp; authoritative usage/error already on the live message win over the row. When `live` is empty it falls back to the persisted rows (old format preserved). Only the tail assistant is flagged "still generating", and only when it is genuinely the streaming tail — so the status==="submitted" window (tail is the user message) never mislabels the previous, completed answer. - The on-screen banner (classified error / dropped connection / manual stop) is flattened to a string in ChatThread, mirrored into liveStateRef alongside the messages/isStreaming snapshot, and appended at the end of the export. - handleCopy maps the live messages and passes live/rows/isStreaming/banner. Tests: chat-markdown rewritten for the live/enrichment/fallback/banner paths and the submitted-window regression (26); full ai-chat suite green (186). tsc clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ai-chat/components/ai-chat-window.tsx | 53 ++- .../ai-chat/components/chat-thread.tsx | 44 ++- .../ai-chat/utils/chat-markdown.test.ts | 303 +++++++++++++----- .../features/ai-chat/utils/chat-markdown.ts | 185 ++++++++--- 4 files changed, 424 insertions(+), 161 deletions(-) diff --git a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx index 5f6b1dde..3990a0ba 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx @@ -151,9 +151,14 @@ export default function AiChatWindow() { // Live snapshot of the active thread's useChat state, kept up to date by // ChatThread. Lets the export include the in-progress (not-yet-persisted) // streaming turn. A ref avoids re-rendering this window on every token. - const liveThreadRef = useRef<{ messages: UIMessage[]; isStreaming: boolean }>({ + const liveThreadRef = useRef<{ + messages: UIMessage[]; + isStreaming: boolean; + banner: string | null; + }>({ messages: [], isStreaming: false, + banner: null, }); // Live turn-token total (reasoning + output) for the in-flight turn, pushed up @@ -249,28 +254,42 @@ export default function AiChatWindow() { // call) and copy it to the clipboard. The "Copied" notification is the // feedback. const handleCopy = useCallback(() => { + // Export gate. Requiring at least one persisted row means a brand-new chat + // whose VERY FIRST turn dropped before the server persisted even the user + // message cannot be exported (the button is also hidden — see `canExport`). + // That narrow first-turn case is deliberately out of scope for #160; the user + // message is normally persisted before model contact, so an interrupted later + // turn still has rows and exports the on-screen partial reply WYSIWYG. 
if (!activeChatId || !messageRows || messageRows.length === 0) return; - // While the active thread is streaming, the current user message and the - // in-progress assistant reply are NOT yet in messageRows (the persisted - // query is only refetched after the turn finishes). Pull the live tail — - // messages whose id is not among the persisted rows — and append them, - // flagging the streaming assistant message as still generating. + // WYSIWYG export: the live on-screen messages ARE the document (so a partial + // reply from an interrupted turn — which never reached the persisted rows — + // is exported just as it appears). The persisted rows enrich each live + // message (token usage / error / timestamp) by id and serve as the fallback + // when the live mirror is empty. The on-screen banner is appended too. See + // issue #160. const live = liveThreadRef.current; - const rowIds = new Set(messageRows.map((r) => r.id)); - const pending = live.isStreaming - ? live.messages - .filter((m) => !rowIds.has(m.id)) - .map((m) => ({ - role: m.role, - parts: (m.parts ?? []) as { type: string; text?: string }[], - generating: m.role === "assistant", - })) - : []; const markdown = buildChatMarkdown({ title: activeChat?.title ?? null, chatId: activeChatId, + live: live.messages.map((m) => ({ + id: m.id, + role: m.role, + parts: (m.parts ?? 
[]) as { type: string; text?: string }[], + metadata: m.metadata as + | { + usage?: { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + reasoningTokens?: number; + }; + error?: string; + } + | undefined, + })), rows: messageRows, - pending, + isStreaming: live.isStreaming, + banner: live.banner, t, }); clipboard.copy(markdown); diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx index 3898136e..f58d47d4 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx @@ -73,7 +73,11 @@ interface ChatThreadProps { * "Copy chat" export can include the in-progress, not-yet-persisted * assistant message. A ref (not state) avoids re-rendering the parent on * every streamed delta. */ - liveStateRef?: MutableRefObject<{ messages: UIMessage[]; isStreaming: boolean }>; + liveStateRef?: MutableRefObject<{ + messages: UIMessage[]; + isStreaming: boolean; + banner: string | null; + }>; /** Reports the live turn-token total (reasoning + output) for the in-flight * turn so the parent can show a header badge that ticks mid-stream. THROTTLED * here (~8 Hz) so the parent re-renders a handful of times a second, not on @@ -309,18 +313,37 @@ export default function ChatThread({ if (isStreaming) setStopNotice(null); }, [isStreaming]); + // Classify the turn error into a heading + detail so the banner names the cause + // (connection reset, timeout, rate limit, context overflow, quota, ...) instead + // of a generic "Something went wrong". Computed here (not only in the JSX) so + // the SAME on-screen banner text can be mirrored into the export (issue #160). + const errorView = error ? describeChatError(error.message ?? 
"", t) : null; + + // The exact banner the user sees under the message list, flattened to a single + // string for the "Copy chat" export so the artifact records the interruption + // WYSIWYG. Mirrors the JSX precedence below: error first, else the stop notice. + const banner = errorView + ? errorView.detail + ? `${errorView.title} — ${errorView.detail}` + : errorView.title + : stopNotice === "manual" + ? t("Response stopped.") + : stopNotice === "disconnect" + ? t("Connection lost — the answer was interrupted.") + : null; + // Mirror the live useChat snapshot into the parent-owned ref so the export - // (handled in AiChatWindow) can include the in-progress streaming turn. The - // cleanup clears the ref on unmount so a thread torn down by `key` on chat - // switch can't leak its (possibly still-streaming) tail into the next chat's - // export before the new thread's effect repopulates the ref. + // (handled in AiChatWindow) can include the in-progress streaming turn AND the + // on-screen banner. The cleanup clears the ref on unmount so a thread torn down + // by `key` on chat switch can't leak its (possibly still-streaming) tail into + // the next chat's export before the new thread's effect repopulates the ref. 
useEffect(() => { if (!liveStateRef) return; - liveStateRef.current = { messages, isStreaming }; + liveStateRef.current = { messages, isStreaming, banner }; return () => { - liveStateRef.current = { messages: [], isStreaming: false }; + liveStateRef.current = { messages: [], isStreaming: false, banner: null }; }; - }, [liveStateRef, messages, isStreaming]); + }, [liveStateRef, messages, isStreaming, banner]); // Report the live turn-token total to the parent header badge, THROTTLED to // ~8 Hz so the parent re-renders a few times a second instead of on every @@ -370,11 +393,6 @@ export default function ChatThread({ }; }, []); - // Classify the turn error into a heading + detail so the banner names the cause - // (connection reset, timeout, rate limit, context overflow, quota, ...) instead - // of a generic "Something went wrong". - const errorView = error ? describeChatError(error.message ?? "", t) : null; - // A role was picked with autoStart=false: the role is bound but NOTHING was // sent, so chatId stays null and the empty state would keep showing the cards. // This flag hides the cards and reveals the composer (with the role indicated) diff --git a/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts b/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts index 651d1d26..97628d8b 100644 --- a/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts +++ b/apps/client/src/features/ai-chat/utils/chat-markdown.test.ts @@ -367,125 +367,258 @@ describe("buildChatMarkdown — token totals", () => { }); }); -describe("buildChatMarkdown — pending / in-progress messages", () => { - it("continues the heading numbering after the persisted rows", () => { +// A minimal on-screen (live) message, matching the subset buildChatMarkdown reads. +function live(partial: { + id?: string; + role?: string; + parts?: { type: string; text?: string }[]; + metadata?: { usage?: Record; error?: string }; +}) { + return { + id: partial.id ?? 
"live-id", + role: partial.role ?? "assistant", + parts: partial.parts ?? [], + metadata: partial.metadata, + }; +} + +describe("buildChatMarkdown — live (WYSIWYG) source", () => { + it("uses the live messages as the document (what's on screen), numbered from 1", () => { const md = buildChatMarkdown({ title: "t", chatId: "c", - rows: [row({ role: "user", content: "persisted" })], - pending: [ - { - role: "user", - parts: [{ type: "text", text: "live question" }], - generating: false, - }, - { - role: "assistant", - parts: [{ type: "text", text: "live answer" }], - generating: true, - }, + // Persisted rows hold only the user turn; the assistant reply is live-only. + rows: [row({ id: "u1", role: "user", content: "persisted user" })], + live: [ + live({ id: "u1", role: "user", parts: [{ type: "text", text: "on-screen user" }] }), + live({ id: "a1", role: "assistant", parts: [{ type: "text", text: "on-screen reply" }] }), ], + isStreaming: false, t, }); expect(md).toContain("## 1. You"); - expect(md).toContain("## 2. You"); - expect(md).toContain("## 3. AI agent"); - expect(md).toContain("live question"); - expect(md).toContain("live answer"); + expect(md).toContain("## 2. AI agent"); + expect(md).toContain("on-screen user"); + expect(md).toContain("on-screen reply"); + // Message count reflects the LIVE document, not rows + live. 
+ expect(md).toContain("- Messages: 2"); }); - it("flags a generating assistant pending message as still being generated", () => { + it("captures a partial reply from an interrupted (non-streaming) turn — no 'generating' note", () => { const md = buildChatMarkdown({ title: "t", chatId: "c", - rows: [row({ role: "user", content: "persisted" })], - pending: [ - { + rows: [row({ id: "u1", role: "user", content: "q" })], + live: [ + live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }), + live({ + id: "a-live", role: "assistant", - parts: [{ type: "text", text: "partial reply" }], - generating: true, - }, + parts: [{ type: "text", text: "partial plan before the drop" }], + }), ], + isStreaming: false, // the stream dropped — not streaming anymore + banner: "Connection lost — the answer was interrupted.", t, }); - expect(md).toContain("partial reply"); - expect(md).toContain("still being generated"); + // The partial assistant answer that was on screen IS in the export. + expect(md).toContain("partial plan before the drop"); + // It is NOT flagged still-generating (the turn is over, just interrupted). + expect(md).not.toContain("still being generated"); + // The on-screen banner is recorded at the end. + expect(md).toContain("Connection lost — the answer was interrupted."); }); - it("renders a non-generating user pending message without the note", () => { + it("flags ONLY the tail assistant as still generating, and only while streaming", () => { + const streaming = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [], + live: [ + live({ id: "a", role: "assistant", parts: [{ type: "text", text: "done earlier" }] }), + live({ id: "u", role: "user", parts: [{ type: "text", text: "next q" }] }), + live({ id: "b", role: "assistant", parts: [{ type: "text", text: "streaming now" }] }), + ], + isStreaming: true, + t, + }); + // Exactly one "still being generated" note (the tail assistant). 
+ expect(streaming.match(/still being generated/g)?.length).toBe(1); + + const idle = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [], + live: [live({ id: "b", role: "assistant", parts: [{ type: "text", text: "final" }] })], + isStreaming: false, + t, + }); + expect(idle).not.toContain("still being generated"); + }); + + it("does NOT flag a completed assistant as generating when the streaming tail is a user message", () => { + // The `status === "submitted"` window: the user just sent, isStreaming is + // already true, but the new assistant turn has no message yet so the tail is + // the USER message. The previous assistant answer is complete on screen and + // must not be marked still-generating (WYSIWYG; regression for #160 review). const md = buildChatMarkdown({ title: "t", chatId: "c", - rows: [row({ role: "user", content: "persisted" })], - pending: [ - { - role: "user", - parts: [{ type: "text", text: "my live message" }], - generating: false, - }, + rows: [], + live: [ + live({ id: "a", role: "assistant", parts: [{ type: "text", text: "completed answer" }] }), + live({ id: "u", role: "user", parts: [{ type: "text", text: "the new question" }] }), ], + isStreaming: true, t, }); - expect(md).toContain("my live message"); + expect(md).toContain("completed answer"); expect(md).not.toContain("still being generated"); }); - it("includes the pending messages in the metadata message count", () => { + it("emits the heading + note for a streaming tail assistant with empty parts", () => { const md = buildChatMarkdown({ title: "t", chatId: "c", - rows: [ - row({ role: "user", content: "a" }), - row({ role: "assistant", content: "b" }), - ], - pending: [ - { - role: "user", - parts: [{ type: "text", text: "c" }], - generating: false, - }, - { - role: "assistant", - parts: [{ type: "text", text: "d" }], - generating: true, - }, - ], - t, - }); - // 2 persisted rows + 2 pending = 4. 
- expect(md).toContain("- Messages: 4"); - }); - - it("emits the heading and note for a generating assistant with empty parts", () => { - expect(() => - buildChatMarkdown({ - title: "t", - chatId: "c", - rows: [row({ role: "user", content: "persisted" })], - pending: [ - { - role: "assistant", - parts: [], - generating: true, - }, - ], - t, - }), - ).not.toThrow(); - const md = buildChatMarkdown({ - title: "t", - chatId: "c", - rows: [row({ role: "user", content: "persisted" })], - pending: [ - { - role: "assistant", - parts: [], - generating: true, - }, + rows: [row({ id: "u1", role: "user", content: "q" })], + live: [ + live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }), + live({ id: "a-live", role: "assistant", parts: [] }), ], + isStreaming: true, t, }); expect(md).toContain("## 2. AI agent"); expect(md).toContain("still being generated"); }); }); + +describe("buildChatMarkdown — live enrichment from persisted rows", () => { + it("pulls usage / error / timestamp from the persisted row matched by id", () => { + const md = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [ + row({ + id: "a1", + role: "assistant", + content: "x", + createdAt: "2026-06-22T10:00:00.000Z", + metadata: { usage: { inputTokens: 10, outputTokens: 5 }, error: "rate limited" }, + }), + ], + live: [ + // Same id as the persisted row, but no usage/error/timestamp on the live msg. + live({ id: "a1", role: "assistant", parts: [{ type: "text", text: "reply" }] }), + ], + isStreaming: false, + t, + }); + expect(md).toContain("reply"); + // Token footer + total come from the enriched row. + expect(md).toContain("_Tokens — in: 10, out: 5, total: 15_"); + expect(md).toContain("- Total tokens: 15"); + expect(md).toContain("**⚠️ Error:** rate limited"); + // The persisted timestamp is carried into the export. 
+ expect(md).toContain(""); + }); + + it("prefers authoritative usage already on the live message over the row's", () => { + const md = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [ + row({ + id: "a1", + role: "assistant", + content: "x", + metadata: { usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 } }, + }), + ], + live: [ + live({ + id: "a1", + role: "assistant", + parts: [{ type: "text", text: "reply" }], + metadata: { usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 } }, + }), + ], + isStreaming: false, + t, + }); + // The live (authoritative, freshest) usage wins, not the stale row usage. + expect(md).toContain("- Total tokens: 150"); + expect(md).not.toContain("- Total tokens: 2"); + }); + + it("a current-turn live message with no matching row renders without a footer", () => { + const md = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [row({ id: "u1", role: "user", content: "q" })], + live: [ + live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }), + live({ id: "a-live", role: "assistant", parts: [{ type: "text", text: "fresh reply" }] }), + ], + isStreaming: false, + t, + }); + expect(md).toContain("fresh reply"); + // No persisted row for the live assistant -> no token footer, no timestamp. + expect(md).not.toContain("_Tokens —"); + expect(md).not.toContain(""); + }); +}); + +describe("buildChatMarkdown — fallback + banner", () => { + it("falls back to the persisted rows when there are no live messages", () => { + const md = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [ + row({ role: "user", content: "from rows" }), + row({ + role: "assistant", + content: "answer", + metadata: { usage: { inputTokens: 4, outputTokens: 6 } }, + }), + ], + live: [], // empty live mirror -> fallback path + isStreaming: false, + t, + }); + expect(md).toContain("## 1. You"); + expect(md).toContain("## 2. 
AI agent"); + expect(md).toContain("from rows"); + expect(md).toContain("- Messages: 2"); + expect(md).toContain("- Total tokens: 10"); + }); + + it("appends the on-screen banner once, after the messages", () => { + const md = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [row({ role: "user", content: "q" })], + live: [live({ id: "u", role: "user", parts: [{ type: "text", text: "q" }] })], + isStreaming: false, + banner: "Rate limit reached — try again shortly.", + t, + }); + expect(md).toContain("_⚠️ Rate limit reached — try again shortly._"); + // Banner comes after the (only) message block. + expect(md.indexOf("Rate limit reached")).toBeGreaterThan(md.indexOf("## 1.")); + }); + + it("omits the banner block when there is no banner", () => { + const md = buildChatMarkdown({ + title: "t", + chatId: "c", + rows: [row({ role: "user", content: "q" })], + live: [live({ id: "u", role: "user", parts: [{ type: "text", text: "q" }] })], + isStreaming: false, + banner: null, + t, + }); + expect(md).not.toContain("_⚠️"); + }); +}); diff --git a/apps/client/src/features/ai-chat/utils/chat-markdown.ts b/apps/client/src/features/ai-chat/utils/chat-markdown.ts index c3c3b3b2..f70836d5 100644 --- a/apps/client/src/features/ai-chat/utils/chat-markdown.ts +++ b/apps/client/src/features/ai-chat/utils/chat-markdown.ts @@ -25,11 +25,23 @@ type Translate = (key: string, values?: Record) => string; interface BuildChatMarkdownArgs { title: string | null; chatId: string; + /** The live, on-screen messages — the WYSIWYG source of the export. When + * present and non-empty these DRIVE the document (so it mirrors exactly what + * the user sees, including a partial reply from an interrupted turn). Each is + * matched to a persisted row by `id` to enrich it with token usage / error / + * timestamp. When absent or empty the builder falls back to `rows`. */ + live?: LiveMessage[]; + /** Persisted message rows. 
Enrichment source (matched to `live` by id) AND the + * fallback document source when `live` is empty. */ rows: IAiChatMessageRow[]; - /** In-progress, not-yet-persisted live messages (the current streaming - * turn) to append after the persisted rows. `generating: true` adds a - * note that the message is still being produced. */ - pending?: PendingMessage[]; + /** Whether the live thread is still streaming. Only then is the tail assistant + * message flagged "still generating"; an interrupted (non-streaming) partial + * reply is exported as-is and the `banner` explains the interruption. */ + isStreaming?: boolean; + /** The on-screen banner text (error / dropped connection / manual stop), + * appended at the end of the export so the artifact records the interruption + * the user saw. */ + banner?: string | null; t: Translate; } @@ -39,10 +51,31 @@ interface TextLikePart { text?: string; } -/** A live, not-yet-persisted message (current streaming turn) to append. */ -interface PendingMessage { +/** Authoritative per-turn usage the server attaches to a message / row. */ +interface UsageLike { + inputTokens?: number; + outputTokens?: number; + totalTokens?: number; + reasoningTokens?: number; +} + +/** A live, on-screen message (subset of the AI SDK UIMessage we consume). */ +interface LiveMessage { + id: string; role: "user" | "assistant" | string; parts: TextLikePart[]; + metadata?: { usage?: UsageLike; error?: string }; +} + +/** One message normalized for rendering, regardless of live/persisted origin. */ +interface ExportItem { + role: string; + parts: TextLikePart[]; + usage?: UsageLike; + error?: string; + /** ISO timestamp from the persisted row, when one is known. */ + createdAt?: string; + /** True only for the tail assistant message while the thread is streaming. 
*/ generating: boolean; } @@ -127,53 +160,128 @@ function renderMessageParts(parts: TextLikePart[], t: Translate): string[] { return out; } +/** Resolve a persisted row's parts: prefer the rich persisted parts, else a + * single text part built from the plain-text content (mirrors `rowToUiMessage`). */ +function rowParts(row: IAiChatMessageRow): TextLikePart[] { + return Array.isArray(row.metadata?.parts) && row.metadata.parts.length > 0 + ? (row.metadata.parts as TextLikePart[]) + : [{ type: "text", text: row.content ?? "" }]; +} + +/** + * Normalize the export to one ordered list of {@link ExportItem}, WYSIWYG-first: + * + * - When `live` messages are present, THEY are the document (what the user sees, + * incl. an interrupted turn's partial reply). Each is matched to a persisted + * row by `id` to pull token usage / error / timestamp — a live message of the + * CURRENT turn has no matching row yet, so it simply renders without a footer. + * Authoritative `usage`/`error` already on the live message metadata win over + * the row (the server attaches usage to the streamed message at a step + * boundary before the row is refetched). Only the tail assistant message is + * flagged `generating`, and only while `isStreaming`. + * - When `live` is empty (e.g. the export runs before the live mirror is + * populated), fall back to the persisted `rows` so the format never regresses. + */ +function resolveItems( + live: LiveMessage[] | undefined, + rows: IAiChatMessageRow[], + isStreaming: boolean, +): ExportItem[] { + if (live && live.length > 0) { + const rowsById = new Map(rows.map((r) => [r.id, r])); + // The "still generating" note may apply ONLY to an assistant message that is + // the actual TAIL of the list — that is where the on-screen typing indicator + // sits. 
While `status === "submitted"` (isStreaming true) right after the + // user hit send, the tail is the USER message and the new assistant turn has + // no message yet; the previous assistant answer is shown complete on screen, + // so it must NOT be flagged (the indicator renders as a separate bottom + // block, not on that answer). + const lastIndex = live.length - 1; + const tailIsStreamingAssistant = + isStreaming && live[lastIndex]?.role === "assistant"; + return live.map((m, i) => { + const row = rowsById.get(m.id); + return { + role: m.role, + parts: m.parts ?? [], + // Authoritative usage/error already on the live message (the server + // attaches usage to the streamed message at a step boundary) wins over + // the persisted row; a current-turn live message has no matching row yet + // and simply renders without a token footer (the accepted WYSIWYG + // tradeoff — an interrupted turn loses only its token footer, not text). + usage: m.metadata?.usage ?? row?.metadata?.usage, + error: m.metadata?.error ?? row?.metadata?.error ?? undefined, + createdAt: row?.createdAt, + generating: tailIsStreamingAssistant && i === lastIndex, + }; + }); + } + + return rows.map((row) => ({ + role: row.role, + parts: rowParts(row), + usage: row.metadata?.usage, + error: row.metadata?.error ?? undefined, + createdAt: row.createdAt, + generating: false, + })); +} + /** * Serialize a chat to a Markdown string. Pure (apart from `new Date()` for the * export timestamp), so it is straightforward to unit-test. */ export function buildChatMarkdown(args: BuildChatMarkdownArgs): string { - const { title, chatId, rows, pending, t } = args; + const { title, chatId, live, rows, isStreaming, banner, t } = args; const blocks: string[] = []; + const items = resolveItems(live, rows, isStreaming === true); + const heading = (title ?? "").trim() || t("Untitled chat"); blocks.push(`# ${heading}`); // Metadata bullet list. Total tokens is only shown when there is a sum. 
- const totalTokens = rows.reduce((sum, row) => { - const usage = row.metadata?.usage; - return usage ? sum + rowTokens(usage) : sum; - }, 0); + const totalTokens = items.reduce( + (sum, item) => (item.usage ? sum + rowTokens(item.usage) : sum), + 0, + ); const meta = [ `- Chat ID: \`${chatId}\``, `- Exported: ${new Date().toISOString()}`, - `- Messages: ${rows.length + (pending?.length ?? 0)}`, + `- Messages: ${items.length}`, ]; if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`); blocks.push(meta.join("\n")); - rows.forEach((row, index) => { + items.forEach((item, index) => { blocks.push("---"); - const roleLabel = row.role === "assistant" ? t("AI agent") : t("You"); + const roleLabel = item.role === "assistant" ? t("AI agent") : t("You"); blocks.push(`## ${index + 1}. ${roleLabel}`); // Created-at kept in source as an HTML comment (out of the rendered prose). - blocks.push(``); + // A live message of the current turn has no persisted row yet — omit it. + if (item.createdAt) blocks.push(``); - // Resolve parts: prefer the rich persisted parts, else a single text part - // built from the plain-text content (mirrors `rowToUiMessage`). - const parts: TextLikePart[] = - Array.isArray(row.metadata?.parts) && row.metadata.parts.length > 0 - ? (row.metadata.parts as TextLikePart[]) - : [{ type: "text", text: row.content ?? "" }]; + blocks.push(...renderMessageParts(item.parts, t)); - blocks.push(...renderMessageParts(parts, t)); - - if (row.metadata?.error) { - blocks.push(`**⚠️ Error:** ${row.metadata.error}`); + // A generating assistant may have empty/no parts yet — the heading (above) + // and this note still record the in-progress turn. 
+ if (item.generating) { + blocks.push( + "_⏳ This message is still being generated — the export captured a partial, in-progress response._", + ); } - const usage = row.metadata?.usage; + // A persisted per-message error (the raw provider text) may coexist with the + // trailing `banner` (the classified on-screen alert) when the failed turn's + // row has already been refetched by export time. They describe the same + // failure at different fidelity; showing both is an accepted, minor redundancy. + if (item.error) { + blocks.push(`**⚠️ Error:** ${item.error}`); + } + + const usage = item.usage; if (usage) { const total = usage.totalTokens ?? rowTokens(usage); // Reasoning (thinking) tokens are shown only when the provider reported a @@ -188,27 +296,12 @@ export function buildChatMarkdown(args: BuildChatMarkdownArgs): string { } }); - // Append the in-progress, not-yet-persisted live messages (the current - // streaming turn) after the persisted rows. Heading numbering CONTINUES from - // the persisted rows. A `generating` assistant gets a note that the captured - // response is partial; pending messages carry no usage/token footer yet. - (pending ?? []).forEach((message, p) => { + // Record the on-screen banner (error / dropped connection / manual stop) so + // the export reflects exactly what the user saw, including an interruption. + if (banner && banner.trim().length > 0) { blocks.push("---"); - - const num = rows.length + p + 1; - const roleLabel = message.role === "assistant" ? t("AI agent") : t("You"); - blocks.push(`## ${num}. ${roleLabel}`); - - blocks.push(...renderMessageParts(message.parts, t)); - - // A generating assistant may have empty/no parts yet — still emit the - // heading (above) and this note so the export shows the in-progress turn. 
- if (message.generating === true) { - blocks.push( - "_⏳ This message is still being generated — the export captured a partial, in-progress response._", - ); - } - }); + blocks.push(`_⚠️ ${banner.trim()}_`); + } // Blank line between blocks so the Markdown renders cleanly. return blocks.join("\n\n"); From df81851eb352d05a0d86265a2cf89760162c6f1a Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Thu, 25 Jun 2026 03:52:03 +0300 Subject: [PATCH 20/43] fix(ai-chat): export the first unsaved turn (#174) The "Copy chat" button was hidden during a brand-new chat's very first turn: both the `canExport` gate and the `handleCopy` early-return required an `activeChatId` AND persisted `messageRows`, neither of which exists yet while the first turn is streaming or after it was interrupted before any row was persisted. Decouple the export gate from persisted state. ChatThread now reports a reactive `onLiveContentChange(messages.length > 0)` signal (the live snapshot lives in a non-reactive ref, so a separate reactive flag is needed to re-render the button); the parent keeps it in `hasLiveContent` and exports whenever there is anything on screen OR persisted. `handleCopy` passes an `"unsaved"` placeholder chat id when none exists yet, and the live-first builder serializes the on-screen thread WYSIWYG. Builds on #160 (WYSIWYG export); covers the first-turn edge case that was explicitly out of scope there. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../ai-chat/components/ai-chat-window.tsx | 108 ++++++++---- .../ai-chat/components/chat-thread.tsx | 24 ++- .../ai-chat/utils/chat-markdown.test.ts | 159 ++++++++++++++++-- 3 files changed, 240 insertions(+), 51 deletions(-) diff --git a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx index 3990a0ba..740945c4 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx @@ -80,17 +80,31 @@ function computeInitialGeom() { Math.min(DEFAULT_HEIGHT, window.innerHeight - 2 * EDGE_MARGIN), ); const left = Math.max(EDGE_MARGIN, window.innerWidth - width - 24); - const maxTop = Math.max(EDGE_MARGIN, window.innerHeight - height - EDGE_MARGIN); + const maxTop = Math.max( + EDGE_MARGIN, + window.innerHeight - height - EDGE_MARGIN, + ); const top = Math.min(60, maxTop); return { left, top, width, height }; } // Clamp a geometry so the window stays within the current viewport. -function clampGeom(g: { left: number; top: number; width: number; height: number }) { +function clampGeom(g: { + left: number; + top: number; + width: number; + height: number; +}) { const effWidth = Math.max(g.width, MIN_WIDTH); const effHeight = Math.max(g.height, MIN_HEIGHT); - const maxLeft = Math.max(EDGE_MARGIN, window.innerWidth - effWidth - EDGE_MARGIN); - const maxTop = Math.max(EDGE_MARGIN, window.innerHeight - effHeight - EDGE_MARGIN); + const maxLeft = Math.max( + EDGE_MARGIN, + window.innerWidth - effWidth - EDGE_MARGIN, + ); + const maxTop = Math.max( + EDGE_MARGIN, + window.innerHeight - effHeight - EDGE_MARGIN, + ); return { ...g, left: Math.min(Math.max(EDGE_MARGIN, g.left), maxLeft), @@ -166,6 +180,12 @@ export default function AiChatWindow() { // `null` means no turn is in flight -> the badge falls back to the persisted // context size below. 
const [liveTurnTokens, setLiveTurnTokens] = useState(null); + // Whether the on-screen thread currently holds at least one message. Reported + // reactively by ChatThread (the live snapshot lives in a non-reactive ref). This + // lets the "Copy chat" button stay available for a brand-new, not-yet-persisted + // chat whose first turn is in flight or was interrupted — that case has no + // persisted rows yet, so a persisted-rows-only gate would hide the button (#174). + const [hasLiveContent, setHasLiveContent] = useState(false); // The page the user is currently viewing. AiChatWindow lives in a pathless // parent layout route, so useParams() can't see :pageSlug. Match the full @@ -190,17 +210,21 @@ export default function AiChatWindow() { // The invalidate closures are passed inline: `onTurnFinished` is read live by // useChat's onFinish (never in an effect dep array), so their identity does not // matter — no memoization ceremony needed. - const { threadKey, waitingForHistory, onTurnFinished, cancelPendingAdoption } = - useChatSession({ - activeChatId, - setActiveChatId, - chats, - messagesLoading, - onInvalidateChatList: () => - queryClient.invalidateQueries({ queryKey: AI_CHATS_RQ_KEY }), - onInvalidateChatMessages: (id) => - queryClient.invalidateQueries({ queryKey: AI_CHAT_MESSAGES_RQ_KEY(id) }), - }); + const { + threadKey, + waitingForHistory, + onTurnFinished, + cancelPendingAdoption, + } = useChatSession({ + activeChatId, + setActiveChatId, + chats, + messagesLoading, + onInvalidateChatList: () => + queryClient.invalidateQueries({ queryKey: AI_CHATS_RQ_KEY }), + onInvalidateChatMessages: (id) => + queryClient.invalidateQueries({ queryKey: AI_CHAT_MESSAGES_RQ_KEY(id) }), + }); // startNewChat/selectChat set the public atom; the hook's render-phase // reconciler handles the remount when activeChatId actually CHANGES. But @@ -236,13 +260,23 @@ export default function AiChatWindow() { () => chats?.items?.find((c) => c.id === activeChatId) ?? 
null, [chats, activeChatId], ); - const canExport = !!activeChatId && !!messageRows && messageRows.length > 0; + // Export is available when there is anything to export: either persisted rows + // for the active chat, OR a live on-screen thread with at least one message. + // The live arm covers a brand-new chat whose first turn is streaming or was + // interrupted before the server persisted any row (#174); the persisted arm is + // the steady-state path for an already-saved chat (#160). + const canExport = + hasLiveContent || + (!!activeChatId && !!messageRows && messageRows.length > 0); // The role to display in the header and as the assistant's name. Prefer the // persisted role of an existing chat (chat-list JOIN); fall back to the role // picked via a card click for a brand-new or just-adopted chat. selectChat // resets selectedRoleId, so this fallback never leaks into an unrelated chat. - const currentRole = useMemo<{ name: string; emoji: string | null } | null>(() => { + const currentRole = useMemo<{ + name: string; + emoji: string | null; + } | null>(() => { if (activeChat?.roleName) { return { name: activeChat.roleName, emoji: activeChat.roleEmoji ?? null }; } @@ -254,23 +288,25 @@ export default function AiChatWindow() { // call) and copy it to the clipboard. The "Copied" notification is the // feedback. const handleCopy = useCallback(() => { - // Export gate. Requiring at least one persisted row means a brand-new chat - // whose VERY FIRST turn dropped before the server persisted even the user - // message cannot be exported (the button is also hidden — see `canExport`). - // That narrow first-turn case is deliberately out of scope for #160; the user - // message is normally persisted before model contact, so an interrupted later - // turn still has rows and exports the on-screen partial reply WYSIWYG. - if (!activeChatId || !messageRows || messageRows.length === 0) return; + // Export gate. 
There must be SOMETHING to export — either a live on-screen + // message or a persisted row. A brand-new chat whose first turn is streaming + // or was interrupted has live messages but no persisted rows yet; it still + // exports the on-screen thread WYSIWYG (#174). Only a truly empty chat (no + // live messages and no rows) is non-exportable (the button is hidden too — + // see `canExport`). + const live = liveThreadRef.current; + const hasRows = !!messageRows && messageRows.length > 0; + if (live.messages.length === 0 && !hasRows) return; // WYSIWYG export: the live on-screen messages ARE the document (so a partial // reply from an interrupted turn — which never reached the persisted rows — // is exported just as it appears). The persisted rows enrich each live // message (token usage / error / timestamp) by id and serve as the fallback // when the live mirror is empty. The on-screen banner is appended too. See - // issue #160. - const live = liveThreadRef.current; + // issues #160 and #174. `chatId` may be null for a not-yet-saved chat — use a + // placeholder so the header line still renders. const markdown = buildChatMarkdown({ title: activeChat?.title ?? null, - chatId: activeChatId, + chatId: activeChatId ?? "unsaved", live: live.messages.map((m) => ({ id: m.id, role: m.role, @@ -370,7 +406,8 @@ export default function AiChatWindow() { const width = el.offsetWidth; const height = el.offsetHeight; setGeom((prev) => { - if (!prev || (prev.width === width && prev.height === height)) return prev; + if (!prev || (prev.width === width && prev.height === height)) + return prev; return { ...prev, width, height }; }); }); @@ -516,11 +553,15 @@ export default function AiChatWindow() { flash a "0" badge before any token streams in (#151 review). */} {liveTurnTokens !== null && liveTurnTokens > 0 ? ( - {formatTokens(liveTurnTokens)} + + {formatTokens(liveTurnTokens)} + ) : contextTokens > 0 ? 
( - {formatTokens(contextTokens)} + + {formatTokens(contextTokens)} + ) : null}
@@ -534,7 +575,11 @@ export default function AiChatWindow() { aria-label={t("Copy chat")} onClick={handleCopy} > - {clipboard.copied ? : } + {clipboard.copied ? ( + + ) : ( + + )} )}