From 52beae85b3b627c4c225d4c3474b6aa17736dbe8 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 4 Jul 2026 12:06:45 +0300 Subject: [PATCH 1/7] fix(client): close mobile sidebar drawer after creating a page (#325) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On mobile the "create page" action is triggered from inside the off-canvas sidebar drawer (the space sidebar "+" and temporary-note buttons, and the tree-row "add subpage"). handleCreate navigated to the new page's editor route but never closed that drawer, so it stayed open on top of the freshly created page — the editor was hidden behind the page tree ("as if the page didn't open", #325 item 5). Close the mobile sidebar (`setMobileSidebar(false)`) right after navigating, mirroring the existing drawer-close on a tree-row tap (space-tree-row). Placing it in handleCreate covers all three create entry points in one spot. It is a no-op on desktop, where the mobile-sidebar atom is already false and only governs the sub-992px collapsed state — desktop behavior is unchanged. Verified: `tsc --noEmit` clean; client vitest 887 passed | 1 expected-fail. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../page/tree/hooks/use-tree-mutation.ts | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/apps/client/src/features/page/tree/hooks/use-tree-mutation.ts b/apps/client/src/features/page/tree/hooks/use-tree-mutation.ts index 494f8b93..7aff8141 100644 --- a/apps/client/src/features/page/tree/hooks/use-tree-mutation.ts +++ b/apps/client/src/features/page/tree/hooks/use-tree-mutation.ts @@ -1,5 +1,5 @@ import { useCallback } from "react"; -import { useAtom, useStore } from "jotai"; +import { useAtom, useSetAtom, useStore } from "jotai"; import { notifications } from "@mantine/notifications"; import { useTranslation } from "react-i18next"; import { useNavigate, useParams } from "react-router-dom"; @@ -20,6 +20,7 @@ import { } from "@/features/page/queries/page-query.ts"; import { buildPageUrl } from "@/features/page/page.utils.ts"; import { getSpaceUrl } from "@/lib/config.ts"; +import { mobileSidebarAtom } from "@/components/layouts/global/hooks/atoms/sidebar-atom.ts"; export type UseTreeMutation = { handleMove: (sourceId: string, op: DropOp) => Promise; @@ -43,6 +44,7 @@ export function useTreeMutation(spaceId: string): UseTreeMutation { const removePageMutation = useRemovePageMutation(); const movePageMutation = useMovePageMutation(); const navigate = useNavigate(); + const setMobileSidebar = useSetAtom(mobileSidebarAtom); const { spaceSlug, pageSlug } = useParams(); const handleMove = useCallback( @@ -201,8 +203,23 @@ export function useTreeMutation(spaceId: string): UseTreeMutation { createdPage.title, ); navigate(pageUrl); + // On mobile the create action is triggered from inside the off-canvas + // sidebar drawer (space sidebar "+", tree-row "add subpage"). Navigating + // alone leaves that drawer open on top of the freshly created page, so the + // editor stays hidden behind the tree. 
Close it here so the new page opens + // in the editor — mirrors the row-click drawer-close in space-tree-row. + // No-op on desktop, where the mobile drawer atom is already false. + setMobileSidebar(false); }, - [spaceId, createPageMutation, setData, store, navigate, spaceSlug], + [ + spaceId, + createPageMutation, + setData, + store, + navigate, + spaceSlug, + setMobileSidebar, + ], ); const handleRename = useCallback( From baa41d66adf8fa5a14108ccc3b274b61f2c2e5ee Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 4 Jul 2026 12:37:28 +0300 Subject: [PATCH 2/7] test(infra): coverage-gate + acceptInvitation atomicity int-spec + turn-end unit (#324) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tail of #244. Three items: 1. Coverage-gate (main). develop had no coverage tooling at all. Added @vitest/coverage-v8@4.1.6 (pinned to the vitest already in use) to the three vitest packages — git-sync, editor-ext (which also gains its missing direct `vitest` devDep), apps/client — and enabled v8 coverage with per-package thresholds (no root vitest config exists, so per-package is the only meaningful scope). v8 provider is chosen deliberately: istanbul broke on the ESM `@docmost/editor-ext` barrel; v8 collects native runtime coverage and never re-parses ESM. `enabled: true` wires the gate into the plain `test` script, so `pnpm -r test` (the CI entrypoint) enforces it without a manual `--coverage`. Thresholds set ~4-5 pts below measured current coverage so the gate PASSES today and FAILS on regression (verified: forcing lines=95 on editor-ext exits 1). `all: false` — coverage counts test-touched files; documented in the configs (with `all: true` the many untested type/barrel files would sink the % and make the gate meaningless). Measured→threshold (S/B/F/L): git-sync 91.78/79.16/76.76/92.46 → 88/75/72/88; editor-ext 58.58/48.1/64.96/58.91 → 54/44/60/54; client 59.93/58/48.47/59.39 → 55/53/44/55. All exit 0. 2. 
acceptInvitation atomicity int-spec. New apps/server/test/integration/workspace-accept-invitation-atomicity.int-spec.ts (+ createDefaultGroup/createInvitation seeders in test/integration/db.ts per its convention). Wires the real WorkspaceInvitationService with real User/Group/GroupUser repos against the test Kysely, stubbing only the post-commit collaborators. Asserts the invariant protected by users_email_workspace_id_unique: (a) two CONCURRENT accepts → exactly one fulfilled, one BadRequestException('Invitation already accepted'), membership count == 1, invitation consumed; (b) repeated sequential accept → still one membership; (c) the survivor is in the workspace default group (whole-tx, no torn state). Ran against real Postgres+Redis: 3/3 pass. 3. turn-end decision unit test. `decideTurnEnd` does not exist as a symbol; the turn-end logic lives in chat-thread.tsx's onFinish handler. Added a focused block to the existing chat-thread.test.tsx (matching its hoisted-mock style): clean finish → flush queued (continue); abort/disconnect/error → queue preserved (end) with the matching notice ("Response stopped." on abort, the connection-lost notice on disconnect, none on error, where the error banner takes over); parent notified on every terminal outcome. 8 passed (3 existing + 5 new). Verified: git-sync 712, editor-ext 247, client 888 (all with the gate, exit 0); int-spec 3/3 (real Postgres); tsc --noEmit clean for client + server; pnpm install --frozen-lockfile consistent (lockfile additive). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/client/package.json | 1 + .../ai-chat/components/chat-thread.test.tsx | 90 +++++++- apps/client/vitest.config.ts | 17 ++ apps/server/test/integration/db.ts | 56 +++++ ...ce-accept-invitation-atomicity.int-spec.ts | 218 ++++++++++++++++++ packages/editor-ext/package.json | 4 + packages/editor-ext/vitest.config.ts | 16 ++ packages/git-sync/package.json | 1 + packages/git-sync/vitest.config.ts | 19 ++ pnpm-lock.yaml | 173 +++++++++++++- 10 files changed, 589 insertions(+), 6 deletions(-) create mode 100644 apps/server/test/integration/workspace-accept-invitation-atomicity.int-spec.ts diff --git a/apps/client/package.json b/apps/client/package.json index 010cb5e4..ca1eb5cc 100644 --- a/apps/client/package.json +++ b/apps/client/package.json @@ -81,6 +81,7 @@ "@types/react": "18.3.12", "@types/react-dom": "18.3.1", "@vitejs/plugin-react": "6.0.1", + "@vitest/coverage-v8": "4.1.6", "eslint": "9.28.0", "eslint-plugin-react": "7.37.5", "eslint-plugin-react-hooks": "7.0.1", diff --git a/apps/client/src/features/ai-chat/components/chat-thread.test.tsx b/apps/client/src/features/ai-chat/components/chat-thread.test.tsx index 94499d0f..359abbd7 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.test.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.test.tsx @@ -1,5 +1,5 @@ import { describe, it, expect, beforeEach, vi } from "vitest"; -import { render, screen, fireEvent, act } from "@testing-library/react"; +import { render, screen, fireEvent, act, cleanup } from "@testing-library/react"; import { MantineProvider } from "@mantine/core"; // Shared, hoisted mock state so the @ai-sdk/react and "ai" module mocks (hoisted @@ -140,3 +140,91 @@ describe("ChatThread — send now (#198)", () => { expect(prep({ messages: [], body: {} }).body.interrupted).toBe(false); }); }); + +// The turn-end decision lives in the `onFinish` handler: given the terminal +// outcome of a turn (`isAbort` / `isDisconnect` 
/ `isError`, or none = clean), +// it decides whether to CONTINUE (flush the next queued message) or END (leave +// the queue intact for the user), and which stop notice — if any — to show. +// `sendNow` is exercised above; these tests pin down the plain outcomes. +describe("ChatThread — turn-end decision (onFinish)", () => { + beforeEach(() => { + h.state.status = "streaming"; + h.state.onFinish = null; + h.state.sendMessage.mockClear(); + h.state.stop.mockClear(); + h.state.transport = null; + }); + + // Drive a fresh onFinish with the given terminal flags after queueing a + // message, and report both what the parent was told and whether the queue was + // flushed (a resend to the sendMessage spy). + function finishWith(flags: { + isAbort?: boolean; + isDisconnect?: boolean; + isError?: boolean; + }) { + // Tear down any prior render so the loop-driven "every outcome" case does + // not leave duplicate queue buttons in the DOM. + cleanup(); + h.state.sendMessage.mockClear(); + const { onTurnFinished } = renderThread(); + // Populate the queue while the turn is streaming. + fireEvent.click(screen.getByTestId("queue-btn")); + act(() => { + h.state.onFinish?.({ + message: { id: "a", role: "assistant", parts: [] }, + isAbort: false, + isDisconnect: false, + isError: false, + ...flags, + }); + }); + return { onTurnFinished }; + } + + it("CONTINUES — flushes the next queued message on a clean finish", () => { + finishWith({}); + // Clean finish (no terminal flag): the queued message is auto-sent. + expect(h.state.sendMessage).toHaveBeenCalledWith({ text: "queued text" }); + // A clean finish shows no stop notice. + expect(screen.queryByText("Response stopped.")).toBeNull(); + }); + + it("ENDS — keeps the queue intact on a user abort and shows the stopped notice", () => { + finishWith({ isAbort: true }); + // A plain Stop (not the sendNow interrupt path) must NOT auto-resend: the + // queue is preserved for the user to decide. 
+ expect(h.state.sendMessage).not.toHaveBeenCalled(); + expect(screen.getByText("Response stopped.")).toBeTruthy(); + }); + + it("ENDS — keeps the queue intact on a disconnect and shows the connection-lost notice", () => { + finishWith({ isDisconnect: true }); + expect(h.state.sendMessage).not.toHaveBeenCalled(); + expect( + screen.getByText("Connection lost — the answer was interrupted."), + ).toBeTruthy(); + }); + + it("ENDS — keeps the queue intact on a stream error (no auto-retry, no stopped notice)", () => { + finishWith({ isError: true }); + // Blindly retrying after a failure would be wrong; the queue is left alone. + expect(h.state.sendMessage).not.toHaveBeenCalled(); + // isError clears the neutral notice (the error banner covers this case). + expect(screen.queryByText("Response stopped.")).toBeNull(); + }); + + it("notifies the parent on EVERY terminal outcome", () => { + // The chat-list refresh / new-chat id adoption must run on success and on + // every failure path alike. + for (const flags of [ + {}, + { isAbort: true }, + { isDisconnect: true }, + { isError: true }, + ]) { + const { onTurnFinished } = finishWith(flags); + expect(onTurnFinished).toHaveBeenCalled(); + } + }); +}); diff --git a/apps/client/vitest.config.ts b/apps/client/vitest.config.ts index 334f6226..c40bb93e 100644 --- a/apps/client/vitest.config.ts +++ b/apps/client/vitest.config.ts @@ -13,5 +13,21 @@ export default defineConfig({ environment: 'jsdom', globals: true, setupFiles: ['./vitest.setup.ts'], + // Coverage gate (issue #324). v8 provider (not istanbul) so ESM barrels + // like `@docmost/editor-ext` are not re-parsed/instrumented. Thresholds are + // set a few points below the level measured on develop, scoped to the files + // the suite loads (the Vitest 4 default) rather than the whole app, so the + // gate passes today but fails on a genuine coverage regression. 
+ coverage: { + enabled: true, + provider: 'v8', + reporter: ['text-summary', 'text'], + thresholds: { + statements: 55, + branches: 53, + functions: 44, + lines: 55, + }, + }, }, }); diff --git a/apps/server/test/integration/db.ts b/apps/server/test/integration/db.ts index ede53494..db795a2f 100644 --- a/apps/server/test/integration/db.ts +++ b/apps/server/test/integration/db.ts @@ -132,6 +132,62 @@ export async function createUser( return { id: row.id as string }; } +// The default group every workspace has; `groupUserRepo.addUserToDefaultGroup` +// (invoked by acceptInvitation) looks it up by `isDefault = true`, so a +// workspace under test must have exactly one for the accept path to complete. +export async function createDefaultGroup( + db: Kysely, + workspaceId: string, + overrides: { name?: string } = {}, +): Promise<{ id: string }> { + const id = randomUUID(); + const suffix = shortId(id); + const row = await db + .insertInto('groups') + .values({ + id, + // name is unique per workspace + NOT NULL. + name: overrides.name ?? `group-${suffix}`, + isDefault: true, + workspaceId, + }) + .returning(['id']) + .executeTakeFirstOrThrow(); + return { id: row.id as string }; +} + +// A pending workspace invitation. `role`/`token` are NOT NULL; `groupIds` is a +// nullable uuid[] and `invitedById` a nullable FK to users. Returns the fields a +// spec needs to drive acceptInvitation (id + token + the invited email). +export async function createInvitation( + db: Kysely, + args: { + workspaceId: string; + email: string; + invitedById?: string | null; + role?: string; + token?: string; + groupIds?: string[] | null; + }, +): Promise<{ id: string; token: string; email: string }> { + const id = randomUUID(); + const token = args.token ?? `tok-${shortId(id)}`; + const row = await db + .insertInto('workspaceInvitations') + .values({ + id, + email: args.email, + role: args.role ?? 'member', + token, + groupIds: (args.groupIds ?? 
null) as any, + invitedById: args.invitedById ?? null, + workspaceId: args.workspaceId, + }) + .returning(['id']) + .executeTakeFirstOrThrow(); + return { id: row.id as string, token, email: args.email }; +} + export async function createSpace( db: Kysely, workspaceId: string, diff --git a/apps/server/test/integration/workspace-accept-invitation-atomicity.int-spec.ts b/apps/server/test/integration/workspace-accept-invitation-atomicity.int-spec.ts new file mode 100644 index 00000000..7be776a9 --- /dev/null +++ b/apps/server/test/integration/workspace-accept-invitation-atomicity.int-spec.ts @@ -0,0 +1,218 @@ +import { BadRequestException } from '@nestjs/common'; +import { Kysely } from 'kysely'; +import { Workspace } from '@docmost/db/types/entity.types'; +import { UserRepo } from '@docmost/db/repos/user/user.repo'; +import { GroupRepo } from '@docmost/db/repos/group/group.repo'; +import { GroupUserRepo } from '@docmost/db/repos/group/group-user.repo'; +import { WorkspaceInvitationService } from 'src/core/workspace/services/workspace-invitation.service'; +import { + getTestDb, + destroyTestDb, + createWorkspace, + createUser, + createDefaultGroup, + createInvitation, +} from './db'; + +/** + * acceptInvitation atomicity (issue #324, tail of #244). + * + * acceptInvitation() reads the invitation OUTSIDE the transaction, then inside a + * single tx: inserts the invited user, adds them to the default group, and + * deletes the invitation. Two accepts of the SAME invitation therefore race to + * insert a user with the same (email, workspaceId) — which the + * `users_email_workspace_id_unique` constraint forbids. The service catches that + * violation and reports "Invitation already accepted". 
+ * + * These specs pin the INVARIANT that path protects: no matter how many times the + * invitation is accepted (concurrently or repeatedly), the workspace ends up + * with exactly ONE membership for the invited email and the invitation is + * consumed exactly once — never a duplicate user and never a half-applied state. + * + * The service is wired with the REAL repos (UserRepo / GroupRepo / GroupUserRepo) + * against the test Kysely; only the peripheral collaborators that acceptInvitation + * touches AFTER the transaction (mail, session token, billing, audit, env) are + * stubbed, so the exercised DB write path is the production one. + */ +describe('WorkspaceInvitationService.acceptInvitation atomicity [integration]', () => { + let db: Kysely; + let service: WorkspaceInvitationService; + + // Count the memberships (user rows) for an email within a workspace — the + // quantity the atomicity guarantee is about. + async function membershipCount( + workspaceId: string, + email: string, + ): Promise { + const rows = await db + .selectFrom('users') + .select('id') + .where('workspaceId', '=', workspaceId) + .where('email', '=', email.toLowerCase()) + .execute(); + return rows.length; + } + + async function invitationExists(invitationId: string): Promise { + const row = await db + .selectFrom('workspaceInvitations') + .select('id') + .where('id', '=', invitationId) + .executeTakeFirst(); + return !!row; + } + + beforeAll(() => { + db = getTestDb(); + + const userRepo = new UserRepo(db as any); + const groupRepo = new GroupRepo(db as any); + const groupUserRepo = new GroupUserRepo(db as any, groupRepo, userRepo); + + // Collaborators used only on the post-commit success tail; safe to stub. 
+ const mailService = { sendToQueue: jest.fn().mockResolvedValue(undefined) }; + const domainService = {} as any; + const tokenService = {} as any; + const sessionService = { + createSessionAndToken: jest.fn().mockResolvedValue('test-auth-token'), + }; + const billingQueue = { add: jest.fn().mockResolvedValue(undefined) }; + const environmentService = { isCloud: () => false }; + const auditService = { log: jest.fn() }; + + service = new WorkspaceInvitationService( + userRepo, + groupUserRepo, + mailService as any, + domainService, + tokenService, + sessionService as any, + db as any, + billingQueue as any, + environmentService as any, + auditService as any, + ); + }); + + afterAll(async () => { + await destroyTestDb(); + }); + + // A workspace with its default group, an inviter, and a pending invitation. + async function seedInvite(): Promise<{ + workspace: Workspace; + invitationId: string; + token: string; + email: string; + }> { + const { id: workspaceId } = await createWorkspace(db); + await createDefaultGroup(db, workspaceId); + const inviter = await createUser(db, workspaceId); + // Distinct address per invite so specs never collide across the suite. + const email = `invitee-${workspaceId.slice(0, 8)}@example.test`; + const invite = await createInvitation(db, { + workspaceId, + email, + invitedById: inviter.id, + }); + + // acceptInvitation only reads id/hostname/enforceSso/emailDomains/enforceMfa + // off the workspace; a minimal plain object is sufficient. 
+ const workspace = { + id: workspaceId, + hostname: `host-${workspaceId.slice(0, 8)}`, + enforceSso: false, + enforceMfa: false, + emailDomains: [] as string[], + } as unknown as Workspace; + + return { workspace, invitationId: invite.id, token: invite.token, email }; + } + + it('concurrent accepts create a single membership and consume the invitation once', async () => { + const { workspace, invitationId, token, email } = await seedInvite(); + + const dto = { invitationId, token, name: 'Invited User', password: 'password123' }; + + // Fire two accepts of the SAME invitation at once. They race to insert the + // same (email, workspaceId); the unique constraint lets exactly one win. + const results = await Promise.allSettled([ + service.acceptInvitation({ ...dto }, workspace), + service.acceptInvitation({ ...dto }, workspace), + ]); + + const fulfilled = results.filter((r) => r.status === 'fulfilled'); + const rejected = results.filter( + (r): r is PromiseRejectedResult => r.status === 'rejected', + ); + + // Exactly one accept succeeds; the other is rejected. + expect(fulfilled).toHaveLength(1); + expect(rejected).toHaveLength(1); + + // The loser fails via the caught unique-constraint path with the specific + // "already accepted" message — not a half-state / generic failure. + expect(rejected[0].reason).toBeInstanceOf(BadRequestException); + expect(rejected[0].reason.message).toBe('Invitation already accepted'); + + // Invariant: exactly one membership, and the invitation is gone. + expect(await membershipCount(workspace.id, email)).toBe(1); + expect(await invitationExists(invitationId)).toBe(false); + }); + + it('a repeated (sequential) accept does not create a duplicate membership', async () => { + const { workspace, invitationId, token, email } = await seedInvite(); + const dto = { invitationId, token, name: 'Invited User', password: 'password123' }; + + // First accept succeeds and returns an auth token. 
+ const first = await service.acceptInvitation({ ...dto }, workspace); + expect(first?.authToken).toBe('test-auth-token'); + expect(await membershipCount(workspace.id, email)).toBe(1); + expect(await invitationExists(invitationId)).toBe(false); + + // Re-accepting the (now consumed) invitation must be rejected and must NOT + // add a second membership. The invitation row is gone, so this hits the + // "Invitation not found" guard rather than the unique-constraint path. + await expect( + service.acceptInvitation({ ...dto }, workspace), + ).rejects.toBeInstanceOf(BadRequestException); + + expect(await membershipCount(workspace.id, email)).toBe(1); + }); + + it('the single created membership is added to the default group (no partial state)', async () => { + const { workspace, invitationId, token, email } = await seedInvite(); + const dto = { invitationId, token, name: 'Invited User', password: 'password123' }; + + await Promise.allSettled([ + service.acceptInvitation({ ...dto }, workspace), + service.acceptInvitation({ ...dto }, workspace), + ]); + + // Resolve the one surviving user and assert the whole tx applied: they exist + // AND are in the workspace default group (the mid-transaction step), proving + // the winning accept committed as a whole rather than leaving a torn state. 
+ const user = await db + .selectFrom('users') + .select(['id']) + .where('workspaceId', '=', workspace.id) + .where('email', '=', email.toLowerCase()) + .executeTakeFirstOrThrow(); + + const defaultGroup = await db + .selectFrom('groups') + .select(['id']) + .where('workspaceId', '=', workspace.id) + .where('isDefault', '=', true) + .executeTakeFirstOrThrow(); + + const membership = await db + .selectFrom('groupUsers') + .select(['userId']) + .where('groupId', '=', defaultGroup.id) + .where('userId', '=', user.id) + .execute(); + + expect(membership).toHaveLength(1); + }); +}); diff --git a/packages/editor-ext/package.json b/packages/editor-ext/package.json index 0e9b8305..1f2b5ff8 100644 --- a/packages/editor-ext/package.json +++ b/packages/editor-ext/package.json @@ -13,5 +13,9 @@ "types": "dist/index.d.ts", "dependencies": { "marked": "17.0.5" + }, + "devDependencies": { + "@vitest/coverage-v8": "4.1.6", + "vitest": "4.1.6" } } diff --git a/packages/editor-ext/vitest.config.ts b/packages/editor-ext/vitest.config.ts index 617c62d3..cb5a542b 100644 --- a/packages/editor-ext/vitest.config.ts +++ b/packages/editor-ext/vitest.config.ts @@ -5,5 +5,20 @@ export default defineConfig({ environment: "jsdom", globals: true, include: ["src/**/*.{test,spec}.ts"], + // Coverage gate (issue #324). v8 provider avoids the istanbul AST-rewrite + // that broke on this package's ESM barrel. Thresholds sit a few points + // below the level measured on develop, over the files the suite loads + // (the Vitest 4 default), so the gate passes today and catches a regression. 
+ coverage: { + enabled: true, + provider: "v8", + reporter: ["text-summary", "text"], + thresholds: { + statements: 54, + branches: 44, + functions: 60, + lines: 54, + }, + }, }, }); diff --git a/packages/git-sync/package.json b/packages/git-sync/package.json index cce08975..96765ffa 100644 --- a/packages/git-sync/package.json +++ b/packages/git-sync/package.json @@ -38,6 +38,7 @@ "@docmost/editor-ext": "workspace:*", "@types/jsdom": "^21.1.7", "@types/node": "^20.0.0", + "@vitest/coverage-v8": "4.1.6", "fast-check": "^4.8.0", "typescript": "^5.0.0", "vitest": "4.1.6" diff --git a/packages/git-sync/vitest.config.ts b/packages/git-sync/vitest.config.ts index 1c63f4e3..676c4111 100644 --- a/packages/git-sync/vitest.config.ts +++ b/packages/git-sync/vitest.config.ts @@ -18,6 +18,24 @@ export default defineConfig({ }, test: { environment: 'node', + // Coverage gate (issue #324). The v8 provider is used deliberately: the + // istanbul provider instruments sources by rewriting their AST, which broke + // on the ESM `@docmost/editor-ext` barrel import; v8 collects native + // coverage from the runtime and never re-parses ESM, so it sidesteps that. + // Thresholds are calibrated a few points BELOW the level measured on + // develop so the gate passes today but fails on a real regression. Numbers + // reflect the files actually loaded by the suite (the Vitest 4 default). + coverage: { + enabled: true, + provider: 'v8', + reporter: ['text-summary', 'text'], + thresholds: { + statements: 88, + branches: 75, + functions: 72, + lines: 88, + }, + }, // Runtime suites. The `.test.ts` glob deliberately EXCLUDES the type-only // contract file (`*.test-d.ts`), which is enforced by the typecheck pass // below instead — so the 35 runtime suites are never typechecked. 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7540bafe..66aeb468 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -453,6 +453,9 @@ importers: '@vitejs/plugin-react': specifier: 6.0.1 version: 6.0.1(vite@8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) + '@vitest/coverage-v8': + specifier: 4.1.6 + version: 4.1.6(vitest@4.1.6) eslint: specifier: 9.28.0 version: 9.28.0(jiti@2.4.2) @@ -497,7 +500,7 @@ importers: version: 8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3) vitest: specifier: 4.1.6 - version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@22.19.1)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) + version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@22.19.1)(@vitest/coverage-v8@4.1.6)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) apps/server: dependencies: @@ -886,6 +889,13 @@ importers: marked: specifier: 17.0.5 version: 17.0.5 + devDependencies: + '@vitest/coverage-v8': + specifier: 4.1.6 + version: 4.1.6(vitest@4.1.6) + vitest: + specifier: 4.1.6 + version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@25.5.0)(@vitest/coverage-v8@4.1.6)(happy-dom@20.8.9)(jsdom@27.4.0(@noble/hashes@2.0.1))(vite@8.0.5(@types/node@25.5.0)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) packages/git-sync: dependencies: @@ -938,6 +948,9 @@ importers: '@types/node': specifier: ^20.0.0 version: 20.19.43 + '@vitest/coverage-v8': + specifier: 4.1.6 + version: 4.1.6(vitest@4.1.6) fast-check: specifier: ^4.8.0 version: 4.8.0 @@ -946,7 +959,7 @@ importers: version: 5.9.3 vitest: specifier: 
4.1.6 - version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@20.19.43)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@20.19.43)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) + version: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@20.19.43)(@vitest/coverage-v8@4.1.6)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@20.19.43)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) packages/mcp: dependencies: @@ -1500,10 +1513,18 @@ packages: resolution: {integrity: sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==} engines: {node: '>=6.9.0'} + '@babel/helper-string-parser@7.29.7': + resolution: {integrity: sha512-Pb5ijPrZ89GDH8223L4UP8i6QApWxs04RbPQJTeWDV0/keR2E36MeKnyr6LYmUUvqRRI+Iv87SuF1W6ErINzYw==} + engines: {node: '>=6.9.0'} + '@babel/helper-validator-identifier@7.28.5': resolution: {integrity: sha512-qSs4ifwzKJSV39ucNjsvc6WVHs6b7S03sOh2OcHF9UHfVPqWWALUsNUVzhSBiItjRZoLHx7nIarVjqKVusUZ1Q==} engines: {node: '>=6.9.0'} + '@babel/helper-validator-identifier@7.29.7': + resolution: {integrity: sha512-qehxGkRj55h/ff8EMaJ+cYhyaKlHIxqYDn682wQD7RNp9UujOQsHog2uS0r2vzr4pW+sXf90NeeayjcNaX3fFg==} + engines: {node: '>=6.9.0'} + '@babel/helper-validator-option@7.27.1': resolution: {integrity: sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==} engines: {node: '>=6.9.0'} @@ -1526,6 +1547,11 @@ packages: engines: {node: '>=6.0.0'} hasBin: true + '@babel/parser@7.29.7': + resolution: {integrity: sha512-hnORnjP/1P/zFEndoeX+n+t1RwWRJiJpM/jO7FW32Kn9r5+sJB2JWOdYo4L6k78j15eCwY3Gm/7364B1EMwtNg==} + engines: {node: '>=6.0.0'} + hasBin: true + '@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression@7.23.3': resolution: {integrity: sha512-iRkKcCqb7iGnq9+3G6rZ+Ciz5VywC4XNRHe57lKM+jOeYAoR0lVqdeeDRfh0tQcTfw/+vBhHn926FmQhLtlFLQ==} engines: {node: 
'>=6.9.0'} @@ -2015,9 +2041,17 @@ packages: resolution: {integrity: sha512-qQ5m48eI/MFLQ5PxQj4PFaprjyCTLI37ElWMmNs0K8Lk3dVeOdNpB3ks8jc7yM5CDmVC73eMVk/trk3fgmrUpA==} engines: {node: '>=6.9.0'} + '@babel/types@7.29.7': + resolution: {integrity: sha512-4zBIxpPzowiZpusoFkyGVwakdRJUyuH5PxQ/PrqghfdFWWasvnCdPfQXHrenDai+gyLARulZjZowCOj6fjT4pA==} + engines: {node: '>=6.9.0'} + '@bcoe/v8-coverage@0.2.3': resolution: {integrity: sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==} + '@bcoe/v8-coverage@1.0.2': + resolution: {integrity: sha512-6zABk/ECA/QYSCQ1NGiVwwbQerUCZ+TQbp64Q3AgmfNvurHH0j8TtXa1qbShXA6qqkpAj4V5W8pP6mLe1mcMqA==} + engines: {node: '>=18'} + '@borewit/text-codec@0.2.1': resolution: {integrity: sha512-k7vvKPbf7J2fZ5klGRD9AeKfUvojuZIQ3BT5u7Jfv+puwXkUBUT5PVyMDfJZpy30CBDXGMgw7fguK/lpOMBvgw==} @@ -5453,6 +5487,15 @@ packages: babel-plugin-react-compiler: optional: true + '@vitest/coverage-v8@4.1.6': + resolution: {integrity: sha512-36l628fQ/9a/8ihy97eOtEnvWQEdqULQOJtcaxtoNq0G1w3Mxd4szSahOaMM9/NGyZ+hyKcMtIW/WIxq0XQViQ==} + peerDependencies: + '@vitest/browser': 4.1.6 + vitest: 4.1.6 + peerDependenciesMeta: + '@vitest/browser': + optional: true + '@vitest/expect@4.1.6': resolution: {integrity: sha512-7EHDquPthALSV0jhhjgEW8FXaviMx7rSqu8W6oqCoAuOhKov814P99QDV1pxMA3QPv21YudvJngIhjrNI4opLg==} @@ -5738,6 +5781,9 @@ packages: resolution: {integrity: sha512-Izi8RQcffqCeNVgFigKli1ssklIbpHnCYc6AknXGYoB6grJqyeby7jv12JUQgmTAnIDnbck1uxksT4dzN3PWBA==} engines: {node: '>=12'} + ast-v8-to-istanbul@1.0.4: + resolution: {integrity: sha512-0bC0/4bTSrnwdhU3IsZDwEdojvuPrSg59OYZfKsLRtJZ0u8VBx9DebfqqG8bRdCC0I7vjgxmPi41P0lpkhJHtA==} + async-lock@1.4.1: resolution: {integrity: sha512-Az2ZTpuytrtqENulXwO3GGv1Bztugx6TT37NIo7imr/Qo0gsYiGtSdBa2B6fsXhTpVZDNfu1Qn3pk531e3q+nQ==} @@ -7665,6 +7711,10 @@ packages: resolution: {integrity: sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==} engines: {node: '>=8'} + 
istanbul-reports@3.2.0: + resolution: {integrity: sha512-HGYWWS/ehqTV3xN10i23tkPkpH46MLCIMFNCaaKNavAXTF1RkqxawEPtnjnGZ6XKSInBKkiOA5BKS+aZiY3AvA==} + engines: {node: '>=8'} + iterare@1.2.1: resolution: {integrity: sha512-RKYVTCjAnRthyJes037NX/IiqeidgN1xc3j1RjFfECFp28A1GVwK9nA+i0rJPaHqSZwygLzRnFlzUuHFoWWy+Q==} engines: {node: '>=6'} @@ -7890,6 +7940,9 @@ packages: js-tiktoken@1.0.21: resolution: {integrity: sha512-biOj/6M5qdgx5TKjDnFT1ymSpM5tbd3ylwDtrQvFQSu0Z7bBYko2dF+W/aUkXUPuk6IVpRxk/3Q2sHOzGlS36g==} + js-tokens@10.0.0: + resolution: {integrity: sha512-lM/UBzQmfJRo9ABXbPWemivdCW8V2G8FHaHdypQaIy523snUjog0W71ayWXTjiR+ixeMyVHN2XcpnTd/liPg/Q==} + js-tokens@4.0.0: resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} @@ -8335,6 +8388,9 @@ packages: magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} + magicast@0.5.3: + resolution: {integrity: sha512-pVKE4UdSQ7DvHzivsCIFx2BJn1mHG6KsyrFcaxFx6tONdneEuThrDx0Cj3AMg58KyN4pzYT+LHOotxDQDjNvkw==} + make-dir@2.1.0: resolution: {integrity: sha512-LS9X+dc8KLxXCb8dni79fLIIUA5VyZoyjSMCwTluaXA0o27cCK0bhXkpgw+sTXVpPy/lSO57ilRixqk0vDmtRA==} engines: {node: '>=6'} @@ -11643,8 +11699,12 @@ snapshots: '@babel/helper-string-parser@7.27.1': {} + '@babel/helper-string-parser@7.29.7': {} + '@babel/helper-validator-identifier@7.28.5': {} + '@babel/helper-validator-identifier@7.29.7': {} + '@babel/helper-validator-option@7.27.1': {} '@babel/helper-wrap-function@7.22.20': @@ -11666,6 +11726,10 @@ snapshots: dependencies: '@babel/types': 7.28.5 + '@babel/parser@7.29.7': + dependencies: + '@babel/types': 7.29.7 + '@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression@7.23.3(@babel/core@7.28.5)': dependencies: '@babel/core': 7.28.5 @@ -12271,8 +12335,15 @@ snapshots: '@babel/helper-string-parser': 7.27.1 '@babel/helper-validator-identifier': 7.28.5 + 
'@babel/types@7.29.7': + dependencies: + '@babel/helper-string-parser': 7.29.7 + '@babel/helper-validator-identifier': 7.29.7 + '@bcoe/v8-coverage@0.2.3': {} + '@bcoe/v8-coverage@1.0.2': {} + '@borewit/text-codec@0.2.1': {} '@braintree/sanitize-url@6.0.2': {} @@ -13256,7 +13327,7 @@ snapshots: '@jridgewell/trace-mapping@0.3.31': dependencies: '@jridgewell/resolve-uri': 3.1.2 - '@jridgewell/sourcemap-codec': 1.5.0 + '@jridgewell/sourcemap-codec': 1.5.5 '@jridgewell/trace-mapping@0.3.9': dependencies: @@ -15931,6 +16002,20 @@ snapshots: '@rolldown/pluginutils': 1.0.0-rc.7 vite: 8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3) + '@vitest/coverage-v8@4.1.6(vitest@4.1.6)': + dependencies: + '@bcoe/v8-coverage': 1.0.2 + '@vitest/utils': 4.1.6 + ast-v8-to-istanbul: 1.0.4 + istanbul-lib-coverage: 3.2.2 + istanbul-lib-report: 3.0.1 + istanbul-reports: 3.2.0 + magicast: 0.5.3 + obug: 2.1.1 + std-env: 4.1.0 + tinyrainbow: 3.1.0 + vitest: 4.1.6(@opentelemetry/api@1.9.0)(@types/node@22.19.1)(@vitest/coverage-v8@4.1.6)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) + '@vitest/expect@4.1.6': dependencies: '@standard-schema/spec': 1.1.0 @@ -15956,6 +16041,14 @@ snapshots: optionalDependencies: vite: 8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3) + '@vitest/mocker@4.1.6(vite@8.0.5(@types/node@25.5.0)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3))': + dependencies: + '@vitest/spy': 4.1.6 + estree-walker: 3.0.3 + magic-string: 0.30.21 + optionalDependencies: + vite: 8.0.5(@types/node@25.5.0)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3) + 
'@vitest/pretty-format@4.1.6': dependencies: tinyrainbow: 3.1.0 @@ -16268,6 +16361,12 @@ snapshots: assertion-error@2.0.1: {} + ast-v8-to-istanbul@1.0.4: + dependencies: + '@jridgewell/trace-mapping': 0.3.31 + estree-walker: 3.0.3 + js-tokens: 10.0.0 + async-lock@1.4.1: {} async-mutex@0.5.0: @@ -18487,6 +18586,11 @@ snapshots: html-escaper: 2.0.2 istanbul-lib-report: 3.0.1 + istanbul-reports@3.2.0: + dependencies: + html-escaper: 2.0.2 + istanbul-lib-report: 3.0.1 + iterare@1.2.1: {} iterator.prototype@1.1.5: @@ -18897,6 +19001,8 @@ snapshots: dependencies: base64-js: 1.5.1 + js-tokens@10.0.0: {} + js-tokens@4.0.0: {} js-yaml@3.14.2: @@ -19333,6 +19439,12 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 + magicast@0.5.3: + dependencies: + '@babel/parser': 7.29.7 + '@babel/types': 7.29.7 + source-map-js: 1.2.1 + make-dir@2.1.0: dependencies: pify: 4.0.1 @@ -21690,7 +21802,25 @@ snapshots: tsx: 4.21.0 yaml: 2.8.3 - vitest@4.1.6(@opentelemetry/api@1.9.0)(@types/node@20.19.43)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@20.19.43)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)): + vite@8.0.5(@types/node@25.5.0)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3): + dependencies: + lightningcss: 1.32.0 + picomatch: 4.0.4 + postcss: 8.5.14 + rolldown: 1.0.0-rc.12 + tinyglobby: 0.2.15 + optionalDependencies: + '@types/node': 25.5.0 + esbuild: 0.28.0 + fsevents: 2.3.3 + jiti: 2.4.2 + less: 4.2.0 + sugarss: 5.0.1(postcss@8.5.14) + terser: 5.39.0 + tsx: 4.21.0 + yaml: 2.8.3 + + vitest@4.1.6(@opentelemetry/api@1.9.0)(@types/node@20.19.43)(@vitest/coverage-v8@4.1.6)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@20.19.43)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)): dependencies: '@vitest/expect': 4.1.6 '@vitest/mocker': 
4.1.6(vite@8.0.5(@types/node@20.19.43)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) @@ -21715,12 +21845,13 @@ snapshots: optionalDependencies: '@opentelemetry/api': 1.9.0 '@types/node': 20.19.43 + '@vitest/coverage-v8': 4.1.6(vitest@4.1.6) happy-dom: 20.8.9 jsdom: 25.0.0 transitivePeerDependencies: - msw - vitest@4.1.6(@opentelemetry/api@1.9.0)(@types/node@22.19.1)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)): + vitest@4.1.6(@opentelemetry/api@1.9.0)(@types/node@22.19.1)(@vitest/coverage-v8@4.1.6)(happy-dom@20.8.9)(jsdom@25.0.0)(vite@8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)): dependencies: '@vitest/expect': 4.1.6 '@vitest/mocker': 4.1.6(vite@8.0.5(@types/node@22.19.1)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) @@ -21745,11 +21876,43 @@ snapshots: optionalDependencies: '@opentelemetry/api': 1.9.0 '@types/node': 22.19.1 + '@vitest/coverage-v8': 4.1.6(vitest@4.1.6) happy-dom: 20.8.9 jsdom: 25.0.0 transitivePeerDependencies: - msw + vitest@4.1.6(@opentelemetry/api@1.9.0)(@types/node@25.5.0)(@vitest/coverage-v8@4.1.6)(happy-dom@20.8.9)(jsdom@27.4.0(@noble/hashes@2.0.1))(vite@8.0.5(@types/node@25.5.0)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)): + dependencies: + '@vitest/expect': 4.1.6 + '@vitest/mocker': 4.1.6(vite@8.0.5(@types/node@25.5.0)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3)) + '@vitest/pretty-format': 4.1.6 + '@vitest/runner': 4.1.6 + '@vitest/snapshot': 4.1.6 + '@vitest/spy': 4.1.6 + '@vitest/utils': 4.1.6 + es-module-lexer: 2.1.0 + expect-type: 1.3.0 + magic-string: 0.30.21 
+ obug: 2.1.1 + pathe: 2.0.3 + picomatch: 4.0.4 + std-env: 4.1.0 + tinybench: 2.9.0 + tinyexec: 1.1.2 + tinyglobby: 0.2.15 + tinyrainbow: 3.1.0 + vite: 8.0.5(@types/node@25.5.0)(esbuild@0.28.0)(jiti@2.4.2)(less@4.2.0)(sugarss@5.0.1(postcss@8.5.14))(terser@5.39.0)(tsx@4.21.0)(yaml@2.8.3) + why-is-node-running: 2.3.0 + optionalDependencies: + '@opentelemetry/api': 1.9.0 + '@types/node': 25.5.0 + '@vitest/coverage-v8': 4.1.6(vitest@4.1.6) + happy-dom: 20.8.9 + jsdom: 27.4.0(@noble/hashes@2.0.1) + transitivePeerDependencies: + - msw + void-elements@3.1.0: {} vscode-jsonrpc@8.2.0: {} From f13105333abb287f7d32b3e4305f20854cc4c160 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 4 Jul 2026 15:17:07 +0300 Subject: [PATCH 3/7] =?UTF-8?q?feat(client):=20intraline=20diff=20highligh?= =?UTF-8?q?ting=20in=20the=20suggestion=20before=E2=86=92after=20block=20(?= =?UTF-8?q?#331)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The suggestion block (#315) struck the whole `selection` red and showed the whole `suggestedText` green, so a one-letter edit (заведем→заведём) highlighted the entire line. Now only the CHANGED fragments are emphasized intraline, git-style. Pure, render-only — nothing changes in the DB/backend/MCP/IComment/mutations/ Apply/Badge. New pure `computeSuggestionDiff(old, new) => { old: Segment[], new: Segment[] }` (Segment = {text, changed}) in suggestion.ts: hybrid word+char — `diffWordsWithSpace` for the word skeleton, then `diffChars` inside an adjacent removed+added pair so only the differing letters (not the whole word) are flagged; a lone insertion/deletion is wholly changed; equal parts are common on both sides. Concatenating each side reproduces the input (lossless). Wrapped in `useMemo` on [selection, suggestedText]. 
comment-list-item.tsx renders per-segment spans instead of two whole-line text nodes; changed segments get `.suggestionChanged` (a stronger currentColor tint + bold, NO text-decoration so the old block's inherited line-through survives on the changed letters — the whole old line still reads removed, new as added). `diff@8.0.3` (jsdiff, already in the root package.json) added to apps/client/package.json (+ lockfile, additive) so the workspace resolves it; it bundles its own types. Tests: new suggestion.test.ts (one-letter ё/е; word replacement keeping the shared word common with no per-letter noise; word insertion/deletion; identical) — asserts segment text + changed flags, non-vacuous. Two pre-existing comment-list-item.test assertions switched from getByText (a single text node) to container.textContent (the new line is now multiple spans) — adapts to the intended DOM change, not a weakening. Verified: tsc --noEmit clean; client vitest 892 passed | 1 expected-fail. Visual/pixel check of the tint at the 390px comment panel needs a human (no screenshot tooling in-repo). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/client/package.json | 1 + .../components/comment-list-item.test.tsx | 14 ++- .../comment/components/comment-list-item.tsx | 39 ++++++- .../comment/components/comment.module.css | 15 +++ .../features/comment/utils/suggestion.test.ts | 102 +++++++++++++++++ .../src/features/comment/utils/suggestion.ts | 103 ++++++++++++++++++ pnpm-lock.yaml | 3 + 7 files changed, 267 insertions(+), 10 deletions(-) create mode 100644 apps/client/src/features/comment/utils/suggestion.test.ts diff --git a/apps/client/package.json b/apps/client/package.json index 010cb5e4..9262d7a4 100644 --- a/apps/client/package.json +++ b/apps/client/package.json @@ -40,6 +40,7 @@ "axios": "1.16.0", "blueimp-load-image": "5.16.0", "clsx": "2.1.1", + "diff": "8.0.3", "dompurify": "3.4.1", "file-saver": "2.0.5", "highlightjs-sap-abap": "0.3.0", diff --git a/apps/client/src/features/comment/components/comment-list-item.test.tsx b/apps/client/src/features/comment/components/comment-list-item.test.tsx index 8b75b337..ceb9cbc0 100644 --- a/apps/client/src/features/comment/components/comment-list-item.test.tsx +++ b/apps/client/src/features/comment/components/comment-list-item.test.tsx @@ -108,10 +108,12 @@ describe("CommentListItem — suggested edit (#315)", () => { }); it("renders the было→стало diff and an Apply button when canEdit and not applied/resolved", () => { - renderItem(suggestion(), true); - // Old text appears both as the selection quote and as the struck diff row. + const { container } = renderItem(suggestion(), true); + // Old text appears as the selection quote (a single unsplit Text node). expect(screen.getAllByText("old wording here").length).toBeGreaterThan(0); - expect(screen.getByText("new wording here")).toBeDefined(); + // The new line is now rendered as per-fragment spans (intraline diff, #331), + // so it is no longer a single text node — assert the concatenated content. 
+ expect(container.textContent).toContain("new wording here"); // Apply button is present. expect(screen.getByRole("button", { name: "Apply" })).toBeDefined(); // No Applied badge yet. @@ -119,9 +121,9 @@ describe("CommentListItem — suggested edit (#315)", () => { }); it("hides the Apply button when canEdit is false", () => { - renderItem(suggestion(), false); - // Diff still renders... - expect(screen.getByText("new wording here")).toBeDefined(); + const { container } = renderItem(suggestion(), false); + // Diff still renders (as per-fragment spans, #331)... + expect(container.textContent).toContain("new wording here"); // ...but no Apply button. expect(screen.queryByRole("button", { name: "Apply" })).toBeNull(); }); diff --git a/apps/client/src/features/comment/components/comment-list-item.tsx b/apps/client/src/features/comment/components/comment-list-item.tsx index 0d4b5e02..0e397245 100644 --- a/apps/client/src/features/comment/components/comment-list-item.tsx +++ b/apps/client/src/features/comment/components/comment-list-item.tsx @@ -1,6 +1,6 @@ import { Group, Text, Box, Badge, Button } from "@mantine/core"; import { AgentAvatarStack } from "@/components/ui/agent-avatar-stack.tsx"; -import React, { useEffect, useRef, useState } from "react"; +import React, { useEffect, useMemo, useRef, useState } from "react"; import classes from "./comment.module.css"; import { useAtom, useAtomValue } from "jotai"; import { useTimeAgo } from "@/hooks/use-time-ago"; @@ -17,7 +17,10 @@ import { useUpdateCommentMutation, } from "@/features/comment/queries/comment-query"; import { IComment } from "@/features/comment/types/comment.types"; -import { canShowApply } from "@/features/comment/utils/suggestion"; +import { + canShowApply, + computeSuggestionDiff, +} from "@/features/comment/utils/suggestion"; import { CustomAvatar } from "@/components/ui/custom-avatar.tsx"; import { currentUserAtom } from "@/features/user/atoms/current-user-atom.ts"; import { useTranslation } from 
"react-i18next"; @@ -54,6 +57,18 @@ function CommentListItem({ const [currentUser] = useAtom(currentUserAtom); const createdAtAgo = useTimeAgo(comment.createdAt); + // Intraline "before -> after" diff (#331) for a suggested edit: only the + // fragments that actually changed get emphasised inside the red/green block, + // instead of striking through / greening the whole line. Memoised on the + // (selection, suggestedText) pair so it recomputes only when they change. + const suggestionDiff = useMemo( + () => + comment.suggestedText != null + ? computeSuggestionDiff(comment.selection ?? "", comment.suggestedText) + : null, + [comment.selection, comment.suggestedText], + ); + useEffect(() => { setContent(comment.content); }, [comment]); @@ -236,12 +251,28 @@ function CommentListItem({ {!comment.parentCommentId && comment.suggestedText && ( {comment.selection && ( + // Old line: read as removed as a whole (line-through/red); only the + // changed fragments carry the extra intraline emphasis. - {comment.selection} + {suggestionDiff?.old.map((segment, index) => ( + + {segment.text} + + ))} )} - {comment.suggestedText} + {suggestionDiff?.new.map((segment, index) => ( + + {segment.text} + + ))} {comment.suggestionAppliedAt ? ( diff --git a/apps/client/src/features/comment/components/comment.module.css b/apps/client/src/features/comment/components/comment.module.css index 2a4b9397..f3e7f04f 100644 --- a/apps/client/src/features/comment/components/comment.module.css +++ b/apps/client/src/features/comment/components/comment.module.css @@ -53,6 +53,21 @@ margin-top: 4px; } +/* Intraline diff (#331): the fragment that actually changed within the + red "before" / green "after" block. It inherits the surrounding red/green + framing and adds a stronger tint plus bold weight so the eye lands on the + changed letters/words (git/GitHub-style) rather than the whole line. The + container's line-through (old) / green (new) still marks the full line.
*/ +.suggestionChanged { + /* Stronger tint of the surrounding red/green so the changed fragment pops + within the block. `currentColor` follows the parent's red (old) or green + (new) text colour. No `text-decoration` here on purpose: the old block's + inherited line-through must survive on the changed letters too. */ + background: color-mix(in srgb, currentColor 22%, transparent); + border-radius: 2px; + font-weight: 700; +} + .commentEditor { &[data-editable][data-surface="muted"] .ProseMirror:not(.focused) { diff --git a/apps/client/src/features/comment/utils/suggestion.test.ts b/apps/client/src/features/comment/utils/suggestion.test.ts new file mode 100644 index 00000000..890e9625 --- /dev/null +++ b/apps/client/src/features/comment/utils/suggestion.test.ts @@ -0,0 +1,102 @@ +import { describe, it, expect } from "vitest"; +import { computeSuggestionDiff, Segment } from "@/features/comment/utils/suggestion"; + +// Reconstruct the plain string from a segment stream — the diff must be +// lossless (concatenating every fragment yields the original input). +const join = (segments: Segment[]): string => + segments.map((s) => s.text).join(""); + +// The subset of segments (in order) that the UI would emphasise. +const changed = (segments: Segment[]): string[] => + segments.filter((s) => s.changed).map((s) => s.text); + +// Find the segment that contains a substring, to assert its `changed` flag. +const segmentWith = (segments: Segment[], needle: string): Segment | undefined => + segments.find((s) => s.text.includes(needle)); + +describe("computeSuggestionDiff", () => { + it("highlights only the single changed letter in a one-letter edit", () => { + const { old, new: neu } = computeSuggestionDiff("заведем", "заведём"); + + // Lossless. + expect(join(old)).toBe("заведем"); + expect(join(neu)).toBe("заведём"); + + // Old side: exactly the `е` is changed, the rest is common. 
+ expect(changed(old)).toEqual(["е"]); + expect(old).toEqual([ + { text: "завед", changed: false }, + { text: "е", changed: true }, + { text: "м", changed: false }, + ]); + + // New side: exactly the `ё` is changed. + expect(changed(neu)).toEqual(["ё"]); + expect(neu).toEqual([ + { text: "завед", changed: false }, + { text: "ё", changed: true }, + { text: "м", changed: false }, + ]); + }); + + it("marks the differing words changed but keeps the shared word common", () => { + const { old, new: neu } = computeSuggestionDiff( + "привет мир", + "здравствуй мир", + ); + + // Lossless. + expect(join(old)).toBe("привет мир"); + expect(join(neu)).toBe("здравствуй мир"); + + // The shared trailing word stays common on both sides (no per-letter noise + // leaking across the differing words into `мир`). + expect(segmentWith(old, "мир")?.changed).toBe(false); + expect(segmentWith(neu, "мир")?.changed).toBe(false); + + // The differing words are emphasised somewhere on each side. + expect(changed(old).length).toBeGreaterThan(0); + expect(changed(neu).length).toBeGreaterThan(0); + expect(changed(old).join("")).toContain("п"); // from `привет` + expect(changed(neu).join("")).toContain("зд"); // from `здравствуй` + + // No changed fragment on either side touches the word `мир`. + expect(changed(old).some((t) => t.includes("мир"))).toBe(false); + expect(changed(neu).some((t) => t.includes("мир"))).toBe(false); + }); + + it("marks a whole inserted word changed and leaves the old line common", () => { + const { old, new: neu } = computeSuggestionDiff("a c", "a b c"); + + expect(join(old)).toBe("a c"); + expect(join(neu)).toBe("a b c"); + + // Old line has no changed fragment (nothing was removed). + expect(changed(old)).toEqual([]); + // The inserted word is the only changed fragment on the new side. 
+ expect(neu).toContainEqual({ text: "b ", changed: true }); + expect(changed(neu)).toEqual(["b "]); + }); + + it("marks a whole deleted word changed and leaves the new line common", () => { + const { old, new: neu } = computeSuggestionDiff("a b c", "a c"); + + expect(join(old)).toBe("a b c"); + expect(join(neu)).toBe("a c"); + + // The deleted word is the only changed fragment on the old side. + expect(old).toContainEqual({ text: "b ", changed: true }); + expect(changed(old)).toEqual(["b "]); + // New line has no changed fragment (nothing was added). + expect(changed(neu)).toEqual([]); + }); + + it("marks everything common for identical strings", () => { + const { old, new: neu } = computeSuggestionDiff("hello", "hello"); + + expect(old).toEqual([{ text: "hello", changed: false }]); + expect(neu).toEqual([{ text: "hello", changed: false }]); + expect(changed(old)).toEqual([]); + expect(changed(neu)).toEqual([]); + }); +}); diff --git a/apps/client/src/features/comment/utils/suggestion.ts b/apps/client/src/features/comment/utils/suggestion.ts index d14dea6e..b353b876 100644 --- a/apps/client/src/features/comment/utils/suggestion.ts +++ b/apps/client/src/features/comment/utils/suggestion.ts @@ -1,3 +1,4 @@ +import { diffWordsWithSpace, diffChars } from "diff"; import { IComment } from "@/features/comment/types/comment.types"; // Whether the suggested-edit (#315) "Apply" button should be shown for a @@ -12,3 +13,105 @@ export function canShowApply(comment: IComment, canEdit?: boolean): boolean { !comment.parentCommentId, ); } + +// One contiguous run of text within a suggestion's "before" or "after" line. +// `changed` marks the fragment that actually differs from the other side, so +// the UI can emphasise only the intraline delta (git/GitHub-style) instead of +// the whole line. +export interface Segment { + text: string; + changed: boolean; +} + +// A pure "before -> after" intraline diff (#331): the old line split into +// common vs. 
removed-and-changed fragments, and the new line split into common +// vs. added-and-changed fragments. Concatenating each side's `text` reproduces +// the original strings. +export interface SuggestionDiff { + old: Segment[]; + new: Segment[]; +} + +// Push a segment, coalescing runs of the same `changed` flag on the same side +// so the render emits as few spans as possible and tests stay predictable. +function pushSegment(segments: Segment[], text: string, changed: boolean): void { + if (text === "") return; + const last = segments[segments.length - 1]; + if (last && last.changed === changed) { + last.text += text; + } else { + segments.push({ text, changed }); + } +} + +// Compute an intraline diff between the old `selection` and the new +// `suggestedText` of a suggestion. PURE — no React, no DOM, no I/O. +// +// Hybrid word + char algorithm (per #331): +// 1. `diffWordsWithSpace` yields word-granular parts [{value, added, removed}]. +// 2. An ADJACENT removed+added pair (a word replacement) is refined with +// `diffChars`: shared characters stay common, differing characters are +// marked `changed` on their respective side. This is what keeps a +// one-letter edit (заведем -> заведём) from highlighting the whole word. +// 3. A lone `added` (insertion) or lone `removed` (deletion) marks the whole +// fragment `changed`. +// 4. An unchanged part is `common` on both sides. +// +// Rejected alternatives: pure `diffChars` is noisy on word swaps; pure +// `diffWordsWithSpace` highlights the whole word rather than the changed letter. +export function computeSuggestionDiff( + oldStr: string, + newStr: string, +): SuggestionDiff { + const oldSegments: Segment[] = []; + const newSegments: Segment[] = []; + + const parts = diffWordsWithSpace(oldStr, newStr); + + for (let i = 0; i < parts.length; i++) { + const part = parts[i]; + const next = parts[i + 1]; + + // A word replacement: a removed part immediately followed by an added part + // (or the reverse). 
Refine it character-by-character so only the differing + // letters are highlighted while shared letters stay common. + const isReplacementPair = + next && + ((part.removed && next.added) || (part.added && next.removed)); + + if (isReplacementPair) { + const removedPart = part.removed ? part : next; + const addedPart = part.added ? part : next; + + const charParts = diffChars(removedPart.value, addedPart.value); + for (const cp of charParts) { + if (cp.added) { + pushSegment(newSegments, cp.value, true); + } else if (cp.removed) { + pushSegment(oldSegments, cp.value, true); + } else { + // Shared character: common on both sides. + pushSegment(oldSegments, cp.value, false); + pushSegment(newSegments, cp.value, false); + } + } + + i++; // consume the paired part as well + continue; + } + + if (part.added) { + // Lone insertion: only present in the new line, wholly changed. + pushSegment(newSegments, part.value, true); + } else if (part.removed) { + // Lone deletion: only present in the old line, wholly changed. + pushSegment(oldSegments, part.value, true); + } else { + // Unchanged: common on both sides. 
+ pushSegment(oldSegments, part.value, false); + pushSegment(newSegments, part.value, false); + } + } + + return { old: oldSegments, new: newSegments }; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7540bafe..a9642cf2 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -335,6 +335,9 @@ importers: clsx: specifier: 2.1.1 version: 2.1.1 + diff: + specifier: 8.0.3 + version: 8.0.3 dompurify: specifier: 3.4.1 version: 3.4.1 From 40d42d61e6e306f08577d2889ac6b2bed464d58d Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 4 Jul 2026 15:51:34 +0300 Subject: [PATCH 4/7] =?UTF-8?q?feat(mcp):=20search=5Fin=5Fpage=20tool=20?= =?UTF-8?q?=E2=80=94=20in-page=20substring/regex=20search=20for=20the=20ag?= =?UTF-8?q?ent=20(#330)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Editorial roles (Corrector/Factchecker) brute-forced `get_node` block-by-block to find occurrences (unquoted «ё», straight quotes, «т.е.»), burning tokens. New `search_in_page(pageId, query, {regex?, caseSensitive?, limit?})` reads the page's ProseMirror JSON via the existing getPageRaw and searches it IN MEMORY — no server endpoint, no DB/schema change, no touch to the packages/mcp/src/lib schema mirror. New pure `searchInDoc(doc, query, opts)` (packages/mcp/src/lib/page-search.ts): recursive descent to each TEXT CONTAINER (paragraph/heading/table-cell paragraph), glues its inline text via `blockPlainText` (a match survives inline-mark boundaries — e.g. «т.е.» split across bold/italic), searches literal (indexOf) or regex, and returns `{ total, truncated, matches:[{ nodeId, blockIndex, type, before, match, after }] }`. `nodeId` is the container's attrs.id or the `#` of the enclosing top-level block — the SAME ref format get_node/patch_node/comment-anchoring accept (verified identical to getNodeByRef), so the agent goes straight from a hit to a targeted comment; `before`/`after` are ~40-char windows for a unique selection. 
`total`/`truncated` always reported (never silent truncation). Lives in the SHARED_TOOL_SPECS registry → exposed in BOTH transports (external /mcp + in-app AI-chat), with a SERVER_INSTRUCTIONS line and a DocmostClientLike signature + contract-test entry. Corrector/Factchecker prompts get a one-line "use search_in_page first" hint (versions bumped, catalog hash lock refreshed). Guards: empty/whitespace query → clear error; invalid regex → clear error (not a generic 500); zero-length regex matches (`\b`, `a*`) skipped with lastIndex advanced (no loop/flood); MAX_PATTERN_LENGTH=1000, MAX_CONTAINER_TEXT=100k bound each exec; limit clamped [1,200] (default 50). Tests: new page-search.test.mjs (17) — literal+regex, case-sensitivity, mark-boundary glue, nodeId for paragraph/heading (attrs.id) and table-cell (# fallback), context bounds, limit/total/truncated + clamp, invalid regex/empty/over-long errors, zero-length skip, empty-doc null-safety. mcp: tsc clean; node --test 467 passed (+17). apps/server: tsc --noEmit clean (DocmostClientLike + wiring). catalog check.mjs OK. Known limitations (from internal review, non-blocking): - Residual ReDoS: a crafted catastrophic-backtracking pattern (e.g. `(a+)+$`) against a large single container can hang the event loop — JS regex is not interruptible, so the length caps bound the base but not the backtracking. Realistic exposure is low (containers are small; the pattern is supplied by the authenticated model). Candidate for a follow-up hardening (safe-regex validation or a worker+timeout) if it matters. - Case-insensitive LITERAL search folds via toLowerCase; a char whose lowercase differs in length (e.g. Turkish İ) BEFORE a match could shift the context window — negligible for the RU/EN editorial scenario. - On a `#` table-cell fallback, `type` is the inline container ("paragraph") while nodeId addresses the top-level block — addressing is correct; the field is documented as the container's type. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- agent-roles-catalog/bundles/editorial/en.yaml | 4 +- agent-roles-catalog/bundles/editorial/ru.yaml | 4 +- agent-roles-catalog/index.yaml | 4 +- .../scripts/content-hashes.json | 8 +- .../ai-chat/tools/ai-chat-tools.service.ts | 10 + .../ai-chat/tools/docmost-client.loader.ts | 5 + .../mcp/build/_vendored_editor_ext/copy.js | 6 + .../build/_vendored_editor_ext/getFromPath.js | 18 ++ .../_vendored_editor_ext/getReplaceStep.js | 27 ++ .../mcp/build/_vendored_editor_ext/index.js | 8 + .../build/_vendored_editor_ext/package.json | 1 + .../_vendored_editor_ext/recreateTransform.js | 242 +++++++++++++++++ .../recreateTransform.test.js | 118 +++++++++ .../build/_vendored_editor_ext/removeMarks.js | 9 + .../_vendored_editor_ext/simplifyTransform.js | 27 ++ .../mcp/build/_vendored_editor_ext/types.js | 2 + packages/mcp/build/client.js | 17 ++ packages/mcp/build/index.js | 11 +- packages/mcp/build/lib/page-search.js | 169 ++++++++++++ packages/mcp/build/tool-specs.js | 42 +++ packages/mcp/src/client.ts | 22 ++ packages/mcp/src/index.ts | 15 +- packages/mcp/src/lib/page-search.ts | 245 ++++++++++++++++++ packages/mcp/src/tool-specs.ts | 45 ++++ .../test/unit/client-host-contract.test.mjs | 1 + packages/mcp/test/unit/page-search.test.mjs | 217 ++++++++++++++++ 26 files changed, 1265 insertions(+), 12 deletions(-) create mode 100644 packages/mcp/build/_vendored_editor_ext/copy.js create mode 100644 packages/mcp/build/_vendored_editor_ext/getFromPath.js create mode 100644 packages/mcp/build/_vendored_editor_ext/getReplaceStep.js create mode 100644 packages/mcp/build/_vendored_editor_ext/index.js create mode 100644 packages/mcp/build/_vendored_editor_ext/package.json create mode 100644 packages/mcp/build/_vendored_editor_ext/recreateTransform.js create mode 100644 packages/mcp/build/_vendored_editor_ext/recreateTransform.test.js create mode 100644 packages/mcp/build/_vendored_editor_ext/removeMarks.js create mode 100644 
packages/mcp/build/_vendored_editor_ext/simplifyTransform.js create mode 100644 packages/mcp/build/_vendored_editor_ext/types.js create mode 100644 packages/mcp/build/lib/page-search.js create mode 100644 packages/mcp/src/lib/page-search.ts create mode 100644 packages/mcp/test/unit/page-search.test.mjs diff --git a/agent-roles-catalog/bundles/editorial/en.yaml b/agent-roles-catalog/bundles/editorial/en.yaml index d947b2f2..8379ebc8 100644 --- a/agent-roles-catalog/bundles/editorial/en.yaml +++ b/agent-roles-catalog/bundles/editorial/en.yaml @@ -128,7 +128,7 @@ roles: - Don't fabricate confirmations. If you can't verify, honestly mark [Unverified] or [Unverifiable]. HOW TO LEAVE COMMENTS - You don't edit the text directly. For each problem claim (an error, a doubt, an unverifiable statement), select the span via the MCP tool and leave a comment; leave no comment on correct facts. Give the verdict, the correction (if any), and the source. For an [Incorrect] verdict, ALWAYS attach the ready correction as a suggested replacement (the `suggestedText` parameter): since you found the correct value in the sources, propose the ready fix right away instead of merely describing the error. The replacement is the exact new text for the selected fragment, plain text with no markup; the author applies it with one click instead of retyping the fragment. The selected fragment must occur exactly once in the text; if it isn't unique, extend the selection with surrounding context. Do not attach a replacement to [Unverified], [Unverifiable], or [Opinion] verdicts. Tag severity: + You don't edit the text directly. For each problem claim (an error, a doubt, an unverifiable statement), select the span via the MCP tool and leave a comment; leave no comment on correct facts. Give the verdict, the correction (if any), and the source. 
For an [Incorrect] verdict, ALWAYS attach the ready correction as a suggested replacement (the `suggestedText` parameter): since you found the correct value in the sources, propose the ready fix right away instead of merely describing the error. The replacement is the exact new text for the selected fragment, plain text with no markup; the author applies it with one click instead of retyping the fragment. The selected fragment must occur exactly once in the text; if it isn't unique, extend the selection with surrounding context. When a figure, name, term, or version to check recurs across the page, use search_in_page to find every occurrence in one call first, then place a targeted comment per hit instead of reading block by block. Do not attach a replacement to [Unverified], [Unverifiable], or [Opinion] verdicts. Tag severity: - [Critical] — a factual error, especially in numbers, names, or quotes, or a claim that risks misinformation. - [Major] — a doubtful or unconfirmed claim that needs a source. - [Minor] — a small correction, or false precision worth rounding or confirming. @@ -169,7 +169,7 @@ roles: - Don't make substantive changes. Edits are minimal and mechanical. HOW TO WORK - Go through the whole text from start to finish in a single pass. Flag EVERY violation, including all repeat occurrences of the same error and minor items tagged [Minor] — don't stop at the first few or the most conspicuous. Don't summarize instead of marking up: until you've reached the end of the document, the job isn't done. One run covers the whole text, not just "the most important". + Go through the whole text from start to finish in a single pass. Flag EVERY violation, including all repeat occurrences of the same error and minor items tagged [Minor] — don't stop at the first few or the most conspicuous. Don't summarize instead of marking up: until you've reached the end of the document, the job isn't done. One run covers the whole text, not just "the most important". 
For a systematic issue that recurs — straight quotes, a hyphen used as a dash, an inconsistent unit or spelling — use search_in_page to list every occurrence in one call first, then leave a targeted comment (with its replacement) on each hit, instead of scanning block by block. HOW TO LEAVE COMMENTS You don't edit the text directly. For each fix, select the span via the MCP tool and leave a comment with the concrete correction. Attach a suggested replacement to every fix (the `suggestedText` parameter): the exact corrected text for the selected fragment, plain text with no markup — the author applies it with one click. The selected fragment must occur exactly once in the text; if it isn't unique, extend the selection with surrounding context. Do NOT leave summary notes like "throughout, replace X with Y" or "make the units/quotes/spelling consistent": such a comment can't be applied with a button. If the same error occurs in several places, walk EVERY occurrence and leave a separate targeted comment with its own replacement on each — ten targeted fixes instead of one blanket note. The only exception is a note that genuinely cannot be expressed as a replacement of a concrete fragment; leave those rare cases as an ordinary comment without a replacement. Tag severity: diff --git a/agent-roles-catalog/bundles/editorial/ru.yaml b/agent-roles-catalog/bundles/editorial/ru.yaml index 0497d59e..367b5bc1 100644 --- a/agent-roles-catalog/bundles/editorial/ru.yaml +++ b/agent-roles-catalog/bundles/editorial/ru.yaml @@ -128,7 +128,7 @@ roles: - Не выдумываешь подтверждения. Если не можешь проверить — честно ставь [Не проверено] или [Непроверяемо]. КАК ОСТАВЛЯТЬ ЗАМЕЧАНИЯ - Ты не редактируешь текст напрямую. Для каждого проблемного утверждения (ошибка, сомнение, непроверяемость) через MCP-инструмент выдели фрагмент и оставь комментарий; на верные факты комментарии не оставляй. В комментарии дай вердикт, исправление (если нужно) и источник. 
К вердикту [Неверно] всегда прикладывай готовое исправление как предложение-замену (параметр `suggestedText`): раз ты нашёл по источникам верное значение — сразу предлагай готовую правку, а не только описывай ошибку. Замена — это точный новый текст взамен выделенного фрагмента, обычным текстом без разметки; автор применит её одной кнопкой, не переписывая фрагмент вручную. Выделенный фрагмент должен встречаться в тексте ровно один раз; если он не уникален, расширь выделение контекстом. К вердиктам [Не проверено], [Непроверяемо] и [Это мнение] замену не прикладывай. Помечай важность: + Ты не редактируешь текст напрямую. Для каждого проблемного утверждения (ошибка, сомнение, непроверяемость) через MCP-инструмент выдели фрагмент и оставь комментарий; на верные факты комментарии не оставляй. В комментарии дай вердикт, исправление (если нужно) и источник. К вердикту [Неверно] всегда прикладывай готовое исправление как предложение-замену (параметр `suggestedText`): раз ты нашёл по источникам верное значение — сразу предлагай готовую правку, а не только описывай ошибку. Замена — это точный новый текст взамен выделенного фрагмента, обычным текстом без разметки; автор применит её одной кнопкой, не переписывая фрагмент вручную. Выделенный фрагмент должен встречаться в тексте ровно один раз; если он не уникален, расширь выделение контекстом. Когда проверяемая цифра, имя, термин или версия встречается по тексту несколько раз, сначала одним вызовом search_in_page найди все вхождения, а затем ставь целевой комментарий на каждое — не читая страницу поблочно. К вердиктам [Не проверено], [Непроверяемо] и [Это мнение] замену не прикладывай. Помечай важность: - [Критично] — фактическая ошибка, особенно в числах, именах, цитатах, или утверждение с риском дезинформации. - [Существенно] — сомнительное или непроверенное утверждение, требующее источника. - [Незначительно] — мелкое уточнение, псевдоточность, которую стоит округлить или подтвердить. 
@@ -170,7 +170,7 @@ roles: - Не вносишь содержательных изменений. Правки — минимальные и механические. КАК РАБОТАТЬ - Пройди весь текст от начала до конца за один проход. Помечай КАЖДОЕ нарушение, включая все повторные вхождения одной и той же ошибки и мелочи с меткой [Незначительно], — не ограничивайся первыми несколькими или самыми заметными. Не подводи итог вместо разбора: пока не дошёл до конца документа, работа не закончена. Один прогон покрывает весь текст, а не «самое важное». + Пройди весь текст от начала до конца за один проход. Помечай КАЖДОЕ нарушение, включая все повторные вхождения одной и той же ошибки и мелочи с меткой [Незначительно], — не ограничивайся первыми несколькими или самыми заметными. Не подводи итог вместо разбора: пока не дошёл до конца документа, работа не закончена. Один прогон покрывает весь текст, а не «самое важное». Для систематической ошибки, которая повторяется — прямые кавычки, «е» вместо «ё», дефис вместо тире, неединообразная единица или написание, — сначала одним вызовом search_in_page получи все вхождения, а затем оставь на каждом целевой комментарий с заменой, вместо поблочного просмотра. КАК ОСТАВЛЯТЬ ЗАМЕЧАНИЯ Ты не редактируешь текст напрямую. Для каждой правки через MCP-инструмент выдели фрагмент и оставь комментарий с конкретным исправлением. К каждой правке прикладывай предложение-замену (параметр `suggestedText`): точный исправленный текст взамен выделенного фрагмента, обычным текстом без разметки — автор применит его одной кнопкой. Выделенный фрагмент должен встречаться в тексте ровно один раз; если он не уникален, расширь выделение контекстом. НЕ оставляй сводных замечаний вида «во всём тексте заменить X на Y» или «привести единицы/кавычки/написание к единообразию»: такой комментарий нельзя применить кнопкой. Если одна и та же ошибка встречается в нескольких местах, обойди КАЖДОЕ вхождение и оставь на нём отдельный целевой комментарий со своей заменой — десять точечных правок вместо одной общей. 
Единственное исключение — замечание, которое в принципе невозможно выразить заменой конкретного фрагмента; такие редкие случаи оставляй обычным комментарием без замены. Помечай важность: diff --git a/agent-roles-catalog/index.yaml b/agent-roles-catalog/index.yaml index 21217c0e..4ee918c1 100644 --- a/agent-roles-catalog/index.yaml +++ b/agent-roles-catalog/index.yaml @@ -16,9 +16,9 @@ bundles: - slug: line-editor version: 4 - slug: fact-checker - version: 5 + version: 6 - slug: proofreader - version: 7 + version: 8 - slug: narrator version: 2 - id: research diff --git a/agent-roles-catalog/scripts/content-hashes.json b/agent-roles-catalog/scripts/content-hashes.json index e8409270..65d29b49 100644 --- a/agent-roles-catalog/scripts/content-hashes.json +++ b/agent-roles-catalog/scripts/content-hashes.json @@ -1,7 +1,7 @@ { "fact-checker": { - "version": 5, - "hash": "d7769872968109a1ccfb58d71bc3f3564a750b91766156f59031762848de4f24" + "version": 6, + "hash": "6bb22a9e5a5079b5cb287b5b26addbd36b9afeb7c9508287dcad9343fc53d685" }, "line-editor": { "version": 4, @@ -12,8 +12,8 @@ "hash": "66fe653003b4f63ef3c3a5c5c48552fe47daeefffc16907c37c35f0e8da98851" }, "proofreader": { - "version": 7, - "hash": "fdf8e0a443fa3c4102095e024146401363629a3f9015fb938c7bac2642825e56" + "version": 8, + "hash": "cef39fed321779631ddd1077fcba53399adf0e48b301df281c71eb042610900d" }, "researcher": { "version": 1, diff --git a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts index 9abd1c50..82f2ecb6 100644 --- a/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts +++ b/apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts @@ -630,6 +630,16 @@ export class AiChatToolsService { async ({ pageId, nodeId }) => await client.getNode(pageId, nodeId), ), + searchInPage: sharedTool( + sharedToolSpecs.searchInPage, + async ({ pageId, query, regex, caseSensitive, limit }) => + await client.searchInPage(pageId, query, { + 
regex, + caseSensitive, + limit, + }), + ), + getTable: tool({ description: 'Read a table as a matrix of cell texts (plus a parallel cellIds ' + diff --git a/apps/server/src/core/ai-chat/tools/docmost-client.loader.ts b/apps/server/src/core/ai-chat/tools/docmost-client.loader.ts index 7b5a9a4e..42bbd097 100644 --- a/apps/server/src/core/ai-chat/tools/docmost-client.loader.ts +++ b/apps/server/src/core/ai-chat/tools/docmost-client.loader.ts @@ -55,6 +55,11 @@ export interface DocmostClientLike { getOutline(pageId: string): Promise>; getPageJson(pageId: string): Promise>; getNode(pageId: string, nodeId: string): Promise>; + searchInPage( + pageId: string, + query: string, + opts?: { regex?: boolean; caseSensitive?: boolean; limit?: number }, + ): Promise>; getTable(pageId: string, tableRef: string): Promise>; listComments(pageId: string): Promise; getComment( diff --git a/packages/mcp/build/_vendored_editor_ext/copy.js b/packages/mcp/build/_vendored_editor_ext/copy.js new file mode 100644 index 00000000..428db9cf --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/copy.js @@ -0,0 +1,6 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.copy = copy; +function copy(value) { + return JSON.parse(JSON.stringify(value)); +} diff --git a/packages/mcp/build/_vendored_editor_ext/getFromPath.js b/packages/mcp/build/_vendored_editor_ext/getFromPath.js new file mode 100644 index 00000000..b5b56b0c --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/getFromPath.js @@ -0,0 +1,18 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.getFromPath = getFromPath; +/** + * get target value from json-pointer (e.g. 
/content/0/content) + * @param {AnyObject} obj object to resolve path into + * @param {string} path json-pointer + * @return {any} target value + */ +function getFromPath(obj, path) { + const pathParts = path.split("/"); + pathParts.shift(); // remove root-entry + while (pathParts.length) { + const property = pathParts.shift(); + obj = obj[property]; + } + return obj; +} diff --git a/packages/mcp/build/_vendored_editor_ext/getReplaceStep.js b/packages/mcp/build/_vendored_editor_ext/getReplaceStep.js new file mode 100644 index 00000000..357ce0d7 --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/getReplaceStep.js @@ -0,0 +1,27 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.getReplaceStep = getReplaceStep; +const transform_1 = require("@tiptap/pm/transform"); +function getReplaceStep(fromDoc, toDoc) { + let start = toDoc.content.findDiffStart(fromDoc.content); + if (start === null) { + return false; + } + // @ts-ignore property access to content + let { a: endA, b: endB } = toDoc.content.findDiffEnd(fromDoc.content); + const overlap = start - Math.min(endA, endB); + if (overlap > 0) { + // If there is an overlap, there is some freedom of choice in how to calculate the + // start/end boundary. for an inserted/removed slice. We choose the extreme with + // the lowest depth value. 
+ if (fromDoc.resolve(start - overlap).depth < + toDoc.resolve(endA + overlap).depth) { + start -= overlap; + } + else { + endA += overlap; + endB += overlap; + } + } + return new transform_1.ReplaceStep(start, endB, toDoc.slice(start, endA)); +} diff --git a/packages/mcp/build/_vendored_editor_ext/index.js b/packages/mcp/build/_vendored_editor_ext/index.js new file mode 100644 index 00000000..37b373e2 --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/index.js @@ -0,0 +1,8 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.RecreateTransform = exports.recreateTransform = void 0; +// https://gitlab.com/mpapp-public/prosemirror-recreate-steps - MIT +// https://github.com/sueddeutsche/prosemirror-recreate-transform - MIT +var recreateTransform_1 = require("./recreateTransform"); +Object.defineProperty(exports, "recreateTransform", { enumerable: true, get: function () { return recreateTransform_1.recreateTransform; } }); +Object.defineProperty(exports, "RecreateTransform", { enumerable: true, get: function () { return recreateTransform_1.RecreateTransform; } }); diff --git a/packages/mcp/build/_vendored_editor_ext/package.json b/packages/mcp/build/_vendored_editor_ext/package.json new file mode 100644 index 00000000..0292b995 --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/package.json @@ -0,0 +1 @@ +{"type":"commonjs"} \ No newline at end of file diff --git a/packages/mcp/build/_vendored_editor_ext/recreateTransform.js b/packages/mcp/build/_vendored_editor_ext/recreateTransform.js new file mode 100644 index 00000000..0b2226bf --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/recreateTransform.js @@ -0,0 +1,242 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.RecreateTransform = void 0; +exports.recreateTransform = recreateTransform; +const transform_1 = require("@tiptap/pm/transform"); +const rfc6902_1 = require("rfc6902"); +const diff_1 = require("diff"); 
+const getReplaceStep_1 = require("./getReplaceStep"); +const simplifyTransform_1 = require("./simplifyTransform"); +const removeMarks_1 = require("./removeMarks"); +const getFromPath_1 = require("./getFromPath"); +const copy_1 = require("./copy"); +class RecreateTransform { + constructor(fromDoc, toDoc, options = {}) { + const o = { + complexSteps: true, + wordDiffs: false, + simplifyDiff: true, + ...options, + }; + this.fromDoc = fromDoc; + this.toDoc = toDoc; + this.complexSteps = o.complexSteps; // Whether to return steps other than ReplaceSteps + this.wordDiffs = o.wordDiffs; // Whether to make text diffs cover entire words + this.simplifyDiff = o.simplifyDiff; + this.schema = fromDoc.type.schema; + this.tr = new transform_1.Transform(fromDoc); + } + init() { + if (this.complexSteps) { + // For First steps: we create versions of the documents without marks as + // these will only confuse the diffing mechanism and marks won't cause + // any mapping changes anyway. + this.currentJSON = (0, removeMarks_1.removeMarks)(this.fromDoc).toJSON(); + this.finalJSON = (0, removeMarks_1.removeMarks)(this.toDoc).toJSON(); + this.ops = (0, rfc6902_1.createPatch)(this.currentJSON, this.finalJSON); + this.recreateChangeContentSteps(); + this.recreateChangeMarkSteps(); + } + else { + // We don't differentiate between mark changes and other changes. + this.currentJSON = this.fromDoc.toJSON(); + this.finalJSON = this.toDoc.toJSON(); + this.ops = (0, rfc6902_1.createPatch)(this.currentJSON, this.finalJSON); + this.recreateChangeContentSteps(); + } + if (this.simplifyDiff) { + this.tr = (0, simplifyTransform_1.simplifyTransform)(this.tr) || this.tr; + } + return this.tr; + } + /** convert json-diff to prosemirror steps */ + recreateChangeContentSteps() { + // First step: find content changing steps. 
+ let ops = []; + while (this.ops.length) { + // get next + let op = this.ops.shift(); + ops.push(op); + let toDoc; + const afterStepJSON = (0, copy_1.copy)(this.currentJSON); // working document receiving patches + const pathParts = op.path.split("/"); + // collect operations until we receive a valid document: + // apply ops-patches until a valid prosemirror document is retrieved, + // then try to create a transformation step or retry with next operation + while (toDoc == null) { + (0, rfc6902_1.applyPatch)(afterStepJSON, [op]); + try { + toDoc = this.schema.nodeFromJSON(afterStepJSON); + toDoc.check(); + } + catch (error) { + toDoc = null; + if (this.ops.length > 0) { + op = this.ops.shift(); + ops.push(op); + } + else { + throw new Error(`No valid diff possible applying ${op.path}`); + } + } + } + // apply operation (ignoring afterStepJSON) + if (this.complexSteps && + ops.length === 1 && + (pathParts.includes("attrs") || pathParts.includes("type"))) { + // Node markup is changing + this.addSetNodeMarkup(); // a lost update is ignored + ops = []; + // console.log("%cop", logStyle, "- update node", ops); + } + else if (ops.length === 1 && + op.op === "replace" && + pathParts[pathParts.length - 1] === "text") { + // Text is being replaced, we apply text diffing to find the smallest possible diffs. 
+ this.addReplaceTextSteps(op, afterStepJSON); + ops = []; + // console.log("%cop", logStyle, "- replace", ops); + } + else if (this.addReplaceStep(toDoc, afterStepJSON)) { + // operations have been applied + ops = []; + // console.log("%cop", logStyle, "- other", ops); + } + } + } + /** update node with attrs and marks, may also change type */ + addSetNodeMarkup() { + // first diff in document is supposed to be a node-change (in type and/or attributes) + // thus simply find the first change and apply a node change step, then recalculate the diff + // after updating the document + const fromDoc = this.schema.nodeFromJSON(this.currentJSON); + const toDoc = this.schema.nodeFromJSON(this.finalJSON); + const start = toDoc.content.findDiffStart(fromDoc.content); + // @note start is the same (first) position for current and target document + const fromNode = fromDoc.nodeAt(start); + const toNode = toDoc.nodeAt(start); + if (start != null) { + // @note this completly updates all attributes in one step, by completely replacing node + const nodeType = fromNode.type === toNode.type ? null : toNode.type; + try { + this.tr.setNodeMarkup(start, nodeType, toNode.attrs, toNode.marks); + } + catch (e) { + // if nodetypes differ, the updated node-type and contents might not be compatible + // with schema and requires a replace + if (nodeType && e.message.includes("Invalid content")) { + // @todo add test-case for this scenario + this.tr.replaceWith(start, start + fromNode.nodeSize, toNode); + } + else { + throw e; + } + } + this.currentJSON = (0, removeMarks_1.removeMarks)(this.tr.doc).toJSON(); + // setting the node markup may have invalidated the following ops, so we calculate them again. + this.ops = (0, rfc6902_1.createPatch)(this.currentJSON, this.finalJSON); + return true; + } + return false; + } + recreateChangeMarkSteps() { + // Now the documents should be the same, except their marks, so everything should map 1:1. 
+ // Second step: Iterate through the toDoc and make sure all marks are the same in tr.doc + this.toDoc.descendants((tNode, tPos) => { + if (!tNode.isInline) { + return true; + } + this.tr.doc.nodesBetween(tPos, tPos + tNode.nodeSize, (fNode, fPos) => { + if (!fNode.isInline) { + return true; + } + const from = Math.max(tPos, fPos); + const to = Math.min(tPos + tNode.nodeSize, fPos + fNode.nodeSize); + fNode.marks.forEach((nodeMark) => { + if (!nodeMark.isInSet(tNode.marks)) { + this.tr.removeMark(from, to, nodeMark); + } + }); + tNode.marks.forEach((nodeMark) => { + if (!nodeMark.isInSet(fNode.marks)) { + this.tr.addMark(from, to, nodeMark); + } + }); + }); + }); + } + /** + * retrieve and possibly apply replace-step based from doc changes + * From http://prosemirror.net/examples/footnote/ + */ + addReplaceStep(toDoc, afterStepJSON) { + const fromDoc = this.schema.nodeFromJSON(this.currentJSON); + const step = (0, getReplaceStep_1.getReplaceStep)(fromDoc, toDoc); + if (!step) { + return false; + } + else if (!this.tr.maybeStep(step).failed) { + this.currentJSON = afterStepJSON; + return true; // @change previously null + } + throw new Error("No valid step found."); + } + /** retrieve and possibly apply text replace-steps based from doc changes */ + addReplaceTextSteps(op, afterStepJSON) { + // We find the position number of the first character in the string + const op1 = { ...op, value: "xx" }; + const op2 = { ...op, value: "yy" }; + const afterOP1JSON = (0, copy_1.copy)(this.currentJSON); + const afterOP2JSON = (0, copy_1.copy)(this.currentJSON); + (0, rfc6902_1.applyPatch)(afterOP1JSON, [op1]); + (0, rfc6902_1.applyPatch)(afterOP2JSON, [op2]); + const op1Doc = this.schema.nodeFromJSON(afterOP1JSON); + const op2Doc = this.schema.nodeFromJSON(afterOP2JSON); + // get text diffs + const finalText = op.value; + const currentText = (0, getFromPath_1.getFromPath)(this.currentJSON, op.path); + const textDiffs = this.wordDiffs + ? 
(0, diff_1.diffWordsWithSpace)(currentText, finalText) + : (0, diff_1.diffChars)(currentText, finalText); + let offset = op1Doc.content.findDiffStart(op2Doc.content); + const marks = op1Doc.resolve(offset + 1).marks(); + while (textDiffs.length) { + const diff = textDiffs.shift(); + if (diff.added) { + const textNode = this.schema + .nodeFromJSON({ type: "text", text: diff.value }) + .mark(marks); + if (textDiffs.length && textDiffs[0].removed) { + const nextDiff = textDiffs.shift(); + this.tr.replaceWith(offset, offset + nextDiff.value.length, textNode); + } + else { + this.tr.insert(offset, textNode); + } + offset += diff.value.length; + } + else if (diff.removed) { + if (textDiffs.length && textDiffs[0].added) { + const nextDiff = textDiffs.shift(); + const textNode = this.schema + .nodeFromJSON({ type: "text", text: nextDiff.value }) + .mark(marks); + this.tr.replaceWith(offset, offset + diff.value.length, textNode); + offset += nextDiff.value.length; + } + else { + this.tr.delete(offset, offset + diff.value.length); + } + } + else { + offset += diff.value.length; + } + } + this.currentJSON = afterStepJSON; + } +} +exports.RecreateTransform = RecreateTransform; +function recreateTransform(fromDoc, toDoc, options = {}) { + const recreator = new RecreateTransform(fromDoc, toDoc, options); + return recreator.init(); +} diff --git a/packages/mcp/build/_vendored_editor_ext/recreateTransform.test.js b/packages/mcp/build/_vendored_editor_ext/recreateTransform.test.js new file mode 100644 index 00000000..79323148 --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/recreateTransform.test.js @@ -0,0 +1,118 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +const vitest_1 = require("vitest"); +const schema_basic_1 = require("@tiptap/pm/schema-basic"); +const transform_1 = require("@tiptap/pm/transform"); +const recreateTransform_1 = require("./recreateTransform"); +/** + * recreateTransform diffs two documents and produces 
ProseMirror steps that turn + * `fromDoc` into `toDoc`. It is the backbone of collaborative/version diffing, so + * THE invariant that matters is: replaying the produced steps on `fromDoc` must + * reproduce `toDoc` exactly. Every test below re-applies the steps onto a fresh + * Transform seeded from `fromDoc` (not just trusting `tr.doc`) and asserts node + * equality with `.eq()`. If a regression makes any step wrong, the round-trip + * breaks and the test fails. + */ +// Real ProseMirror schema (the standard basic schema) with paragraph/heading + +// strong/em marks — the same primitives the editor diffs in production. +const doc = (...c) => schema_basic_1.schema.node("doc", null, c); +const p = (...c) => schema_basic_1.schema.node("paragraph", null, c.length ? c : undefined); +const h = (level, ...c) => schema_basic_1.schema.node("heading", { level }, c); +const t = (text, ...marks) => schema_basic_1.schema.text(text, marks.length ? marks : undefined); +const strong = schema_basic_1.schema.marks.strong.create(); +const em = schema_basic_1.schema.marks.em.create(); +// Replay the diff's steps onto a fresh Transform built from `fromDoc`. This is +// the faithful "apply(diff) == target" check — it exercises the actual Step +// objects rather than the transform's internal accumulated doc. +function applyDiff(fromDoc, toDoc, options) { + const tr = (0, recreateTransform_1.recreateTransform)(fromDoc, toDoc, options); + const replay = new transform_1.Transform(fromDoc); + tr.steps.forEach((s) => { + const result = replay.maybeStep(s); + if (result.failed) + throw new Error(`step failed: ${result.failed}`); + }); + return replay.doc; +} +(0, vitest_1.describe)("recreateTransform round-trip (apply(diff) == target)", () => { + (0, vitest_1.it)("reconstructs the target on plain text insertion", () => { + // Inserting " world" must yield exactly the target paragraph. 
+ const from = doc(p(t("hello"))); + const to = doc(p(t("hello world"))); + (0, vitest_1.expect)(applyDiff(from, to).eq(to)).toBe(true); + }); + (0, vitest_1.it)("reconstructs the target on text deletion", () => { + // Deleting a trailing word is the inverse of insertion and must round-trip. + const from = doc(p(t("hello world"))); + const to = doc(p(t("hello"))); + (0, vitest_1.expect)(applyDiff(from, to).eq(to)).toBe(true); + }); + (0, vitest_1.it)("reconstructs the target when a word is replaced mid-string", () => { + // A char-level replace in the middle must not corrupt the surrounding text. + const from = doc(p(t("the quick brown fox"))); + const to = doc(p(t("the slow brown fox"))); + (0, vitest_1.expect)(applyDiff(from, to).eq(to)).toBe(true); + }); + (0, vitest_1.it)("reconstructs the target when a mark is added (complexSteps path)", () => { + // Mark-only changes are diffed in a separate pass; the bolded run must match. + const from = doc(p(t("hello"))); + const to = doc(p(t("hello", strong))); + const out = applyDiff(from, to); + (0, vitest_1.expect)(out.eq(to)).toBe(true); + // Sanity: the produced doc actually carries the strong mark. + (0, vitest_1.expect)(out.firstChild.firstChild.marks.length).toBe(1); + }); + (0, vitest_1.it)("reconstructs the target when a mark is removed", () => { + // Removing the only mark must leave the same text with no marks. + const from = doc(p(t("hello", strong))); + const to = doc(p(t("hello"))); + const out = applyDiff(from, to); + (0, vitest_1.expect)(out.eq(to)).toBe(true); + (0, vitest_1.expect)(out.firstChild.firstChild.marks.length).toBe(0); + }); + (0, vitest_1.it)("reconstructs the target on a paragraph split into two blocks", () => { + // Structural change (one block -> two) must replay as valid replace steps. 
+ const from = doc(p(t("hello world"))); + const to = doc(p(t("hello")), p(t("world"))); + const out = applyDiff(from, to); + (0, vitest_1.expect)(out.eq(to)).toBe(true); + (0, vitest_1.expect)(out.childCount).toBe(2); + }); + (0, vitest_1.it)("reconstructs the target on a node-type change (paragraph -> heading)", () => { + // Type/attrs changes drive the setNodeMarkup branch; the node must become a + // heading while keeping its text. + const from = doc(p(t("hello"))); + const to = doc(h(1, t("hello"))); + const out = applyDiff(from, to); + (0, vitest_1.expect)(out.eq(to)).toBe(true); + (0, vitest_1.expect)(out.firstChild.type.name).toBe("heading"); + }); + (0, vitest_1.it)("reconstructs a combined structural + mark change", () => { + // Several diff kinds at once (new block + italic run) still round-trips. + const from = doc(p(t("alpha"))); + const to = doc(p(t("alpha")), p(t("beta", em))); + const out = applyDiff(from, to); + (0, vitest_1.expect)(out.eq(to)).toBe(true); + }); + (0, vitest_1.it)("produces an empty step list for identical documents", () => { + // No diff => no work; spurious steps would mean wasted/incorrect history. + const from = doc(p(t("same"))); + const to = doc(p(t("same"))); + const tr = (0, recreateTransform_1.recreateTransform)(from, to); + (0, vitest_1.expect)(tr.steps.length).toBe(0); + (0, vitest_1.expect)(tr.doc.eq(to)).toBe(true); + }); + (0, vitest_1.it)("round-trips with complexSteps:false (marks diffed as replaces)", () => { + // With complexSteps off, mark changes are folded into replace steps rather + // than dedicated mark steps — the result must still equal the target. + const from = doc(p(t("hello"))); + const to = doc(p(t("hello", strong))); + (0, vitest_1.expect)(applyDiff(from, to, { complexSteps: false }).eq(to)).toBe(true); + }); + (0, vitest_1.it)("round-trips with wordDiffs:true (whole-word text diffing)", () => { + // wordDiffs changes the granularity of the text diff, not the outcome. 
+ const from = doc(p(t("the quick brown fox"))); + const to = doc(p(t("the quick red fox"))); + (0, vitest_1.expect)(applyDiff(from, to, { wordDiffs: true }).eq(to)).toBe(true); + }); +}); diff --git a/packages/mcp/build/_vendored_editor_ext/removeMarks.js b/packages/mcp/build/_vendored_editor_ext/removeMarks.js new file mode 100644 index 00000000..a0bedebc --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/removeMarks.js @@ -0,0 +1,9 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.removeMarks = removeMarks; +const transform_1 = require("@tiptap/pm/transform"); +function removeMarks(doc) { + const tr = new transform_1.Transform(doc); + tr.removeMark(0, doc.nodeSize - 2); + return tr.doc; +} diff --git a/packages/mcp/build/_vendored_editor_ext/simplifyTransform.js b/packages/mcp/build/_vendored_editor_ext/simplifyTransform.js new file mode 100644 index 00000000..57fd9995 --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/simplifyTransform.js @@ -0,0 +1,27 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.simplifyTransform = simplifyTransform; +const transform_1 = require("@tiptap/pm/transform"); +const getReplaceStep_1 = require("./getReplaceStep"); +// join adjacent ReplaceSteps +function simplifyTransform(tr) { + if (!tr.steps.length) { + return undefined; + } + const newTr = new transform_1.Transform(tr.docs[0]); + const oldSteps = tr.steps.slice(); + while (oldSteps.length) { + let step = oldSteps.shift(); + while (oldSteps.length && step.merge(oldSteps[0])) { + const addedStep = oldSteps.shift(); + if (step instanceof transform_1.ReplaceStep && addedStep instanceof transform_1.ReplaceStep) { + step = (0, getReplaceStep_1.getReplaceStep)(newTr.doc, addedStep.apply(step.apply(newTr.doc).doc).doc); + } + else { + step = step.merge(addedStep); + } + } + newTr.step(step); + } + return newTr; +} diff --git a/packages/mcp/build/_vendored_editor_ext/types.js 
b/packages/mcp/build/_vendored_editor_ext/types.js new file mode 100644 index 00000000..c8ad2e54 --- /dev/null +++ b/packages/mcp/build/_vendored_editor_ext/types.js @@ -0,0 +1,2 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index 1b4b31d5..b656546f 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -13,6 +13,7 @@ import { footnoteWarningsField } from "./lib/footnote-analyze.js"; import { buildPageTree } from "./lib/tree.js"; import { serializeDocmostMarkdown, parseDocmostMarkdown, } from "./lib/markdown-document.js"; import { replaceNodeById, deleteNodeById, assertUnambiguousMatch, insertNodeRelative, buildOutline, getNodeByRef, readTable, insertTableRow, deleteTableRow, updateTableCell, } from "./lib/node-ops.js"; +import { searchInDoc } from "./lib/page-search.js"; import { withPageLock } from "./lib/page-lock.js"; import { applyTextEdits, } from "./lib/json-edit.js"; import { getCollabToken, performLogin } from "./lib/auth-utils.js"; @@ -872,6 +873,22 @@ export class DocmostClient { node: hit.node, }; } + /** + * Find every occurrence of `query` on a page IN MEMORY, over the plain text of + * each text container (reusing the same `getPageRaw` fetch as the other read + * tools) — no server search endpoint, no whole-document round-trip through the + * model. Returns `{ total, truncated, matches }`; each match carries a ref the + * agent can hand straight to get_node/patch_node or a comment anchor, plus the + * top-level block index and a short context window to build a unique selection. + * The pure engine (`searchInDoc`) owns the traversal, glue, ReDoS guards and + * the empty-query / invalid-regex errors. + */ + async searchInPage(pageId, query, opts = {}) { + await this.ensureAuthenticated(); + const data = await this.getPageRaw(pageId); + const result = searchInDoc(data.content ?? 
{ type: "doc", content: [] }, query, opts); + return { pageId, query, ...result }; + } /** * Read a table as a matrix. `tableRef` is `#` (from get_outline) or a * block id of any node inside the table. Returns the cell texts plus a diff --git a/packages/mcp/build/index.js b/packages/mcp/build/index.js index a573231a..a9fc3696 100644 --- a/packages/mcp/build/index.js +++ b/packages/mcp/build/index.js @@ -35,7 +35,7 @@ const VERSION = packageJson.version; // name is not mentioned below (see its EXCEPTIONS list for the rare opt-outs). // Exported for that test. export const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent.\n" + - "READ: find a page -> search (workspace-wide full-text); list -> list_pages / list_spaces. Locate blocks and their ids CHEAPLY -> get_outline (compact top-level map; start here, not get_page_json). One block's subtree -> get_node (by attrs.id, or \"#\" for tables, which carry no id). Whole page -> get_page (Markdown, lossy; inline tags are comment anchors — markup, not text) or get_page_json (lossless ProseMirror with block ids). Hand a huge page (with images) to an external consumer without pulling it through the model context -> stash_page (returns a short-lived anonymous URL).\n" + + "READ: find a page -> search (workspace-wide full-text); list -> list_pages / list_spaces. Locate blocks and their ids CHEAPLY -> get_outline (compact top-level map; start here, not get_page_json). One block's subtree -> get_node (by attrs.id, or \"#\" for tables, which carry no id). Find every occurrence of a string/regex ON a page (and where each is) -> search_in_page, NOT block-by-block get_node — it returns each hit's node ref + block index + context for a targeted comment. Whole page -> get_page (Markdown, lossy; inline tags are comment anchors — markup, not text) or get_page_json (lossless ProseMirror with block ids). 
Hand a huge page (with images) to an external consumer without pulling it through the model context -> stash_page (returns a short-lived anonymous URL).\n" + "EDIT: fix wording/typos/numbers -> edit_page_text (find/replace inside blocks, no node id needed). Change ONE block (paragraph/heading/callout/etc.) structurally -> patch_node (by attrs.id from get_outline). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Tables -> table_get / table_update_cell / table_insert_row / table_delete_row (address by \"#\" from get_outline; table nodes have no attrs.id). Images -> insert_image (add from a web URL) / replace_image (swap an existing image). Footnotes -> insert_footnote. Bulk/structural rewrite -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Complex/scripted rewrite (multiple coordinated edits, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes.\n" + "PAGES: new -> create_page (Markdown). Rename (title only) -> rename_page. Move -> move_page. Delete -> delete_page (SOFT delete — the page goes to trash and is restorable; nothing is permanent). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. Sharing -> share_page / unshare_page / list_shares; share_page makes the page PUBLICLY accessible — do it only when explicitly asked.\n" + "COMMENTS: create_comment is always inline and requires an EXACT selection — contiguous text from a single block, <=250 chars (fails rather than leaving an unanchored comment); reply to a thread via parentCommentId. 
Propose a concrete text fix for one-click human approval -> create_comment with suggestedText (the exact plain-text replacement for the selection; the selection must then be UNIQUE in the page — extend it with context if needed); prefer this over editing directly when the change is subjective or needs the author's sign-off. Manage -> list_comments, update_comment, resolve_comment (resolve/reopen, reversible — prefer over delete to close), delete_comment, check_new_comments.\n" + @@ -141,6 +141,15 @@ export function createDocmostMcpServer(config) { const result = await docmostClient.getNode(pageId, nodeId); return jsonContent(result); }); + // Tool: search_in_page + registerShared(SHARED_TOOL_SPECS.searchInPage, async ({ pageId, query, regex, caseSensitive, limit }) => { + const result = await docmostClient.searchInPage(pageId, query, { + regex, + caseSensitive, + limit, + }); + return jsonContent(result); + }); // Tool: table_get server.registerTool("table_get", { description: "Read a table as a matrix. Returns {rows, cols, cells (text[][]), " + diff --git a/packages/mcp/build/lib/page-search.js b/packages/mcp/build/lib/page-search.js new file mode 100644 index 00000000..cd762909 --- /dev/null +++ b/packages/mcp/build/lib/page-search.js @@ -0,0 +1,169 @@ +/** + * Pure, network-free in-page search over a ProseMirror/TipTap document tree. + * + * `searchInDoc(doc, query, opts)` finds every occurrence of a literal substring + * (default) or a regular expression across the page's TEXT CONTAINERS and + * reports WHERE each match is — the container's ref (usable verbatim with + * get_node/patch_node and comment anchoring), the top-level block index, and a + * short context window around the hit. It never touches the network, the DB, or + * the schema mirror; like `comment-anchor.ts` it is isolated-testable. 
+ * + * WHY plain text (not markdown): each container's inline text is glued into ONE + * string via `blockPlainText`, so a match survives inline-mark boundaries + * (bold/italic/link splits that fracture a run like "т.е." into several text + * nodes) and comment-anchor spans never clutter the haystack. + * + * The SEARCH UNIT is a text container: a node whose direct children include + * text nodes (a paragraph/heading, or the paragraph inside a table cell / list + * item). ProseMirror keeps block vs. inline content exclusive, so a container + * never nests another container — the walk reaches each cell/item's own text and + * the context window is naturally scoped to that specific cell/item, not the + * whole top-level block's glued text. + */ +import { blockPlainText } from "./node-ops.js"; +/** True if `value` is a non-null plain object (and not an array). */ +function isObject(value) { + return value != null && typeof value === "object" && !Array.isArray(value); +} +/** + * A text container is a node with a `content` array holding at least one text + * node (a child with a string `text`). These are the paragraphs/headings whose + * glued inline text we search. + */ +function isTextContainer(node) { + return (isObject(node) && + Array.isArray(node.content) && + node.content.some((c) => isObject(c) && typeof c.text === "string")); +} +// Result-size defaults/ceiling. +const DEFAULT_LIMIT = 50; +const MAX_LIMIT = 200; +// Context window on each side of a match. +const CONTEXT = 40; +// Anti-ReDoS guards. JS regex is not interruptible, so a pathological pattern +// on a large input can wedge the event loop; we bound BOTH inputs by size (not +// a timeout). These also bound the literal engine's work. +const MAX_PATTERN_LENGTH = 1000; // cap the query/pattern length +const MAX_CONTAINER_TEXT = 100_000; // cap the text scanned per container +/** Clamp the requested limit into [1, MAX_LIMIT], defaulting when absent. 
*/ +function resolveLimit(limit) { + const n = typeof limit === "number" && Number.isFinite(limit) ? limit : DEFAULT_LIMIT; + return Math.min(MAX_LIMIT, Math.max(1, Math.floor(n))); +} +/** + * Yield the [start, length] of every occurrence of the engine in `text`, in + * order. A literal engine uses indexOf (case-folded when requested); a regex + * engine uses a global RegExp. Zero-length regex matches (e.g. `\b`, `a*`) are + * SKIPPED and lastIndex is advanced, so a pattern that can match the empty + * string cannot flood the results or spin forever. + */ +function* eachMatch(text, query, re, caseSensitive) { + if (re) { + re.lastIndex = 0; + let m; + while ((m = re.exec(text)) != null) { + const len = m[0].length; + if (len === 0) { + // Empty match: advance past this position and do not record it. + re.lastIndex = m.index + 1; + continue; + } + yield [m.index, len]; + } + return; + } + // Literal engine. For case-insensitive search, fold BOTH sides only to locate + // the indices; the reported match/context are always sliced from the original + // text so the caller gets the real casing (needed to build a unique selection). + const haystack = caseSensitive ? text : text.toLowerCase(); + const needle = caseSensitive ? query : query.toLowerCase(); + const len = needle.length; + let from = 0; + for (;;) { + const idx = haystack.indexOf(needle, from); + if (idx === -1) + return; + yield [idx, len]; + from = idx + len; + } +} +/** + * Search a ProseMirror document for `query` and return `{ total, truncated, + * matches }`. `total` counts EVERY occurrence (even beyond the limit) and + * `truncated` flags when the returned list was capped — nothing is silently + * dropped. + * + * Throws a clear, model-actionable error (never a generic failure) on: an + * empty/whitespace-only query, an over-long pattern, or — with `regex:true` — + * an invalid RegExp, so the agent can fix its input. 
+ */ +export function searchInDoc(doc, query, opts = {}) { + // --- edge-case guards (fail loudly so the agent can correct the call) --- + if (typeof query !== "string" || query.trim().length === 0) { + throw new Error("search_in_page: query is empty — pass the text (or regex) to look for."); + } + if (query.length > MAX_PATTERN_LENGTH) { + throw new Error(`search_in_page: query is too long (${query.length} chars; max ${MAX_PATTERN_LENGTH}). Shorten the search text/pattern.`); + } + const caseSensitive = opts.caseSensitive === true; + const limit = resolveLimit(opts.limit); + // Compile the regex up front so an invalid pattern is a clean tool error + // rather than a failure deep in the traversal. + let re = null; + if (opts.regex === true) { + try { + re = new RegExp(query, caseSensitive ? "g" : "gi"); + } + catch (e) { + throw new Error(`search_in_page: invalid regular expression: ${e instanceof Error ? e.message : String(e)}`); + } + } + const matches = []; + let total = 0; + const topLevel = isObject(doc) && Array.isArray(doc.content) ? doc.content : []; + // Descend a top-level block, collecting matches from every text container + // within it. blockIndex/topRef stay pinned to the enclosing top-level block. + const descend = (node, blockIndex, topRef) => { + if (!isObject(node)) + return; + if (isTextContainer(node)) { + // Glue this container's inline text into one string (mark-safe) and cap it + // so a single non-interruptible regex exec can never run on an unbounded + // input. + let text = blockPlainText(node); + if (text.length > MAX_CONTAINER_TEXT) { + text = text.slice(0, MAX_CONTAINER_TEXT); + } + // The container's own id addresses it verbatim in get_node/patch_node and + // comment anchoring; a container with no id (e.g. a table-cell paragraph) + // falls back to the top-level block's #. + const id = isObject(node.attrs) && typeof node.attrs.id === "string" && node.attrs.id.length > 0 + ? 
node.attrs.id + : topRef; + for (const [idx, len] of eachMatch(text, query, re, caseSensitive)) { + total++; + if (matches.length < limit) { + matches.push({ + nodeId: id, + blockIndex, + type: node.type, + before: text.slice(Math.max(0, idx - CONTEXT), idx), + match: text.slice(idx, idx + len), + after: text.slice(idx + len, idx + len + CONTEXT), + }); + } + } + // A text container holds inline content only — no nested containers to + // recurse into. + return; + } + if (Array.isArray(node.content)) { + for (const child of node.content) + descend(child, blockIndex, topRef); + } + }; + for (let i = 0; i < topLevel.length; i++) { + descend(topLevel[i], i, `#${i}`); + } + return { total, truncated: total > matches.length, matches }; +} diff --git a/packages/mcp/build/tool-specs.js b/packages/mcp/build/tool-specs.js index f73d1d15..df05da43 100644 --- a/packages/mcp/build/tool-specs.js +++ b/packages/mcp/build/tool-specs.js @@ -74,6 +74,48 @@ export const SHARED_TOOL_SPECS = { nodeId: z.string().min(1), }), }, + // --- in-page occurrence search (client-side, over ProseMirror plain text) --- + searchInPage: { + mcpName: 'search_in_page', + inAppKey: 'searchInPage', + description: 'Find every occurrence of a string (or regex) INSIDE one page and get ' + + 'WHERE each is — instead of pulling blocks one-by-one with get_node. ' + + 'Searches the plain text of each text block/cell (marks glued, so a match ' + + 'survives bold/italic/link splits; comment anchors do not interfere). ' + + 'Returns { total, truncated, matches:[{ nodeId, blockIndex, type, before, ' + + 'match, after }] }: `nodeId` is the block id (or "#" for ' + + 'table/cell content) — pass it straight to get_node/patch_node or as a ' + + 'comment anchor; `blockIndex` is the get_outline index; `before`/`after` ' + + 'give ~40 chars of context to build a unique selection. `total` counts all ' + + 'hits and `truncated` is true when more than `limit` were found (nothing ' + + 'is silently dropped). 
Default is a literal, case-INSENSITIVE substring; ' + + 'set regex:true for a JS regular expression (char classes, word ' + + 'boundaries) and caseSensitive:true to match case. Ideal for systematic ' + + 'editorial sweeps (unquoted "ё", straight quotes, "т.е.", stray units). An ' + + 'invalid regex or an empty query returns a clear error to fix.', + buildShape: (z) => ({ + pageId: z.string().min(1).describe('ID of the page to search'), + query: z + .string() + .min(1) + .describe('The text to find (a literal substring, or a regex when regex:true)'), + regex: z + .boolean() + .optional() + .describe('Treat query as a JS regular expression (default false).'), + caseSensitive: z + .boolean() + .optional() + .describe('Case-sensitive matching (default false).'), + limit: z + .number() + .int() + .min(1) + .max(200) + .optional() + .describe('Max matches to RETURN (default 50, max 200); total is always reported.'), + }), + }, // --- node delete --- deleteNode: { mcpName: 'delete_node', diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index cd29d494..39654fdb 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -47,6 +47,7 @@ import { deleteTableRow, updateTableCell, } from "./lib/node-ops.js"; +import { searchInDoc, SearchOptions } from "./lib/page-search.js"; import { withPageLock } from "./lib/page-lock.js"; import { applyTextEdits, @@ -1093,6 +1094,27 @@ export class DocmostClient { }; } + /** + * Find every occurrence of `query` on a page IN MEMORY, over the plain text of + * each text container (reusing the same `getPageRaw` fetch as the other read + * tools) — no server search endpoint, no whole-document round-trip through the + * model. Returns `{ total, truncated, matches }`; each match carries a ref the + * agent can hand straight to get_node/patch_node or a comment anchor, plus the + * top-level block index and a short context window to build a unique selection. 
+ * The pure engine (`searchInDoc`) owns the traversal, glue, ReDoS guards and + * the empty-query / invalid-regex errors. + */ + async searchInPage(pageId: string, query: string, opts: SearchOptions = {}) { + await this.ensureAuthenticated(); + const data = await this.getPageRaw(pageId); + const result = searchInDoc( + data.content ?? { type: "doc", content: [] }, + query, + opts, + ); + return { pageId, query, ...result }; + } + /** * Read a table as a matrix. `tableRef` is `#` (from get_outline) or a * block id of any node inside the table. Returns the cell texts plus a diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index 6bbcea7c..db6c9274 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -46,7 +46,7 @@ const VERSION = packageJson.version; // Exported for that test. export const SERVER_INSTRUCTIONS = "Docmost editing guide — choose the tool by intent.\n" + - "READ: find a page -> search (workspace-wide full-text); list -> list_pages / list_spaces. Locate blocks and their ids CHEAPLY -> get_outline (compact top-level map; start here, not get_page_json). One block's subtree -> get_node (by attrs.id, or \"#\" for tables, which carry no id). Whole page -> get_page (Markdown, lossy; inline tags are comment anchors — markup, not text) or get_page_json (lossless ProseMirror with block ids). Hand a huge page (with images) to an external consumer without pulling it through the model context -> stash_page (returns a short-lived anonymous URL).\n" + + "READ: find a page -> search (workspace-wide full-text); list -> list_pages / list_spaces. Locate blocks and their ids CHEAPLY -> get_outline (compact top-level map; start here, not get_page_json). One block's subtree -> get_node (by attrs.id, or \"#\" for tables, which carry no id). Find every occurrence of a string/regex ON a page (and where each is) -> search_in_page, NOT block-by-block get_node — it returns each hit's node ref + block index + context for a targeted comment. 
Whole page -> get_page (Markdown, lossy; inline tags are comment anchors — markup, not text) or get_page_json (lossless ProseMirror with block ids). Hand a huge page (with images) to an external consumer without pulling it through the model context -> stash_page (returns a short-lived anonymous URL).\n" + "EDIT: fix wording/typos/numbers -> edit_page_text (find/replace inside blocks, no node id needed). Change ONE block (paragraph/heading/callout/etc.) structurally -> patch_node (by attrs.id from get_outline). Add a block -> insert_node (before/after a block by attrs.id or by anchor text, or append). Remove a block -> delete_node (by attrs.id). Tables -> table_get / table_update_cell / table_insert_row / table_delete_row (address by \"#\" from get_outline; table nodes have no attrs.id). Images -> insert_image (add from a web URL) / replace_image (swap an existing image). Footnotes -> insert_footnote. Bulk/structural rewrite -> update_page_json (full ProseMirror replace; prefer the granular tools above to avoid resending the whole ~100KB+ document). Complex/scripted rewrite (multiple coordinated edits, renumbering) -> docmost_transform: write a JS `(doc, ctx) => doc` transform, preview the diff with dryRun (default), then apply with dryRun:false; ctx.helpers includes commentsToFootnotes for turning inline comments into numbered footnotes.\n" + "PAGES: new -> create_page (Markdown). Rename (title only) -> rename_page. Move -> move_page. Delete -> delete_page (SOFT delete — the page goes to trash and is restorable; nothing is permanent). Copy/replace a page's whole content from another page (server-side, no document through the model) -> copy_page_content. 
Sharing -> share_page / unshare_page / list_shares; share_page makes the page PUBLICLY accessible — do it only when explicitly asked.\n" + "COMMENTS: create_comment is always inline and requires an EXACT selection — contiguous text from a single block, <=250 chars (fails rather than leaving an unanchored comment); reply to a thread via parentCommentId. Propose a concrete text fix for one-click human approval -> create_comment with suggestedText (the exact plain-text replacement for the selection; the selection must then be UNIQUE in the page — extend it with context if needed); prefer this over editing directly when the change is subjective or needs the author's sign-off. Manage -> list_comments, update_comment, resolve_comment (resolve/reopen, reversible — prefer over delete to close), delete_comment, check_new_comments.\n" + @@ -187,6 +187,19 @@ registerShared(SHARED_TOOL_SPECS.getNode, async ({ pageId, nodeId }) => { return jsonContent(result); }); +// Tool: search_in_page +registerShared( + SHARED_TOOL_SPECS.searchInPage, + async ({ pageId, query, regex, caseSensitive, limit }) => { + const result = await docmostClient.searchInPage(pageId, query, { + regex, + caseSensitive, + limit, + }); + return jsonContent(result); + }, +); + // Tool: table_get server.registerTool( "table_get", diff --git a/packages/mcp/src/lib/page-search.ts b/packages/mcp/src/lib/page-search.ts new file mode 100644 index 00000000..75be7839 --- /dev/null +++ b/packages/mcp/src/lib/page-search.ts @@ -0,0 +1,245 @@ +/** + * Pure, network-free in-page search over a ProseMirror/TipTap document tree. + * + * `searchInDoc(doc, query, opts)` finds every occurrence of a literal substring + * (default) or a regular expression across the page's TEXT CONTAINERS and + * reports WHERE each match is — the container's ref (usable verbatim with + * get_node/patch_node and comment anchoring), the top-level block index, and a + * short context window around the hit. 
It never touches the network, the DB, or + * the schema mirror; like `comment-anchor.ts` it is isolated-testable. + * + * WHY plain text (not markdown): each container's inline text is glued into ONE + * string via `blockPlainText`, so a match survives inline-mark boundaries + * (bold/italic/link splits that fracture a run like "т.е." into several text + * nodes) and comment-anchor spans never clutter the haystack. + * + * The SEARCH UNIT is a text container: a node whose direct children include + * text nodes (a paragraph/heading, or the paragraph inside a table cell / list + * item). ProseMirror keeps block vs. inline content exclusive, so a container + * never nests another container — the walk reaches each cell/item's own text and + * the context window is naturally scoped to that specific cell/item, not the + * whole top-level block's glued text. + */ + +import { blockPlainText } from "./node-ops.js"; + +/** True if `value` is a non-null plain object (and not an array). */ +function isObject(value: any): value is Record { + return value != null && typeof value === "object" && !Array.isArray(value); +} + +/** + * A text container is a node with a `content` array holding at least one text + * node (a child with a string `text`). These are the paragraphs/headings whose + * glued inline text we search. + */ +function isTextContainer(node: any): boolean { + return ( + isObject(node) && + Array.isArray(node.content) && + node.content.some((c: any) => isObject(c) && typeof c.text === "string") + ); +} + +/** Options controlling the search engine and result size. */ +export interface SearchOptions { + /** Treat `query` as a RegExp instead of a literal substring (default false). */ + regex?: boolean; + /** Case-sensitive matching (default false). */ + caseSensitive?: boolean; + /** Max matches to RETURN (default 50, clamped to [1, 200]); total is unbounded. */ + limit?: number; +} + +/** One located occurrence. 
*/ +export interface SearchMatch { + /** + * The container's ref: its `attrs.id` when it has one, otherwise + * `#` of the nearest top-level block (the same ref format + * get_node/patch_node and comment anchoring accept). Table-cell/list-item + * paragraphs that carry no id fall back to the `#` form. + */ + nodeId: string; + /** The top-level block index (as in get_outline). */ + blockIndex: number; + /** The container node's type (paragraph/heading/...). */ + type: string | undefined; + /** ~40 chars of context immediately before the match (from THIS container). */ + before: string; + /** The matched text. */ + match: string; + /** ~40 chars of context immediately after the match (from THIS container). */ + after: string; +} + +/** The search result. `truncated` is true when `total > matches.length`. */ +export interface SearchResult { + total: number; + truncated: boolean; + matches: SearchMatch[]; +} + +// Result-size defaults/ceiling. +const DEFAULT_LIMIT = 50; +const MAX_LIMIT = 200; + +// Context window on each side of a match. +const CONTEXT = 40; + +// Anti-ReDoS guards. JS regex is not interruptible, so a pathological pattern +// on a large input can wedge the event loop; we bound BOTH inputs by size (not +// a timeout). These also bound the literal engine's work. +const MAX_PATTERN_LENGTH = 1000; // cap the query/pattern length +const MAX_CONTAINER_TEXT = 100_000; // cap the text scanned per container + +/** Clamp the requested limit into [1, MAX_LIMIT], defaulting when absent. */ +function resolveLimit(limit: number | undefined): number { + const n = typeof limit === "number" && Number.isFinite(limit) ? limit : DEFAULT_LIMIT; + return Math.min(MAX_LIMIT, Math.max(1, Math.floor(n))); +} + +/** + * Yield the [start, length] of every occurrence of the engine in `text`, in + * order. A literal engine uses indexOf (case-folded when requested); a regex + * engine uses a global RegExp. Zero-length regex matches (e.g. 
`\b`, `a*`) are + * SKIPPED and lastIndex is advanced, so a pattern that can match the empty + * string cannot flood the results or spin forever. + */ +function* eachMatch( + text: string, + query: string, + re: RegExp | null, + caseSensitive: boolean, +): Generator<[number, number]> { + if (re) { + re.lastIndex = 0; + let m: RegExpExecArray | null; + while ((m = re.exec(text)) != null) { + const len = m[0].length; + if (len === 0) { + // Empty match: advance past this position and do not record it. + re.lastIndex = m.index + 1; + continue; + } + yield [m.index, len]; + } + return; + } + + // Literal engine. For case-insensitive search, fold BOTH sides only to locate + // the indices; the reported match/context are always sliced from the original + // text so the caller gets the real casing (needed to build a unique selection). + const haystack = caseSensitive ? text : text.toLowerCase(); + const needle = caseSensitive ? query : query.toLowerCase(); + const len = needle.length; + let from = 0; + for (;;) { + const idx = haystack.indexOf(needle, from); + if (idx === -1) return; + yield [idx, len]; + from = idx + len; + } +} + +/** + * Search a ProseMirror document for `query` and return `{ total, truncated, + * matches }`. `total` counts EVERY occurrence (even beyond the limit) and + * `truncated` flags when the returned list was capped — nothing is silently + * dropped. + * + * Throws a clear, model-actionable error (never a generic failure) on: an + * empty/whitespace-only query, an over-long pattern, or — with `regex:true` — + * an invalid RegExp, so the agent can fix its input. 
+ */ +export function searchInDoc( + doc: any, + query: string, + opts: SearchOptions = {}, +): SearchResult { + // --- edge-case guards (fail loudly so the agent can correct the call) --- + if (typeof query !== "string" || query.trim().length === 0) { + throw new Error( + "search_in_page: query is empty — pass the text (or regex) to look for.", + ); + } + if (query.length > MAX_PATTERN_LENGTH) { + throw new Error( + `search_in_page: query is too long (${query.length} chars; max ${MAX_PATTERN_LENGTH}). Shorten the search text/pattern.`, + ); + } + + const caseSensitive = opts.caseSensitive === true; + const limit = resolveLimit(opts.limit); + + // Compile the regex up front so an invalid pattern is a clean tool error + // rather than a failure deep in the traversal. + let re: RegExp | null = null; + if (opts.regex === true) { + try { + re = new RegExp(query, caseSensitive ? "g" : "gi"); + } catch (e) { + throw new Error( + `search_in_page: invalid regular expression: ${ + e instanceof Error ? e.message : String(e) + }`, + ); + } + } + + const matches: SearchMatch[] = []; + let total = 0; + + const topLevel = + isObject(doc) && Array.isArray(doc.content) ? doc.content : []; + + // Descend a top-level block, collecting matches from every text container + // within it. blockIndex/topRef stay pinned to the enclosing top-level block. + const descend = (node: any, blockIndex: number, topRef: string): void => { + if (!isObject(node)) return; + + if (isTextContainer(node)) { + // Glue this container's inline text into one string (mark-safe) and cap it + // so a single non-interruptible regex exec can never run on an unbounded + // input. + let text = blockPlainText(node); + if (text.length > MAX_CONTAINER_TEXT) { + text = text.slice(0, MAX_CONTAINER_TEXT); + } + + // The container's own id addresses it verbatim in get_node/patch_node and + // comment anchoring; a container with no id (e.g. a table-cell paragraph) + // falls back to the top-level block's #. 
+ const id = + isObject(node.attrs) && typeof node.attrs.id === "string" && node.attrs.id.length > 0 + ? node.attrs.id + : topRef; + + for (const [idx, len] of eachMatch(text, query, re, caseSensitive)) { + total++; + if (matches.length < limit) { + matches.push({ + nodeId: id, + blockIndex, + type: node.type, + before: text.slice(Math.max(0, idx - CONTEXT), idx), + match: text.slice(idx, idx + len), + after: text.slice(idx + len, idx + len + CONTEXT), + }); + } + } + // A text container holds inline content only — no nested containers to + // recurse into. + return; + } + + if (Array.isArray(node.content)) { + for (const child of node.content) descend(child, blockIndex, topRef); + } + }; + + for (let i = 0; i < topLevel.length; i++) { + descend(topLevel[i], i, `#${i}`); + } + + return { total, truncated: total > matches.length, matches }; +} diff --git a/packages/mcp/src/tool-specs.ts b/packages/mcp/src/tool-specs.ts index c6dbd7da..cec8bc89 100644 --- a/packages/mcp/src/tool-specs.ts +++ b/packages/mcp/src/tool-specs.ts @@ -110,6 +110,51 @@ export const SHARED_TOOL_SPECS = { }), }, + // --- in-page occurrence search (client-side, over ProseMirror plain text) --- + + searchInPage: { + mcpName: 'search_in_page', + inAppKey: 'searchInPage', + description: + 'Find every occurrence of a string (or regex) INSIDE one page and get ' + + 'WHERE each is — instead of pulling blocks one-by-one with get_node. ' + + 'Searches the plain text of each text block/cell (marks glued, so a match ' + + 'survives bold/italic/link splits; comment anchors do not interfere). ' + + 'Returns { total, truncated, matches:[{ nodeId, blockIndex, type, before, ' + + 'match, after }] }: `nodeId` is the block id (or "#" for ' + + 'table/cell content) — pass it straight to get_node/patch_node or as a ' + + 'comment anchor; `blockIndex` is the get_outline index; `before`/`after` ' + + 'give ~40 chars of context to build a unique selection. 
`total` counts all ' + + 'hits and `truncated` is true when more than `limit` were found (nothing ' + + 'is silently dropped). Default is a literal, case-INSENSITIVE substring; ' + + 'set regex:true for a JS regular expression (char classes, word ' + + 'boundaries) and caseSensitive:true to match case. Ideal for systematic ' + + 'editorial sweeps (unquoted "ё", straight quotes, "т.е.", stray units). An ' + + 'invalid regex or an empty query returns a clear error to fix.', + buildShape: (z) => ({ + pageId: z.string().min(1).describe('ID of the page to search'), + query: z + .string() + .min(1) + .describe('The text to find (a literal substring, or a regex when regex:true)'), + regex: z + .boolean() + .optional() + .describe('Treat query as a JS regular expression (default false).'), + caseSensitive: z + .boolean() + .optional() + .describe('Case-sensitive matching (default false).'), + limit: z + .number() + .int() + .min(1) + .max(200) + .optional() + .describe('Max matches to RETURN (default 50, max 200); total is always reported.'), + }), + }, + // --- node delete --- deleteNode: { diff --git a/packages/mcp/test/unit/client-host-contract.test.mjs b/packages/mcp/test/unit/client-host-contract.test.mjs index d7b80b1b..bf8c26bb 100644 --- a/packages/mcp/test/unit/client-host-contract.test.mjs +++ b/packages/mcp/test/unit/client-host-contract.test.mjs @@ -45,6 +45,7 @@ const HOST_CONTRACT_METHODS = [ "getOutline", "getPageJson", "getNode", + "searchInPage", "getTable", "listComments", "getComment", diff --git a/packages/mcp/test/unit/page-search.test.mjs b/packages/mcp/test/unit/page-search.test.mjs new file mode 100644 index 00000000..c5bd0fe5 --- /dev/null +++ b/packages/mcp/test/unit/page-search.test.mjs @@ -0,0 +1,217 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; + +import { searchInDoc } from "../../build/lib/page-search.js"; + +// --------------------------------------------------------------------------- +// Document builders. 
Mirror the Docmost ProseMirror shape: paragraphs/headings +// carry an attrs.id and hold text nodes; a text node may carry marks, and +// adjacent runs with different marks are GLUED by blockPlainText so a match can +// straddle a mark boundary. Table cells hold id-less paragraphs. +// --------------------------------------------------------------------------- + +const text = (t, marks) => (marks ? { type: "text", text: t, marks } : { type: "text", text: t }); +const para = (id, ...children) => ({ type: "paragraph", attrs: { id }, content: children }); +const heading = (id, level, t) => ({ + type: "heading", + attrs: { id, level }, + content: [text(t)], +}); + +function doc(...content) { + return { type: "doc", content }; +} + +test("literal substring: finds every occurrence with total/truncated and refs", () => { + const d = doc( + para("p1", text("The cat sat on the cat mat.")), + heading("h1", 2, "Another cat here"), + ); + const res = searchInDoc(d, "cat"); + assert.equal(res.total, 3); + assert.equal(res.truncated, false); + assert.equal(res.matches.length, 3); + // First hit: paragraph p1, block index 0. + assert.equal(res.matches[0].nodeId, "p1"); + assert.equal(res.matches[0].blockIndex, 0); + assert.equal(res.matches[0].type, "paragraph"); + assert.equal(res.matches[0].match, "cat"); + // Third hit is in the heading (block index 1). 
+ assert.equal(res.matches[2].nodeId, "h1"); + assert.equal(res.matches[2].blockIndex, 1); + assert.equal(res.matches[2].type, "heading"); +}); + +test("context windows: before/after are drawn from the SAME container", () => { + const d = doc(para("p1", text("alpha beta gamma delta"))); + const res = searchInDoc(d, "gamma"); + assert.equal(res.matches.length, 1); + assert.equal(res.matches[0].before, "alpha beta "); + assert.equal(res.matches[0].match, "gamma"); + assert.equal(res.matches[0].after, " delta"); +}); + +test("context windows are bounded to ~40 chars each side", () => { + const long = "x".repeat(100); + const d = doc(para("p1", text(long + "NEEDLE" + long))); + const res = searchInDoc(d, "NEEDLE"); + assert.equal(res.matches.length, 1); + assert.equal(res.matches[0].before.length, 40); + assert.equal(res.matches[0].after.length, 40); +}); + +test("case-insensitive by default; caseSensitive:true narrows", () => { + const d = doc(para("p1", text("Cat CAT cat"))); + assert.equal(searchInDoc(d, "cat").total, 3); + assert.equal(searchInDoc(d, "cat", { caseSensitive: true }).total, 1); + // Reported match preserves the ORIGINAL casing even under a folded search. + const res = searchInDoc(d, "cat"); + assert.deepEqual( + res.matches.map((m) => m.match), + ["Cat", "CAT", "cat"], + ); +}); + +test("match survives an inline mark boundary (glued runs)", () => { + // "т.е." is fractured across three text nodes by bold/italic marks. + const d = doc( + para( + "p1", + text("вводное слово, "), + text("т", [{ type: "bold" }]), + text(".", [{ type: "italic" }]), + text("е", [{ type: "bold" }]), + text(". 
дальше"), + ), + ); + const res = searchInDoc(d, "т.е."); + assert.equal(res.total, 1); + assert.equal(res.matches[0].match, "т.е."); + assert.equal(res.matches[0].nodeId, "p1"); +}); + +test("regex engine: character classes and word boundaries", () => { + const d = doc(para("p1", text("v1 v22 version v3"))); + const res = searchInDoc(d, "\\bv\\d+\\b", { regex: true }); + assert.deepEqual( + res.matches.map((m) => m.match), + ["v1", "v22", "v3"], + ); + // "version" is not matched by \bv\d+\b. + assert.equal(res.total, 3); +}); + +test("regex is case-insensitive by default and respects caseSensitive", () => { + const d = doc(para("p1", text("Foo foo FOO"))); + assert.equal(searchInDoc(d, "foo", { regex: true }).total, 3); + assert.equal( + searchInDoc(d, "foo", { regex: true, caseSensitive: true }).total, + 1, + ); +}); + +test("regex empty/zero-length matches are skipped, not flooded", () => { + const d = doc(para("p1", text("abc"))); + // `a*` can match the empty string at every position; we must not emit those. + const res = searchInDoc(d, "a*", { regex: true }); + assert.equal(res.total, 1); + assert.equal(res.matches[0].match, "a"); +}); + +test("nodeId for a table cell paragraph WITHOUT an id falls back to #", () => { + // A table at top-level block index 1; its cell paragraphs carry no attrs.id. + const cellPara = (t) => ({ type: "paragraph", content: [text(t)] }); + const d = doc( + para("intro", text("before the table")), + { + type: "table", + content: [ + { + type: "tableRow", + content: [ + { type: "tableCell", content: [cellPara("needle in a cell")] }, + { type: "tableHeader", content: [cellPara("another needle")] }, + ], + }, + ], + }, + ); + const res = searchInDoc(d, "needle"); + assert.equal(res.total, 2); + // Both cell hits report the table's top-level # (block 1) since the + // cell paragraphs have no id. 
+ for (const m of res.matches) { + assert.equal(m.nodeId, "#1"); + assert.equal(m.blockIndex, 1); + } + // Context is scoped to the specific cell, not the whole table's glued text. + assert.equal(res.matches[0].after, " in a cell"); + assert.equal(res.matches[1].before, "another "); +}); + +test("nodeId uses attrs.id when the container has one (paragraph & heading)", () => { + const d = doc(heading("h9", 1, "heading needle"), para("p9", text("para needle"))); + const res = searchInDoc(d, "needle"); + assert.equal(res.matches[0].nodeId, "h9"); + assert.equal(res.matches[1].nodeId, "p9"); +}); + +test("limit caps the returned matches but total and truncated stay honest", () => { + const d = doc(para("p1", text("x ".repeat(10).trim()))); // 10 'x' + const res = searchInDoc(d, "x", { limit: 3 }); + assert.equal(res.total, 10); + assert.equal(res.matches.length, 3); + assert.equal(res.truncated, true); +}); + +test("limit is clamped to the [1, 200] range", () => { + const d = doc(para("p1", text("a".repeat(5)))); + // A limit above the ceiling still returns all 5 (< 200) without truncation. + const hi = searchInDoc(d, "a", { limit: 9999 }); + assert.equal(hi.matches.length, 5); + assert.equal(hi.truncated, false); + // A non-positive limit clamps up to 1. 
+ const lo = searchInDoc(d, "a", { limit: 0 }); + assert.equal(lo.matches.length, 1); + assert.equal(lo.total, 5); + assert.equal(lo.truncated, true); +}); + +test("invalid regex throws a clear tool error", () => { + const d = doc(para("p1", text("hi"))); + assert.throws( + () => searchInDoc(d, "(", { regex: true }), + /invalid regular expression/i, + ); +}); + +test("empty or whitespace-only query is rejected", () => { + const d = doc(para("p1", text("hi"))); + assert.throws(() => searchInDoc(d, ""), /query is empty/i); + assert.throws(() => searchInDoc(d, " "), /query is empty/i); + assert.throws(() => searchInDoc(d, undefined), /query is empty/i); +}); + +test("an over-long pattern is rejected (anti-ReDoS pattern cap)", () => { + const d = doc(para("p1", text("hi"))); + assert.throws(() => searchInDoc(d, "a".repeat(1001)), /too long/i); +}); + +test("no matches yields an empty, non-truncated result", () => { + const d = doc(para("p1", text("nothing to see"))); + const res = searchInDoc(d, "zebra"); + assert.deepEqual(res, { total: 0, truncated: false, matches: [] }); +}); + +test("null-safe on a missing/empty doc", () => { + assert.deepEqual(searchInDoc(null, "x"), { + total: 0, + truncated: false, + matches: [], + }); + assert.deepEqual(searchInDoc({ type: "doc" }, "x"), { + total: 0, + truncated: false, + matches: [], + }); +}); From 94f60cf0ec4fd21cf0758e12a4332f115f127c13 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 4 Jul 2026 16:21:25 +0300 Subject: [PATCH 5/7] =?UTF-8?q?docs(client):=20fix=20.suggestionChanged=20?= =?UTF-8?q?comment=20=E2=80=94=20bold=20weight,=20not=20underline=20(#331?= =?UTF-8?q?=20review=20F1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The header comment claimed the rule adds 'an underline'; it does not — it adds a color-mix tint + font-weight:700, and the inner comment already notes text- decoration is omitted on purpose. Aligned the header comment with the rule. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/client/src/features/comment/components/comment.module.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/client/src/features/comment/components/comment.module.css b/apps/client/src/features/comment/components/comment.module.css index f3e7f04f..0f6e4c5e 100644 --- a/apps/client/src/features/comment/components/comment.module.css +++ b/apps/client/src/features/comment/components/comment.module.css @@ -55,7 +55,7 @@ /* Intraline diff (#331): the fragment that actually changed within the red "before" / green "after" block. It inherits the surrounding red/green - framing and adds a stronger tint plus an underline so the eye lands on the + framing and adds a stronger tint plus bold weight so the eye lands on the changed letters/words (git/GitHub-style) rather than the whole line. The container's line-through (old) / green (new) still marks the full line. */ .suggestionChanged { From 77b245461ffea0362833d965a52c001da93e4bd6 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 4 Jul 2026 17:45:49 +0300 Subject: [PATCH 6/7] fix(mcp): search_in_page regex via re2 (ReDoS-safe) + review DO F1-F4 (#330 review) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maintainer decision on the escalated ReDoS fork: use re2. The regex path compiled agent-supplied patterns with `new RegExp` and ran them synchronously in the shared event-loop; a catastrophic-backtracking pattern (e.g. `(a+)+$`) hung the whole Node backend for all users (the tool is in both transports incl. the in-app apps/server agent), and size caps do NOT bound backtracking. Switch the regex engine to re2 (Google RE2, linear-time, no backtracking): - `new RE2(query, caseSensitive?'g':'gi')`. RE2 extends RegExp, so eachMatch and the zero-length-match lastIndex guard are unchanged. 
- Unsupported patterns are now a CLEAN error, not a hang: RE2 throws on invalid syntax AND on the backtracking-only features it can't do (lookaround (?=…)/(?<=…), backreferences \1) — caught at compile and returned as a clear tool error telling the agent to rewrite without them. - Removed MAX_CONTAINER_TEXT + the per-container slice (re2 is linear, so it's no longer a ReDoS defense, and truncating risked silently dropping real matches in a long container); kept MAX_PATTERN_LENGTH as a cheap query sanity cap. - Verified: `(a+)+$` over 50k `a` completes in ~4ms; lookaround/backref throw. - Added re2 (^1.21.0) to packages/mcp; lockfile updated. Reviewer DO items: - F1 [doc]: removed the false "pass nodeId as a comment anchor" claim (create_comment has no nodeId param — it needs a text `selection`). Fixed in tool-specs.ts + page-search.ts (module + SearchMatch JSDoc) + client.ts; the ref is for get_node/patch_node, and for a comment you build a unique text selection from before+match+after. - F2 [doc]: clarified `#<index>` refs (id-less table/cell) are accepted by get_node but NOT patch_node (id-only). - F3 [test]: round-trip — each match's nodeId fed to the real getNodeByRef (attrs.id node + `#<index>` table-cell) to prove the ref format is consumable. - F4 [test]: before/after edge-pinning (match in first 40 chars of a long container; index 0 → before==""; container end → after==""). - New re2 tests: catastrophic patterns complete fast; lookaround/backref → error. mcp: tsc clean; node --test 472 passed (+5). apps/server: tsc --noEmit clean. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/mcp/build/client.js | 12 +- packages/mcp/build/lib/page-search.js | 73 +++++++----- packages/mcp/build/tool-specs.js | 10 +- packages/mcp/package.json | 1 + packages/mcp/src/client.ts | 12 +- packages/mcp/src/lib/page-search.ts | 102 ++++++++++------ packages/mcp/src/tool-specs.ts | 10 +- packages/mcp/test/unit/page-search.test.mjs | 105 ++++++++++++++++- pnpm-lock.yaml | 124 ++++++++++++++++++++ 9 files changed, 369 insertions(+), 80 deletions(-) diff --git a/packages/mcp/build/client.js b/packages/mcp/build/client.js index b656546f..81a2ec29 100644 --- a/packages/mcp/build/client.js +++ b/packages/mcp/build/client.js @@ -877,11 +877,13 @@ export class DocmostClient { * Find every occurrence of `query` on a page IN MEMORY, over the plain text of * each text container (reusing the same `getPageRaw` fetch as the other read * tools) — no server search endpoint, no whole-document round-trip through the - * model. Returns `{ total, truncated, matches }`; each match carries a ref the - * agent can hand straight to get_node/patch_node or a comment anchor, plus the - * top-level block index and a short context window to build a unique selection. - * The pure engine (`searchInDoc`) owns the traversal, glue, ReDoS guards and - * the empty-query / invalid-regex errors. + * model. Returns `{ total, truncated, matches }`; each match carries a ref for + * get_node/patch_node (the `#` form resolves with get_node but NOT + * patch_node — see SearchMatch.nodeId), plus the top-level block index and a + * short context window used to build a unique text `selection` for + * create_comment (create_comment has no nodeId param). The pure engine + * (`searchInDoc`) owns the traversal, glue, the RE2 ReDoS-safe regex engine + * and the empty-query / invalid-or-unsupported-regex errors. 
*/ async searchInPage(pageId, query, opts = {}) { await this.ensureAuthenticated(); diff --git a/packages/mcp/build/lib/page-search.js b/packages/mcp/build/lib/page-search.js index cd762909..34ca407b 100644 --- a/packages/mcp/build/lib/page-search.js +++ b/packages/mcp/build/lib/page-search.js @@ -3,10 +3,20 @@ * * `searchInDoc(doc, query, opts)` finds every occurrence of a literal substring * (default) or a regular expression across the page's TEXT CONTAINERS and - * reports WHERE each match is — the container's ref (usable verbatim with - * get_node/patch_node and comment anchoring), the top-level block index, and a - * short context window around the hit. It never touches the network, the DB, or - * the schema mirror; like `comment-anchor.ts` it is isolated-testable. + * reports WHERE each match is — the container's ref (for get_node/patch_node; + * see the SearchMatch.nodeId note for the `#` caveat), the top-level + * block index, and a short context window around the hit. It never touches the + * network, the DB, or the schema mirror; like `comment-anchor.ts` it is + * isolated-testable. + * + * REGEX ENGINE: with `regex:true` the pattern is compiled with RE2 (Google's + * linear-time engine), NOT the JS `RegExp`. RE2 has no backtracking, so a + * catastrophic pattern (e.g. `(a+)+$`) can never wedge the shared event loop — + * it runs in linear time. The trade-off is that RE2 does not support the + * backtracking-only features lookaround (`(?=…)`, `(?<=…)`) and backreferences + * (`\1`); such a pattern is rejected up front with a clear tool error (see + * searchInDoc) rather than being run, which is the desired behaviour — a clear + * error the agent can fix beats a server hang. 
* * WHY plain text (not markdown): each container's inline text is glued into ONE * string via `blockPlainText`, so a match survives inline-mark boundaries @@ -20,6 +30,7 @@ * the context window is naturally scoped to that specific cell/item, not the * whole top-level block's glued text. */ +import RE2 from "re2"; import { blockPlainText } from "./node-ops.js"; /** True if `value` is a non-null plain object (and not an array). */ function isObject(value) { @@ -40,11 +51,12 @@ const DEFAULT_LIMIT = 50; const MAX_LIMIT = 200; // Context window on each side of a match. const CONTEXT = 40; -// Anti-ReDoS guards. JS regex is not interruptible, so a pathological pattern -// on a large input can wedge the event loop; we bound BOTH inputs by size (not -// a timeout). These also bound the literal engine's work. -const MAX_PATTERN_LENGTH = 1000; // cap the query/pattern length -const MAX_CONTAINER_TEXT = 100_000; // cap the text scanned per container +// Cheap sanity cap on the query/pattern length. ReDoS is handled structurally +// by the RE2 engine (linear-time, no backtracking — see the module doc), so we +// no longer truncate the per-container text: RE2 scans it in linear time and a +// cap could silently drop real matches past it. This just rejects an absurdly +// long pattern early with a clear error. +const MAX_PATTERN_LENGTH = 1000; /** Clamp the requested limit into [1, MAX_LIMIT], defaulting when absent. */ function resolveLimit(limit) { const n = typeof limit === "number" && Number.isFinite(limit) ? limit : DEFAULT_LIMIT; @@ -53,9 +65,10 @@ function resolveLimit(limit) { /** * Yield the [start, length] of every occurrence of the engine in `text`, in * order. A literal engine uses indexOf (case-folded when requested); a regex - * engine uses a global RegExp. Zero-length regex matches (e.g. `\b`, `a*`) are - * SKIPPED and lastIndex is advanced, so a pattern that can match the empty - * string cannot flood the results or spin forever. 
+ * engine uses a global RE2 regex (RE2 extends `RegExp`, so `.exec` advances + * `lastIndex` exactly like the native engine). Zero-length regex matches (e.g. + * `\b`, `a*`) are SKIPPED and lastIndex is advanced, so a pattern that can match + * the empty string cannot flood the results or spin forever. */ function* eachMatch(text, query, re, caseSensitive) { if (re) { @@ -94,8 +107,9 @@ function* eachMatch(text, query, re, caseSensitive) { * dropped. * * Throws a clear, model-actionable error (never a generic failure) on: an - * empty/whitespace-only query, an over-long pattern, or — with `regex:true` — - * an invalid RegExp, so the agent can fix its input. + * empty/whitespace-only query, an over-long pattern, or — with `regex:true` — a + * pattern RE2 rejects (invalid syntax, or the unsupported lookaround/ + * backreference features), so the agent can fix its input. */ export function searchInDoc(doc, query, opts = {}) { // --- edge-case guards (fail loudly so the agent can correct the call) --- @@ -107,15 +121,19 @@ export function searchInDoc(doc, query, opts = {}) { } const caseSensitive = opts.caseSensitive === true; const limit = resolveLimit(opts.limit); - // Compile the regex up front so an invalid pattern is a clean tool error - // rather than a failure deep in the traversal. + // Compile the pattern up front with RE2 (linear-time, ReDoS-safe) so a bad + // pattern is a clean tool error rather than a failure deep in the traversal — + // and so a catastrophic-backtracking pattern can never wedge the event loop. + // RE2 throws both on syntactically invalid input AND on backtracking-only + // features it does not implement (lookaround, backreferences); both map to the + // same actionable error so the agent rewrites the pattern. let re = null; if (opts.regex === true) { try { - re = new RegExp(query, caseSensitive ? "g" : "gi"); + re = new RE2(query, caseSensitive ? 
"g" : "gi"); } catch (e) { - throw new Error(`search_in_page: invalid regular expression: ${e instanceof Error ? e.message : String(e)}`); + throw new Error(`search_in_page: invalid or unsupported regular expression: ${e instanceof Error ? e.message : String(e)} — RE2 does not support lookaround ((?=…)/(?<=…)) or backreferences (\\1); rewrite the pattern without them.`); } } const matches = []; @@ -127,16 +145,15 @@ export function searchInDoc(doc, query, opts = {}) { if (!isObject(node)) return; if (isTextContainer(node)) { - // Glue this container's inline text into one string (mark-safe) and cap it - // so a single non-interruptible regex exec can never run on an unbounded - // input. - let text = blockPlainText(node); - if (text.length > MAX_CONTAINER_TEXT) { - text = text.slice(0, MAX_CONTAINER_TEXT); - } - // The container's own id addresses it verbatim in get_node/patch_node and - // comment anchoring; a container with no id (e.g. a table-cell paragraph) - // falls back to the top-level block's #. + // Glue this container's inline text into one string (mark-safe). No length + // cap: RE2 scans it in linear time (no ReDoS) and the whole document is + // already in memory, so truncating would only risk dropping real matches + // in a very long container. + const text = blockPlainText(node); + // The container's own id addresses it verbatim in get_node/patch_node; a + // container with no id (e.g. a table-cell paragraph) falls back to the + // top-level block's # (readable via get_node, but not patchable — + // see the SearchMatch.nodeId note). const id = isObject(node.attrs) && typeof node.attrs.id === "string" && node.attrs.id.length > 0 ? 
node.attrs.id : topRef; diff --git a/packages/mcp/build/tool-specs.js b/packages/mcp/build/tool-specs.js index df05da43..51ceb675 100644 --- a/packages/mcp/build/tool-specs.js +++ b/packages/mcp/build/tool-specs.js @@ -84,9 +84,13 @@ export const SHARED_TOOL_SPECS = { 'survives bold/italic/link splits; comment anchors do not interfere). ' + 'Returns { total, truncated, matches:[{ nodeId, blockIndex, type, before, ' + 'match, after }] }: `nodeId` is the block id (or "#" for ' + - 'table/cell content) — pass it straight to get_node/patch_node or as a ' + - 'comment anchor; `blockIndex` is the get_outline index; `before`/`after` ' + - 'give ~40 chars of context to build a unique selection. `total` counts all ' + + 'table/cell content) — pass it to get_node/patch_node (the "#" ' + + 'form resolves with get_node but NOT patch_node, which only accepts a real ' + + 'block id). To anchor a comment, do NOT pass nodeId to create_comment (it ' + + 'has no nodeId param); build a UNIQUE text selection from before+match+' + + 'after and pass it as create_comment\'s `selection`. `blockIndex` is the ' + + 'get_outline index; `before`/`after` give ~40 chars of context to build ' + + 'that unique selection. `total` counts all ' + 'hits and `truncated` is true when more than `limit` were found (nothing ' + 'is silently dropped). 
Default is a literal, case-INSENSITIVE substring; ' + 'set regex:true for a JS regular expression (char classes, word ' + diff --git a/packages/mcp/package.json b/packages/mcp/package.json index 3edc1902..9ca5b471 100644 --- a/packages/mcp/package.json +++ b/packages/mcp/package.json @@ -51,6 +51,7 @@ "form-data": "^4.0.0", "jsdom": "^27.4.0", "marked": "^17.0.1", + "re2": "^1.21.0", "ws": "^8.19.0", "y-prosemirror": "1.3.7", "yjs": "^13.6.29", diff --git a/packages/mcp/src/client.ts b/packages/mcp/src/client.ts index 39654fdb..7c6aef75 100644 --- a/packages/mcp/src/client.ts +++ b/packages/mcp/src/client.ts @@ -1098,11 +1098,13 @@ export class DocmostClient { * Find every occurrence of `query` on a page IN MEMORY, over the plain text of * each text container (reusing the same `getPageRaw` fetch as the other read * tools) — no server search endpoint, no whole-document round-trip through the - * model. Returns `{ total, truncated, matches }`; each match carries a ref the - * agent can hand straight to get_node/patch_node or a comment anchor, plus the - * top-level block index and a short context window to build a unique selection. - * The pure engine (`searchInDoc`) owns the traversal, glue, ReDoS guards and - * the empty-query / invalid-regex errors. + * model. Returns `{ total, truncated, matches }`; each match carries a ref for + * get_node/patch_node (the `#` form resolves with get_node but NOT + * patch_node — see SearchMatch.nodeId), plus the top-level block index and a + * short context window used to build a unique text `selection` for + * create_comment (create_comment has no nodeId param). The pure engine + * (`searchInDoc`) owns the traversal, glue, the RE2 ReDoS-safe regex engine + * and the empty-query / invalid-or-unsupported-regex errors. 
*/ async searchInPage(pageId: string, query: string, opts: SearchOptions = {}) { await this.ensureAuthenticated(); diff --git a/packages/mcp/src/lib/page-search.ts b/packages/mcp/src/lib/page-search.ts index 75be7839..7f95b2a5 100644 --- a/packages/mcp/src/lib/page-search.ts +++ b/packages/mcp/src/lib/page-search.ts @@ -3,10 +3,20 @@ * * `searchInDoc(doc, query, opts)` finds every occurrence of a literal substring * (default) or a regular expression across the page's TEXT CONTAINERS and - * reports WHERE each match is — the container's ref (usable verbatim with - * get_node/patch_node and comment anchoring), the top-level block index, and a - * short context window around the hit. It never touches the network, the DB, or - * the schema mirror; like `comment-anchor.ts` it is isolated-testable. + * reports WHERE each match is — the container's ref (for get_node/patch_node; + * see the SearchMatch.nodeId note for the `#` caveat), the top-level + * block index, and a short context window around the hit. It never touches the + * network, the DB, or the schema mirror; like `comment-anchor.ts` it is + * isolated-testable. + * + * REGEX ENGINE: with `regex:true` the pattern is compiled with RE2 (Google's + * linear-time engine), NOT the JS `RegExp`. RE2 has no backtracking, so a + * catastrophic pattern (e.g. `(a+)+$`) can never wedge the shared event loop — + * it runs in linear time. The trade-off is that RE2 does not support the + * backtracking-only features lookaround (`(?=…)`, `(?<=…)`) and backreferences + * (`\1`); such a pattern is rejected up front with a clear tool error (see + * searchInDoc) rather than being run, which is the desired behaviour — a clear + * error the agent can fix beats a server hang. * * WHY plain text (not markdown): each container's inline text is glued into ONE * string via `blockPlainText`, so a match survives inline-mark boundaries @@ -21,8 +31,13 @@ * whole top-level block's glued text. 
*/ +import RE2 from "re2"; + import { blockPlainText } from "./node-ops.js"; +/** An RE2 regex instance (RE2 extends `RegExp`, so it is usable as one). */ +type Re2Regex = InstanceType; + /** True if `value` is a non-null plain object (and not an array). */ function isObject(value: any): value is Record { return value != null && typeof value === "object" && !Array.isArray(value); @@ -54,10 +69,21 @@ export interface SearchOptions { /** One located occurrence. */ export interface SearchMatch { /** - * The container's ref: its `attrs.id` when it has one, otherwise - * `#` of the nearest top-level block (the same ref format - * get_node/patch_node and comment anchoring accept). Table-cell/list-item - * paragraphs that carry no id fall back to the `#` form. + * The container's ref, for addressing the block with get_node/patch_node: its + * `attrs.id` when it has one, otherwise `#` of the nearest + * top-level block. Table-cell/list-item paragraphs that carry no id fall back + * to the `#` form. + * + * CAVEAT: the `#` form is accepted by get_node (getNodeByRef resolves + * it by top-level index) but NOT by patch_node (replaceNodeById resolves only + * by `attrs.id`), so id-less table/cell content can be READ by this ref but + * not PATCHED by it. + * + * To anchor a comment, do NOT pass this ref to create_comment — it has no + * nodeId parameter. A top-level comment needs an exact-text `selection` that + * occurs once on the page (it fails if the text isn't found), so build a + * UNIQUE `selection` from before+match+after and pass THAT as create_comment's + * `selection`. */ nodeId: string; /** The top-level block index (as in get_outline). */ @@ -86,11 +112,12 @@ const MAX_LIMIT = 200; // Context window on each side of a match. const CONTEXT = 40; -// Anti-ReDoS guards. JS regex is not interruptible, so a pathological pattern -// on a large input can wedge the event loop; we bound BOTH inputs by size (not -// a timeout). These also bound the literal engine's work. 
-const MAX_PATTERN_LENGTH = 1000; // cap the query/pattern length -const MAX_CONTAINER_TEXT = 100_000; // cap the text scanned per container +// Cheap sanity cap on the query/pattern length. ReDoS is handled structurally +// by the RE2 engine (linear-time, no backtracking — see the module doc), so we +// no longer truncate the per-container text: RE2 scans it in linear time and a +// cap could silently drop real matches past it. This just rejects an absurdly +// long pattern early with a clear error. +const MAX_PATTERN_LENGTH = 1000; /** Clamp the requested limit into [1, MAX_LIMIT], defaulting when absent. */ function resolveLimit(limit: number | undefined): number { @@ -101,14 +128,15 @@ function resolveLimit(limit: number | undefined): number { /** * Yield the [start, length] of every occurrence of the engine in `text`, in * order. A literal engine uses indexOf (case-folded when requested); a regex - * engine uses a global RegExp. Zero-length regex matches (e.g. `\b`, `a*`) are - * SKIPPED and lastIndex is advanced, so a pattern that can match the empty - * string cannot flood the results or spin forever. + * engine uses a global RE2 regex (RE2 extends `RegExp`, so `.exec` advances + * `lastIndex` exactly like the native engine). Zero-length regex matches (e.g. + * `\b`, `a*`) are SKIPPED and lastIndex is advanced, so a pattern that can match + * the empty string cannot flood the results or spin forever. */ function* eachMatch( text: string, query: string, - re: RegExp | null, + re: Re2Regex | null, caseSensitive: boolean, ): Generator<[number, number]> { if (re) { @@ -148,8 +176,9 @@ function* eachMatch( * dropped. * * Throws a clear, model-actionable error (never a generic failure) on: an - * empty/whitespace-only query, an over-long pattern, or — with `regex:true` — - * an invalid RegExp, so the agent can fix its input. 
+ * empty/whitespace-only query, an over-long pattern, or — with `regex:true` — a + * pattern RE2 rejects (invalid syntax, or the unsupported lookaround/ + * backreference features), so the agent can fix its input. */ export function searchInDoc( doc: any, @@ -171,17 +200,21 @@ export function searchInDoc( const caseSensitive = opts.caseSensitive === true; const limit = resolveLimit(opts.limit); - // Compile the regex up front so an invalid pattern is a clean tool error - // rather than a failure deep in the traversal. - let re: RegExp | null = null; + // Compile the pattern up front with RE2 (linear-time, ReDoS-safe) so a bad + // pattern is a clean tool error rather than a failure deep in the traversal — + // and so a catastrophic-backtracking pattern can never wedge the event loop. + // RE2 throws both on syntactically invalid input AND on backtracking-only + // features it does not implement (lookaround, backreferences); both map to the + // same actionable error so the agent rewrites the pattern. + let re: Re2Regex | null = null; if (opts.regex === true) { try { - re = new RegExp(query, caseSensitive ? "g" : "gi"); + re = new RE2(query, caseSensitive ? "g" : "gi"); } catch (e) { throw new Error( - `search_in_page: invalid regular expression: ${ + `search_in_page: invalid or unsupported regular expression: ${ e instanceof Error ? e.message : String(e) - }`, + } — RE2 does not support lookaround ((?=…)/(?<=…)) or backreferences (\\1); rewrite the pattern without them.`, ); } } @@ -198,17 +231,16 @@ export function searchInDoc( if (!isObject(node)) return; if (isTextContainer(node)) { - // Glue this container's inline text into one string (mark-safe) and cap it - // so a single non-interruptible regex exec can never run on an unbounded - // input. - let text = blockPlainText(node); - if (text.length > MAX_CONTAINER_TEXT) { - text = text.slice(0, MAX_CONTAINER_TEXT); - } + // Glue this container's inline text into one string (mark-safe). 
No length + // cap: RE2 scans it in linear time (no ReDoS) and the whole document is + // already in memory, so truncating would only risk dropping real matches + // in a very long container. + const text = blockPlainText(node); - // The container's own id addresses it verbatim in get_node/patch_node and - // comment anchoring; a container with no id (e.g. a table-cell paragraph) - // falls back to the top-level block's #. + // The container's own id addresses it verbatim in get_node/patch_node; a + // container with no id (e.g. a table-cell paragraph) falls back to the + // top-level block's # (readable via get_node, but not patchable — + // see the SearchMatch.nodeId note). const id = isObject(node.attrs) && typeof node.attrs.id === "string" && node.attrs.id.length > 0 ? node.attrs.id diff --git a/packages/mcp/src/tool-specs.ts b/packages/mcp/src/tool-specs.ts index cec8bc89..4f1187a3 100644 --- a/packages/mcp/src/tool-specs.ts +++ b/packages/mcp/src/tool-specs.ts @@ -122,9 +122,13 @@ export const SHARED_TOOL_SPECS = { 'survives bold/italic/link splits; comment anchors do not interfere). ' + 'Returns { total, truncated, matches:[{ nodeId, blockIndex, type, before, ' + 'match, after }] }: `nodeId` is the block id (or "#" for ' + - 'table/cell content) — pass it straight to get_node/patch_node or as a ' + - 'comment anchor; `blockIndex` is the get_outline index; `before`/`after` ' + - 'give ~40 chars of context to build a unique selection. `total` counts all ' + + 'table/cell content) — pass it to get_node/patch_node (the "#" ' + + 'form resolves with get_node but NOT patch_node, which only accepts a real ' + + 'block id). To anchor a comment, do NOT pass nodeId to create_comment (it ' + + 'has no nodeId param); build a UNIQUE text selection from before+match+' + + 'after and pass it as create_comment\'s `selection`. `blockIndex` is the ' + + 'get_outline index; `before`/`after` give ~40 chars of context to build ' + + 'that unique selection. 
`total` counts all ' + 'hits and `truncated` is true when more than `limit` were found (nothing ' + 'is silently dropped). Default is a literal, case-INSENSITIVE substring; ' + 'set regex:true for a JS regular expression (char classes, word ' + diff --git a/packages/mcp/test/unit/page-search.test.mjs b/packages/mcp/test/unit/page-search.test.mjs index c5bd0fe5..5add544d 100644 --- a/packages/mcp/test/unit/page-search.test.mjs +++ b/packages/mcp/test/unit/page-search.test.mjs @@ -2,6 +2,7 @@ import { test } from "node:test"; import assert from "node:assert/strict"; import { searchInDoc } from "../../build/lib/page-search.js"; +import { getNodeByRef } from "../../build/lib/node-ops.js"; // --------------------------------------------------------------------------- // Document builders. Mirror the Docmost ProseMirror shape: paragraphs/headings @@ -181,10 +182,112 @@ test("invalid regex throws a clear tool error", () => { const d = doc(para("p1", text("hi"))); assert.throws( () => searchInDoc(d, "(", { regex: true }), - /invalid regular expression/i, + /invalid or unsupported regular expression/i, ); }); +test("RE2: a catastrophic-backtracking pattern completes FAST and correctly (no ReDoS)", () => { + // (a+)+$ against a long run of 'a' followed by a non-'a' is the classic + // catastrophic-backtracking case that wedges the JS RegExp engine for + // seconds/forever. Under RE2 (linear time) it returns effectively instantly. + const d = doc(para("p1", text("a".repeat(50_000) + "b"))); + const t0 = Date.now(); + const res = searchInDoc(d, "(a+)+$", { regex: true }); + const elapsed = Date.now() - t0; + // No '$'-anchored all-'a' run exists (there's a trailing 'b'), so no match. + assert.equal(res.total, 0); + assert.equal(res.matches.length, 0); + // Generous ceiling: the JS engine would take orders of magnitude longer. 
+ assert.ok(elapsed < 1000, `expected fast completion, took ${elapsed}ms`); +}); + +test("RE2: catastrophic pattern that DOES match still completes fast and finds it", () => { + // (a+)+b matches the whole "aaa…b"; RE2 finds it in linear time. + const d = doc(para("p1", text("a".repeat(20_000) + "b"))); + const t0 = Date.now(); + const res = searchInDoc(d, "(a+)+b", { regex: true }); + const elapsed = Date.now() - t0; + assert.equal(res.total, 1); + assert.equal(res.matches[0].match, "a".repeat(20_000) + "b"); + assert.ok(elapsed < 1000, `expected fast completion, took ${elapsed}ms`); +}); + +test("RE2: unsupported lookaround/backreference patterns yield the clear unsupported-regex error", () => { + const d = doc(para("p1", text("hello"))); + // Lookahead / lookbehind / backreference are backtracking-only features RE2 + // rejects at compile time — a clean tool error, never a hang. + assert.throws( + () => searchInDoc(d, "foo(?=bar)", { regex: true }), + /invalid or unsupported regular expression/i, + ); + assert.throws( + () => searchInDoc(d, "(?<=foo)bar", { regex: true }), + /invalid or unsupported regular expression/i, + ); + assert.throws( + () => searchInDoc(d, "(a)\\1", { regex: true }), + /invalid or unsupported regular expression/i, + ); +}); + +test("F3 round-trip: every match's nodeId resolves through the REAL getNodeByRef consumer", () => { + // A doc mixing an attrs.id paragraph and an id-less table-cell paragraph, so + // both ref formats (block id and "#") are exercised end-to-end. + const cellPara = (t) => ({ type: "paragraph", content: [text(t)] }); + const d = doc( + para("intro", text("find needle here")), // attrs.id ref -> "intro" + { + type: "table", + content: [ + { + type: "tableRow", + content: [ + { type: "tableCell", content: [cellPara("cell needle")] }, // id-less -> "#1" + ], + }, + ], + }, + ); + const res = searchInDoc(d, "needle"); + assert.equal(res.total, 2); + + // Match 0: an attrs.id ref must resolve to that exact paragraph. 
+ assert.equal(res.matches[0].nodeId, "intro"); + const byId = getNodeByRef(d, res.matches[0].nodeId); + assert.ok(byId, "attrs.id ref must resolve via getNodeByRef"); + assert.equal(byId.type, "paragraph"); + assert.equal(byId.node.attrs.id, "intro"); + + // Match 1: an id-less table cell falls back to the table's "#", which + // getNodeByRef resolves to the TOP-LEVEL block (the table) by index. + assert.equal(res.matches[1].nodeId, "#1"); + const byIndex = getNodeByRef(d, res.matches[1].nodeId); + assert.ok(byIndex, "# ref must resolve via getNodeByRef"); + assert.equal(byIndex.type, "table"); +}); + +test("F4: before/after are pinned correctly at string edges (clamp not dropped)", () => { + // Match within the first CONTEXT (40) chars of a container LONGER than + // CONTEXT: before is only the chars that exist, never a negative-index slice. + const head = doc(para("p1", text("ab NEEDLE" + "x".repeat(100)))); + const r1 = searchInDoc(head, "NEEDLE"); + assert.equal(r1.matches.length, 1); + assert.equal(r1.matches[0].before, "ab "); + assert.equal(r1.matches[0].after.length, 40); // plenty of trailing 'x' + + // Match at index 0: before is empty. + const atStart = doc(para("p1", text("NEEDLE tail"))); + const r2 = searchInDoc(atStart, "NEEDLE"); + assert.equal(r2.matches[0].before, ""); + assert.equal(r2.matches[0].after, " tail"); + + // Match at the container END: after is empty. 
+ const atEnd = doc(para("p1", text("lead NEEDLE"))); + const r3 = searchInDoc(atEnd, "NEEDLE"); + assert.equal(r3.matches[0].before, "lead "); + assert.equal(r3.matches[0].after, ""); +}); + test("empty or whitespace-only query is rejected", () => { const d = doc(para("p1", text("hi"))); assert.throws(() => searchInDoc(d, ""), /query is empty/i); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7540bafe..88cb2a3d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1007,6 +1007,9 @@ importers: marked: specifier: ^17.0.1 version: 17.0.5 + re2: + specifier: ^1.21.0 + version: 1.25.0 ws: specifier: 8.20.1 version: 8.20.1 @@ -2852,6 +2855,10 @@ packages: '@ioredis/commands@1.5.1': resolution: {integrity: sha512-JH8ZL/ywcJyR9MmJ5BNqZllXNZQqQbnVZOqpPQqE1vHiFgAw4NHbvE0FOduNU8IX9babitBT46571OnPTT0Zcw==} + '@isaacs/fs-minipass@4.0.1': + resolution: {integrity: sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==} + engines: {node: '>=18.0.0'} + '@istanbuljs/load-nyc-config@1.1.0': resolution: {integrity: sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==} engines: {node: '>=8'} @@ -5552,6 +5559,10 @@ packages: resolution: {integrity: sha512-nrUSn7hzt7J6JWgWGz78ZYI8wj+gdIJdk0Ynjpp8l+trkn58Uqsf6RYrYkEK+3X18EX+TNdtJI0WxAtc+L84SQ==} hasBin: true + abbrev@5.0.0: + resolution: {integrity: sha512-/XrFJgzQQQHpti1raDJC6m4ws6aNktmjBlhk8Fdlk7LwCEuDoieEJJY9OFHjfiFJFFRM2tK+Ky/IsfbbmlMu1w==} + engines: {node: ^22.22.2 || ^24.15.0 || >=26.0.0} + abstract-logging@2.0.1: resolution: {integrity: sha512-2BjRTZxTPvheOvGbBslFSYOUkr+SjPtOnrLP33f+VIWLzezQpZcqVg7ja3L4dBXmzzgwT+a029jRx5PCi3JuiA==} @@ -6019,6 +6030,10 @@ packages: resolution: {integrity: sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==} engines: {node: '>= 14.16.0'} + chownr@3.0.0: + resolution: {integrity: 
sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==} + engines: {node: '>=18'} + chrome-trace-event@1.0.3: resolution: {integrity: sha512-p3KULyQg4S7NIHixdwbGX+nFHkoBiA4YQmyWtjb8XngSKV124nJmRysgAeujbUVb15vh+RvFUfCPqU7rXk+hZg==} engines: {node: '>=6.0'} @@ -6975,6 +6990,9 @@ packages: resolution: {integrity: sha512-1zQrciTiQfRdo7qJM1uG4navm8DayFa2TgCSRlzUyNkhcJ6XUZF3hjnpkyr3VhAqPH7i/9GkG7Tv5abz6fqz0Q==} engines: {node: ^18.14.0 || ^20.0.0 || ^22.0.0 || >=24.0.0} + exponential-backoff@3.1.3: + resolution: {integrity: sha512-ZgEeZXj30q+I0EN+CbSSpIyPaJ5HVQD18Z1m+u1FXbAeT94mr1zw50q4q6jiiC447Nl/YTcIYSAftiGqetwXCA==} + express-rate-limit@8.2.2: resolution: {integrity: sha512-Ybv7bqtOgA914MLwaHWVFXMpMYeR1MQu/D+z2MaLYteqBsTIp9sY3AU7mGNLMJv8eLg8uQMpE20I+L2Lv49nSg==} engines: {node: '>= 16'} @@ -7455,6 +7473,11 @@ packages: inherits@2.0.4: resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + install-artifact-from-github@1.6.0: + resolution: {integrity: sha512-wKsuzN8fy8QK7iEUqyWTQmvZ1QFGPn1xyl3/1iIIDthDjS7Hn9HoPwHlNakZirWbCsbad0lZMkr6Xfbpe1pUzw==} + engines: {node: '>=18'} + hasBin: true + internal-slot@1.1.0: resolution: {integrity: sha512-4gd7VpWNQNB4UKKCFFVcp1AVv+FMOgs9NKzjHKusc8jTMhd5eL1NqQqOpE0KzMds804/yHlglp3uxgluOqAPLw==} engines: {node: '>= 0.4'} @@ -7642,6 +7665,10 @@ packages: isexe@2.0.0: resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + isexe@4.0.0: + resolution: {integrity: sha512-FFUtZMpoZ8RqHS3XeXEmHWLA4thH+ZxCv2lOiPIn1Xc7CxrqhWzNSDzD+/chS/zbYezmiwWLdQC09JdQKmthOw==} + engines: {node: '>=20'} + isomorphic.js@0.2.5: resolution: {integrity: sha512-PIeMbHqMt4DnUP3MA/Flc0HElYjMXArsw1qwJZcm9sqR8mq3l8NYizFMty0pWwE/tzIGH3EKK5+jes5mAr85yw==} @@ -8477,6 +8504,10 @@ packages: resolution: {integrity: 
sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==} engines: {node: '>=16 || 14 >=14.17'} + minizlib@3.1.0: + resolution: {integrity: sha512-KZxYo1BUkWD2TVFLr0MQoM8vUUigWD3LlD83a/75BqC+4qE0Hb1Vo5v1FgcfaNXvfXzr+5EhQ6ing/CaBijTlw==} + engines: {node: '>= 18'} + mitt@3.0.1: resolution: {integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==} @@ -8514,6 +8545,9 @@ packages: resolution: {integrity: sha512-WWdIxpyjEn+FhQJQQv9aQAYlHoNVdzIzUySNV1gHUPDSdZJ3yZn7pAAbQcV7B56Mvu881q9FZV+0Vx2xC44VWA==} engines: {node: ^18.17.0 || >=20.5.0} + nan@2.28.0: + resolution: {integrity: sha512-fTsDz99OTq2sVePhGdp4qQhggZFtKr64ZNVyVajRKtMOkJxYekplBh577PiJB12v/D3s2E5cGtOI45LWp6rnLQ==} + nanoid@3.3.8: resolution: {integrity: sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==} engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} @@ -8606,6 +8640,11 @@ packages: resolution: {integrity: sha512-LA4ZjwlnUblHVgq0oBF3Jl/6h/Nvs5fzBLwdEF4nuxnFdsfajde4WfxtJr3CaiH+F6ewcIB/q4jQ4UzPyid+CQ==} hasBin: true + node-gyp@13.0.1: + resolution: {integrity: sha512-piOr0S10qy5THB+q5BdqkoOx65XL/tjTMUAit3vciPNp+snTOBnGunWH1Rz7XZUxf2T9uFrfT/Ty4+aC3yPeyg==} + engines: {node: ^22.22.2 || ^24.15.0 || >=26.0.0} + hasBin: true + node-int64@0.4.0: resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==} @@ -8616,6 +8655,11 @@ packages: resolution: {integrity: sha512-0PF8Yb1yZuQfQbq+5/pZJrtF6WQcjTd5/S4JOHs9PGFxuTqoB/icwuB44pOdURHJbRKX1PPoJZtY7R4VUoCC8w==} engines: {node: '>=6.0.0'} + nopt@10.0.1: + resolution: {integrity: sha512-df3sBr/6ax9hSGuC3CspvLlbnX8cP5L5nZwXF8cGN8l0zSWR6BvzmQ6jPUKjvo6+/xdpkNvEcucBNUdBeeV13g==} + engines: {node: ^22.22.2 || ^24.15.0 || >=26.0.0} + hasBin: true + normalize-path@3.0.0: resolution: {integrity: 
sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==} engines: {node: '>=0.10.0'} @@ -9135,6 +9179,10 @@ packages: resolution: {integrity: sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==} engines: {node: '>=6'} + proc-log@7.0.0: + resolution: {integrity: sha512-FYgfaA69XZ93zaXLoMNQ+ViDXGGBgR8aLh03txzcFhV+9xOXx7+8DLCULrKKpR9+GsH9ZfHm82aSUPpozX0Ztg==} + engines: {node: ^22.22.2 || ^24.15.0 || >=26.0.0} + process-nextick-args@2.0.1: resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==} @@ -9289,6 +9337,10 @@ packages: resolution: {integrity: sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==} engines: {node: '>= 0.10'} + re2@1.25.0: + resolution: {integrity: sha512-mtxKjWS+VYIt2ijgt6ohEdwzNlGPom1whyaEKJD40cBc/wqkO1vJoOyK539Qb8Xa9m4GA6hiPGDIbW/d3egSRQ==} + engines: {node: ^22.22.2 || ^24.15.0 || >=26.0.0} + react-clear-modal@2.0.18: resolution: {integrity: sha512-Aiv8Bw5NVm19tlUt3RLV2a1I/ya+UlyEZjREosn5G887nnusnefT+ls4AXkuP8XLn1KOah6DrM5MemV7cXgwWg==} peerDependencies: @@ -9963,6 +10015,10 @@ packages: resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==} engines: {node: '>=6'} + tar@7.5.19: + resolution: {integrity: sha512-4LeEWl96twnS2Q7Bz4MGqgazLqO+hJN63GZxXoIqh1T3VweYD997gbU1ItNsQafqqXTXd5WFyFdReLtwvRBNiw==} + engines: {node: '>=18'} + terser-webpack-plugin@5.4.0: resolution: {integrity: sha512-Bn5vxm48flOIfkdl5CaD2+1CiUVbonWQ3KQPyP7/EuIl9Gbzq/gQFOzaMFUEgVjB1396tcK0SG8XcNJ/2kDH8g==} engines: {node: '>= 10.13.0'} @@ -10618,6 +10674,11 @@ packages: engines: {node: '>= 8'} hasBin: true + which@7.0.0: + resolution: {integrity: sha512-RancgH2dmbLdHl6LRhEqvklWMgl/Hdnun0Y90KhBOLkMefg8Qa7/Zel8Sm+8HEcP6DEjzsWzpkuBQEZok58isA==} + engines: {node: ^22.22.2 || ^24.15.0 || >=26.0.0} + hasBin: true + 
why-is-node-running@2.3.0: resolution: {integrity: sha512-hUrmaWBdVDcxvYqnyh09zunKzROWjbZTiNy8dBEjkS7ehEDQibXJ7XvlmtbwuTclUiIyN+CyXQD4Vmko8fNm8w==} engines: {node: '>=8'} @@ -10743,6 +10804,10 @@ packages: yallist@3.1.1: resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==} + yallist@5.0.0: + resolution: {integrity: sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==} + engines: {node: '>=18'} + yaml@1.10.3: resolution: {integrity: sha512-vIYeF1u3CjlhAFekPPAk2h/Kv4T3mAkMox5OymRiJQB0spDP10LHvt+K7G9Ny6NuuMAb25/6n1qyUjAcGNf/AA==} engines: {node: '>= 6'} @@ -13016,6 +13081,10 @@ snapshots: '@ioredis/commands@1.5.1': {} + '@isaacs/fs-minipass@4.0.1': + dependencies: + minipass: 7.1.3 + '@istanbuljs/load-nyc-config@1.1.0': dependencies: camelcase: 5.3.1 @@ -16075,6 +16144,8 @@ snapshots: dependencies: argparse: 2.0.1 + abbrev@5.0.0: {} + abstract-logging@2.0.1: {} accepts@1.3.8: @@ -16633,6 +16704,8 @@ snapshots: dependencies: readdirp: 4.0.2 + chownr@3.0.0: {} + chrome-trace-event@1.0.3: {} ci-info@4.4.0: {} @@ -17748,6 +17821,8 @@ snapshots: jest-mock: 30.3.0 jest-util: 30.3.0 + exponential-backoff@3.1.3: {} + express-rate-limit@8.2.2(express@5.2.1): dependencies: express: 5.2.1 @@ -18283,6 +18358,8 @@ snapshots: inherits@2.0.4: {} + install-artifact-from-github@1.6.0: {} + internal-slot@1.1.0: dependencies: es-errors: 1.3.0 @@ -18454,6 +18531,8 @@ snapshots: isexe@2.0.0: {} + isexe@4.0.0: {} + isomorphic.js@0.2.5: {} istanbul-lib-coverage@3.2.2: {} @@ -19470,6 +19549,10 @@ snapshots: minipass@7.1.3: {} + minizlib@3.1.0: + dependencies: + minipass: 7.1.3 + mitt@3.0.1: {} mlly@1.8.0: @@ -19514,6 +19597,8 @@ snapshots: mute-stream@2.0.0: {} + nan@2.28.0: {} + nanoid@3.3.8: {} nanoid@4.0.2: {} @@ -19574,12 +19659,29 @@ snapshots: node-gyp-build@4.8.4: {} + node-gyp@13.0.1: + dependencies: + env-paths: 2.2.1 + exponential-backoff: 3.1.3 + graceful-fs: 4.2.11 + 
nopt: 10.0.1 + proc-log: 7.0.0 + semver: 7.7.4 + tar: 7.5.19 + tinyglobby: 0.2.15 + undici: 7.24.0 + which: 7.0.0 + node-int64@0.4.0: {} node-releases@2.0.27: {} nodemailer@8.0.5: {} + nopt@10.0.1: + dependencies: + abbrev: 5.0.0 + normalize-path@3.0.0: {} notepack.io@3.0.1: {} @@ -20184,6 +20286,8 @@ snapshots: prismjs@1.30.0: {} + proc-log@7.0.0: {} + process-nextick-args@2.0.1: {} process-warning@4.0.0: {} @@ -20438,6 +20542,12 @@ snapshots: iconv-lite: 0.7.2 unpipe: 1.0.0 + re2@1.25.0: + dependencies: + install-artifact-from-github: 1.6.0 + nan: 2.28.0 + node-gyp: 13.0.1 + react-clear-modal@2.0.18(@types/react@18.3.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1): dependencies: react: 18.3.1 @@ -21221,6 +21331,14 @@ snapshots: inherits: 2.0.4 readable-stream: 3.6.2 + tar@7.5.19: + dependencies: + '@isaacs/fs-minipass': 4.0.1 + chownr: 3.0.0 + minipass: 7.1.3 + minizlib: 3.1.0 + yallist: 5.0.0 + terser-webpack-plugin@5.4.0(@swc/core@1.5.25(@swc/helpers@0.5.5))(webpack@5.106.0(@swc/core@1.5.25(@swc/helpers@0.5.5))): dependencies: '@jridgewell/trace-mapping': 0.3.31 @@ -21910,6 +22028,10 @@ snapshots: dependencies: isexe: 2.0.0 + which@7.0.0: + dependencies: + isexe: 4.0.0 + why-is-node-running@2.3.0: dependencies: siginfo: 2.0.0 @@ -22007,6 +22129,8 @@ snapshots: yallist@3.1.1: {} + yallist@5.0.0: {} + yaml@1.10.3: {} yaml@2.8.3: {} From 086bc1bf8b1041d3a7ca2e4f1d3aa44d41e149dc Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 4 Jul 2026 18:08:27 +0300 Subject: [PATCH 7/7] docs(mcp): search_in_page regex desc names RE2, not JS regex (#330 review F5) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The RE2 swap narrowed the contract: regex:true rejects lookaround ((?=…)/(?<=…)) and backreferences (\1). 
The internal JSDoc was updated, but the AGENT-VISIBLE tool-spec (the only text the agent reads at call time, single-sourced to both transports) still said 'a JS regular expression' — so an agent would write a lookahead/backref and hit an error. Updated the .description and the regex flag .describe() to name RE2 (linear-time, ReDoS-safe), list that char classes / word boundaries / anchors / quantifiers work while lookaround and backreferences do NOT, and keep the 'invalid/unsupported regex -> clear error' note. mcp: tsc clean; tool-specs / server-instructions / contract tests green. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/mcp/build/tool-specs.js | 9 ++++++--- packages/mcp/src/tool-specs.ts | 11 ++++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/packages/mcp/build/tool-specs.js b/packages/mcp/build/tool-specs.js index 51ceb675..33319e18 100644 --- a/packages/mcp/build/tool-specs.js +++ b/packages/mcp/build/tool-specs.js @@ -93,8 +93,10 @@ export const SHARED_TOOL_SPECS = { 'that unique selection. `total` counts all ' + 'hits and `truncated` is true when more than `limit` were found (nothing ' + 'is silently dropped). Default is a literal, case-INSENSITIVE substring; ' + - 'set regex:true for a JS regular expression (char classes, word ' + - 'boundaries) and caseSensitive:true to match case. Ideal for systematic ' + + 'set regex:true for an RE2 regular expression (linear-time, ReDoS-safe: ' + + 'char classes, word boundaries, anchors and quantifiers work; lookaround ' + + '(?=…)/(?<=…) and backreferences \\1 are NOT supported) and ' + + 'caseSensitive:true to match case. Ideal for systematic ' + 'editorial sweeps (unquoted "ё", straight quotes, "т.е.", stray units). 
An ' + 'invalid regex or an empty query returns a clear error to fix.', buildShape: (z) => ({ @@ -106,7 +108,8 @@ export const SHARED_TOOL_SPECS = { regex: z .boolean() .optional() - .describe('Treat query as a JS regular expression (default false).'), + .describe('Treat query as an RE2 regular expression — linear-time, ReDoS-safe; ' + + 'no lookaround or backreferences (default false).'), caseSensitive: z .boolean() .optional() diff --git a/packages/mcp/src/tool-specs.ts b/packages/mcp/src/tool-specs.ts index 4f1187a3..79a2c066 100644 --- a/packages/mcp/src/tool-specs.ts +++ b/packages/mcp/src/tool-specs.ts @@ -131,8 +131,10 @@ export const SHARED_TOOL_SPECS = { 'that unique selection. `total` counts all ' + 'hits and `truncated` is true when more than `limit` were found (nothing ' + 'is silently dropped). Default is a literal, case-INSENSITIVE substring; ' + - 'set regex:true for a JS regular expression (char classes, word ' + - 'boundaries) and caseSensitive:true to match case. Ideal for systematic ' + + 'set regex:true for an RE2 regular expression (linear-time, ReDoS-safe: ' + + 'char classes, word boundaries, anchors and quantifiers work; lookaround ' + + '(?=…)/(?<=…) and backreferences \\1 are NOT supported) and ' + + 'caseSensitive:true to match case. Ideal for systematic ' + 'editorial sweeps (unquoted "ё", straight quotes, "т.е.", stray units). An ' + 'invalid regex or an empty query returns a clear error to fix.', buildShape: (z) => ({ @@ -144,7 +146,10 @@ export const SHARED_TOOL_SPECS = { regex: z .boolean() .optional() - .describe('Treat query as a JS regular expression (default false).'), + .describe( + 'Treat query as an RE2 regular expression — linear-time, ReDoS-safe; ' + + 'no lookaround or backreferences (default false).', + ), caseSensitive: z .boolean() .optional()