diff --git a/.env.example b/.env.example index 7407e629..08eea50a 100644 --- a/.env.example +++ b/.env.example @@ -170,6 +170,20 @@ MCP_DOCMOST_PASSWORD= # Default 900000 (15 min). # AI_MCP_CALL_TIMEOUT_MS=900000 +# --- Autonomous / detached agent runs (settings.ai.autonomousRuns) --- +# Opt-in per workspace (AI settings; off by default). When on, a chat turn becomes +# a server-side RUN that survives a browser disconnect — only an explicit Stop ends +# it, and a client reconnects/live-follows the run. +# +# DEPLOY CONSTRAINT — SINGLE-INSTANCE ONLY in phase 1: Stop and the in-process +# AbortController that backs it are process-local, so a Stop only aborts a run +# executing on the SAME replica that owns it (cross-instance pub/sub stop is phase +# 2 and not yet reliable). Do NOT enable autonomousRuns on a horizontally-scaled +# deployment (multiple replicas behind a load balancer, or Docmost cloud +# CLOUD=true) — run a single instance instead. The server logs a startup WARNING +# when it detects a multi-instance deployment (CLOUD=true) so the constraint is +# visible, and a startup sweep settles any run left dangling by a restart. + # --- Anonymous public-share AI assistant --- # Opt-in per workspace (AI settings -> "public share assistant"; off by default). # When enabled, anonymous visitors of a published share can ask an AI about that diff --git a/AGENTS.md b/AGENTS.md index e8eed03d..e517ee6a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -259,6 +259,7 @@ The API server is a Fastify app with a global `/api` prefix (`main.ts` excludes - `core/ai-chat/tools/` — the agent's ~40 read+write tools. Every tool runs under the **calling user's** CASL permissions via a per-user loopback access token (`docmost-client.loader.ts`), so the agent can never exceed what the user could do. Only **reversible** operations are exposed (page history + trash; no permanent delete). Agent edits get an "AI agent" provenance badge in page history (`20260616T130000-agent-provenance` migration). - `core/ai-chat/embedding/` — RAG indexer + a BullMQ consumer on `AI_QUEUE` that embeds pages into `page_embeddings` (vector search), complementing Postgres full-text search. Pages are (re)indexed on edit; `AI_EMBEDDING_TIMEOUT_MS` bounds a hung embeddings endpoint. - `core/ai-chat/external-mcp/` — admins can attach external MCP servers (e.g. Tavily) to give the agent web access. **`ssrf-guard.ts` validates outbound MCP URLs against SSRF** — keep that guard in the path when touching external-MCP connection logic. + - `core/ai-chat/ai-chat-run.service.ts` + `ai_chat_runs` — **detached/autonomous agent runs** (`#184`), behind the per-workspace `settings.ai.autonomousRuns` flag (off by default). When on, a turn becomes a server-side RUN that survives a browser disconnect; only an explicit `POST /ai-chat/stop` ends it, and a client reconnects/live-follows via `POST /ai-chat/run`. **DEPLOY CONSTRAINT — single-instance only in phase 1:** Stop and the AbortController that backs it are process-local, so a Stop only aborts a run executing on the **same** replica that owns it (cross-instance pub/sub stop is phase 2). Do **not** enable `autonomousRuns` on a horizontally-scaled deployment (multiple replicas behind a load balancer, or Docmost cloud `CLOUD=true`) — run a single instance instead. The server logs a startup WARNING when it detects a multi-instance deployment (`CLOUD=true`) so the constraint is visible. The startup sweep settles any run left dangling by a restart. ### Client structure Vite SPA. Code is organized by feature under `apps/client/src/features/*` (mirrors the server domains: `page`, `space`, `comment`, `ai-chat`, `editor`, …). Conventions: diff --git a/CHANGELOG.md b/CHANGELOG.md index 832615d6..56b9b6f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 append/prepend fragments, nor to COMMENT bodies — a comment may legitimately contain a standalone footnote definition, which canonicalization would drop. (#228) +- **Detached, autonomous agent runs that survive a browser disconnect.** When the + new `settings.ai.autonomousRuns` workspace flag is on (off by default), an + AI-chat turn becomes a first-class, server-side RUN tracked in a new + `ai_chat_runs` table instead of a socket-bound stream: closing the tab or + losing the connection no longer aborts the turn — it keeps executing and + persisting server-side, and only an explicit Stop ends it. A client can + reconnect and live-follow (or stop) an in-flight run via `POST /ai-chat/run` + (resolve the latest run + its assistant message for a chat) and + `POST /ai-chat/stop` (stop by `runId` or `chatId`). A partial unique index + enforces one active run per chat, and a startup sweep settles any run left + dangling by a restart. Phase 1 is single-instance-only (cross-instance Stop is + not yet reliable); the server warns at startup on a horizontally-scaled + deployment. (#184) ### Changed diff --git a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx index 3df60ddb..e349a8fe 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-window.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-window.tsx @@ -17,7 +17,7 @@ import { IconPlus, IconX, } from "@tabler/icons-react"; -import { useAtom, useSetAtom } from "jotai"; +import { useAtom, useAtomValue, useSetAtom } from "jotai"; import { useMatch } from "react-router-dom"; import { useTranslation } from "react-i18next"; import { useQueryClient } from "@tanstack/react-query"; @@ -34,9 +34,12 @@ import { AI_CHATS_RQ_KEY, AI_CHAT_MESSAGES_RQ_KEY, useAiChatMessagesQuery, + useAiChatRunQuery, useAiChatsQuery, useAiRolesQuery, } from "@/features/ai-chat/queries/ai-chat-query.ts"; +import { shouldObserveRun } from "@/features/ai-chat/utils/run-polling.ts"; +import { workspaceAtom } from "@/features/user/atoms/current-user-atom"; import ConversationList from "@/features/ai-chat/components/conversation-list.tsx"; import ChatThread from "@/features/ai-chat/components/chat-thread.tsx"; import { exportAiChat } from "@/features/ai-chat/services/ai-chat-service.ts"; @@ -162,6 +165,61 @@ export default function AiChatWindow() { const { data: messageRows, isLoading: messagesLoading } = useAiChatMessagesQuery(activeChatId ?? undefined); + // #184 reconnect-and-live-follow. Whether detached agent runs are enabled for + // this workspace. The reconnect endpoint itself is NOT flag-gated server-side + // (it is only owner-gated and returns `{ run: null }` when the chat has no + // run); but when the feature is off no runs are ever created, so polling it + // would always come back empty — we gate it off here to avoid pointless polls. + const workspace = useAtomValue(workspaceAtom); + const autonomousRunsEnabled = + workspace?.settings?.ai?.autonomousRuns === true; + + // Whether THIS tab is the one actively streaming the open chat's run locally + // (it started the run here and holds the SSE). Reported up from ChatThread. We + // are the STREAMER while true and a passive OBSERVER while false — the basis of + // the observer-vs-streamer detection. Reset to false by the fresh ChatThread's + // mount effect on every chat switch. + const [localStreaming, setLocalStreaming] = useState(false); + const onStreamingChange = useCallback((streaming: boolean) => { + setLocalStreaming(streaming); + }, []); + + // Poll the latest run of the open chat ONLY when we are a passive observer: + // feature on, a chat is open, and we are NOT the local streamer (the streamer + // already has the live SSE — polling/merging too would double-render). The + // query's own status-keyed refetchInterval stops once the run is terminal. + const { data: runData } = useAiChatRunQuery( + activeChatId ?? undefined, + autonomousRunsEnabled && !localStreaming, + ); + const run = runData?.run ?? null; + // The run's incrementally-persisted assistant message to merge into the thread, + // but only while we are an observer (never when we are the streamer — guards + // against a stale poll fighting the live stream). Includes a terminal run so the + // final persisted output is shown on reopen. + const observedRow = shouldObserveRun(run, localStreaming) + ? (runData?.message ?? null) + : null; + + // When the observed run reaches a terminal status, do a final messages refetch + // so the persisted final state (token/context badge, export source) is shown, + // then the query's refetchInterval has already stopped polling. Deduped per run + // id so it fires exactly once per run, not on every subsequent poll-less render. + const finalizedRunIdRef = useRef(null); + useEffect(() => { + if (!run || !activeChatId) return; + if (run.status === "pending" || run.status === "running") { + // Active again (a new run) — re-arm so its terminal transition fires once. + finalizedRunIdRef.current = null; + return; + } + if (finalizedRunIdRef.current === run.id) return; + finalizedRunIdRef.current = run.id; + queryClient.invalidateQueries({ + queryKey: AI_CHAT_MESSAGES_RQ_KEY(activeChatId), + }); + }, [run, activeChatId, queryClient]); + // The page the user is currently viewing. AiChatWindow lives in a pathless // parent layout route, so useParams() can't see :pageSlug. Match the full // pathname against the authenticated page route instead so "the current page" @@ -636,6 +694,12 @@ export default function AiChatWindow() { assistantName={currentRole?.name} onTurnFinished={onTurnFinished} onServerChatId={onServerChatId} + // #184: live-follow a still-running run when we reopened the chat as + // a passive observer; null when there is nothing to observe or this + // tab is the streamer. onStreamingChange lets the window stop polling + // while we are the streamer. + observedRow={observedRow} + onStreamingChange={onStreamingChange} /> )} diff --git a/apps/client/src/features/ai-chat/components/chat-thread.test.tsx b/apps/client/src/features/ai-chat/components/chat-thread.test.tsx index 94499d0f..985e5c41 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.test.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.test.tsx @@ -11,6 +11,7 @@ const h = vi.hoisted(() => ({ onFinish: null as null | ((arg: Record) => void), sendMessage: vi.fn(), stop: vi.fn(), + setMessages: vi.fn(), transport: null as null | { prepareSendMessagesRequest: (arg: { messages: unknown[]; @@ -30,6 +31,8 @@ vi.mock("@ai-sdk/react", () => ({ status: h.state.status, stop: h.state.stop, error: null, + // #184: ChatThread reads setMessages to merge a polled observer run. + setMessages: h.state.setMessages, }; }, })); @@ -140,3 +143,56 @@ describe("ChatThread — send now (#198)", () => { expect(prep({ messages: [], body: {} }).body.interrupted).toBe(false); }); }); + +// #184 passive-observer merge: when reconnecting to a still-running run, the +// parent feeds the polled run message via `observedRow`; ChatThread merges it via +// setMessages — but ONLY when this tab is NOT itself streaming (the streamer's +// SSE owns the view, so a stale observedRow must never overwrite it). +describe("ChatThread — observer run merge (#184)", () => { + beforeEach(() => { + h.state.onFinish = null; + h.state.setMessages.mockReset(); + }); + + const observedRow = { + id: "a-run", + role: "assistant", + content: "step 1\nstep 2", + metadata: { + parts: [{ type: "text", text: "step 1\nstep 2" }], + }, + createdAt: "2026-01-01T00:00:00Z", + } as const; + + function renderObserver(status: string) { + h.state.status = status; + render( + + + , + ); + } + + it("merges the polled run message when this tab is a passive observer", () => { + renderObserver("ready"); + expect(h.state.setMessages).toHaveBeenCalledTimes(1); + // The updater replaces/append the observed assistant row by id. + const updater = h.state.setMessages.mock.calls[0][0] as ( + prev: { id: string; parts: { text: string }[] }[], + ) => { id: string; parts: { text: string }[] }[]; + const merged = updater([{ id: "u1", parts: [{ text: "hi" }] }]); + expect(merged).toHaveLength(2); + expect(merged[1].id).toBe("a-run"); + expect(merged[1].parts[0].text).toBe("step 1\nstep 2"); + }); + + it("does NOT merge while THIS tab is the streamer (no double-render)", () => { + renderObserver("streaming"); + expect(h.state.setMessages).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx index 875e36f6..c9a0de20 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx @@ -24,6 +24,7 @@ import { } from "@/features/ai-chat/utils/role-launch.ts"; import { describeChatError } from "@/features/ai-chat/utils/error-message.ts"; import { extractServerChatId } from "@/features/ai-chat/utils/adopt-chat-id.ts"; +import { mergeObservedMessage } from "@/features/ai-chat/utils/run-polling.ts"; import { dequeue, enqueueMessage, @@ -86,6 +87,19 @@ interface ChatThreadProps { * Copy/export button available mid-stream). Distinct from onTurnFinished, * which fires only at the terminal outcome. */ onServerChatId?: (serverChatId?: string) => void; + /** #184 reconnect-and-live-follow. When THIS tab reopened a chat whose agent + * run is still going (it is a PASSIVE OBSERVER — it did not start the run here), + * the parent polls the reconnect endpoint and feeds the run's incrementally- + * persisted assistant message here; we merge it into the live list so new + * steps/tool-calls appear as they are persisted. Null when there is nothing to + * observe (no run, feature off, or this tab IS the streamer). The merge is + * ADDITIONALLY guarded by our own `isStreaming`, so a stale value can never + * fight the local stream when we are the streamer. */ + observedRow?: IAiChatMessageRow | null; + /** Report this tab's live streaming status up to the parent, so it can stop + * polling the run while WE are the active streamer (the SSE owns the view) and + * resume once we go idle. Called from an effect on every transition. */ + onStreamingChange?: (streaming: boolean) => void; } /** @@ -131,6 +145,8 @@ export default function ChatThread({ assistantName, onTurnFinished, onServerChatId, + observedRow, + onStreamingChange, }: ChatThreadProps) { const { t } = useTranslation(); @@ -274,7 +290,7 @@ export default function ChatThread({ [], ); - const { messages, sendMessage, status, stop, error } = useChat({ + const { messages, sendMessage, status, stop, error, setMessages } = useChat({ // Stable per-mount key. Existing chats use their real id; new chats use a // generated client id (never `undefined`) so the store is NOT re-created on // every render mid-stream (see `chatStoreId` above). @@ -378,6 +394,27 @@ export default function ChatThread({ const isStreaming = status === "submitted" || status === "streaming"; + // #184: report our live streaming status up so the parent stops polling the run + // while WE are the streamer (the SSE owns the view) and resumes once we go idle. + // Effect (not render) so it never updates parent state during our own render; + // fires on mount with `false`, which also re-syncs the parent after a chat + // switch remounts this thread (a fresh mount is idle until the user sends). + useEffect(() => { + onStreamingChange?.(isStreaming); + }, [isStreaming, onStreamingChange]); + + // #184 passive-observer merge: when the parent feeds a polled run message (we + // reopened a chat whose run is still going and did NOT start it here), merge it + // into the live list so new steps/tool-calls appear as they are persisted. Hard- + // gated by `!isStreaming`: if THIS tab is actually the streamer, the local SSE + // owns the view and a stale observedRow must never overwrite it. `observedRow` + // is a stable per-poll object, so this runs once per poll, not per render. + useEffect(() => { + if (isStreaming || !observedRow) return; + const observed = rowToUiMessage(observedRow); + setMessages((prev) => mergeObservedMessage(prev, observed)); + }, [observedRow, isStreaming, setMessages]); + // "Send now" on a queued message: interrupt the current turn and immediately // send THIS message, keeping the agent's partial output. Other queued messages // stay queued and flush normally after the new turn. Reuses the existing diff --git a/apps/client/src/features/ai-chat/queries/ai-chat-query.ts b/apps/client/src/features/ai-chat/queries/ai-chat-query.ts index 37cf70f8..555d11b9 100644 --- a/apps/client/src/features/ai-chat/queries/ai-chat-query.ts +++ b/apps/client/src/features/ai-chat/queries/ai-chat-query.ts @@ -12,6 +12,7 @@ import { deleteAiChat, deleteAiRole, getAiChatMessages, + getAiChatRun, getAiChats, getAiRoleCatalog, getAiRoleCatalogBundle, @@ -24,6 +25,7 @@ import { import { IAiChat, IAiChatMessageRow, + IAiChatRunResponse, IAiRole, IAiRoleCatalog, IAiRoleCatalogBundle, @@ -34,6 +36,7 @@ import { IAiRoleUpdateFromCatalogResult, } from "@/features/ai-chat/types/ai-chat.types.ts"; import { IPagination } from "@/lib/types.ts"; +import { runPollInterval } from "@/features/ai-chat/utils/run-polling.ts"; export const AI_CHATS_RQ_KEY = ["ai-chats"]; export const AI_ROLES_RQ_KEY = ["ai-roles"]; @@ -51,16 +54,18 @@ export const AI_CHAT_MESSAGES_RQ_KEY = (chatId: string) => [ "ai-chat-messages", chatId, ]; +export const AI_CHAT_RUN_RQ_KEY = (chatId: string) => ["ai-chat-run", chatId]; /** Paginated list of the current user's chats (auto-loads further pages). */ export function useAiChatsQuery() { const query = useInfiniteQuery({ queryKey: AI_CHATS_RQ_KEY, - queryFn: ({ pageParam }) => - getAiChats({ cursor: pageParam, limit: 50 }), + queryFn: ({ pageParam }) => getAiChats({ cursor: pageParam, limit: 50 }), initialPageParam: undefined as string | undefined, getNextPageParam: (lastPage) => - lastPage.meta.hasNextPage ? (lastPage.meta.nextCursor ?? undefined) : undefined, + lastPage.meta.hasNextPage + ? (lastPage.meta.nextCursor ?? undefined) + : undefined, }); const data = useMemo | undefined>(() => { @@ -90,7 +95,9 @@ export function useAiChatMessagesQuery(chatId: string | undefined) { getAiChatMessages({ chatId: chatId as string, cursor: pageParam }), initialPageParam: undefined as string | undefined, getNextPageParam: (lastPage) => - lastPage.meta.hasNextPage ? (lastPage.meta.nextCursor ?? undefined) : undefined, + lastPage.meta.hasNextPage + ? (lastPage.meta.nextCursor ?? undefined) + : undefined, enabled: !!chatId, }); @@ -131,6 +138,34 @@ export function useAiChatMessagesQuery(chatId: string | undefined) { }; } +/** + * Reconnect to a chat's latest agent run and LIVE-FOLLOW it (#184). While the run + * is active the query re-polls every {@link runPollInterval} ms (driven off the + * fetched `run.status`, the same status-keyed refetchInterval pattern as the + * embeddings reindex polling); once the run reaches a terminal status — or there + * is no run — the interval returns `false` and polling stops on its own. Polling + * is thus naturally bounded by the run terminating; no separate timeout cap. + * + * `enabled` gates the whole thing: callers pass `false` when the autonomous-runs + * feature is off (the endpoint is NOT flag-gated server-side, but with the feature + * off the chat has no runs, so polling would only ever return `{ run: null }`) OR + * when THIS tab is the one actively streaming the run (the live SSE owns the view, + * so we must not also poll/merge). The global `retry: false` means a failed fetch + * leaves `data` undefined, so refetchInterval(undefined run) returns false — a + * failed fetch can never spin a tight loop. + */ +export function useAiChatRunQuery( + chatId: string | undefined, + enabled: boolean, +) { + return useQuery({ + queryKey: AI_CHAT_RUN_RQ_KEY(chatId ?? ""), + queryFn: () => getAiChatRun(chatId as string), + enabled: !!chatId && enabled, + refetchInterval: (query) => runPollInterval(query.state.data?.run), + }); +} + export function useRenameAiChatMutation() { const queryClient = useQueryClient(); const { t } = useTranslation(); @@ -280,11 +315,14 @@ export function useImportAiRolesFromCatalogMutation() { mutationFn: (payload) => importAiRolesFromCatalog(payload), onSuccess: (result) => { notifications.show({ - message: t("Imported {{created}}, renamed {{renamed}}, skipped {{skipped}}", { - created: result.created, - renamed: result.renamed, - skipped: result.skipped, - }), + message: t( + "Imported {{created}}, renamed {{renamed}}, skipped {{skipped}}", + { + created: result.created, + renamed: result.renamed, + skipped: result.skipped, + }, + ), }); // Surface partial failures (e.g. unique-name races) as a red warning. if (result.errors.length > 0) { diff --git a/apps/client/src/features/ai-chat/queries/ai-chat-run-query.test.tsx b/apps/client/src/features/ai-chat/queries/ai-chat-run-query.test.tsx new file mode 100644 index 00000000..db09683a --- /dev/null +++ b/apps/client/src/features/ai-chat/queries/ai-chat-run-query.test.tsx @@ -0,0 +1,92 @@ +import { describe, it, expect, vi, beforeEach } from "vitest"; +import React from "react"; +import { renderHook, waitFor } from "@testing-library/react"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import type { IAiChatRunResponse } from "@/features/ai-chat/types/ai-chat.types.ts"; + +// react-i18next is pulled in transitively by ai-chat-query.ts (the mutation hooks +// use it); stub it so the module imports cleanly in this hook test. +vi.mock("react-i18next", () => ({ + useTranslation: () => ({ t: (key: string) => key }), +})); + +vi.mock("@mantine/notifications", () => ({ + notifications: { show: vi.fn() }, +})); + +// Mock the whole service module; only getAiChatRun is exercised here, but the +// other named exports must exist so ai-chat-query.ts imports resolve. +vi.mock("@/features/ai-chat/services/ai-chat-service.ts", () => ({ + getAiChatRun: vi.fn(), + getAiChatMessages: vi.fn(), + getAiChats: vi.fn(), + getAiRoleCatalog: vi.fn(), + getAiRoleCatalogBundle: vi.fn(), + getAiRoles: vi.fn(), + importAiRolesFromCatalog: vi.fn(), + createAiRole: vi.fn(), + deleteAiChat: vi.fn(), + deleteAiRole: vi.fn(), + renameAiChat: vi.fn(), + updateAiRole: vi.fn(), + updateAiRoleFromCatalog: vi.fn(), +})); + +import { getAiChatRun } from "@/features/ai-chat/services/ai-chat-service.ts"; +import { useAiChatRunQuery } from "@/features/ai-chat/queries/ai-chat-query.ts"; + +function createWrapper() { + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false } }, + }); + return function Wrapper({ children }: { children: React.ReactNode }) { + return ( + {children} + ); + }; +} + +const runningResponse: IAiChatRunResponse = { + run: { id: "run-1", chatId: "c1", status: "running" }, + message: { + id: "a1", + role: "assistant", + content: "working...", + createdAt: "2026-01-01T00:00:00Z", + }, +}; + +describe("useAiChatRunQuery — enable gating", () => { + beforeEach(() => { + vi.clearAllMocks(); + }); + + it("fetches the run when enabled (passive observer, feature on)", async () => { + vi.mocked(getAiChatRun).mockResolvedValue(runningResponse); + const { result } = renderHook(() => useAiChatRunQuery("c1", true), { + wrapper: createWrapper(), + }); + await waitFor(() => expect(result.current.isSuccess).toBe(true)); + expect(getAiChatRun).toHaveBeenCalledWith("c1"); + expect(result.current.data?.run?.status).toBe("running"); + }); + + it("does NOT fetch when disabled (this tab is the streamer / feature off)", async () => { + vi.mocked(getAiChatRun).mockResolvedValue(runningResponse); + renderHook(() => useAiChatRunQuery("c1", false), { + wrapper: createWrapper(), + }); + // Give any errant fetch a chance to fire, then assert none did. + await new Promise((r) => setTimeout(r, 20)); + expect(getAiChatRun).not.toHaveBeenCalled(); + }); + + it("does NOT fetch when there is no chat id", async () => { + vi.mocked(getAiChatRun).mockResolvedValue(runningResponse); + renderHook(() => useAiChatRunQuery(undefined, true), { + wrapper: createWrapper(), + }); + await new Promise((r) => setTimeout(r, 20)); + expect(getAiChatRun).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/client/src/features/ai-chat/services/ai-chat-service.ts b/apps/client/src/features/ai-chat/services/ai-chat-service.ts index f8a57d0a..fba0dfad 100644 --- a/apps/client/src/features/ai-chat/services/ai-chat-service.ts +++ b/apps/client/src/features/ai-chat/services/ai-chat-service.ts @@ -5,6 +5,7 @@ import { IAiChatListParams, IAiChatMessageRow, IAiChatMessagesParams, + IAiChatRunResponse, IAiRole, IAiRoleCatalog, IAiRoleCatalogBundle, @@ -42,6 +43,23 @@ export async function getAiChatMessages( return req.data; } +/** + * Reconnect to the latest agent run of a chat (#184). Returns the run's + * persisted lifecycle state and the assistant message it materializes (the + * partial output while the run is in-flight, the final output once it finished). + * The DB is the source of truth, so this works for an in-flight run (the browser + * dropped, the run kept going) and a finished one alike; `{ run: null }` when the + * chat has never had a run. Owner-gated server-side (the requesting user must own + * the chat); it is NOT flag-gated — when the feature is off the chat simply has no + * runs, so the endpoint returns `{ run: null }`. + */ +export async function getAiChatRun( + chatId: string, +): Promise { + const req = await api.post("/ai-chat/run", { chatId }); + return req.data; +} + /** * Resolve the chat bound to a document (the current user's most-recent chat * created on that page), or null when there is none. Drives auto-open-on-page. diff --git a/apps/client/src/features/ai-chat/types/ai-chat.types.ts b/apps/client/src/features/ai-chat/types/ai-chat.types.ts index 78b049c0..8ed54ab2 100644 --- a/apps/client/src/features/ai-chat/types/ai-chat.types.ts +++ b/apps/client/src/features/ai-chat/types/ai-chat.types.ts @@ -200,6 +200,38 @@ export interface IAiChatMessageRow { createdAt: string; } +/** + * A persisted agent-run row (#184), mirroring the `ai_chat_runs` fields the + * client reads from `POST /ai-chat/run`. Only `status` is load-bearing for the + * reconnect-and-live-update UX (it drives the poll cadence); the rest are carried + * for display/diagnostics. The DB is the source of truth, so this resolves for an + * in-flight run (the browser dropped, the run kept going) and a finished one. + */ +export interface IAiChatRun { + id: string; + chatId: string; + // 'pending' | 'running' | 'succeeded' | 'failed' | 'aborted'. The first two are + // ACTIVE (keep polling); the rest are TERMINAL (stop polling). + status: "pending" | "running" | "succeeded" | "failed" | "aborted" | string; + error?: string | null; + stepCount?: number; + assistantMessageId?: string | null; + startedAt?: string | null; + finishedAt?: string | null; + createdAt?: string; + updatedAt?: string; +} + +/** + * Response of `POST /ai-chat/run` (#184): the latest run of a chat and the + * assistant message it materializes (the partial/final output, projected from the + * persisted rows). Both are `null` when the chat has never had a run. + */ +export interface IAiChatRunResponse { + run: IAiChatRun | null; + message: IAiChatMessageRow | null; +} + export interface IAiChatListParams extends QueryParams {} export interface IAiChatMessagesParams { diff --git a/apps/client/src/features/ai-chat/utils/run-polling.test.ts b/apps/client/src/features/ai-chat/utils/run-polling.test.ts new file mode 100644 index 00000000..a1a9cb26 --- /dev/null +++ b/apps/client/src/features/ai-chat/utils/run-polling.test.ts @@ -0,0 +1,104 @@ +import { describe, it, expect } from "vitest"; +import type { UIMessage } from "@ai-sdk/react"; +import type { IAiChatRun } from "@/features/ai-chat/types/ai-chat.types.ts"; +import { + RUN_POLL_INTERVAL_MS, + isRunActive, + runPollInterval, + shouldObserveRun, + mergeObservedMessage, +} from "./run-polling.ts"; + +function makeRun(status: string): IAiChatRun { + return { id: "run-1", chatId: "c1", status }; +} + +function makeMsg(id: string, text: string): UIMessage { + return { + id, + role: "assistant", + parts: [{ type: "text", text }], + } as UIMessage; +} + +describe("isRunActive", () => { + it("treats pending and running as active", () => { + expect(isRunActive(makeRun("pending"))).toBe(true); + expect(isRunActive(makeRun("running"))).toBe(true); + }); + + it("treats terminal / unknown / nullish as not active", () => { + expect(isRunActive(makeRun("succeeded"))).toBe(false); + expect(isRunActive(makeRun("failed"))).toBe(false); + expect(isRunActive(makeRun("aborted"))).toBe(false); + expect(isRunActive(makeRun("weird-future-status"))).toBe(false); + expect(isRunActive(null)).toBe(false); + expect(isRunActive(undefined)).toBe(false); + }); +}); + +describe("runPollInterval (the refetchInterval helper)", () => { + it("returns 2000ms while the run is pending/running", () => { + expect(runPollInterval(makeRun("pending"))).toBe(RUN_POLL_INTERVAL_MS); + expect(runPollInterval(makeRun("running"))).toBe(RUN_POLL_INTERVAL_MS); + expect(RUN_POLL_INTERVAL_MS).toBe(2000); + }); + + it("returns false (stop polling) once the run is terminal", () => { + expect(runPollInterval(makeRun("succeeded"))).toBe(false); + expect(runPollInterval(makeRun("failed"))).toBe(false); + expect(runPollInterval(makeRun("aborted"))).toBe(false); + }); + + it("returns false (no polling) when there is no run", () => { + expect(runPollInterval(null)).toBe(false); + expect(runPollInterval(undefined)).toBe(false); + }); +}); + +describe("shouldObserveRun (observer-vs-streamer decision)", () => { + it("observes an active run when this tab is NOT the local streamer", () => { + expect(shouldObserveRun(makeRun("running"), false)).toBe(true); + expect(shouldObserveRun(makeRun("pending"), false)).toBe(true); + }); + + it("observes a terminal run too (so the final output shows on reopen)", () => { + expect(shouldObserveRun(makeRun("succeeded"), false)).toBe(true); + }); + + it("does NOT observe when this tab IS the streamer (no double-render)", () => { + expect(shouldObserveRun(makeRun("running"), true)).toBe(false); + expect(shouldObserveRun(makeRun("succeeded"), true)).toBe(false); + }); + + it("does NOT observe when there is no run", () => { + expect(shouldObserveRun(null, false)).toBe(false); + expect(shouldObserveRun(undefined, false)).toBe(false); + }); +}); + +describe("mergeObservedMessage", () => { + it("replaces the message with the same id in place (per-step growth)", () => { + const prev = [makeMsg("u1", "hi"), makeMsg("a1", "step 1")]; + const observed = makeMsg("a1", "step 1\nstep 2"); + const next = mergeObservedMessage(prev, observed); + expect(next).toHaveLength(2); + expect(next[1]).toBe(observed); + expect(next[0]).toBe(prev[0]); // untouched + expect(next).not.toBe(prev); // new array (never mutates input) + }); + + it("appends when the observed message is not yet present", () => { + const prev = [makeMsg("u1", "hi")]; + const observed = makeMsg("a1", "first token"); + const next = mergeObservedMessage(prev, observed); + expect(next).toHaveLength(2); + expect(next[1]).toBe(observed); + }); + + it("returns the original list unchanged when there is nothing to merge", () => { + const prev = [makeMsg("u1", "hi")]; + expect(mergeObservedMessage(prev, null)).toBe(prev); + expect(mergeObservedMessage(prev, undefined)).toBe(prev); + }); +}); diff --git a/apps/client/src/features/ai-chat/utils/run-polling.ts b/apps/client/src/features/ai-chat/utils/run-polling.ts new file mode 100644 index 00000000..c6e4c006 --- /dev/null +++ b/apps/client/src/features/ai-chat/utils/run-polling.ts @@ -0,0 +1,71 @@ +import type { UIMessage } from "@ai-sdk/react"; +import type { IAiChatRun } from "@/features/ai-chat/types/ai-chat.types.ts"; + +/** + * Reconnect-and-live-follow helpers (#184). When a chat is reopened while its + * agent run is STILL going, this tab is a PASSIVE OBSERVER: it did not start the + * run here (no local SSE stream), so it catches up by POLLING the reconnect + * endpoint (`POST /ai-chat/run`) and merging the run's incrementally-persisted + * assistant message into the rendered thread. These are the small pure decisions + * that machinery hangs off, extracted so they can be unit-tested in isolation + * (mirrors how reindex polling / editor-sync-state are tested). + */ + +/** How often to re-poll the reconnect endpoint while a run is ACTIVE. */ +export const RUN_POLL_INTERVAL_MS = 2000; + +// 'pending' and 'running' are the two ACTIVE statuses; 'succeeded' | 'failed' | +// 'aborted' are TERMINAL (and any unknown future status is treated as terminal, +// so a stale/odd value never polls forever). +const ACTIVE_STATUSES = new Set(["pending", "running"]); + +/** Whether a run is still going (worth polling / merging live updates from). */ +export function isRunActive(run: IAiChatRun | null | undefined): boolean { + return !!run && ACTIVE_STATUSES.has(run.status); +} + +/** + * The TanStack Query `refetchInterval` value for the run query: poll every + * {@link RUN_POLL_INTERVAL_MS} while the run is active, and `false` (stop) once + * it is terminal or there is no run. Polling is thus naturally bounded by the run + * reaching a terminal status — no separate timeout cap is needed. + */ +export function runPollInterval( + run: IAiChatRun | null | undefined, +): number | false { + return isRunActive(run) ? RUN_POLL_INTERVAL_MS : false; +} + +/** + * Observer-vs-streamer decision. We render the polled run message (catch up + + * keep advancing) ONLY when this tab is a passive observer: there IS a run AND + * this tab is NOT the one locally streaming it (we reconnected, we didn't start + * it here). When this tab is the streamer, the live SSE stream owns the view, so + * we neither poll nor merge — avoiding a double-render fight. Terminal runs still + * merge (so the final persisted output is shown on reopen); the poll itself is + * stopped separately by {@link runPollInterval}. + */ +export function shouldObserveRun( + run: IAiChatRun | null | undefined, + localStreaming: boolean, +): boolean { + return !!run && !localStreaming; +} + +/** + * Merge an observed assistant message into the rendered list: replace the message + * with the same id in place (the in-progress assistant row is already seeded from + * history, so per-step growth replaces it), or append it when absent. Returns a + * new array; the input is never mutated. + */ +export function mergeObservedMessage( + messages: UIMessage[], + observed: UIMessage | null | undefined, +): UIMessage[] { + if (!observed) return messages; + const idx = messages.findIndex((m) => m.id === observed.id); + if (idx === -1) return [...messages, observed]; + const next = messages.slice(); + next[idx] = observed; + return next; +} diff --git a/apps/client/src/features/workspace/types/workspace.types.ts b/apps/client/src/features/workspace/types/workspace.types.ts index 99824db6..f84eb550 100644 --- a/apps/client/src/features/workspace/types/workspace.types.ts +++ b/apps/client/src/features/workspace/types/workspace.types.ts @@ -65,6 +65,9 @@ export interface IWorkspaceAiSettings { dictation?: boolean; dictationStreaming?: boolean; publicShareAssistant?: boolean; + // #184: detached agent runs (a run survives a browser disconnect and can be + // reconnected to / live-followed on reopen). Gates the run-reconnect polling. + autonomousRuns?: boolean; } export interface IWorkspaceSharingSettings { diff --git a/apps/server/src/core/ai-chat/ai-chat-run.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat-run.service.spec.ts new file mode 100644 index 00000000..c95a4d7e --- /dev/null +++ b/apps/server/src/core/ai-chat/ai-chat-run.service.spec.ts @@ -0,0 +1,492 @@ +import { Logger } from '@nestjs/common'; +import { + AiChatRunService, + RunAlreadyActiveError, + ONE_ACTIVE_RUN_PER_CHAT_INDEX, + mapTurnStatusToRun, +} from './ai-chat-run.service'; + +/** Shape a Postgres unique-violation the way the postgres.js driver surfaces it: + * SQLSTATE 23505 + the offending index in `constraint_name`. */ +function uniqueViolation(constraintName: string): Error & { + code: string; + constraint_name: string; +} { + return Object.assign( + new Error('duplicate key value violates unique constraint'), + { + code: '23505', + constraint_name: constraintName, + }, + ); +} + +/** + * Unit coverage for the #184 phase-1 run lifecycle (AiChatRunService) with a + * hand-rolled mock repo — no Nest graph, no DB. The invariant under test is the + * one that makes a run "autonomous": a run keeps going when its SUBSCRIBER (the + * browser) detaches, and ONLY an explicit stop aborts it. We assert that at the + * abort-signal level (the signal the agent loop actually consumes). + */ + +/** Minimal EnvironmentService stub. Single-instance (CLOUD unset) by default. */ +function makeEnv(isCloud = false) { + return { isCloud: () => isCloud }; +} + +function makeRepo(overrides: Record = {}) { + return { + insert: jest.fn(async (v: any) => ({ + id: 'run-1', + status: v.status ?? 'running', + chatId: v.chatId, + workspaceId: v.workspaceId, + })), + update: jest.fn(async () => ({ id: 'run-1' })), + markStopRequested: jest.fn(async () => ({ id: 'run-1' })), + findActiveByChat: jest.fn(async () => undefined), + findLatestByChat: jest.fn(async () => undefined), + findById: jest.fn(async () => undefined), + sweepRunning: jest.fn(async () => 0), + ...overrides, + }; +} + +describe('mapTurnStatusToRun', () => { + it('maps the turn terminal status to the run terminal status', () => { + expect(mapTurnStatusToRun('completed')).toBe('succeeded'); + expect(mapTurnStatusToRun('error')).toBe('failed'); + expect(mapTurnStatusToRun('aborted')).toBe('aborted'); + }); +}); + +describe('AiChatRunService.onModuleInit (startup sweep)', () => { + afterEach(() => jest.restoreAllMocks()); + + it('calls sweepRunning and resolves; logs when > 0', async () => { + const repo = makeRepo({ sweepRunning: jest.fn(async () => 2) }); + const logSpy = jest + .spyOn(Logger.prototype, 'log') + .mockImplementation(() => undefined); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await expect(svc.onModuleInit()).resolves.toBeUndefined(); + expect(repo.sweepRunning).toHaveBeenCalledTimes(1); + expect(logSpy).toHaveBeenCalledTimes(1); + expect(String(logSpy.mock.calls[0][0])).toContain('2'); + }); + + it('a sweep failure is swallowed (never blocks startup)', async () => { + const repo = makeRepo({ + sweepRunning: jest.fn(async () => { + throw new Error('db down'); + }), + }); + const warnSpy = jest + .spyOn(Logger.prototype, 'warn') + .mockImplementation(() => undefined); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await expect(svc.onModuleInit()).resolves.toBeUndefined(); + // The first warn is the sweep failure (the multi-instance warn never fires + // single-instance), so the message is the db error. + expect(String(warnSpy.mock.calls[0][0])).toContain('db down'); + }); + + it('F1 (DECISION C): the boot sweep is UNCONDITIONAL — sweepRunning is called with NO staleness window, so a fresh running run (updatedAt = now) is settled, not skipped', async () => { + // The bug: a fast restart (deploy/OOM within minutes of the last step) left a + // run stuck 'running' under the old 10-min window, 409ing every later turn in + // the chat. The fix settles ALL pending|running on boot. We assert the service + // invokes sweepRunning with no `staleMs` (the unconditional path); the repo's + // own spec proves no-window => no updatedAt filter. + const repo = makeRepo({ sweepRunning: jest.fn(async () => 1) }); + jest.spyOn(Logger.prototype, 'log').mockImplementation(() => undefined); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await svc.onModuleInit(); + expect(repo.sweepRunning).toHaveBeenCalledTimes(1); + const callArgs = repo.sweepRunning.mock.calls[0] as unknown[]; + const firstArg = callArgs[0] as { staleMs?: number } | undefined; + // Either no opts at all, or opts without a staleMs window => unconditional. + expect(firstArg?.staleMs).toBeUndefined(); + }); + + it('F2 (DECISION A): warns at startup that autonomousRuns is single-instance-only when a horizontally-scaled deployment (CLOUD) is detected', async () => { + const repo = makeRepo(); + const warnSpy = jest + .spyOn(Logger.prototype, 'warn') + .mockImplementation(() => undefined); + const svc = new AiChatRunService(repo as never, makeEnv(true) as never); + await svc.onModuleInit(); + const warned = warnSpy.mock.calls.some((c) => + /single-instance-only/i.test(String(c[0])), + ); + expect(warned).toBe(true); + }); + + it('F2: does NOT warn about multi-instance on a single-instance (CLOUD unset) deployment', async () => { + const repo = makeRepo(); + const warnSpy = jest + .spyOn(Logger.prototype, 'warn') + .mockImplementation(() => undefined); + const svc = new AiChatRunService(repo as never, makeEnv(false) as never); + await svc.onModuleInit(); + const warned = warnSpy.mock.calls.some((c) => + /single-instance-only/i.test(String(c[0])), + ); + expect(warned).toBe(false); + }); +}); + +describe('AiChatRunService run lifecycle', () => { + it('beginRun inserts a running row and registers a live abort controller', async () => { + const repo = makeRepo(); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + const handle = await svc.beginRun({ + chatId: 'chat-1', + workspaceId: 'ws-1', + userId: 'user-1', + }); + expect(repo.insert).toHaveBeenCalledWith( + expect.objectContaining({ + chatId: 'chat-1', + workspaceId: 'ws-1', + createdBy: 'user-1', + status: 'running', + trigger: 'user', + }), + ); + expect(handle.runId).toBe('run-1'); + expect(handle.signal.aborted).toBe(false); + expect(svc.isLocallyActive('run-1')).toBe(true); + }); + + it('beginRun REJECTS the racer: a 23505 on the one-active-per-chat index throws RunAlreadyActiveError (not swallowed) and registers no controller', async () => { + // The race: the controller's cheap pre-check passed for BOTH concurrent + // turns, so the loser's INSERT hits the partial unique index. That rejection + // is the authoritative gate — it must surface, not be swallowed into an + // untracked turn. + const repo = makeRepo({ + insert: jest.fn(async () => { + throw uniqueViolation(ONE_ACTIVE_RUN_PER_CHAT_INDEX); + }), + }); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await expect( + svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }), + ).rejects.toBeInstanceOf(RunAlreadyActiveError); + // No controller leaked for a rejected start. + expect(svc.isLocallyActive('run-1')).toBe(false); + }); + + it('beginRun does NOT mask an unrelated unique violation as already-active', async () => { + // A 23505 on some OTHER constraint is a real bug, not the race — it must + // propagate unchanged so it is never silently treated as "already active". + const other = uniqueViolation('ai_chat_runs_pkey'); + const repo = makeRepo({ + insert: jest.fn(async () => { + throw other; + }), + }); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await expect( + svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }), + ).rejects.toBe(other); + }); + + it('beginRun propagates a non-unique insert failure unchanged', async () => { + const boom = new Error('connection reset'); + const repo = makeRepo({ + insert: jest.fn(async () => { + throw boom; + }), + }); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await expect( + svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }), + ).rejects.toBe(boom); + }); + + it('two concurrent begins on one chat: exactly one wins, the other is rejected as already-active', async () => { + // Integration-style: model the DB partial unique index with a one-shot slot. + // The first insert claims it; the second hits a 23505 on the active index. + let slotTaken = false; + const repo = makeRepo({ + insert: jest.fn(async (v: any) => { + if (slotTaken) throw uniqueViolation(ONE_ACTIVE_RUN_PER_CHAT_INDEX); + slotTaken = true; + return { id: 'run-win', status: v.status, chatId: v.chatId }; + }), + }); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + const results = await Promise.allSettled([ + svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }), + svc.beginRun({ chatId: 'chat-1', workspaceId: 'ws-1', userId: 'user-1' }), + ]); + const fulfilled = results.filter((r) => r.status === 'fulfilled'); + const rejected = results.filter((r) => r.status === 'rejected'); + expect(fulfilled).toHaveLength(1); + expect(rejected).toHaveLength(1); + expect((rejected[0] as PromiseRejectedResult).reason).toBeInstanceOf( + RunAlreadyActiveError, + ); + // Exactly the winner is locally active. + expect(svc.isLocallyActive('run-win')).toBe(true); + }); + + it('a SUBSCRIBER detaching does NOT abort the run (only an explicit stop does)', async () => { + const repo = makeRepo(); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + const handle = await svc.beginRun({ + chatId: 'chat-1', + workspaceId: 'ws-1', + userId: 'user-1', + }); + // Model a browser disconnect: nothing in the run service is told to stop. + // The signal the agent loop consumes must stay un-aborted and the run stays + // locally active — i.e. it keeps running server-side. + expect(handle.signal.aborted).toBe(false); + expect(svc.isLocallyActive('run-1')).toBe(true); + // markStopRequested was never called by a mere detach. + expect(repo.markStopRequested).not.toHaveBeenCalled(); + }); + + it('requestStop aborts the live controller, marks the row, and reports true', async () => { + const repo = makeRepo(); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + const handle = await svc.beginRun({ + chatId: 'chat-1', + workspaceId: 'ws-1', + userId: 'user-1', + }); + const aborted = jest.fn(); + handle.signal.addEventListener('abort', aborted); + + const result = await svc.requestStop('run-1', 'ws-1'); + + expect(result).toBe(true); + expect(handle.signal.aborted).toBe(true); + expect(aborted).toHaveBeenCalledTimes(1); + expect(repo.markStopRequested).toHaveBeenCalledWith('run-1', 'ws-1'); + }); + + it('requestStop on a run this replica does NOT hold still marks the row (true)', async () => { + // e.g. after a restart, or a sibling replica owns the controller. The row is + // marked so the owning replica/sweep settles it; we report a stop took effect. + const repo = makeRepo({ + markStopRequested: jest.fn(async () => ({ id: 'run-9' })), + }); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + const result = await svc.requestStop('run-9', 'ws-1'); + expect(result).toBe(true); + expect(svc.isLocallyActive('run-9')).toBe(false); + }); + + it('requestStop on an already-settled run (nothing active) reports false', async () => { + const repo = makeRepo({ + markStopRequested: jest.fn(async () => undefined), + }); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + const result = await svc.requestStop('run-done', 'ws-1'); + expect(result).toBe(false); + }); + + it('finalizeRun settles the row to the mapped status with finishedAt and drops the in-memory entry', async () => { + const repo = makeRepo(); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await svc.beginRun({ + chatId: 'chat-1', + workspaceId: 'ws-1', + userId: 'user-1', + }); + expect(svc.isLocallyActive('run-1')).toBe(true); + + await svc.finalizeRun('run-1', 'ws-1', 'error', 'provider blew up'); + + expect(svc.isLocallyActive('run-1')).toBe(false); + expect(repo.update).toHaveBeenCalledWith( + 'run-1', + 'ws-1', + expect.objectContaining({ + status: 'failed', + error: 'provider blew up', + finishedAt: expect.any(Date), + }), + ); + }); + + it('finalizeRun is IDEMPOTENT: a second settle no-ops (single terminal write)', async () => { + // The #184 review fix: AiChatService.stream wraps the turn in a safety-net + // catch that settles a failed turn AND streamText's terminal callback may + // also settle — both routes call finalizeRun. Only the FIRST may write the + // terminal row; the second must no-op so a late settle can never clobber the + // real terminal status or double-write the row. + const repo = makeRepo(); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await svc.beginRun({ + chatId: 'chat-1', + workspaceId: 'ws-1', + userId: 'user-1', + }); + + await svc.finalizeRun('run-1', 'ws-1', 'error', 'first'); + expect(svc.isLocallyActive('run-1')).toBe(false); + // A second settle (e.g. a streamText callback firing after the catch) no-ops. + await svc.finalizeRun('run-1', 'ws-1', 'completed', undefined); + + expect(repo.update).toHaveBeenCalledTimes(1); + expect(repo.update).toHaveBeenCalledWith( + 'run-1', + 'ws-1', + expect.objectContaining({ status: 'failed', error: 'first' }), + ); + }); + + it('CONCURRENCY: two simultaneous finalizeRun on the same run write the terminal row EXACTLY ONCE (the 2nd caller exits synchronously at the atomic claim)', async () => { + // The CRITICAL race: AiChatService.stream's safety-net catch settles the turn + // to 'error' while a streamText terminal callback also settles it — both call + // finalizeRun for the SAME runId. The once-gate must close ATOMICALLY: a + // `settled.has` check alone is read BEFORE the awaited UPDATE, so both callers + // would pass it and BOTH write the row (last-write-wins clobber + double + // write). The fix claims the run with a SYNCHRONOUS `active.delete` before any + // await, so the second caller returns in the same tick, before the UPDATE. + // + // We force the two calls to overlap by making `update` return a promise we + // resolve only AFTER both finalizeRun calls have run their synchronous bodies. + let resolveUpdate!: (v: unknown) => void; + const updateGate = new Promise((res) => { + resolveUpdate = res; + }); + const update = jest.fn(() => updateGate); + const repo = makeRepo({ update }); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await svc.beginRun({ + chatId: 'chat-1', + workspaceId: 'ws-1', + userId: 'user-1', + }); + + // Fire both before the (pending) update resolves. The first synchronously + // claims the entry (active.delete) and awaits update; the second, started in + // the same macrotask, finds the entry already gone and returns at the claim + // WITHOUT ever calling update. + const p1 = svc.finalizeRun('run-1', 'ws-1', 'completed'); + const p2 = svc.finalizeRun('run-1', 'ws-1', 'error', 'safety-net'); + + // The decisive assertion: exactly one caller reached the terminal UPDATE. + expect(update).toHaveBeenCalledTimes(1); + + // Let the single in-flight update land; both calls resolve cleanly. + resolveUpdate({ id: 'run-1' }); + await Promise.all([p1, p2]); + + expect(update).toHaveBeenCalledTimes(1); + // The winner is the FIRST caller ('completed' -> 'succeeded'); the late + // 'error' settle never wrote, so it could not clobber the real status. + expect(update).toHaveBeenCalledWith( + 'run-1', + 'ws-1', + expect.objectContaining({ status: 'succeeded' }), + ); + expect(svc.isLocallyActive('run-1')).toBe(false); + }); + + it('F6: a TRANSIENT terminal-write failure is ridden out by the bounded retry — the run is settled, not stranded', async () => { + // The bug: finalizeRun used to DROP the in-memory entry BEFORE the terminal + // UPDATE, then only warn-log a failure. A single transient blip (pool + // exhaustion / deadlock / connection hiccup) on that PK UPDATE left the row + // 'running' with nothing left to recover it -> every later turn in that chat + // 409s until a restart. The fix updates FIRST and retries. + let calls = 0; + const repo = makeRepo({ + update: jest.fn(async () => { + calls += 1; + if (calls === 1) throw new Error('deadlock detected'); + return { id: 'run-1' }; + }), + }); + jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await svc.beginRun({ + chatId: 'chat-1', + workspaceId: 'ws-1', + userId: 'user-1', + }); + + await svc.finalizeRun('run-1', 'ws-1', 'completed'); + + // The retry landed the terminal write: the entry is dropped (slot freed) and + // the row carries the real terminal status — NOT stranded at 'running'. + expect(svc.isLocallyActive('run-1')).toBe(false); + expect(repo.update).toHaveBeenCalledTimes(2); + expect(repo.update).toHaveBeenLastCalledWith( + 'run-1', + 'ws-1', + expect.objectContaining({ status: 'succeeded' }), + ); + }); + + it('F6: if the terminal write keeps failing, the entry is RETAINED and a LATER settle completes it (chat not permanently 409d)', async () => { + // Worst case: the DB is down for the whole first finalize (all attempts fail). + // The run must NOT be silently lost — the entry stays so a subsequent settle + // (a streamText callback, requestStop -> onAbort, or a future sweep) can retry. + let healthy = false; + const repo = makeRepo({ + update: jest.fn(async () => { + if (!healthy) throw new Error('pool exhausted'); + return { id: 'run-1' }; + }), + }); + jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined); + const errorSpy = jest + .spyOn(Logger.prototype, 'error') + .mockImplementation(() => undefined); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await svc.beginRun({ + chatId: 'chat-1', + workspaceId: 'ws-1', + userId: 'user-1', + }); + + // First settle: every bounded attempt fails -> entry retained, NOT settled. + await svc.finalizeRun('run-1', 'ws-1', 'completed'); + expect(svc.isLocallyActive('run-1')).toBe(true); + // F12: the give-up emits ONE explicit, greppable ERROR (run + chat context) + // so an operator can tell "gave up, run held in memory" from a per-attempt + // blip — distinct from the per-attempt warns. + const gaveUp = errorSpy.mock.calls.some( + (c) => + /NON-TERMINAL/.test(String(c[0])) && + /run-1/.test(String(c[0])) && + /chat-1/.test(String(c[0])), + ); + expect(gaveUp).toBe(true); + + // The DB recovers; a later settle now succeeds and frees the slot. + healthy = true; + await svc.finalizeRun('run-1', 'ws-1', 'completed'); + expect(svc.isLocallyActive('run-1')).toBe(false); + expect(repo.update).toHaveBeenLastCalledWith( + 'run-1', + 'ws-1', + expect.objectContaining({ status: 'succeeded' }), + ); + + // And it is now idempotent: a further settle no-ops (terminal row already + // written), so a double-settle can never clobber the real status. + const callsBefore = repo.update.mock.calls.length; + await svc.finalizeRun('run-1', 'ws-1', 'error', 'late'); + expect(repo.update).toHaveBeenCalledTimes(callsBefore); + }); + + it('recordStep / linkAssistantMessage are best-effort: a repo failure is swallowed', async () => { + const repo = makeRepo({ + update: jest.fn(async () => { + throw new Error('transient'); + }), + }); + jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined); + const svc = new AiChatRunService(repo as never, makeEnv() as never); + await expect(svc.recordStep('run-1', 'ws-1', 3)).resolves.toBeUndefined(); + await expect( + svc.linkAssistantMessage('run-1', 'ws-1', 'msg-1'), + ).resolves.toBeUndefined(); + }); +}); diff --git a/apps/server/src/core/ai-chat/ai-chat-run.service.ts b/apps/server/src/core/ai-chat/ai-chat-run.service.ts new file mode 100644 index 00000000..9ec61df6 --- /dev/null +++ b/apps/server/src/core/ai-chat/ai-chat-run.service.ts @@ -0,0 +1,426 @@ +import { Injectable, Logger, OnModuleInit } from '@nestjs/common'; +import { AiChatRunRepo } from '@docmost/db/repos/ai-chat/ai-chat-run.repo'; +import { AiChatRun } from '@docmost/db/types/entity.types'; +import { isUniqueViolation, violatedConstraint } from '@docmost/db/utils'; +import { EnvironmentService } from '../../integrations/environment/environment.service'; + +/** Name of the partial unique index enforcing "one active run per chat" (see the + * ai_chat_runs migration). A 23505 on THIS constraint is the race-safe signal + * that a concurrent turn already owns the chat — distinct from any other unique + * collision, which must NOT be silently treated as "already active". */ +export const ONE_ACTIVE_RUN_PER_CHAT_INDEX = 'ai_chat_runs_one_active_per_chat'; + +/** + * Thrown by {@link AiChatRunService.beginRun} when the run-row INSERT loses the + * race for a chat's single active slot (the partial unique index rejects it with + * a 23505). This is the AUTHORITATIVE concurrency gate: the controller's cheap + * pre-check is only a fast-path, and a request that slips past it must NOT run + * untracked. The caller (AiChatService.stream) translates this into a 409 and + * aborts the turn BEFORE any AI/provider call. + */ +export class RunAlreadyActiveError extends Error { + constructor(public readonly chatId: string) { + super(`An agent run is already in progress for chat ${chatId}`); + this.name = 'RunAlreadyActiveError'; + } +} + +/** + * The terminal status of a TURN (the #183 assistant-row lifecycle) maps onto the + * terminal status of a RUN (#184). A turn that completed -> the run succeeded; a + * turn that errored -> the run failed; a turn aborted (explicit user stop) -> the + * run aborted. Pure + unit-testable. + */ +export type TurnTerminalStatus = 'completed' | 'error' | 'aborted'; +export type RunTerminalStatus = 'succeeded' | 'failed' | 'aborted'; + +export function mapTurnStatusToRun( + status: TurnTerminalStatus, +): RunTerminalStatus { + switch (status) { + case 'completed': + return 'succeeded'; + case 'error': + return 'failed'; + case 'aborted': + return 'aborted'; + } +} + +/** An in-flight run held in process memory: its AbortController is the ONLY thing + * that can stop the turn (an explicit user stop), independent of the browser + * socket. A mere disconnect never touches it, so the run keeps going. */ +interface ActiveRun { + controller: AbortController; + chatId: string; + workspaceId: string; +} + +/** The live handle the streaming path drives a run through (returned by + * {@link AiChatRunService.beginRun}). The `signal` governs the agent loop's + * abort — wired to the run, NOT to the HTTP socket. */ +export interface RunHandle { + runId: string; + signal: AbortSignal; +} + +/** + * AiChatRunService (#184 phase 1) — owns the agent RUN as a first-class, + * server-side lifecycle object detached from the HTTP request / browser window. + * + * Responsibilities: + * - create a run row when a turn starts (pending -> running) and register an + * in-memory AbortController for it (the explicit-stop lever); + * - finalize the run row (succeeded / failed / aborted) and unregister it; + * - service an EXPLICIT user stop (`requestStop`) — the ONLY thing that aborts a + * run; a browser disconnect deliberately does NOT; + * - crash-recovery sweep of dangling runs on startup. + * + * The agent loop itself still runs in AiChatService.stream (reusing #183's + * step-granular durable write path, `consumeStream` already drains it independent + * of the socket); this service only wraps it in a durable lifecycle and an + * abort handle that outlives the subscriber. + */ +@Injectable() +export class AiChatRunService implements OnModuleInit { + private readonly logger = new Logger(AiChatRunService.name); + + // runId -> ActiveRun. Process-local on purpose (phase 1 is single-process / + // in-memory transport; a cross-process BullMQ runner + Redis stop-signal is + // deferred to phase 2). A stop for a runId not in this map (e.g. after a + // restart) still records `stop_requested_at` on the row. + private readonly active = new Map(); + + // runIds whose TERMINAL row write has SUCCEEDED — the idempotency once-gate + // (F6). A finalize must short-circuit only AFTER the terminal write has landed, + // NOT merely after the in-memory entry was dropped: a transient UPDATE failure + // has to stay retryable, so "already settled" means "row already terminal", not + // "entry already gone". Grows by one short UUID per finished run over process + // uptime — negligible in phase 1's single process. + private readonly settled = new Set(); + + // Bounded retry for the terminal write (F6): a single PK UPDATE can fail + // transiently under many fire-and-forget writes (pool exhaustion, deadlock, a + // brief connection blip). Riding out that blip in-place matters because the + // dominant success path (streamText onFinish) settles exactly ONCE — if that + // write is dropped and never retried, the row is stranded 'running' and the + // one-active-run gate 409s every future turn in the chat until a restart (no + // periodic sweep in phase 1). + private static readonly FINALIZE_MAX_ATTEMPTS = 3; + private static readonly FINALIZE_RETRY_BASE_MS = 50; + + constructor( + private readonly runRepo: AiChatRunRepo, + private readonly environment: EnvironmentService, + ) {} + + /** + * Crash-recovery sweep on server start: settle EVERY run still left + * pending/running to 'aborted' (F1 / DECISION C). The boot sweep is + * UNCONDITIONAL — no staleness window — because phase 1 is single-process: on a + * fresh boot any pending|running run is definitionally hung (no live runner owns + * it), so even a fast restart (deploy/OOM within minutes of the last step) can + * no longer leave a run stuck 'running' forever (which would make the + * one-active-run gate 409 every future turn in that chat). The staleness window + * is reintroduced only for the phase-2 multi-instance timer sweep, where a + * booting replica must not abort a run another replica is actively executing. + * Best-effort — a sweep failure is logged but MUST NOT block startup (mirrors + * AiChatService.onModuleInit for #183). + */ + async onModuleInit(): Promise { + this.warnIfMultiInstance(); + try { + // No `staleMs`: unconditional boot sweep (F1). See AiChatRunRepo.sweepRunning. + const swept = await this.runRepo.sweepRunning(); + if (swept > 0) { + this.logger.log( + `Startup sweep: marked ${swept} dangling agent run(s) as 'aborted'.`, + ); + } + } catch (err) { + this.logger.warn( + `Startup sweep of dangling runs failed: ${ + err instanceof Error ? err.message : 'unknown error' + }`, + ); + } + } + + /** + * F2 (DECISION A): autonomous runs are SINGLE-INSTANCE-ONLY in phase 1. An + * explicit Stop, and the in-memory AbortController that backs it, are + * process-local: a Stop only aborts the live turn if it lands on the SAME + * replica that owns the run (it still stamps `stop_requested_at` cross-instance, + * but nothing reads that flag during an active run yet). Cross-instance pub/sub + * stop is phase 2. So if the deployment is horizontally scaled, warn loudly at + * startup that a Stop may not reach a run executing on another replica. + * + * DETECTION: this codebase always wires the socket.io Redis adapter (REDIS_URL + * is mandatory), so the adapter alone is NOT a horizontal-scaling signal. The + * authoritative signal the codebase has is `CLOUD=true` (EnvironmentService + * .isCloud()), the Docmost-cloud multi-replica deployment. We warn whenever that + * is set, because any workspace could enable settings.ai.autonomousRuns. A + * self-hosted operator running multiple replicas behind a load balancer is also + * multi-instance; the deploy docs (.env.example / AGENTS.md) spell out the + * single-instance constraint for that case. + */ + private warnIfMultiInstance(): void { + if (this.environment.isCloud()) { + this.logger.warn( + 'Autonomous agent runs (settings.ai.autonomousRuns) are SINGLE-INSTANCE-ONLY ' + + 'in phase 1: a horizontally-scaled deployment was detected (CLOUD=true). ' + + 'An explicit Stop only aborts a run executing on the same replica that owns ' + + 'it (cross-instance Stop is not yet reliable — phase 2). Run a single ' + + 'instance if you enable autonomousRuns, or keep the flag off.', + ); + } + } + + /** + * Start a run for a turn: insert the run row (status 'running', startedAt now), + * register a fresh AbortController for it, and return a {@link RunHandle} whose + * `signal` the agent loop uses. The DB partial unique index guarantees at most + * one active run per chat — a second concurrent start on the same chat REJECTS + * at the insert (a 23505 on {@link ONE_ACTIVE_RUN_PER_CHAT_INDEX}). That + * rejection is the AUTHORITATIVE race gate: it is surfaced as a distinct + * {@link RunAlreadyActiveError} (NOT swallowed), so the caller turns it into a + * 409 and never streams an untracked turn. The controller is registered AFTER a + * successful insert so a rejected start leaks nothing. + */ + async beginRun(args: { + chatId: string; + workspaceId: string; + userId: string; + trigger?: string; + }): Promise { + let run: AiChatRun; + try { + run = await this.runRepo.insert({ + chatId: args.chatId, + workspaceId: args.workspaceId, + createdBy: args.userId, + trigger: args.trigger ?? 'user', + status: 'running', + startedAt: new Date(), + }); + } catch (err) { + // The race backstop: a concurrent turn already holds this chat's single + // active slot, so the partial unique index rejected our insert. Surface a + // distinct signal — the caller MUST reject this turn (409), not run it + // untracked. Any OTHER error propagates unchanged. + if ( + isUniqueViolation(err) && + violatedConstraint(err) === ONE_ACTIVE_RUN_PER_CHAT_INDEX + ) { + throw new RunAlreadyActiveError(args.chatId); + } + throw err; + } + const controller = new AbortController(); + this.active.set(run.id, { + controller, + chatId: args.chatId, + workspaceId: args.workspaceId, + }); + return { runId: run.id, signal: controller.signal }; + } + + /** Link the assistant message (the #183 projection) to its run. Best-effort. */ + async linkAssistantMessage( + runId: string, + workspaceId: string, + assistantMessageId: string, + ): Promise { + try { + await this.runRepo.update(runId, workspaceId, { assistantMessageId }); + } catch (err) { + this.logger.warn( + `Failed to link assistant message to run ${runId}: ${ + err instanceof Error ? err.message : 'unknown error' + }`, + ); + } + } + + /** Persist progress: bump the run's finished-step count. Best-effort (never + * blocks or breaks the stream). */ + async recordStep( + runId: string, + workspaceId: string, + stepCount: number, + ): Promise { + try { + await this.runRepo.update(runId, workspaceId, { stepCount }); + } catch (err) { + this.logger.warn( + `Failed to record step for run ${runId}: ${ + err instanceof Error ? err.message : 'unknown error' + }`, + ); + } + } + + /** + * Finalize a run to its terminal status (succeeded / failed / aborted), + * stamping finishedAt + any error. Best-effort, but ROBUST against a transient + * terminal-write failure (F6) AND atomically safe against a concurrent settle. + * + * ATOMIC ONCE-CLAIM (the gate must close in ONE synchronous tick): two + * finalizeRun calls for the SAME run can race — the documented real path is + * AiChatService.stream's safety-net catch settling the turn to 'error' while a + * streamText terminal callback (onFinish/onAbort/onError) ALSO settles it. The + * `settled.has` check alone is NOT a gate: it is read BEFORE the awaited UPDATE, + * so two callers can both see `false` and both write the row (last-write-wins + * clobbers the real terminal status, and the bounded retry only widens that + * window). The claim therefore happens via `active.delete`, a SYNCHRONOUS + * check-and-clear with NO await between the gate and the entry removal: the + * second concurrent caller finds the entry already gone and returns in the same + * tick, before any UPDATE. The transition "nobody is finalizing" -> "I am + * finalizing" is thus a single atomic step. + * + * ORDER MATTERS (F6): once we own the claim, the terminal UPDATE happens FIRST; + * only once it SUCCEEDS do we record the run as settled. If the UPDATE fails on + * every bounded attempt we RESTORE the in-memory entry, leave the run UNsettled, + * and emit an ERROR signal that the row is left non-terminal 'running' (which + * would 409 every future turn in the chat until recovery). An in-process retry + * by a LATER settle is only POSSIBLE, never guaranteed: it needs (a) the entry + * to have been restored at the give-up path AND (b) a fresh settler to arrive + * AFTER that restore. A concurrent settler that arrives DURING the retry window + * — while the entry is deleted for backoff and not yet restored — is consumed at + * the synchronous `active.delete` claim (it finds nothing to delete and returns + * a no-op), so it does NOT become an in-process retrier. The NO-streamText path + * (the turn threw before streamText was wired, so ONLY the safety-net ever + * settles) likewise has no second in-process settler at all. The UNCONDITIONAL + * backstop in every case is the boot sweep on the next restart (phase 1 has no + * periodic in-process sweep); the retained entry is bounded (cleared on restart) + * and harmless meanwhile. + * + * IDEMPOTENT on SUCCESS (#184 review): the terminal write happens AT MOST ONCE + * per run. After a successful write the once-gate keys off {@link settled} (the + * terminal row already written) so a settle arriving AFTER the entry was already + * dropped-and-settled returns early; a settle racing the in-flight write is + * stopped earlier still, by the `active.delete` claim. Either way a genuine + * double-settle collapses to a single write and a late settle can never clobber + * the real terminal status or double-write the row. + */ + async finalizeRun( + runId: string, + workspaceId: string, + turnStatus: TurnTerminalStatus, + error?: string, + ): Promise { + // ---- Atomic once-claim (synchronous; NO await before the gate closes) ---- + // Already terminally written -> idempotent no-op. + if (this.settled.has(runId)) return; + // Capture the entry BEFORE the delete so a total-failure path can restore it. + const entry = this.active.get(runId); + // SYNCHRONOUS check-and-clear: the FIRST caller deletes (claims) the entry; + // any concurrent SECOND caller finds nothing to delete and returns HERE, in + // the same tick, before any await — so it can never reach the UPDATE. + if (!this.active.delete(runId)) return; + + let lastError: unknown; + for ( + let attempt = 1; + attempt <= AiChatRunService.FINALIZE_MAX_ATTEMPTS; + attempt++ + ) { + try { + await this.runRepo.update(runId, workspaceId, { + status: mapTurnStatusToRun(turnStatus), + finishedAt: new Date(), + error: error ?? null, + }); + // Terminal write landed: arm the once-gate. The entry is already gone + // (claimed above); we do NOT restore it. The slot is now free. + this.settled.add(runId); + return; + } catch (err) { + lastError = err; + this.logger.warn( + `Failed to finalize run ${runId} (attempt ${attempt}/${ + AiChatRunService.FINALIZE_MAX_ATTEMPTS + }): ${err instanceof Error ? err.message : 'unknown error'}`, + ); + if (attempt < AiChatRunService.FINALIZE_MAX_ATTEMPTS) { + await this.delay(AiChatRunService.FINALIZE_RETRY_BASE_MS * attempt); + } + } + } + // Every attempt failed: this is a give-up, materially worse than a per-attempt + // blip — the row is left NON-TERMINAL ('running'), so emit ONE explicit, + // greppable ERROR so an operator can tell "survived a blip" from "gave up, run + // held in memory until recovery" (the last warn alone says only "attempt 3/3"). + this.logger.error( + `Run ${runId} (chat ${entry?.chatId ?? 'unknown'}) left NON-TERMINAL ` + + `('running'): terminal write failed after ${ + AiChatRunService.FINALIZE_MAX_ATTEMPTS + } attempts; entry retained in memory, recovery deferred to next settle / ` + + `boot sweep`, + lastError, + ); + // RESTORE the claimed entry (and leave the run UNsettled) so a LATER settle + // that arrives AFTER this restore MAY retry the terminal write — but that + // in-process retry is NOT guaranteed (a concurrent settler caught in the retry + // window above is consumed at the `active.delete` claim, and the no-streamText + // path has no second settler at all). The UNCONDITIONAL backstop in every case + // is the boot sweep on the next restart; the restored entry is bounded and + // cleared on restart. + if (entry) this.active.set(runId, entry); + } + + /** Small async backoff between terminal-write retries (F6). Isolated so it is + * trivial to stub/fake-time in tests. */ + private delay(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); + } + + /** + * Request an EXPLICIT stop of a run (the user pressed Stop). This is the ONLY + * thing that aborts a run — distinct from a browser disconnect, which leaves + * the run going. Records `stop_requested_at` on the row (only while active) and + * aborts the in-process controller if this replica owns the run. Returns true + * when a stop took effect (row marked and/or controller aborted), false when + * there was nothing active to stop. + */ + async requestStop(runId: string, workspaceId: string): Promise { + const marked = await this.runRepo.markStopRequested(runId, workspaceId); + const entry = this.active.get(runId); + if (entry) { + // Abort the live turn -> streamText onAbort fires -> the partial is + // persisted (#183) and finalizeRun settles the row as 'aborted'. + entry.controller.abort(); + } + return Boolean(marked) || Boolean(entry); + } + + /** Latest persisted run for a chat — the reconnect target (an in-flight or + * finished run). Pure read-through to the repo. */ + getLatestForChat( + chatId: string, + workspaceId: string, + ): Promise { + return this.runRepo.findLatestByChat(chatId, workspaceId); + } + + /** Fetch a run by id (workspace-scoped). Used to resolve + ownership-check an + * explicit stop targeting a runId. */ + getRun(runId: string, workspaceId: string): Promise { + return this.runRepo.findById(runId, workspaceId); + } + + /** The active run on a chat, if any (used to reject a concurrent start with a + * clean 409 before committing to the stream). */ + getActiveForChat( + chatId: string, + workspaceId: string, + ): Promise { + return this.runRepo.findActiveByChat(chatId, workspaceId); + } + + /** Test/diagnostic seam: whether this replica is holding a live controller for + * the run. */ + isLocallyActive(runId: string): boolean { + return this.active.has(runId); + } +} diff --git a/apps/server/src/core/ai-chat/ai-chat.controller.bound-chat.spec.ts b/apps/server/src/core/ai-chat/ai-chat.controller.bound-chat.spec.ts index 769123a8..aa5adddf 100644 --- a/apps/server/src/core/ai-chat/ai-chat.controller.bound-chat.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.controller.bound-chat.spec.ts @@ -19,6 +19,7 @@ describe('AiChatController.boundChat', () => { }; const controller = new AiChatController( {} as never, + {} as never, // aiChatRunService aiChatRepo as never, {} as never, {} as never, diff --git a/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts b/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts index f46aeaa0..4ceba306 100644 --- a/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts @@ -53,6 +53,7 @@ describe('AiChatController.export', () => { }; const controller = new AiChatController( {} as never, + {} as never, // aiChatRunService aiChatRepo as never, aiChatMessageRepo as never, {} as never, diff --git a/apps/server/src/core/ai-chat/ai-chat.controller.run.spec.ts b/apps/server/src/core/ai-chat/ai-chat.controller.run.spec.ts new file mode 100644 index 00000000..321ecda1 --- /dev/null +++ b/apps/server/src/core/ai-chat/ai-chat.controller.run.spec.ts @@ -0,0 +1,163 @@ +import { BadRequestException, ForbiddenException } from '@nestjs/common'; +import { AiChatController } from './ai-chat.controller'; +import type { User, Workspace } from '@docmost/db/types/entity.types'; + +/** + * Wiring spec for the #184 run-reconnect / run-stop endpoints + * (`POST /ai-chat/run` and `POST /ai-chat/stop`). Both are OWNER-gated via + * assertOwnedChat (the requesting user must own the chat) and NOT flag-gated. + * Exercised with hand-rolled mocks — no Nest graph, no DB. The controller's + * constructor order is (aiChatService, aiChatRunService, aiChatRepo, + * aiChatMessageRepo, aiTranscription). + */ +describe('AiChatController run endpoints (#184)', () => { + const user = { id: 'u1' } as User; + const workspace = { id: 'ws1' } as Workspace; + + function makeController(opts: { + chat?: unknown; // what aiChatRepo.findById returns (owner-gate) + run?: unknown; // getLatestForChat / getRun result + activeRun?: unknown; // getActiveForChat result + message?: unknown; // aiChatMessageRepo.findById result + stopped?: boolean; // requestStop result + }) { + const aiChatRunService = { + getLatestForChat: jest.fn().mockResolvedValue(opts.run), + getRun: jest.fn().mockResolvedValue(opts.run), + getActiveForChat: jest.fn().mockResolvedValue(opts.activeRun), + requestStop: jest.fn().mockResolvedValue(opts.stopped ?? false), + }; + const aiChatRepo = { + findById: jest.fn().mockResolvedValue(opts.chat), + }; + const aiChatMessageRepo = { + findById: jest.fn().mockResolvedValue(opts.message), + }; + const controller = new AiChatController( + {} as never, // aiChatService + aiChatRunService as never, + aiChatRepo as never, + aiChatMessageRepo as never, + {} as never, // aiTranscription + ); + return { controller, aiChatRunService, aiChatRepo, aiChatMessageRepo }; + } + + describe('POST /ai-chat/run (getRun)', () => { + it('owner-gates: a chat the user does not own throws ForbiddenException', async () => { + const { controller, aiChatRunService } = makeController({ + chat: { id: 'c1', creatorId: 'someone-else' }, + }); + await expect( + controller.getRun({ chatId: 'c1' }, user, workspace), + ).rejects.toBeInstanceOf(ForbiddenException); + // It must NOT reach the run lookup once the owner-gate fails. + expect(aiChatRunService.getLatestForChat).not.toHaveBeenCalled(); + }); + + it('returns { run: null, message: null } when the chat has never had a run', async () => { + const { controller, aiChatRunService } = makeController({ + chat: { id: 'c1', creatorId: 'u1' }, + run: undefined, + }); + const res = await controller.getRun({ chatId: 'c1' }, user, workspace); + expect(res).toEqual({ run: null, message: null }); + expect(aiChatRunService.getLatestForChat).toHaveBeenCalledWith( + 'c1', + 'ws1', + ); + }); + + it('returns the run and its projected assistant message', async () => { + const run = { id: 'run-1', chatId: 'c1', assistantMessageId: 'm1' }; + const message = { id: 'm1', role: 'assistant' }; + const { controller, aiChatMessageRepo } = makeController({ + chat: { id: 'c1', creatorId: 'u1' }, + run, + message, + }); + const res = await controller.getRun({ chatId: 'c1' }, user, workspace); + expect(res).toEqual({ run, message }); + expect(aiChatMessageRepo.findById).toHaveBeenCalledWith('m1', 'ws1'); + }); + + it('returns message: null when the run has no linked assistant message', async () => { + const run = { id: 'run-1', chatId: 'c1', assistantMessageId: null }; + const { controller, aiChatMessageRepo } = makeController({ + chat: { id: 'c1', creatorId: 'u1' }, + run, + }); + const res = await controller.getRun({ chatId: 'c1' }, user, workspace); + expect(res).toEqual({ run, message: null }); + expect(aiChatMessageRepo.findById).not.toHaveBeenCalled(); + }); + }); + + describe('POST /ai-chat/stop (stopRun)', () => { + it('throws BadRequestException when neither runId nor chatId is given', async () => { + const { controller } = makeController({}); + await expect( + controller.stopRun({}, user, workspace), + ).rejects.toBeInstanceOf(BadRequestException); + }); + + it('stops by runId: owner-gates via the run’s chat, then requests the stop', async () => { + const { controller, aiChatRunService, aiChatRepo } = makeController({ + run: { id: 'run-1', chatId: 'c1' }, + chat: { id: 'c1', creatorId: 'u1' }, + stopped: true, + }); + const res = await controller.stopRun({ runId: 'run-1' }, user, workspace); + expect(res).toEqual({ stopped: true }); + expect(aiChatRunService.getRun).toHaveBeenCalledWith('run-1', 'ws1'); + expect(aiChatRepo.findById).toHaveBeenCalledWith('c1', 'ws1'); + expect(aiChatRunService.requestStop).toHaveBeenCalledWith('run-1', 'ws1'); + }); + + it('stops by runId: a foreign run’s chat throws ForbiddenException (no stop)', async () => { + const { controller, aiChatRunService } = makeController({ + run: { id: 'run-1', chatId: 'c1' }, + chat: { id: 'c1', creatorId: 'someone-else' }, + }); + await expect( + controller.stopRun({ runId: 'run-1' }, user, workspace), + ).rejects.toBeInstanceOf(ForbiddenException); + expect(aiChatRunService.requestStop).not.toHaveBeenCalled(); + }); + + it('stops by runId: an unknown run reports { stopped: false }', async () => { + const { controller, aiChatRunService } = makeController({ + run: undefined, + }); + const res = await controller.stopRun({ runId: 'gone' }, user, workspace); + expect(res).toEqual({ stopped: false }); + expect(aiChatRunService.requestStop).not.toHaveBeenCalled(); + }); + + it('stops by chatId: owner-gates, resolves the active run, requests the stop', async () => { + const { controller, aiChatRunService, aiChatRepo } = makeController({ + chat: { id: 'c1', creatorId: 'u1' }, + activeRun: { id: 'run-9' }, + stopped: true, + }); + const res = await controller.stopRun({ chatId: 'c1' }, user, workspace); + expect(res).toEqual({ stopped: true }); + expect(aiChatRepo.findById).toHaveBeenCalledWith('c1', 'ws1'); + expect(aiChatRunService.getActiveForChat).toHaveBeenCalledWith( + 'c1', + 'ws1', + ); + expect(aiChatRunService.requestStop).toHaveBeenCalledWith('run-9', 'ws1'); + }); + + it('stops by chatId: reports { stopped: false } when no run is active', async () => { + const { controller, aiChatRunService } = makeController({ + chat: { id: 'c1', creatorId: 'u1' }, + activeRun: undefined, + }); + const res = await controller.stopRun({ chatId: 'c1' }, user, workspace); + expect(res).toEqual({ stopped: false }); + expect(aiChatRunService.requestStop).not.toHaveBeenCalled(); + }); + }); +}); diff --git a/apps/server/src/core/ai-chat/ai-chat.controller.ts b/apps/server/src/core/ai-chat/ai-chat.controller.ts index 7834d6b2..04a255c0 100644 --- a/apps/server/src/core/ai-chat/ai-chat.controller.ts +++ b/apps/server/src/core/ai-chat/ai-chat.controller.ts @@ -1,6 +1,7 @@ import { BadRequestException, Body, + ConflictException, Controller, ForbiddenException, HttpCode, @@ -20,14 +21,25 @@ import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard'; import { AuthUser } from '../../common/decorators/auth-user.decorator'; import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator'; import { SkipTransform } from '../../common/decorators/skip-transform.decorator'; -import { AiChat, User, Workspace } from '@docmost/db/types/entity.types'; +import { + AiChat, + AiChatMessage, + AiChatRun, + User, + Workspace, +} from '@docmost/db/types/entity.types'; import { PaginationOptions } from '@docmost/db/pagination/pagination-options'; import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo'; import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo'; import { UserThrottlerGuard } from '../../integrations/throttle/user-throttler.guard'; import { AI_CHAT_THROTTLER } from '../../integrations/throttle/throttler-names'; import { FileInterceptor } from '../../common/interceptors/file.interceptor'; -import { AiChatService, AiChatStreamBody } from './ai-chat.service'; +import { + AiChatRunHooks, + AiChatService, + AiChatStreamBody, +} from './ai-chat.service'; +import { AiChatRunService } from './ai-chat-run.service'; import { AiTranscriptionService } from './ai-transcription.service'; import { BoundChatDto, @@ -35,7 +47,9 @@ import { ExportChatDto, GeneratePageTitleDto, GetChatMessagesDto, + GetRunDto, RenameChatDto, + StopRunDto, } from './dto/ai-chat.dto'; import { describeProviderError } from '../../integrations/ai/ai-error.util'; import { buildChatMarkdown } from './chat-markdown.util'; @@ -52,6 +66,7 @@ export class AiChatController { constructor( private readonly aiChatService: AiChatService, + private readonly aiChatRunService: AiChatRunService, private readonly aiChatRepo: AiChatRepo, private readonly aiChatMessageRepo: AiChatMessageRepo, private readonly aiTranscription: AiTranscriptionService, @@ -137,6 +152,75 @@ export class AiChatController { return { markdown }; } + /** + * Reconnect to the latest run of a chat (#184 phase 1). Returns the run's + * persisted lifecycle state ({ status, error, stepCount, timings, ... }) plus + * the assistant message it projects (the partial/final output) — the DB is the + * source of truth, so this works for an in-flight run (the browser dropped, the + * run kept going) and a finished one alike. Owner-gated via assertOwnedChat. + * `{ run: null }` when the chat has never had a run. + */ + @HttpCode(HttpStatus.OK) + @Post('run') + async getRun( + @Body() dto: GetRunDto, + @AuthUser() user: User, + @AuthWorkspace() workspace: Workspace, + ): Promise<{ run: AiChatRun | null; message: AiChatMessage | null }> { + await this.assertOwnedChat(dto.chatId, user, workspace); + const run = await this.aiChatRunService.getLatestForChat( + dto.chatId, + workspace.id, + ); + if (!run) return { run: null, message: null }; + const message = run.assistantMessageId + ? await this.aiChatMessageRepo.findById( + run.assistantMessageId, + workspace.id, + ) + : undefined; + return { run, message: message ?? null }; + } + + /** + * Explicitly STOP an agent run (#184 phase 1) — the user pressed Stop. This is + * the ONLY thing that ends a detached run; a browser disconnect deliberately + * does not. Target by `runId` (from the streamed start metadata) or by `chatId` + * (stop whatever run is active on it). Owner-gated. Returns + * `{ stopped }` — false when there was nothing active to stop. + */ + @HttpCode(HttpStatus.OK) + @Post('stop') + async stopRun( + @Body() dto: StopRunDto, + @AuthUser() user: User, + @AuthWorkspace() workspace: Workspace, + ): Promise<{ stopped: boolean }> { + let runId = dto.runId; + if (!runId && !dto.chatId) { + throw new BadRequestException('runId or chatId is required'); + } + if (runId) { + // Resolve the run to its chat and owner-gate via that chat. + const run = await this.aiChatRunService.getRun(runId, workspace.id); + if (!run) return { stopped: false }; + await this.assertOwnedChat(run.chatId, user, workspace); + } else { + await this.assertOwnedChat(dto.chatId!, user, workspace); + const active = await this.aiChatRunService.getActiveForChat( + dto.chatId!, + workspace.id, + ); + if (!active) return { stopped: false }; + runId = active.id; + } + const stopped = await this.aiChatRunService.requestStop( + runId, + workspace.id, + ); + return { stopped }; + } + /** Rename a chat. */ @HttpCode(HttpStatus.OK) @Post('rename') @@ -188,11 +272,20 @@ export class AiChatController { @AuthWorkspace() workspace: Workspace, ): Promise { // A7 gate: the workspace must have AI chat explicitly enabled. - const settings = (workspace.settings ?? {}) as { ai?: { chat?: boolean } }; + const settings = (workspace.settings ?? {}) as { + ai?: { chat?: boolean; autonomousRuns?: boolean }; + }; if (settings.ai?.chat !== true) { throw new ForbiddenException('AI chat is disabled'); } + // #184 phase 1 flag: when ON, the turn becomes a detached, durable RUN — its + // lifecycle is tracked in ai_chat_runs, a browser disconnect no longer aborts + // it, and only an explicit /ai-chat/stop ends it. When OFF (the default) the + // turn is socket-bound exactly as before, so existing deployments are + // unaffected. + const autonomousRuns = settings.ai?.autonomousRuns === true; + const sessionId = (req.raw as { sessionId?: string }).sessionId; if (!sessionId) { // The chat requires an interactive session to mint loopback tokens @@ -216,6 +309,58 @@ export class AiChatController { // HttpException) instead of breaking mid-stream. const model = await this.aiChatService.getChatModel(workspace.id, role); + // #184: one active run per chat. For an EXISTING chat reject a concurrent + // start with a clean 409 BEFORE hijack (the common double-submit / second-tab + // case), so the user gets JSON, not a mid-stream error. A brand-new chat + // (no chatId) cannot have a prior run, and the DB partial unique index is the + // backstop against any race that slips past this check. + if (autonomousRuns && body.chatId) { + const active = await this.aiChatRunService.getActiveForChat( + body.chatId, + workspace.id, + ); + if (active) { + throw new ConflictException({ + message: 'An agent run is already in progress for this chat', + code: 'A_RUN_ALREADY_ACTIVE', + }); + } + } + + // Run-lifecycle hooks (#184), only when the flag is on. They wrap the turn in + // a durable run whose abort is governed by the run (explicit stop), persist + // its progress, and settle its terminal status — see AiChatRunService. + const runHooks: AiChatRunHooks | undefined = autonomousRuns + ? { + begin: (chatId) => + this.aiChatRunService.beginRun({ + chatId, + workspaceId: workspace.id, + userId: user.id, + trigger: 'user', + }), + onAssistantSeeded: (runId, messageId) => + this.aiChatRunService.linkAssistantMessage( + runId, + workspace.id, + messageId, + ), + onStep: (runId, stepCount) => + void this.aiChatRunService.recordStep( + runId, + workspace.id, + stepCount, + ), + onSettled: (runId, status, error) => + this.aiChatRunService.finalizeRun( + runId, + workspace.id, + status, + error, + ), + } + : undefined; + // Abort the agent loop when the client disconnects. `close` also fires on // normal completion, so only abort when the response has not finished // writing (a genuine disconnect). `once` fires at most once and self-removes; @@ -230,18 +375,44 @@ export class AiChatController { // A genuine disconnect leaves the response unfinished (unlike a normal // completion, which also fires `close`). Such a drop — e.g. a reverse // proxy cutting the SSE mid-answer — is otherwise invisible server-side, - // so log it here before aborting the agent loop. + // so log it here. if (!res.raw.writableEnded) { - this.logger.warn( - `AI chat stream: client disconnected before completion; aborting turn ` + - `(elapsed=${Date.now() - reqStartedAt}ms since request received)`, - ); - controller.abort(); + if (autonomousRuns) { + // #184: the turn is a DETACHED run. A disconnect must NOT abort it — + // the run keeps executing and persisting server-side; the client + // reconnects via /ai-chat/run (or re-stops via /ai-chat/stop). Log only. + this.logger.log( + `AI chat stream: client disconnected; run continues server-side ` + + `(elapsed=${Date.now() - reqStartedAt}ms since request received)`, + ); + } else { + this.logger.warn( + `AI chat stream: client disconnected before completion; aborting turn ` + + `(elapsed=${Date.now() - reqStartedAt}ms since request received)`, + ); + controller.abort(); + } } }; req.raw.once('close', onClose); res.raw.once('finish', () => req.raw.off('close', onClose)); + // #184: in detached mode the turn is NOT aborted on disconnect, so the SDK's + // pipe keeps writing to a socket the client may have dropped — for the rest of + // the (continuing) run. A write to the dead socket can emit an 'error' on the + // raw response; without a listener that surfaces as an unhandled error event. + // Swallow it (the run continues server-side regardless). Legacy mode aborts on + // disconnect, so it does not need this and keeps its exact prior behavior. + if (autonomousRuns) { + res.raw.on('error', (err) => { + this.logger.debug( + `AI chat detached stream: post-disconnect socket error swallowed: ${ + err instanceof Error ? err.message : String(err) + }`, + ); + }); + } + // Commit to streaming: hijack so Fastify stops managing the response and // the AI SDK can write the UI-message stream directly to the Node socket. res.hijack(); @@ -256,15 +427,32 @@ export class AiChatController { signal: controller.signal, model, role, + // #184: present only when the flag is on; wraps the turn in a durable run. + runHooks, }); } catch (err) { - // Any failure AFTER hijack can no longer send a clean JSON error, so emit - // a minimal error on the raw socket if nothing has been written yet. - this.logger.error('AI chat stream failed', err as Error); + // Any failure AFTER hijack can no longer go through Nest's exception + // filter, so emit the error on the raw socket if nothing has been written + // yet. The lost-the-race 409 (RunAlreadyActiveError -> ConflictException) + // is raised by stream() BEFORE it writes a byte, so headers are still + // unsent here: honor the HttpException's real status + body (a clean 409), + // not a blanket 500. Everything else stays a 500. + const isHttp = err instanceof HttpException; + if (!isHttp) { + this.logger.error('AI chat stream failed', err as Error); + } if (!res.raw.headersSent) { - res.raw.statusCode = 500; + const status = isHttp ? err.getStatus() : 500; + const payload = isHttp + ? err.getResponse() + : { error: 'Internal server error' }; + res.raw.statusCode = status; res.raw.setHeader('Content-Type', 'application/json'); - res.raw.end(JSON.stringify({ error: 'Internal server error' })); + res.raw.end( + JSON.stringify( + typeof payload === 'string' ? { message: payload } : payload, + ), + ); } else if (!res.raw.writableEnded) { res.raw.end(); } diff --git a/apps/server/src/core/ai-chat/ai-chat.generate-page-title.spec.ts b/apps/server/src/core/ai-chat/ai-chat.generate-page-title.spec.ts index ef242fdf..ef94e5b7 100644 --- a/apps/server/src/core/ai-chat/ai-chat.generate-page-title.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.generate-page-title.spec.ts @@ -57,6 +57,7 @@ describe('AiChatController.generatePageTitle', () => { const aiChatService = { generatePageTitle: generate }; const controller = new AiChatController( aiChatService as never, + {} as never, // aiChatRunService {} as never, {} as never, {} as never, diff --git a/apps/server/src/core/ai-chat/ai-chat.module.ts b/apps/server/src/core/ai-chat/ai-chat.module.ts index b8afd4c1..c0e8aa02 100644 --- a/apps/server/src/core/ai-chat/ai-chat.module.ts +++ b/apps/server/src/core/ai-chat/ai-chat.module.ts @@ -3,6 +3,7 @@ import { AiModule } from '../../integrations/ai/ai.module'; import { TokenModule } from '../auth/token.module'; import { AiChatController } from './ai-chat.controller'; import { AiChatService } from './ai-chat.service'; +import { AiChatRunService } from './ai-chat-run.service'; import { AiTranscriptionService } from './ai-transcription.service'; import { AiChatToolsService } from './tools/ai-chat-tools.service'; import { EmbeddingModule } from './embedding/embedding.module'; @@ -42,6 +43,7 @@ import { PublicShareChatToolsService } from './tools/public-share-chat-tools.ser controllers: [AiChatController, PublicShareChatController], providers: [ AiChatService, + AiChatRunService, AiTranscriptionService, AiChatToolsService, PublicShareChatService, diff --git a/apps/server/src/core/ai-chat/ai-chat.service.lifecycle.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.lifecycle.spec.ts index 77e9d3c4..8e46fd73 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.lifecycle.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.lifecycle.spec.ts @@ -1,5 +1,7 @@ import { Logger } from '@nestjs/common'; -import { AiChatService } from './ai-chat.service'; +import { AiChatService, AiChatRunHooks } from './ai-chat.service'; +import { AiChatRunService } from './ai-chat-run.service'; +import type { User, Workspace } from '@docmost/db/types/entity.types'; /** * Lifecycle unit tests for AiChatService.onModuleInit (#183 crash-recovery @@ -59,3 +61,97 @@ describe('AiChatService.onModuleInit (startup sweep)', () => { expect(String(warnSpy.mock.calls[0][0])).toContain('db unavailable'); }); }); + +/** + * #184 CRITICAL run-lifecycle safety net (review fix). A transient failure + * AFTER a successful beginRun but BEFORE streamText's terminal callbacks own the + * lifecycle must STILL settle the run — otherwise the run row is stuck 'running' + * forever (sweepRunning only runs at startup) and the partial unique index + the + * controller pre-check 409 every future turn in that chat until a restart. Here + * we model the very first bare await after beginRun (the user-message insert) + * throwing, wiring the run hooks to a REAL AiChatRunService (mock repo) exactly + * as the controller does, and assert the run is settled to 'error' and its + * in-memory entry dropped (so a follow-up turn would NOT be 409'd). + */ +describe('AiChatService.stream run-lifecycle safety net (#184)', () => { + const user = { id: 'u1' } as User; + const workspace = { id: 'ws1' } as Workspace; + + afterEach(() => jest.restoreAllMocks()); + + it('an exception after beginRun settles the run to error and drops the in-memory entry', async () => { + jest.spyOn(Logger.prototype, 'error').mockImplementation(() => undefined); + + // Real run service over a mock repo, so finalizeRun's in-memory bookkeeping + // (active.delete) is exercised for real. + const runRepo = { + insert: jest.fn().mockResolvedValue({ id: 'run-1', status: 'running' }), + update: jest.fn().mockResolvedValue({ id: 'run-1' }), + }; + const runService = new AiChatRunService(runRepo as never, { isCloud: () => false } as never); + + // The user-message insert (the first bare await after beginRun) throws. + const aiChatMessageRepo = { + insert: jest.fn().mockRejectedValue(new Error('insert boom')), + }; + const aiChatRepo = { + // Existing chat -> chatId stays, no new-chat insert path. + findById: jest.fn().mockResolvedValue({ id: 'chat-1', creatorId: 'u1' }), + }; + + const service = new AiChatService( + {} as never, // ai + aiChatRepo as never, + aiChatMessageRepo as never, + {} as never, // aiSettings + {} as never, // tools + {} as never, // mcpClients + {} as never, // aiAgentRoleRepo + {} as never, // pageRepo + {} as never, // pageAccess + ); + + const runHooks: AiChatRunHooks = { + begin: (chatId) => + runService.beginRun({ + chatId, + workspaceId: workspace.id, + userId: user.id, + trigger: 'user', + }), + onSettled: (runId, status, error) => + runService.finalizeRun(runId, workspace.id, status, error), + }; + + await expect( + service.stream({ + user, + workspace, + sessionId: 'sess', + body: { + chatId: 'chat-1', + messages: [ + { id: 'm', role: 'user', parts: [{ type: 'text', text: 'hi' }] }, + ], + }, + res: {} as never, + signal: new AbortController().signal, + model: {} as never, + role: null, + runHooks, + }), + ).rejects.toThrow('insert boom'); + + // The run was begun... + expect(runRepo.insert).toHaveBeenCalledTimes(1); + // ...then settled to a terminal FAILED status by the safety net... + expect(runRepo.update).toHaveBeenCalledTimes(1); + expect(runRepo.update).toHaveBeenCalledWith( + 'run-1', + 'ws1', + expect.objectContaining({ status: 'failed' }), + ); + // ...and the in-memory entry is gone, so a follow-up turn is NOT 409'd. + expect(runService.isLocallyActive('run-1')).toBe(false); + }); +}); diff --git a/apps/server/src/core/ai-chat/ai-chat.service.run-race.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.run-race.spec.ts new file mode 100644 index 00000000..747130ba --- /dev/null +++ b/apps/server/src/core/ai-chat/ai-chat.service.run-race.spec.ts @@ -0,0 +1,337 @@ +import { ConflictException, Logger } from '@nestjs/common'; + +// Mock the AI SDK so we can PROVE no provider call is made for the turn we are +// about to reject. The race rejection happens at runHooks.begin(), long before +// any streamText/generateText, so these never resolve a real model. +jest.mock('ai', () => ({ + streamText: jest.fn(), + generateText: jest.fn(), + convertToModelMessages: jest.fn(() => []), + stepCountIs: jest.fn(() => () => false), +})); + +import { streamText, generateText } from 'ai'; +import { AiChatService } from './ai-chat.service'; +import { RunAlreadyActiveError } from './ai-chat-run.service'; + +/** + * Race-closure coverage for the "one active run per chat" guard (#184). + * + * THE BUG: two simultaneous POST /ai-chat/stream on the same chat both pass the + * controller's cheap pre-check (TOCTOU), so the loser's run-row INSERT hits the + * partial unique index. Previously that 23505 was SWALLOWED and the second turn + * streamed UNTRACKED (no runId, not stoppable). THE FIX: beginRun surfaces a + * RunAlreadyActiveError and stream() turns it into a 409 BEFORE any AI call — + * the second turn never runs. + */ +describe('AiChatService.stream — concurrent-run race rejection (#184)', () => { + const streamTextMock = streamText as unknown as jest.Mock; + const generateTextMock = generateText as unknown as jest.Mock; + + beforeEach(() => { + streamTextMock.mockReset(); + generateTextMock.mockReset(); + }); + + // Minimal service whose only reachable deps before begin() are aiChatRepo + // (resolve the existing chat) — everything past begin must remain untouched. + function makeService(beginImpl: () => Promise) { + const aiChatMessageRepo = { insert: jest.fn() }; + const aiChatRepo = { + // An existing chat: stream keeps the supplied chatId and skips creation. + findById: jest.fn(async () => ({ id: 'chat-1', workspaceId: 'ws-1' })), + insert: jest.fn(), + }; + const svc = new AiChatService( + {} as never, // ai + aiChatRepo as never, + aiChatMessageRepo as never, + {} as never, // aiSettings + {} as never, // tools + {} as never, // mcpClients + {} as never, // aiAgentRoleRepo + {} as never, // pageRepo + {} as never, // pageAccess + ); + const begin = jest.fn(beginImpl); + return { svc, begin, aiChatRepo, aiChatMessageRepo }; + } + + const baseArgs = (begin: jest.Mock) => ({ + user: { id: 'user-1' } as never, + workspace: { id: 'ws-1' } as never, + sessionId: 'sess-1', + body: { chatId: 'chat-1', messages: [] } as never, + res: { raw: {} } as never, + signal: new AbortController().signal, + model: {} as never, + role: null, + runHooks: { + begin, + onAssistantSeeded: jest.fn(), + onStep: jest.fn(), + onSettled: jest.fn(), + } as never, + }); + + it('rejects the racer with a 409 ConflictException BEFORE any AI call, and never persists an untracked turn', async () => { + // begin loses the unique-index race -> RunAlreadyActiveError. + const { svc, begin, aiChatMessageRepo } = makeService(() => { + throw new RunAlreadyActiveError('chat-1'); + }); + + const promise = svc.stream(baseArgs(begin)); + + await expect(promise).rejects.toBeInstanceOf(ConflictException); + await promise.catch((err: ConflictException) => { + expect(err.getStatus()).toBe(409); + expect((err.getResponse() as { code?: string }).code).toBe( + 'A_RUN_ALREADY_ACTIVE', + ); + }); + + // The decisive assertions: the rejected racer spent NO tokens and left NO + // untracked turn behind. + expect(begin).toHaveBeenCalledTimes(1); + expect(streamTextMock).not.toHaveBeenCalled(); + expect(generateTextMock).not.toHaveBeenCalled(); + expect(aiChatMessageRepo.insert).not.toHaveBeenCalled(); + }); +}); + +/** + * F3 — the LOAD-BEARING run-detach wiring: `effectiveSignal = handle.signal` + * after runHooks.begin, then `abortSignal: effectiveSignal` passed to streamText. + * That single line is what makes a run survive a browser disconnect (the agent + * loop's abort is governed by the RUN's signal, not the socket): a regression to + * the socket-bound signal would still pass every other test green while silently + * breaking Stop + durability. These two tests pin the exact signal streamText + * consumes on both paths. + */ +describe('AiChatService.stream — abortSignal wiring (#184 F3)', () => { + const streamTextMock = streamText as unknown as jest.Mock; + + // A streamText result stub: the post-call drain + pipe are no-ops here; we only + // care WHICH abortSignal streamText was handed. + function makeStreamResult() { + return { + consumeStream: jest.fn(), + pipeUIMessageStreamToResponse: jest.fn(), + }; + } + + // A raw-response stub sufficient for the post-streamText wiring + // (stripStreamingHopByHopHeaders binds writeHead; startSseHeartbeat registers + // close/finish listeners; flushHeaders is belt-and-braces). + function makeRes() { + return { + raw: { + writeHead: jest.fn(), + write: jest.fn(), + once: jest.fn(), + on: jest.fn(), + flushHeaders: jest.fn(), + writableEnded: false, + destroyed: false, + }, + }; + } + + // Wire only the deps reached on the way to streamText: resolve the existing + // chat, persist the user + seed the assistant row, load (empty) history, the + // admin settings, an empty external toolset + Docmost toolset. + function makeService() { + const aiChatRepo = { + findById: jest.fn(async () => ({ id: 'chat-1', workspaceId: 'ws-1' })), + insert: jest.fn(), + }; + const aiChatMessageRepo = { + insert: jest.fn(async () => ({ id: 'msg-1' })), + findAllByChat: jest.fn(async () => []), + update: jest.fn(async () => ({ id: 'msg-1' })), + }; + const aiSettings = { resolve: jest.fn(async () => ({})) }; + const tools = { forUser: jest.fn(async () => ({})) }; + const mcpClients = { + toolsFor: jest.fn(async () => ({ + tools: {}, + clients: [], + outcomes: [], + instructions: [], + })), + }; + const svc = new AiChatService( + {} as never, // ai + aiChatRepo as never, + aiChatMessageRepo as never, + aiSettings as never, + tools as never, + mcpClients as never, + {} as never, // aiAgentRoleRepo + {} as never, // pageRepo (openPage undefined -> never touched) + {} as never, // pageAccess + ); + return { svc }; + } + + const body = { + chatId: 'chat-1', + messages: [ + { id: 'm1', role: 'user', parts: [{ type: 'text', text: 'hi' }] }, + ], + }; + + beforeEach(() => { + streamTextMock.mockReset(); + streamTextMock.mockImplementation(() => makeStreamResult()); + jest + .spyOn(Logger.prototype, 'log') + .mockImplementation(() => undefined as never); + }); + + afterEach(() => jest.restoreAllMocks()); + + it('happy path (run-wrapped): streamText is driven with abortSignal === handle.signal (the RUN signal, NOT the socket)', async () => { + const { svc } = makeService(); + const runController = new AbortController(); + const runSignal = runController.signal; + const socketSignal = new AbortController().signal; + + const begin = jest.fn(async () => ({ runId: 'run-1', signal: runSignal })); + await svc.stream({ + user: { id: 'user-1' } as never, + workspace: { id: 'ws-1' } as never, + sessionId: 'sess-1', + body: body as never, + res: makeRes() as never, + signal: socketSignal, + model: {} as never, + role: null, + runHooks: { + begin, + onAssistantSeeded: jest.fn(), + onStep: jest.fn(), + onSettled: jest.fn(), + } as never, + }); + + expect(begin).toHaveBeenCalledTimes(1); + expect(streamTextMock).toHaveBeenCalledTimes(1); + // THE assertion: the agent loop's abort is wired to the RUN, so a browser + // disconnect (which aborts only `socketSignal`) cannot end the turn. + expect(streamTextMock.mock.calls[0][0].abortSignal).toBe(runSignal); + expect(streamTextMock.mock.calls[0][0].abortSignal).not.toBe(socketSignal); + }); + + it('legacy path (no runHooks): streamText is driven with the SOCKET signal', async () => { + const { svc } = makeService(); + const socketSignal = new AbortController().signal; + + await svc.stream({ + user: { id: 'user-1' } as never, + workspace: { id: 'ws-1' } as never, + sessionId: 'sess-1', + body: body as never, + res: makeRes() as never, + signal: socketSignal, + model: {} as never, + role: null, + // No runHooks -> the turn stays socket-bound (flag off / default). + }); + + expect(streamTextMock).toHaveBeenCalledTimes(1); + expect(streamTextMock.mock.calls[0][0].abortSignal).toBe(socketSignal); + }); + + /** + * F9 — streamText's TERMINAL callbacks carry the #184 run lifecycle: + * onStepFinish -> runHooks.onStep(runId, stepCount) + * onFinish -> runHooks.onSettled(runId, 'completed') (dominant path) + * onAbort -> runHooks.onSettled(runId, 'aborted') + * onError -> runHooks.onSettled(runId, 'error', cause) + * makeStreamResult() ignores the streamText options, so these callbacks never + * fire on their own — a regression in this wiring (esp. the success path) would + * strand the run with NO test catching it. Here we CAPTURE the options streamText + * was handed and invoke each callback with the real wiring, asserting the run + * hooks fire with the right args. + */ + // Drive stream() to the point streamText is called, capturing the options object + // (which carries onStepFinish/onFinish/onError/onAbort) and the run hooks. + async function captureStreamCallbacks() { + const { svc } = makeService(); + let capturedOpts: any; + streamTextMock.mockImplementation((opts: any) => { + capturedOpts = opts; + return makeStreamResult(); + }); + const runHooks = { + begin: jest.fn(async () => ({ + runId: 'run-1', + signal: new AbortController().signal, + })), + onAssistantSeeded: jest.fn(), + onStep: jest.fn(), + onSettled: jest.fn(), + }; + await svc.stream({ + user: { id: 'user-1' } as never, + workspace: { id: 'ws-1' } as never, + sessionId: 'sess-1', + body: body as never, + res: makeRes() as never, + signal: new AbortController().signal, + model: {} as never, + role: null, + runHooks: runHooks as never, + }); + expect(capturedOpts).toBeDefined(); + return { capturedOpts, runHooks }; + } + + it('F9: onStepFinish bumps the run step count, onFinish settles the run "completed" (the dominant autonomous-run path)', async () => { + const { capturedOpts, runHooks } = await captureStreamCallbacks(); + + // A finished step -> onStep(runId, finishedStepCount). + capturedOpts.onStepFinish({ text: 'step one', toolCalls: [], content: [] }); + expect(runHooks.onStep).toHaveBeenCalledWith('run-1', 1); + capturedOpts.onStepFinish({ text: 'step two', toolCalls: [], content: [] }); + expect(runHooks.onStep).toHaveBeenLastCalledWith('run-1', 2); + + // The success terminal callback settles the run. + await capturedOpts.onFinish({ + text: 'done', + finishReason: 'stop', + totalUsage: {}, + usage: {}, + steps: [], + }); + expect(runHooks.onSettled).toHaveBeenCalledWith('run-1', 'completed'); + }); + + it('F9: onAbort settles the run "aborted"', async () => { + jest + .spyOn(Logger.prototype, 'warn') + .mockImplementation(() => undefined as never); + const { capturedOpts, runHooks } = await captureStreamCallbacks(); + + await capturedOpts.onAbort({ steps: [] }); + expect(runHooks.onSettled).toHaveBeenCalledWith('run-1', 'aborted'); + }); + + it('F9: onError settles the run "error" carrying the provider cause', async () => { + jest + .spyOn(Logger.prototype, 'error') + .mockImplementation(() => undefined as never); + jest + .spyOn(Logger.prototype, 'warn') + .mockImplementation(() => undefined as never); + const { capturedOpts, runHooks } = await captureStreamCallbacks(); + + await capturedOpts.onError({ error: new Error('provider exploded') }); + expect(runHooks.onSettled).toHaveBeenCalledWith( + 'run-1', + 'error', + expect.stringContaining('provider exploded'), + ); + }); +}); diff --git a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts index 4e5ac72a..418ed178 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.spec.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.spec.ts @@ -371,6 +371,12 @@ describe('chatStreamMetadata', () => { }); }); + it('attaches the runId on the start part when a run wraps the turn (#184)', () => { + expect( + chatStreamMetadata({ type: 'start' }, 'chat-1', undefined, 'run-1'), + ).toEqual({ chatId: 'chat-1', runId: 'run-1' }); + }); + it('returns the CUMULATIVE step usage passed in for the finish-step part', () => { // finish-step usage is per-step in v6; the caller accumulates and passes the // running sum, which this just wraps. diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index e4c81584..66a7d924 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -1,4 +1,5 @@ import { + ConflictException, ForbiddenException, Injectable, Logger, @@ -30,6 +31,7 @@ import { import { AiChatToolsService } from './tools/ai-chat-tools.service'; import { McpClientsService } from './external-mcp/mcp-clients.service'; import { buildSystemPrompt } from './ai-chat.prompt'; +import { RunAlreadyActiveError } from './ai-chat-run.service'; import { roleModelOverride } from './roles/role-model-config'; import { startSseHeartbeat, @@ -140,6 +142,31 @@ export interface AiChatStreamBody { messages?: UIMessage[]; } +/** + * Optional run-lifecycle hooks (#184 phase 1). When supplied, the turn is wrapped + * in a first-class server-side RUN: `begin` is called once the chat id is known + * and returns the run's AbortSignal (decoupled from the HTTP socket — a browser + * disconnect no longer governs the abort), and the lifecycle callbacks persist + * the run's progress and terminal status. Absent (the default) => the legacy + * socket-bound behavior is unchanged. + */ +export interface AiChatRunHooks { + // Called once the chat id is resolved; returns the run handle whose `signal` + // drives the agent loop's abort. Returning null disables run tracking (the + // turn falls back to the passed-in socket signal). + begin(chatId: string): Promise<{ runId: string; signal: AbortSignal } | null>; + onAssistantSeeded?( + runId: string, + assistantMessageId: string, + ): Promise | void; + onStep?(runId: string, stepCount: number): void; + onSettled?( + runId: string, + status: 'completed' | 'error' | 'aborted', + error?: string, + ): Promise | void; +} + export interface AiChatStreamArgs { user: User; workspace: Workspace; @@ -147,6 +174,10 @@ export interface AiChatStreamArgs { body: AiChatStreamBody; res: FastifyReply; signal: AbortSignal; + // Run-lifecycle hooks (#184). When present the turn becomes a detached, + // durable RUN whose abort is governed by the run (explicit stop), not the + // socket; when absent the turn stays socket-bound (legacy behavior). + runHooks?: AiChatRunHooks; // Resolved by the controller BEFORE res.hijack(), so an unconfigured provider // (AiNotConfiguredException -> 503) surfaces as clean JSON before streaming. // For a role with a model override this already carries the override-resolved @@ -303,6 +334,7 @@ export class AiChatService implements OnModuleInit { signal, model, role, + runHooks, }: AiChatStreamArgs): Promise { // Resolve / create the chat. A new chat is created when no valid chatId is // supplied or the supplied one does not belong to this workspace. @@ -347,503 +379,608 @@ export class AiChatService implements OnModuleInit { isNewChat = true; } - // Extract the incoming user turn (the last user message from useChat). - const incoming = lastUserMessage(body.messages); - const incomingText = uiMessageText(incoming); - - // Persist the user message before contacting the model. - await this.aiChatMessageRepo.insert({ - chatId, - workspaceId: workspace.id, - userId: user.id, - role: 'user', - content: incomingText, - // jsonb column: UIMessage parts are JSON-serializable at runtime but not - // structurally `JsonValue`, so cast through unknown. - metadata: (incoming?.parts ? { parts: incoming.parts } : null) as never, - }); - - // Rebuild the conversation from persisted history (not the client payload), - // so the model always sees the authoritative server-side transcript. Load - // the FULL history in chronological order (oldest -> newest, incl. the user - // message just inserted above) so NO turns are dropped — there is no - // recent-tail window anymore. `findAllByChat` keeps a 5000-row memory-safety - // backstop (on overflow it keeps the NEWEST rows and logs a warning); that - // is a safety net far above any realistic chat, not a conversational limit. - const history = await this.aiChatMessageRepo.findAllByChat( - chatId, - workspace.id, - ); - const uiMessages = history.map(rowToUiMessage); - // convertToModelMessages is async in ai@6.0.134 (returns Promise). - const messages = await convertToModelMessages(uiMessages); - - // Interrupt-resume detection (#198): the client "send now" flag is only a - // hint — confirm it against the persisted history (the preceding assistant - // turn must really be aborted/streaming) so a spoofed flag cannot inject the - // interrupt note onto an ordinary turn. The partial output the model needs is - // already in `messages` (the aborted assistant row replays via findRecent). - const interrupted = isInterruptResume(history, body.interrupted); - - // The model is resolved by the controller before hijack (clean 503 path). - // Here we only need the admin-configured system prompt. - const resolved = await this.aiSettings.resolve(workspace.id); - - // Build the external MCP toolset FIRST so the system prompt can carry each - // connected server's admin-authored guidance (#180). Merge in admin- - // configured external MCP tools (web search, etc.; §6.8). A down/slow - // external server never crashes the turn — toolsFor skips it and records the - // outcome. The returned client handles MUST be closed in the streamText - // lifecycle (onFinish/onError/onAbort) — leaking them is a bug. Docmost - // tools take precedence on a name clash (external are namespaced, so a clash - // is not expected; the spread order makes intent explicit). - let external: Awaited> = { - tools: {}, - clients: [], - outcomes: [], - instructions: [], - }; - try { - external = await this.mcpClients.toolsFor(workspace.id); - } catch (err) { - // Building the external toolset must never break the turn; proceed with - // Docmost-only tools. Never log URLs/headers — short message only. - this.logger.warn( - `External MCP toolset unavailable: ${ - err instanceof Error ? err.message : 'unknown error' - }`, - ); + // Start the durable RUN now that the chat id is known (#184 phase 1). The + // returned `runId` + `signal` make the turn a first-class server-side object + // whose abort is governed by the run (an explicit user stop), NOT by the HTTP + // socket — so a browser disconnect no longer ends the turn. With no runHooks + // (the default / flag off) the turn stays socket-bound via `signal` and + // `runId` is undefined, leaving the legacy path byte-for-byte unchanged. + let runId: string | undefined; + let effectiveSignal = signal; + if (runHooks) { + try { + const handle = await runHooks.begin(chatId); + if (handle) { + runId = handle.runId; + effectiveSignal = handle.signal; + } + } catch (err) { + // RACE BACKSTOP: the run-row INSERT lost the chat's single active slot + // (the partial unique index rejected it). This is the AUTHORITATIVE + // concurrency gate — the controller's pre-check is only a fast-path, and a + // request that slipped past it must NOT proceed. Reject the turn with a + // 409 NOW, BEFORE any AI/provider call: no tokens are spent and no + // untracked turn streams. (Matches the controller's pre-check 409.) + if (err instanceof RunAlreadyActiveError) { + throw new ConflictException({ + message: 'An agent run is already in progress for this chat', + code: 'A_RUN_ALREADY_ACTIVE', + }); + } + // Any OTHER run-start failure must not break the turn — fall back to the + // socket signal (legacy behavior) and stream anyway. + this.logger.error( + `Failed to begin agent run (chat ${chatId}); streaming without run tracking`, + err as Error, + ); + } } - // Close every external client EXACTLY ONCE across the turn's terminal - // callbacks (onFinish/onError/onAbort all fire at most once collectively, - // but guard anyway). DEFINED HERE — before the prompt/toolset are built — so - // that if buildSystemPrompt or forUser throws AFTER the external lease was - // taken (toolsFor above), the lease is still released. Otherwise its refCount - // stays >= 1 forever and the external undici sockets leak until restart - // (#180 reorder moved toolsFor ahead of these; #185 review). Close errors are - // swallowed so they never break the response. - let clientsClosed = false; - const closeExternalClients = async (): Promise => { - if (clientsClosed) return; - clientsClosed = true; - await Promise.all( - external.clients.map((c) => - c.close().catch((closeErr) => { - this.logger.warn( - `Failed to close external MCP client: ${ - closeErr instanceof Error ? closeErr.message : 'unknown error' - }`, - ); - }), - ), - ); - }; - - // Build the system prompt + Docmost toolset. If either throws after the - // external MCP lease was taken above, release the lease before rethrowing so - // the leased transports are not leaked (#185 review). - let system: string; - let docmostTools: Awaited>; + // #184 RUN-LIFECYCLE SAFETY NET (review fix). Everything from here until + // streamText's terminal callbacks (onFinish/onError/onAbort) take ownership of + // the run lifecycle runs under this try. Between a successful beginRun (run row + // 'running' + an in-memory AbortController) and streamText attaching there are + // bare awaits — the user-message insert, findAllByChat, convertToModelMessages, + // aiSettings.resolve — plus the buildSystemPrompt/forUser block and the + // synchronous streamText wiring, NONE of which finalize the run on failure (the + // inner catches only release MCP clients and rethrow). Without this net any + // transient failure there would leave ai_chat_runs stuck 'running' FOREVER + // (sweepRunning only runs at server startup): the partial unique index + the + // controller pre-check would then 409 every future turn in this chat until a + // restart, and an observer tab would poll forever. The catch settles the run to + // 'error' (which deletes the in-memory entry and writes a terminal row) before + // rethrowing to the controller. finalizeRun (via onSettled) is idempotent, so + // if streamText DID attach and a terminal callback also settles, exactly one + // terminal write wins — never a double-settle. try { - system = buildSystemPrompt({ - workspace, - adminPrompt: resolved?.systemPrompt, - // The role (pre-resolved by the controller) REPLACES the persona layer; - // the safety framework is still appended by buildSystemPrompt. - roleInstructions: role?.instructions, - // Server-validated open page (authoritative title), not the client value. - openedPage: openPageContext, - // Guidance only for servers that connected and yielded ≥1 callable tool. - mcpInstructions: external.instructions, - // History-confirmed interrupt-resume flag (#198): adds the interrupt note - // so the model treats the partial answer above as cut off, not finished. - interrupted, - }); + // Extract the incoming user turn (the last user message from useChat). + const incoming = lastUserMessage(body.messages); + const incomingText = uiMessageText(incoming); - // Pass the resolved chatId so the write tools can mint provenance tokens - // (access + collab) carrying { actor:'agent', aiChatId: chatId }, making - // agent REST/collab writes attributable and non-spoofable (§6.5/§6.6). - docmostTools = await this.tools.forUser( - user, - sessionId, - workspace.id, - chatId, - // Same server-validated open page used by the system prompt above; - // exposed to the model via getCurrentPage so page identity (and the - // AUTHORITATIVE title) survives prompt mangling / client title spoofing. - openPageContext, - ); - } catch (err) { - await closeExternalClients(); - throw err; - } - - const tools = { ...external.tools, ...docmostTools }; - - // Accumulate the turn's streamed output so a provider error / disconnect can - // persist the PARTIAL answer the user already saw — the SDK's onError/onAbort - // callbacks don't hand us the in-progress text. `capturedSteps` holds finished - // steps (tool calls + their text); `inProgressText` holds the text streamed in - // the CURRENT, not-yet-finished step, reset whenever a step finishes. - const capturedSteps: StepLike[] = []; - let inProgressText = ''; - - // Step-granular durability (#183): create the assistant row UPFRONT in the - // 'streaming' state (before any token), then UPDATE it as each step finishes - // and finalize it once on the terminal callback. If the process dies - // mid-turn the row survives with every finished step already persisted; the - // startup sweep (sweepStreaming) later flips a dangling 'streaming' row to - // 'aborted'. The DB is now the single source of truth for the turn — the - // socket is never required for the write path. A failed upfront insert is - // logged and leaves assistantId undefined; the per-step/terminal updates then - // no-op (guarded below) so the turn still streams to the user. - let assistantId: string | undefined; - try { - const seed = flushAssistant([], '', 'streaming'); - const seeded = await this.aiChatMessageRepo.insert({ + // Persist the user message before contacting the model. + await this.aiChatMessageRepo.insert({ chatId, workspaceId: workspace.id, userId: user.id, - role: 'assistant', - content: seed.content, - // jsonb columns: cast through never (same as the user insert above). - toolCalls: (seed.toolCalls ?? null) as never, - metadata: seed.metadata as never, - status: seed.status, + role: 'user', + content: incomingText, + // jsonb column: UIMessage parts are JSON-serializable at runtime but not + // structurally `JsonValue`, so cast through unknown. + metadata: (incoming?.parts ? { parts: incoming.parts } : null) as never, }); - assistantId = seeded?.id; - } catch (err) { - this.logger.error( - `Failed to insert upfront assistant row (chat ${chatId}, workspace ${workspace.id})`, - err as Error, - ); - } - // Per-step (non-terminal) update: persist the finished steps the moment a - // step ends. Tolerant — a failed update is logged and swallowed so it never - // throws into the stream. Keeps status 'streaming'. - const updateStreaming = async (): Promise => { - if (!assistantId) return; - // Cheap short-circuit once the turn is finalized (see `finalized` below). - // The AUTHORITATIVE guard is `onlyIfStreaming` on the UPDATE: a late - // fire-and-forget step update could still be in flight on another pool - // connection when finalize runs, so the SQL `WHERE status='streaming'` - // (not this flag) is what prevents it clobbering the terminal row. - if (finalized) return; + // Rebuild the conversation from persisted history (not the client payload), + // so the model always sees the authoritative server-side transcript. Load + // the FULL history in chronological order (oldest -> newest, incl. the user + // message just inserted above) so NO turns are dropped — there is no + // recent-tail window anymore. `findAllByChat` keeps a 5000-row memory-safety + // backstop (on overflow it keeps the NEWEST rows and logs a warning); that + // is a safety net far above any realistic chat, not a conversational limit. + const history = await this.aiChatMessageRepo.findAllByChat( + chatId, + workspace.id, + ); + const uiMessages = history.map(rowToUiMessage); + // convertToModelMessages is async in ai@6.0.134 (returns Promise). + const messages = await convertToModelMessages(uiMessages); + + // Interrupt-resume detection (#198): the client "send now" flag is only a + // hint — confirm it against the persisted history (the preceding assistant + // turn must really be aborted/streaming) so a spoofed flag cannot inject the + // interrupt note onto an ordinary turn. The partial output the model needs is + // already in `messages` (the aborted assistant row replays via findRecent). + const interrupted = isInterruptResume(history, body.interrupted); + + // The model is resolved by the controller before hijack (clean 503 path). + // Here we only need the admin-configured system prompt. + const resolved = await this.aiSettings.resolve(workspace.id); + + // Build the external MCP toolset FIRST so the system prompt can carry each + // connected server's admin-authored guidance (#180). Merge in admin- + // configured external MCP tools (web search, etc.; §6.8). A down/slow + // external server never crashes the turn — toolsFor skips it and records the + // outcome. The returned client handles MUST be closed in the streamText + // lifecycle (onFinish/onError/onAbort) — leaking them is a bug. Docmost + // tools take precedence on a name clash (external are namespaced, so a clash + // is not expected; the spread order makes intent explicit). + let external: Awaited> = { + tools: {}, + clients: [], + outcomes: [], + instructions: [], + }; try { - await this.aiChatMessageRepo.update( - assistantId, - workspace.id, - flushAssistant(capturedSteps, '', 'streaming'), - { onlyIfStreaming: true }, - ); + external = await this.mcpClients.toolsFor(workspace.id); } catch (err) { + // Building the external toolset must never break the turn; proceed with + // Docmost-only tools. Never log URLs/headers — short message only. this.logger.warn( - `Failed to update streaming assistant row: ${ + `External MCP toolset unavailable: ${ err instanceof Error ? err.message : 'unknown error' }`, ); } - }; - // Serialize the per-step updates (#183 review): onStepFinish fires them - // without await, so two could otherwise commit out of order on different pool - // connections (step N landing after N+1). Chaining each onto the previous - // keeps the persisted row monotonic with step order; each link short-circuits - // on `finalized`, so a tail of late updates is cheap. - let stepUpdateChain: Promise = Promise.resolve(); + // Close every external client EXACTLY ONCE across the turn's terminal + // callbacks (onFinish/onError/onAbort all fire at most once collectively, + // but guard anyway). DEFINED HERE — before the prompt/toolset are built — so + // that if buildSystemPrompt or forUser throws AFTER the external lease was + // taken (toolsFor above), the lease is still released. Otherwise its refCount + // stays >= 1 forever and the external undici sockets leak until restart + // (#180 reorder moved toolsFor ahead of these; #185 review). Close errors are + // swallowed so they never break the response. + let clientsClosed = false; + const closeExternalClients = async (): Promise => { + if (clientsClosed) return; + clientsClosed = true; + await Promise.all( + external.clients.map((c) => + c.close().catch((closeErr) => { + this.logger.warn( + `Failed to close external MCP client: ${ + closeErr instanceof Error ? closeErr.message : 'unknown error' + }`, + ); + }), + ), + ); + }; - // Terminal finalize: write the completed/error/aborted row exactly once - // across the (mutually-exclusive, at-most-once) onFinish/onError/onAbort - // callbacks — mirroring the pre-#183 persist-at-most-once guard for the - // TERMINAL status (the row may be updated many times with 'streaming' before - // this fires once). - let finalized = false; - const finalizeAssistant = async ( - flushed: AssistantFlush, - ): Promise => { - if (finalized) return; - finalized = true; - const plan = planFinalizeAssistant(assistantId); + // Build the system prompt + Docmost toolset. If either throws after the + // external MCP lease was taken above, release the lease before rethrowing so + // the leased transports are not leaked (#185 review). + let system: string; + let docmostTools: Awaited>; try { - // Shared dispatch (see applyFinalize): UPDATE the upfront row, or — when - // the upfront insert failed (kind 'insert') — INSERT the terminal row as - // the only safety against losing the turn entirely. - await applyFinalize( - this.aiChatMessageRepo, - plan, - { chatId, workspaceId: workspace.id, userId: user.id }, - flushed, + system = buildSystemPrompt({ + workspace, + adminPrompt: resolved?.systemPrompt, + // The role (pre-resolved by the controller) REPLACES the persona layer; + // the safety framework is still appended by buildSystemPrompt. + roleInstructions: role?.instructions, + // Server-validated open page (authoritative title), not the client value. + openedPage: openPageContext, + // Guidance only for servers that connected and yielded ≥1 callable tool. + mcpInstructions: external.instructions, + // History-confirmed interrupt-resume flag (#198): adds the interrupt note + // so the model treats the partial answer above as cut off, not finished. + interrupted, + }); + + // Pass the resolved chatId so the write tools can mint provenance tokens + // (access + collab) carrying { actor:'agent', aiChatId: chatId }, making + // agent REST/collab writes attributable and non-spoofable (§6.5/§6.6). + docmostTools = await this.tools.forUser( + user, + sessionId, + workspace.id, + chatId, + // Same server-validated open page used by the system prompt above; + // exposed to the model via getCurrentPage so page identity (and the + // AUTHORITATIVE title) survives prompt mangling / client title spoofing. + openPageContext, ); + } catch (err) { + await closeExternalClients(); + throw err; + } + + const tools = { ...external.tools, ...docmostTools }; + + // Accumulate the turn's streamed output so a provider error / disconnect can + // persist the PARTIAL answer the user already saw — the SDK's onError/onAbort + // callbacks don't hand us the in-progress text. `capturedSteps` holds finished + // steps (tool calls + their text); `inProgressText` holds the text streamed in + // the CURRENT, not-yet-finished step, reset whenever a step finishes. + const capturedSteps: StepLike[] = []; + let inProgressText = ''; + + // Step-granular durability (#183): create the assistant row UPFRONT in the + // 'streaming' state (before any token), then UPDATE it as each step finishes + // and finalize it once on the terminal callback. If the process dies + // mid-turn the row survives with every finished step already persisted; the + // startup sweep (sweepStreaming) later flips a dangling 'streaming' row to + // 'aborted'. The DB is now the single source of truth for the turn — the + // socket is never required for the write path. A failed upfront insert is + // logged and leaves assistantId undefined; the per-step/terminal updates then + // no-op (guarded below) so the turn still streams to the user. + let assistantId: string | undefined; + try { + const seed = flushAssistant([], '', 'streaming'); + const seeded = await this.aiChatMessageRepo.insert({ + chatId, + workspaceId: workspace.id, + userId: user.id, + role: 'assistant', + content: seed.content, + // jsonb columns: cast through never (same as the user insert above). + toolCalls: (seed.toolCalls ?? null) as never, + metadata: seed.metadata as never, + status: seed.status, + }); + assistantId = seeded?.id; } catch (err) { this.logger.error( - `Failed to finalize assistant message (kind=${plan.kind})`, + `Failed to insert upfront assistant row (chat ${chatId}, workspace ${workspace.id})`, err as Error, ); } - }; - // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure - // first-chunk latency, the model-silent gap right before a disconnect, and - // how many SSE heartbeats were written, so a Safari drop can be classified - // (idle-gap vs hard wall-clock cap vs slow first chunk). - const streamStartedAt = Date.now(); - let firstModelChunkAt: number | undefined; - let lastModelChunkAt = streamStartedAt; - let heartbeatsSent = 0; - - // NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous - // failure here (or in pipe below) would skip the terminal callbacks, so the - // catch releases the leased external clients to avoid a connection leak. - let result: ReturnType; - try { - result = streamText({ - model, - system, - messages, - tools, - // No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full - // page body for the write tools) are emitted as OUTPUT tokens, so a fixed - // cap would truncate complex tool calls mid-argument. Let the model use its - // natural per-step budget. (Cost/credit limits are an account concern, not - // something to enforce by silently breaking the agent.) - stopWhen: stepCountIs(MAX_AGENT_STEPS), - // Forced finalization: reserve the LAST allowed step for a text-only - // answer. Without this, a turn that spends all its steps on tool calls - // ends with no assistant text (an empty turn). prepareAgentStep forbids - // further tool calls and appends a synthesis instruction on that step, - // concatenated onto the original `system` so the persona is preserved. - prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system), - abortSignal: signal, - onChunk: ({ chunk }) => { - // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model - // output chunk means the stream is actively emitting bytes; track first - // + most-recent activity timestamps. - const now = Date.now(); - firstModelChunkAt ??= now; - lastModelChunkAt = now; - // 'text-delta' is the assistant's prose; tool-call args are separate chunk - // types — so this mirrors exactly what streams to the client. - if (chunk.type === 'text-delta') inProgressText += chunk.text; - }, - onStepFinish: (step) => { - // The finished step's full text is now in `step.text`; fold it in and reset - // the in-progress accumulator for the next step. - capturedSteps.push(step as StepLike); - inProgressText = ''; - // Step-granular durability (#183): persist this finished step (its text + - // tool calls + tool RESULTS) the moment it ends, so a process death after - // this point still recovers the step. Not awaited here (never block the - // stream), but SERIALIZED via stepUpdateChain so the writes commit in - // step order; updateStreaming is error-tolerant (logs + swallows). - stepUpdateChain = stepUpdateChain.then(() => updateStreaming()); - }, - onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => { - // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success - // baseline for Safari comparison. - const diagNow = Date.now(); - this.logger.log( - `AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` + - `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + - `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`, + // Link the assistant message (the #183 projection) to its run (#184), so a + // reconnecting client can resolve the run's output. Best-effort. + if (runId && assistantId) { + try { + await runHooks?.onAssistantSeeded?.(runId, assistantId); + } catch (err) { + this.logger.warn( + `Failed to link assistant row to run ${runId}: ${ + err instanceof Error ? err.message : 'unknown error' + }`, ); - // Finalize the assistant row (#183): the upfront 'streaming' row is - // UPDATEd to 'completed' with the turn's final text, cumulative usage and - // full UIMessage parts. We pass the SDK `steps` (which carry the final - // step's text) as the captured steps so metadata.parts matches the - // pre-#183 onFinish record exactly; `inProgressText` is '' here (the last - // step already finished). Final-step usage (usage.input+output) ≈ the - // conversation's CURRENT context size, distinct from totalUsage. - // - // COLUMN-SEMANTICS NOTE (#183): `content` is built by flushAssistant as - // the CONCATENATION of every step's text (stepsText), whereas pre-#183 - // it stored only the FINAL step's text. This is a deliberate, harmless - // change: the UI and the Markdown export render from `metadata.parts` - // (per-step text + tool parts), not from `content`; `content` is the - // plain-text projection (full-text search / fallback). A multi-step - // turn's `content` therefore now holds all steps' prose, not just the - // last block. - await finalizeAssistant( - flushAssistant(steps as StepLike[], '', 'completed', { - finishReason: finishReason as string, - usage: totalUsage as StreamUsage, - contextTokens: - (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || - undefined, - // Max context window for the chat header badge denominator; - // resolved from the admin-configured provider settings (in - // closure scope here). Omitted/0 = no limit. - maxContextTokens: resolved?.chatContextWindow, - }), - ); - // Lifecycle: release the external MCP clients leased for this turn. - await closeExternalClients(); + } + } - // Generate the chat title for a freshly created chat AFTER the stream's - // provider call has completed — NOT concurrently with it. The z.ai coding - // endpoint stalls one of two concurrent requests to the same plan, which - // black-holed the chat stream (~300s headers timeout) when title - // generation raced it. Running it here (solo, fire-and-forget) avoids the - // race; never block the turn on it, swallow any error. - if (isNewChat && incomingText) { - void this.generateTitle(chatId, workspace.id, incomingText).catch( - (err) => { - this.logger.warn( - `Title generation failed: ${(err as Error)?.message ?? err}`, - ); - }, + // Per-step (non-terminal) update: persist the finished steps the moment a + // step ends. Tolerant — a failed update is logged and swallowed so it never + // throws into the stream. Keeps status 'streaming'. + const updateStreaming = async (): Promise => { + if (!assistantId) return; + // Cheap short-circuit once the turn is finalized (see `finalized` below). + // The AUTHORITATIVE guard is `onlyIfStreaming` on the UPDATE: a late + // fire-and-forget step update could still be in flight on another pool + // connection when finalize runs, so the SQL `WHERE status='streaming'` + // (not this flag) is what prevents it clobbering the terminal row. + if (finalized) return; + try { + await this.aiChatMessageRepo.update( + assistantId, + workspace.id, + flushAssistant(capturedSteps, '', 'streaming'), + { onlyIfStreaming: true }, + ); + } catch (err) { + this.logger.warn( + `Failed to update streaming assistant row: ${ + err instanceof Error ? err.message : 'unknown error' + }`, + ); + } + }; + + // Serialize the per-step updates (#183 review): onStepFinish fires them + // without await, so two could otherwise commit out of order on different pool + // connections (step N landing after N+1). Chaining each onto the previous + // keeps the persisted row monotonic with step order; each link short-circuits + // on `finalized`, so a tail of late updates is cheap. + let stepUpdateChain: Promise = Promise.resolve(); + + // Terminal finalize: write the completed/error/aborted row exactly once + // across the (mutually-exclusive, at-most-once) onFinish/onError/onAbort + // callbacks — mirroring the pre-#183 persist-at-most-once guard for the + // TERMINAL status (the row may be updated many times with 'streaming' before + // this fires once). + let finalized = false; + const finalizeAssistant = async ( + flushed: AssistantFlush, + ): Promise => { + if (finalized) return; + finalized = true; + const plan = planFinalizeAssistant(assistantId); + try { + // Shared dispatch (see applyFinalize): UPDATE the upfront row, or — when + // the upfront insert failed (kind 'insert') — INSERT the terminal row as + // the only safety against losing the turn entirely. + await applyFinalize( + this.aiChatMessageRepo, + plan, + { chatId, workspaceId: workspace.id, userId: user.id }, + flushed, + ); + } catch (err) { + this.logger.error( + `Failed to finalize assistant message (kind=${plan.kind})`, + err as Error, + ); + } + }; + + // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure + // first-chunk latency, the model-silent gap right before a disconnect, and + // how many SSE heartbeats were written, so a Safari drop can be classified + // (idle-gap vs hard wall-clock cap vs slow first chunk). + const streamStartedAt = Date.now(); + let firstModelChunkAt: number | undefined; + let lastModelChunkAt = streamStartedAt; + let heartbeatsSent = 0; + + // NOTE: streamText is synchronous in v6 — do NOT await it. A synchronous + // failure here (or in pipe below) would skip the terminal callbacks, so the + // catch releases the leased external clients to avoid a connection leak. + let result: ReturnType; + try { + result = streamText({ + model, + system, + messages, + tools, + // No maxOutputTokens cap on the agent: tool-call arguments (e.g. a full + // page body for the write tools) are emitted as OUTPUT tokens, so a fixed + // cap would truncate complex tool calls mid-argument. Let the model use its + // natural per-step budget. (Cost/credit limits are an account concern, not + // something to enforce by silently breaking the agent.) + stopWhen: stepCountIs(MAX_AGENT_STEPS), + // Forced finalization: reserve the LAST allowed step for a text-only + // answer. Without this, a turn that spends all its steps on tool calls + // ends with no assistant text (an empty turn). prepareAgentStep forbids + // further tool calls and appends a synthesis instruction on that step, + // concatenated onto the original `system` so the persona is preserved. + prepareStep: ({ stepNumber }) => prepareAgentStep(stepNumber, system), + // #184: the RUN's signal (explicit-stop) when a run wraps this turn, else + // the socket-bound signal (legacy). A browser disconnect aborts only in + // the legacy path. + abortSignal: effectiveSignal, + onChunk: ({ chunk }) => { + // DIAGNOSTIC (Safari stream-drop investigation) — temporary. Any model + // output chunk means the stream is actively emitting bytes; track first + // + most-recent activity timestamps. + const now = Date.now(); + firstModelChunkAt ??= now; + lastModelChunkAt = now; + // 'text-delta' is the assistant's prose; tool-call args are separate chunk + // types — so this mirrors exactly what streams to the client. + if (chunk.type === 'text-delta') inProgressText += chunk.text; + }, + onStepFinish: (step) => { + // The finished step's full text is now in `step.text`; fold it in and reset + // the in-progress accumulator for the next step. + capturedSteps.push(step as StepLike); + inProgressText = ''; + // Step-granular durability (#183): persist this finished step (its text + + // tool calls + tool RESULTS) the moment it ends, so a process death after + // this point still recovers the step. Not awaited here (never block the + // stream), but SERIALIZED via stepUpdateChain so the writes commit in + // step order; updateStreaming is error-tolerant (logs + swallows). + stepUpdateChain = stepUpdateChain.then(() => updateStreaming()); + // #184: persist the run's progress (finished-step count). Fire-and- + // forget; the hook swallows its own errors. + if (runId) runHooks?.onStep?.(runId, capturedSteps.length); + }, + onFinish: async ({ + text, + finishReason, + totalUsage, + usage, + steps, + }) => { + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: success + // baseline for Safari comparison. + const diagNow = Date.now(); + this.logger.log( + `AI chat stream DIAGNOSTIC (finish): elapsed=${diagNow - streamStartedAt}ms ` + + `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + + `heartbeatsSent=${heartbeatsSent} steps=${steps.length}`, ); - } - }, - onError: async ({ error }) => { - // NestJS Logger.error(message, stack?, context?): pass the real message - // (with statusCode when present) + the stack string, not the Error - // object, so the actual provider cause is clearly logged. Reuse the - // shared formatter so provider error formatting stays unified. - const e = error as { stack?: string }; - const errorText = describeProviderError(error, String(error)); - this.logger.error(`AI chat stream error: ${errorText}`, e?.stack); - // DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of - // an error-terminated stream. - const diagNow = Date.now(); - this.logger.warn( - `AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` + - `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + - `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`, - ); - // Finalize the PARTIAL answer streamed before the failure (text + any - // finished tool steps) WITH the error in metadata, so the turn shows what - // the user already saw plus the cause — not just a bare error. Status - // 'error' (#183). - await finalizeAssistant( - flushAssistant(capturedSteps, inProgressText, 'error', { - error: errorText, - }), - ); - await closeExternalClients(); - }, - onAbort: async ({ steps }) => { - const partialChars = - capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) + - inProgressText.length; - // Unlike onError/onFinish, this terminal path otherwise writes nothing, so - // an aborted turn (client disconnect / proxy drop / stop()) would be - // invisible in the logs. Log it (warn) so the abort is traceable. - this.logger.warn( - `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` + - `step(s), ${partialChars} chars partial text; persisting partial turn.`, - ); - // DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key - // line — classifies the Safari drop. - const diagNow = Date.now(); - this.logger.warn( - `AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` + - `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + - `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` + - `steps=${steps.length}`, - ); - await finalizeAssistant( - flushAssistant(capturedSteps, inProgressText, 'aborted'), - ); - await closeExternalClients(); - }, - }); + // Finalize the assistant row (#183): the upfront 'streaming' row is + // UPDATEd to 'completed' with the turn's final text, cumulative usage and + // full UIMessage parts. We pass the SDK `steps` (which carry the final + // step's text) as the captured steps so metadata.parts matches the + // pre-#183 onFinish record exactly; `inProgressText` is '' here (the last + // step already finished). Final-step usage (usage.input+output) ≈ the + // conversation's CURRENT context size, distinct from totalUsage. + // + // COLUMN-SEMANTICS NOTE (#183): `content` is built by flushAssistant as + // the CONCATENATION of every step's text (stepsText), whereas pre-#183 + // it stored only the FINAL step's text. This is a deliberate, harmless + // change: the UI and the Markdown export render from `metadata.parts` + // (per-step text + tool parts), not from `content`; `content` is the + // plain-text projection (full-text search / fallback). A multi-step + // turn's `content` therefore now holds all steps' prose, not just the + // last block. + await finalizeAssistant( + flushAssistant(steps as StepLike[], '', 'completed', { + finishReason: finishReason as string, + usage: totalUsage as StreamUsage, + contextTokens: + (usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) || + undefined, + // Max context window for the chat header badge denominator; + // resolved from the admin-configured provider settings (in + // closure scope here). Omitted/0 = no limit. + maxContextTokens: resolved?.chatContextWindow, + }), + ); + // #184: settle the RUN as succeeded (best-effort, after the projection + // is finalized above). + if (runId) await runHooks?.onSettled?.(runId, 'completed'); + // Lifecycle: release the external MCP clients leased for this turn. + await closeExternalClients(); - // Drain the stream independently of the client socket so the turn always - // runs to completion (or to its abort) and the terminal callbacks - // (onFinish/onError/onAbort) fire — releasing the per-turn object graph - // (history, the per-request toolset closures, captured steps, SDK buffers) - // and closing leased MCP clients. WITHOUT this, a client disconnect leaves - // the pipe's dead socket as the only reader; backpressure stalls the stream, - // the callbacks never run, and every dropped turn stays rooted in memory — - // the heap-OOM leak. consumeStream removes that backpressure (AI SDK v6 - // "Handling client disconnects"). NOT awaited (fire-and-forget); the stream - // errors are already logged by the streamText `onError` callback above, so - // swallow here to avoid an unhandledRejection. - void result.consumeStream({ onError: () => undefined }); + // Generate the chat title for a freshly created chat AFTER the stream's + // provider call has completed — NOT concurrently with it. The z.ai coding + // endpoint stalls one of two concurrent requests to the same plan, which + // black-holed the chat stream (~300s headers timeout) when title + // generation raced it. Running it here (solo, fire-and-forget) avoids the + // race; never block the turn on it, swallow any error. + if (isNewChat && incomingText) { + void this.generateTitle(chatId, workspace.id, incomingText).catch( + (err) => { + this.logger.warn( + `Title generation failed: ${(err as Error)?.message ?? err}`, + ); + }, + ); + } + }, + onError: async ({ error }) => { + // NestJS Logger.error(message, stack?, context?): pass the real message + // (with statusCode when present) + the stack string, not the Error + // object, so the actual provider cause is clearly logged. Reuse the + // shared formatter so provider error formatting stays unified. + const e = error as { stack?: string }; + const errorText = describeProviderError(error, String(error)); + this.logger.error(`AI chat stream error: ${errorText}`, e?.stack); + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: timing of + // an error-terminated stream. + const diagNow = Date.now(); + this.logger.warn( + `AI chat stream DIAGNOSTIC (error): elapsed=${diagNow - streamStartedAt}ms ` + + `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + + `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`, + ); + // Finalize the PARTIAL answer streamed before the failure (text + any + // finished tool steps) WITH the error in metadata, so the turn shows what + // the user already saw plus the cause — not just a bare error. Status + // 'error' (#183). + await finalizeAssistant( + flushAssistant(capturedSteps, inProgressText, 'error', { + error: errorText, + }), + ); + // #184: settle the RUN as failed, carrying the provider/transport cause. + if (runId) await runHooks?.onSettled?.(runId, 'error', errorText); + await closeExternalClients(); + }, + onAbort: async ({ steps }) => { + const partialChars = + capturedSteps.reduce((n, s) => n + (s.text?.length ?? 0), 0) + + inProgressText.length; + // Unlike onError/onFinish, this terminal path otherwise writes nothing, so + // an aborted turn (client disconnect / proxy drop / stop()) would be + // invisible in the logs. Log it (warn) so the abort is traceable. + this.logger.warn( + `AI chat stream aborted (chat ${chatId}) after ${steps.length} ` + + `step(s), ${partialChars} chars partial text; persisting partial turn.`, + ); + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: THE key + // line — classifies the Safari drop. + const diagNow = Date.now(); + this.logger.warn( + `AI chat stream DIAGNOSTIC (abort/disconnect): elapsed=${diagNow - streamStartedAt}ms ` + + `firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` + + `silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` + + `steps=${steps.length}`, + ); + await finalizeAssistant( + flushAssistant(capturedSteps, inProgressText, 'aborted'), + ); + // #184: settle the RUN as aborted (an explicit user stop reached the + // run's signal; a disconnect does not abort a run-wrapped turn). + if (runId) await runHooks?.onSettled?.(runId, 'aborted'); + await closeExternalClients(); + }, + }); - // Stream the UI-message protocol straight to the hijacked Node response. - // Without onError the AI SDK masks the cause ('An error occurred.') and the - // UI shows a generic failure. Surface the real provider message instead. - // AI SDK error messages / 4xx bodies never contain the API key, so this is - // safe; we never dump the resolved config/apiKey. - // - // SSE buffering / proxy note: pipeUIMessageStreamToResponse writes the - // headers immediately (res.writeHead) and each chunk incrementally, and the - // SDK's default UI_MESSAGE_STREAM_HEADERS already include - // `x-accel-buffering: no` (disables nginx response buffering) plus - // `content-type: text/event-stream` and `cache-control: no-cache`. We pass - // `headers` explicitly anyway so the intent is visible here and survives any - // future change to the SDK defaults (prepareHeaders only fills a header when - // absent, so this never clobbers the SDK's content-type). DEPLOYMENT: the - // reverse proxy in front of this server MUST NOT buffer this route, or the - // whole response is released at once and nothing streams. nginx honours the - // `x-accel-buffering: no` header we send (and additionally set - // `proxy_buffering off; proxy_cache off;` for /api/ai-chat/stream); traefik - // does not buffer responses by default. - // Scrub the SDK's hop-by-hop Connection header before it writes the head (Safari/HTTP2). - stripStreamingHopByHopHeaders(res.raw); - // Running sum of per-step usage (v6 `finish-step.usage` is per-step). Sent - // as the cumulative authoritative usage so the client never jumps DOWN. - let cumulativeStepUsage: ChatStreamUsage | undefined; - result.pipeUIMessageStreamToResponse(res.raw, { - headers: { 'X-Accel-Buffering': 'no' }, - // Surface the authoritative chatId on the streamed assistant UI message so - // the client adopts the REAL id of the row we created, instead of guessing - // the newest chat in its list. `messageMetadata` is invoked by the AI SDK - // on the `start`, `finish-step` and `finish` stream parts (ai@6 — note the - // `finish-step` trigger relies on it being delivered as its own - // message-metadata chunk); we attach `chatId` on the `start` part so it - // reaches the client (as message.metadata.chatId) at the very first chunk — - // before any second tab can race a newer chat into the list. This fixes the - // two-tab "adoption race" (#137). + // Drain the stream independently of the client socket so the turn always + // runs to completion (or to its abort) and the terminal callbacks + // (onFinish/onError/onAbort) fire — releasing the per-turn object graph + // (history, the per-request toolset closures, captured steps, SDK buffers) + // and closing leased MCP clients. WITHOUT this, a client disconnect leaves + // the pipe's dead socket as the only reader; backpressure stalls the stream, + // the callbacks never run, and every dropped turn stays rooted in memory — + // the heap-OOM leak. consumeStream removes that backpressure (AI SDK v6 + // "Handling client disconnects"). NOT awaited (fire-and-forget); the stream + // errors are already logged by the streamText `onError` callback above, so + // swallow here to avoid an unhandledRejection. + void result.consumeStream({ onError: () => undefined }); + + // Stream the UI-message protocol straight to the hijacked Node response. + // Without onError the AI SDK masks the cause ('An error occurred.') and the + // UI shows a generic failure. Surface the real provider message instead. + // AI SDK error messages / 4xx bodies never contain the API key, so this is + // safe; we never dump the resolved config/apiKey. // - // `finish-step.usage` is PER-STEP (not cumulative) in v6, and the client - // merges each metadata.usage by replacement — so on a multi-step agent turn - // (up to MAX_AGENT_STEPS) the naive per-step value would make the live - // counter jump DOWN at each boundary. We keep a running sum here and send - // the CUMULATIVE usage, which converges to `finish.totalUsage` (#151). - messageMetadata: ({ part }) => { - const p = part as StreamMetadataPart; - if (p.type === 'finish-step') { - cumulativeStepUsage = accumulateStepUsage( - cumulativeStepUsage, - normalizeStreamUsage(p.usage), - ); - } - return chatStreamMetadata(p, chatId, cumulativeStepUsage); - }, - // Stream reasoning (thinking) parts to the client so the live counter can - // estimate reasoning tokens from streamed text. v6 default is already - // true; set explicitly so the intent survives any future SDK default - // change. Providers that don't emit reasoning text still surface the - // count via the authoritative `usage.reasoningTokens` on finish-step. - sendReasoning: true, - onError: (error: unknown) => { - // Reuse the shared formatter so provider error formatting stays - // unified between the log line and the streamed error message. - return describeProviderError(error, 'AI stream error'); - }, - }); + // SSE buffering / proxy note: pipeUIMessageStreamToResponse writes the + // headers immediately (res.writeHead) and each chunk incrementally, and the + // SDK's default UI_MESSAGE_STREAM_HEADERS already include + // `x-accel-buffering: no` (disables nginx response buffering) plus + // `content-type: text/event-stream` and `cache-control: no-cache`. We pass + // `headers` explicitly anyway so the intent is visible here and survives any + // future change to the SDK defaults (prepareHeaders only fills a header when + // absent, so this never clobbers the SDK's content-type). DEPLOYMENT: the + // reverse proxy in front of this server MUST NOT buffer this route, or the + // whole response is released at once and nothing streams. nginx honours the + // `x-accel-buffering: no` header we send (and additionally set + // `proxy_buffering off; proxy_cache off;` for /api/ai-chat/stream); traefik + // does not buffer responses by default. + // Scrub the SDK's hop-by-hop Connection header before it writes the head (Safari/HTTP2). + stripStreamingHopByHopHeaders(res.raw); + // Running sum of per-step usage (v6 `finish-step.usage` is per-step). Sent + // as the cumulative authoritative usage so the client never jumps DOWN. + let cumulativeStepUsage: ChatStreamUsage | undefined; + result.pipeUIMessageStreamToResponse(res.raw, { + headers: { 'X-Accel-Buffering': 'no' }, + // Surface the authoritative chatId on the streamed assistant UI message so + // the client adopts the REAL id of the row we created, instead of guessing + // the newest chat in its list. `messageMetadata` is invoked by the AI SDK + // on the `start`, `finish-step` and `finish` stream parts (ai@6 — note the + // `finish-step` trigger relies on it being delivered as its own + // message-metadata chunk); we attach `chatId` on the `start` part so it + // reaches the client (as message.metadata.chatId) at the very first chunk — + // before any second tab can race a newer chat into the list. This fixes the + // two-tab "adoption race" (#137). + // + // `finish-step.usage` is PER-STEP (not cumulative) in v6, and the client + // merges each metadata.usage by replacement — so on a multi-step agent turn + // (up to MAX_AGENT_STEPS) the naive per-step value would make the live + // counter jump DOWN at each boundary. We keep a running sum here and send + // the CUMULATIVE usage, which converges to `finish.totalUsage` (#151). + messageMetadata: ({ part }) => { + const p = part as StreamMetadataPart; + if (p.type === 'finish-step') { + cumulativeStepUsage = accumulateStepUsage( + cumulativeStepUsage, + normalizeStreamUsage(p.usage), + ); + } + return chatStreamMetadata(p, chatId, cumulativeStepUsage, runId); + }, + // Stream reasoning (thinking) parts to the client so the live counter can + // estimate reasoning tokens from streamed text. v6 default is already + // true; set explicitly so the intent survives any future SDK default + // change. Providers that don't emit reasoning text still surface the + // count via the authoritative `usage.reasoningTokens` on finish-step. + sendReasoning: true, + onError: (error: unknown) => { + // Reuse the shared formatter so provider error formatting stays + // unified between the log line and the streamed error message. + return describeProviderError(error, 'AI stream error'); + }, + }); - // Force the status line + headers onto the socket NOW (before the model's - // first token), so the proxy sees the response start immediately even if the - // provider's first chunk is delayed. writeToServerResponse already called - // writeHead synchronously above; flushHeaders is a belt-and-braces no-op once - // headers are sent, and is guarded for response-likes that lack it. - res.raw.flushHeaders?.(); - // Heartbeat: keep the SSE stream progressing during silent tool/think gaps (Safari/proxy idle timeout). - // DIAGNOSTIC (Safari stream-drop investigation) — temporary: count beats so a disconnect log can show - // how many pings were written before Safari dropped. - startSseHeartbeat(res.raw, 15_000, () => { - heartbeatsSent += 1; - }); + // Force the status line + headers onto the socket NOW (before the model's + // first token), so the proxy sees the response start immediately even if the + // provider's first chunk is delayed. writeToServerResponse already called + // writeHead synchronously above; flushHeaders is a belt-and-braces no-op once + // headers are sent, and is guarded for response-likes that lack it. + res.raw.flushHeaders?.(); + // Heartbeat: keep the SSE stream progressing during silent tool/think gaps (Safari/proxy idle timeout). + // DIAGNOSTIC (Safari stream-drop investigation) — temporary: count beats so a disconnect log can show + // how many pings were written before Safari dropped. + startSseHeartbeat(res.raw, 15_000, () => { + heartbeatsSent += 1; + }); + } catch (err) { + // Synchronous failure before/while wiring the stream: the terminal + // callbacks will not run, so release the leased external clients here and + // re-throw for the controller to surface on the socket. + await closeExternalClients(); + throw err; + } } catch (err) { - // Synchronous failure before/while wiring the stream: the terminal - // callbacks will not run, so release the leased external clients here and - // re-throw for the controller to surface on the socket. - await closeExternalClients(); + // #184 safety net (see the opening comment): settle the run on ANY failure + // before streamText's callbacks own the lifecycle, so the run row never + // stays 'running' forever (which would 409 every later turn in this chat). + // finalizeRun (onSettled) is idempotent — a settle here and a settle from a + // streamText callback collapse to a single terminal write. + if (runId) { + await runHooks?.onSettled?.( + runId, + 'error', + err instanceof Error + ? err.message + : 'Agent run failed before streaming started', + ); + } throw err; } } @@ -856,7 +993,10 @@ export class AiChatService implements OnModuleInit { * permission). The content is truncated to keep the prompt cheap and within * context limits. Throws AiNotConfiguredException (503) if AI is unconfigured. */ - async generatePageTitle(workspaceId: string, content: string): Promise { + async generatePageTitle( + workspaceId: string, + content: string, + ): Promise { const model = await this.ai.getChatModel(workspaceId); const { text } = await generateText({ model, @@ -979,8 +1119,12 @@ export function chatStreamMetadata( part: StreamMetadataPart, chatId: string, cumulativeStepUsage?: ChatStreamUsage, -): { chatId: string } | { usage: ChatStreamUsage } | undefined { - if (part.type === 'start') return { chatId }; + // #184: the active run's id, attached alongside `chatId` on the `start` part so + // the client learns the run it can reconnect to / stop. Omitted when the turn + // is not run-wrapped (legacy path). + runId?: string, +): { chatId: string; runId?: string } | { usage: ChatStreamUsage } | undefined { + if (part.type === 'start') return runId ? { chatId, runId } : { chatId }; if (part.type === 'finish-step') { return cumulativeStepUsage ? { usage: cumulativeStepUsage } : undefined; } diff --git a/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts b/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts index 5c3e97e1..1fd3c8ff 100644 --- a/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts +++ b/apps/server/src/core/ai-chat/dto/ai-chat.dto.ts @@ -43,6 +43,30 @@ export class BoundChatDto { pageId: string; } +/** + * Reconnect to the latest run of a chat (#184): fetch its persisted lifecycle + * state (and the assistant message it projects) for an in-flight or finished run. + */ +export class GetRunDto { + @IsString() + chatId: string; +} + +/** + * Explicitly STOP an agent run (#184): the user pressed Stop — distinct from a + * browser disconnect, which never stops a run. Either the run id (preferred, from + * the streamed start metadata) or the chat id (stop whatever run is active on it). + */ +export class StopRunDto { + @IsOptional() + @IsString() + runId?: string; + + @IsOptional() + @IsString() + chatId?: string; +} + /** Export a chat to Markdown (#183). `lang` localizes the few fixed * role/tool-action labels; defaults to English server-side. */ export class ExportChatDto { diff --git a/apps/server/src/database/database.module.ts b/apps/server/src/database/database.module.ts index da90ef35..f155dff1 100644 --- a/apps/server/src/database/database.module.ts +++ b/apps/server/src/database/database.module.ts @@ -31,6 +31,7 @@ import { FavoriteRepo } from '@docmost/db/repos/favorite/favorite.repo'; import { TemplateRepo } from '@docmost/db/repos/template/template.repo'; import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo'; import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo'; +import { AiChatRunRepo } from '@docmost/db/repos/ai-chat/ai-chat-run.repo'; import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo'; import { AiMcpServerRepo } from '@docmost/db/repos/ai-chat/ai-mcp-server.repo'; import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo'; @@ -104,6 +105,7 @@ import { normalizePostgresUrl } from '../common/helpers'; TemplateRepo, AiChatRepo, AiChatMessageRepo, + AiChatRunRepo, AiProviderCredentialsRepo, AiMcpServerRepo, AiAgentRoleRepo, @@ -137,6 +139,7 @@ import { normalizePostgresUrl } from '../common/helpers'; TemplateRepo, AiChatRepo, AiChatMessageRepo, + AiChatRunRepo, AiProviderCredentialsRepo, AiMcpServerRepo, AiAgentRoleRepo, diff --git a/apps/server/src/database/migrations/20260627T130000-ai-chat-runs.ts b/apps/server/src/database/migrations/20260627T130000-ai-chat-runs.ts new file mode 100644 index 00000000..78b1c178 --- /dev/null +++ b/apps/server/src/database/migrations/20260627T130000-ai-chat-runs.ts @@ -0,0 +1,104 @@ +import { type Kysely, sql } from 'kysely'; + +/** + * `ai_chat_runs` — the agent RUN as a first-class, server-side lifecycle object + * (#184 phase 1: autonomous agent runs detached from the browser window). + * + * Until now an agent turn lived ONLY as long as the HTTP request was open + * (`res.hijack()` in ai-chat.controller.ts); a browser disconnect aborted it. + * This table makes a turn a persistent object the server owns: it is created + * when a run starts, transitions pending -> running -> succeeded|failed|aborted, + * and survives the subscriber (browser) going away. The DB is the source of + * truth — a later client reconnects/sees the result by reading this row plus the + * assistant message it projects (`assistant_message_id`). + * + * The assistant message row (#183 step-granular durability) is the PROJECTION of + * a run's output; this row is the run's LIFECYCLE. They are linked by + * `assistant_message_id` (SET NULL if the message is later pruned). + * + * `status` : 'pending' | 'running' | 'succeeded' | 'failed' | 'aborted'. + * `trigger` : 'user' | 'autostart' | 'schedule' | 'api' | 'continue' — only + * 'user' is produced in phase 1; the others are reserved for the + * autonomy triggers deferred to phase 2 so they need no later + * migration. + * + * ONE ACTIVE RUN PER CHAT is enforced by a partial unique index on `chat_id` + * WHERE status IN ('pending','running'): an autonomous run and a user run can + * never trample each other on the same chat. Settled runs (succeeded/failed/ + * aborted) are excluded from the index so a chat can accumulate any number of + * historical runs. + */ +export async function up(db: Kysely): Promise { + await db.schema + .createTable('ai_chat_runs') + .ifNotExists() + .addColumn('id', 'uuid', (col) => + col.primaryKey().defaultTo(sql`gen_uuid_v7()`), + ) + .addColumn('chat_id', 'uuid', (col) => + col.references('ai_chats.id').onDelete('cascade').notNull(), + ) + .addColumn('workspace_id', 'uuid', (col) => + col.references('workspaces.id').onDelete('cascade').notNull(), + ) + // The human who triggered the run (audit). SET NULL on user deletion so the + // run history outlives its author; NULL is also the natural value for a + // future system/cron/api trigger with no human actor. + .addColumn('created_by', 'uuid', (col) => + col.references('users.id').onDelete('set null'), + ) + // The assistant message this run materializes (the #183 projection). SET NULL + // if that message row is later deleted; nullable because the run row is + // created a moment BEFORE the assistant row is seeded. + .addColumn('assistant_message_id', 'uuid', (col) => + col.references('ai_chat_messages.id').onDelete('set null'), + ) + .addColumn('trigger', 'varchar(20)', (col) => + col.notNull().defaultTo('user'), + ) + .addColumn('status', 'varchar(20)', (col) => + col.notNull().defaultTo('pending'), + ) + // Terminal error message for a failed run (provider/transport cause), + // mirroring the assistant message's metadata.error. + .addColumn('error', 'text', (col) => col) + // Number of agent steps finished so far (kept monotonic with the projection). + .addColumn('step_count', 'integer', (col) => col.notNull().defaultTo(0)) + // Set when an EXPLICIT user stop is requested (distinct from a mere browser + // disconnect, which never stops a run). The runner aborts the turn and the + // run settles as 'aborted'. + .addColumn('stop_requested_at', 'timestamptz', (col) => col) + .addColumn('started_at', 'timestamptz', (col) => col) + .addColumn('finished_at', 'timestamptz', (col) => col) + .addColumn('created_at', 'timestamptz', (col) => + col.notNull().defaultTo(sql`now()`), + ) + .addColumn('updated_at', 'timestamptz', (col) => + col.notNull().defaultTo(sql`now()`), + ) + .execute(); + + // Reconnect / "latest run for this chat" reads hit chat_id first. + await db.schema + .createIndex('ai_chat_runs_chat_id_idx') + .ifNotExists() + .on('ai_chat_runs') + .column('chat_id') + .execute(); + + // One ACTIVE run per chat (advisory at the DB level): a second pending/running + // run on the same chat is rejected, so a user turn and an autonomous turn can + // never race on the same chat. Partial so settled runs do not collide. + await db.schema + .createIndex('ai_chat_runs_one_active_per_chat') + .ifNotExists() + .on('ai_chat_runs') + .column('chat_id') + .unique() + .where(sql.ref('status'), 'in', sql`('pending','running')`) + .execute(); +} + +export async function down(db: Kysely): Promise { + await db.schema.dropTable('ai_chat_runs').execute(); +} diff --git a/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts b/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts index c9352e31..78cc7064 100644 --- a/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts +++ b/apps/server/src/database/repos/ai-chat/ai-chat-message.repo.ts @@ -121,6 +121,23 @@ export class AiChatMessageRepo { return rows.reverse(); } + /** Fetch a single message by id + workspace (e.g. a run's projection row for + * the #184 reconnect read). Returns undefined when nothing matches. */ + async findById( + id: string, + workspaceId: string, + trx?: KyselyTransaction, + ): Promise { + const db = dbOrTx(this.db, trx); + return db + .selectFrom('aiChatMessages') + .select(this.baseFields) + .where('id', '=', id) + .where('workspaceId', '=', workspaceId) + .where('deletedAt', 'is', null) + .executeTakeFirst(); + } + async insert( insertable: InsertableAiChatMessage, trx?: KyselyTransaction, diff --git a/apps/server/src/database/repos/ai-chat/ai-chat-run.repo.spec.ts b/apps/server/src/database/repos/ai-chat/ai-chat-run.repo.spec.ts new file mode 100644 index 00000000..1b5407de --- /dev/null +++ b/apps/server/src/database/repos/ai-chat/ai-chat-run.repo.spec.ts @@ -0,0 +1,84 @@ +import { AiChatRunRepo, SWEEP_RUN_STALE_MS } from './ai-chat-run.repo'; +import type { KyselyDB } from '../../types/kysely.types'; + +/** + * Unit coverage for AiChatRunRepo.sweepRunning over a chainable builder mock (no + * live DB). The F1 invariant under test (DECISION C): the BOOT sweep is + * UNCONDITIONAL — it adds NO `updatedAt <` predicate, so a fresh 'running' run + * (updatedAt = now) IS settled rather than skipped by a staleness window. The + * window is added ONLY when an explicit `staleMs` is supplied (the future phase-2 + * multi-instance timer sweep). We assert the EXACT predicates the spec mandates. + */ +describe('AiChatRunRepo.sweepRunning', () => { + type Recorded = { + table?: string; + set?: Record; + wheres: Array<[string, string, unknown]>; + returning?: string; + }; + + function makeDb(swept: Array<{ id: string }>): { + db: KyselyDB; + rec: Recorded; + } { + const rec: Recorded = { wheres: [] }; + const builder: Record = {}; + const chain = () => builder; + builder.set = (v: Record) => { + rec.set = v; + return builder; + }; + builder.where = (col: string, op: string, val: unknown) => { + rec.wheres.push([col, op, val]); + return builder; + }; + builder.returning = (col: string) => { + rec.returning = col; + return builder; + }; + builder.execute = () => Promise.resolve(swept); + void chain; + const db = { + updateTable: (table: string) => { + rec.table = table; + return builder; + }, + } as unknown as KyselyDB; + return { db, rec }; + } + + it('F1: the boot sweep (no staleMs) is UNCONDITIONAL — only a status filter, NO updatedAt window', async () => { + const { db, rec } = makeDb([{ id: 'r1' }, { id: 'r2' }]); + const repo = new AiChatRunRepo(db); + + const swept = await repo.sweepRunning(); + + expect(swept).toBe(2); + expect(rec.table).toBe('aiChatRuns'); + // The status filter is always present... + expect(rec.wheres).toContainEqual([ + 'status', + 'in', + expect.arrayContaining(['pending', 'running']), + ]); + // ...but a fresh 'running' run (updatedAt = now) must NOT be skipped: no + // updatedAt predicate at all on the boot path. + expect(rec.wheres.some(([col]) => col === 'updatedAt')).toBe(false); + // It flips to 'aborted' and stamps finishedAt. + expect(rec.set).toEqual( + expect.objectContaining({ status: 'aborted', finishedAt: expect.any(Date) }), + ); + }); + + it('phase-2 path: an explicit staleMs reintroduces the updatedAt window', async () => { + const { db, rec } = makeDb([]); + const repo = new AiChatRunRepo(db); + + await repo.sweepRunning({ staleMs: SWEEP_RUN_STALE_MS }); + + const updatedAtWhere = rec.wheres.find(([col]) => col === 'updatedAt'); + expect(updatedAtWhere).toBeDefined(); + expect(updatedAtWhere![1]).toBe('<'); + expect(updatedAtWhere![2]).toBeInstanceOf(Date); + }); +}); diff --git a/apps/server/src/database/repos/ai-chat/ai-chat-run.repo.ts b/apps/server/src/database/repos/ai-chat/ai-chat-run.repo.ts new file mode 100644 index 00000000..7bb6bcdb --- /dev/null +++ b/apps/server/src/database/repos/ai-chat/ai-chat-run.repo.ts @@ -0,0 +1,212 @@ +import { Injectable, Logger } from '@nestjs/common'; +import { InjectKysely } from 'nestjs-kysely'; +import { sql } from 'kysely'; +import { KyselyDB, KyselyTransaction } from '../../types/kysely.types'; +import { dbOrTx } from '../../utils'; +import { + AiChatRun, + InsertableAiChatRun, +} from '@docmost/db/types/entity.types'; + +// Statuses that count as "the run is still live" (an autonomous and a user run +// must never both be live on one chat — enforced by the partial unique index and +// checked here for friendly 409s before the insert races the constraint). +export const ACTIVE_RUN_STATUSES = ['pending', 'running'] as const; + +// Crash-recovery sweep recency threshold (mirrors AiChatMessageRepo.sweepStreaming, +// #183): when a staleness window is supplied, a 'running'/'pending' run is only +// swept to 'aborted' once it has been UNTOUCHED for this long, so a sibling +// replica's boot-sweep can never abort a run another replica is actively +// executing. The runner bumps `updatedAt` on every step, so a live run never +// matches. PHASE 1 is single-process and the boot sweep passes NO window (every +// dangling run is settled unconditionally — see sweepRunning / F1). This constant +// is the window to reintroduce for the phase-2 multi-instance timer sweep. +export const SWEEP_RUN_STALE_MS = 10 * 60 * 1000; // 10 minutes + +/** + * Repository for `ai_chat_runs` (#184 phase 1): the agent run as a first-class, + * server-side lifecycle object detached from the HTTP request. The run row is the + * point a client subscribes/reconnects to (by `id` or by chat); the assistant + * message it links to (`assistantMessageId`) is the #183 projection of its output. + */ +@Injectable() +export class AiChatRunRepo { + private readonly logger = new Logger(AiChatRunRepo.name); + + private baseFields: Array = [ + 'id', + 'chatId', + 'workspaceId', + 'createdBy', + 'assistantMessageId', + 'trigger', + 'status', + 'error', + 'stepCount', + 'stopRequestedAt', + 'startedAt', + 'finishedAt', + 'createdAt', + 'updatedAt', + ]; + + constructor(@InjectKysely() private readonly db: KyselyDB) {} + + async insert( + insertable: InsertableAiChatRun, + trx?: KyselyTransaction, + ): Promise { + const db = dbOrTx(this.db, trx); + return db + .insertInto('aiChatRuns') + .values(insertable) + .returning(this.baseFields) + .executeTakeFirst(); + } + + async findById( + id: string, + workspaceId: string, + trx?: KyselyTransaction, + ): Promise { + const db = dbOrTx(this.db, trx); + return db + .selectFrom('aiChatRuns') + .select(this.baseFields) + .where('id', '=', id) + .where('workspaceId', '=', workspaceId) + .executeTakeFirst(); + } + + /** The currently-active (pending|running) run for a chat, if any. At most one + * exists thanks to the partial unique index. */ + async findActiveByChat( + chatId: string, + workspaceId: string, + trx?: KyselyTransaction, + ): Promise { + const db = dbOrTx(this.db, trx); + return db + .selectFrom('aiChatRuns') + .select(this.baseFields) + .where('chatId', '=', chatId) + .where('workspaceId', '=', workspaceId) + .where('status', 'in', ACTIVE_RUN_STATUSES as unknown as string[]) + .executeTakeFirst(); + } + + /** The most-recent run for a chat (active or settled) — the reconnect target. */ + async findLatestByChat( + chatId: string, + workspaceId: string, + trx?: KyselyTransaction, + ): Promise { + const db = dbOrTx(this.db, trx); + return db + .selectFrom('aiChatRuns') + .select(this.baseFields) + .where('chatId', '=', chatId) + .where('workspaceId', '=', workspaceId) + .orderBy('createdAt', 'desc') + .orderBy('id', 'desc') + .limit(1) + .executeTakeFirst(); + } + + /** + * Patch a run by id + workspace; always bumps `updatedAt`. Used for every + * lifecycle transition (mark running, link the assistant message, bump + * step_count, finalize succeeded/failed/aborted). Returns the updated row or + * undefined when nothing matched (e.g. a foreign workspace). + */ + async update( + id: string, + workspaceId: string, + patch: Partial<{ + status: string; + error: string | null; + stepCount: number; + assistantMessageId: string | null; + stopRequestedAt: Date | null; + startedAt: Date | null; + finishedAt: Date | null; + }>, + trx?: KyselyTransaction, + ): Promise { + const db = dbOrTx(this.db, trx); + return db + .updateTable('aiChatRuns') + .set({ ...(patch as Record), updatedAt: new Date() }) + .where('id', '=', id) + .where('workspaceId', '=', workspaceId) + .returning(this.baseFields) + .executeTakeFirst(); + } + + /** + * Mark an EXPLICIT stop request on an active run (distinct from a browser + * disconnect, which never stops a run). Stamps `stop_requested_at` ONLY while + * the run is still active, so a late stop on an already-settled run is a no-op. + * Returns the row when a stop was recorded, else undefined (nothing active). + */ + async markStopRequested( + id: string, + workspaceId: string, + trx?: KyselyTransaction, + ): Promise { + const db = dbOrTx(this.db, trx); + return db + .updateTable('aiChatRuns') + .set({ stopRequestedAt: new Date(), updatedAt: new Date() }) + .where('id', '=', id) + .where('workspaceId', '=', workspaceId) + .where('status', 'in', ACTIVE_RUN_STATUSES as unknown as string[]) + .returning(this.baseFields) + .executeTakeFirst(); + } + + /** + * Crash-recovery sweep (mirrors AiChatMessageRepo.sweepStreaming): flip every + * run still left pending/running — a run whose process died before reaching a + * terminal status — to 'aborted', stamping `finished_at`. Returns the number + * swept. Workspace-wide on purpose (a crash can dangle runs in any workspace). + * + * F1 (DECISION C): the BOOT sweep is UNCONDITIONAL — it passes no `staleMs`, so + * EVERY dangling run is settled regardless of how recently it was touched. On a + * fresh single-process boot any pending|running run is definitionally hung (no + * runner is alive to own it), so a fast restart (deploy/OOM within minutes of + * the last step) no longer leaves a run stuck 'running' forever — which would + * make the one-active-run gate 409 every future turn in that chat. + * + * The optional `staleMs` window is reintroduced ONLY for the future phase-2 + * multi-instance timer sweep (see {@link SWEEP_RUN_STALE_MS}): there a booting + * replica must NOT abort a run another replica is actively executing, so it + * sweeps only runs UNTOUCHED past the window. Phase 1 is single-process, so the + * boot path supplies no window. + */ + async sweepRunning( + opts: { staleMs?: number } = {}, + trx?: KyselyTransaction, + ): Promise { + const db = dbOrTx(this.db, trx); + const now = new Date(); + let query = db + .updateTable('aiChatRuns') + .set({ + status: 'aborted', + finishedAt: now, + updatedAt: now, + error: sql`coalesce(error, ${'Run interrupted by a server restart.'})`, + }) + .where('status', 'in', ACTIVE_RUN_STATUSES as unknown as string[]); + // Multi-instance (phase 2) only: skip runs touched within the window so a + // sibling replica's live run is never aborted. Omitted on the phase-1 boot + // sweep -> unconditional. + if (typeof opts.staleMs === 'number') { + const staleBefore = new Date(now.getTime() - opts.staleMs); + query = query.where('updatedAt', '<', staleBefore); + } + const rows = await query.returning('id').execute(); + return rows.length; + } +} diff --git a/apps/server/src/database/types/db.d.ts b/apps/server/src/database/types/db.d.ts index 462a9349..ad03eacb 100644 --- a/apps/server/src/database/types/db.d.ts +++ b/apps/server/src/database/types/db.d.ts @@ -644,6 +644,35 @@ export interface AiChatMessages { deletedAt: Timestamp | null; } +// The agent RUN as a first-class server-side lifecycle object (#184 phase 1). +// Mirrors migration 20260627T130000-ai-chat-runs.ts. A run is created when an +// agent turn starts and survives the browser disconnecting; the DB is the source +// of truth a later client reconnects to. `assistantMessageId` links to the #183 +// projection row (the assistant message this run materializes). +export interface AiChatRuns { + id: Generated; + chatId: string; + workspaceId: string; + // SET NULL on user deletion (the run history outlives its author); also NULL + // for a future non-human trigger (cron/api). + createdBy: string | null; + // The assistant message this run materializes; SET NULL if it is pruned. + assistantMessageId: string | null; + // 'user' | 'autostart' | 'schedule' | 'api' | 'continue' (only 'user' is + // produced in phase 1; the rest are reserved for the deferred autonomy triggers). + trigger: Generated; + // 'pending' | 'running' | 'succeeded' | 'failed' | 'aborted'. + status: Generated; + error: string | null; + stepCount: Generated; + // Set when an EXPLICIT user stop is requested (distinct from a disconnect). + stopRequestedAt: Timestamp | null; + startedAt: Timestamp | null; + finishedAt: Timestamp | null; + createdAt: Generated; + updatedAt: Generated; +} + export interface UserSessions { id: Generated; userId: string; @@ -663,6 +692,7 @@ export interface DB { aiAgentRoles: AiAgentRoles; aiChats: AiChats; aiChatMessages: AiChatMessages; + aiChatRuns: AiChatRuns; apiKeys: ApiKeys; attachments: Attachments; audit: Audit; diff --git a/apps/server/src/database/types/entity.types.ts b/apps/server/src/database/types/entity.types.ts index 36f9be46..54c0753d 100644 --- a/apps/server/src/database/types/entity.types.ts +++ b/apps/server/src/database/types/entity.types.ts @@ -3,6 +3,7 @@ import { AiAgentRoles, AiChats, AiChatMessages, + AiChatRuns, Attachments, Comments, Groups, @@ -55,10 +56,12 @@ export type UpdatableAiChat = Updateable>; // full-text search. It is omitted from the public type so it never leaks // into HTTP responses or the chat history fed to the language model. export type AiChatMessage = Omit, 'tsv'>; -export type InsertableAiChatMessage = Omit< - Insertable, - 'tsv' ->; +export type InsertableAiChatMessage = Omit, 'tsv'>; + +// AI Chat Run (#184 phase 1): the agent run as a first-class lifecycle object, +// detached from the HTTP request / browser window. +export type AiChatRun = Selectable; +export type InsertableAiChatRun = Insertable; // AI Provider Credentials // SECURITY (D9/§8.1): holds encrypted per-workspace provider API keys. @@ -204,11 +207,14 @@ export type UpdatableFavorite = Updateable>; // Page Transclusion export type PageTransclusion = Selectable; export type InsertablePageTransclusion = Insertable; -export type UpdatablePageTransclusion = Updateable>; +export type UpdatablePageTransclusion = Updateable< + Omit +>; // Page Transclusion Reference export type PageTransclusionReference = Selectable; -export type InsertablePageTransclusionReference = Insertable; +export type InsertablePageTransclusionReference = + Insertable; export type UpdatablePageTransclusionReference = Updateable< Omit >; @@ -278,7 +284,9 @@ export type UpdatablePagePermission = Updateable>; // Page Verification export type PageVerification = Selectable<_PageVerifications>; export type InsertablePageVerification = Insertable<_PageVerifications>; -export type UpdatablePageVerification = Updateable>; +export type UpdatablePageVerification = Updateable< + Omit<_PageVerifications, 'id'> +>; // Page Verifier export type PageVerifier = Selectable<_PageVerifiers>; diff --git a/apps/server/test/integration/ai-chat-run.int-spec.ts b/apps/server/test/integration/ai-chat-run.int-spec.ts new file mode 100644 index 00000000..f6a753a1 --- /dev/null +++ b/apps/server/test/integration/ai-chat-run.int-spec.ts @@ -0,0 +1,304 @@ +import { Kysely } from 'kysely'; +import { + AiChatRunRepo, + SWEEP_RUN_STALE_MS, +} from '@docmost/db/repos/ai-chat/ai-chat-run.repo'; +import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo'; +import { AiChatRunService } from '../../src/core/ai-chat/ai-chat-run.service'; +import { + getTestDb, + destroyTestDb, + createWorkspace, + createUser, + createChat, +} from './db'; + +/** + * Integration coverage for the #184 phase-1 durable agent run: real SQL against + * docmost_test. Proves the core invariant primitives — a run is a first-class + * lifecycle row, at most one is active per chat, a detached run's progress + * survives with NO subscriber, an explicit stop settles it as aborted, a + * reconnect read returns the persisted state, and a crash sweep recovers + * dangling runs. + */ +describe('AiChatRun durable lifecycle [integration]', () => { + let db: Kysely; + let runRepo: AiChatRunRepo; + let messageRepo: AiChatMessageRepo; + let service: AiChatRunService; + let workspaceId: string; + let otherWorkspaceId: string; + let userId: string; + let chatId: string; + + beforeAll(async () => { + db = getTestDb(); + runRepo = new AiChatRunRepo(db as any); + messageRepo = new AiChatMessageRepo(db as any); + // Boot-sweep isn't triggered here; the isCloud stub is all the service needs + // for these direct-call integration cases (F7). + service = new AiChatRunService(runRepo, { isCloud: () => false } as never); + workspaceId = (await createWorkspace(db)).id; + otherWorkspaceId = (await createWorkspace(db)).id; + userId = (await createUser(db, workspaceId)).id; + chatId = (await createChat(db, { workspaceId, creatorId: userId })).id; + }); + + afterAll(async () => { + await destroyTestDb(); + }); + + // Each test that creates an active run settles it (or uses its own chat) so the + // partial unique index does not bleed across tests. + + it('insert + findById round-trips a run row, defaulting status/trigger', async () => { + const run = await runRepo.insert({ + chatId, + workspaceId, + createdBy: userId, + }); + expect(run.status).toBe('pending'); + expect(run.trigger).toBe('user'); + expect(run.stepCount).toBe(0); + + const found = await runRepo.findById(run.id, workspaceId); + expect(found!.id).toBe(run.id); + // Workspace-scoped: a foreign workspace sees nothing. + expect(await runRepo.findById(run.id, otherWorkspaceId)).toBeUndefined(); + + // settle so it does not occupy the active slot + await runRepo.update(run.id, workspaceId, { + status: 'succeeded', + finishedAt: new Date(), + }); + }); + + it('enforces ONE ACTIVE run per chat (partial unique index rejects a second)', async () => { + const activeChat = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + const first = await runRepo.insert({ + chatId: activeChat, + workspaceId, + createdBy: userId, + status: 'running', + }); + // A second pending/running run on the SAME chat must be rejected by the DB. + await expect( + runRepo.insert({ + chatId: activeChat, + workspaceId, + createdBy: userId, + status: 'running', + }), + ).rejects.toThrow(); + + // findActiveByChat returns exactly the one active run. + const active = await runRepo.findActiveByChat(activeChat, workspaceId); + expect(active!.id).toBe(first.id); + + // Once it settles, the slot frees and a new run may start. + await runRepo.update(first.id, workspaceId, { + status: 'succeeded', + finishedAt: new Date(), + }); + expect( + await runRepo.findActiveByChat(activeChat, workspaceId), + ).toBeUndefined(); + const second = await runRepo.insert({ + chatId: activeChat, + workspaceId, + createdBy: userId, + status: 'running', + }); + expect(second.id).not.toBe(first.id); + await runRepo.update(second.id, workspaceId, { + status: 'aborted', + finishedAt: new Date(), + }); + }); + + it('DETACHED run: persists + finalizes succeeded with NO subscriber, reconnect returns state', async () => { + // A dedicated chat so the active-run slot is clean. + const runChat = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + + // beginRun = the runner starts the turn (registers an in-memory controller). + const handle = await service.beginRun({ + chatId: runChat, + workspaceId, + userId, + }); + expect(handle.signal.aborted).toBe(false); + expect(service.isLocallyActive(handle.runId)).toBe(true); + + // The assistant projection row (#183) is seeded + linked. + const seeded = await messageRepo.insert({ + chatId: runChat, + workspaceId, + userId, + role: 'assistant', + content: '', + status: 'streaming', + metadata: { parts: [] } as never, + }); + await service.linkAssistantMessage(handle.runId, workspaceId, seeded.id); + + // Progress is persisted as steps finish — NO HTTP socket involved here at all. + await service.recordStep(handle.runId, workspaceId, 1); + await messageRepo.update(seeded.id, workspaceId, { + content: 'partial work', + metadata: { parts: [{ type: 'text', text: 'partial work' }] }, + }); + + // The turn completes; finalize the projection then the run. + await messageRepo.update(seeded.id, workspaceId, { + content: 'final answer', + status: 'completed', + }); + await service.finalizeRun(handle.runId, workspaceId, 'completed'); + + expect(service.isLocallyActive(handle.runId)).toBe(false); + + // Reconnect: the latest run for the chat + its projected message, from the DB. + const run = await service.getLatestForChat(runChat, workspaceId); + expect(run!.status).toBe('succeeded'); + expect(run!.stepCount).toBe(1); + expect(run!.assistantMessageId).toBe(seeded.id); + expect(run!.finishedAt).toBeTruthy(); + const message = await messageRepo.findById(seeded.id, workspaceId); + expect(message!.status).toBe('completed'); + expect(message!.content).toBe('final answer'); + }); + + it('EXPLICIT stop aborts the run signal, marks the row, and settles as aborted', async () => { + const runChat = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + const handle = await service.beginRun({ + chatId: runChat, + workspaceId, + userId, + }); + + // User presses Stop. + const stopped = await service.requestStop(handle.runId, workspaceId); + expect(stopped).toBe(true); + expect(handle.signal.aborted).toBe(true); + + // The row carries the stop request (distinct from a disconnect, which would + // leave stop_requested_at NULL). + const afterStop = await runRepo.findById(handle.runId, workspaceId); + expect(afterStop!.stopRequestedAt).toBeTruthy(); + + // The terminal callback (onAbort) settles the run. + await service.finalizeRun(handle.runId, workspaceId, 'aborted'); + const run = await service.getLatestForChat(runChat, workspaceId); + expect(run!.status).toBe('aborted'); + }); + + it('markStopRequested is a no-op on an already-settled run (returns undefined)', async () => { + const runChat = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + const run = await runRepo.insert({ + chatId: runChat, + workspaceId, + createdBy: userId, + status: 'running', + }); + await runRepo.update(run.id, workspaceId, { + status: 'succeeded', + finishedAt: new Date(), + }); + const marked = await runRepo.markStopRequested(run.id, workspaceId); + expect(marked).toBeUndefined(); + }); + + it('sweepRunning aborts STALE dangling runs but not fresh or settled ones', async () => { + const sweepChat1 = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + const sweepChat2 = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + const sweepChat3 = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + + const stale = await runRepo.insert({ + chatId: sweepChat1, + workspaceId, + createdBy: userId, + status: 'running', + }); + const fresh = await runRepo.insert({ + chatId: sweepChat2, + workspaceId, + createdBy: userId, + status: 'running', + }); + const settled = await runRepo.insert({ + chatId: sweepChat3, + workspaceId, + createdBy: userId, + status: 'running', + }); + await runRepo.update(settled.id, workspaceId, { + status: 'succeeded', + finishedAt: new Date(), + }); + // Backdate the stale run's updatedAt past the 10-minute staleness window. + await db + .updateTable('aiChatRuns') + .set({ updatedAt: new Date(Date.now() - 20 * 60 * 1000) }) + .where('id', '=', stale.id) + .execute(); + + // WINDOWED sweep (phase-2 multi-instance timer path): only runs older than the + // staleness window are aborted, so a sibling replica's fresh run survives. The + // no-arg boot sweep (variant C) is unconditional — covered separately below. + const swept = await runRepo.sweepRunning({ staleMs: SWEEP_RUN_STALE_MS }); + expect(swept).toBeGreaterThanOrEqual(1); + + expect((await runRepo.findById(stale.id, workspaceId))!.status).toBe( + 'aborted', + ); + // Fresh (recently-updated) running run survives the WINDOWED sweep — a sibling + // replica may still be executing it. + expect((await runRepo.findById(fresh.id, workspaceId))!.status).toBe( + 'running', + ); + expect((await runRepo.findById(settled.id, workspaceId))!.status).toBe( + 'succeeded', + ); + + // cleanup active fresh run + await runRepo.update(fresh.id, workspaceId, { + status: 'aborted', + finishedAt: new Date(), + }); + }); + + it('sweepRunning() with NO args (boot sweep / variant C) aborts even a FRESH running run', async () => { + // F1/DECISION C at the SQL level: the unconditional boot sweep has NO + // staleness window, so a run updated just now (a fast restart) is settled too + // — otherwise it would stay 'running' forever and 409 every future turn. + const bootChat = ( + await createChat(db, { workspaceId, creatorId: userId }) + ).id; + const fresh = await runRepo.insert({ + chatId: bootChat, + workspaceId, + createdBy: userId, + status: 'running', + }); + // updatedAt = now (fresh, untouched). The no-arg sweep settles it anyway. + const swept = await runRepo.sweepRunning(); + expect(swept).toBeGreaterThanOrEqual(1); + expect((await runRepo.findById(fresh.id, workspaceId))!.status).toBe( + 'aborted', + ); + }); +});