fix(ai-chat): live streaming, open-page context, any-dimension embeddings

- streaming: give useChat a STABLE store id (chatId ?? per-mount generated) so the v6 hook stops re-creating its store every render on a new chat (which wiped the optimistic user message + streamed deltas, so nothing showed until the turn finished). Also send X-Accel-Buffering:no + flushHeaders.
- context: client sends the currently-open page {id,title}; the system prompt tells the agent which page 'this page' refers to (it reads it via its CASL-scoped getPage tool; id is prompt-context only, no server-side fetch).
- embeddings: make page_embeddings.embedding dimension-agnostic (drop the HNSW index + ALTER to vector), remove the hard 1536 guard, filter search by model_dimensions — so 3072-dim (and any) models index instead of being skipped. Seq-scan <=> search (wiki scale); existing pages reindex on next edit.
2026-06-17 04:58:06 +03:00
parent a4b7919753
commit 65f0713a70
7 changed files with 238 additions and 37 deletions
--- a/apps/server/src/core/ai-chat/ai-chat.service.ts
+++ b/apps/server/src/core/ai-chat/ai-chat.service.ts
@@ -24,6 +24,12 @@ import { buildSystemPrompt } from './ai-chat.prompt';
 */
 export interface AiChatStreamBody {
  chatId?: string;
+  // The page the user is currently viewing (client-supplied), or null on a
+  // non-page route. Used ONLY as prompt context so the agent knows what "this
+  // page" refers to; the page itself is never fetched server-side here. The id
+  // is attacker-controllable but harmless: the agent reads/writes via its
+  // CASL-enforced page tools, which 403 on a page the user cannot access.
+  openPage?: { id?: string; title?: string } | null;
  // useChat sends the full UIMessage list; the last one is the new user turn.
  messages?: UIMessage[];
 }
@@ -140,6 +146,7 @@ export class AiChatService {
    const system = buildSystemPrompt({
      workspace,
      adminPrompt: resolved?.systemPrompt,
+      openedPage: body.openPage,
    });

    // Pass the resolved chatId so the write tools can mint provenance tokens
@@ -310,7 +317,22 @@ export class AiChatService {
      // UI shows a generic failure. Surface the real provider message instead.
      // AI SDK error messages / 4xx bodies never contain the API key, so this is
      // safe; we never dump the resolved config/apiKey.
+      //
+      // SSE buffering / proxy note: pipeUIMessageStreamToResponse writes the
+      // headers immediately (res.writeHead) and each chunk incrementally, and the
+      // SDK's default UI_MESSAGE_STREAM_HEADERS already include
+      // `x-accel-buffering: no` (disables nginx response buffering) plus
+      // `content-type: text/event-stream` and `cache-control: no-cache`. We pass
+      // `headers` explicitly anyway so the intent is visible here and survives any
+      // future change to the SDK defaults (prepareHeaders only fills a header when
+      // absent, so this never clobbers the SDK's content-type). DEPLOYMENT: the
+      // reverse proxy in front of this server MUST NOT buffer this route, or the
+      // whole response is released at once and nothing streams. nginx honours the
+      // `x-accel-buffering: no` header we send (and additionally set
+      // `proxy_buffering off; proxy_cache off;` for /api/ai-chat/stream); traefik
+      // does not buffer responses by default.
      result.pipeUIMessageStreamToResponse(res.raw, {
+        headers: { 'X-Accel-Buffering': 'no' },
        onError: (error: unknown) => {
          const e = error as { statusCode?: number; message?: string };
          return e?.statusCode
@@ -318,6 +340,13 @@ export class AiChatService {
            : (e?.message ?? 'AI stream error');
        },
      });
+
+      // Force the status line + headers onto the socket NOW (before the model's
+      // first token), so the proxy sees the response start immediately even if the
+      // provider's first chunk is delayed. writeToServerResponse already called
+      // writeHead synchronously above; flushHeaders is a belt-and-braces no-op once
+      // headers are sent, and is guarded for response-likes that lack it.
+      res.raw.flushHeaders?.();
    } catch (err) {
      // Synchronous failure before/while wiring the stream: the terminal
      // callbacks will not run, so release the leased external clients here and