From 65f0713a703e4d95d1e1abc7c876d708405a82e8 Mon Sep 17 00:00:00 2001 From: vvzvlad Date: Wed, 17 Jun 2026 04:58:06 +0300 Subject: [PATCH] =?UTF-8?q?fix(ai-chat):=20live=20streaming,=20open-page?= =?UTF-8?q?=20context,=20any-dimension=20embeddings"=20-m=20"-=20streaming?= =?UTF-8?q?:=20give=20useChat=20a=20STABLE=20store=20id=20(chatId=20=3F=3F?= =?UTF-8?q?=20per-mount=20generated)=20=20=20so=20the=20v6=20hook=20stops?= =?UTF-8?q?=20re-creating=20its=20store=20every=20render=20on=20a=20new=20?= =?UTF-8?q?chat=20=20=20(which=20wiped=20the=20optimistic=20user=20message?= =?UTF-8?q?=20+=20streamed=20deltas,=20so=20nothing=20=20=20showed=20until?= =?UTF-8?q?=20the=20turn=20finished).=20Also=20send=20X-Accel-Buffering:no?= =?UTF-8?q?=20+=20flushHeaders.=20-=20context:=20client=20sends=20the=20cu?= =?UTF-8?q?rrently-open=20page=20{id,title};=20the=20system=20prompt=20=20?= =?UTF-8?q?=20tells=20the=20agent=20which=20page=20'this=20page'=20refers?= =?UTF-8?q?=20to=20(it=20reads=20it=20via=20its=20=20=20CASL-scoped=20getP?= =?UTF-8?q?age=20tool;=20id=20is=20prompt-context=20only,=20no=20server-si?= =?UTF-8?q?de=20fetch).=20-=20embeddings:=20make=20page=5Fembeddings.embed?= =?UTF-8?q?ding=20dimension-agnostic=20(drop=20the=20=20=20HNSW=20index=20?= =?UTF-8?q?+=20ALTER=20to=20vector),=20remove=20the=20hard=201536=20guard,?= =?UTF-8?q?=20filter=20search=20by=20=20=20model=5Fdimensions=20=E2=80=94?= =?UTF-8?q?=20so=203072-dim=20(and=20any)=20models=20index=20instead=20of?= =?UTF-8?q?=20being=20=20=20skipped.=20Seq-scan=20<=3D>=20search=20(wiki?= =?UTF-8?q?=20scale);=20existing=20pages=20reindex=20on=20next=20edit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../ai-chat/components/ai-chat-panel.tsx | 17 +++++ .../ai-chat/components/chat-thread.tsx | 58 ++++++++++++++-- .../server/src/core/ai-chat/ai-chat.prompt.ts | 25 ++++++- .../src/core/ai-chat/ai-chat.service.ts | 29 ++++++++ .../embedding/embedding-indexer.service.ts | 51 +++++++------- ...0000-page-embeddings-dimension-agnostic.ts | 67 +++++++++++++++++++ .../repos/ai-chat/page-embedding.repo.ts | 28 ++++++-- 7 files changed, 238 insertions(+), 37 deletions(-) create mode 100644 apps/server/src/database/migrations/20260617T140000-page-embeddings-dimension-agnostic.ts diff --git a/apps/client/src/features/ai-chat/components/ai-chat-panel.tsx b/apps/client/src/features/ai-chat/components/ai-chat-panel.tsx index 78314d31..b1a10e06 100644 --- a/apps/client/src/features/ai-chat/components/ai-chat-panel.tsx +++ b/apps/client/src/features/ai-chat/components/ai-chat-panel.tsx @@ -13,10 +13,13 @@ import { import { IconChevronDown, IconPlus, IconX } from "@tabler/icons-react"; import { useDisclosure } from "@mantine/hooks"; import { useAtom } from "jotai"; +import { useParams } from "react-router-dom"; import { useTranslation } from "react-i18next"; import { useQueryClient } from "@tanstack/react-query"; import { asideStateAtom } from "@/components/layouts/global/hooks/atoms/sidebar-atom.ts"; import { activeAiChatIdAtom } from "@/features/ai-chat/atoms/ai-chat-atom.ts"; +import { usePageQuery } from "@/features/page/queries/page-query.ts"; +import { extractPageSlugId } from "@/lib"; import { AI_CHATS_RQ_KEY, useAiChatMessagesQuery, @@ -46,6 +49,19 @@ export default function AiChatPanel() { const { data: messageRows, isLoading: messagesLoading } = useAiChatMessagesQuery(activeChatId ?? undefined); + // The page the user is currently viewing, derived from the route (same source + // the breadcrumb uses). On a non-page route `pageSlug` is undefined, so the + // query is disabled and `openPage` is null. This is passed to the chat thread + // as context so the agent knows what "this page"/"the current page" refers to; + // the agent still reads/writes via its CASL-enforced page tools using the id. + const { pageSlug } = useParams(); + const { data: openPageData } = usePageQuery({ + pageId: extractPageSlugId(pageSlug), + }); + const openPage = openPageData + ? { id: openPageData.id, title: openPageData.title } + : null; + const closeAside = (): void => setAsideState((s) => ({ ...s, isAsideOpen: false })); @@ -148,6 +164,7 @@ export default function AiChatPanel() { key={threadKey} chatId={activeChatId} initialRows={activeChatId ? messageRows : []} + openPage={openPage} onTurnFinished={onTurnFinished} /> )} diff --git a/apps/client/src/features/ai-chat/components/chat-thread.tsx b/apps/client/src/features/ai-chat/components/chat-thread.tsx index 14e1cd78..e0503d0f 100644 --- a/apps/client/src/features/ai-chat/components/chat-thread.tsx +++ b/apps/client/src/features/ai-chat/components/chat-thread.tsx @@ -1,4 +1,5 @@ import { useMemo, useRef } from "react"; +import { generateId } from "ai"; import { Alert, Box, Stack } from "@mantine/core"; import { IconAlertTriangle } from "@tabler/icons-react"; import { useTranslation } from "react-i18next"; @@ -9,11 +10,20 @@ import ChatInput from "@/features/ai-chat/components/chat-input.tsx"; import { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts"; import classes from "@/features/ai-chat/components/ai-chat.module.css"; +/** The page the user is currently viewing, sent as chat context. */ +export interface OpenPageContext { + id: string; + title: string; +} + interface ChatThreadProps { /** The open chat id, or null for a brand-new (not-yet-created) chat. */ chatId: string | null; /** Persisted rows to seed initial messages (existing chats only). */ initialRows?: IAiChatMessageRow[]; + /** The page currently open in the workspace, or null on a non-page route. + * Sent with each turn so the agent knows what "this page" refers to. */ + openPage?: OpenPageContext | null; /** Called when a turn finishes; the parent refreshes the chat list and, for * a new chat, adopts the freshly created chat id. */ onTurnFinished: () => void; @@ -41,6 +51,7 @@ function rowToUiMessage(row: IAiChatMessageRow): UIMessage { export default function ChatThread({ chatId, initialRows, + openPage, onTurnFinished, }: ChatThreadProps) { const { t } = useTranslation(); @@ -57,23 +68,60 @@ export default function ChatThread({ const chatIdRef = useRef(chatId); chatIdRef.current = chatId; + // Keep the currently-open page in a ref, updated each render, so the LATEST + // open page is sent on every send WITHOUT re-creating the `useMemo([])`-stable + // transport (and thus without re-creating the useChat store mid-stream — see + // the `chatStoreId` note below). Read live inside `prepareSendMessagesRequest`. + const openPageRef = useRef(openPage ?? null); + openPageRef.current = openPage ?? null; + + // Stable `useChat` store key for the lifetime of THIS mount. + // + // CRITICAL: `useChat` (@ai-sdk/react) re-creates its internal `Chat` store + // whenever the `id` option no longer equals the store's current id + // (`"id" in options && chatRef.current.id !== options.id`). For a brand-new + // chat (`chatId === null`) we previously passed `id: undefined`; the store + // then generated its OWN random id internally, so `store.id !== undefined` + // stayed true on EVERY render and the store was re-created on every render — + // wiping the optimistic user message, the "submitted" status, and every + // streamed delta until the turn fully finished (then the parent adopts the + // new chat id and remounts with the persisted history, making everything + // "appear at once"). Passing a STABLE non-undefined id keeps one store for + // the whole turn, so the user message shows immediately and tokens stream + // live. This id is purely the client store key; the server still resolves the + // real chat from `chatId` in the request body (see `prepareSendMessagesRequest`). + // The id only needs to be stable per mount — the parent remounts this via + // `key` on chat switch, which re-seeds cleanly. + const stableIdRef = useRef(chatId ?? `new-${generateId()}`); + const chatStoreId = chatId ?? stableIdRef.current; + const transport = useMemo( () => new DefaultChatTransport({ api: "/api/ai-chat/stream", credentials: "include", - // Inject the chat id alongside the useChat messages so the server can - // resolve an existing chat (or create one when null). + // Inject the chat id and the currently-open page alongside the useChat + // messages so the server can resolve an existing chat (or create one + // when null) and tell the agent which page "this page" refers to. Both + // are read live from refs so changing chats/pages does NOT recreate the + // transport. `openPage` is null on a non-page route. prepareSendMessagesRequest: ({ messages, body }) => ({ - body: { ...body, chatId: chatIdRef.current, messages }, + body: { + ...body, + chatId: chatIdRef.current, + openPage: openPageRef.current, + messages, + }, }), }), [], ); const { messages, sendMessage, status, stop, error } = useChat({ - // Key the hook by the chat id so shared-id chats don't collide. - id: chatId ?? undefined, + // Stable per-mount key. Existing chats use their real id; new chats use a + // generated client id (never `undefined`) so the store is NOT re-created on + // every render mid-stream (see `chatStoreId` above). + id: chatStoreId, messages: initialMessages, transport, onFinish: () => onTurnFinished(), diff --git a/apps/server/src/core/ai-chat/ai-chat.prompt.ts b/apps/server/src/core/ai-chat/ai-chat.prompt.ts index 770b9049..76608dca 100644 --- a/apps/server/src/core/ai-chat/ai-chat.prompt.ts +++ b/apps/server/src/core/ai-chat/ai-chat.prompt.ts @@ -55,6 +55,13 @@ export interface BuildSystemPromptInput { * used instead. */ adminPrompt?: string | null; + /** + * The page the user is currently viewing (client-supplied), if any. When it + * has an id, a CONTEXT line is added so the agent can resolve "this page" / + * "the current page" to that pageId. The page is NOT fetched here — the agent + * uses its CASL-enforced read/write page tools with the id when needed. + */ + openedPage?: { id?: string; title?: string } | null; } /** @@ -65,15 +72,27 @@ export interface BuildSystemPromptInput { export function buildSystemPrompt({ workspace, adminPrompt, + openedPage, }: BuildSystemPromptInput): string { const base = typeof adminPrompt === 'string' && adminPrompt.trim().length > 0 ? adminPrompt.trim() : DEFAULT_PROMPT; - const context = workspace?.name - ? `\n\nWorkspace: ${workspace.name}.` - : ''; + let context = workspace?.name ? `\n\nWorkspace: ${workspace.name}.` : ''; + + // When the user has a page open, tell the agent which page "this page" means. + // Context only — the agent reads/writes via its CASL-enforced page tools, so a + // spoofed id cannot escalate (getPage would 403). Added to the context section, + // never the immutable safety framework. Absent => nothing is added. + const pageId = openedPage?.id; + if (typeof pageId === 'string' && pageId.trim().length > 0) { + const title = + typeof openedPage?.title === 'string' && openedPage.title.trim().length > 0 + ? openedPage.title.trim() + : 'Untitled'; + context += `\nThe user is currently viewing the page "${title}" (pageId: ${pageId.trim()}). When they refer to "this page", "the current page", or similar, operate on that pageId — use the read/write page tools with it.`; + } return `${base}${context}\n${SAFETY_FRAMEWORK}`; } diff --git a/apps/server/src/core/ai-chat/ai-chat.service.ts b/apps/server/src/core/ai-chat/ai-chat.service.ts index 161478b0..4094357d 100644 --- a/apps/server/src/core/ai-chat/ai-chat.service.ts +++ b/apps/server/src/core/ai-chat/ai-chat.service.ts @@ -24,6 +24,12 @@ import { buildSystemPrompt } from './ai-chat.prompt'; */ export interface AiChatStreamBody { chatId?: string; + // The page the user is currently viewing (client-supplied), or null on a + // non-page route. Used ONLY as prompt context so the agent knows what "this + // page" refers to; the page itself is never fetched server-side here. The id + // is attacker-controllable but harmless: the agent reads/writes via its + // CASL-enforced page tools, which 403 on a page the user cannot access. + openPage?: { id?: string; title?: string } | null; // useChat sends the full UIMessage list; the last one is the new user turn. messages?: UIMessage[]; } @@ -140,6 +146,7 @@ export class AiChatService { const system = buildSystemPrompt({ workspace, adminPrompt: resolved?.systemPrompt, + openedPage: body.openPage, }); // Pass the resolved chatId so the write tools can mint provenance tokens @@ -310,7 +317,22 @@ export class AiChatService { // UI shows a generic failure. Surface the real provider message instead. // AI SDK error messages / 4xx bodies never contain the API key, so this is // safe; we never dump the resolved config/apiKey. + // + // SSE buffering / proxy note: pipeUIMessageStreamToResponse writes the + // headers immediately (res.writeHead) and each chunk incrementally, and the + // SDK's default UI_MESSAGE_STREAM_HEADERS already include + // `x-accel-buffering: no` (disables nginx response buffering) plus + // `content-type: text/event-stream` and `cache-control: no-cache`. We pass + // `headers` explicitly anyway so the intent is visible here and survives any + // future change to the SDK defaults (prepareHeaders only fills a header when + // absent, so this never clobbers the SDK's content-type). DEPLOYMENT: the + // reverse proxy in front of this server MUST NOT buffer this route, or the + // whole response is released at once and nothing streams. nginx honours the + // `x-accel-buffering: no` header we send (and additionally set + // `proxy_buffering off; proxy_cache off;` for /api/ai-chat/stream); traefik + // does not buffer responses by default. result.pipeUIMessageStreamToResponse(res.raw, { + headers: { 'X-Accel-Buffering': 'no' }, onError: (error: unknown) => { const e = error as { statusCode?: number; message?: string }; return e?.statusCode @@ -318,6 +340,13 @@ export class AiChatService { : (e?.message ?? 'AI stream error'); }, }); + + // Force the status line + headers onto the socket NOW (before the model's + // first token), so the proxy sees the response start immediately even if the + // provider's first chunk is delayed. writeToServerResponse already called + // writeHead synchronously above; flushHeaders is a belt-and-braces no-op once + // headers are sent, and is guarded for response-likes that lack it. + res.raw.flushHeaders?.(); } catch (err) { // Synchronous failure before/while wiring the stream: the terminal // callbacks will not run, so release the leased external clients here and diff --git a/apps/server/src/core/ai-chat/embedding/embedding-indexer.service.ts b/apps/server/src/core/ai-chat/embedding/embedding-indexer.service.ts index 67473e49..418008de 100644 --- a/apps/server/src/core/ai-chat/embedding/embedding-indexer.service.ts +++ b/apps/server/src/core/ai-chat/embedding/embedding-indexer.service.ts @@ -12,13 +12,12 @@ import { AiService } from '../../../integrations/ai/ai.service'; import { AiEmbeddingNotConfiguredException } from '../../../integrations/ai/ai-embedding-not-configured.exception'; import { jsonToText } from '../../../collaboration/collaboration.util'; -/** - * Embedding dimension the `page_embeddings.embedding` column is fixed at - * (`vector(1536)`). A model whose vectors have a different dimension cannot fit - * this column — v1 limitation (§14[M7]); see the dimension guard in - * `reindexPage`. - */ -const EMBEDDING_DIMENSIONS = 1536; +// NOTE: the `page_embeddings.embedding` column is now dimension-agnostic +// (bare pgvector `vector`, see migration 20260617T140000), so the indexer +// stores WHATEVER dimension the configured model returns and records it per row +// in `model_dimensions`. There is no fixed-dimension guard any more; search +// compares only same-dimension rows. Trade-off: a dimension-agnostic column has +// no ANN index, so retrieval is a seq scan with `<=>` (fine at wiki scale). // RecursiveCharacterTextSplitter settings. ~1000 chars per chunk with 200 char // overlap is a reasonable default for prose retrieval (§6.7 stage D). @@ -31,7 +30,7 @@ const CHUNK_OVERLAP = 200; * cosine ANN retrieval. * * Everything is workspace-scoped. Reindex HARD-replaces a page's rows (delete + - * insert in one transaction) so the HNSW index never serves stale vectors. + * insert in one transaction) so search never serves stale vectors. */ @Injectable() export class EmbeddingIndexerService { @@ -48,9 +47,9 @@ export class EmbeddingIndexerService { * (Re)build the embeddings for a single page. * * No-ops quietly when embeddings are unconfigured (so the queue never dies on - * an unconfigured workspace) and when a non-matching embedding dimension is - * returned (skip + single warning — §14[M7]). Deleted/empty pages have their - * rows purged and return. + * an unconfigured workspace). Any embedding dimension is accepted; the only + * defensive skip is a page whose chunks somehow yield mixed vector lengths. + * Deleted/empty pages have their rows purged and return. */ async reindexPage(pageId: string): Promise { const page = await this.pageRepo.findById(pageId, { @@ -115,17 +114,21 @@ export class EmbeddingIndexerService { // Embed all chunks in one batch. const vectors = await this.aiService.embedTexts(workspaceId, chunks); - // Dimension guard (§14[M7]): the column is a fixed vector(1536). A model - // with a different output dimension cannot be stored — skip the page and - // warn once rather than failing every row insert. - const wrongDim = vectors.find((v) => v.length !== EMBEDDING_DIMENSIONS); - if (wrongDim) { - this.logger.warn( - `reindexPage: embedding dimension ${wrongDim.length} != ${EMBEDDING_DIMENSIONS} ` + - `for workspace ${workspaceId}; skipping page ${pageId}. ` + - `The embedding column is fixed at ${EMBEDDING_DIMENSIONS} dims (v1 limitation §14[M7]).`, - ); - return; + // The column is dimension-agnostic, so ANY model dimension is stored as-is. + // Defensive sanity check only: all chunks of ONE page come from the SAME + // model and must share a dimension. A page that yields mixed lengths would + // poison the per-dimension search filter, so skip it with a warning rather + // than insert inconsistent rows. + const expectedDim = vectors[0]?.length; + if (expectedDim != null) { + const mixed = vectors.find((v) => v.length !== expectedDim); + if (mixed) { + this.logger.warn( + `reindexPage: mixed embedding dimensions (${expectedDim} vs ${mixed.length}) ` + + `for workspace ${workspaceId}; skipping page ${pageId}.`, + ); + return; + } } const rows = this.buildChunkRows( @@ -136,8 +139,8 @@ export class EmbeddingIndexerService { modelName, ); - // HARD replace in one transaction: delete then insert so the ANN index - // never holds stale vectors for this page. + // HARD replace in one transaction: delete then insert so search never + // returns stale vectors for this page. await executeTx(this.db, async (trx) => { await this.pageEmbeddingRepo.deleteByPage(pageId, workspaceId, trx); await this.pageEmbeddingRepo.insertChunks(rows, trx); diff --git a/apps/server/src/database/migrations/20260617T140000-page-embeddings-dimension-agnostic.ts b/apps/server/src/database/migrations/20260617T140000-page-embeddings-dimension-agnostic.ts new file mode 100644 index 00000000..5eac7954 --- /dev/null +++ b/apps/server/src/database/migrations/20260617T140000-page-embeddings-dimension-agnostic.ts @@ -0,0 +1,67 @@ +import { type Kysely, sql } from 'kysely'; + +/** + * Make `page_embeddings.embedding` dimension-agnostic. + * + * The original column was `vector(1536)` — a FIXED dimension. On deployments + * whose embedding model emits a different dimension (e.g. OpenAI + * `text-embedding-3-large` = 3072, Gemini `text-embedding-004` = 768) every + * vector failed the indexer's dimension guard and every page was SKIPPED, so + * RAG / semanticSearch was never populated. + * + * pgvector's bare `vector` type (no `(N)`) accepts vectors of ANY dimension, + * so this migration drops the fixed dimension. The dimension is still recorded + * PER ROW in `model_dimensions`, and search filters on it so the `<=>` cosine + * operator only ever compares same-dimension vectors (pgvector errors on a + * dimension mismatch — possible when rows from a previous model linger). + * + * TRADE-OFF: an HNSW / ivfflat ANN index REQUIRES a fixed dimension, so a + * dimension-agnostic column cannot carry one. We therefore DROP the HNSW index + * and rely on a sequential scan with `<=>`. That is fine at wiki scale; if a + * single embedding dimension is ever pinned per deployment, an HNSW index can + * be re-added in a follow-up migration. + */ +export async function up(db: Kysely): Promise { + // The HNSW ANN index requires a fixed dimension; drop it before relaxing the + // column type. Index name mirrors 20260617T120000-page-embeddings.ts. + await sql`DROP INDEX IF EXISTS idx_page_embeddings_embedding_hnsw`.execute(db); + + // Drop the (1536) dimension constraint so the column accepts any dimension. + // The identity cast `embedding::vector` is safe for existing 1536-dim rows; + // on the affected live stand the table is empty (everything was skipped), so + // there is no data risk. + await sql` + ALTER TABLE page_embeddings + ALTER COLUMN embedding TYPE vector USING embedding::vector + `.execute(db); + + // Btree index supporting the scoped + dimension-filtered seq-scan search + // (workspace_id + space_id IN (...) + model_dimensions = queryDim). + await db.schema + .createIndex('idx_page_embeddings_ws_space_dim') + .ifNotExists() + .on('page_embeddings') + .columns(['workspace_id', 'space_id', 'model_dimensions']) + .execute(); +} + +export async function down(db: Kysely): Promise { + // Best-effort rollback. The `::vector(1536)` cast only succeeds if EVERY row + // is already 1536-dim — acceptable for a dev rollback (the up migration is + // the intended steady state). On non-1536 data this will (correctly) error. + await db.schema + .dropIndex('idx_page_embeddings_ws_space_dim') + .ifExists() + .execute(); + + await sql` + ALTER TABLE page_embeddings + ALTER COLUMN embedding TYPE vector(1536) USING embedding::vector(1536) + `.execute(db); + + await sql` + CREATE INDEX IF NOT EXISTS idx_page_embeddings_embedding_hnsw + ON page_embeddings + USING hnsw (embedding vector_cosine_ops) + `.execute(db); +} diff --git a/apps/server/src/database/repos/ai-chat/page-embedding.repo.ts b/apps/server/src/database/repos/ai-chat/page-embedding.repo.ts index 6658973b..67e9b2c6 100644 --- a/apps/server/src/database/repos/ai-chat/page-embedding.repo.ts +++ b/apps/server/src/database/repos/ai-chat/page-embedding.repo.ts @@ -9,11 +9,17 @@ import { dbOrTx } from '../../utils'; * Repository for `page_embeddings` — the pgvector store backing the AI agent's * semantic search (§5.5 / §6.7 stage D). * - * The `embedding` column is `vector(1536)`, which is NOT a native Kysely column + * The `embedding` column is a dimension-agnostic pgvector `vector` (no fixed + * `(N)`, see migration 20260617T140000), which is NOT a native Kysely column * type, so every read/write of a vector is serialized with the `pgvector` npm * helper (`pgvector.toSql(number[])` → a `'[1,2,3]'` text literal) and cast back * to `vector` via a raw `::vector` SQL cast. Reindex is a HARD delete + insert - * (see `deleteByPage`) so the HNSW ANN index never returns stale vectors. + * (see `deleteByPage`) so search never returns stale vectors. + * + * TRADE-OFF: a dimension-agnostic column cannot carry an HNSW/ivfflat ANN index + * (those require a fixed dimension), so `searchByEmbedding` is a sequential scan + * with the `<=>` cosine operator. Fine at wiki scale; re-add an HNSW index if a + * single embedding dimension is ever pinned per deployment. */ /** A single chunk row to persist for a page (page-body embeddings). */ @@ -66,8 +72,8 @@ export class PageEmbeddingRepo { /** * Bulk-insert chunk rows for a page. The `embedding` value is serialized with - * `pgvector.toSql` and cast to `vector` so Postgres stores it in the fixed - * `vector(1536)` column. No-op on an empty array. + * `pgvector.toSql` and cast to `vector` so Postgres stores it in the + * dimension-agnostic `vector` column (any dimension). No-op on an empty array. */ async insertChunks( rows: PageEmbeddingChunkRow[], @@ -97,10 +103,17 @@ export class PageEmbeddingRepo { } /** - * Cosine ANN search over the embeddings, scoped to a workspace AND a set of + * Cosine search over the embeddings, scoped to a workspace AND a set of * spaces the caller may read (see semanticSearch access-scoping). Orders by * `embedding <=> $query` (cosine distance) and joins the page title cheaply. * Returns [] when `spaceIds` is empty (no accessible spaces => no results). + * + * Because the column is dimension-agnostic (no ANN index), this is a seq scan + * with `<=>`. The query MUST only be compared against same-dimension rows — + * pgvector raises on a dimension mismatch, which can happen when rows from a + * previously configured embedding model still linger. We therefore filter by + * `model_dimensions = queryEmbedding.length` so the `<=>` operands always + * agree on dimension. */ async searchByEmbedding( workspaceId: string, @@ -112,6 +125,8 @@ export class PageEmbeddingRepo { // Serialized + cast query vector reused for the distance expression. const queryVector = sql`${pgvector.toSql(queryEmbedding)}::vector`; + // Compare only against rows produced by a model of the SAME dimension. + const queryDim = queryEmbedding.length; const rows = await this.db .selectFrom('pageEmbeddings as pe') @@ -125,6 +140,9 @@ export class PageEmbeddingRepo { ]) .where('pe.workspaceId', '=', workspaceId) .where('pe.spaceId', 'in', spaceIds) + // Same-dimension only: avoids a pgvector dimension-mismatch error against + // rows from a previously configured embedding model. + .where('pe.modelDimensions', '=', queryDim) // Exclude chunks whose page is in the trash (defence in depth). .where('p.deletedAt', 'is', null) .orderBy('distance', 'asc')