fix(ai-chat): OpenAI Chat Completions for multi-turn + provider settings, stream UX & errors. Live-stand fixes (OpenRouter / OpenAI-compatible):
- openai provider: use .chat() (Chat Completions) instead of the default callable (Responses API), which gateways reject on multi-turn -> 400. - updateAiProviderSettings: assemble settings.ai.provider via jsonb_build_object with ::text-cast bound params + jsonb_typeof self-heal (postgres.js was double-encoding it into an array; the ::text cast avoids 'could not determine data type of parameter'). - chat agent: drop the hard maxOutputTokens cap (truncated complex tool calls); keep a tiny cap only on the test-connection ping. - testConnection + chat stream: surface the real provider error (statusCode+message) to logs and the UI instead of generic masks; never log the API key. - chat UI: typing indicator, incremental streaming render, tool 'running' status, Stop. Also bundled (prior uncommitted ai-chat work): - history 'AI agent' provenance badge; vector RAG (pgvector image + page_embeddings + AI_QUEUE indexer + space-scoped semanticSearch); external MCP servers backend (@ai-sdk/mcp client, SSRF IP-pinning, encrypted headers, admin CRUD/Test); yjs duplicate-instance fix via pnpm patch (single CJS instance server-side).
This commit is contained in:
@@ -30,6 +30,8 @@ import { TemplateRepo } from '@docmost/db/repos/template/template.repo';
|
||||
import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
|
||||
import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
|
||||
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
|
||||
import { AiMcpServerRepo } from '@docmost/db/repos/ai-chat/ai-mcp-server.repo';
|
||||
import { PageEmbeddingRepo } from '@docmost/db/repos/ai-chat/page-embedding.repo';
|
||||
import { PageListener } from '@docmost/db/listeners/page.listener';
|
||||
import { PostgresJSDialect } from 'kysely-postgres-js';
|
||||
import * as postgres from 'postgres';
|
||||
@@ -98,6 +100,8 @@ import { normalizePostgresUrl } from '../common/helpers';
|
||||
AiChatRepo,
|
||||
AiChatMessageRepo,
|
||||
AiProviderCredentialsRepo,
|
||||
AiMcpServerRepo,
|
||||
PageEmbeddingRepo,
|
||||
PageListener,
|
||||
],
|
||||
exports: [
|
||||
@@ -126,6 +130,8 @@ import { normalizePostgresUrl } from '../common/helpers';
|
||||
AiChatRepo,
|
||||
AiChatMessageRepo,
|
||||
AiProviderCredentialsRepo,
|
||||
AiMcpServerRepo,
|
||||
PageEmbeddingRepo,
|
||||
],
|
||||
})
|
||||
export class DatabaseModule implements OnApplicationBootstrap {
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
import { type Kysely, sql } from 'kysely';
|
||||
|
||||
/**
|
||||
* Vector-RAG storage for the AI agent (§5.5 / §6.7 stage D / §14[M6,M7]).
|
||||
*
|
||||
* Creates the pgvector `vector` extension and the `page_embeddings` table that
|
||||
* backs semantic search. Columns mirror the hand-written `PageEmbeddings`
|
||||
* Kysely type (apps/server/src/database/types/embeddings.types.ts) one-to-one.
|
||||
*
|
||||
* The indexer + `semanticSearch` tool are a later unit; this migration only
|
||||
* provisions the extension, the table and its indexes.
|
||||
*
|
||||
* The `embedding` column is `vector(EMBEDDING_DIMENSIONS)`. The dimension is
|
||||
* FIXED at table-creation time and must match the embedding model in use.
|
||||
* 1536 is the default for OpenAI `text-embedding-3-small` / `-ada-002`.
|
||||
* Switching to a model with a DIFFERENT dimension (e.g. Gemini
|
||||
* `text-embedding-004` = 768, Ollama `nomic-embed-text` = 768) requires
|
||||
* re-creating the column and rebuilding the HNSW index. The actual model and
|
||||
* its dimension are recorded PER ROW in `model_name` / `model_dimensions` so a
|
||||
* future migration can detect and re-index mismatched rows.
|
||||
*/
|
||||
// Dimension N of the `vector(N)` type used for the `embedding` column below.
const EMBEDDING_DIMENSIONS = 1536;
|
||||
|
||||
/**
 * Provisions vector-RAG storage: the pgvector extension, the
 * `page_embeddings` table, an HNSW cosine index on `embedding`, and btree
 * indexes on `page_id` and `workspace_id`.
 *
 * Each statement is issued with IF NOT EXISTS.
 */
export async function up(db: Kysely<any>): Promise<void> {
  // The extension ships with the pgvector/pgvector:pg18 image; it has to be
  // enabled before a `vector(N)` column can be declared.
  await sql`CREATE EXTENSION IF NOT EXISTS vector`.execute(db);

  // `vector(N)` is not a native Kysely column type, so the type is written as
  // raw SQL with the fixed dimension inlined.
  const embeddingType = sql`vector(${sql.raw(String(EMBEDDING_DIMENSIONS))})`;

  await db.schema
    .createTable('page_embeddings')
    .ifNotExists()
    .addColumn('id', 'uuid', (col) =>
      col.primaryKey().defaultTo(sql`gen_uuid_v7()`),
    )
    // Ownership columns: removing the workspace, page or space removes the
    // embedding rows with it.
    .addColumn('workspace_id', 'uuid', (col) =>
      col.notNull().references('workspaces.id').onDelete('cascade'),
    )
    .addColumn('page_id', 'uuid', (col) =>
      col.notNull().references('pages.id').onDelete('cascade'),
    )
    .addColumn('space_id', 'uuid', (col) =>
      col.notNull().references('spaces.id').onDelete('cascade'),
    )
    // Nullable: only set when the chunk comes from an attachment rather than
    // the page body. Deleting the attachment deletes its embeddings.
    .addColumn('attachment_id', 'uuid', (col) =>
      col.references('attachments.id').onDelete('cascade'),
    )
    // Chunk bookkeeping: one row per chunk, ordered by chunk_index in a page.
    .addColumn('chunk_index', 'integer', (col) => col.notNull().defaultTo(0))
    .addColumn('chunk_start', 'integer', (col) => col.notNull().defaultTo(0))
    .addColumn('chunk_length', 'integer', (col) => col.notNull().defaultTo(0))
    // Source text of the chunk that was embedded.
    .addColumn('content', 'text', (col) => col.notNull())
    // Which model produced the vector, and that model's output dimension.
    .addColumn('model_name', 'varchar', (col) => col.notNull())
    .addColumn('model_dimensions', 'integer', (col) => col.notNull())
    .addColumn('embedding', embeddingType)
    .addColumn('metadata', 'jsonb', (col) =>
      col.notNull().defaultTo(sql`'{}'::jsonb`),
    )
    .addColumn('created_at', 'timestamptz', (col) =>
      col.notNull().defaultTo(sql`now()`),
    )
    .addColumn('updated_at', 'timestamptz', (col) =>
      col.notNull().defaultTo(sql`now()`),
    )
    .addColumn('deleted_at', 'timestamptz')
    // At most one stored vector per (page, chunk).
    // NOTE(review): attachment chunks share this key space with page-body
    // chunks, so both cannot use chunk_index 0 for the same page — confirm
    // before attachments are indexed.
    .addUniqueConstraint('uq_page_embeddings_page_chunk', [
      'page_id',
      'chunk_index',
    ])
    .execute();

  // Approximate-nearest-neighbour index (HNSW) for cosine-distance search.
  await sql`
    CREATE INDEX IF NOT EXISTS idx_page_embeddings_embedding_hnsw
    ON page_embeddings
    USING hnsw (embedding vector_cosine_ops)
  `.execute(db);

  // Btree indexes for scoped lookups/deletes (re-index a page, purge a
  // workspace), created in the same order as before: page_id, workspace_id.
  // NOTE(review): the unique constraint above already leads with page_id, so
  // the dedicated page_id index may be redundant — confirm before removing.
  for (const column of ['page_id', 'workspace_id']) {
    await db.schema
      .createIndex(`idx_page_embeddings_${column}`)
      .ifNotExists()
      .on('page_embeddings')
      .column(column)
      .execute();
  }
}
|
||||
|
||||
/**
 * Reverts `up` by dropping `page_embeddings` when it exists.
 *
 * The `vector` extension is deliberately left installed: other objects may
 * depend on it and dropping it would be destructive.
 */
export async function down(db: Kysely<any>): Promise<void> {
  const dropEmbeddingsTable = db.schema.dropTable('page_embeddings').ifExists();
  await dropEmbeddingsTable.execute();
}
|
||||
@@ -0,0 +1,44 @@
|
||||
import { type Kysely, sql } from 'kysely';
|
||||
|
||||
/**
 * Creates the `ai_mcp_servers` table (external MCP servers configured per
 * workspace) and a btree index on `workspace_id`. Both statements use
 * IF NOT EXISTS.
 */
export async function up(db: Kysely<any>): Promise<void> {
  const createServersTable = db.schema
    .createTable('ai_mcp_servers')
    .ifNotExists()
    .addColumn('id', 'uuid', (col) =>
      col.primaryKey().defaultTo(sql`gen_uuid_v7()`),
    )
    // Rows are removed together with their workspace.
    .addColumn('workspace_id', 'uuid', (col) =>
      col.references('workspaces.id').onDelete('cascade').notNull(),
    )
    // Human-readable label, e.g. 'Tavily'.
    .addColumn('name', 'varchar', (col) => col.notNull())
    // @ai-sdk/mcp transport type: 'http' | 'sse'.
    .addColumn('transport', 'varchar', (col) => col.notNull())
    // URL of the remote MCP endpoint.
    .addColumn('url', 'text', (col) => col.notNull())
    // SECURITY (§8.10): AES-256-GCM blob of the JSON auth headers. Write-only;
    // NEVER added to workspace baseFields and NEVER returned by any endpoint.
    .addColumn('headers_enc', 'text')
    // Optional restriction on which remote tool names the agent may see.
    .addColumn('tool_allowlist', 'jsonb')
    .addColumn('enabled', 'boolean', (col) => col.notNull().defaultTo(true))
    .addColumn('created_at', 'timestamptz', (col) =>
      col.notNull().defaultTo(sql`now()`),
    )
    .addColumn('updated_at', 'timestamptz', (col) =>
      col.notNull().defaultTo(sql`now()`),
    );
  await createServersTable.execute();

  // listByWorkspace / listEnabled filter on workspace_id first.
  const createWorkspaceIndex = db.schema
    .createIndex('ai_mcp_servers_workspace_id_idx')
    .ifNotExists()
    .on('ai_mcp_servers')
    .column('workspace_id');
  await createWorkspaceIndex.execute();
}
|
||||
|
||||
/**
 * Reverts `up` by dropping the `ai_mcp_servers` table.
 *
 * `ifExists()` mirrors the `ifNotExists()` guards in `up`, so rolling back a
 * database where the table is already gone does not fail.
 */
export async function down(db: Kysely<any>): Promise<void> {
  await db.schema.dropTable('ai_mcp_servers').ifExists().execute();
}
|
||||
143
apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.ts
Normal file
143
apps/server/src/database/repos/ai-chat/ai-mcp-server.repo.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { sql } from 'kysely';
|
||||
import { KyselyDB, KyselyTransaction } from '../../types/kysely.types';
|
||||
import { dbOrTx } from '../../utils';
|
||||
import { AiMcpServer } from '@docmost/db/types/entity.types';
|
||||
|
||||
/**
 * Repository for per-workspace external MCP servers the agent may use (§5.4).
 *
 * SECURITY (§8.10): rows hold the encrypted auth-header blob (`headersEnc`).
 * That column must NEVER be returned to a non-admin path nor logged; the admin
 * controller projects an explicit allowlist of columns and the connect path
 * decrypts only server-side. Every query in this class filters on
 * `workspaceId`.
 */
@Injectable()
export class AiMcpServerRepo {
  constructor(@InjectKysely() private readonly db: KyselyDB) {}

  /**
   * Fetch one server by id within a workspace.
   *
   * @returns the full row (including `headersEnc`), or `undefined` when no row
   *   matches both the id and the workspace.
   */
  async findById(
    id: string,
    workspaceId: string,
  ): Promise<AiMcpServer | undefined> {
    return this.db
      .selectFrom('aiMcpServers')
      .selectAll('aiMcpServers')
      .where('id', '=', id)
      .where('workspaceId', '=', workspaceId)
      .executeTakeFirst();
  }

  /** All servers of a workspace (full rows), oldest first. */
  async listByWorkspace(workspaceId: string): Promise<AiMcpServer[]> {
    return this.db
      .selectFrom('aiMcpServers')
      .selectAll('aiMcpServers')
      .where('workspaceId', '=', workspaceId)
      .orderBy('createdAt', 'asc')
      .execute();
  }

  /** Enabled servers only — used by the agent loop to build the toolset. */
  async listEnabled(workspaceId: string): Promise<AiMcpServer[]> {
    return this.db
      .selectFrom('aiMcpServers')
      .selectAll('aiMcpServers')
      .where('workspaceId', '=', workspaceId)
      .where('enabled', '=', true)
      .orderBy('createdAt', 'asc')
      .execute();
  }

  /**
   * Insert a server row and return it as stored.
   *
   * `headersEnc` defaults to null and `enabled` defaults to true when omitted.
   *
   * @param trx optional transaction to run the insert in.
   * @throws if the INSERT unexpectedly returns no row, so the declared
   *   non-undefined return type actually holds.
   */
  async insert(
    values: {
      workspaceId: string;
      name: string;
      transport: string;
      url: string;
      headersEnc?: string | null;
      toolAllowlist?: string[] | null;
      enabled?: boolean;
    },
    trx?: KyselyTransaction,
  ): Promise<AiMcpServer> {
    const db = dbOrTx(this.db, trx);
    return db
      .insertInto('aiMcpServers')
      .values({
        workspaceId: values.workspaceId,
        name: values.name,
        transport: values.transport,
        url: values.url,
        headersEnc: values.headersEnc ?? null,
        // jsonb column: bound through jsonbArray instead of as a raw JS array.
        toolAllowlist: jsonbArray(values.toolAllowlist),
        enabled: values.enabled ?? true,
      })
      .returningAll()
      .executeTakeFirstOrThrow();
  }

  /**
   * Apply a partial update to one server within a workspace.
   *
   * Only keys that are not `undefined` in `patch` are written; `updatedAt` is
   * always refreshed. Resolves without error when no row matches.
   *
   * @param trx optional transaction to run the update in.
   */
  async update(
    id: string,
    workspaceId: string,
    patch: {
      name?: string;
      transport?: string;
      url?: string;
      // undefined => leave unchanged; null => clear; string => set.
      headersEnc?: string | null;
      // undefined => leave unchanged; null => clear; string[] => set.
      toolAllowlist?: string[] | null;
      enabled?: boolean;
    },
    trx?: KyselyTransaction,
  ): Promise<void> {
    const db = dbOrTx(this.db, trx);
    const set: Record<string, unknown> = { updatedAt: new Date() };
    if (patch.name !== undefined) set.name = patch.name;
    if (patch.transport !== undefined) set.transport = patch.transport;
    if (patch.url !== undefined) set.url = patch.url;
    if (patch.headersEnc !== undefined) set.headersEnc = patch.headersEnc;
    if (patch.toolAllowlist !== undefined) {
      set.toolAllowlist = jsonbArray(patch.toolAllowlist);
    }
    if (patch.enabled !== undefined) set.enabled = patch.enabled;
    await db
      .updateTable('aiMcpServers')
      .set(set)
      .where('id', '=', id)
      .where('workspaceId', '=', workspaceId)
      .execute();
  }

  /**
   * Delete one server within a workspace. Resolves without error when no row
   * matches.
   *
   * @param trx optional transaction to run the delete in.
   */
  async delete(
    id: string,
    workspaceId: string,
    trx?: KyselyTransaction,
  ): Promise<void> {
    const db = dbOrTx(this.db, trx);
    await db
      .deleteFrom('aiMcpServers')
      .where('id', '=', id)
      .where('workspaceId', '=', workspaceId)
      .execute();
  }
}
|
||||
|
||||
/**
 * Build the bind value for the jsonb `tool_allowlist` column.
 *
 * - `null`, `undefined` and an empty array all become SQL NULL. An empty
 *   array is deliberately normalized to NULL so the column never stores `[]`;
 *   callers pass `null` to clear the allowlist.
 * - A non-empty array is bound as its JSON text and cast `::text::jsonb`.
 *
 * The intermediate `::text` cast types the bound parameter as text. When a
 * parameter is typed as jsonb, postgres.js JSON-encodes the bound value
 * itself, so an already-stringified array cast straight to `::jsonb` is
 * stored as a jsonb *string* instead of a jsonb array.
 *
 * NOTE(review): the column type documents NULL as "expose all", so an empty
 * allowlist widens access rather than narrowing it — confirm no caller sends
 * `[]` meaning "expose nothing".
 */
function jsonbArray(value: string[] | null | undefined) {
  if (value === null || value === undefined || value.length === 0) {
    return null;
  }
  // Typed as string[] so it is assignable to the toolAllowlist column.
  return sql<string[]>`${JSON.stringify(value)}::text::jsonb`;
}
|
||||
142
apps/server/src/database/repos/ai-chat/page-embedding.repo.ts
Normal file
142
apps/server/src/database/repos/ai-chat/page-embedding.repo.ts
Normal file
@@ -0,0 +1,142 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { sql } from 'kysely';
|
||||
import * as pgvector from 'pgvector';
|
||||
import { KyselyDB, KyselyTransaction } from '../../types/kysely.types';
|
||||
import { dbOrTx } from '../../utils';
|
||||
|
||||
/**
|
||||
* Repository for `page_embeddings` — the pgvector store backing the AI agent's
|
||||
* semantic search (§5.5 / §6.7 stage D).
|
||||
*
|
||||
* The `embedding` column is `vector(1536)`, which is NOT a native Kysely column
|
||||
* type, so every read/write of a vector is serialized with the `pgvector` npm
|
||||
* helper (`pgvector.toSql(number[])` → a `'[1,2,3]'` text literal) and cast back
|
||||
* to `vector` via a raw `::vector` SQL cast. Reindex is a HARD delete + insert
|
||||
* (see `deleteByPage`) so the HNSW ANN index never returns stale vectors.
|
||||
*/
|
||||
|
||||
/**
 * Input shape for `PageEmbeddingRepo.insertChunks`: one chunk of a page
 * together with the vector computed for it.
 */
export interface PageEmbeddingChunkRow {
  /** Page the chunk belongs to. */
  pageId: string;
  /** Workspace that owns the page. */
  workspaceId: string;
  /** Space that contains the page. */
  spaceId: string;
  /** `null` for page-body chunks; set only for attachment chunks (future). */
  attachmentId: string | null;
  /** Position of the chunk within the page. */
  chunkIndex: number;
  /** Start of the chunk. */
  chunkStart: number;
  /** Length of the chunk. */
  chunkLength: number;
  /** Chunk text that was embedded. */
  content: string;
  /** Identifier of the embedding model that produced `embedding`. */
  modelName: string;
  /** Output dimension of that model. */
  modelDimensions: number;
  /** The embedding vector itself. */
  embedding: number[];
}
|
||||
|
||||
/** One row returned by `PageEmbeddingRepo.searchByEmbedding`. */
export interface PageEmbeddingSearchHit {
  /** Page the matching chunk belongs to. */
  pageId: string;
  /** Space that contains the page. */
  spaceId: string;
  /** Title of the page, if it has one. */
  title: string | null;
  /** Text of the matching chunk. */
  content: string;
  /** Cosine distance to the query (0 = identical direction); lower is closer. */
  distance: number;
}
|
||||
|
||||
/**
 * Data access for `page_embeddings`. Vectors are written and compared as
 * `pgvector.toSql(...)` text literals cast with `::vector`.
 */
@Injectable()
export class PageEmbeddingRepo {
  constructor(@InjectKysely() private readonly db: KyselyDB) {}

  /**
   * Remove every embedding row of a page inside its workspace.
   *
   * This is a hard delete (no soft-delete flag is touched), used before a
   * reindex and on page deletion so the HNSW index never returns vectors for
   * content that no longer exists.
   *
   * @param trx optional transaction to run the delete in.
   */
  async deleteByPage(
    pageId: string,
    workspaceId: string,
    trx?: KyselyTransaction,
  ): Promise<void> {
    await dbOrTx(this.db, trx)
      .deleteFrom('pageEmbeddings')
      .where('pageId', '=', pageId)
      .where('workspaceId', '=', workspaceId)
      .execute();
  }

  /**
   * Insert all given chunk rows with a single INSERT statement.
   *
   * Returns immediately, without touching the database, when `rows` is empty.
   *
   * @param trx optional transaction to run the insert in.
   */
  async insertChunks(
    rows: PageEmbeddingChunkRow[],
    trx?: KyselyTransaction,
  ): Promise<void> {
    if (rows.length === 0) return;

    const records = rows.map((chunk) => {
      // pgvector.toSql -> '[1,2,3]'; the bound literal is cast to vector.
      const vector = sql`${pgvector.toSql(chunk.embedding)}::vector`;
      return {
        pageId: chunk.pageId,
        workspaceId: chunk.workspaceId,
        spaceId: chunk.spaceId,
        attachmentId: chunk.attachmentId,
        chunkIndex: chunk.chunkIndex,
        chunkStart: chunk.chunkStart,
        chunkLength: chunk.chunkLength,
        content: chunk.content,
        modelName: chunk.modelName,
        modelDimensions: chunk.modelDimensions,
        embedding: vector,
      };
    });

    await dbOrTx(this.db, trx)
      .insertInto('pageEmbeddings')
      .values(records)
      .execute();
  }

  /**
   * Nearest-neighbour search by cosine distance (`<=>`), restricted to one
   * workspace and to the spaces listed in `spaceIds`.
   *
   * Joins `pages` to pick up the title and to drop chunks whose page has a
   * non-null `deletedAt`. Results are ordered by ascending distance and capped
   * at `limit`.
   *
   * @returns `[]` without querying when `spaceIds` is empty.
   */
  async searchByEmbedding(
    workspaceId: string,
    queryEmbedding: number[],
    spaceIds: string[],
    limit: number,
  ): Promise<PageEmbeddingSearchHit[]> {
    if (spaceIds.length === 0) return [];

    // The serialized query vector, embedded in the distance expression below.
    const queryVector = sql`${pgvector.toSql(queryEmbedding)}::vector`;
    const distance = sql<number>`pe.embedding <=> ${queryVector}`.as(
      'distance',
    );

    const hits = await this.db
      .selectFrom('pageEmbeddings as pe')
      .innerJoin('pages as p', 'p.id', 'pe.pageId')
      .select([
        'pe.pageId as pageId',
        'pe.spaceId as spaceId',
        'pe.content as content',
        'p.title as title',
        distance,
      ])
      .where('pe.workspaceId', '=', workspaceId)
      .where('pe.spaceId', 'in', spaceIds)
      // Defence in depth: skip chunks of trashed pages.
      .where('p.deletedAt', 'is', null)
      .orderBy('distance', 'asc')
      .limit(limit)
      .execute();

    return hits.map(({ pageId, spaceId, title, content, distance: raw }) => ({
      pageId,
      spaceId,
      title,
      content,
      // Coerce the driver's value to a JS number.
      distance: Number(raw),
    }));
  }
}
|
||||
@@ -214,11 +214,16 @@ export class WorkspaceRepo {
|
||||
/**
|
||||
* Deep-merge a partial provider config into the fixed path
|
||||
* `settings.ai.provider`. Unlike `updateAiSettings` (single scalar key under
|
||||
* `settings.ai`), this stores a nested object. The path is constant — only the
|
||||
* provider value is parameterized (bound, not `sql.raw`) — so it cannot store
|
||||
* a secret and is safe from injection. Sibling `settings.ai.*` keys (search /
|
||||
* generative / chat / mcp / systemPrompt) and provider fields absent from the
|
||||
* partial are preserved via jsonb `||` merge.
|
||||
* `settings.ai`), this stores a nested object. The provider object is assembled
|
||||
* IN SQL via `jsonb_build_object`: keys come from a fixed allowlist (inlined
|
||||
* via `sql.lit`, so no injection) and values are bound params, so the result is
|
||||
* a real jsonb object and never a double-encoded string (postgres.js would
|
||||
* otherwise re-serialize a `JSON.stringify`'d string, yielding a jsonb string
|
||||
* that `||` turns into an array). A `jsonb_typeof = 'object'` CASE self-heals
|
||||
* workspaces whose `settings.ai.provider` was previously corrupted into an
|
||||
* array/string. Sibling `settings.ai.*` keys (search / generative / chat / mcp
|
||||
* / systemPrompt) and provider fields absent from the partial are preserved via
|
||||
* jsonb `||` merge.
|
||||
*/
|
||||
async updateAiProviderSettings(
|
||||
workspaceId: string,
|
||||
@@ -226,14 +231,33 @@ export class WorkspaceRepo {
|
||||
trx?: KyselyTransaction,
|
||||
): Promise<Workspace> {
|
||||
const db = dbOrTx(this.db, trx);
|
||||
const providerJson = JSON.stringify(provider);
|
||||
// Assemble the provider object IN SQL. Keys are fixed provider field names
|
||||
// (sql.lit -> inlined literals, no injection); values are bound params cast
|
||||
// to ::text — postgres.js sends bound params untyped, and jsonb_build_object's
|
||||
// value args are polymorphic ("any"), so without the explicit ::text cast
|
||||
// Postgres throws "could not determine data type of parameter $1". The result
|
||||
// is a real jsonb object, never a double-encoded string. The CASE self-heals
|
||||
// workspaces whose settings.ai.provider was previously corrupted into an
|
||||
// array/string.
|
||||
const ALLOWED = ['driver', 'chatModel', 'embeddingModel', 'baseUrl', 'systemPrompt'];
|
||||
const entries = Object.entries(provider).filter(
|
||||
([k, v]) => v !== undefined && ALLOWED.includes(k),
|
||||
);
|
||||
const patch = entries.length
|
||||
? sql`jsonb_build_object(${sql.join(
|
||||
entries.flatMap(([k, v]) => [sql.lit(k), sql`${v}::text`]),
|
||||
)})`
|
||||
: sql`'{}'::jsonb`;
|
||||
return db
|
||||
.updateTable('workspaces')
|
||||
.set({
|
||||
settings: sql`COALESCE(settings, '{}'::jsonb)
|
||||
|| jsonb_build_object('ai', COALESCE(settings->'ai', '{}'::jsonb)
|
||||
|| jsonb_build_object('provider', COALESCE(settings->'ai'->'provider', '{}'::jsonb)
|
||||
|| ${providerJson}::jsonb))`,
|
||||
settings: sql`COALESCE(settings, '{}'::jsonb) || jsonb_build_object(
|
||||
'ai', COALESCE(settings->'ai', '{}'::jsonb) || jsonb_build_object(
|
||||
'provider',
|
||||
(CASE WHEN jsonb_typeof(settings->'ai'->'provider') = 'object'
|
||||
THEN settings->'ai'->'provider' ELSE '{}'::jsonb END)
|
||||
|| ${patch}
|
||||
))`,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where('id', '=', workspaceId)
|
||||
|
||||
28
apps/server/src/database/types/ai-mcp-servers.types.ts
Normal file
28
apps/server/src/database/types/ai-mcp-servers.types.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import { Timestamp, Generated } from '@docmost/db/types/db';
|
||||
|
||||
// ai_mcp_servers type
|
||||
// Hand-written (not generated) because codegen requires a live DB.
|
||||
// Mirrors the migration 20260617T130000-ai-mcp-servers.ts.
|
||||
//
|
||||
// SECURITY (§8.10/§8.11): `headersEnc` is the AES-256-GCM blob of the per-server
|
||||
// auth headers (the external service's API key, e.g. Tavily). It is WRITE-ONLY:
|
||||
// it must NEVER be added to workspace `baseFields`, returned by any endpoint, or
|
||||
// written to logs. Only the server-side MCP client layer decrypts it.
|
||||
/** Kysely table type for `ai_mcp_servers`. */
export interface AiMcpServers {
  id: Generated<string>;
  workspaceId: string;
  /** Display name, e.g. 'Tavily'. Also drives the tool-name namespace prefix. */
  name: string;
  /** '@ai-sdk/mcp' transport type: 'http' | 'sse'. */
  transport: string;
  /** Remote MCP endpoint URL. */
  url: string;
  /** Encrypted JSON of the auth headers. Nullable (a server may need no auth). */
  headersEnc: string | null;
  /**
   * Optional allowlist of remote tool names to expose; null = expose all.
   * Stored as jsonb; reads come back as a string[] from the postgres driver.
   */
  toolAllowlist: string[] | null;
  enabled: Generated<boolean>;
  createdAt: Generated<Timestamp>;
  updatedAt: Generated<Timestamp>;
}
|
||||
@@ -1,8 +1,10 @@
|
||||
import { DB } from '@docmost/db/types/db';
|
||||
import { PageEmbeddings } from '@docmost/db/types/embeddings.types';
|
||||
import { AiProviderCredentials } from '@docmost/db/types/ai-provider-credentials.types';
|
||||
import { AiMcpServers } from '@docmost/db/types/ai-mcp-servers.types';
|
||||
|
||||
/**
 * Database shape used with Kysely: everything in `DB` plus the tables whose
 * types are declared in their own files.
 */
export interface DbInterface extends DB {
  /** `ai_mcp_servers` — external MCP servers configured per workspace. */
  aiMcpServers: AiMcpServers;
  /** AI provider credentials table. */
  aiProviderCredentials: AiProviderCredentials;
  /** `page_embeddings` — pgvector store for semantic search. */
  pageEmbeddings: PageEmbeddings;
}
|
||||
|
||||
@@ -8,11 +8,14 @@ export interface PageEmbeddings {
|
||||
modelName: string;
|
||||
modelDimensions: number;
|
||||
workspaceId: string;
|
||||
attachmentId: string;
|
||||
// Nullable: page-body embeddings have no attachment (only attachment chunks set it).
|
||||
attachmentId: string | null;
|
||||
embedding: number[];
|
||||
chunkIndex: Generated<number>;
|
||||
chunkStart: Generated<number>;
|
||||
chunkLength: Generated<number>;
|
||||
// The chunk text that produced the embedding (NOT NULL in the table).
|
||||
content: string;
|
||||
metadata: Generated<Json>;
|
||||
createdAt: Generated<Timestamp>;
|
||||
updatedAt: Generated<Timestamp>;
|
||||
|
||||
@@ -40,6 +40,7 @@ import {
|
||||
} from './db';
|
||||
import { PageEmbeddings } from '@docmost/db/types/embeddings.types';
|
||||
import { AiProviderCredentials as AiProviderCredentialsTable } from '@docmost/db/types/ai-provider-credentials.types';
|
||||
import { AiMcpServers as AiMcpServersTable } from '@docmost/db/types/ai-mcp-servers.types';
|
||||
|
||||
// AI Chat
|
||||
export type AiChat = Selectable<AiChats>;
|
||||
@@ -66,6 +67,13 @@ export type UpdatableAiProviderCredentials = Updateable<
|
||||
Omit<AiProviderCredentialsTable, 'id'>
|
||||
>;
|
||||
|
||||
// AI MCP Servers (external MCP servers the agent may use, e.g. Tavily).
// SECURITY (§8.10): `headersEnc` is the encrypted auth-header blob; never
// expose this table (or that column) through any non-admin path.

/** Row as read from `ai_mcp_servers`. */
export type AiMcpServer = Selectable<AiMcpServersTable>;

/** Values accepted when inserting into `ai_mcp_servers`. */
export type InsertableAiMcpServer = Insertable<AiMcpServersTable>;

/** Values accepted when updating `ai_mcp_servers`; `id` is not updatable. */
export type UpdatableAiMcpServer = Updateable<Omit<AiMcpServersTable, 'id'>>;
|
||||
|
||||
// Workspace
|
||||
export type Workspace = Selectable<Workspaces>;
|
||||
export type InsertableWorkspace = Insertable<Workspaces>;
|
||||
|
||||
Reference in New Issue
Block a user