diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c2aa9c9..9eaf9757 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 OpenRouter, etc.; `openai` uses the official provider (real-OpenAI reasoning-model request shaping). Chosen explicitly rather than inferred from the base URL, since a custom URL can front real OpenAI too. (#175, #177) +- **AI chat "Context window (tokens)" setting (`chatContextWindow`).** A new + admin field in AI settings that records the chat model's context-window size. + When set (> 0) it becomes the denominator of the header context-badge, which + now reads "used / max"; `0`/empty clears the limit and the badge shows only + the current context as before. There is no provider-independent way to read a + model's window automatically, so it is an explicit workspace-level value. + (#189) - **Per-MCP-server instructions in the agent prompt.** Each external MCP server now has an admin-authored `instructions` field ("how/when to use this server's tools") that is injected into the agent's system prompt next to that server's @@ -61,6 +68,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 model's reasoning out of the box. An endpoint that is real OpenAI behind a custom base URL should set the new `chatApiStyle` "Protocol" to `openai`. (#177) +- **AI chat header context-badge now shows "used / max".** When an admin sets + the new `chatContextWindow`, the badge displays the current context size over + the configured window (e.g. `120k / 200k`) instead of switching to a live + per-turn token counter during streaming. With no window configured the badge + keeps showing just the current context. (#189) + - **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the same id are ONE footnote — one number, one definition, several back-references — instead of being renamed to `a__2`, `a__3`. Duplicate `[^a]:` definitions are diff --git a/apps/server/src/database/repos/workspace/workspace.repo.ts b/apps/server/src/database/repos/workspace/workspace.repo.ts index 3b5d1955..52a4de13 100644 --- a/apps/server/src/database/repos/workspace/workspace.repo.ts +++ b/apps/server/src/database/repos/workspace/workspace.repo.ts @@ -256,11 +256,17 @@ export class WorkspaceRepo { ): Promise { const db = dbOrTx(this.db, trx); // Assemble the provider object IN SQL. Keys are fixed provider field names - // (sql.lit -> inlined literals, no injection); values are bound params cast - // to ::text — postgres.js sends bound params untyped, and jsonb_build_object's - // value args are polymorphic ("any"), so without the explicit ::text cast - // Postgres throws "could not determine data type of parameter $1". The result - // is a real jsonb object, never a double-encoded string. The CASE self-heals + // (sql.lit -> inlined literals, no injection); values are bound params with + // an explicit cast — postgres.js sends bound params untyped, and + // jsonb_build_object's value args are polymorphic ("any"), so without the + // cast Postgres throws "could not determine data type of parameter $1". The + // cast is branched by the JS runtime type so the value lands in jsonb with + // the matching JSON type: a number stays a JSON number (e.g. + // chatContextWindow → `{"chatContextWindow":200000}`, jsonb_typeof 'number'), + // a boolean a JSON boolean, everything else a JSON string. A plain `::text` + // for all would store a numeric field as the JSON STRING `"200000"`, which + // the client's `typeof === "number"` guards reject. The result is a real + // jsonb object, never a double-encoded string. The CASE self-heals // workspaces whose settings.ai.provider was previously corrupted into an // array/string. const entries = Object.entries(provider).filter( @@ -268,7 +274,14 @@ export class WorkspaceRepo { ); const patch = entries.length ? sql`jsonb_build_object(${sql.join( - entries.flatMap(([k, v]) => [sql.lit(k), sql`${v}::text`]), + entries.flatMap(([k, v]) => [ + sql.lit(k), + typeof v === 'number' + ? sql`${v}::numeric` + : typeof v === 'boolean' + ? sql`${v}::boolean` + : sql`${v}::text`, + ]), )})` : sql`'{}'::jsonb`; return db diff --git a/apps/server/test/integration/workspace-repo-ai-provider-settings.int-spec.ts b/apps/server/test/integration/workspace-repo-ai-provider-settings.int-spec.ts new file mode 100644 index 00000000..90afc036 --- /dev/null +++ b/apps/server/test/integration/workspace-repo-ai-provider-settings.int-spec.ts @@ -0,0 +1,91 @@ +import { Kysely, sql } from 'kysely'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; +import { getTestDb, destroyTestDb, createWorkspace } from './db'; + +/** + * WorkspaceRepo.updateAiProviderSettings numeric round-trip (#189, #213). + * + * `chatContextWindow` is the first NUMERIC provider field routed through this + * generic SQL layer. The patch builder must cast a JS number so it lands in + * jsonb as a JSON NUMBER, not the JSON STRING `"200000"` — the client guards + * (`typeof === "number"`) reject a string, silently killing the `/ max` badge + * denominator. A plain `::text` cast (the prior code) regressed exactly this. + * These specs are real SQL and assert both the JS value type and the on-disk + * `jsonb_typeof`. + */ +describe('WorkspaceRepo.updateAiProviderSettings (numeric round-trip) [integration]', () => { + let db: Kysely; + let repo: WorkspaceRepo; + + beforeAll(() => { + db = getTestDb(); + repo = new WorkspaceRepo(db as any); + }); + + afterAll(async () => { + await destroyTestDb(); + }); + + it('stores chatContextWindow as a JSON number (not a "200000" string)', async () => { + const ws = await createWorkspace(db, { settings: undefined }); + + const updated = await repo.updateAiProviderSettings(ws.id, { + driver: 'openai', + chatModel: 'gpt-4o', + chatContextWindow: 200000, + }); + + // Returned row: the number survives as a real JS number, alongside the + // string fields which stay strings. + const provider = (updated.settings as any)?.ai?.provider; + expect(provider.chatContextWindow).toBe(200000); + expect(typeof provider.chatContextWindow).toBe('number'); + expect(provider.driver).toBe('openai'); + expect(provider.chatModel).toBe('gpt-4o'); + + // On disk: the jsonb value is typed 'number' (the must-fix assertion), and + // sibling string fields are typed 'string'. + const typed = await db + .selectFrom('workspaces') + .select([ + sql`jsonb_typeof(settings->'ai'->'provider'->'chatContextWindow')`.as( + 'windowType', + ), + sql`jsonb_typeof(settings->'ai'->'provider'->'chatModel')`.as( + 'modelType', + ), + ]) + .where('id', '=', ws.id) + .executeTakeFirstOrThrow(); + + expect(typed.windowType).toBe('number'); + expect(typed.modelType).toBe('string'); + }); + + it('re-reads chatContextWindow as a number after a partial-merge update', async () => { + const ws = await createWorkspace(db, { + settings: { ai: { provider: { driver: 'openai', chatModel: 'x' } } }, + }); + + // Merge in only the numeric field; siblings must be preserved and the value + // must still be a JSON number, not a string. + await repo.updateAiProviderSettings(ws.id, { chatContextWindow: 128000 }); + + const row = await db + .selectFrom('workspaces') + .select([ + 'settings', + sql`jsonb_typeof(settings->'ai'->'provider'->'chatContextWindow')`.as( + 'windowType', + ), + ]) + .where('id', '=', ws.id) + .executeTakeFirstOrThrow(); + + expect(row.windowType).toBe('number'); + const provider = (row.settings as any)?.ai?.provider; + expect(provider.chatContextWindow).toBe(128000); + expect(provider.driver).toBe('openai'); + expect(provider.chatModel).toBe('x'); + }); +});