diff --git a/.env.example b/.env.example index 5df90742..7407e629 100644 --- a/.env.example +++ b/.env.example @@ -132,11 +132,12 @@ MCP_DOCMOST_PASSWORD= # NEVER set is_agent on a human or shared account — every action by that account # (including normal human edits) would then be mis-attributed as AI. -# Agent-roles catalog source: an http(s):// base URL => the catalog is fetched -# remotely (e.g. the raw GitHub base URL of the catalog repo); any other value -# => a local filesystem directory. Empty (default) => the in-repo -# ./agent-roles-catalog folder (dev). Used by the admin "import role from -# catalog" feature only. +# Agent-roles catalog source: an http(s):// base URL to the catalog's raw files +# (the server appends /index.json and /bundles/<id>/<lang>.json). This value is +# baked into the Docker image at build time per branch (see the Dockerfile ARG +# AI_AGENT_ROLES_CATALOG_URL and the CI build-args). Set it here only to point a +# local/non-Docker run at a catalog; if unset, the "import role from catalog" +# admin feature is unavailable. Local-filesystem sources are no longer supported. # AI_AGENT_ROLES_CATALOG_URL= # Per-embedding-call timeout in milliseconds for the RAG indexer. 
diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index f25bac74..35a5a367 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -52,6 +52,7 @@ jobs: platforms: linux/amd64 build-args: | APP_VERSION=${{ steps.version.outputs.value }} + AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/develop/agent-roles-catalog push: true tags: ${{ env.IMAGE }}:develop cache-from: type=gha,scope=develop-amd64 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 694df01b..e45704e9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -57,6 +57,7 @@ jobs: platforms: ${{ matrix.platform }} build-args: | APP_VERSION=${{ env.VERSION }} + AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/main/agent-roles-catalog outputs: type=image,name=${{ env.IMAGE }},push-by-digest=true,name-canonical=true,push=true cache-from: type=gha,scope=${{ matrix.suffix }} cache-to: type=gha,scope=${{ matrix.suffix }},mode=max,ignore-error=true @@ -85,6 +86,7 @@ jobs: platforms: ${{ matrix.platform }} build-args: | APP_VERSION=${{ env.VERSION }} + AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/main/agent-roles-catalog push: false tags: | ${{ env.IMAGE }}:latest diff --git a/CHANGELOG.md b/CHANGELOG.md index 771f74dc..9fe9c7f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,10 +37,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 admin endpoints — `POST /ai-chat/roles/catalog` (browse bundles), `/catalog/bundle` (read one bundle's roles), `/import`, and `/update-from-catalog` — and a new `source` column linking a role to its - catalog slug/language/version. 
The catalog source is configurable via the new - `AI_AGENT_ROLES_CATALOG_URL` env var (an `http(s)://` base URL fetches it - remotely; otherwise a local directory; empty defaults to the in-repo - `agent-roles-catalog/` folder — see `.env.example`). (#222) + catalog slug/language/version. The catalog source is configured via the + `AI_AGENT_ROLES_CATALOG_URL` env var — an `http(s)://` base URL to the + catalog's raw files, baked into the image at build time and set per branch in + CI (see `.env.example`). (#222) ## [0.94.0] - 2026-06-26 diff --git a/Dockerfile b/Dockerfile index 34e5b17f..591cddba 100644 --- a/Dockerfile +++ b/Dockerfile @@ -23,6 +23,10 @@ RUN apt-get update \ WORKDIR /app +# Agent-roles catalog base URL, baked at build time (set per-branch in CI). +ARG AI_AGENT_ROLES_CATALOG_URL="" +ENV AI_AGENT_ROLES_CATALOG_URL=$AI_AGENT_ROLES_CATALOG_URL + # Copy apps COPY --from=builder /app/apps/server/dist /app/apps/server/dist COPY --from=builder /app/apps/client/dist /app/apps/client/dist diff --git a/agent-roles-catalog/README.md b/agent-roles-catalog/README.md index 8962662c..5a03689e 100644 --- a/agent-roles-catalog/README.md +++ b/agent-roles-catalog/README.md @@ -30,20 +30,19 @@ Currently shipped bundles: The server does not bundle this data; it reads it at request time from a single configured location, the `AI_AGENT_ROLES_CATALOG_URL` env var -(`EnvironmentService.getAiAgentRolesCatalogSource()`). The value selects one of -three sources: +(`EnvironmentService.getAiAgentRolesCatalogSource()`), an `http(s)://` base URL +to the catalog's raw files. The server fetches `/index.json` for the +manifest and `/bundles//.json` for each opened bundle +file (REMOTE only). -- **`http(s)://…`** — a REMOTE base URL. The server fetches `/index.json` - for the manifest and `/bundles//.json` for each opened - bundle file (e.g. the raw GitHub base of the catalog repo in production). 
-- **any other non-empty value** — a LOCAL filesystem directory; the same - `index.json` / `bundles//.json` paths are read from disk. -- **empty / unset** (the default) — the in-repo `agent-roles-catalog/` folder - (this directory), i.e. local dev reads these files directly. +That base URL is baked into the Docker image at build time and set per branch in +CI: a `develop` build points at the `develop` raw URL, a release build at the +`main` raw URL. Local-filesystem sources are no longer supported; if the value +is unset the catalog is unavailable. -In every case the layout below is what the server expects, and the fetched JSON -is re-validated server-side (the catalog is treated as untrusted input). See -`.env.example` for the variable and the CHANGELOG for the rollout. +The fetched JSON is re-validated server-side (the catalog is treated as +untrusted input). See `.env.example` for the variable and the CHANGELOG for the +rollout. ## `index.json` schema diff --git a/apps/server/src/core/ai-chat/roles/catalog/ai-agent-roles-catalog.provider.spec.ts b/apps/server/src/core/ai-chat/roles/catalog/ai-agent-roles-catalog.provider.spec.ts index 9a17ffeb..eef6010d 100644 --- a/apps/server/src/core/ai-chat/roles/catalog/ai-agent-roles-catalog.provider.spec.ts +++ b/apps/server/src/core/ai-chat/roles/catalog/ai-agent-roles-catalog.provider.spec.ts @@ -1,18 +1,14 @@ -import { promises as fs } from 'node:fs'; -import * as os from 'node:os'; -import * as path from 'node:path'; import { BadGatewayException, BadRequestException } from '@nestjs/common'; import { AiAgentRolesCatalogProvider } from './ai-agent-roles-catalog.provider'; /** - * Provider tests against a LOCAL fixture directory (no network). They cover the - * happy read path (fetchIndex / fetchBundle), the malformed-shape rejection, a - * missing file => unavailable, and — most importantly — the `^[a-z0-9-]+$` - * path-traversal guard that runs BEFORE any path is built. 
+ * Provider tests against a mocked remote source (no network). They cover the + * happy read path (fetchIndex / fetchBundle), the malformed-shape rejection, + * rejection of non-http(s) sources (local sources are gone), and — most + * importantly — the `^[a-z0-9-]+$` path-traversal guard that runs BEFORE any + * path/URL is built. */ -describe('AiAgentRolesCatalogProvider (local fixtures)', () => { - let dir: string; - +describe('AiAgentRolesCatalogProvider', () => { function makeProvider(source: string) { const env = { getAiAgentRolesCatalogSource: () => source, @@ -20,96 +16,13 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => { return new AiAgentRolesCatalogProvider(env as never); } - beforeAll(async () => { - dir = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-roles-catalog-')); - await fs.writeFile( - path.join(dir, 'index.json'), - JSON.stringify({ - schemaVersion: 1, - bundles: [ - { - id: 'general', - name: { en: 'General', ru: 'Общие' }, - languages: ['en'], - roles: [{ slug: 'researcher', version: 2 }], - }, - ], - }), - 'utf8', - ); - await fs.mkdir(path.join(dir, 'bundles', 'general'), { recursive: true }); - await fs.writeFile( - path.join(dir, 'bundles', 'general', 'en.json'), - JSON.stringify({ - schemaVersion: 1, - language: 'en', - roles: [ - { - slug: 'researcher', - name: 'Researcher', - instructions: 'be a researcher', - }, - ], - }), - 'utf8', - ); - // A malformed bundle (a role missing `instructions`) to test rejection. 
- await fs.writeFile( - path.join(dir, 'bundles', 'general', 'fr.json'), - JSON.stringify({ - schemaVersion: 1, - language: 'fr', - roles: [{ slug: 'researcher', name: 'Chercheur' }], - }), - 'utf8', - ); - }); - - afterAll(async () => { - await fs.rm(dir, { recursive: true, force: true }); - }); - - it('fetchIndex reads + validates index.json', async () => { - const provider = makeProvider(dir); - const index = await provider.fetchIndex(); - expect(index.schemaVersion).toBe(1); - expect(index.bundles[0].id).toBe('general'); - expect(index.bundles[0].roles[0]).toEqual({ - slug: 'researcher', - version: 2, - }); - }); - - it('fetchBundle reads + validates a language file', async () => { - const provider = makeProvider(dir); - const bundle = await provider.fetchBundle('general', 'en'); - expect(bundle.language).toBe('en'); - expect(bundle.roles[0].slug).toBe('researcher'); - expect(bundle.roles[0].instructions).toBe('be a researcher'); - }); - - it('malformed bundle (missing instructions) => BadGateway', async () => { - const provider = makeProvider(dir); - await expect(provider.fetchBundle('general', 'fr')).rejects.toBeInstanceOf( - BadGatewayException, - ); - }); - - it('missing file => BadGateway (unavailable)', async () => { - const provider = makeProvider(dir); - await expect( - provider.fetchBundle('general', 'de'), - ).rejects.toBeInstanceOf(BadGatewayException); - }); - - it('empty source resolves to the in-repo folder (no throw building the path)', async () => { - // With an empty source the provider targets ./agent-roles-catalog under the - // cwd; that folder is created by a separate task, so a read here surfaces as - // BadGateway (unavailable) rather than a path-build error. 
- const provider = makeProvider(''); - await expect(provider.fetchIndex()).rejects.toBeInstanceOf( - BadGatewayException, - ); + it('non-http(s) source => BadGateway (local sources removed)', async () => { + for (const source of ['', '/var/lib/agent-roles-catalog', './agent-roles-catalog']) { + const provider = makeProvider(source); + await expect(provider.fetchIndex()).rejects.toBeInstanceOf( + BadGatewayException, + ); + } }); describe('remote fetch streaming size cap', () => { @@ -157,6 +70,43 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => { } as unknown as Response; } + it('fetchBundle remote happy path => parses + validates', async () => { + const json = JSON.stringify({ + schemaVersion: 1, + language: 'en', + roles: [ + { + slug: 'researcher', + name: 'Researcher', + instructions: 'be a researcher', + }, + ], + }); + const body = streamOf([new TextEncoder().encode(json)]); + global.fetch = jest + .fn() + .mockResolvedValue(mockResponse({ body })) as never; + const provider = makeProvider('https://catalog.example.com'); + const bundle = await provider.fetchBundle('general', 'en'); + expect(bundle.roles[0].slug).toBe('researcher'); + }); + + it('fetchBundle remote malformed (role missing instructions) => BadGateway', async () => { + const json = JSON.stringify({ + schemaVersion: 1, + language: 'fr', + roles: [{ slug: 'researcher', name: 'Chercheur' }], + }); + const body = streamOf([new TextEncoder().encode(json)]); + global.fetch = jest + .fn() + .mockResolvedValue(mockResponse({ body })) as never; + const provider = makeProvider('https://catalog.example.com'); + await expect(provider.fetchBundle('general', 'fr')).rejects.toBeInstanceOf( + BadGatewayException, + ); + }); + it('declared Content-Length over the cap => BadGateway before reading the body', async () => { global.fetch = jest.fn().mockResolvedValue( mockResponse({ @@ -340,14 +290,14 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => { for (const value of bad) { 
it(`rejects bundleId="${value}" with BadRequest`, async () => { - const provider = makeProvider(dir); + const provider = makeProvider('https://catalog.example.com'); await expect( provider.fetchBundle(value, 'en'), ).rejects.toBeInstanceOf(BadRequestException); }); it(`rejects language="${value}" with BadRequest`, async () => { - const provider = makeProvider(dir); + const provider = makeProvider('https://catalog.example.com'); await expect( provider.fetchBundle('general', value), ).rejects.toBeInstanceOf(BadRequestException); diff --git a/apps/server/src/core/ai-chat/roles/catalog/ai-agent-roles-catalog.provider.ts b/apps/server/src/core/ai-chat/roles/catalog/ai-agent-roles-catalog.provider.ts index d2d4a6aa..2004fd0c 100644 --- a/apps/server/src/core/ai-chat/roles/catalog/ai-agent-roles-catalog.provider.ts +++ b/apps/server/src/core/ai-chat/roles/catalog/ai-agent-roles-catalog.provider.ts @@ -1,5 +1,3 @@ -import { promises as fs } from 'node:fs'; -import * as path from 'node:path'; import { BadGatewayException, BadRequestException, @@ -26,9 +24,9 @@ const MAX_BYTES = 1_000_000; /** * Fetches + validates the agent-roles catalog from its configured source. The - * source location (EnvironmentService.getAiAgentRolesCatalogSource()) is either - * an http(s):// base URL (REMOTE) or a local filesystem directory (LOCAL; the - * empty default resolves to the in-repo `agent-roles-catalog/` folder). + * source (EnvironmentService.getAiAgentRolesCatalogSource()) is an http(s):// + * base URL — REMOTE only; local-filesystem sources are no longer supported. The + * value is baked into the Docker image at build time (set per-branch in CI). * * The catalog is UNTRUSTED input: every file is JSON-parsed and run through a * hand-written type guard before any field is exposed, and every dynamic path @@ -91,31 +89,20 @@ export class AiAgentRolesCatalogProvider { } } - /** Read a relative catalog path as text from the configured source. 
*/ + /** Read a relative catalog path as text from the configured remote source. */ private async readRelative(rel: string): Promise { const source = this.environmentService .getAiAgentRolesCatalogSource() .trim(); - if (/^https?:\/\//i.test(source)) { - return this.fetchRemote(source, rel); - } - const dir = source || path.join(process.cwd(), 'agent-roles-catalog'); - return this.readLocal(dir, rel); - } - - /** Read a local catalog file. Missing => the catalog is unavailable. */ - private async readLocal(dir: string, rel: string): Promise { - try { - return await fs.readFile(path.join(dir, rel), 'utf8'); - } catch (err) { - const reason = shortError(err); + if (!/^https?:\/\//i.test(source)) { this.logger.error( - `Agent roles catalog local read failed (${path.join(dir, rel)}): ${reason}`, + 'Agent roles catalog source is not configured (expected an http(s):// base URL)', ); throw new BadGatewayException( - `Agent roles catalog is unavailable: ${reason}`, + 'Agent roles catalog is unavailable: source is not configured', ); } + return this.fetchRemote(source, rel); } /** diff --git a/apps/server/src/integrations/environment/environment.service.ts b/apps/server/src/integrations/environment/environment.service.ts index c487a7b9..1f6298b7 100644 --- a/apps/server/src/integrations/environment/environment.service.ts +++ b/apps/server/src/integrations/environment/environment.service.ts @@ -290,11 +290,12 @@ export class EnvironmentService { // ai_provider_credentials, with no env fallback. APP_SECRET stays (getAppSecret). getAiAgentRolesCatalogSource(): string { - // Catalog location. http(s):// URL => fetched remotely; anything else => a - // local filesystem directory. Defaults to the in-repo folder (dev). In prod - // set this to the raw GitHub base URL of the catalog repo. Unlike the AI_* - // getters above this is INFRA config (where the catalog lives), not - // provider/model config — so an env var here is appropriate. 
+ // Catalog location: an http(s):// base URL the catalog is fetched from. + // The value is baked into the image at build time (Dockerfile ARG + // AI_AGENT_ROLES_CATALOG_URL, set per-branch in CI); local-filesystem + // sources are no longer supported. Empty/unset => the catalog is + // unavailable (the provider returns 502). This is INFRA config (where the + // catalog lives), not provider/model config, so an env var is appropriate. return this.configService.get('AI_AGENT_ROLES_CATALOG_URL', ''); }