refactor(ai-roles): bake catalog URL at image build, drop local-fs source

The agent-roles catalog source is no longer hardcoded in app code and no
longer supports a local filesystem directory. The provider now fetches only
from an http(s):// base URL read from AI_AGENT_ROLES_CATALOG_URL; an empty or
non-http(s) value yields a 502 (catalog unavailable). The default URL is baked
into the Docker image at build time and set per branch in CI.

- provider: drop readLocal + node:fs/node:path; readRelative requires http(s)
  and 502s otherwise; remote fetch/streaming-cap/SSRF guards unchanged.
- environment.service: keep AI_AGENT_ROLES_CATALOG_URL (default ''); comment
  updated to reflect build-time injection, remote-only.
- Dockerfile: add ARG+ENV AI_AGENT_ROLES_CATALOG_URL in the installer stage.
- CI: develop.yml builds with the develop raw URL; release.yml (both build
  steps) with the main raw URL.
- tests: replace local-fixture tests with remote-mock happy/malformed bundle
  tests and a non-http => 502 case; path-traversal block uses an https source.
- docs: update .env.example, CHANGELOG (#222), agent-roles-catalog/README.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-27 03:32:48 +03:00
parent 3511301331
commit 2a4ef9267e
9 changed files with 94 additions and 149 deletions

View File

@@ -132,11 +132,12 @@ MCP_DOCMOST_PASSWORD=
# NEVER set is_agent on a human or shared account — every action by that account # NEVER set is_agent on a human or shared account — every action by that account
# (including normal human edits) would then be mis-attributed as AI. # (including normal human edits) would then be mis-attributed as AI.
# Agent-roles catalog source: an http(s):// base URL => the catalog is fetched # Agent-roles catalog source: an http(s):// base URL to the catalog's raw files
# remotely (e.g. the raw GitHub base URL of the catalog repo); any other value # (the server appends /index.json and /bundles/<id>/<lang>.json). This value is
# => a local filesystem directory. Empty (default) => the in-repo # baked into the Docker image at build time per branch (see the Dockerfile ARG
# ./agent-roles-catalog folder (dev). Used by the admin "import role from # AI_AGENT_ROLES_CATALOG_URL and the CI build-args). Set it here only to point a
# catalog" feature only. # local/non-Docker run at a catalog; if unset, the "import role from catalog"
# admin feature is unavailable. Local-filesystem sources are no longer supported.
# AI_AGENT_ROLES_CATALOG_URL= # AI_AGENT_ROLES_CATALOG_URL=
# Per-embedding-call timeout in milliseconds for the RAG indexer. # Per-embedding-call timeout in milliseconds for the RAG indexer.

View File

@@ -52,6 +52,7 @@ jobs:
platforms: linux/amd64 platforms: linux/amd64
build-args: | build-args: |
APP_VERSION=${{ steps.version.outputs.value }} APP_VERSION=${{ steps.version.outputs.value }}
AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/develop/agent-roles-catalog
push: true push: true
tags: ${{ env.IMAGE }}:develop tags: ${{ env.IMAGE }}:develop
cache-from: type=gha,scope=develop-amd64 cache-from: type=gha,scope=develop-amd64

View File

@@ -57,6 +57,7 @@ jobs:
platforms: ${{ matrix.platform }} platforms: ${{ matrix.platform }}
build-args: | build-args: |
APP_VERSION=${{ env.VERSION }} APP_VERSION=${{ env.VERSION }}
AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/main/agent-roles-catalog
outputs: type=image,name=${{ env.IMAGE }},push-by-digest=true,name-canonical=true,push=true outputs: type=image,name=${{ env.IMAGE }},push-by-digest=true,name-canonical=true,push=true
cache-from: type=gha,scope=${{ matrix.suffix }} cache-from: type=gha,scope=${{ matrix.suffix }}
cache-to: type=gha,scope=${{ matrix.suffix }},mode=max,ignore-error=true cache-to: type=gha,scope=${{ matrix.suffix }},mode=max,ignore-error=true
@@ -85,6 +86,7 @@ jobs:
platforms: ${{ matrix.platform }} platforms: ${{ matrix.platform }}
build-args: | build-args: |
APP_VERSION=${{ env.VERSION }} APP_VERSION=${{ env.VERSION }}
AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/main/agent-roles-catalog
push: false push: false
tags: | tags: |
${{ env.IMAGE }}:latest ${{ env.IMAGE }}:latest

View File

@@ -37,10 +37,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
admin endpoints — `POST /ai-chat/roles/catalog` (browse bundles), admin endpoints — `POST /ai-chat/roles/catalog` (browse bundles),
`/catalog/bundle` (read one bundle's roles), `/import`, and `/catalog/bundle` (read one bundle's roles), `/import`, and
`/update-from-catalog` — and a new `source` column linking a role to its `/update-from-catalog` — and a new `source` column linking a role to its
catalog slug/language/version. The catalog source is configurable via the new catalog slug/language/version. The catalog source is configured via the
`AI_AGENT_ROLES_CATALOG_URL` env var (an `http(s)://` base URL fetches it `AI_AGENT_ROLES_CATALOG_URL` env var — an `http(s)://` base URL to the
remotely; otherwise a local directory; empty defaults to the in-repo catalog's raw files, baked into the image at build time and set per branch in
`agent-roles-catalog/` folder — see `.env.example`). (#222) CI (see `.env.example`). (#222)
## [0.94.0] - 2026-06-26 ## [0.94.0] - 2026-06-26

View File

@@ -23,6 +23,10 @@ RUN apt-get update \
WORKDIR /app WORKDIR /app
# Agent-roles catalog base URL, baked at build time (set per-branch in CI).
ARG AI_AGENT_ROLES_CATALOG_URL=""
ENV AI_AGENT_ROLES_CATALOG_URL=$AI_AGENT_ROLES_CATALOG_URL
# Copy apps # Copy apps
COPY --from=builder /app/apps/server/dist /app/apps/server/dist COPY --from=builder /app/apps/server/dist /app/apps/server/dist
COPY --from=builder /app/apps/client/dist /app/apps/client/dist COPY --from=builder /app/apps/client/dist /app/apps/client/dist

View File

@@ -30,20 +30,19 @@ Currently shipped bundles:
The server does not bundle this data; it reads it at request time from a single The server does not bundle this data; it reads it at request time from a single
configured location, the `AI_AGENT_ROLES_CATALOG_URL` env var configured location, the `AI_AGENT_ROLES_CATALOG_URL` env var
(`EnvironmentService.getAiAgentRolesCatalogSource()`). The value selects one of (`EnvironmentService.getAiAgentRolesCatalogSource()`), an `http(s)://` base URL
three sources: to the catalog's raw files. The server fetches `<base>/index.json` for the
manifest and `<base>/bundles/<bundle-id>/<lang>.json` for each opened bundle
file (REMOTE only).
- **`http(s)://…`** — a REMOTE base URL. The server fetches `<base>/index.json` That base URL is baked into the Docker image at build time and set per branch in
for the manifest and `<base>/bundles/<bundle-id>/<lang>.json` for each opened CI: a `develop` build points at the `develop` raw URL, a release build at the
bundle file (e.g. the raw GitHub base of the catalog repo in production). `main` raw URL. Local-filesystem sources are no longer supported; if the value
- **any other non-empty value** — a LOCAL filesystem directory; the same is unset the catalog is unavailable.
`index.json` / `bundles/<id>/<lang>.json` paths are read from disk.
- **empty / unset** (the default) — the in-repo `agent-roles-catalog/` folder
(this directory), i.e. local dev reads these files directly.
In every case the layout below is what the server expects, and the fetched JSON The fetched JSON is re-validated server-side (the catalog is treated as
is re-validated server-side (the catalog is treated as untrusted input). See untrusted input). See `.env.example` for the variable and the CHANGELOG for the
`.env.example` for the variable and the CHANGELOG for the rollout. rollout.
## `index.json` schema ## `index.json` schema

View File

@@ -1,18 +1,14 @@
import { promises as fs } from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { BadGatewayException, BadRequestException } from '@nestjs/common'; import { BadGatewayException, BadRequestException } from '@nestjs/common';
import { AiAgentRolesCatalogProvider } from './ai-agent-roles-catalog.provider'; import { AiAgentRolesCatalogProvider } from './ai-agent-roles-catalog.provider';
/** /**
* Provider tests against a LOCAL fixture directory (no network). They cover the * Provider tests against a mocked remote source (no network). They cover the
* happy read path (fetchIndex / fetchBundle), the malformed-shape rejection, a * happy read path (fetchIndex / fetchBundle), the malformed-shape rejection,
* missing file => unavailable, and — most importantly — the `^[a-z0-9-]+$` * rejection of non-http(s) sources (local sources are gone), and — most
* path-traversal guard that runs BEFORE any path is built. * importantly — the `^[a-z0-9-]+$` path-traversal guard that runs BEFORE any
* path/URL is built.
*/ */
describe('AiAgentRolesCatalogProvider (local fixtures)', () => { describe('AiAgentRolesCatalogProvider', () => {
let dir: string;
function makeProvider(source: string) { function makeProvider(source: string) {
const env = { const env = {
getAiAgentRolesCatalogSource: () => source, getAiAgentRolesCatalogSource: () => source,
@@ -20,96 +16,13 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
return new AiAgentRolesCatalogProvider(env as never); return new AiAgentRolesCatalogProvider(env as never);
} }
beforeAll(async () => { it('non-http(s) source => BadGateway (local sources removed)', async () => {
dir = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-roles-catalog-')); for (const source of ['', '/var/lib/agent-roles-catalog', './agent-roles-catalog']) {
await fs.writeFile( const provider = makeProvider(source);
path.join(dir, 'index.json'), await expect(provider.fetchIndex()).rejects.toBeInstanceOf(
JSON.stringify({ BadGatewayException,
schemaVersion: 1, );
bundles: [ }
{
id: 'general',
name: { en: 'General', ru: 'Общие' },
languages: ['en'],
roles: [{ slug: 'researcher', version: 2 }],
},
],
}),
'utf8',
);
await fs.mkdir(path.join(dir, 'bundles', 'general'), { recursive: true });
await fs.writeFile(
path.join(dir, 'bundles', 'general', 'en.json'),
JSON.stringify({
schemaVersion: 1,
language: 'en',
roles: [
{
slug: 'researcher',
name: 'Researcher',
instructions: 'be a researcher',
},
],
}),
'utf8',
);
// A malformed bundle (a role missing `instructions`) to test rejection.
await fs.writeFile(
path.join(dir, 'bundles', 'general', 'fr.json'),
JSON.stringify({
schemaVersion: 1,
language: 'fr',
roles: [{ slug: 'researcher', name: 'Chercheur' }],
}),
'utf8',
);
});
afterAll(async () => {
await fs.rm(dir, { recursive: true, force: true });
});
it('fetchIndex reads + validates index.json', async () => {
const provider = makeProvider(dir);
const index = await provider.fetchIndex();
expect(index.schemaVersion).toBe(1);
expect(index.bundles[0].id).toBe('general');
expect(index.bundles[0].roles[0]).toEqual({
slug: 'researcher',
version: 2,
});
});
it('fetchBundle reads + validates a language file', async () => {
const provider = makeProvider(dir);
const bundle = await provider.fetchBundle('general', 'en');
expect(bundle.language).toBe('en');
expect(bundle.roles[0].slug).toBe('researcher');
expect(bundle.roles[0].instructions).toBe('be a researcher');
});
it('malformed bundle (missing instructions) => BadGateway', async () => {
const provider = makeProvider(dir);
await expect(provider.fetchBundle('general', 'fr')).rejects.toBeInstanceOf(
BadGatewayException,
);
});
it('missing file => BadGateway (unavailable)', async () => {
const provider = makeProvider(dir);
await expect(
provider.fetchBundle('general', 'de'),
).rejects.toBeInstanceOf(BadGatewayException);
});
it('empty source resolves to the in-repo folder (no throw building the path)', async () => {
// With an empty source the provider targets ./agent-roles-catalog under the
// cwd; that folder is created by a separate task, so a read here surfaces as
// BadGateway (unavailable) rather than a path-build error.
const provider = makeProvider('');
await expect(provider.fetchIndex()).rejects.toBeInstanceOf(
BadGatewayException,
);
}); });
describe('remote fetch streaming size cap', () => { describe('remote fetch streaming size cap', () => {
@@ -157,6 +70,43 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
} as unknown as Response; } as unknown as Response;
} }
it('fetchBundle remote happy path => parses + validates', async () => {
const json = JSON.stringify({
schemaVersion: 1,
language: 'en',
roles: [
{
slug: 'researcher',
name: 'Researcher',
instructions: 'be a researcher',
},
],
});
const body = streamOf([new TextEncoder().encode(json)]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
const provider = makeProvider('https://catalog.example.com');
const bundle = await provider.fetchBundle('general', 'en');
expect(bundle.roles[0].slug).toBe('researcher');
});
it('fetchBundle remote malformed (role missing instructions) => BadGateway', async () => {
const json = JSON.stringify({
schemaVersion: 1,
language: 'fr',
roles: [{ slug: 'researcher', name: 'Chercheur' }],
});
const body = streamOf([new TextEncoder().encode(json)]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
const provider = makeProvider('https://catalog.example.com');
await expect(provider.fetchBundle('general', 'fr')).rejects.toBeInstanceOf(
BadGatewayException,
);
});
it('declared Content-Length over the cap => BadGateway before reading the body', async () => { it('declared Content-Length over the cap => BadGateway before reading the body', async () => {
global.fetch = jest.fn().mockResolvedValue( global.fetch = jest.fn().mockResolvedValue(
mockResponse({ mockResponse({
@@ -340,14 +290,14 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
for (const value of bad) { for (const value of bad) {
it(`rejects bundleId="${value}" with BadRequest`, async () => { it(`rejects bundleId="${value}" with BadRequest`, async () => {
const provider = makeProvider(dir); const provider = makeProvider('https://catalog.example.com');
await expect( await expect(
provider.fetchBundle(value, 'en'), provider.fetchBundle(value, 'en'),
).rejects.toBeInstanceOf(BadRequestException); ).rejects.toBeInstanceOf(BadRequestException);
}); });
it(`rejects language="${value}" with BadRequest`, async () => { it(`rejects language="${value}" with BadRequest`, async () => {
const provider = makeProvider(dir); const provider = makeProvider('https://catalog.example.com');
await expect( await expect(
provider.fetchBundle('general', value), provider.fetchBundle('general', value),
).rejects.toBeInstanceOf(BadRequestException); ).rejects.toBeInstanceOf(BadRequestException);

View File

@@ -1,5 +1,3 @@
import { promises as fs } from 'node:fs';
import * as path from 'node:path';
import { import {
BadGatewayException, BadGatewayException,
BadRequestException, BadRequestException,
@@ -26,9 +24,9 @@ const MAX_BYTES = 1_000_000;
/** /**
* Fetches + validates the agent-roles catalog from its configured source. The * Fetches + validates the agent-roles catalog from its configured source. The
* source location (EnvironmentService.getAiAgentRolesCatalogSource()) is either * source (EnvironmentService.getAiAgentRolesCatalogSource()) is an http(s)://
* an http(s):// base URL (REMOTE) or a local filesystem directory (LOCAL; the * base URL — REMOTE only; local-filesystem sources are no longer supported. The
* empty default resolves to the in-repo `agent-roles-catalog/` folder). * value is baked into the Docker image at build time (set per-branch in CI).
* *
* The catalog is UNTRUSTED input: every file is JSON-parsed and run through a * The catalog is UNTRUSTED input: every file is JSON-parsed and run through a
* hand-written type guard before any field is exposed, and every dynamic path * hand-written type guard before any field is exposed, and every dynamic path
@@ -91,31 +89,20 @@ export class AiAgentRolesCatalogProvider {
} }
} }
/** Read a relative catalog path as text from the configured source. */ /** Read a relative catalog path as text from the configured remote source. */
private async readRelative(rel: string): Promise<string> { private async readRelative(rel: string): Promise<string> {
const source = this.environmentService const source = this.environmentService
.getAiAgentRolesCatalogSource() .getAiAgentRolesCatalogSource()
.trim(); .trim();
if (/^https?:\/\//i.test(source)) { if (!/^https?:\/\//i.test(source)) {
return this.fetchRemote(source, rel);
}
const dir = source || path.join(process.cwd(), 'agent-roles-catalog');
return this.readLocal(dir, rel);
}
/** Read a local catalog file. Missing => the catalog is unavailable. */
private async readLocal(dir: string, rel: string): Promise<string> {
try {
return await fs.readFile(path.join(dir, rel), 'utf8');
} catch (err) {
const reason = shortError(err);
this.logger.error( this.logger.error(
`Agent roles catalog local read failed (${path.join(dir, rel)}): ${reason}`, 'Agent roles catalog source is not configured (expected an http(s):// base URL)',
); );
throw new BadGatewayException( throw new BadGatewayException(
`Agent roles catalog is unavailable: ${reason}`, 'Agent roles catalog is unavailable: source is not configured',
); );
} }
return this.fetchRemote(source, rel);
} }
/** /**

View File

@@ -290,11 +290,12 @@ export class EnvironmentService {
// ai_provider_credentials, with no env fallback. APP_SECRET stays (getAppSecret). // ai_provider_credentials, with no env fallback. APP_SECRET stays (getAppSecret).
getAiAgentRolesCatalogSource(): string { getAiAgentRolesCatalogSource(): string {
// Catalog location. http(s):// URL => fetched remotely; anything else => a // Catalog location: an http(s):// base URL the catalog is fetched from.
// local filesystem directory. Defaults to the in-repo folder (dev). In prod // The value is baked into the image at build time (Dockerfile ARG
// set this to the raw GitHub base URL of the catalog repo. Unlike the AI_* // AI_AGENT_ROLES_CATALOG_URL, set per-branch in CI); local-filesystem
// getters above this is INFRA config (where the catalog lives), not // sources are no longer supported. Empty/unset => the catalog is
// provider/model config — so an env var here is appropriate. // unavailable (the provider returns 502). This is INFRA config (where the
// catalog lives), not provider/model config, so an env var is appropriate.
return this.configService.get<string>('AI_AGENT_ROLES_CATALOG_URL', ''); return this.configService.get<string>('AI_AGENT_ROLES_CATALOG_URL', '');
} }