refactor(ai-roles): bake catalog URL at image build, drop local-fs source
The agent-roles catalog source is no longer hardcoded in app code and no longer supports a local filesystem directory. The provider now fetches only from an http(s):// base URL read from AI_AGENT_ROLES_CATALOG_URL; an empty or non-http(s) value yields a 502 (catalog unavailable). The default URL is baked into the Docker image at build time and set per branch in CI.

- provider: drop readLocal + node:fs/node:path; readRelative requires http(s) and 502s otherwise; remote fetch/streaming-cap/SSRF guards unchanged.
- environment.service: keep AI_AGENT_ROLES_CATALOG_URL (default ''); comment updated to reflect build-time injection, remote-only.
- Dockerfile: add ARG+ENV AI_AGENT_ROLES_CATALOG_URL in the installer stage.
- CI: develop.yml builds with the develop raw URL; release.yml (both build steps) with the main raw URL.
- tests: replace local-fixture tests with remote-mock happy/malformed bundle tests and a non-http(s) => 502 case; path-traversal block uses an https source.
- docs: update .env.example, CHANGELOG (#222), agent-roles-catalog/README.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
11
.env.example
11
.env.example
@@ -132,11 +132,12 @@ MCP_DOCMOST_PASSWORD=
|
||||
# NEVER set is_agent on a human or shared account — every action by that account
|
||||
# (including normal human edits) would then be mis-attributed as AI.
|
||||
|
||||
# Agent-roles catalog source: an http(s):// base URL => the catalog is fetched
|
||||
# remotely (e.g. the raw GitHub base URL of the catalog repo); any other value
|
||||
# => a local filesystem directory. Empty (default) => the in-repo
|
||||
# ./agent-roles-catalog folder (dev). Used by the admin "import role from
|
||||
# catalog" feature only.
|
||||
# Agent-roles catalog source: an http(s):// base URL to the catalog's raw files
|
||||
# (the server appends /index.json and /bundles/<id>/<lang>.json). This value is
|
||||
# baked into the Docker image at build time per branch (see the Dockerfile ARG
|
||||
# AI_AGENT_ROLES_CATALOG_URL and the CI build-args). Set it here only to point a
|
||||
# local/non-Docker run at a catalog; if unset, the "import role from catalog"
|
||||
# admin feature is unavailable. Local-filesystem sources are no longer supported.
|
||||
# AI_AGENT_ROLES_CATALOG_URL=
|
||||
|
||||
# Per-embedding-call timeout in milliseconds for the RAG indexer.
|
||||
|
||||
1
.github/workflows/develop.yml
vendored
1
.github/workflows/develop.yml
vendored
@@ -52,6 +52,7 @@ jobs:
|
||||
platforms: linux/amd64
|
||||
build-args: |
|
||||
APP_VERSION=${{ steps.version.outputs.value }}
|
||||
AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/develop/agent-roles-catalog
|
||||
push: true
|
||||
tags: ${{ env.IMAGE }}:develop
|
||||
cache-from: type=gha,scope=develop-amd64
|
||||
|
||||
2
.github/workflows/release.yml
vendored
2
.github/workflows/release.yml
vendored
@@ -57,6 +57,7 @@ jobs:
|
||||
platforms: ${{ matrix.platform }}
|
||||
build-args: |
|
||||
APP_VERSION=${{ env.VERSION }}
|
||||
AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/main/agent-roles-catalog
|
||||
outputs: type=image,name=${{ env.IMAGE }},push-by-digest=true,name-canonical=true,push=true
|
||||
cache-from: type=gha,scope=${{ matrix.suffix }}
|
||||
cache-to: type=gha,scope=${{ matrix.suffix }},mode=max,ignore-error=true
|
||||
@@ -85,6 +86,7 @@ jobs:
|
||||
platforms: ${{ matrix.platform }}
|
||||
build-args: |
|
||||
APP_VERSION=${{ env.VERSION }}
|
||||
AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/main/agent-roles-catalog
|
||||
push: false
|
||||
tags: |
|
||||
${{ env.IMAGE }}:latest
|
||||
|
||||
@@ -37,10 +37,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
admin endpoints — `POST /ai-chat/roles/catalog` (browse bundles),
|
||||
`/catalog/bundle` (read one bundle's roles), `/import`, and
|
||||
`/update-from-catalog` — and a new `source` column linking a role to its
|
||||
catalog slug/language/version. The catalog source is configurable via the new
|
||||
`AI_AGENT_ROLES_CATALOG_URL` env var (an `http(s)://` base URL fetches it
|
||||
remotely; otherwise a local directory; empty defaults to the in-repo
|
||||
`agent-roles-catalog/` folder — see `.env.example`). (#222)
|
||||
catalog slug/language/version. The catalog source is configured via the
|
||||
`AI_AGENT_ROLES_CATALOG_URL` env var — an `http(s)://` base URL to the
|
||||
catalog's raw files, baked into the image at build time and set per branch in
|
||||
CI (see `.env.example`). (#222)
|
||||
|
||||
## [0.94.0] - 2026-06-26
|
||||
|
||||
|
||||
@@ -23,6 +23,10 @@ RUN apt-get update \
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
# Agent-roles catalog base URL, baked at build time (set per-branch in CI).
|
||||
ARG AI_AGENT_ROLES_CATALOG_URL=""
|
||||
ENV AI_AGENT_ROLES_CATALOG_URL=$AI_AGENT_ROLES_CATALOG_URL
|
||||
|
||||
# Copy apps
|
||||
COPY --from=builder /app/apps/server/dist /app/apps/server/dist
|
||||
COPY --from=builder /app/apps/client/dist /app/apps/client/dist
|
||||
|
||||
@@ -30,20 +30,19 @@ Currently shipped bundles:
|
||||
|
||||
The server does not bundle this data; it reads it at request time from a single
|
||||
configured location, the `AI_AGENT_ROLES_CATALOG_URL` env var
|
||||
(`EnvironmentService.getAiAgentRolesCatalogSource()`). The value selects one of
|
||||
three sources:
|
||||
(`EnvironmentService.getAiAgentRolesCatalogSource()`), an `http(s)://` base URL
|
||||
to the catalog's raw files. The server fetches `<base>/index.json` for the
|
||||
manifest and `<base>/bundles/<bundle-id>/<lang>.json` for each opened bundle
|
||||
file; a remote base URL is the only supported source type.
|
||||
|
||||
- **`http(s)://…`** — a REMOTE base URL. The server fetches `<base>/index.json`
|
||||
for the manifest and `<base>/bundles/<bundle-id>/<lang>.json` for each opened
|
||||
bundle file (e.g. the raw GitHub base of the catalog repo in production).
|
||||
- **any other non-empty value** — a LOCAL filesystem directory; the same
|
||||
`index.json` / `bundles/<id>/<lang>.json` paths are read from disk.
|
||||
- **empty / unset** (the default) — the in-repo `agent-roles-catalog/` folder
|
||||
(this directory), i.e. local dev reads these files directly.
|
||||
That base URL is baked into the Docker image at build time and set per branch in
|
||||
CI: a `develop` build points at the `develop` raw URL, a release build at the
|
||||
`main` raw URL. Local-filesystem sources are no longer supported; if the value
|
||||
is unset the catalog is unavailable.
|
||||
|
||||
In every case the layout below is what the server expects, and the fetched JSON
|
||||
is re-validated server-side (the catalog is treated as untrusted input). See
|
||||
`.env.example` for the variable and the CHANGELOG for the rollout.
|
||||
The fetched JSON is re-validated server-side (the catalog is treated as
|
||||
untrusted input). See `.env.example` for the variable and the CHANGELOG for the
|
||||
rollout.
|
||||
|
||||
## `index.json` schema
|
||||
|
||||
|
||||
@@ -1,18 +1,14 @@
|
||||
import { promises as fs } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { BadGatewayException, BadRequestException } from '@nestjs/common';
|
||||
import { AiAgentRolesCatalogProvider } from './ai-agent-roles-catalog.provider';
|
||||
|
||||
/**
|
||||
* Provider tests against a LOCAL fixture directory (no network). They cover the
|
||||
* happy read path (fetchIndex / fetchBundle), the malformed-shape rejection, a
|
||||
* missing file => unavailable, and — most importantly — the `^[a-z0-9-]+$`
|
||||
* path-traversal guard that runs BEFORE any path is built.
|
||||
* Provider tests against a mocked remote source (no network). They cover the
|
||||
* happy read path (fetchIndex / fetchBundle), the malformed-shape rejection,
|
||||
* rejection of non-http(s) sources (local sources are gone), and — most
|
||||
* importantly — the `^[a-z0-9-]+$` path-traversal guard that runs BEFORE any
|
||||
* path/URL is built.
|
||||
*/
|
||||
describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
|
||||
let dir: string;
|
||||
|
||||
describe('AiAgentRolesCatalogProvider', () => {
|
||||
function makeProvider(source: string) {
|
||||
const env = {
|
||||
getAiAgentRolesCatalogSource: () => source,
|
||||
@@ -20,96 +16,13 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
|
||||
return new AiAgentRolesCatalogProvider(env as never);
|
||||
}
|
||||
|
||||
beforeAll(async () => {
|
||||
dir = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-roles-catalog-'));
|
||||
await fs.writeFile(
|
||||
path.join(dir, 'index.json'),
|
||||
JSON.stringify({
|
||||
schemaVersion: 1,
|
||||
bundles: [
|
||||
{
|
||||
id: 'general',
|
||||
name: { en: 'General', ru: 'Общие' },
|
||||
languages: ['en'],
|
||||
roles: [{ slug: 'researcher', version: 2 }],
|
||||
},
|
||||
],
|
||||
}),
|
||||
'utf8',
|
||||
);
|
||||
await fs.mkdir(path.join(dir, 'bundles', 'general'), { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(dir, 'bundles', 'general', 'en.json'),
|
||||
JSON.stringify({
|
||||
schemaVersion: 1,
|
||||
language: 'en',
|
||||
roles: [
|
||||
{
|
||||
slug: 'researcher',
|
||||
name: 'Researcher',
|
||||
instructions: 'be a researcher',
|
||||
},
|
||||
],
|
||||
}),
|
||||
'utf8',
|
||||
);
|
||||
// A malformed bundle (a role missing `instructions`) to test rejection.
|
||||
await fs.writeFile(
|
||||
path.join(dir, 'bundles', 'general', 'fr.json'),
|
||||
JSON.stringify({
|
||||
schemaVersion: 1,
|
||||
language: 'fr',
|
||||
roles: [{ slug: 'researcher', name: 'Chercheur' }],
|
||||
}),
|
||||
'utf8',
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await fs.rm(dir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('fetchIndex reads + validates index.json', async () => {
|
||||
const provider = makeProvider(dir);
|
||||
const index = await provider.fetchIndex();
|
||||
expect(index.schemaVersion).toBe(1);
|
||||
expect(index.bundles[0].id).toBe('general');
|
||||
expect(index.bundles[0].roles[0]).toEqual({
|
||||
slug: 'researcher',
|
||||
version: 2,
|
||||
});
|
||||
});
|
||||
|
||||
it('fetchBundle reads + validates a language file', async () => {
|
||||
const provider = makeProvider(dir);
|
||||
const bundle = await provider.fetchBundle('general', 'en');
|
||||
expect(bundle.language).toBe('en');
|
||||
expect(bundle.roles[0].slug).toBe('researcher');
|
||||
expect(bundle.roles[0].instructions).toBe('be a researcher');
|
||||
});
|
||||
|
||||
it('malformed bundle (missing instructions) => BadGateway', async () => {
|
||||
const provider = makeProvider(dir);
|
||||
await expect(provider.fetchBundle('general', 'fr')).rejects.toBeInstanceOf(
|
||||
BadGatewayException,
|
||||
);
|
||||
});
|
||||
|
||||
it('missing file => BadGateway (unavailable)', async () => {
|
||||
const provider = makeProvider(dir);
|
||||
await expect(
|
||||
provider.fetchBundle('general', 'de'),
|
||||
).rejects.toBeInstanceOf(BadGatewayException);
|
||||
});
|
||||
|
||||
it('empty source resolves to the in-repo folder (no throw building the path)', async () => {
|
||||
// With an empty source the provider targets ./agent-roles-catalog under the
|
||||
// cwd; that folder is created by a separate task, so a read here surfaces as
|
||||
// BadGateway (unavailable) rather than a path-build error.
|
||||
const provider = makeProvider('');
|
||||
await expect(provider.fetchIndex()).rejects.toBeInstanceOf(
|
||||
BadGatewayException,
|
||||
);
|
||||
it('non-http(s) source => BadGateway (local sources removed)', async () => {
|
||||
for (const source of ['', '/var/lib/agent-roles-catalog', './agent-roles-catalog']) {
|
||||
const provider = makeProvider(source);
|
||||
await expect(provider.fetchIndex()).rejects.toBeInstanceOf(
|
||||
BadGatewayException,
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
describe('remote fetch streaming size cap', () => {
|
||||
@@ -157,6 +70,43 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
|
||||
} as unknown as Response;
|
||||
}
|
||||
|
||||
it('fetchBundle remote happy path => parses + validates', async () => {
|
||||
const json = JSON.stringify({
|
||||
schemaVersion: 1,
|
||||
language: 'en',
|
||||
roles: [
|
||||
{
|
||||
slug: 'researcher',
|
||||
name: 'Researcher',
|
||||
instructions: 'be a researcher',
|
||||
},
|
||||
],
|
||||
});
|
||||
const body = streamOf([new TextEncoder().encode(json)]);
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body })) as never;
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
const bundle = await provider.fetchBundle('general', 'en');
|
||||
expect(bundle.roles[0].slug).toBe('researcher');
|
||||
});
|
||||
|
||||
it('fetchBundle remote malformed (role missing instructions) => BadGateway', async () => {
|
||||
const json = JSON.stringify({
|
||||
schemaVersion: 1,
|
||||
language: 'fr',
|
||||
roles: [{ slug: 'researcher', name: 'Chercheur' }],
|
||||
});
|
||||
const body = streamOf([new TextEncoder().encode(json)]);
|
||||
global.fetch = jest
|
||||
.fn()
|
||||
.mockResolvedValue(mockResponse({ body })) as never;
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
await expect(provider.fetchBundle('general', 'fr')).rejects.toBeInstanceOf(
|
||||
BadGatewayException,
|
||||
);
|
||||
});
|
||||
|
||||
it('declared Content-Length over the cap => BadGateway before reading the body', async () => {
|
||||
global.fetch = jest.fn().mockResolvedValue(
|
||||
mockResponse({
|
||||
@@ -340,14 +290,14 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
|
||||
|
||||
for (const value of bad) {
|
||||
it(`rejects bundleId="${value}" with BadRequest`, async () => {
|
||||
const provider = makeProvider(dir);
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
await expect(
|
||||
provider.fetchBundle(value, 'en'),
|
||||
).rejects.toBeInstanceOf(BadRequestException);
|
||||
});
|
||||
|
||||
it(`rejects language="${value}" with BadRequest`, async () => {
|
||||
const provider = makeProvider(dir);
|
||||
const provider = makeProvider('https://catalog.example.com');
|
||||
await expect(
|
||||
provider.fetchBundle('general', value),
|
||||
).rejects.toBeInstanceOf(BadRequestException);
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
import { promises as fs } from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
import {
|
||||
BadGatewayException,
|
||||
BadRequestException,
|
||||
@@ -26,9 +24,9 @@ const MAX_BYTES = 1_000_000;
|
||||
|
||||
/**
|
||||
* Fetches + validates the agent-roles catalog from its configured source. The
|
||||
* source location (EnvironmentService.getAiAgentRolesCatalogSource()) is either
|
||||
* an http(s):// base URL (REMOTE) or a local filesystem directory (LOCAL; the
|
||||
* empty default resolves to the in-repo `agent-roles-catalog/` folder).
|
||||
* source (EnvironmentService.getAiAgentRolesCatalogSource()) is an http(s)://
|
||||
* base URL — REMOTE only; local-filesystem sources are no longer supported. The
|
||||
* value is baked into the Docker image at build time (set per-branch in CI).
|
||||
*
|
||||
* The catalog is UNTRUSTED input: every file is JSON-parsed and run through a
|
||||
* hand-written type guard before any field is exposed, and every dynamic path
|
||||
@@ -91,31 +89,20 @@ export class AiAgentRolesCatalogProvider {
|
||||
}
|
||||
}
|
||||
|
||||
/** Read a relative catalog path as text from the configured source. */
|
||||
/** Read a relative catalog path as text from the configured remote source. */
|
||||
private async readRelative(rel: string): Promise<string> {
|
||||
const source = this.environmentService
|
||||
.getAiAgentRolesCatalogSource()
|
||||
.trim();
|
||||
if (/^https?:\/\//i.test(source)) {
|
||||
return this.fetchRemote(source, rel);
|
||||
}
|
||||
const dir = source || path.join(process.cwd(), 'agent-roles-catalog');
|
||||
return this.readLocal(dir, rel);
|
||||
}
|
||||
|
||||
/** Read a local catalog file. Missing => the catalog is unavailable. */
|
||||
private async readLocal(dir: string, rel: string): Promise<string> {
|
||||
try {
|
||||
return await fs.readFile(path.join(dir, rel), 'utf8');
|
||||
} catch (err) {
|
||||
const reason = shortError(err);
|
||||
if (!/^https?:\/\//i.test(source)) {
|
||||
this.logger.error(
|
||||
`Agent roles catalog local read failed (${path.join(dir, rel)}): ${reason}`,
|
||||
'Agent roles catalog source is not configured (expected an http(s):// base URL)',
|
||||
);
|
||||
throw new BadGatewayException(
|
||||
`Agent roles catalog is unavailable: ${reason}`,
|
||||
'Agent roles catalog is unavailable: source is not configured',
|
||||
);
|
||||
}
|
||||
return this.fetchRemote(source, rel);
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -290,11 +290,12 @@ export class EnvironmentService {
|
||||
// ai_provider_credentials, with no env fallback. APP_SECRET stays (getAppSecret).
|
||||
|
||||
getAiAgentRolesCatalogSource(): string {
|
||||
// Catalog location. http(s):// URL => fetched remotely; anything else => a
|
||||
// local filesystem directory. Defaults to the in-repo folder (dev). In prod
|
||||
// set this to the raw GitHub base URL of the catalog repo. Unlike the AI_*
|
||||
// getters above this is INFRA config (where the catalog lives), not
|
||||
// provider/model config — so an env var here is appropriate.
|
||||
// Catalog location: an http(s):// base URL the catalog is fetched from.
|
||||
// The value is baked into the image at build time (Dockerfile ARG
|
||||
// AI_AGENT_ROLES_CATALOG_URL, set per-branch in CI); local-filesystem
|
||||
// sources are no longer supported. Empty/unset => the catalog is
|
||||
// unavailable (the provider returns 502). This is INFRA config (where the
|
||||
// catalog lives), not provider/model config, so an env var is appropriate.
|
||||
return this.configService.get<string>('AI_AGENT_ROLES_CATALOG_URL', '');
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user