refactor(ai-roles): bake catalog URL at image build, drop local-fs source

The agent-roles catalog source is no longer hardcoded in app code and no
longer supports a local filesystem directory. The provider now fetches only
from an http(s):// base URL read from AI_AGENT_ROLES_CATALOG_URL; an empty or
non-http value yields a 502 (catalog unavailable). The default URL is baked
into the Docker image at build time and set per branch in CI.

- provider: drop readLocal + node:fs/node:path; readRelative requires http(s)
  and 502s otherwise; remote fetch/streaming-cap/SSRF guards unchanged.
- environment.service: keep AI_AGENT_ROLES_CATALOG_URL (default ''); comment
  updated to reflect build-time injection, remote-only.
- Dockerfile: add ARG+ENV AI_AGENT_ROLES_CATALOG_URL in the installer stage.
- CI: develop.yml builds with the develop raw URL; release.yml (both build
  steps) with the main raw URL.
- tests: replace local-fixture tests with remote-mock happy/malformed bundle
  tests and a non-http => 502 case; path-traversal block uses an https source.
- docs: update .env.example, CHANGELOG (#222), agent-roles-catalog/README.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-27 03:32:48 +03:00
parent 3511301331
commit 2a4ef9267e
9 changed files with 94 additions and 149 deletions

View File

@@ -1,18 +1,14 @@
import { promises as fs } from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { BadGatewayException, BadRequestException } from '@nestjs/common';
import { AiAgentRolesCatalogProvider } from './ai-agent-roles-catalog.provider';
/**
* Provider tests against a LOCAL fixture directory (no network). They cover the
* happy read path (fetchIndex / fetchBundle), the malformed-shape rejection, a
* missing file => unavailable, and — most importantly — the `^[a-z0-9-]+$`
* path-traversal guard that runs BEFORE any path is built.
* Provider tests against a mocked remote source (no network). They cover the
* happy read path (fetchIndex / fetchBundle), the malformed-shape rejection,
* rejection of non-http(s) sources (local sources are gone), and — most
* importantly — the `^[a-z0-9-]+$` path-traversal guard that runs BEFORE any
* path/URL is built.
*/
describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
let dir: string;
describe('AiAgentRolesCatalogProvider', () => {
function makeProvider(source: string) {
const env = {
getAiAgentRolesCatalogSource: () => source,
@@ -20,96 +16,13 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
return new AiAgentRolesCatalogProvider(env as never);
}
beforeAll(async () => {
dir = await fs.mkdtemp(path.join(os.tmpdir(), 'agent-roles-catalog-'));
await fs.writeFile(
path.join(dir, 'index.json'),
JSON.stringify({
schemaVersion: 1,
bundles: [
{
id: 'general',
name: { en: 'General', ru: 'Общие' },
languages: ['en'],
roles: [{ slug: 'researcher', version: 2 }],
},
],
}),
'utf8',
);
await fs.mkdir(path.join(dir, 'bundles', 'general'), { recursive: true });
await fs.writeFile(
path.join(dir, 'bundles', 'general', 'en.json'),
JSON.stringify({
schemaVersion: 1,
language: 'en',
roles: [
{
slug: 'researcher',
name: 'Researcher',
instructions: 'be a researcher',
},
],
}),
'utf8',
);
// A malformed bundle (a role missing `instructions`) to test rejection.
await fs.writeFile(
path.join(dir, 'bundles', 'general', 'fr.json'),
JSON.stringify({
schemaVersion: 1,
language: 'fr',
roles: [{ slug: 'researcher', name: 'Chercheur' }],
}),
'utf8',
);
});
afterAll(async () => {
await fs.rm(dir, { recursive: true, force: true });
});
it('fetchIndex reads + validates index.json', async () => {
const provider = makeProvider(dir);
const index = await provider.fetchIndex();
expect(index.schemaVersion).toBe(1);
expect(index.bundles[0].id).toBe('general');
expect(index.bundles[0].roles[0]).toEqual({
slug: 'researcher',
version: 2,
});
});
it('fetchBundle reads + validates a language file', async () => {
const provider = makeProvider(dir);
const bundle = await provider.fetchBundle('general', 'en');
expect(bundle.language).toBe('en');
expect(bundle.roles[0].slug).toBe('researcher');
expect(bundle.roles[0].instructions).toBe('be a researcher');
});
it('malformed bundle (missing instructions) => BadGateway', async () => {
const provider = makeProvider(dir);
await expect(provider.fetchBundle('general', 'fr')).rejects.toBeInstanceOf(
BadGatewayException,
);
});
it('missing file => BadGateway (unavailable)', async () => {
const provider = makeProvider(dir);
await expect(
provider.fetchBundle('general', 'de'),
).rejects.toBeInstanceOf(BadGatewayException);
});
it('empty source resolves to the in-repo folder (no throw building the path)', async () => {
// With an empty source the provider targets ./agent-roles-catalog under the
// cwd; that folder is created by a separate task, so a read here surfaces as
// BadGateway (unavailable) rather than a path-build error.
const provider = makeProvider('');
await expect(provider.fetchIndex()).rejects.toBeInstanceOf(
BadGatewayException,
);
it('non-http(s) source => BadGateway (local sources removed)', async () => {
for (const source of ['', '/var/lib/agent-roles-catalog', './agent-roles-catalog']) {
const provider = makeProvider(source);
await expect(provider.fetchIndex()).rejects.toBeInstanceOf(
BadGatewayException,
);
}
});
describe('remote fetch streaming size cap', () => {
@@ -157,6 +70,43 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
} as unknown as Response;
}
it('fetchBundle remote happy path => parses + validates', async () => {
const json = JSON.stringify({
schemaVersion: 1,
language: 'en',
roles: [
{
slug: 'researcher',
name: 'Researcher',
instructions: 'be a researcher',
},
],
});
const body = streamOf([new TextEncoder().encode(json)]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
const provider = makeProvider('https://catalog.example.com');
const bundle = await provider.fetchBundle('general', 'en');
expect(bundle.roles[0].slug).toBe('researcher');
});
it('fetchBundle remote malformed (role missing instructions) => BadGateway', async () => {
const json = JSON.stringify({
schemaVersion: 1,
language: 'fr',
roles: [{ slug: 'researcher', name: 'Chercheur' }],
});
const body = streamOf([new TextEncoder().encode(json)]);
global.fetch = jest
.fn()
.mockResolvedValue(mockResponse({ body })) as never;
const provider = makeProvider('https://catalog.example.com');
await expect(provider.fetchBundle('general', 'fr')).rejects.toBeInstanceOf(
BadGatewayException,
);
});
it('declared Content-Length over the cap => BadGateway before reading the body', async () => {
global.fetch = jest.fn().mockResolvedValue(
mockResponse({
@@ -340,14 +290,14 @@ describe('AiAgentRolesCatalogProvider (local fixtures)', () => {
for (const value of bad) {
it(`rejects bundleId="${value}" with BadRequest`, async () => {
const provider = makeProvider(dir);
const provider = makeProvider('https://catalog.example.com');
await expect(
provider.fetchBundle(value, 'en'),
).rejects.toBeInstanceOf(BadRequestException);
});
it(`rejects language="${value}" with BadRequest`, async () => {
const provider = makeProvider(dir);
const provider = makeProvider('https://catalog.example.com');
await expect(
provider.fetchBundle('general', value),
).rejects.toBeInstanceOf(BadRequestException);

View File

@@ -1,5 +1,3 @@
import { promises as fs } from 'node:fs';
import * as path from 'node:path';
import {
BadGatewayException,
BadRequestException,
@@ -26,9 +24,9 @@ const MAX_BYTES = 1_000_000;
/**
* Fetches + validates the agent-roles catalog from its configured source. The
* source location (EnvironmentService.getAiAgentRolesCatalogSource()) is either
* an http(s):// base URL (REMOTE) or a local filesystem directory (LOCAL; the
* empty default resolves to the in-repo `agent-roles-catalog/` folder).
* source (EnvironmentService.getAiAgentRolesCatalogSource()) is an http(s)://
* base URL REMOTE only; local-filesystem sources are no longer supported. The
* value is baked into the Docker image at build time (set per-branch in CI).
*
* The catalog is UNTRUSTED input: every file is JSON-parsed and run through a
* hand-written type guard before any field is exposed, and every dynamic path
@@ -91,31 +89,20 @@ export class AiAgentRolesCatalogProvider {
}
}
/** Read a relative catalog path as text from the configured source. */
/** Read a relative catalog path as text from the configured remote source. */
private async readRelative(rel: string): Promise<string> {
const source = this.environmentService
.getAiAgentRolesCatalogSource()
.trim();
if (/^https?:\/\//i.test(source)) {
return this.fetchRemote(source, rel);
}
const dir = source || path.join(process.cwd(), 'agent-roles-catalog');
return this.readLocal(dir, rel);
}
/** Read a local catalog file. Missing => the catalog is unavailable. */
private async readLocal(dir: string, rel: string): Promise<string> {
try {
return await fs.readFile(path.join(dir, rel), 'utf8');
} catch (err) {
const reason = shortError(err);
if (!/^https?:\/\//i.test(source)) {
this.logger.error(
`Agent roles catalog local read failed (${path.join(dir, rel)}): ${reason}`,
'Agent roles catalog source is not configured (expected an http(s):// base URL)',
);
throw new BadGatewayException(
`Agent roles catalog is unavailable: ${reason}`,
'Agent roles catalog is unavailable: source is not configured',
);
}
return this.fetchRemote(source, rel);
}
/**

View File

@@ -290,11 +290,12 @@ export class EnvironmentService {
// ai_provider_credentials, with no env fallback. APP_SECRET stays (getAppSecret).
getAiAgentRolesCatalogSource(): string {
// Catalog location. http(s):// URL => fetched remotely; anything else => a
// local filesystem directory. Defaults to the in-repo folder (dev). In prod
// set this to the raw GitHub base URL of the catalog repo. Unlike the AI_*
// getters above this is INFRA config (where the catalog lives), not
// provider/model config — so an env var here is appropriate.
// Catalog location: an http(s):// base URL the catalog is fetched from.
// The value is baked into the image at build time (Dockerfile ARG
// AI_AGENT_ROLES_CATALOG_URL, set per-branch in CI); local-filesystem
// sources are no longer supported. Empty/unset => the catalog is
// unavailable (the provider returns 502). This is INFRA config (where the
// catalog lives), not provider/model config, so an env var is appropriate.
return this.configService.get<string>('AI_AGENT_ROLES_CATALOG_URL', '');
}