fix(#355 review round-2 F9-F11): register-gate test + shutdown idle-close + DB-path metrics gate
- F10 [stability]: closeMetricsServer() now calls server.closeIdleConnections()
+ server.unref() after server.close(). server.close()'s callback doesn't fire
until keep-alive sockets drain, and the scraper (VictoriaMetrics/vmagent) holds
an idle keep-alive socket — so onModuleDestroy's awaited close would hang until
the scraper disconnects or the orchestrator SIGKILLs on the kill-grace window.
closeIdleConnections() drops idle keep-alive sockets so shutdown completes
immediately (Node 22, per the Dockerfile base).
- F9 [test]: client-telemetry.module.spec.ts pins the E1=B register() gate — the
core of the "public endpoint OFF by default" decision: flag unset / any non-
"true" value ("false"/""/"0"/…) → empty controllers+providers (route absent);
"true"/"TRUE" → registers VitalsController + VitalsService. A flag-inversion or
truthiness regression that reopened the anonymous disk-fill surface now fails.
- F11 [regression/perf]: the db_query_duration_seconds token work (firstSqlToken
regex + Set lookup) is now gated on isMetricsEnabled() in database.module.ts, so
a non-metrics deployment pays NOTHING per query (previously observeDbQuery
no-op'd but the token was still computed on every query). Also hoisted the
13-element known-token Set to a module const (KNOWN_SQL_TOKENS) so it's built
once, not per query.
Gate: server tsc 0; metrics + vitals + client-telemetry suites pass (incl. the
new register-gate test).
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,47 @@
|
||||
import { ClientTelemetryModule } from './client-telemetry.module';
|
||||
import { VitalsController } from './vitals.controller';
|
||||
import { VitalsService } from './vitals.service';
|
||||
|
||||
// The register() gate is the CORE of the maintainer's E1=B decision: the public,
|
||||
// unauthenticated /api/telemetry/vitals endpoint must be OFF by default, so a
|
||||
// self-host deploy has no anonymous disk-fill surface into `client_metrics`. A
|
||||
// regression that inverts the flag (or a truthiness bug where "" / "false"
|
||||
// registers the route) would silently reopen that surface — pin it here.
|
||||
describe('ClientTelemetryModule.register (E1=B gate)', () => {
|
||||
const original = process.env.CLIENT_TELEMETRY_ENABLED;
|
||||
afterEach(() => {
|
||||
if (original === undefined) delete process.env.CLIENT_TELEMETRY_ENABLED;
|
||||
else process.env.CLIENT_TELEMETRY_ENABLED = original;
|
||||
});
|
||||
|
||||
it('OFF by default (flag unset) — no controller, no provider (endpoint absent)', () => {
|
||||
delete process.env.CLIENT_TELEMETRY_ENABLED;
|
||||
const mod = ClientTelemetryModule.register();
|
||||
expect(mod.controllers).toEqual([]);
|
||||
expect(mod.providers).toEqual([]);
|
||||
});
|
||||
|
||||
it.each(['false', 'False', '0', '', 'yes', '1'])(
|
||||
'stays OFF for non-"true" value %p (no route)',
|
||||
(val) => {
|
||||
process.env.CLIENT_TELEMETRY_ENABLED = val;
|
||||
const mod = ClientTelemetryModule.register();
|
||||
expect(mod.controllers).toEqual([]);
|
||||
expect(mod.providers).toEqual([]);
|
||||
},
|
||||
);
|
||||
|
||||
it('ON only for "true" — registers VitalsController + VitalsService', () => {
|
||||
process.env.CLIENT_TELEMETRY_ENABLED = 'true';
|
||||
const mod = ClientTelemetryModule.register();
|
||||
expect(mod.controllers).toContain(VitalsController);
|
||||
expect(mod.providers).toContain(VitalsService);
|
||||
});
|
||||
|
||||
it('ON is case-insensitive ("TRUE")', () => {
|
||||
process.env.CLIENT_TELEMETRY_ENABLED = 'TRUE';
|
||||
const mod = ClientTelemetryModule.register();
|
||||
expect(mod.controllers).toContain(VitalsController);
|
||||
expect(mod.providers).toContain(VitalsService);
|
||||
});
|
||||
});
|
||||
@@ -40,7 +40,10 @@ import { PageListener } from '@docmost/db/listeners/page.listener';
|
||||
import { PostgresJSDialect } from 'kysely-postgres-js';
|
||||
import * as postgres from 'postgres';
|
||||
import { normalizePostgresUrl } from '../common/helpers';
|
||||
import { observeDbQuery } from '../integrations/metrics/metrics.registry';
|
||||
import {
|
||||
observeDbQuery,
|
||||
isMetricsEnabled,
|
||||
} from '../integrations/metrics/metrics.registry';
|
||||
import { firstSqlToken } from '../integrations/metrics/metrics.constants';
|
||||
|
||||
@Global()
|
||||
@@ -70,12 +73,16 @@ import { firstSqlToken } from '../integrations/metrics/metrics.constants';
|
||||
plugins: [new CamelCasePlugin()],
|
||||
log: (event: LogEvent) => {
|
||||
// #355 — db_query_duration_seconds, labelled by the leading SQL token
|
||||
// (bounded cardinality). No-op when METRICS_PORT is unset. Runs for
|
||||
// every query, independent of the dev-only debug logging below.
|
||||
observeDbQuery(
|
||||
firstSqlToken(event.query.sql),
|
||||
event.queryDurationMillis / 1000,
|
||||
);
|
||||
// (bounded cardinality). Gated on isMetricsEnabled() so the token work
|
||||
// (regex + Set lookup) is skipped entirely when metrics are OFF — not
|
||||
// just observeDbQuery no-op'd — so a non-metrics deployment pays nothing
|
||||
// per query. Runs independent of the dev-only debug logging below.
|
||||
if (isMetricsEnabled()) {
|
||||
observeDbQuery(
|
||||
firstSqlToken(event.query.sql),
|
||||
event.queryDurationMillis / 1000,
|
||||
);
|
||||
}
|
||||
|
||||
if (environmentService.getNodeEnv() !== 'development') return;
|
||||
const logger = new Logger(DatabaseModule.name);
|
||||
|
||||
@@ -30,28 +30,32 @@ export const JOB_BUCKETS = [
|
||||
* the full query text would blow up label cardinality; the leading keyword is a
|
||||
* finite set. Unknown/empty queries collapse to `other`.
|
||||
*/
|
||||
// The bounded set of SQL leading keywords used as db_query_duration_seconds
|
||||
// labels. Module-const so it is built ONCE, not per query (this runs on every DB
|
||||
// query when metrics are enabled).
|
||||
const KNOWN_SQL_TOKENS = new Set([
|
||||
'select',
|
||||
'insert',
|
||||
'update',
|
||||
'delete',
|
||||
'with',
|
||||
'begin',
|
||||
'commit',
|
||||
'rollback',
|
||||
'alter',
|
||||
'create',
|
||||
'drop',
|
||||
'truncate',
|
||||
'explain',
|
||||
]);
|
||||
|
||||
export function firstSqlToken(sql: string | undefined): string {
|
||||
if (!sql) return 'other';
|
||||
// Skip leading whitespace / comments and grab the first word.
|
||||
const match = /^[\s(]*([a-zA-Z]+)/.exec(sql);
|
||||
if (!match) return 'other';
|
||||
const token = match[1].toLowerCase();
|
||||
const known = new Set([
|
||||
'select',
|
||||
'insert',
|
||||
'update',
|
||||
'delete',
|
||||
'with',
|
||||
'begin',
|
||||
'commit',
|
||||
'rollback',
|
||||
'alter',
|
||||
'create',
|
||||
'drop',
|
||||
'truncate',
|
||||
'explain',
|
||||
]);
|
||||
return known.has(token) ? token : 'other';
|
||||
return KNOWN_SQL_TOKENS.has(token) ? token : 'other';
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -73,5 +73,14 @@ export function closeMetricsServer(): Promise<void> {
|
||||
if (!server) return Promise.resolve();
|
||||
return new Promise((resolve) => {
|
||||
server.close(() => resolve());
|
||||
// server.close() stops accepting NEW connections but its callback does not
|
||||
// fire until existing keep-alive sockets drain. The scraper (VictoriaMetrics/
|
||||
// vmagent) holds an idle HTTP keep-alive socket, so without this the callback
|
||||
// — and thus shutdown — would hang until the scraper disconnects or the
|
||||
// orchestrator escalates to SIGKILL on the kill-grace window. Force-close idle
|
||||
// keep-alive sockets so close() completes immediately, and unref so this
|
||||
// server never keeps the event loop alive on its own.
|
||||
server.closeIdleConnections();
|
||||
server.unref();
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user