Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fdb6f39a8e | |||
| 6475cb81e0 | |||
| 51925e955f |
@@ -242,27 +242,3 @@ MCP_DOCMOST_PASSWORD=
|
||||
# FAILS CLOSED if Redis is unavailable (default: 1,000,000 tokens per workspace
|
||||
# per rolling day).
|
||||
# SHARE_AI_WORKSPACE_TOKEN_BUDGET_PER_DAY=1000000
|
||||
|
||||
# --- Observability / perf metrics (#355) ---
|
||||
#
|
||||
# Two INDEPENDENT toggles, both OFF by default:
|
||||
#
|
||||
# 1) METRICS_PORT — the server-side Prometheus scrape endpoint.
|
||||
# UNSET (default) => the whole prom subsystem is OFF: no registry, no
|
||||
# collectors, and NOTHING is exposed on the main app port. There is NO
|
||||
# default port — leaving it blank disables it. When set to a port (e.g.
|
||||
# 9464), a SEPARATE bare node:http listener serves GET /metrics on that port
|
||||
# only (never on the main :3000 app listener), for a scraper such as
|
||||
# VictoriaMetrics/Prometheus reaching it as <host>:<port>/metrics.
|
||||
# METRICS_PORT=9464
|
||||
#
|
||||
# 2) CLIENT_TELEMETRY_ENABLED — the public client perf-telemetry sink.
|
||||
# OFF by default. When true, the unauthenticated POST /api/telemetry/vitals
|
||||
# endpoint is registered and browsers collect + send web-vitals / editor
|
||||
# metrics into the `client_metrics` table (read directly by Grafana, separate
|
||||
# from METRICS_PORT). Leave OFF unless you actually consume this data: the
|
||||
# endpoint is public and the table has NO app-side retention, so enabling it
|
||||
# requires an EXTERNAL pruner to bound `client_metrics` growth (the deployed
|
||||
# infra prunes rows >90d via a maintenance container). When off, the endpoint
|
||||
# does not exist and the client installs no observers.
|
||||
# CLIENT_TELEMETRY_ENABLED=false
|
||||
|
||||
@@ -18,48 +18,12 @@ env:
|
||||
IMAGE: ghcr.io/vvzvlad/gitmost
|
||||
|
||||
jobs:
|
||||
# Run the reusable test suite. Together with the e2e jobs below it gates the
|
||||
# publish job (the image push), not the build itself — build runs in parallel.
|
||||
# Run the reusable test suite first so a failing test blocks the image build.
|
||||
test:
|
||||
uses: ./.github/workflows/test.yml
|
||||
|
||||
# Runs in parallel with the test/e2e jobs and only warms the buildx cache
|
||||
# (GHA cache, scope develop-amd64). No push happens here — the publish job
|
||||
# below is the only one that pushes the image.
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
|
||||
- name: Resolve version
|
||||
id: version
|
||||
run: echo "value=$(git describe --tags --always)" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Build develop image (warm cache, no push)
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
platforms: linux/amd64
|
||||
build-args: |
|
||||
APP_VERSION=${{ steps.version.outputs.value }}
|
||||
AI_AGENT_ROLES_CATALOG_URL=https://raw.githubusercontent.com/vvzvlad/gitmost/develop/agent-roles-catalog
|
||||
push: false
|
||||
cache-from: type=gha,scope=develop-amd64
|
||||
cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true
|
||||
|
||||
# The gate: rebuilds from the cache the build job just wrote (near-instant on
|
||||
# a cache hit; worst case — cache eviction — a full rebuild, which matches the
|
||||
# old sequential timing) and pushes :develop only when unit tests AND both
|
||||
# e2e suites AND the build are green.
|
||||
publish:
|
||||
needs: [test, e2e-server, e2e-mcp, build]
|
||||
needs: test
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 30
|
||||
steps:
|
||||
@@ -93,10 +57,13 @@ jobs:
|
||||
push: true
|
||||
tags: ${{ env.IMAGE }}:develop
|
||||
cache-from: type=gha,scope=develop-amd64
|
||||
cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true
|
||||
|
||||
# e2e jobs gate the publish (image push), not the build: the :develop image
|
||||
# is pushed only when unit tests AND both e2e suites pass (publish.needs
|
||||
# lists them all).
|
||||
# e2e jobs run on every develop push but DO NOT gate the build/publish above:
|
||||
# `build` stays `needs: test` only, so the :develop image still ships even if
|
||||
# e2e fails. A failing e2e job turns the run red and triggers GitHub's email
|
||||
# to the pusher — that red run + email is the intended notification, not a
|
||||
# deploy block.
|
||||
e2e-server:
|
||||
runs-on: ubuntu-latest
|
||||
# Hard cap: the full-AppModule e2e leaks open handles and has left jest hanging until the 6h max.
|
||||
@@ -157,7 +124,9 @@ jobs:
|
||||
- name: Run server e2e
|
||||
run: pnpm --filter ./apps/server test:e2e
|
||||
|
||||
# Gates the publish too — see the comment above e2e-server.
|
||||
# Same rationale as e2e-server: this job is intentionally NOT in
|
||||
# `build.needs`. Deploy of the :develop image must not be blocked by e2e;
|
||||
# a red run plus GitHub's email to the pusher is the notification mechanism.
|
||||
e2e-mcp:
|
||||
runs-on: ubuntu-latest
|
||||
timeout-minutes: 20
|
||||
|
||||
+2
-16
@@ -5,13 +5,6 @@ RUN npm install -g pnpm@10.4.0
|
||||
|
||||
FROM base AS builder
|
||||
|
||||
# re2 (packages/mcp) always compiles from source under pnpm (the prebuilt-binary
|
||||
# download cannot identify the GitHub repo), so node-gyp needs python3/make/g++.
|
||||
# This stage is discarded, so the toolchain can stay installed.
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends python3 make g++ \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
@@ -64,17 +57,10 @@ COPY --from=builder /app/patches /app/patches
|
||||
|
||||
RUN chown -R node:node /app
|
||||
|
||||
# Toolchain is needed transiently to compile re2 during the prod install; install
|
||||
# and purge it in one layer to keep the final image slim. The install itself runs
|
||||
# as the node user via su to keep node_modules ownership without a costly chown layer.
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends python3 make g++ \
|
||||
&& su node -c "pnpm install --frozen-lockfile --prod" \
|
||||
&& apt-get purge -y --auto-remove python3 make g++ \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
USER node
|
||||
|
||||
RUN pnpm install --frozen-lockfile --prod
|
||||
|
||||
RUN mkdir -p /app/data/storage
|
||||
|
||||
VOLUME ["/app/data/storage"]
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
},
|
||||
"dependencies": {
|
||||
"@ai-sdk/react": "^3.0.208",
|
||||
"@braintree/sanitize-url": "7.1.2",
|
||||
"@atlaskit/pragmatic-drag-and-drop": "1.8.1",
|
||||
"@atlaskit/pragmatic-drag-and-drop-auto-scroll": "2.1.5",
|
||||
"@atlaskit/pragmatic-drag-and-drop-flourish": "2.0.15",
|
||||
@@ -61,7 +62,6 @@
|
||||
"react-clear-modal": "^2.0.18",
|
||||
"react-dom": "^18.3.1",
|
||||
"react-drawio": "1.0.7",
|
||||
"web-vitals": "^5.1.0",
|
||||
"react-error-boundary": "6.1.1",
|
||||
"react-helmet-async": "3.0.0",
|
||||
"react-i18next": "16.5.8",
|
||||
|
||||
+58
-24
@@ -1,38 +1,72 @@
|
||||
import { lazy, Suspense } from "react";
|
||||
import { Navigate, Route, Routes } from "react-router-dom";
|
||||
import { Center, Loader } from "@mantine/core";
|
||||
import { Error404 } from "@/components/ui/error-404.tsx";
|
||||
import Layout from "@/components/layouts/global/layout.tsx";
|
||||
import { useTrackOrigin } from "@/hooks/use-track-origin";
|
||||
|
||||
// ShareLayout is route-split: its ShareShell chrome pulls in the table of
|
||||
// contents (and thus TipTap), so keeping it out of the eager graph removes the
|
||||
// editor engine from startup for authenticated users too.
|
||||
const ShareLayout = lazy(
|
||||
() => import("@/features/share/components/share-layout.tsx"),
|
||||
);
|
||||
|
||||
// Auth / entry pages stay eager: they are the first paint for an unauthenticated
|
||||
// visitor (e.g. /login) and are already small, so code-splitting them would only
|
||||
// add a cold-chunk round trip to the most common cold-start path.
|
||||
import SetupWorkspace from "@/pages/auth/setup-workspace.tsx";
|
||||
import LoginPage from "@/pages/auth/login";
|
||||
import Home from "@/pages/dashboard/home";
|
||||
import Page from "@/pages/page/page";
|
||||
import AccountSettings from "@/pages/settings/account/account-settings";
|
||||
import WorkspaceMembers from "@/pages/settings/workspace/workspace-members";
|
||||
import WorkspaceSettings from "@/pages/settings/workspace/workspace-settings";
|
||||
import AiSettings from "@/pages/settings/workspace/ai-settings";
|
||||
import Groups from "@/pages/settings/group/groups";
|
||||
import GroupInfo from "./pages/settings/group/group-info";
|
||||
import Spaces from "@/pages/settings/space/spaces.tsx";
|
||||
import { Error404 } from "@/components/ui/error-404.tsx";
|
||||
import AccountPreferences from "@/pages/settings/account/account-preferences.tsx";
|
||||
import SpaceHome from "@/pages/space/space-home.tsx";
|
||||
import PageRedirect from "@/pages/page/page-redirect.tsx";
|
||||
import Layout from "@/components/layouts/global/layout.tsx";
|
||||
import InviteSignup from "@/pages/auth/invite-signup.tsx";
|
||||
import ForgotPassword from "@/pages/auth/forgot-password.tsx";
|
||||
import PasswordReset from "./pages/auth/password-reset";
|
||||
import SharedPage from "@/pages/share/shared-page.tsx";
|
||||
import Shares from "@/pages/settings/shares/shares.tsx";
|
||||
import ShareLayout from "@/features/share/components/share-layout.tsx";
|
||||
import PageRedirect from "@/pages/page/page-redirect.tsx";
|
||||
import ShareRedirect from "@/pages/share/share-redirect.tsx";
|
||||
import { useTrackOrigin } from "@/hooks/use-track-origin";
|
||||
import SpacesPage from "@/pages/spaces/spaces.tsx";
|
||||
import SpaceTrash from "@/pages/space/space-trash.tsx";
|
||||
import FavoritesPage from "@/pages/favorites/favorites-page";
|
||||
import LabelPage from "@/pages/label/label-page";
|
||||
|
||||
// Heavy / leaf pages are route-split with React.lazy so their code (most
|
||||
// importantly the whole TipTap editor + KaTeX + lowlight grammars + drawio that
|
||||
// the page editor and the readonly share editor pull in) is fetched only when
|
||||
// the matching route is actually visited. The <Suspense> boundaries live inside
|
||||
// each Layout (around its <Outlet/>), so the app shell stays mounted while a
|
||||
// route chunk loads.
|
||||
const Home = lazy(() => import("@/pages/dashboard/home"));
|
||||
const Page = lazy(() => import("@/pages/page/page"));
|
||||
const SpaceHome = lazy(() => import("@/pages/space/space-home.tsx"));
|
||||
const SpaceTrash = lazy(() => import("@/pages/space/space-trash.tsx"));
|
||||
const SpacesPage = lazy(() => import("@/pages/spaces/spaces.tsx"));
|
||||
const FavoritesPage = lazy(() => import("@/pages/favorites/favorites-page"));
|
||||
const LabelPage = lazy(() => import("@/pages/label/label-page"));
|
||||
const SharedPage = lazy(() => import("@/pages/share/shared-page.tsx"));
|
||||
|
||||
const AccountSettings = lazy(
|
||||
() => import("@/pages/settings/account/account-settings"),
|
||||
);
|
||||
const AccountPreferences = lazy(
|
||||
() => import("@/pages/settings/account/account-preferences.tsx"),
|
||||
);
|
||||
const WorkspaceSettings = lazy(
|
||||
() => import("@/pages/settings/workspace/workspace-settings"),
|
||||
);
|
||||
const AiSettings = lazy(() => import("@/pages/settings/workspace/ai-settings"));
|
||||
const WorkspaceMembers = lazy(
|
||||
() => import("@/pages/settings/workspace/workspace-members"),
|
||||
);
|
||||
const Groups = lazy(() => import("@/pages/settings/group/groups"));
|
||||
const GroupInfo = lazy(() => import("./pages/settings/group/group-info"));
|
||||
const Spaces = lazy(() => import("@/pages/settings/space/spaces.tsx"));
|
||||
const Shares = lazy(() => import("@/pages/settings/shares/shares.tsx"));
|
||||
|
||||
export default function App() {
|
||||
useTrackOrigin();
|
||||
|
||||
return (
|
||||
<>
|
||||
<Suspense
|
||||
fallback={
|
||||
<Center h="100vh">
|
||||
<Loader size="sm" />
|
||||
</Center>
|
||||
}
|
||||
>
|
||||
<Routes>
|
||||
<Route index element={<Navigate to="/home" />} />
|
||||
<Route path={"/login"} element={<LoginPage />} />
|
||||
@@ -83,6 +117,6 @@ export default function App() {
|
||||
|
||||
<Route path="*" element={<Error404 />} />
|
||||
</Routes>
|
||||
</>
|
||||
</Suspense>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { isChunkLoadError } from "./chunk-load-error-boundary";
|
||||
|
||||
// The detector decides whether a caught render error is a stale-deploy chunk-404
|
||||
// (→ auto-reload to fetch the new manifest) vs a genuine app error (→ generic
|
||||
// recovery UI, no reload). A false negative on a real chunk failure re-blanks the
|
||||
// app; a false positive would auto-reload on an ordinary error. Pin both sides.
|
||||
// Messages browsers / Vite attach to a rejected dynamic import().
const DYNAMIC_IMPORT_FAILURE_MESSAGES: string[] = [
  "Failed to fetch dynamically imported module: https://x/assets/index-abc.js",
  "error loading dynamically imported module",
  "Importing a module script failed.",
];

// Values that must NOT be classified as a chunk-load failure.
const NON_CHUNK_ERRORS: unknown[] = [
  null,
  undefined,
  {},
  { name: "TypeError", message: "Cannot read properties of undefined" },
  { message: "Network request failed" },
  new Error("some ordinary render error"),
];

describe("isChunkLoadError", () => {
  it("detects the ChunkLoadError name", () => {
    const verdict = isChunkLoadError({ name: "ChunkLoadError", message: "x" });
    expect(verdict).toBe(true);
  });

  it.each(DYNAMIC_IMPORT_FAILURE_MESSAGES)(
    "detects the dynamic-import failure message %#",
    (message) => {
      const verdict = isChunkLoadError({ name: "TypeError", message });
      expect(verdict).toBe(true);
    },
  );

  it("is case-insensitive on the message", () => {
    const shouted = { message: "FAILED TO FETCH DYNAMICALLY IMPORTED MODULE" };
    expect(isChunkLoadError(shouted)).toBe(true);
  });

  it.each(NON_CHUNK_ERRORS)("returns false for a non-chunk error %#", (err) => {
    const verdict = isChunkLoadError(err);
    expect(verdict).toBe(false);
  });
});
|
||||
@@ -0,0 +1,71 @@
|
||||
import { ReactNode } from "react";
|
||||
import { ErrorBoundary } from "react-error-boundary";
|
||||
import { Button, Center, Stack, Text } from "@mantine/core";
|
||||
|
||||
const RELOAD_FLAG = "chunk-reload-attempted";
|
||||
|
||||
// Heuristic detection of a failed dynamic import. Since the code-splitting work,
|
||||
// most routes (plus Aside / AiChatWindow) are React.lazy: when a new deploy
|
||||
// replaces the hashed chunks, a tab left open on the old index.html requests a
|
||||
// chunk URL that now 404s, and React.lazy rejects. Browsers / Vite surface these
|
||||
// with a ChunkLoadError name or one of these messages.
|
||||
// Case-insensitive message fragments that identify a failed dynamic import().
const CHUNK_LOAD_MESSAGE_PATTERNS: readonly RegExp[] = [
  /Failed to fetch dynamically imported module/i,
  /error loading dynamically imported module/i,
  /Importing a module script failed/i,
];

/**
 * Reports whether a caught value looks like a failed dynamic import.
 *
 * @param error - Any thrown value; falsy values are never a chunk error.
 * @returns true when `error.name` is exactly "ChunkLoadError" or when
 *   `error.message` contains one of the known dynamic-import failure
 *   phrases (matched case-insensitively); false otherwise.
 */
export function isChunkLoadError(error: unknown): boolean {
  if (!error) {
    return false;
  }

  // Read both fields up front; a missing name/message degrades to "".
  const candidate = error as { name?: string; message?: string };
  const errorName = candidate.name ?? "";
  const errorMessage = candidate.message ?? "";

  if (errorName === "ChunkLoadError") {
    return true;
  }
  return CHUNK_LOAD_MESSAGE_PATTERNS.some((pattern) =>
    pattern.test(errorMessage),
  );
}
|
||||
|
||||
// Minimum gap between two automatic reloads. A chunk failure that recurs
// inside this window right after a reload means the reload did not cure it
// (e.g. a genuinely missing chunk), so we stop and show the manual UI instead.
const RELOAD_COOLDOWN_MS = 30 * 1000;

/**
 * ErrorBoundary `onError` hook: auto-reloads the page when the caught error is
 * a failed dynamic import, at most once per cooldown window.
 *
 * A stale-chunk 404 is cured by a full reload that re-fetches index.html and
 * the new chunk manifest. The time of the last automatic reload is stored in
 * sessionStorage under RELOAD_FLAG:
 *  - no recent attempt  -> record "now" and reload;
 *  - attempt within RELOAD_COOLDOWN_MS -> do nothing, so the boundary's manual
 *    recovery UI stays visible (reload-loop guard).
 *
 * Storing a timestamp rather than a permanent one-shot marker means a tab that
 * already recovered once can auto-recover again after a later deploy.
 *
 * @param error - The value caught by the boundary; non-chunk errors are ignored.
 */
function handleError(error: unknown) {
  if (!isChunkLoadError(error)) return;

  try {
    // A missing entry parses to 0 and a legacy "1" marker parses to 1; both
    // count as "long ago" and therefore allow a reload.
    const lastAttempt = Number(sessionStorage.getItem(RELOAD_FLAG));
    const now = Date.now();
    const attemptedRecently =
      Number.isFinite(lastAttempt) &&
      lastAttempt > 0 &&
      now - lastAttempt < RELOAD_COOLDOWN_MS;
    if (attemptedRecently) return;
    sessionStorage.setItem(RELOAD_FLAG, String(now));
  } catch {
    // sessionStorage unavailable (private mode / disabled): skip the automatic
    // reload rather than risk an unguarded loop; the fallback UI still recovers.
    return;
  }

  window.location.reload();
}
|
||||
|
||||
// Root-level boundary that sits ABOVE every route-level Suspense boundary so a
|
||||
// lazy route/component chunk failure is caught here instead of unmounting the
|
||||
// whole tree into a blank white screen. Per-feature ErrorBoundaries (page.tsx,
|
||||
// transclusion, page-embed) remain in place underneath for their local errors.
|
||||
// Fallback screen for the boundary: picks the "new version" wording for a
// chunk-load failure and the generic wording for anything else, and always
// offers a Reload button that performs a full page reload.
function renderChunkLoadFallback({ error }: { error: unknown }) {
  let heading: string;
  let detail: string;
  if (isChunkLoadError(error)) {
    heading = "A new version is available";
    detail = "Please reload the page to load the latest version.";
  } else {
    heading = "Something went wrong";
    detail = "An unexpected error occurred. Reloading the page may help.";
  }

  const reloadPage = () => window.location.reload();

  return (
    <Center h="100vh" p="md">
      <Stack align="center" gap="sm" maw={420}>
        <Text fw={600}>{heading}</Text>
        <Text size="sm" c="dimmed" ta="center">
          {detail}
        </Text>
        <Button onClick={reloadPage}>Reload</Button>
      </Stack>
    </Center>
  );
}

/**
 * Error boundary that wraps `children`, forwards caught errors to
 * `handleError`, and renders a full-height recovery screen in their place.
 *
 * @param children - The subtree to protect.
 */
export function ChunkLoadErrorBoundary({ children }: { children: ReactNode }) {
  return (
    <ErrorBoundary
      onError={handleError}
      fallbackRender={renderChunkLoadFallback}
    >
      {children}
    </ErrorBoundary>
  );
}
|
||||
@@ -1,9 +1,10 @@
|
||||
import { AppShell, Container } from "@mantine/core";
|
||||
import React, { useEffect, useRef, useState } from "react";
|
||||
import React, { Suspense, useEffect, useRef, useState } from "react";
|
||||
import { useLocation } from "react-router-dom";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import SettingsSidebar from "@/components/settings/settings-sidebar.tsx";
|
||||
import { useAtom } from "jotai";
|
||||
import { useAtom, useAtomValue } from "jotai";
|
||||
import { aiChatWindowOpenAtom } from "@/features/ai-chat/atoms/ai-chat-atom.ts";
|
||||
import {
|
||||
APP_NAVBAR_ID,
|
||||
NAVBAR_COLLAPSE_BREAKPOINT,
|
||||
@@ -14,8 +15,6 @@ import {
|
||||
} from "@/components/layouts/global/hooks/atoms/sidebar-atom.ts";
|
||||
import { SpaceSidebar } from "@/features/space/components/sidebar/space-sidebar.tsx";
|
||||
import { AppHeader } from "@/components/layouts/global/app-header.tsx";
|
||||
import Aside from "@/components/layouts/global/aside.tsx";
|
||||
import AiChatWindow from "@/features/ai-chat/components/ai-chat-window.tsx";
|
||||
import GitmostGlobalBridge from "@/features/editor/gitmost/gitmost-global-bridge.tsx";
|
||||
import classes from "./app-shell.module.css";
|
||||
import { useToggleSidebar } from "@/components/layouts/global/hooks/hooks/use-toggle-sidebar.ts";
|
||||
@@ -23,6 +22,21 @@ import GlobalSidebar from "@/components/layouts/global/global-sidebar.tsx";
|
||||
import { ASIDE_PANEL_ID } from "@/hooks/use-toggle-aside.tsx";
|
||||
import { MAIN_CONTENT_ID, SkipToMain } from "@/components/ui/skip-to-main.tsx";
|
||||
|
||||
// Lazily load the AI chat window so the AI SDK runtime it pulls in is fetched
|
||||
// only after the user first opens the chat, instead of for every authenticated
|
||||
// user on load. The window itself renders null while closed, so there is no
|
||||
// behavior difference — it simply is not mounted until first opened.
|
||||
const AiChatWindow = React.lazy(
|
||||
() => import("@/features/ai-chat/components/ai-chat-window.tsx"),
|
||||
);
|
||||
|
||||
// The right aside hosts the comment panel and table of contents, both of which
|
||||
// pull in TipTap. It only ever renders on page routes, so lazy-loading it keeps
|
||||
// the whole editor engine out of the eager global-shell startup graph.
|
||||
const Aside = React.lazy(
|
||||
() => import("@/components/layouts/global/aside.tsx"),
|
||||
);
|
||||
|
||||
export default function GlobalAppShell({
|
||||
children,
|
||||
}: {
|
||||
@@ -37,6 +51,15 @@ export default function GlobalAppShell({
|
||||
const [isResizing, setIsResizing] = useState(false);
|
||||
const sidebarRef = useRef(null);
|
||||
|
||||
// Latch: once the AI chat window has been opened, keep it mounted so an
|
||||
// in-flight stream is never torn down. Before the first open the AI chat chunk
|
||||
// is never fetched.
|
||||
const aiChatOpen = useAtomValue(aiChatWindowOpenAtom);
|
||||
const [aiChatEverOpened, setAiChatEverOpened] = useState(false);
|
||||
useEffect(() => {
|
||||
if (aiChatOpen) setAiChatEverOpened(true);
|
||||
}, [aiChatOpen]);
|
||||
|
||||
const startResizing = React.useCallback((mouseDownEvent) => {
|
||||
mouseDownEvent.preventDefault();
|
||||
setIsResizing(true);
|
||||
@@ -160,13 +183,21 @@ export default function GlobalAppShell({
|
||||
: undefined
|
||||
}
|
||||
>
|
||||
<Aside />
|
||||
<Suspense fallback={null}>
|
||||
<Aside />
|
||||
</Suspense>
|
||||
</AppShell.Aside>
|
||||
)}
|
||||
</AppShell>
|
||||
{/* Floating AI chat window. Mounted once globally; it is position: fixed
|
||||
and self-hides when closed, so its place in the tree is not critical. */}
|
||||
<AiChatWindow />
|
||||
{/* Floating AI chat window. Mounted once globally on first open; it is
|
||||
position: fixed and self-hides when closed, so its place in the tree is
|
||||
not critical. Kept mounted after the first open so a live stream is not
|
||||
aborted. */}
|
||||
{aiChatEverOpened && (
|
||||
<Suspense fallback={null}>
|
||||
<AiChatWindow />
|
||||
</Suspense>
|
||||
)}
|
||||
{/* Global gitmost native bridge: registers listSpaces / listPages /
|
||||
createPageWithRecording on window.gitmost so the native host can
|
||||
create a page with a recording even when no page editor is open. */}
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import { Suspense, useEffect } from "react";
|
||||
import { UserProvider } from "@/features/user/user-provider.tsx";
|
||||
import { Outlet, useParams } from "react-router-dom";
|
||||
import { Center, Loader } from "@mantine/core";
|
||||
import GlobalAppShell from "@/components/layouts/global/global-app-shell.tsx";
|
||||
import { SearchSpotlight } from "@/features/search/components/search-spotlight.tsx";
|
||||
import { useGetSpaceBySlugQuery } from "@/features/space/queries/space-query.ts";
|
||||
@@ -8,10 +10,39 @@ export default function Layout() {
|
||||
const { spaceSlug } = useParams();
|
||||
const { data: space } = useGetSpaceBySlugQuery(spaceSlug);
|
||||
|
||||
// Warm the (now route-split) editor chunk during idle time on authenticated
|
||||
// routes, so the first navigation to a page renders from cache instead of a
|
||||
// cold chunk fetch. Best-effort: scheduled via requestIdleCallback (or a 2s
// setTimeout where that API is unavailable) and never blocks
|
||||
// startup — the dynamic import mirrors the App.tsx route lazy loader so both
|
||||
// resolve to the same chunk.
|
||||
useEffect(() => {
|
||||
const ric =
|
||||
typeof window !== "undefined" && (window as any).requestIdleCallback;
|
||||
const warm = () => {
|
||||
// Best-effort prefetch: a failed warm-up (offline, stale 404) is harmless
|
||||
// and must not surface as an unhandledrejection.
|
||||
void import("@/pages/page/page").catch(() => {});
|
||||
};
|
||||
if (ric) {
|
||||
const id = ric(warm);
|
||||
return () => (window as any).cancelIdleCallback?.(id);
|
||||
}
|
||||
const timer = setTimeout(warm, 2000);
|
||||
return () => clearTimeout(timer);
|
||||
}, []);
|
||||
|
||||
return (
|
||||
<UserProvider>
|
||||
<GlobalAppShell>
|
||||
<Outlet />
|
||||
<Suspense
|
||||
fallback={
|
||||
<Center h="60vh">
|
||||
<Loader size="sm" />
|
||||
</Center>
|
||||
}
|
||||
>
|
||||
<Outlet />
|
||||
</Suspense>
|
||||
</GlobalAppShell>
|
||||
<SearchSpotlight spaceId={space?.id} />
|
||||
</UserProvider>
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
import { atom } from "jotai";
|
||||
import { Editor } from "@tiptap/core";
|
||||
// Type-only: these atoms only hold an Editor reference for typing. A value
|
||||
// import would drag the whole @tiptap/core engine into the eager graph of every
|
||||
// shell component that reads one of these atoms.
|
||||
import type { Editor } from "@tiptap/core";
|
||||
import { PageEditMode } from "@/features/user/types/user.types.ts";
|
||||
import type { DictationUnavailableReason } from "@/features/dictation/dictation-status";
|
||||
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
import { lazy, Suspense } from "react";
|
||||
import { EditorMenuProps } from "@/features/editor/components/table/types/types.ts";
|
||||
|
||||
// Lazily load the drawio bubble menu so it is split out of the editor chunk and
|
||||
// fetched only when an editable editor is mounted (mirrors excalidraw-menu-lazy).
|
||||
const DrawioMenu = lazy(
|
||||
() => import("@/features/editor/components/drawio/drawio-menu.tsx"),
|
||||
);
|
||||
|
||||
/**
 * Suspense wrapper around the lazily imported drawio menu.
 * Renders nothing while the menu chunk is loading and forwards all props
 * unchanged once it resolves.
 */
export default function DrawioMenuLazy(props: EditorMenuProps) {
  const menu = <DrawioMenu {...props} />;
  return <Suspense fallback={null}>{menu}</Suspense>;
}
|
||||
@@ -0,0 +1,17 @@
|
||||
import { lazy, Suspense } from "react";
|
||||
import { NodeViewProps } from "@tiptap/react";
|
||||
|
||||
// Lazily load the drawio node view so the heavy react-drawio embed runtime is
|
||||
// split into its own chunk and fetched only when a drawio diagram is actually
|
||||
// rendered (mirrors excalidraw-view-lazy).
|
||||
const DrawioView = lazy(
|
||||
() => import("@/features/editor/components/drawio/drawio-view.tsx"),
|
||||
);
|
||||
|
||||
/**
 * Suspense wrapper around the lazily imported drawio node view.
 * Renders nothing while the view chunk is loading and forwards the node-view
 * props unchanged once it resolves.
 */
export default function DrawioViewLazy(props: NodeViewProps) {
  const view = <DrawioView {...props} />;
  return <Suspense fallback={null}>{view}</Suspense>;
}
|
||||
@@ -0,0 +1,19 @@
|
||||
import { lazy, Suspense } from "react";
|
||||
import { NodeViewProps } from "@tiptap/react";
|
||||
|
||||
// Lazily load the KaTeX-backed block math view so the katex chunk is fetched
|
||||
// only when a document actually contains a math node (mirrors the mermaid/
|
||||
// excalidraw lazy pattern). The local Suspense keeps a slow katex chunk from
|
||||
// crashing or blocking the whole editor: while it loads we render the raw
|
||||
// LaTeX source as a node-sized placeholder.
|
||||
const MathBlockView = lazy(
|
||||
() => import("@/features/editor/components/math/math-block.tsx"),
|
||||
);
|
||||
|
||||
/**
 * Suspense wrapper around the lazily imported block math view.
 * While the view chunk loads, the node's `text` attribute is shown inside a
 * `<div data-katex="true">` placeholder; afterwards the real view receives
 * the node-view props unchanged.
 */
export default function MathBlockViewLazy(props: NodeViewProps) {
  const placeholder = <div data-katex="true">{props.node.attrs.text}</div>;

  return (
    <Suspense fallback={placeholder}>
      <MathBlockView {...props} />
    </Suspense>
  );
}
|
||||
@@ -0,0 +1,19 @@
|
||||
import { lazy, Suspense } from "react";
|
||||
import { NodeViewProps } from "@tiptap/react";
|
||||
|
||||
// Lazily load the KaTeX-backed inline math view so the katex chunk is fetched
|
||||
// only when a document actually contains a math node (mirrors the mermaid/
|
||||
// excalidraw lazy pattern). The local Suspense keeps a slow katex chunk from
|
||||
// crashing or blocking the whole editor: while it loads we render the raw
|
||||
// LaTeX source as a node-sized placeholder.
|
||||
const MathInlineView = lazy(
|
||||
() => import("@/features/editor/components/math/math-inline.tsx"),
|
||||
);
|
||||
|
||||
/**
 * Suspense wrapper around the lazily imported inline math view.
 * While the view chunk loads, the node's `text` attribute is shown inside a
 * `<span data-katex="true">` placeholder; afterwards the real view receives
 * the node-view props unchanged.
 */
export default function MathInlineViewLazy(props: NodeViewProps) {
  const placeholder = <span data-katex="true">{props.node.attrs.text}</span>;

  return (
    <Suspense fallback={placeholder}>
      <MathInlineView {...props} />
    </Suspense>
  );
}
|
||||
@@ -81,8 +81,8 @@ import {
|
||||
createResizeHandle,
|
||||
buildResizeClasses,
|
||||
} from "@/features/editor/components/common/node-resize-handles.ts";
|
||||
import MathInlineView from "@/features/editor/components/math/math-inline.tsx";
|
||||
import MathBlockView from "@/features/editor/components/math/math-block.tsx";
|
||||
import MathInlineView from "@/features/editor/components/math/math-inline-lazy.tsx";
|
||||
import MathBlockView from "@/features/editor/components/math/math-block-lazy.tsx";
|
||||
import ImageView from "@/features/editor/components/image/image-view.tsx";
|
||||
import CalloutView from "@/features/editor/components/callout/callout-view.tsx";
|
||||
import StatusView from "@/features/editor/components/status/status-view.tsx";
|
||||
@@ -90,7 +90,7 @@ import VideoView from "@/features/editor/components/video/video-view.tsx";
|
||||
import AudioView from "@/features/editor/components/audio/audio-view.tsx";
|
||||
import AttachmentView from "@/features/editor/components/attachment/attachment-view.tsx";
|
||||
import CodeBlockView from "@/features/editor/components/code-block/code-block-view.tsx";
|
||||
import DrawioView from "../components/drawio/drawio-view";
|
||||
import DrawioView from "../components/drawio/drawio-view-lazy.tsx";
|
||||
import ExcalidrawView from "@/features/editor/components/excalidraw/excalidraw-view-lazy.tsx";
|
||||
import EmbedView from "@/features/editor/components/embed/embed-view.tsx";
|
||||
import HtmlEmbedView from "@/features/editor/components/html-embed/html-embed-view.tsx";
|
||||
|
||||
@@ -1,8 +1,17 @@
|
||||
import { useEffect, useRef } from "react";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
import { getDefaultStore } from "jotai";
|
||||
import { WebSocketStatus } from "@hocuspocus/provider";
|
||||
import { Editor } from "@tiptap/core";
|
||||
|
||||
// Literal value of WebSocketStatus.Connected from @hocuspocus/provider. Inlined
|
||||
// so this always-mounted global bridge does not statically import
|
||||
// @hocuspocus/provider — that import pulls Yjs (and, through a shared chunk, the
|
||||
// whole TipTap engine) into the eager startup graph. yjsConnectionStatusAtom
|
||||
// already stores these raw status strings.
|
||||
const YJS_STATUS_CONNECTED = "connected";
|
||||
// Type-only: importing Editor as a type keeps @tiptap/core (the whole editor
|
||||
// engine) out of the eager global-shell graph — the bridge only uses it for
|
||||
// annotations/casts, never as a runtime value.
|
||||
import type { Editor } from "@tiptap/core";
|
||||
import {
|
||||
pageEditorAtom,
|
||||
yjsConnectionStatusAtom,
|
||||
@@ -16,15 +25,19 @@ import {
|
||||
getSidebarPages,
|
||||
} from "@/features/page/services/page-service.ts";
|
||||
import { buildPageUrl } from "@/features/page/page.utils.ts";
|
||||
import {
|
||||
// Types are erased at build time, so importing them does not pull the module's
|
||||
// runtime (which drags in @tiptap + the editor-ext barrel). The actual recording
|
||||
// helpers are dynamically imported at call time inside createPageWithRecording,
|
||||
// keeping the editor engine out of the eager global-shell startup graph — the
|
||||
// bridge is mounted for every authenticated user but recording is a rare,
|
||||
// native-host-driven action.
|
||||
import type {
|
||||
GitmostBridge,
|
||||
GitmostCreatePagePayload,
|
||||
GitmostCreatePageResult,
|
||||
GitmostListPagesPayload,
|
||||
GitmostListPagesResult,
|
||||
GitmostListSpacesResult,
|
||||
gitmostDecodePayloadToFile,
|
||||
gitmostUploadFileToEditor,
|
||||
} from "@/features/editor/gitmost/gitmost-recording.ts";
|
||||
|
||||
// How long to wait for a freshly-navigated page's editor to mount, become
|
||||
@@ -57,7 +70,7 @@ function gitmostWaitForEditor(
|
||||
!editor.isDestroyed &&
|
||||
editor.isEditable &&
|
||||
editorPageId === pageId &&
|
||||
yjsStatus === WebSocketStatus.Connected;
|
||||
yjsStatus === YJS_STATUS_CONNECTED;
|
||||
if (ready) {
|
||||
resolve(editor);
|
||||
return;
|
||||
@@ -171,6 +184,12 @@ export default function GitmostGlobalBridge() {
|
||||
};
|
||||
}
|
||||
|
||||
// Load the recording helpers on demand (see the import note above). This
|
||||
// is the only place they are needed, so the @tiptap/editor-ext code they
|
||||
// pull in stays out of the eager startup graph.
|
||||
const { gitmostDecodePayloadToFile, gitmostUploadFileToEditor } =
|
||||
await import("@/features/editor/gitmost/gitmost-recording.ts");
|
||||
|
||||
// Validate/decode the recording BEFORE creating the page so a bad
|
||||
// payload never leaves an empty junk page behind. Per the createPage
|
||||
// error contract, any decode failure collapses to "insert-failed" (the
|
||||
|
||||
@@ -59,7 +59,7 @@ import {
|
||||
handlePaste,
|
||||
} from "@/features/editor/components/common/editor-paste-handler.tsx";
|
||||
import ExcalidrawMenu from "./components/excalidraw/excalidraw-menu-lazy";
|
||||
import DrawioMenu from "./components/drawio/drawio-menu";
|
||||
import DrawioMenu from "./components/drawio/drawio-menu-lazy";
|
||||
import { useCollabToken } from "@/features/auth/queries/auth-query.tsx";
|
||||
import SearchAndReplaceDialog from "@/features/editor/components/search-and-replace/search-and-replace-dialog.tsx";
|
||||
import { useDebouncedCallback, useDocumentVisibility } from "@mantine/hooks";
|
||||
@@ -93,11 +93,6 @@ import {
|
||||
isBodyEditable,
|
||||
isCollabSynced,
|
||||
} from "@/features/editor/editor-sync-state";
|
||||
import {
|
||||
isVitalsActive,
|
||||
measurePageOpen,
|
||||
reportEditorTx,
|
||||
} from "@/lib/telemetry/vitals";
|
||||
|
||||
interface PageEditorProps {
|
||||
pageId: string;
|
||||
@@ -356,40 +351,6 @@ export default function PageEditor({
|
||||
editor.storage.pageId = pageId;
|
||||
handleScrollTo(editor);
|
||||
editorRef.current = editor;
|
||||
|
||||
// #355 — perf instrumentation. Skip ALL of it when telemetry is
|
||||
// disabled (F1 flag off) or this session isn't sampled: no page-open
|
||||
// measure, and crucially NO dispatch wrapping, so a non-collecting
|
||||
// session pays zero per-transaction cost.
|
||||
if (isVitalsActive()) {
|
||||
// page_open_ms: this is the first editor-content render, so measure
|
||||
// against any page-open mark set on the tree-row/link click.
|
||||
measurePageOpen();
|
||||
|
||||
// editor_tx_ms: time the SYNCHRONOUS part of applying each
|
||||
// transaction (state.apply + updateState) by wrapping the view's
|
||||
// dispatch. Only slow syncs (>8ms) are reported (see reportEditorTx),
|
||||
// so the common path adds just one performance.now() pair. Passive:
|
||||
// the original dispatch still runs unchanged.
|
||||
try {
|
||||
const view = editor.view as unknown as {
|
||||
dispatch: (tr: unknown) => void;
|
||||
};
|
||||
const originalDispatch = view.dispatch.bind(view);
|
||||
view.dispatch = (tr: unknown) => {
|
||||
const started = performance.now();
|
||||
originalDispatch(tr);
|
||||
const elapsed = performance.now() - started;
|
||||
try {
|
||||
reportEditorTx(elapsed, editor.state.doc.content.size);
|
||||
} catch {
|
||||
// never let telemetry break editing
|
||||
}
|
||||
};
|
||||
} catch {
|
||||
// if the view shape changes, skip editor_tx instrumentation
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
onUpdate({ editor }) {
|
||||
|
||||
@@ -1,10 +1,20 @@
|
||||
import { Suspense } from "react";
|
||||
import { Outlet } from "react-router-dom";
|
||||
import { Center, Loader } from "@mantine/core";
|
||||
import ShareShell from "@/features/share/components/share-shell.tsx";
|
||||
|
||||
/**
 * Layout route for share pages.
 *
 * Wraps the matched child route in ShareShell and renders it through a
 * Suspense boundary, showing a small centered loader while the child suspends.
 */
export default function ShareLayout() {
  return (
    <ShareShell>
      {/* The outlet is rendered exactly once, and only inside the Suspense
          boundary: a second bare <Outlet /> would mount the child route twice
          and let it suspend outside the fallback. */}
      <Suspense
        fallback={
          <Center h="60vh">
            <Loader size="sm" />
          </Center>
        }
      >
        <Outlet />
      </Suspense>
    </ShareShell>
  );
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// Source: https://github.com/mantinedev/mantine/blob/master/packages/@mantine/hooks/src/use-clipboard/use-clipboard.ts
|
||||
// polyfilled to support execCommand fallback
|
||||
import { useState } from "react";
|
||||
import { execCommandCopy } from "@docmost/editor-ext";
|
||||
import { execCommandCopy } from "@/lib/copy-to-clipboard.ts";
|
||||
|
||||
export type UseClipboardOptions = {
|
||||
timeout?: number;
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import bytes from "bytes";
|
||||
import { castToBoolean } from "@/lib/utils.tsx";
|
||||
import { AvatarIconType } from "@/features/attachments/types/attachment.types.ts";
|
||||
import { sanitizeUrl } from "@docmost/editor-ext";
|
||||
import { sanitizeUrl } from "@/lib/sanitize-url.ts";
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
@@ -47,13 +47,6 @@ export function isCompactPageTreeEnabled(): boolean {
|
||||
return castToBoolean(getConfigValue("COMPACT_PAGE_TREE", "true"));
|
||||
}
|
||||
|
||||
// #355 — operator toggle for client perf-telemetry. DEFAULT OFF: the server
|
||||
// mirrors CLIENT_TELEMETRY_ENABLED into window.CONFIG; when off the client
|
||||
// installs no observers and sends nothing (the sink endpoint doesn't exist).
|
||||
export function isClientTelemetryEnabled(): boolean {
|
||||
return castToBoolean(getConfigValue("CLIENT_TELEMETRY_ENABLED", "false"));
|
||||
}
|
||||
|
||||
export function getAvatarUrl(
|
||||
avatarUrl: string,
|
||||
type: AvatarIconType = AvatarIconType.AVATAR,
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
// Client-local execCommand copy fallback (previously imported from
|
||||
// @docmost/editor-ext). It lives here so the ubiquitous useClipboard / CopyButton
|
||||
// path does not pull in the editor-ext barrel — and with it the whole TipTap
|
||||
// engine — through the eager startup graph. Behavior is identical to the
|
||||
// editor-ext helper it replaces.
|
||||
/**
 * Copy `text` to the clipboard via the legacy `document.execCommand("copy")`
 * path.
 *
 * A temporary <textarea> carries the text: it is appended to <body>, selected,
 * copied from, and then detached again.
 *
 * @param text - The string to place on the clipboard.
 */
export function execCommandCopy(text: string): void {
  const textarea = document.createElement("textarea");
  textarea.value = text;
  // Fixed position far outside the viewport keeps the helper node off-screen.
  textarea.style.position = "fixed";
  textarea.style.left = "-9999px";
  textarea.style.top = "-9999px";
  document.body.appendChild(textarea);
  try {
    textarea.select();
    document.execCommand("copy");
  } finally {
    // Always detach the helper node, even when select()/execCommand throws,
    // so a failed copy never leaves a stray textarea in the document. Any
    // error still propagates to the caller exactly as before.
    document.body.removeChild(textarea);
  }
}
|
||||
@@ -0,0 +1,31 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { sanitizeUrl } from "./sanitize-url";
|
||||
|
||||
// `sanitizeUrl` is a byte-identical client-local copy of editor-ext's wrapper
|
||||
// around @braintree/sanitize-url: it maps the sanitizer's "about:blank" XSS
|
||||
// sentinel to "". These assertions mirror editor-ext's own security-contract
|
||||
// test so the extracted copy keeps the same guarantees.
|
||||
describe("sanitizeUrl", () => {
|
||||
it("blocks dangerous schemes (returns empty string)", () => {
|
||||
expect(sanitizeUrl("javascript:alert(1)")).toBe("");
|
||||
expect(sanitizeUrl("data:text/html,<script>alert(1)</script>")).toBe("");
|
||||
expect(sanitizeUrl("vbscript:msgbox(1)")).toBe("");
|
||||
// Case / whitespace obfuscation must not slip past the sanitizer.
|
||||
expect(sanitizeUrl(" JaVaScRiPt:alert(1)")).toBe("");
|
||||
});
|
||||
|
||||
it("returns empty string for empty / undefined input", () => {
|
||||
expect(sanitizeUrl(undefined)).toBe("");
|
||||
expect(sanitizeUrl("")).toBe("");
|
||||
});
|
||||
|
||||
it("allows safe https, relative file and mailto URLs", () => {
|
||||
expect(sanitizeUrl("https://example.com/page")).toMatch(
|
||||
/^https:\/\/example\.com\/page/,
|
||||
);
|
||||
expect(sanitizeUrl("/api/files/abc-123")).toBe("/api/files/abc-123");
|
||||
expect(sanitizeUrl("mailto:user@example.com")).toBe(
|
||||
"mailto:user@example.com",
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,15 @@
|
||||
import { sanitizeUrl as braintreeSanitizeUrl } from "@braintree/sanitize-url";
|
||||
|
||||
// Client-local copy of editor-ext's sanitizeUrl wrapper. Importing it from the
|
||||
// editor-ext barrel dragged the whole TipTap engine into the eager startup graph
|
||||
// via the app-wide config module (getFileUrl). This keeps the exact same
|
||||
// behavior (braintree sanitize + normalize "about:blank" -> "") without that
|
||||
// dependency.
|
||||
/**
 * Sanitize a URL with @braintree/sanitize-url.
 *
 * Falsy input (undefined or "") yields "". A sanitizer result of
 * "about:blank" is also mapped to ""; any other result is returned unchanged.
 */
export function sanitizeUrl(url: string | undefined): string {
  if (url) {
    const cleaned = braintreeSanitizeUrl(url);
    if (cleaned !== "about:blank") {
      return cleaned;
    }
  }
  return "";
}
|
||||
@@ -1,35 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { templateRoute } from "./route-template";
|
||||
|
||||
describe("templateRoute", () => {
|
||||
it("templates a space page path (never leaks slugs)", () => {
|
||||
const t = templateRoute("/s/engineering/p/design-doc-abc123");
|
||||
expect(t).toBe("/s/:space/p/:slug");
|
||||
expect(t).not.toContain("engineering");
|
||||
expect(t).not.toContain("design-doc");
|
||||
});
|
||||
|
||||
it("templates share, redirect and space paths", () => {
|
||||
expect(templateRoute("/share/abc/p/xyz")).toBe("/share/:shareId/p/:slug");
|
||||
expect(templateRoute("/share/p/xyz")).toBe("/share/p/:slug");
|
||||
expect(templateRoute("/p/some-slug")).toBe("/p/:slug");
|
||||
expect(templateRoute("/s/team")).toBe("/s/:space");
|
||||
expect(templateRoute("/s/team/trash")).toBe("/s/:space/trash");
|
||||
expect(templateRoute("/labels/urgent")).toBe("/labels/:label");
|
||||
});
|
||||
|
||||
it("keeps known static routes verbatim", () => {
|
||||
expect(templateRoute("/home")).toBe("/home");
|
||||
expect(templateRoute("/settings/members")).toBe("/settings/members");
|
||||
expect(templateRoute("/")).toBe("/");
|
||||
});
|
||||
|
||||
it("normalises a trailing slash", () => {
|
||||
expect(templateRoute("/s/team/p/slug/")).toBe("/s/:space/p/:slug");
|
||||
});
|
||||
|
||||
it("collapses unknown paths to 'other' (bounded cardinality)", () => {
|
||||
expect(templateRoute("/weird/unknown/thing")).toBe("other");
|
||||
expect(templateRoute("/s/team/p/slug/extra/segments")).toBe("other");
|
||||
});
|
||||
});
|
||||
@@ -1,70 +0,0 @@
|
||||
/**
|
||||
* Map a raw pathname to a BOUNDED route TEMPLATE (#355).
|
||||
*
|
||||
* Perf metrics must be labelled by route template only — never a raw path with
|
||||
* slugs/ids — so the server-side `route` column and any downstream aggregation
|
||||
* stay low-cardinality and carry NO page slugs/titles (privacy). Anything that
|
||||
* does not match a known pattern collapses to `other`.
|
||||
*
|
||||
* The template vocabulary mirrors the issue's example (`/s/:space/p/:slug`).
|
||||
*/
|
||||
// Dynamic routes: each entry pairs a full-path matcher with the template it
// collapses to. Entries are tried in order and the first match wins.
const ROUTE_PATTERNS: { re: RegExp; template: string }[] = [
  // Share pages.
  { re: /^\/share\/[^/]+\/p\/[^/]+$/, template: '/share/:shareId/p/:slug' },
  { re: /^\/share\/p\/[^/]+$/, template: '/share/p/:slug' },
  { re: /^\/share\/[^/]+$/, template: '/share/:shareId' },
  // Page redirect.
  { re: /^\/p\/[^/]+$/, template: '/p/:slug' },
  // Space and page-in-space.
  { re: /^\/s\/[^/]+\/p\/[^/]+$/, template: '/s/:space/p/:slug' },
  { re: /^\/s\/[^/]+\/trash$/, template: '/s/:space/trash' },
  { re: /^\/s\/[^/]+$/, template: '/s/:space' },
  // Other single-parameter routes.
  { re: /^\/labels\/[^/]+$/, template: '/labels/:label' },
  { re: /^\/invites\/[^/]+$/, template: '/invites/:invitationId' },
  { re: /^\/settings\/groups\/[^/]+$/, template: '/settings/groups/:groupId' },
];

// Parameter-free routes that are returned exactly as given.
const STATIC_ROUTES = new Set<string>([
  '/home',
  '/spaces',
  '/favorites',
  '/login',
  '/forgot-password',
  '/password-reset',
  '/setup/register',
  '/settings/account/profile',
  '/settings/account/preferences',
  '/settings/workspace',
  '/settings/ai',
  '/settings/members',
  '/settings/groups',
  '/settings/spaces',
  '/settings/sharing',
]);

/**
 * Collapse a raw pathname into a route template.
 *
 * One trailing slash is dropped first (the root path is left alone). The
 * result is '/' for the root or an empty path, the path itself when it is a
 * known static route, the template of the first matching dynamic pattern, or
 * 'other' when nothing matches.
 */
export function templateRoute(pathname: string): string {
  const hasTrailingSlash = pathname.length > 1 && pathname.endsWith('/');
  const path = hasTrailingSlash ? pathname.slice(0, -1) : pathname;

  if (path === '' || path === '/') {
    return '/';
  }
  if (STATIC_ROUTES.has(path)) {
    return path;
  }

  const matched = ROUTE_PATTERNS.find(({ re }) => re.test(path));
  return matched ? matched.template : 'other';
}
|
||||
|
||||
/**
 * Route template for `window.location.pathname`. Falls back to 'other' if
 * reading the location or templating it throws.
 */
export function currentRouteTemplate(): string {
  let template = 'other';
  try {
    template = templateRoute(window.location.pathname);
  } catch {
    // Keep the 'other' fallback.
  }
  return template;
}
|
||||
@@ -1,290 +0,0 @@
|
||||
import {
|
||||
onCLS,
|
||||
onINP,
|
||||
onLCP,
|
||||
onTTFB,
|
||||
type CLSMetricWithAttribution,
|
||||
type INPMetricWithAttribution,
|
||||
type LCPMetricWithAttribution,
|
||||
type TTFBMetricWithAttribution,
|
||||
} from "web-vitals/attribution";
|
||||
import { isClientTelemetryEnabled } from "@/lib/config";
|
||||
import { currentRouteTemplate } from "./route-template";
|
||||
|
||||
/**
|
||||
* Client perf-telemetry (#355): web-vitals + custom metrics buffered and posted
|
||||
* to POST /api/telemetry/vitals via sendBeacon.
|
||||
*
|
||||
* Design constraints from the issue:
|
||||
* - Sampling is decided ONCE per session (25%), cached in sessionStorage,
|
||||
* BEFORE any observer is subscribed. Non-sampled sessions send nothing.
|
||||
* - Route labels are TEMPLATES only; attr is truncated to 120 chars; no page
|
||||
* titles/slugs/text ever leave the browser.
|
||||
* - Observers are passive and reporting is best-effort — telemetry must not
|
||||
* degrade the perf it measures.
|
||||
*/
|
||||
|
||||
const ENDPOINT = "/api/telemetry/vitals";
|
||||
const SAMPLE_RATE = 0.25;
|
||||
const SAMPLE_KEY = "gm_vitals_sampled";
|
||||
const FLUSH_INTERVAL_MS = 15_000;
|
||||
const MAX_BUFFER = 40; // flush early if the buffer fills between timers
|
||||
const MAX_ATTR_LENGTH = 120;
|
||||
const EDITOR_TX_MIN_MS = 8; // only report editor transactions slower than this
|
||||
|
||||
const ALLOWED_NAMES = new Set([
|
||||
"INP",
|
||||
"LCP",
|
||||
"CLS",
|
||||
"TTFB",
|
||||
"editor_tx_ms",
|
||||
"page_open_ms",
|
||||
"longtask_ms",
|
||||
]);
|
||||
|
||||
interface VitalEvent {
|
||||
name: string;
|
||||
value: number;
|
||||
rating?: string;
|
||||
route?: string;
|
||||
attr?: string;
|
||||
docSize?: number;
|
||||
}
|
||||
|
||||
let sampledCache: boolean | null = null;
|
||||
let initialised = false;
|
||||
let buffer: VitalEvent[] = [];
|
||||
let longtaskSum = 0; // accumulated longtask duration (ms) for the current window
|
||||
|
||||
/**
 * Whether this session is part of the telemetry sample.
 *
 * The answer is memoised in `sampledCache`. On the first call it is read from
 * sessionStorage under SAMPLE_KEY ("1" / "0"); if nothing usable is stored, a
 * random draw against SAMPLE_RATE is made and persisted. Any sessionStorage
 * failure results in "not sampled".
 */
export function isVitalsSampled(): boolean {
  if (sampledCache !== null) return sampledCache;

  let decision: boolean;
  try {
    const persisted = sessionStorage.getItem(SAMPLE_KEY);
    if (persisted === "1") {
      decision = true;
    } else if (persisted === "0") {
      decision = false;
    } else {
      const drawn = Math.random() < SAMPLE_RATE;
      sessionStorage.setItem(SAMPLE_KEY, drawn ? "1" : "0");
      decision = drawn;
    }
  } catch {
    // sessionStorage threw (read or write): treat the session as not sampled.
    decision = false;
  }

  sampledCache = decision;
  return decision;
}
|
||||
|
||||
/**
 * True only when client telemetry is enabled AND this session is sampled.
 *
 * The enabled flag is checked first, so the sampling decision is not even
 * evaluated while telemetry is disabled. Callers use this to skip all
 * instrumentation work on sessions that would report nothing.
 */
export function isVitalsActive(): boolean {
  if (!isClientTelemetryEnabled()) {
    return false;
  }
  return isVitalsSampled();
}
|
||||
|
||||
/**
 * Return `value` cut to at most MAX_ATTR_LENGTH characters when it is a
 * non-empty string; otherwise return undefined.
 */
function truncateAttr(value: unknown): string | undefined {
  if (typeof value === "string" && value.length > 0) {
    return value.slice(0, MAX_ATTR_LENGTH);
  }
  return undefined;
}
|
||||
|
||||
/**
 * Append an event to the pending buffer, dropping it when its name is not in
 * ALLOWED_NAMES or its value is not a finite number. Triggers an immediate
 * flush once the buffer holds MAX_BUFFER events.
 */
function enqueue(event: VitalEvent): void {
  const accepted =
    ALLOWED_NAMES.has(event.name) && Number.isFinite(event.value);
  if (!accepted) return;

  buffer.push(event);
  if (buffer.length >= MAX_BUFFER) {
    flush();
  }
}
|
||||
|
||||
/**
 * Send every buffered event to ENDPOINT and empty the buffer.
 *
 * Any accumulated long-task time is first added to the batch as a single
 * `longtask_ms` event. Delivery uses `navigator.sendBeacon` when it exists and
 * accepts the payload, otherwise a keepalive `fetch`. All failures are
 * swallowed: the batch is dropped rather than retried.
 */
function flush(): void {
  if (longtaskSum > 0) {
    const longtaskEvent: VitalEvent = {
      name: "longtask_ms",
      value: Math.round(longtaskSum),
      route: currentRouteTemplate(),
    };
    buffer.push(longtaskEvent);
    longtaskSum = 0;
  }

  if (buffer.length === 0) return;

  // Serialise first, then reset, so the buffer is already empty when the
  // network call happens.
  const payload = JSON.stringify({ events: buffer });
  buffer = [];

  try {
    const blob = new Blob([payload], { type: "application/json" });
    const beaconAccepted = navigator.sendBeacon
      ? navigator.sendBeacon(ENDPOINT, blob)
      : false;
    if (beaconAccepted) return;

    // No sendBeacon, or it refused the payload: fall back to keepalive fetch.
    const request = fetch(ENDPOINT, {
      method: "POST",
      body: payload,
      headers: { "Content-Type": "application/json" },
      keepalive: true,
    });
    void request.catch(() => undefined);
  } catch {
    // Best-effort: telemetry must never throw.
  }
}
|
||||
|
||||
/**
 * Queue a custom client metric (`editor_tx_ms` or `page_open_ms`).
 *
 * Does nothing unless telemetry is active for this session and `value` is a
 * finite number. The event is tagged with the current route template and,
 * when provided, the document size.
 */
export function reportClientMetric(
  name: "editor_tx_ms" | "page_open_ms",
  value: number,
  extra?: { docSize?: number },
): void {
  if (isVitalsActive() && Number.isFinite(value)) {
    const route = currentRouteTemplate();
    enqueue({ name, value, route, docSize: extra?.docSize });
  }
}
|
||||
|
||||
/**
 * Report an editor transaction duration as `editor_tx_ms`, skipping any
 * duration at or below EDITOR_TX_MIN_MS.
 */
export function reportEditorTx(ms: number, docSize: number): void {
  const withinThreshold = ms <= EDITOR_TX_MIN_MS;
  if (!withinThreshold) {
    reportClientMetric("editor_tx_ms", ms, { docSize });
  }
}
|
||||
|
||||
const PAGE_OPEN_MARK = "gm_page_open_start";
|
||||
|
||||
/**
 * Record the start of a page-open interaction as a performance mark named
 * PAGE_OPEN_MARK. Errors from the Performance API are ignored.
 */
export function markPageOpenStart(): void {
  try {
    // Clear any earlier start mark so only the most recent one remains.
    performance.clearMarks(PAGE_OPEN_MARK);
    performance.mark(PAGE_OPEN_MARK);
  } catch {
    // Best-effort: ignore Performance API failures.
  }
}
|
||||
|
||||
/**
 * Report `page_open_ms` as the time elapsed since the PAGE_OPEN_MARK start
 * mark, if one exists.
 *
 * The mark is cleared as part of the measurement, so a later call without a
 * fresh mark reports nothing. Only positive, finite durations are reported,
 * and Performance API errors are ignored.
 */
export function measurePageOpen(): void {
  try {
    const [startMark] = performance.getEntriesByName(PAGE_OPEN_MARK, "mark");
    if (startMark === undefined) return;

    const startedAt = startMark.startTime;
    const elapsed = performance.now() - startedAt;
    performance.clearMarks(PAGE_OPEN_MARK);

    const reportable = elapsed > 0 && Number.isFinite(elapsed);
    if (reportable) {
      reportClientMetric("page_open_ms", elapsed);
    }
  } catch {
    // Best-effort: ignore Performance API failures.
  }
}
|
||||
|
||||
/**
 * Pick the attribution target string for an INP / LCP / CLS metric.
 *
 * Looks at `interactionTarget`, `element` and `largestShiftTarget` on the
 * metric's attribution, in that order, and returns the first one that is a
 * non-empty string (truncated by truncateAttr). Returns undefined when there
 * is no attribution or none of the keys holds a usable string.
 */
function attrTarget(
  metric:
    | INPMetricWithAttribution
    | LCPMetricWithAttribution
    | CLSMetricWithAttribution,
): string | undefined {
  const attribution = metric.attribution as
    | Record<string, unknown>
    | undefined;
  if (!attribution) return undefined;

  const candidateKeys = ["interactionTarget", "element", "largestShiftTarget"];
  for (const key of candidateKeys) {
    const selector = truncateAttr(attribution[key]);
    if (selector !== undefined) {
      return selector;
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Set up client telemetry. Idempotent: only the first call does anything.
 *
 * Returns before subscribing to anything when telemetry is disabled or the
 * session is not sampled. Otherwise it subscribes to the INP / LCP / CLS /
 * TTFB web-vitals, starts a long-task observer, marks page-open starts on
 * page-link clicks, and arranges flushes on tab hide, on pagehide and every
 * FLUSH_INTERVAL_MS.
 */
export function initVitals(): void {
  if (initialised) return;
  initialised = true;

  // Both gates are evaluated before any observer is subscribed; the enabled
  // flag comes first, so the sampling draw is skipped while telemetry is off.
  if (!isClientTelemetryEnabled() || !isVitalsSampled()) return;

  type TargetedMetric =
    | INPMetricWithAttribution
    | LCPMetricWithAttribution
    | CLSMetricWithAttribution;

  // Shared web-vitals callback: TTFB carries no attribution target.
  const report = (metric: TargetedMetric | TTFBMetricWithAttribution) => {
    const route = currentRouteTemplate();
    const attr =
      metric.name === "TTFB" ? undefined : attrTarget(metric as TargetedMetric);
    enqueue({
      name: metric.name,
      value: metric.value,
      rating: metric.rating,
      route,
      attr,
    });
  };

  onINP(report);
  onLCP(report);
  onCLS(report);
  onTTFB(report);

  // Long tasks: only the running total is kept; flush() turns it into a single
  // longtask_ms event.
  try {
    if (typeof PerformanceObserver !== "undefined") {
      const longtaskObserver = new PerformanceObserver((list) => {
        longtaskSum = list
          .getEntries()
          .reduce((total, entry) => total + entry.duration, longtaskSum);
      });
      longtaskObserver.observe({ type: "longtask", buffered: true });
    }
  } catch {
    // "longtask" entries unsupported: skip silently.
  }

  // page_open_ms start: a click on any anchor whose href contains "/p/" sets
  // the start mark; measurePageOpen() consumes it later.
  const onClickCapture = (event: MouseEvent) => {
    const target = event.target as Element | null;
    const anchor = target?.closest?.("a[href]") as HTMLAnchorElement | null;
    if (!anchor) return;
    const href = anchor.getAttribute("href") ?? "";
    if (/\/p\//.test(href)) {
      markPageOpenStart();
    }
  };
  document.addEventListener("click", onClickCapture, {
    capture: true,
    passive: true,
  });

  // Flush when the tab becomes hidden, on pagehide, and on a fixed interval.
  const flushWhenHidden = () => {
    if (document.visibilityState === "hidden") {
      flush();
    }
  };
  document.addEventListener("visibilitychange", flushWhenHidden);
  window.addEventListener("pagehide", flush);

  setInterval(flush, FLUSH_INTERVAL_MS);
}
|
||||
+60
-32
@@ -13,16 +13,14 @@ import { ModalsProvider } from "@mantine/modals";
|
||||
import { Notifications } from "@mantine/notifications";
|
||||
import { QueryClient, QueryClientProvider } from "@tanstack/react-query";
|
||||
import { HelmetProvider } from "react-helmet-async";
|
||||
import { ChunkLoadErrorBoundary } from "@/components/chunk-load-error-boundary.tsx";
|
||||
import "./i18n";
|
||||
import { PostHogProvider } from "posthog-js/react";
|
||||
import {
|
||||
getPostHogHost,
|
||||
getPostHogKey,
|
||||
isCloud,
|
||||
isPostHogEnabled,
|
||||
} from "@/lib/config.ts";
|
||||
import posthog from "posthog-js";
|
||||
import { initVitals } from "@/lib/telemetry/vitals";
|
||||
|
||||
export const queryClient = new QueryClient({
|
||||
defaultOptions: {
|
||||
@@ -35,35 +33,65 @@ export const queryClient = new QueryClient({
|
||||
},
|
||||
});
|
||||
|
||||
if (isCloud() && isPostHogEnabled) {
|
||||
posthog.init(getPostHogKey(), {
|
||||
api_host: getPostHogHost(),
|
||||
defaults: "2025-05-24",
|
||||
disable_session_recording: true,
|
||||
capture_pageleave: false,
|
||||
});
|
||||
}
|
||||
|
||||
// #355 — client perf-telemetry. Decides sampling ONCE (25%/session) before
|
||||
// subscribing to any observer; non-sampled sessions send nothing.
|
||||
initVitals();
|
||||
|
||||
const container = document.getElementById("root") as HTMLElement;
|
||||
const root = (container as any).__reactRoot ??= ReactDOM.createRoot(container);
|
||||
|
||||
root.render(
|
||||
<BrowserRouter>
|
||||
<MantineProvider theme={theme} cssVariablesResolver={mantineCssResolver}>
|
||||
<ModalsProvider>
|
||||
<QueryClientProvider client={queryClient}>
|
||||
<Notifications position="bottom-center" limit={3} zIndex={10000} />
|
||||
<HelmetProvider>
|
||||
<PostHogProvider client={posthog}>
|
||||
<App />
|
||||
</PostHogProvider>
|
||||
</HelmetProvider>
|
||||
</QueryClientProvider>
|
||||
</ModalsProvider>
|
||||
</MantineProvider>
|
||||
</BrowserRouter>,
|
||||
);
|
||||
/**
 * Render the application into the React root, wrapped in the router, Mantine,
 * modals, react-query, notifications and Helmet providers.
 */
function renderApp() {
  // Root boundary above every lazy route's Suspense: a stale-chunk 404 after
  // a deploy is caught and recovered here instead of blanking the whole app.
  const guardedApp = (
    <ChunkLoadErrorBoundary>
      <App />
    </ChunkLoadErrorBoundary>
  );

  root.render(
    <BrowserRouter>
      <MantineProvider theme={theme} cssVariablesResolver={mantineCssResolver}>
        <ModalsProvider>
          <QueryClientProvider client={queryClient}>
            <Notifications position="bottom-center" limit={3} zIndex={10000} />
            <HelmetProvider>{guardedApp}</HelmetProvider>
          </QueryClientProvider>
        </ModalsProvider>
      </MantineProvider>
    </BrowserRouter>,
  );
}
|
||||
|
||||
/**
 * Lazily load and initialise the posthog-js singleton.
 *
 * - The `posthog-js` module is imported only when the cloud/analytics gate
 *   passes, so builds that fail the gate never download it. The gate
 *   expression is kept the same as the earlier eager code.
 * - Intended to run after the first render, so first paint never waits on the
 *   analytics chunk.
 * - Any failure (network error, stale chunk 404, a blocked chunk) is
 *   swallowed: the app keeps working without analytics.
 * - Only the singleton is initialised; the tree is NOT wrapped in
 *   <PostHogProvider>. Per the original note, the app has no consumers of the
 *   PostHog React context, and re-rendering just to attach the provider would
 *   remount the whole App (re-running mount effects and dropping local state,
 *   focus and in-progress input) for no functional gain.
 */
async function initAnalytics() {
  // NOTE(review): `isPostHogEnabled` is referenced, not called. If it is a
  // function this operand is always truthy — confirm against its definition.
  const analyticsWanted = isCloud() && isPostHogEnabled;
  if (!analyticsWanted) return;

  try {
    const posthogModule = await import("posthog-js");
    posthogModule.default.init(getPostHogKey(), {
      api_host: getPostHogHost(),
      defaults: "2025-05-24",
      disable_session_recording: true,
      capture_pageleave: false,
    });
  } catch {
    // Analytics failed to load — degrade gracefully; the app already rendered.
  }
}
|
||||
|
||||
// Paint immediately for everyone (self-hosted stays exactly as instant as before,
|
||||
// cloud no longer blocks on the analytics import). The posthog singleton is
|
||||
// initialized after, without re-rendering the tree.
|
||||
renderApp();
|
||||
void initAnalytics();
|
||||
|
||||
@@ -63,6 +63,20 @@ export default defineConfig(({ mode }) => {
|
||||
name: "vendor-mantine",
|
||||
test: /[\\/]node_modules[\\/]@mantine[\\/]/,
|
||||
},
|
||||
// NOTE: TipTap/ProseMirror/Yjs are intentionally NOT force-grouped
|
||||
// into a single vendor chunk. Doing so backfires: rolldown co-locates
|
||||
// a small module shared with the (eager) react-i18next runtime into
|
||||
// that group chunk, which then drags the whole ~590KB editor engine
|
||||
// into the eager modulepreload graph. Left to the default splitting,
|
||||
// the editor engine stays in lazily-loaded chunks pulled only by the
|
||||
// route-split editor/share pages. KaTeX is safe to group (nothing
|
||||
// eager references it).
|
||||
// KaTeX in its own stable chunk; loaded on demand by the lazy math
|
||||
// node views (never in the startup path).
|
||||
{
|
||||
name: "vendor-katex",
|
||||
test: /[\\/]node_modules[\\/]katex[\\/]/,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
@@ -111,7 +111,6 @@
|
||||
"pino-pretty": "^13.1.3",
|
||||
"postgres": "^3.4.8",
|
||||
"postmark": "^4.0.7",
|
||||
"prom-client": "^15.1.3",
|
||||
"react": "^18.3.1",
|
||||
"react-email": "6.0.8",
|
||||
"reflect-metadata": "^0.2.2",
|
||||
|
||||
@@ -31,8 +31,6 @@ import { McpModule } from './integrations/mcp/mcp.module';
|
||||
import { SandboxModule } from './integrations/sandbox/sandbox.module';
|
||||
import { AiModule } from './integrations/ai/ai.module';
|
||||
import { AiChatModule } from './core/ai-chat/ai-chat.module';
|
||||
import { MetricsModule } from './integrations/metrics/metrics.module';
|
||||
import { ClientTelemetryModule } from './core/telemetry/client-telemetry.module';
|
||||
|
||||
const enterpriseModules = [];
|
||||
try {
|
||||
@@ -95,10 +93,6 @@ try {
|
||||
SandboxModule,
|
||||
AiModule,
|
||||
AiChatModule,
|
||||
MetricsModule,
|
||||
// Gated OFF by default: only registers the public vitals sink controller
|
||||
// when CLIENT_TELEMETRY_ENABLED=true (maintainer decision E1=B).
|
||||
ClientTelemetryModule.register(),
|
||||
...enterpriseModules,
|
||||
],
|
||||
controllers: [AppController],
|
||||
|
||||
@@ -41,7 +41,6 @@ import {
|
||||
HISTORY_INTERVAL,
|
||||
} from '../constants';
|
||||
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
||||
import { observeCollabStore } from '../../integrations/metrics/metrics.registry';
|
||||
|
||||
/**
|
||||
* #251 — wire format of the client→server stateless message that signals a
|
||||
@@ -193,17 +192,6 @@ export class PersistenceExtension implements Extension {
|
||||
}
|
||||
|
||||
async onStoreDocument(data: onStoreDocumentPayload) {
|
||||
// #355 — time the full store (persist + post-store side effects) into
|
||||
// collab_store_duration_seconds. No-op when METRICS_PORT is unset.
|
||||
const startedAt = performance.now();
|
||||
try {
|
||||
await this.storeDocument(data);
|
||||
} finally {
|
||||
observeCollabStore((performance.now() - startedAt) / 1000);
|
||||
}
|
||||
}
|
||||
|
||||
private async storeDocument(data: onStoreDocumentPayload) {
|
||||
const { documentName, document, context } = data;
|
||||
|
||||
const pageId = getPageId(documentName);
|
||||
|
||||
@@ -16,7 +16,6 @@ import {
|
||||
AUTH_THROTTLER,
|
||||
PAGE_TEMPLATE_THROTTLER,
|
||||
PUBLIC_SHARE_AI_THROTTLER,
|
||||
VITALS_THROTTLER,
|
||||
} from '../../integrations/throttle/throttler-names';
|
||||
import { LoginDto } from './dto/login.dto';
|
||||
import { AuthService } from './services/auth.service';
|
||||
@@ -185,21 +184,16 @@ export class AuthController {
|
||||
}
|
||||
|
||||
// The global ThrottlerGuard applies ALL named throttlers to every route by
|
||||
// default, so each non-AUTH bucket (AI chat, page template, public-share AI,
|
||||
// client vitals) is explicitly skipped here. collab-token is auth-guarded
|
||||
// (JwtAuthGuard), per-user and client-cached, so those feature buckets are
|
||||
// irrelevant to it; skipping them avoids spurious 429s when a user opens many
|
||||
// pages in a short window. The VITALS bucket must be skipped too: it is a
|
||||
// process-wide named throttler, so without this skip its per-IP limit would
|
||||
// silently cap collab-token (the one route that opts out of every other
|
||||
// bucket) and break editing behind shared/NAT IPs. The AUTH bucket is skipped
|
||||
// for the same per-user, cached reason.
|
||||
// default, so each non-AUTH bucket (AI chat, page template, public-share AI)
|
||||
// is explicitly skipped here. collab-token is auth-guarded (JwtAuthGuard),
|
||||
// per-user and client-cached, so those feature buckets are irrelevant to it;
|
||||
// skipping them avoids spurious 429s when a user opens many pages in a short
|
||||
// window. The AUTH bucket is skipped too for the same per-user, cached reason.
|
||||
@SkipThrottle({
|
||||
[AUTH_THROTTLER]: true,
|
||||
[AI_CHAT_THROTTLER]: true,
|
||||
[PAGE_TEMPLATE_THROTTLER]: true,
|
||||
[PUBLIC_SHARE_AI_THROTTLER]: true,
|
||||
[VITALS_THROTTLER]: true,
|
||||
})
|
||||
@UseGuards(JwtAuthGuard)
|
||||
@HttpCode(HttpStatus.OK)
|
||||
|
||||
@@ -1,105 +0,0 @@
|
||||
/**
|
||||
* Server-side whitelist + limits for POST /api/telemetry/vitals (#355).
|
||||
*
|
||||
* The endpoint is PUBLIC (browsers post it, no auth) so it is a privacy and
|
||||
* abuse surface: everything not on these lists is silently DROPPED and the
|
||||
* request still returns 200 (never 400 — a 400 would make browsers retry).
|
||||
*/
|
||||
|
||||
// The only metric names accepted. Anything else is dropped.
export const ALLOWED_METRIC_NAMES = new Set<string>([
  'INP',
  'LCP',
  'CLS',
  'TTFB',
  'editor_tx_ms',
  'page_open_ms',
  'longtask_ms',
]);

// The only rating values accepted (web-vitals). Anything else -> null.
export const ALLOWED_RATINGS = new Set<string>([
  'good',
  'needs-improvement',
  'poor',
]);

// Max events accepted per batch; the rest are ignored.
export const MAX_EVENTS_PER_BATCH = 50;

// Defence-in-depth body cap (~16KB). Fastify's global bodyLimit is far larger,
// so we re-check the parsed payload size here and drop oversized batches.
export const MAX_BODY_BYTES = 16 * 1024;

// attr is truncated to this many characters (attribution target only, no PII).
export const MAX_ATTR_LENGTH = 120;

// route label sanity cap (client sends a template like /s/:space/p/:slug).
export const MAX_ROUTE_LENGTH = 200;

// `client_metrics.doc_size` is a Postgres `int` (int4). A garbage/huge docSize
// on a single event would overflow int4 and make Postgres reject the WHOLE
// batch INSERT, losing every event in it. Values outside this range are DROPPED
// to null (the event is still kept) so one bad field never loses the batch.
export const DOC_SIZE_MAX = 2147483647; // 2^31 - 1 (int4 max)

/** One sanitised event, shaped like the row that gets persisted. */
export interface ClientMetricRow {
  name: string;
  value: number;
  rating: string | null;
  route: string | null;
  attr: string | null;
  docSize: number | null;
  workspaceId: string | null;
}

/**
 * Normalise an optional free-text field (`route` / `attr`).
 *
 * Returns null for non-strings and for strings that are empty once cleaned;
 * otherwise the cleaned string truncated to `maxLength` characters.
 *
 * NUL characters are stripped first: Postgres `text` cannot store U+0000, so a
 * single NUL in one event would make the database reject the whole batch
 * INSERT — the same "one bad field loses the batch" failure that the docSize
 * range guard exists to prevent.
 */
function normalizeText(raw: unknown, maxLength: number): string | null {
  if (typeof raw !== 'string') return null;
  const cleaned = raw.replace(/\u0000/g, '');
  return cleaned.length > 0 ? cleaned.slice(0, maxLength) : null;
}

/**
 * Validate + normalise a single incoming event into a DB row, or return null to
 * DROP it. Pure so it is directly unit-testable.
 *
 * @param raw          Untrusted event object taken from the request body.
 * @param workspaceId  Workspace id to stamp on the row (passed through as-is).
 * @returns The sanitised row, or null when `raw` is not an object, its `name`
 *          is not whitelisted, or its `value` is not a finite number.
 *
 * Per-field rules:
 *  - rating: kept only when in ALLOWED_RATINGS, else null.
 *  - route / attr: NUL-stripped, truncated to MAX_ROUTE_LENGTH /
 *    MAX_ATTR_LENGTH, null when absent or empty.
 *  - docSize: truncated toward zero; null when absent, non-finite, negative or
 *    above DOC_SIZE_MAX.
 */
export function sanitizeVitalEvent(
  raw: unknown,
  workspaceId: string | null,
): ClientMetricRow | null {
  if (!raw || typeof raw !== 'object') return null;
  const e = raw as Record<string, unknown>;

  const name = e.name;
  if (typeof name !== 'string' || !ALLOWED_METRIC_NAMES.has(name)) return null;

  const value =
    typeof e.value === 'number' && Number.isFinite(e.value) ? e.value : null;
  if (value === null) return null;

  const rating =
    typeof e.rating === 'string' && ALLOWED_RATINGS.has(e.rating)
      ? e.rating
      : null;

  const route = normalizeText(e.route, MAX_ROUTE_LENGTH);
  const attr = normalizeText(e.attr, MAX_ATTR_LENGTH);

  let docSize: number | null = null;
  if (typeof e.docSize === 'number' && Number.isFinite(e.docSize)) {
    docSize = Math.trunc(e.docSize);
  } else if (typeof e.doc_size === 'number' && Number.isFinite(e.doc_size)) {
    // Accept snake_case too, in case a client sends the raw column name.
    docSize = Math.trunc(e.doc_size as number);
  }
  // Guard the int4 column: an out-of-range docSize would overflow int4 and make
  // Postgres reject the whole batch INSERT. Drop the field (keep the event)
  // rather than lose every other event in the batch.
  if (docSize !== null && (docSize < 0 || docSize > DOC_SIZE_MAX)) {
    docSize = null;
  }

  return { name, value, rating, route, attr, docSize, workspaceId };
}
|
||||
@@ -1,47 +0,0 @@
|
||||
import { ClientTelemetryModule } from './client-telemetry.module';
|
||||
import { VitalsController } from './vitals.controller';
|
||||
import { VitalsService } from './vitals.service';
|
||||
|
||||
// The register() gate is the CORE of the maintainer's E1=B decision: the public,
|
||||
// unauthenticated /api/telemetry/vitals endpoint must be OFF by default, so a
|
||||
// self-host deploy has no anonymous disk-fill surface into `client_metrics`. A
|
||||
// regression that inverts the flag (or a truthiness bug where "" / "false"
|
||||
// registers the route) would silently reopen that surface — pin it here.
|
||||
describe('ClientTelemetryModule.register (E1=B gate)', () => {
  // Snapshot of the flag before any test touches it, restored after each test.
  const original = process.env.CLIENT_TELEMETRY_ENABLED;

  // Set (or, for undefined, unset) the flag and build the dynamic module.
  const registerWithFlag = (flag: string | undefined) => {
    if (flag === undefined) {
      delete process.env.CLIENT_TELEMETRY_ENABLED;
    } else {
      process.env.CLIENT_TELEMETRY_ENABLED = flag;
    }
    return ClientTelemetryModule.register();
  };

  afterEach(() => {
    if (original !== undefined) {
      process.env.CLIENT_TELEMETRY_ENABLED = original;
      return;
    }
    delete process.env.CLIENT_TELEMETRY_ENABLED;
  });

  it('OFF by default (flag unset) — no controller, no provider (endpoint absent)', () => {
    const dynamicModule = registerWithFlag(undefined);
    expect(dynamicModule.controllers).toEqual([]);
    expect(dynamicModule.providers).toEqual([]);
  });

  it.each(['false', 'False', '0', '', 'yes', '1'])(
    'stays OFF for non-"true" value %p (no route)',
    (val) => {
      const dynamicModule = registerWithFlag(val);
      expect(dynamicModule.controllers).toEqual([]);
      expect(dynamicModule.providers).toEqual([]);
    },
  );

  it('ON only for "true" — registers VitalsController + VitalsService', () => {
    const dynamicModule = registerWithFlag('true');
    expect(dynamicModule.controllers).toContain(VitalsController);
    expect(dynamicModule.providers).toContain(VitalsService);
  });

  it('ON is case-insensitive ("TRUE")', () => {
    const dynamicModule = registerWithFlag('TRUE');
    expect(dynamicModule.controllers).toContain(VitalsController);
    expect(dynamicModule.providers).toContain(VitalsService);
  });
});
|
||||
@@ -1,32 +0,0 @@
|
||||
import { DynamicModule, Module } from '@nestjs/common';
|
||||
import { VitalsController } from './vitals.controller';
|
||||
import { VitalsService } from './vitals.service';
|
||||
|
||||
/**
|
||||
* Client perf-telemetry (#355): the public /api/telemetry/vitals sink that
|
||||
* persists web-vitals + custom client metrics into `client_metrics`.
|
||||
* Named ClientTelemetryModule to avoid confusion with the unrelated
|
||||
* integrations/telemetry (product usage ping) module.
|
||||
*
|
||||
* GATED OFF BY DEFAULT (maintainer decision E1=B). The public, unauthenticated
|
||||
* endpoint is only registered when CLIENT_TELEMETRY_ENABLED=true — otherwise the
|
||||
* route does NOT exist at all (no anonymous disk-fill surface, and no unbounded
|
||||
* `client_metrics` growth on a self-host deploy without an external pruner). The
|
||||
* client is told the same flag via window.CONFIG and skips sending when off.
|
||||
*/
|
||||
@Module({})
export class ClientTelemetryModule {
  /**
   * Build the dynamic module. The controller and service are registered only
   * when CLIENT_TELEMETRY_ENABLED equals "true" (case-insensitive); for an
   * unset variable or any other value both lists are empty.
   */
  static register(): DynamicModule {
    // process.env is read directly (not via EnvironmentService) so the toggle
    // is resolved at module-registration time.
    const flag = process.env.CLIENT_TELEMETRY_ENABLED ?? '';

    if (flag.toLowerCase() !== 'true') {
      return {
        module: ClientTelemetryModule,
        controllers: [],
        providers: [],
      };
    }

    return {
      module: ClientTelemetryModule,
      controllers: [VitalsController],
      providers: [VitalsService],
    };
  }
}
|
||||
@@ -1,64 +0,0 @@
|
||||
import {
|
||||
Body,
|
||||
Controller,
|
||||
HttpCode,
|
||||
Post,
|
||||
Req,
|
||||
UseGuards,
|
||||
} from '@nestjs/common';
|
||||
import { SkipThrottle, Throttle, ThrottlerGuard } from '@nestjs/throttler';
|
||||
import { FastifyRequest } from 'fastify';
|
||||
import { Public } from '../../common/decorators/public.decorator';
|
||||
import {
|
||||
AI_CHAT_THROTTLER,
|
||||
AUTH_THROTTLER,
|
||||
PAGE_TEMPLATE_THROTTLER,
|
||||
PUBLIC_SHARE_AI_THROTTLER,
|
||||
VITALS_THROTTLER,
|
||||
} from '../../integrations/throttle/throttler-names';
|
||||
import { VitalsService } from './vitals.service';
|
||||
|
||||
/**
|
||||
* POST /api/telemetry/vitals (#355) — public client perf-metrics sink.
|
||||
*
|
||||
* PUBLIC (browsers post via sendBeacon, no session) but IP-throttled. Always
|
||||
* returns 200 with no body of interest: invalid/foreign/oversized payloads are
|
||||
* silently dropped by the service rather than 400'd, so browsers never retry.
|
||||
*/
|
||||
@Controller('telemetry')
export class VitalsController {
  constructor(private readonly vitalsService: VitalsService) {}

  /**
   * Accept a batch of client metrics and hand it to the service.
   * Always answers `{ ok: true }` with HTTP 200, even when ingestion throws.
   */
  @Public()
  @UseGuards(ThrottlerGuard)
  // Every named throttler other than VITALS is skipped so that only the VITALS
  // limit declared below applies to this route; without the skips the
  // strictest other bucket would cap the route instead.
  @SkipThrottle({
    [AUTH_THROTTLER]: true,
    [AI_CHAT_THROTTLER]: true,
    [PAGE_TEMPLATE_THROTTLER]: true,
    [PUBLIC_SHARE_AI_THROTTLER]: true,
  })
  @Throttle({ [VITALS_THROTTLER]: { limit: 120, ttl: 60_000 } })
  @Post('vitals')
  @HttpCode(200)
  async vitals(
    @Body() body: unknown,
    @Req() req: FastifyRequest,
  ): Promise<{ ok: true }> {
    // workspaceId is read off the raw request when present; a missing or
    // empty value becomes null.
    const rawRequest = req.raw as unknown as { workspaceId?: string };
    const workspaceId = rawRequest?.workspaceId || null;

    try {
      await this.vitalsService.ingest(body, workspaceId);
    } catch {
      // Telemetry is best-effort: storage errors are never surfaced to the
      // browser.
    }

    return { ok: true };
  }
}
|
||||
@@ -1,149 +0,0 @@
|
||||
import { VitalsService } from './vitals.service';
|
||||
import { MAX_ATTR_LENGTH } from './client-metrics.constants';
|
||||
|
||||
// buildRows is pure (no DB access), so a null db is fine here.
|
||||
const svc = new VitalsService(null as any);
|
||||
|
||||
describe('VitalsService.buildRows', () => {
  const WS = 'ws-uuid';
  const PAGE_ROUTE = '/s/:space/p/:slug';

  // Wrap events in the `{ events }` envelope and run them through buildRows.
  const rowsFor = (events: unknown[], workspaceId: string | null = WS) =>
    svc.buildRows({ events }, workspaceId);

  it('accepts a valid batch and maps whitelisted fields to rows', () => {
    const [first, second, ...rest] = rowsFor([
      { name: 'INP', value: 123.4, rating: 'good', route: PAGE_ROUTE },
      { name: 'editor_tx_ms', value: 12, route: PAGE_ROUTE, docSize: 4096 },
    ]);
    expect(rest).toHaveLength(0);
    expect(first).toEqual({
      name: 'INP',
      value: 123.4,
      rating: 'good',
      route: PAGE_ROUTE,
      attr: null,
      docSize: null,
      workspaceId: WS,
    });
    expect(second.name).toBe('editor_tx_ms');
    expect(second.docSize).toBe(4096);
    expect(second.workspaceId).toBe(WS);
  });

  it('accepts a bare array body', () => {
    const rows = svc.buildRows([{ name: 'LCP', value: 1 }], WS);
    expect(rows).toHaveLength(1);
    expect(rows[0].name).toBe('LCP');
  });

  it('drops events with foreign metric names', () => {
    const rows = rowsFor([
      { name: 'evil_metric', value: 1 },
      { name: 'LCP', value: 2 },
    ]);
    expect(rows).toHaveLength(1);
    expect(rows[0].name).toBe('LCP');
  });

  it('drops events with a non-numeric or missing value', () => {
    const rows = rowsFor([
      { name: 'CLS', value: 'nan' },
      { name: 'CLS' },
      { name: 'CLS', value: 0.1 },
    ]);
    expect(rows).toHaveLength(1);
    expect(rows[0].value).toBe(0.1);
  });

  it('strips foreign fields and only keeps whitelisted columns', () => {
    const rows = rowsFor([
      { name: 'TTFB', value: 5, secret: 'drop-me', title: 'my page' },
    ]);
    expect(rows).toHaveLength(1);

    const expectedColumns = [
      'attr',
      'docSize',
      'name',
      'rating',
      'route',
      'value',
      'workspaceId',
    ];
    expect(Object.keys(rows[0]).sort()).toEqual(expectedColumns.sort());
    expect((rows[0] as any).secret).toBeUndefined();
    expect((rows[0] as any).title).toBeUndefined();
  });

  it('rejects a rating outside the allowed set (-> null)', () => {
    const [row] = rowsFor([{ name: 'INP', value: 1, rating: 'terrible' }]);
    expect(row.rating).toBeNull();
  });

  it('truncates attr to 120 chars', () => {
    const [row] = rowsFor([{ name: 'INP', value: 1, attr: 'a'.repeat(500) }]);
    expect(row.attr).toHaveLength(MAX_ATTR_LENGTH);
  });

  it('caps the batch at 50 events', () => {
    const manyEvents = Array.from({ length: 200 }, () => ({
      name: 'CLS',
      value: 1,
    }));
    expect(rowsFor(manyEvents)).toHaveLength(50);
  });

  it('drops an oversized (>16KB) payload wholesale', () => {
    // 50 events x 400-char attr: the serialised body far exceeds 16KB.
    const bulkyEvents = Array.from({ length: 50 }, () => ({
      name: 'INP',
      value: 1,
      attr: 'x'.repeat(400),
      route: PAGE_ROUTE,
    }));
    expect(rowsFor(bulkyEvents)).toHaveLength(0);
  });

  it('returns [] for malformed bodies', () => {
    for (const malformed of [null, 'nope', { notEvents: 1 }, 42]) {
      expect(svc.buildRows(malformed, WS)).toEqual([]);
    }
  });

  it('carries a null workspaceId through', () => {
    const [row] = rowsFor([{ name: 'LCP', value: 1 }], null);
    expect(row.workspaceId).toBeNull();
  });

  it('drops an out-of-int4-range docSize to null without losing the batch', () => {
    // A docSize outside the int4 range must not reject the whole batch: the
    // field is dropped to null and the event itself is kept.
    const rows = rowsFor([
      { name: 'editor_tx_ms', value: 10, docSize: 9_999_999_999 },
      { name: 'editor_tx_ms', value: 20, docSize: -5 },
      { name: 'editor_tx_ms', value: 30, docSize: 4096 },
    ]);
    expect(rows).toHaveLength(3);
    expect(rows[0].docSize).toBeNull();
    expect(rows[1].docSize).toBeNull();
    expect(rows[2].docSize).toBe(4096);
  });

  it('keeps a docSize exactly at the int4 max', () => {
    const [row] = rowsFor([
      { name: 'editor_tx_ms', value: 1, docSize: 2147483647 },
    ]);
    expect(row.docSize).toBe(2147483647);
  });
});
|
||||
@@ -1,70 +0,0 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||
import {
|
||||
ClientMetricRow,
|
||||
MAX_BODY_BYTES,
|
||||
MAX_EVENTS_PER_BATCH,
|
||||
sanitizeVitalEvent,
|
||||
} from './client-metrics.constants';
|
||||
|
||||
@Injectable()
export class VitalsService {
  constructor(@InjectKysely() private readonly db: KyselyDB) {}

  /**
   * Turn a raw request body into the (bounded, whitelisted) rows to persist.
   * Pure/synchronous so it is unit-testable without a DB.
   *
   * @param body         Untrusted parsed request body: either a bare array of
   *                     events or an object of the form `{ events: [...] }`.
   * @param workspaceId  Workspace id stamped onto every row (may be null).
   * @returns At most MAX_EVENTS_PER_BATCH sanitised rows; [] for any
   *          malformed / oversized / non-serialisable input.
   */
  buildRows(body: unknown, workspaceId: string | null): ClientMetricRow[] {
    if (!body || typeof body !== 'object') return [];

    // Defence-in-depth body cap (~16KB): drop oversized batches wholesale.
    // The cap is expressed in BYTES, so measure the UTF-8 encoded size rather
    // than the string length (UTF-16 code units), which under-counts
    // multi-byte characters.
    try {
      const serialised = JSON.stringify(body);
      if (Buffer.byteLength(serialised, 'utf8') > MAX_BODY_BYTES) return [];
    } catch {
      // Non-serialisable input (e.g. circular structure) is treated as invalid.
      return [];
    }

    // Accept either a bare array or `{ events: [...] }`.
    const events = Array.isArray(body)
      ? body
      : Array.isArray((body as { events?: unknown }).events)
        ? ((body as { events: unknown[] }).events as unknown[])
        : null;
    if (!events) return [];

    const rows: ClientMetricRow[] = [];
    for (const event of events) {
      // The cap counts ACCEPTED rows; dropped events do not consume it.
      if (rows.length >= MAX_EVENTS_PER_BATCH) break;
      const row = sanitizeVitalEvent(event, workspaceId);
      if (row) rows.push(row);
    }
    return rows;
  }

  /** Batch-insert the sanitised rows in a single statement. No-op on []. */
  async insertRows(rows: ClientMetricRow[]): Promise<void> {
    if (rows.length === 0) return;
    await this.db
      .insertInto('clientMetrics')
      .values(
        // Columns are copied explicitly so only the known fields are written.
        rows.map((r) => ({
          name: r.name,
          value: r.value,
          rating: r.rating,
          route: r.route,
          attr: r.attr,
          docSize: r.docSize,
          workspaceId: r.workspaceId,
        })),
      )
      .execute();
  }

  /**
   * Sanitise `body` and persist the resulting rows.
   * Rejects if the insert fails; the caller decides whether to swallow that.
   */
  async ingest(body: unknown, workspaceId: string | null): Promise<void> {
    const rows = this.buildRows(body, workspaceId);
    await this.insertRows(rows);
  }
}
|
||||
@@ -40,11 +40,6 @@ import { PageListener } from '@docmost/db/listeners/page.listener';
|
||||
import { PostgresJSDialect } from 'kysely-postgres-js';
|
||||
import * as postgres from 'postgres';
|
||||
import { normalizePostgresUrl } from '../common/helpers';
|
||||
import {
|
||||
observeDbQuery,
|
||||
isMetricsEnabled,
|
||||
} from '../integrations/metrics/metrics.registry';
|
||||
import { firstSqlToken } from '../integrations/metrics/metrics.constants';
|
||||
|
||||
@Global()
|
||||
@Module({
|
||||
@@ -72,18 +67,6 @@ import { firstSqlToken } from '../integrations/metrics/metrics.constants';
|
||||
}),
|
||||
plugins: [new CamelCasePlugin()],
|
||||
log: (event: LogEvent) => {
|
||||
// #355 — db_query_duration_seconds, labelled by the leading SQL token
|
||||
// (bounded cardinality). Gated on isMetricsEnabled() so the token work
|
||||
// (regex + Set lookup) is skipped entirely when metrics are OFF — not
|
||||
// just observeDbQuery no-op'd — so a non-metrics deployment pays nothing
|
||||
// per query. Runs independent of the dev-only debug logging below.
|
||||
if (isMetricsEnabled()) {
|
||||
observeDbQuery(
|
||||
firstSqlToken(event.query.sql),
|
||||
event.queryDurationMillis / 1000,
|
||||
);
|
||||
}
|
||||
|
||||
if (environmentService.getNodeEnv() !== 'development') return;
|
||||
const logger = new Logger(DatabaseModule.name);
|
||||
if (process.env.DEBUG_DB?.toLowerCase() === 'true') {
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
import { type Kysely, sql } from 'kysely';
|
||||
|
||||
/**
|
||||
* #355 — `client_metrics`: raw sink for client-side perf telemetry (web-vitals
|
||||
* + custom editor/page metrics) posted to /api/telemetry/vitals.
|
||||
*
|
||||
* The table/columns/indexes here are a FIXED contract shared with the deployed
|
||||
* Grafana infra (the `grafana_ro` role reads this table; a separate maintenance
|
||||
* container prunes rows >90d and re-GRANTs daily). No app-side retention is
|
||||
* added on purpose. Written as raw SQL to match that contract 1:1 (identity PK,
|
||||
* conditional GRANT).
|
||||
*/
|
||||
/**
 * Create the `client_metrics` table, its two indexes, and — only when the
 * `grafana_ro` role exists — grant that role SELECT on the table.
 * Statements run one after another, in the order listed.
 */
export async function up(db: Kysely<any>): Promise<void> {
  const createTable = sql`
    CREATE TABLE client_metrics (
      id bigint GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
      created_at timestamptz NOT NULL DEFAULT now(),
      name text NOT NULL, -- INP|LCP|CLS|TTFB|editor_tx_ms|page_open_ms|longtask_ms
      value double precision NOT NULL,
      rating text, -- good|needs-improvement|poor (web-vitals only)
      route text, -- templated: /s/:space/p/:slug — never raw slugs
      attr text, -- attribution target, truncated to 120 chars
      doc_size int, -- editor_tx_ms only
      workspace_id uuid
    )
  `;

  const createNameCreatedIndex = sql`
    CREATE INDEX idx_client_metrics_name_created
    ON client_metrics (name, created_at)
  `;

  const createCreatedIndex = sql`
    CREATE INDEX idx_client_metrics_created
    ON client_metrics (created_at)
  `;

  // The grant is wrapped in a role-existence check so the migration still
  // applies where the read-only Grafana role is absent (dev/CI).
  const grantToGrafana = sql`
    DO $$
    BEGIN
      IF EXISTS (SELECT FROM pg_roles WHERE rolname = 'grafana_ro') THEN
        GRANT SELECT ON client_metrics TO grafana_ro;
      END IF;
    END $$;
  `;

  const statements = [
    createTable,
    createNameCreatedIndex,
    createCreatedIndex,
    grantToGrafana,
  ];
  for (const statement of statements) {
    await statement.execute(db);
  }
}
|
||||
|
||||
/** Revert the migration by dropping `client_metrics` if it exists. */
export async function down(db: Kysely<any>): Promise<void> {
  const dropTable = sql`DROP TABLE IF EXISTS client_metrics`;
  await dropTable.execute(db);
}
|
||||
-13
@@ -156,18 +156,6 @@ export interface Billing {
|
||||
workspaceId: string;
|
||||
}
|
||||
|
||||
/** Typed shape of the `clientMetrics` table entry in the `DB` interface. */
export interface ClientMetrics {
  /** Database-generated identifier. */
  id: Generated<Int8>;
  /** Database-generated creation timestamp. */
  createdAt: Generated<Timestamp>;
  /** Metric name. */
  name: string;
  /** Measured value. */
  value: number;
  /** Optional rating label. */
  rating: string | null;
  /** Optional route label. */
  route: string | null;
  /** Optional attribution string. */
  attr: string | null;
  /** Optional document size. */
  docSize: number | null;
  /** Optional owning workspace id. */
  workspaceId: string | null;
}
|
||||
|
||||
export interface Comments {
|
||||
aiChatId: string | null;
|
||||
content: Json | null;
|
||||
@@ -703,7 +691,6 @@ export interface DB {
|
||||
authProviders: AuthProviders;
|
||||
backlinks: Backlinks;
|
||||
billing: Billing;
|
||||
clientMetrics: ClientMetrics;
|
||||
comments: Comments;
|
||||
favorites: Favorites;
|
||||
fileTasks: FileTasks;
|
||||
|
||||
@@ -227,22 +227,6 @@ export class EnvironmentService {
|
||||
return compactTree === 'true';
|
||||
}
|
||||
|
||||
/**
|
||||
* Operator toggle for the public client-telemetry sink (#355). DEFAULT OFF:
|
||||
* the unauthenticated POST /api/telemetry/vitals endpoint + client vitals
|
||||
* collection are only wired when this is explicitly true. Kept SEPARATE from
|
||||
* METRICS_PORT (the server Prometheus half) because Grafana reads the
|
||||
* `client_metrics` table directly, independent of the scrape port — and
|
||||
* because `client_metrics` has no app-side retention, so an operator must opt
|
||||
* in and run an external pruner.
|
||||
*/
|
||||
isClientTelemetryEnabled(): boolean {
  // Falls back to 'false' when the variable is not configured; the comparison
  // is case-insensitive and only "true" enables the feature.
  const rawFlag = this.configService.get<string>(
    'CLIENT_TELEMETRY_ENABLED',
    'false',
  );
  return rawFlag.toLowerCase() === 'true';
}
|
||||
|
||||
getStripePublishableKey(): string {
|
||||
return this.configService.get<string>('STRIPE_PUBLISHABLE_KEY');
|
||||
}
|
||||
|
||||
@@ -1,46 +0,0 @@
|
||||
import { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { isStreamingResponse } from './metrics.constants';
|
||||
import { observeHttp } from './metrics.registry';
|
||||
|
||||
/**
|
||||
* Resolve the BOUNDED route label for an HTTP response.
|
||||
*
|
||||
* HARD REQUIREMENT (#355): use the ROUTE TEMPLATE (`/pages/:id`), NEVER the raw
|
||||
* URL (`/pages/abc-123`), so label cardinality stays finite. Fastify exposes the
|
||||
* matched template on `req.routeOptions.url`. On 404s (no route matched) that is
|
||||
* missing → collapse to the literal `unknown`.
|
||||
*/
|
||||
export function resolveRouteLabel(req: FastifyRequest): string {
  const template = req.routeOptions?.url;
  // Anything other than a non-empty string collapses to the literal 'unknown'.
  if (typeof template !== 'string' || template.length === 0) {
    return 'unknown';
  }
  return template;
}
|
||||
|
||||
/**
|
||||
* Fastify onResponse handler that records http_request_duration_seconds.
|
||||
* No-op when metrics are disabled (the hook is only registered when enabled,
|
||||
* but the observe helpers are also guarded). Never throws into the response
|
||||
* pipeline — telemetry must not break request handling.
|
||||
*/
|
||||
export function recordHttpResponse(
  req: FastifyRequest,
  reply: FastifyReply,
): void {
  try {
    const routeLabel = resolveRouteLabel(req);

    // Streaming/SSE responses are skipped: for those the response hook fires
    // at connection close, so the recorded time would be the stream lifetime.
    const streaming = isStreamingResponse(
      reply.getHeader('content-type'),
      routeLabel,
    );
    if (streaming) {
      return;
    }

    // reply.elapsedTime is in milliseconds; the metric is in seconds.
    observeHttp(
      req.method,
      routeLabel,
      reply.statusCode,
      reply.elapsedTime / 1000,
    );
  } catch {
    // Deliberately swallowed: telemetry must never affect the served response.
  }
}
|
||||
@@ -1,146 +0,0 @@
|
||||
import {
|
||||
Injectable,
|
||||
Logger,
|
||||
OnModuleDestroy,
|
||||
OnModuleInit,
|
||||
} from '@nestjs/common';
|
||||
import { InjectQueue } from '@nestjs/bullmq';
|
||||
import { Queue, QueueEvents } from 'bullmq';
|
||||
import { QueueName } from '../queue/constants';
|
||||
import { EnvironmentService } from '../environment/environment.service';
|
||||
import { parseRedisUrl } from '../../common/helpers';
|
||||
import {
|
||||
isMetricsEnabled,
|
||||
observeJobDuration,
|
||||
setQueueDepth,
|
||||
} from './metrics.registry';
|
||||
|
||||
const POLL_INTERVAL_MS = 15_000;
|
||||
// Cap the in-flight start-time map so a job that never emits completed/failed
|
||||
// (worker crash) cannot leak memory unbounded. Well above realistic concurrency.
|
||||
const MAX_INFLIGHT = 10_000;
|
||||
|
||||
/**
|
||||
* BullMQ instrumentation for #355:
|
||||
* - `bullmq_queue_depth{queue}`: polled from getJobCounts() every 15s.
|
||||
* - `bullmq_job_duration_seconds{queue}`: wall-clock time between a job going
|
||||
* `active` and `completed`/`failed`, observed via per-queue QueueEvents.
|
||||
*
|
||||
* Queue names are a FINITE list (the QueueName enum), so labels are bounded — no
|
||||
* job ids ever enter a label. Everything is gated on METRICS_PORT: when metrics
|
||||
* are off, onModuleInit does nothing (no interval, no QueueEvents connections).
|
||||
*/
|
||||
@Injectable()
export class MetricsBullService implements OnModuleInit, OnModuleDestroy {
  private readonly logger = new Logger(MetricsBullService.name);
  // Fixed list of instrumented queues with the metric label used for each.
  private readonly queues: { label: string; queue: Queue }[];
  private timer: NodeJS.Timeout | null = null;
  private queueEvents: QueueEvents[] = [];
  // `${queueLabel}:${jobId}` -> start timestamp (ms). Bounded by MAX_INFLIGHT.
  // The key includes the queue label because job ids are only unique within a
  // queue; keying by the bare id would let jobs from different queues
  // overwrite each other's start time.
  private readonly inflight = new Map<string, number>();

  constructor(
    private readonly environmentService: EnvironmentService,
    @InjectQueue(QueueName.EMAIL_QUEUE) emailQueue: Queue,
    @InjectQueue(QueueName.ATTACHMENT_QUEUE) attachmentQueue: Queue,
    @InjectQueue(QueueName.GENERAL_QUEUE) generalQueue: Queue,
    @InjectQueue(QueueName.BILLING_QUEUE) billingQueue: Queue,
    @InjectQueue(QueueName.FILE_TASK_QUEUE) fileTaskQueue: Queue,
    @InjectQueue(QueueName.SEARCH_QUEUE) searchQueue: Queue,
    @InjectQueue(QueueName.AI_QUEUE) aiQueue: Queue,
    @InjectQueue(QueueName.HISTORY_QUEUE) historyQueue: Queue,
    @InjectQueue(QueueName.NOTIFICATION_QUEUE) notificationQueue: Queue,
    @InjectQueue(QueueName.AUDIT_QUEUE) auditQueue: Queue,
  ) {
    this.queues = [
      { label: 'email', queue: emailQueue },
      { label: 'attachment', queue: attachmentQueue },
      { label: 'general', queue: generalQueue },
      { label: 'billing', queue: billingQueue },
      { label: 'file-task', queue: fileTaskQueue },
      { label: 'search', queue: searchQueue },
      { label: 'ai', queue: aiQueue },
      { label: 'history', queue: historyQueue },
      { label: 'notification', queue: notificationQueue },
      { label: 'audit', queue: auditQueue },
    ];
  }

  /**
   * Start depth polling and per-queue job-duration tracking.
   * Does nothing when metrics are disabled (no interval, no QueueEvents).
   */
  onModuleInit(): void {
    if (!isMetricsEnabled()) return;

    // Poll queue depth on a fixed interval, plus once immediately.
    this.timer = setInterval(() => {
      void this.pollDepths();
    }, POLL_INTERVAL_MS);
    // Do not keep the event loop alive solely for polling.
    this.timer.unref?.();
    void this.pollDepths();

    // Wire per-queue job-duration events.
    const redisConfig = parseRedisUrl(this.environmentService.getRedisUrl());
    const connection = {
      host: redisConfig.host,
      port: redisConfig.port,
      password: redisConfig.password,
      db: redisConfig.db,
      family: redisConfig.family,
    };

    for (const { label, queue } of this.queues) {
      const events = new QueueEvents(queue.name, { connection });
      this.trackJobDurations(label, events);
      this.queueEvents.push(events);
    }
  }

  /**
   * Subscribe to one queue's events and observe the wall-clock time between a
   * job's `active` event and its `completed`/`failed` event under `label`.
   */
  private trackJobDurations(label: string, events: QueueEvents): void {
    // Namespace the job id by queue so ids from different queues never collide.
    const keyFor = (jobId: string) => `${label}:${jobId}`;

    events.on('active', ({ jobId }) => {
      if (this.inflight.size >= MAX_INFLIGHT) {
        // Drop the oldest tracked start to keep the map bounded.
        const oldest = this.inflight.keys().next().value;
        if (oldest !== undefined) this.inflight.delete(oldest);
      }
      this.inflight.set(keyFor(jobId), Date.now());
    });

    const finalize = ({ jobId }: { jobId: string }) => {
      const key = keyFor(jobId);
      const start = this.inflight.get(key);
      // No recorded start (never seen active, or evicted): nothing to observe.
      if (start === undefined) return;
      this.inflight.delete(key);
      observeJobDuration(label, (Date.now() - start) / 1000);
    };
    events.on('completed', finalize);
    events.on('failed', finalize);

    events.on('error', (err) => {
      this.logger.debug(`QueueEvents error (${label}): ${err?.message}`);
    });
  }

  /**
   * Read job counts for every queue and publish the depth gauge.
   * A failure on one queue is logged at debug level and does not stop the
   * remaining queues from being polled.
   */
  private async pollDepths(): Promise<void> {
    for (const { label, queue } of this.queues) {
      try {
        const counts = await queue.getJobCounts();
        // "Depth" = jobs not yet finished (backlog + in-flight).
        const depth =
          (counts.waiting ?? 0) +
          (counts.active ?? 0) +
          (counts.delayed ?? 0) +
          (counts.prioritized ?? 0) +
          (counts.paused ?? 0);
        setQueueDepth(label, depth);
      } catch (err) {
        this.logger.debug(
          `Failed to read job counts for ${label}: ${(err as Error)?.message}`,
        );
      }
    }
  }

  /** Stop polling, close every QueueEvents connection and clear state. */
  async onModuleDestroy(): Promise<void> {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
    await Promise.all(
      // Close errors are ignored so one failing connection cannot block
      // shutdown of the others.
      this.queueEvents.map((e) => e.close().catch(() => undefined)),
    );
    this.queueEvents = [];
    this.inflight.clear();
  }
}
|
||||
@@ -1,16 +0,0 @@
|
||||
import { Injectable, OnModuleDestroy } from '@nestjs/common';
|
||||
import { closeMetricsServer } from './metrics.server';
|
||||
|
||||
/**
 * Bridges the standalone metrics scrape server into Nest's shutdown lifecycle.
 *
 * The scrape server is a bare node:http listener started from main.ts after
 * the Fastify app is up, i.e. outside the DI container. With
 * `app.enableShutdownHooks()`, Nest calls onModuleDestroy on SIGTERM/SIGINT and
 * this provider uses that hook to close the listener. Without it the listener
 * would be left dangling: jest/e2e runs would never exit and a prod restart
 * could leak the port. A no-op when metrics are disabled.
 */
@Injectable()
export class MetricsServerLifecycle implements OnModuleDestroy {
  /** Close the scrape server, if one was started, when the module is torn down. */
  async onModuleDestroy(): Promise<void> {
    return closeMetricsServer();
  }
}
|
||||
@@ -1,84 +0,0 @@
|
||||
/**
|
||||
* Perf-metrics contract (#355). These names/labels are FIXED by the already
|
||||
* deployed scrape+dashboard infra (VictoriaMetrics scraping docmost:9464,
|
||||
* Grafana dashboards, alerts). Do NOT rename them.
|
||||
*/
|
||||
export const METRIC_HTTP_REQUEST_DURATION = 'http_request_duration_seconds';
|
||||
export const METRIC_DB_QUERY_DURATION = 'db_query_duration_seconds';
|
||||
export const METRIC_BULLMQ_QUEUE_DEPTH = 'bullmq_queue_depth';
|
||||
export const METRIC_BULLMQ_JOB_DURATION = 'bullmq_job_duration_seconds';
|
||||
export const METRIC_COLLAB_STORE_DURATION = 'collab_store_duration_seconds';
|
||||
|
||||
// Histogram buckets (seconds). Chosen to give useful p50/p95/p99 resolution
|
||||
// for typical web/DB latencies without exploding series cardinality.
|
||||
export const HTTP_BUCKETS = [
|
||||
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10,
|
||||
];
|
||||
export const DB_BUCKETS = [
|
||||
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5,
|
||||
];
|
||||
export const COLLAB_BUCKETS = [
|
||||
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5,
|
||||
];
|
||||
export const JOB_BUCKETS = [
|
||||
0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30, 60, 120,
|
||||
];
|
||||
|
||||
// The bounded set of SQL leading keywords used as db_query_duration_seconds
// labels. Module-const so it is built ONCE, not per query (this runs on every DB
// query when metrics are enabled).
const KNOWN_SQL_TOKENS = new Set([
  'select',
  'insert',
  'update',
  'delete',
  'with',
  'begin',
  'commit',
  'rollback',
  'alter',
  'create',
  'drop',
  'truncate',
  'explain',
]);

// Single-character whitespace test used while skipping leading noise.
const SQL_WHITESPACE = /\s/;

// Sticky matcher for the run of ASCII letters at a given offset; `lastIndex` is
// set explicitly before every use, so the shared instance carries no state
// between calls.
const SQL_LEADING_WORD = /[a-zA-Z]+/y;

/**
 * Return the offset of the first character that is not leading "noise":
 * whitespace, opening parentheses, `-- line` comments and block comments
 * (`/` + `*` ... `*` + `/`), in any order. An unterminated comment swallows the
 * rest of the text. Block comments are treated as non-nesting.
 *
 * The scan is a single forward pass (no regex backtracking), so it stays linear
 * in the length of the skipped prefix.
 */
function skipLeadingSqlNoise(sql: string): number {
  const length = sql.length;
  let pos = 0;
  while (pos < length) {
    const ch = sql[pos];
    if (ch === '(' || SQL_WHITESPACE.test(ch)) {
      pos += 1;
    } else if (sql.startsWith('--', pos)) {
      const eol = sql.indexOf('\n', pos + 2);
      if (eol === -1) return length;
      pos = eol + 1;
    } else if (sql.startsWith('/*', pos)) {
      const close = sql.indexOf('*/', pos + 2);
      if (close === -1) return length;
      pos = close + 2;
    } else {
      break;
    }
  }
  return pos;
}

/**
 * Extract the first SQL token (select/insert/update/delete/...) from a query,
 * lower-cased, to use as a BOUNDED label for db_query_duration_seconds. Using
 * the full query text would blow up label cardinality; the leading keyword is a
 * finite set.
 *
 * Leading whitespace, opening parentheses and SQL comments are skipped before
 * the keyword is read, so a query prefixed with a tracing comment is still
 * labelled by its real keyword.
 *
 * @param sql Raw query text; may be undefined or empty.
 * @returns One of the KNOWN_SQL_TOKENS, or `other` for unknown/empty queries.
 */
export function firstSqlToken(sql: string | undefined): string {
  if (!sql) return 'other';

  SQL_LEADING_WORD.lastIndex = skipLeadingSqlNoise(sql);
  const match = SQL_LEADING_WORD.exec(sql);
  if (!match) return 'other';

  const token = match[0].toLowerCase();
  return KNOWN_SQL_TOKENS.has(token) ? token : 'other';
}
|
||||
|
||||
/**
 * Decide whether a response must be left OUT of http_request_duration_seconds.
 *
 * SSE/streaming responses (the AI-chat `text/event-stream`) hold the connection
 * open for the whole conversation, so Fastify's onResponse only fires when the
 * client disconnects. Recording that would measure connection lifetime rather
 * than response time and poison p95/p99.
 *
 * @param contentType Response content-type header value; anything that is not
 *   a string is ignored.
 * @param route Matched route template, if any.
 * @returns true when the response is a stream and must be skipped.
 */
export function isStreamingResponse(
  contentType: unknown,
  route: string | undefined,
): boolean {
  // Authoritative signal: the response declares itself an event stream.
  const declaresEventStream =
    typeof contentType === 'string' &&
    contentType.toLowerCase().includes('text/event-stream');
  if (declaresEventStream) return true;

  // Fallback: the AI-chat stream routes (/api/ai-chat/stream,
  // /api/shares/ai/stream) both end in `/stream`.
  return route ? route.endsWith('/stream') : false;
}
|
||||
@@ -1,15 +0,0 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { MetricsBullService } from './metrics-bull.service';
|
||||
import { MetricsServerLifecycle } from './metrics-server.lifecycle';
|
||||
|
||||
/**
 * Nest module for the perf-metrics integration (#355).
 *
 * Only the pieces that need the DI container live here. The queues come from
 * the @Global QueueModule (which exports BullModule), so they are not
 * re-registered. The HTTP histogram, DB-query and collab-store collectors are
 * module-level singletons (metrics.registry) wired directly at their call
 * sites, so they do not appear in this module.
 */
@Module({
  providers: [
    // BullMQ collectors.
    MetricsBullService,
    // Closes the scrape server on application shutdown.
    MetricsServerLifecycle,
  ],
})
export class MetricsModule {}
|
||||
@@ -1,126 +0,0 @@
|
||||
import {
|
||||
collectDefaultMetrics,
|
||||
Histogram,
|
||||
Gauge,
|
||||
Registry,
|
||||
} from 'prom-client';
|
||||
import {
|
||||
COLLAB_BUCKETS,
|
||||
DB_BUCKETS,
|
||||
HTTP_BUCKETS,
|
||||
JOB_BUCKETS,
|
||||
METRIC_BULLMQ_JOB_DURATION,
|
||||
METRIC_BULLMQ_QUEUE_DEPTH,
|
||||
METRIC_COLLAB_STORE_DURATION,
|
||||
METRIC_DB_QUERY_DURATION,
|
||||
METRIC_HTTP_REQUEST_DURATION,
|
||||
} from './metrics.constants';
|
||||
|
||||
/**
|
||||
* Process-wide perf-metrics registry (#355).
|
||||
*
|
||||
* This is a plain module singleton (NOT a Nest provider) because the collectors
|
||||
* are cross-cutting: the Kysely `log` callback (built in a DI factory), the
|
||||
* Fastify onResponse hook (main.ts, before the Nest container hands out
|
||||
* providers) and the collab persistence extension all need the SAME instruments
|
||||
* without threading DI through them.
|
||||
*
|
||||
* HARD CONTRACT: when `METRICS_PORT` is unset the whole subsystem is OFF — the
|
||||
* registry is never created, `collectDefaultMetrics` never runs, and every
|
||||
* observe/set helper is a cheap no-op. Nothing is exposed on :3000.
|
||||
*/
|
||||
|
||||
// Decided once at process start. Deliberately read here (not via
|
||||
// EnvironmentService) so the toggle is identical for the DI and non-DI callers.
|
||||
const enabled = Boolean(process.env.METRICS_PORT);
|
||||
|
||||
let registry: Registry | null = null;
|
||||
let httpHist: Histogram<'method' | 'route' | 'status'> | null = null;
|
||||
let dbHist: Histogram<'op'> | null = null;
|
||||
let queueDepthGauge: Gauge<'queue'> | null = null;
|
||||
let jobHist: Histogram<'queue'> | null = null;
|
||||
let collabHist: Histogram | null = null;
|
||||
|
||||
/**
 * Build the registry and every instrument exactly once.
 *
 * Does nothing when metrics are disabled or when the registry already exists,
 * which is what makes repeated calls harmless.
 */
function init(): void {
  if (!enabled || registry) return;

  const reg = new Registry();
  registry = reg;
  // Every instrument below is attached to this one registry.
  const registers = [reg];

  // Node/runtime metrics: gives nodejs_eventloop_lag_p99_seconds, GC, heap, etc.
  collectDefaultMetrics({ register: reg });

  httpHist = new Histogram({
    name: METRIC_HTTP_REQUEST_DURATION,
    help: 'HTTP request duration in seconds, by method, route template and status',
    labelNames: ['method', 'route', 'status'],
    buckets: HTTP_BUCKETS,
    registers,
  });

  dbHist = new Histogram({
    name: METRIC_DB_QUERY_DURATION,
    help: 'Database query duration in seconds, by leading SQL keyword',
    labelNames: ['op'],
    buckets: DB_BUCKETS,
    registers,
  });

  queueDepthGauge = new Gauge({
    name: METRIC_BULLMQ_QUEUE_DEPTH,
    help: 'Number of not-yet-finished BullMQ jobs per queue',
    labelNames: ['queue'],
    registers,
  });

  jobHist = new Histogram({
    name: METRIC_BULLMQ_JOB_DURATION,
    help: 'BullMQ job processing duration in seconds, per queue',
    labelNames: ['queue'],
    buckets: JOB_BUCKETS,
    registers,
  });

  // The collab-store histogram is unlabelled: one series for all documents.
  collabHist = new Histogram({
    name: METRIC_COLLAB_STORE_DURATION,
    help: 'Collaboration onStoreDocument duration in seconds',
    buckets: COLLAB_BUCKETS,
    registers,
  });
}
|
||||
|
||||
// Runs once when this module is first imported. Safe to call again (idempotent).
|
||||
init();
|
||||
|
||||
/** Whether the metrics subsystem is switched on for this process. */
export function isMetricsEnabled(): boolean {
  return enabled;
}

/** The prom-client registry, or null when metrics are disabled. */
export function getMetricsRegistry(): Registry | null {
  return registry;
}

/**
 * Record one HTTP request duration.
 * No-op while the HTTP histogram has not been created.
 */
export function observeHttp(
  method: string,
  route: string,
  status: number,
  seconds: number,
): void {
  if (!httpHist) return;
  httpHist.observe({ method, route, status }, seconds);
}

/**
 * Record one database query duration under the given operation label.
 * No-op while the DB histogram has not been created.
 */
export function observeDbQuery(op: string, seconds: number): void {
  if (!dbHist) return;
  dbHist.observe({ op }, seconds);
}

/**
 * Publish the current depth of a queue.
 * No-op while the queue-depth gauge has not been created.
 */
export function setQueueDepth(queue: string, depth: number): void {
  if (!queueDepthGauge) return;
  queueDepthGauge.set({ queue }, depth);
}

/**
 * Record one job processing duration for a queue.
 * No-op while the job histogram has not been created.
 */
export function observeJobDuration(queue: string, seconds: number): void {
  if (!jobHist) return;
  jobHist.observe({ queue }, seconds);
}

/**
 * Record one collaboration store duration.
 * No-op while the collab histogram has not been created.
 */
export function observeCollabStore(seconds: number): void {
  if (!collabHist) return;
  collabHist.observe(seconds);
}
|
||||
@@ -1,86 +0,0 @@
|
||||
import { createServer, Server } from 'node:http';
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { getMetricsRegistry, isMetricsEnabled } from './metrics.registry';
|
||||
|
||||
/**
|
||||
* Start the Prometheus scrape endpoint on a SEPARATE port, taken from
|
||||
* `METRICS_PORT`. There is NO default port: when `METRICS_PORT` is unset the
|
||||
* whole metrics subsystem is OFF and this returns null. This is a bare node:http
|
||||
* server, NOT part of the Fastify app, so `/metrics` never exists on the public
|
||||
* :3000 listener.
|
||||
*
|
||||
* Returns the http.Server (so callers can close it on shutdown) or null when
|
||||
* metrics are disabled. The reference is also kept module-side so the Nest
|
||||
* lifecycle (see MetricsModule) can close it on application shutdown without
|
||||
* threading the handle back through the non-DI bootstrap.
|
||||
*/
|
||||
let metricsServer: Server | null = null;
|
||||
|
||||
/**
 * Start the scrape listener on the port given by `METRICS_PORT`.
 *
 * @returns The http.Server that was started, or null when metrics are
 *   disabled, no registry exists, or `METRICS_PORT` is not a usable TCP port
 *   (in which case a warning is logged and nothing is started).
 */
export function startMetricsServer(): Server | null {
  if (!isMetricsEnabled()) return null;

  const logger = new Logger('MetricsServer');
  const register = getMetricsRegistry();
  if (!register) return null;

  const port = Number(process.env.METRICS_PORT);
  // The upper bound matters: listen() throws synchronously for a port above
  // 65535, which would abort bootstrap instead of taking the warn-and-skip path.
  if (!Number.isInteger(port) || port <= 0 || port > 65535) {
    logger.warn(
      `Invalid METRICS_PORT="${process.env.METRICS_PORT}", metrics endpoint not started`,
    );
    return null;
  }

  const server = createServer(async (req, res) => {
    // Match on the path only, so a scrape URL carrying query parameters
    // (e.g. /metrics?format=...) is still served instead of returning 404.
    const path = (req.url ?? '').split('?', 1)[0];
    if (req.method === 'GET' && path === '/metrics') {
      try {
        const body = await register.metrics();
        res.setHeader('Content-Type', register.contentType);
        res.statusCode = 200;
        res.end(body);
      } catch (err) {
        res.statusCode = 500;
        res.end(String((err as Error)?.message ?? 'error'));
      }
      return;
    }
    res.statusCode = 404;
    res.end();
  });

  // Attach the error listener before listen() so a bind failure (e.g. the port
  // is already in use) is always logged rather than surfacing as an unhandled
  // 'error' event.
  server.on('error', (err) => {
    logger.error(`Metrics server error: ${err?.message}`);
  });

  // Bind on all interfaces: the scraper (VictoriaMetrics) reaches this from
  // another container as docmost:9464. The port is not published to the host.
  server.listen(port, '0.0.0.0', () => {
    logger.log(`Metrics endpoint listening on :${port}/metrics`);
  });

  // Keep the handle module-side so closeMetricsServer() can shut it down.
  metricsServer = server;
  return server;
}
|
||||
|
||||
/**
 * Shut down the metrics scrape server, if there is one.
 *
 * Safe to call repeatedly and when metrics are disabled: the module-level
 * handle is cleared first, so later calls find nothing to close and resolve
 * immediately. Wired into Nest's shutdown lifecycle so the listener is not
 * left dangling on shutdown.
 *
 * @returns A promise that resolves once the server's close callback has fired.
 */
export function closeMetricsServer(): Promise<void> {
  const active = metricsServer;
  metricsServer = null;
  if (!active) return Promise.resolve();

  return new Promise<void>((done) => {
    active.close(() => done());

    // close() only stops NEW connections; its callback waits for existing
    // keep-alive sockets to drain. The scraper (VictoriaMetrics/vmagent) keeps
    // an idle keep-alive socket open, so shutdown would otherwise hang until
    // the scraper disconnects or the orchestrator escalates to SIGKILL at the
    // end of the kill-grace window. Dropping idle sockets lets close() finish
    // right away.
    active.closeIdleConnections();

    // Never let this server keep the event loop alive on its own.
    active.unref();
  });
}
|
||||
@@ -1,70 +0,0 @@
|
||||
import { FastifyRequest } from 'fastify';
|
||||
import { resolveRouteLabel } from './http-metrics.hook';
|
||||
import { firstSqlToken, isStreamingResponse } from './metrics.constants';
|
||||
|
||||
describe('resolveRouteLabel (histogram route label)', () => {
|
||||
it('uses the ROUTE TEMPLATE, never the raw URL', () => {
|
||||
// routeOptions.url is the matched template; url is the raw path with the id.
|
||||
const req = {
|
||||
url: '/api/pages/abc-123-def',
|
||||
routeOptions: { url: '/api/pages/:id' },
|
||||
} as unknown as FastifyRequest;
|
||||
expect(resolveRouteLabel(req)).toBe('/api/pages/:id');
|
||||
expect(resolveRouteLabel(req)).not.toContain('abc-123-def');
|
||||
});
|
||||
|
||||
it('falls back to "unknown" on a 404 (no matched route template)', () => {
|
||||
const req = {
|
||||
url: '/totally/unmatched/path',
|
||||
routeOptions: {},
|
||||
} as unknown as FastifyRequest;
|
||||
expect(resolveRouteLabel(req)).toBe('unknown');
|
||||
});
|
||||
|
||||
it('falls back to "unknown" when routeOptions is missing', () => {
|
||||
const req = { url: '/x' } as unknown as FastifyRequest;
|
||||
expect(resolveRouteLabel(req)).toBe('unknown');
|
||||
});
|
||||
});
|
||||
|
||||
describe('isStreamingResponse (SSE exclusion)', () => {
|
||||
it('excludes text/event-stream responses by content-type', () => {
|
||||
expect(isStreamingResponse('text/event-stream', '/api/ai-chat/stream')).toBe(
|
||||
true,
|
||||
);
|
||||
expect(isStreamingResponse('text/event-stream; charset=utf-8', '/x')).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('excludes known /stream routes by suffix as a fallback', () => {
|
||||
expect(isStreamingResponse('application/json', '/api/ai-chat/stream')).toBe(
|
||||
true,
|
||||
);
|
||||
expect(isStreamingResponse(undefined, '/api/shares/ai/stream')).toBe(true);
|
||||
});
|
||||
|
||||
it('does not exclude ordinary JSON responses', () => {
|
||||
expect(isStreamingResponse('application/json', '/api/pages/:id')).toBe(
|
||||
false,
|
||||
);
|
||||
expect(isStreamingResponse(undefined, '/api/pages/:id')).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('firstSqlToken (bounded db label)', () => {
|
||||
it('returns the lower-cased leading keyword', () => {
|
||||
expect(firstSqlToken('SELECT * FROM pages')).toBe('select');
|
||||
expect(firstSqlToken(' insert into x values (1)')).toBe('insert');
|
||||
expect(firstSqlToken('UPDATE pages SET a=1')).toBe('update');
|
||||
expect(firstSqlToken('delete from pages')).toBe('delete');
|
||||
expect(firstSqlToken('(SELECT 1)')).toBe('select');
|
||||
});
|
||||
|
||||
it('collapses unknown/empty queries to "other"', () => {
|
||||
expect(firstSqlToken('')).toBe('other');
|
||||
expect(firstSqlToken(undefined)).toBe('other');
|
||||
expect(firstSqlToken('123 not sql')).toBe('other');
|
||||
expect(firstSqlToken('vacuum analyze')).toBe('other');
|
||||
});
|
||||
});
|
||||
@@ -50,10 +50,6 @@ export class StaticModule implements OnModuleInit {
|
||||
: undefined,
|
||||
POSTHOG_HOST: this.environmentService.getPostHogHost(),
|
||||
POSTHOG_KEY: this.environmentService.getPostHogKey(),
|
||||
// #355 — mirrors the server-side CLIENT_TELEMETRY_ENABLED gate so the
|
||||
// client only collects/sends vitals when the operator opts in.
|
||||
CLIENT_TELEMETRY_ENABLED:
|
||||
this.environmentService.isClientTelemetryEnabled(),
|
||||
};
|
||||
|
||||
const windowScriptContent = `<script>window.CONFIG=${JSON.stringify(configString)};</script>`;
|
||||
|
||||
@@ -9,7 +9,6 @@ import {
|
||||
AI_CHAT_THROTTLER,
|
||||
PAGE_TEMPLATE_THROTTLER,
|
||||
PUBLIC_SHARE_AI_THROTTLER,
|
||||
VITALS_THROTTLER,
|
||||
} from './throttler-names';
|
||||
|
||||
@Module({
|
||||
@@ -30,8 +29,6 @@ import {
|
||||
{ name: PAGE_TEMPLATE_THROTTLER, ttl: 60_000, limit: 30 },
|
||||
// Anonymous public-share assistant: ~5 req/min per IP.
|
||||
{ name: PUBLIC_SHARE_AI_THROTTLER, ttl: 60_000, limit: 5 },
|
||||
// Anonymous client perf-telemetry sink: 120 batched posts/min per IP.
|
||||
{ name: VITALS_THROTTLER, ttl: 60_000, limit: 120 },
|
||||
],
|
||||
errorMessage: 'Too many requests',
|
||||
// Pass ioredis options (not a pre-built Redis instance) so
|
||||
|
||||
@@ -6,7 +6,3 @@ export const PAGE_TEMPLATE_THROTTLER = 'page-template';
|
||||
// ThrottlerGuard tracker) to bound anonymous abuse — the workspace owner pays
|
||||
// for the tokens.
|
||||
export const PUBLIC_SHARE_AI_THROTTLER = 'public-share-ai';
|
||||
// IP-keyed throttler for the anonymous client perf-telemetry sink
|
||||
// (POST /api/telemetry/vitals). Browsers batch metrics, so the limit is
|
||||
// generous; it only exists to bound abuse of the public, unauthenticated route.
|
||||
export const VITALS_THROTTLER = 'vitals';
|
||||
|
||||
@@ -16,9 +16,6 @@ import { EnvironmentService } from './integrations/environment/environment.servi
|
||||
import { SANDBOX_API_PATH } from './integrations/sandbox/sandbox.constants';
|
||||
import { resolveFrameHeader } from './common/helpers';
|
||||
import { resolveTrustProxy } from './integrations/environment/trust-proxy.util';
|
||||
import { isMetricsEnabled } from './integrations/metrics/metrics.registry';
|
||||
import { recordHttpResponse } from './integrations/metrics/http-metrics.hook';
|
||||
import { startMetricsServer } from './integrations/metrics/metrics.server';
|
||||
|
||||
async function bootstrap() {
|
||||
const app = await NestFactory.create<NestFastifyApplication>(
|
||||
@@ -94,19 +91,6 @@ async function bootstrap() {
|
||||
done();
|
||||
});
|
||||
|
||||
// #355 — HTTP request-duration histogram. Registered ONLY when METRICS_PORT is
|
||||
// set (otherwise no collector runs at all). Uses the bounded route template
|
||||
// label and excludes SSE/streaming responses (see recordHttpResponse).
|
||||
if (isMetricsEnabled()) {
|
||||
app
|
||||
.getHttpAdapter()
|
||||
.getInstance()
|
||||
.addHook('onResponse', (req, reply, done) => {
|
||||
recordHttpResponse(req, reply);
|
||||
done();
|
||||
});
|
||||
}
|
||||
|
||||
app
|
||||
.getHttpAdapter()
|
||||
.getInstance()
|
||||
@@ -143,9 +127,6 @@ async function bootstrap() {
|
||||
'/api/workspace/create',
|
||||
'/api/workspace/joined',
|
||||
'/api/workspace/find-by-email',
|
||||
// Public client perf-telemetry sink: browsers post it without a
|
||||
// resolved workspace host, so the workspace-resolution gate must not 404 it.
|
||||
'/api/telemetry/vitals',
|
||||
// Anonymous in-RAM blob sandbox: a remote consumer fetches blobs by an
|
||||
// unguessable UUID without any workspace host context, so the
|
||||
// workspace-resolution gate must not apply.
|
||||
@@ -194,11 +175,6 @@ async function bootstrap() {
|
||||
`Listening on http://127.0.0.1:${port} / ${process.env.APP_URL}`,
|
||||
);
|
||||
});
|
||||
|
||||
// #355 — Prometheus scrape endpoint on a SEPARATE port (METRICS_PORT),
|
||||
// started after the app is up. No default port: a no-op when METRICS_PORT is
|
||||
// unset. Closed on shutdown by MetricsServerLifecycle (MetricsModule).
|
||||
startMetricsServer();
|
||||
}
|
||||
|
||||
bootstrap();
|
||||
|
||||
@@ -450,7 +450,7 @@ async function main() {
|
||||
// 8. get_page markdown round-trip sanity (table separator present)
|
||||
const md = await client.getPage(pageId);
|
||||
check("get_page md: table separator emitted", md.data.content.includes("| --- |"), "");
|
||||
check("get_page md: callout exported as Obsidian '> [!info]'", md.data.content.includes("> [!info]"));
|
||||
check("get_page md: callout exported as :::", md.data.content.includes(":::info"));
|
||||
|
||||
// 9. comments: create / list / reply / update / check_new / delete
|
||||
const beforeComments = new Date(Date.now() - 1000).toISOString();
|
||||
|
||||
Generated
+3
-27
@@ -269,6 +269,9 @@ importers:
|
||||
'@atlaskit/pragmatic-drag-and-drop-live-region':
|
||||
specifier: 1.3.4
|
||||
version: 1.3.4
|
||||
'@braintree/sanitize-url':
|
||||
specifier: 7.1.2
|
||||
version: 7.1.2
|
||||
'@casl/react':
|
||||
specifier: 5.0.1
|
||||
version: 5.0.1(@casl/ability@6.8.0)(react@18.3.1)
|
||||
@@ -416,9 +419,6 @@ importers:
|
||||
socket.io-client:
|
||||
specifier: 4.8.3
|
||||
version: 4.8.3
|
||||
web-vitals:
|
||||
specifier: ^5.1.0
|
||||
version: 5.1.0
|
||||
zod:
|
||||
specifier: 4.3.6
|
||||
version: 4.3.6
|
||||
@@ -747,9 +747,6 @@ importers:
|
||||
postmark:
|
||||
specifier: ^4.0.7
|
||||
version: 4.0.7
|
||||
prom-client:
|
||||
specifier: ^15.1.3
|
||||
version: 15.1.3
|
||||
react:
|
||||
specifier: ^18.3.1
|
||||
version: 18.3.1
|
||||
@@ -5994,9 +5991,6 @@ packages:
|
||||
bind-event-listener@3.0.0:
|
||||
resolution: {integrity: sha512-PJvH288AWQhKs2v9zyfYdPzlPqf5bXbGMmhmUIY9x4dAUGIWgomO771oBQNwJnMQSnUIXhKu6sgzpBRXTlvb8Q==}
|
||||
|
||||
bintrees@1.0.2:
|
||||
resolution: {integrity: sha512-VOMgTMwjAaUG580SXn3LacVgjurrbMme7ZZNYGSSV7mmtY6QQRh0Eg3pwIcntQ77DErK1L0NxkbetjcoXzVwKw==}
|
||||
|
||||
bl@4.1.0:
|
||||
resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==}
|
||||
|
||||
@@ -9327,10 +9321,6 @@ packages:
|
||||
process-warning@5.0.0:
|
||||
resolution: {integrity: sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==}
|
||||
|
||||
prom-client@15.1.3:
|
||||
resolution: {integrity: sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g==}
|
||||
engines: {node: ^16 || ^18 || >=20}
|
||||
|
||||
prompts@2.4.2:
|
||||
resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==}
|
||||
engines: {node: '>= 6'}
|
||||
@@ -10158,9 +10148,6 @@ packages:
|
||||
resolution: {integrity: sha512-4LeEWl96twnS2Q7Bz4MGqgazLqO+hJN63GZxXoIqh1T3VweYD997gbU1ItNsQafqqXTXd5WFyFdReLtwvRBNiw==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
tdigest@0.1.2:
|
||||
resolution: {integrity: sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==}
|
||||
|
||||
terser-webpack-plugin@5.4.0:
|
||||
resolution: {integrity: sha512-Bn5vxm48flOIfkdl5CaD2+1CiUVbonWQ3KQPyP7/EuIl9Gbzq/gQFOzaMFUEgVjB1396tcK0SG8XcNJ/2kDH8g==}
|
||||
engines: {node: '>= 10.13.0'}
|
||||
@@ -16681,8 +16668,6 @@ snapshots:
|
||||
|
||||
bind-event-listener@3.0.0: {}
|
||||
|
||||
bintrees@1.0.2: {}
|
||||
|
||||
bl@4.1.0:
|
||||
dependencies:
|
||||
buffer: 5.7.1
|
||||
@@ -20494,11 +20479,6 @@ snapshots:
|
||||
|
||||
process-warning@5.0.0: {}
|
||||
|
||||
prom-client@15.1.3:
|
||||
dependencies:
|
||||
'@opentelemetry/api': 1.9.0
|
||||
tdigest: 0.1.2
|
||||
|
||||
prompts@2.4.2:
|
||||
dependencies:
|
||||
kleur: 3.0.3
|
||||
@@ -21544,10 +21524,6 @@ snapshots:
|
||||
minizlib: 3.1.0
|
||||
yallist: 5.0.0
|
||||
|
||||
tdigest@0.1.2:
|
||||
dependencies:
|
||||
bintrees: 1.0.2
|
||||
|
||||
terser-webpack-plugin@5.4.0(@swc/core@1.5.25(@swc/helpers@0.5.5))(webpack@5.106.0(@swc/core@1.5.25(@swc/helpers@0.5.5))):
|
||||
dependencies:
|
||||
'@jridgewell/trace-mapping': 0.3.31
|
||||
|
||||
Reference in New Issue
Block a user