diff --git a/apps/server/src/collaboration/constants.ts b/apps/server/src/collaboration/constants.ts index 8ce8c825..9401d973 100644 --- a/apps/server/src/collaboration/constants.ts +++ b/apps/server/src/collaboration/constants.ts @@ -1,3 +1,10 @@ export const HISTORY_INTERVAL = 5 * 60 * 1000; export const HISTORY_FAST_INTERVAL = 60 * 1000; export const HISTORY_FAST_THRESHOLD = 5 * 60 * 1000; + +// #348 — debounce window for the per-page RAG re-embed job. Repeated saves +// within this window collapse to a single delayed job (coalesced by a stable +// jobId), so active editing does not pile up expensive re-embeds (external API +// + page_embeddings rewrite, concurrency 1). The worker reads the CURRENT page +// state at run time, so the last content within the window wins. +export const EMBED_DEBOUNCE_MS = 30 * 1000; diff --git a/apps/server/src/collaboration/extensions/persistence-store.spec.ts b/apps/server/src/collaboration/extensions/persistence-store.spec.ts index e707290f..122356bb 100644 --- a/apps/server/src/collaboration/extensions/persistence-store.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence-store.spec.ts @@ -431,7 +431,17 @@ describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot' it('uses the canonical page.id (not the slugId doc name) for post-store side effects (#260)', async () => { const SLUG = 'slug-1'; // persistedHumanPage.slugId; findById resolves it const document = ydocFor(doc('NEW AGENT CONTENT')); - pageRepo.findById.mockResolvedValue(persistedHumanPage('NEW AGENT CONTENT')); + // #348 — the transclusion sync now runs only when the new OR the previously + // persisted content carries a transclusion-family node. Give the persisted + // (old) content a pageEmbed so the sync path is exercised and the #260 + // UUID-vs-slugId contract asserted below is still verified. + pageRepo.findById.mockResolvedValue({ + ...persistedHumanPage('NEW AGENT CONTENT'), + content: { + type: 'doc', + content: [{ type: 'pageEmbed', attrs: { sourcePageId: 'src-1' } }], + }, + }); pageHistoryRepo.findPageLastHistory.mockResolvedValue(null); // A `page.` document name (the bug's smoking gun), agent store over diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 35310cf4..b5b207fa 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -36,11 +36,13 @@ import { import { Page } from '@docmost/db/types/entity.types'; import { CollabHistoryService } from '../services/collab-history.service'; import { + EMBED_DEBOUNCE_MS, HISTORY_FAST_INTERVAL, HISTORY_FAST_THRESHOLD, HISTORY_INTERVAL, } from '../constants'; import { TransclusionService } from '../../core/page/transclusion/transclusion.service'; +import { hasTransclusionFamilyNodes } from '../../core/page/transclusion/utils/transclusion-prosemirror.util'; import { observeCollabStore } from '../../integrations/metrics/metrics.registry'; /** @@ -415,7 +417,18 @@ export class PersistenceExtension implements Extension { // Use the canonical page UUID (page.id), not the doc-name id, which may be // a slugId for a `page.` doc (#260). The transclusion/reference // syncs write uuid-typed columns, so a slugId here threw Postgres 22P02. - await this.syncTransclusion(page.id, page.workspaceId, tiptapJson); + // + // #348 — skip the three sync SELECTs when neither the new content nor the + // previously-persisted content has any transclusion/reference/pageEmbed + // node: nothing to insert, and (the DB mirrors the old content) nothing to + // delete. Whenever either side has one, run the idempotent sync exactly as + // before so removals are still reconciled. + if ( + hasTransclusionFamilyNodes(tiptapJson) || + hasTransclusionFamilyNodes(page.content) + ) { + await this.syncTransclusion(page.id, page.workspaceId, tiptapJson); + } } if (page) { @@ -431,7 +444,17 @@ export class PersistenceExtension implements Extension { (m) => m.entityId, ); - if (userMentions.length > 0) { + // #348 — only enqueue when the mentioned-user set actually GAINED a member. + // The processor (processPageMention) already no-ops when every current + // mention was present before (newMentions.length === 0), so skipping the + // enqueue in that case is behavior-identical and avoids piling up no-op jobs + // on every save of a page that merely CONTAINS (unchanged) mentions. + const oldMentionedUserIdSet = new Set(oldMentionedUserIds); + const hasNewMentionedUser = userMentions.some( + (m) => !oldMentionedUserIdSet.has(m.entityId), + ); + + if (hasNewMentionedUser) { await this.notificationQueue.add(QueueJob.PAGE_MENTION_NOTIFICATION, { userMentions: userMentions.map((m) => ({ userId: m.entityId, @@ -446,12 +469,23 @@ export class PersistenceExtension implements Extension { } as IPageMentionNotificationJob); } - await this.aiQueue.add(QueueJob.PAGE_CONTENT_UPDATED, { - // Canonical UUID: the embedding reindex resolves pages by uuid, so a - // slugId here threw Postgres 22P02 invalid-uuid (#260). - pageIds: [page.id], - workspaceId: page.workspaceId, - }); + await this.aiQueue.add( + QueueJob.PAGE_CONTENT_UPDATED, + { + // Canonical UUID: the embedding reindex resolves pages by uuid, so a + // slugId here threw Postgres 22P02 invalid-uuid (#260). + pageIds: [page.id], + workspaceId: page.workspaceId, + }, + // #348 — coalesce re-embeds during active editing. A stable per-page + // jobId + delay means repeated saves within EMBED_DEBOUNCE_MS collapse + // to one delayed job instead of one expensive re-embed per save. The + // worker reads the current page state at run time, so last content wins. + // BullMQ forbids ':' in custom job ids (Redis key separator), so '-' is + // used; page.id is a UUID, so the id is unique per page. removeOnComplete + // (queue.module) frees the id after each run so the next window re-arms. + { jobId: `embed-${page.id}`, delay: EMBED_DEBOUNCE_MS }, + ); await this.enqueuePageHistory(page, lastUpdatedSource); } diff --git a/apps/server/src/collaboration/extensions/redis-sync/redis-sync.extension.ts b/apps/server/src/collaboration/extensions/redis-sync/redis-sync.extension.ts index 38747465..2424d13b 100644 --- a/apps/server/src/collaboration/extensions/redis-sync/redis-sync.extension.ts +++ b/apps/server/src/collaboration/extensions/redis-sync/redis-sync.extension.ts @@ -220,6 +220,13 @@ export class RedisSyncExtension implements Extension { }; async maintainLock(documentName: string) { + // #348 — clear any existing timer for this document before installing a new + // one. Without this, a second maintainLock for the same document (a + // reload-without-unload) overwrites this.locks[documentName] and leaks the + // previous interval, which keeps firing SET forever with no way to clear it. + if (this.locks[documentName]) { + clearInterval(this.locks[documentName]); + } this.locks[documentName] = setInterval(() => { this.pub.set( this.getKey(documentName), diff --git a/apps/server/src/common/helpers/cache-keys.ts b/apps/server/src/common/helpers/cache-keys.ts index 38b24d20..128501ca 100644 --- a/apps/server/src/common/helpers/cache-keys.ts +++ b/apps/server/src/common/helpers/cache-keys.ts @@ -4,8 +4,21 @@ export const CacheKey = { `perm:space-roles:${userId}:${spaceId}`, PAGE_CAN_EDIT: (userId: string, pageId: string) => `perm:can-edit:${userId}:${pageId}`, + // #348 — DomainMiddleware workspace resolution. Self-hosted resolves the single + // workspace (constant key); cloud resolves by the request subdomain (lowercased + // to match the case-insensitive `LOWER(hostname)` lookup). Every WorkspaceRepo + // mutator busts these, so staleness is bounded by both explicit invalidation and + // the short TTL below. + WORKSPACE_SELF_HOSTED: 'workspace:self-hosted', + WORKSPACE_BY_HOST: (subdomain: string) => + `workspace:byhost:${subdomain.toLowerCase()}`, }; // Permission caches dedupe repeated checks within and across short request bursts. // 5s keeps staleness on revocations bounded. export const PERMISSION_CACHE_TTL_MS = 5_000; + +// #348 — workspace row changes rarely; a short TTL bounds staleness of +// security-relevant fields (enforceSso/enforceMfa/status) even if an explicit +// bust is ever missed, while still removing the per-request workspace query. +export const WORKSPACE_CACHE_TTL_MS = 15_000; diff --git a/apps/server/src/common/middlewares/domain.middleware.ts b/apps/server/src/common/middlewares/domain.middleware.ts index 1a2400b8..cc21fcc5 100644 --- a/apps/server/src/common/middlewares/domain.middleware.ts +++ b/apps/server/src/common/middlewares/domain.middleware.ts @@ -1,13 +1,42 @@ -import { Injectable, NestMiddleware, NotFoundException } from '@nestjs/common'; +import { Inject, Injectable, NestMiddleware } from '@nestjs/common'; import { FastifyRequest, FastifyReply } from 'fastify'; +import { CACHE_MANAGER } from '@nestjs/cache-manager'; +import { Cache } from 'cache-manager'; import { EnvironmentService } from '../../integrations/environment/environment.service'; import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; +import { Workspace } from '@docmost/db/types/entity.types'; +import { withCache } from '../helpers/with-cache'; +import { CacheKey, WORKSPACE_CACHE_TTL_MS } from '../helpers/cache-keys'; + +// #348 — timestamptz columns on the workspace row. The cache store (Keyv/Redis) +// JSON-serializes values, so a cached workspace comes back with these fields as +// ISO strings. Reviving them to Date keeps the cached path byte-identical to the +// direct DB path (postgres.js returns Date), so nothing downstream can observe a +// cache hit vs miss. Idempotent: `new Date(date)` on an already-Date value is a +// no-op-equivalent. Keep in sync with the workspace timestamptz columns. +const WORKSPACE_DATE_FIELDS: Array = [ + 'createdAt', + 'updatedAt', + 'deletedAt', + 'trialEndAt', +]; + +function reviveWorkspaceDates(workspace: Workspace): Workspace { + for (const field of WORKSPACE_DATE_FIELDS) { + const value = workspace[field]; + if (value != null) { + (workspace as any)[field] = new Date(value as any); + } + } + return workspace; +} @Injectable() export class DomainMiddleware implements NestMiddleware { constructor( private workspaceRepo: WorkspaceRepo, private environmentService: EnvironmentService, + @Inject(CACHE_MANAGER) private readonly cacheManager: Cache, ) {} async use( req: FastifyRequest['raw'], @@ -15,13 +44,21 @@ export class DomainMiddleware implements NestMiddleware { next: () => void, ) { if (this.environmentService.isSelfHosted()) { - const workspace = await this.workspaceRepo.findFirst(); + // #348 — cache the single-workspace lookup that runs on every request. + // Invalidated by every WorkspaceRepo mutator (see bustWorkspaceCache). + const workspace = await withCache( + this.cacheManager, + CacheKey.WORKSPACE_SELF_HOSTED, + WORKSPACE_CACHE_TTL_MS, + () => this.workspaceRepo.findFirst(), + ); if (!workspace) { //throw new NotFoundException('Workspace not found'); (req as any).workspaceId = null; return next(); } + reviveWorkspaceDates(workspace); // TODO: unify (req as any).workspaceId = workspace.id; (req as any).workspace = workspace; @@ -29,13 +66,21 @@ export class DomainMiddleware implements NestMiddleware { const header = req.headers.host; const subdomain = header.split('.')[0]; - const workspace = await this.workspaceRepo.findByHostname(subdomain); + // #348 — cache per-subdomain workspace resolution. Keyed by subdomain (the + // hostname column); busted per hostname by every WorkspaceRepo mutator. + const workspace = await withCache( + this.cacheManager, + CacheKey.WORKSPACE_BY_HOST(subdomain), + WORKSPACE_CACHE_TTL_MS, + () => this.workspaceRepo.findByHostname(subdomain), + ); if (!workspace) { (req as any).workspaceId = null; return next(); } + reviveWorkspaceDates(workspace); (req as any).workspaceId = workspace.id; (req as any).workspace = workspace; } diff --git a/apps/server/src/core/auth/strategies/jwt.strategy.ts b/apps/server/src/core/auth/strategies/jwt.strategy.ts index 024b05de..9e5604a9 100644 --- a/apps/server/src/core/auth/strategies/jwt.strategy.ts +++ b/apps/server/src/core/auth/strategies/jwt.strategy.ts @@ -51,7 +51,21 @@ export class JwtStrategy extends PassportStrategy(Strategy, 'jwt') { throw new UnauthorizedException(); } - const workspace = await this.workspaceRepo.findById(payload.workspaceId); + // #348 — reuse the workspace DomainMiddleware already loaded for this request + // instead of re-querying it. `validate()` above has confirmed + // `req.raw.workspaceId === payload.workspaceId` (or that it is unset), and the + // middleware sets `req.raw.workspace` alongside `req.raw.workspaceId` from the + // SAME workspace row, so when the ids match this is that row. NOTE it is the + // middleware's `selectAll` object (a superset of the fallback `findById` base + // fields — it also carries licenseKey/auditRetentionDays); that is harmless + // here because every consumer reads this workspace via the AuthWorkspace + // decorator, which already preferred `req.raw.workspace` (the selectAll object) + // over `req.user.workspace` before this change. Fall back to the query if the + // middleware did not populate it (a path that bypasses DomainMiddleware). + const workspace = + req.raw.workspace && req.raw.workspaceId === payload.workspaceId + ? req.raw.workspace + : await this.workspaceRepo.findById(payload.workspaceId); if (!workspace) { throw new UnauthorizedException(); diff --git a/apps/server/src/core/favorite/services/favorite.service.ts b/apps/server/src/core/favorite/services/favorite.service.ts index 79902e64..c3e9a0ae 100644 --- a/apps/server/src/core/favorite/services/favorite.service.ts +++ b/apps/server/src/core/favorite/services/favorite.service.ts @@ -38,6 +38,8 @@ export class FavoriteService { await this.pagePermissionRepo.filterAccessiblePageIds({ pageIds: result.items, userId, + // #348 — favorites load at app-start; enable the workspace short-circuit. + workspaceId, }); const accessibleSet = new Set(accessibleIds); result.items = result.items.filter((id) => accessibleSet.has(id)); @@ -125,6 +127,8 @@ export class FavoriteService { await this.pagePermissionRepo.filterAccessiblePageIds({ pageIds, userId, + // #348 — workspace-level short-circuit for the favorites list. + workspaceId, }); accessiblePageSet = new Set(accessibleIds); } diff --git a/apps/server/src/core/notification/notification.controller.ts b/apps/server/src/core/notification/notification.controller.ts index be5ee1d3..c5fec794 100644 --- a/apps/server/src/core/notification/notification.controller.ts +++ b/apps/server/src/core/notification/notification.controller.ts @@ -23,7 +23,12 @@ export class NotificationController { @Body() dto: ListNotificationsDto, @AuthUser() user: User, ) { - return this.notificationService.findByUserId(user.id, dto, dto.type); + return this.notificationService.findByUserId( + user.id, + dto, + dto.type, + user.workspaceId, + ); } @HttpCode(HttpStatus.OK) diff --git a/apps/server/src/core/notification/notification.service.ts b/apps/server/src/core/notification/notification.service.ts index 1f88bf59..73450a50 100644 --- a/apps/server/src/core/notification/notification.service.ts +++ b/apps/server/src/core/notification/notification.service.ts @@ -45,6 +45,7 @@ export class NotificationService { userId: string, pagination: PaginationOptions, type: NotificationTab = 'all', + workspaceId?: string | null, ) { const result = await this.notificationRepo.findByUserId( userId, @@ -61,6 +62,8 @@ export class NotificationService { await this.pagePermissionRepo.filterAccessiblePageIds({ pageIds, userId, + // #348 — notifications list; enable the workspace short-circuit. + workspaceId, }); const accessibleSet = new Set(accessiblePageIds); diff --git a/apps/server/src/core/page/page.controller.ts b/apps/server/src/core/page/page.controller.ts index fd5c866e..05b363e0 100644 --- a/apps/server/src/core/page/page.controller.ts +++ b/apps/server/src/core/page/page.controller.ts @@ -446,7 +446,11 @@ export class PageController { ); } - return this.pageService.getRecentPages(user.id, pagination); + return this.pageService.getRecentPages( + user.id, + pagination, + user.workspaceId, + ); } @HttpCode(HttpStatus.OK) @@ -469,7 +473,13 @@ export class PageController { } } - return this.pageService.getCreatedByPages(targetUserId, user.id, pagination, dto.spaceId); + return this.pageService.getCreatedByPages( + targetUserId, + user.id, + pagination, + dto.spaceId, + user.workspaceId, + ); } @HttpCode(HttpStatus.OK) diff --git a/apps/server/src/core/page/services/page.service.ts b/apps/server/src/core/page/services/page.service.ts index c6ee150d..cd87a489 100644 --- a/apps/server/src/core/page/services/page.service.ts +++ b/apps/server/src/core/page/services/page.service.ts @@ -1163,6 +1163,7 @@ export class PageService { async getRecentPages( userId: string, pagination: PaginationOptions, + workspaceId?: string | null, ): Promise> { const result = await this.pageRepo.getRecentPages(userId, pagination); @@ -1172,6 +1173,8 @@ export class PageService { await this.pagePermissionRepo.filterAccessiblePageIds({ pageIds, userId, + // #348 — cross-space "recent"; enable the workspace short-circuit. + workspaceId, }); const accessibleSet = new Set(accessibleIds); result.items = result.items.filter((p) => accessibleSet.has(p.id)); @@ -1185,6 +1188,7 @@ export class PageService { requestingUserId: string, pagination: PaginationOptions, spaceId?: string, + workspaceId?: string | null, ): Promise> { const result = await this.pageRepo.getCreatedByPages( creatorId, @@ -1199,6 +1203,9 @@ export class PageService { await this.pagePermissionRepo.filterAccessiblePageIds({ pageIds, userId: requestingUserId, + spaceId, + // #348 — enable the workspace short-circuit when not space-scoped. + workspaceId, }); const accessibleSet = new Set(accessibleIds); result.items = result.items.filter((p) => accessibleSet.has(p.id)); diff --git a/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts b/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts index c49b1bf5..b2592f61 100644 --- a/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts +++ b/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts @@ -93,6 +93,41 @@ function collectNodes( return Array.from(byKey.values()); } +/** + * #348 — cheap early-exit probe: does this doc contain ANY node the transclusion + * syncs care about (`transclusionSource` / `transclusionReference` / `pageEmbed`)? + * Lets the collab store skip the three sync SELECTs when neither the previous nor + * the new content has any such node — there is nothing to insert, and (since the + * DB mirrors the previously-persisted content) nothing to delete. Walks once and + * short-circuits on the first match; uses the same depth ceiling as the + * collectors. Deliberately does NOT skip `transclusionSource` subtrees: it only + * answers "any node present?", so descending everywhere is strictly conservative + * (it can never wrongly report "none"). + */ +export function hasTransclusionFamilyNodes(doc: unknown): boolean { + const visit = (node: any, depth: number): boolean => { + if (!node || typeof node !== 'object') return false; + if (depth > MAX_PM_WALK_DEPTH) return false; + + if ( + node.type === TRANSCLUSION_TYPE || + node.type === REFERENCE_TYPE || + node.type === PAGE_EMBED_TYPE + ) { + return true; + } + + if (Array.isArray(node.content)) { + for (const child of node.content) { + if (visit(child, depth + 1)) return true; + } + } + return false; + }; + + return visit(doc, 0); +} + /** * Walks a ProseMirror JSON document and returns one snapshot per top-level * `transclusion` node. Does not recurse into transclusions (schema disallows diff --git a/apps/server/src/core/search/search.service.ts b/apps/server/src/core/search/search.service.ts index f844941e..91e5e5f4 100644 --- a/apps/server/src/core/search/search.service.ts +++ b/apps/server/src/core/search/search.service.ts @@ -155,6 +155,8 @@ export class SearchService { pageIds, userId: opts.userId, spaceId: searchParams.spaceId, + // #348 — enables the workspace-level short-circuit when not space-scoped. + workspaceId: opts.workspaceId, }); const accessibleSet = new Set(accessibleIds); results = results.filter((r: any) => accessibleSet.has(r.id)); @@ -266,6 +268,8 @@ export class SearchService { await this.pagePermissionRepo.filterAccessiblePageIds({ pageIds, userId, + // #348 — workspace-level short-circuit for the suggest path. + workspaceId, }); const accessibleSet = new Set(accessibleIds); pages = pages.filter((p) => accessibleSet.has(p.id)); diff --git a/apps/server/src/database/migrations/20260705T120000-perf-indexes.ts b/apps/server/src/database/migrations/20260705T120000-perf-indexes.ts new file mode 100644 index 00000000..b39112d4 --- /dev/null +++ b/apps/server/src/database/migrations/20260705T120000-perf-indexes.ts @@ -0,0 +1,118 @@ +import { type Kysely, sql } from 'kysely'; + +/** + * #348 — targeted hot-path indexes. + * + * 1. GIN trigram indexes for `/search/suggest`. That endpoint runs a + * leading-wildcard `LOWER(f_unaccent(col)) LIKE '%q%'` per keystroke, which + * is a sequential scan without a trigram index. The index EXPRESSIONS below + * are `LOWER(f_unaccent(title|name))`, matching the predicates in + * search.service.ts exactly so the planner uses them (verified with EXPLAIN: + * the suggest predicate resolves to a Bitmap Index Scan on these indexes). + * + * IMMUTABLE-wrapper fix (required for the index to build): `f_unaccent` was + * defined as `SELECT unaccent('unaccent', $1)` (the two-arg, dictionary-named + * unaccent). That body CANNOT be used in an index expression: when Postgres + * inlines the IMMUTABLE SQL wrapper while building the index it fails to + * resolve the two-arg call (`function unaccent(unknown, text) does not exist`, + * the `'unaccent'` literal loses its regdictionary coercion). The single-arg + * `unaccent($1)` is the same operation (the default text-search dictionary IS + * `unaccent`; verified byte-equal on accented samples), and — crucially — + * SCHEMA-QUALIFIED as `public.unaccent($1)` it inlines cleanly, so the index + * builds. We therefore `CREATE OR REPLACE` `f_unaccent` to the qualified + * single-arg body. This is output-identical for every existing caller (the + * tsvector trigger, the main `tsv @@` search, and the suggest LIKE), so no + * reindex/backfill is needed; `down()` restores the original two-arg body. + * (The `unaccent` extension is installed in `public` in this codebase, which + * is why `public.unaccent` is the correct qualification.) + * + * 2. Composite indexes for two ORDER-BY-only-on-id queries that currently sort + * on top of a created_at index: + * - page_history: `findPageHistoryByPageId` does WHERE page_id ORDER BY id + * DESC, but only `(page_id, created_at DESC)` exists → extra sort. + * - comments: `findPageComments` does WHERE page_id ORDER BY id ASC, but only + * `(page_id)` exists → extra sort. + * + * DEPLOY-TIME LOCK WARNING: these are plain (non-CONCURRENT) CREATE INDEX + * statements — CONCURRENTLY is impossible because Kysely runs each migration in a + * transaction. They take a SHARE lock that BLOCKS writes (INSERT/UPDATE/DELETE) on + * pages/users/groups/comments/page_history for the duration of the build. The two + * GIN trigram builds on pages.title / users.name are the slow ones and can take + * minutes on a large tenant → a write-outage window during the deploy migration. + * For large installations, run this migration in a maintenance window, or build + * the trigram indexes out-of-band with CREATE INDEX CONCURRENTLY before deploying + * (then this migration's `IF NOT EXISTS` is a no-op). Small/typical tenants are + * unaffected. + */ +export async function up(db: Kysely): Promise { + // Index-compatible, output-identical redefinition of f_unaccent (see header). + await sql` + CREATE OR REPLACE FUNCTION f_unaccent(text) + RETURNS text + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT + AS $func$ + SELECT public.unaccent($1); + $func$ + `.execute(db); + + // Search-suggest trigram indexes. Expressions match search.service.ts. + await sql` + CREATE INDEX IF NOT EXISTS idx_pages_title_trgm + ON pages USING gin ((LOWER(f_unaccent(title))) gin_trgm_ops) + `.execute(db); + + await sql` + CREATE INDEX IF NOT EXISTS idx_users_name_trgm + ON users USING gin ((LOWER(f_unaccent(name))) gin_trgm_ops) + `.execute(db); + + await sql` + CREATE INDEX IF NOT EXISTS idx_groups_name_trgm + ON groups USING gin ((LOWER(f_unaccent(name))) gin_trgm_ops) + `.execute(db); + + // page_history: WHERE page_id ORDER BY id DESC (findPageHistoryByPageId). + await sql` + CREATE INDEX IF NOT EXISTS idx_page_history_page_id + ON page_history (page_id, id DESC) + `.execute(db); + + // comments: WHERE page_id ORDER BY id ASC (findPageComments). + await sql` + CREATE INDEX IF NOT EXISTS idx_comments_page_id_id + ON comments (page_id, id) + `.execute(db); + + // page_access(workspace_id): #348 made hasRestrictedPagesInWorkspace uncached + // (F1 fix), so `EXISTS(SELECT 1 FROM page_access WHERE workspace_id=?)` now runs + // per-request on every whole-workspace list endpoint (global search + suggest, + // favorites, notifications, recent, created-by). page_access only had a + // space_id index → that EXISTS was a seq scan in the common zero-restriction + // case. This index makes it an index-only existence probe. + await sql` + CREATE INDEX IF NOT EXISTS idx_page_access_workspace_id + ON page_access (workspace_id) + `.execute(db); +} + +export async function down(db: Kysely): Promise { + // Drop the expression indexes before restoring the function body. + await sql`DROP INDEX IF EXISTS idx_pages_title_trgm`.execute(db); + await sql`DROP INDEX IF EXISTS idx_users_name_trgm`.execute(db); + await sql`DROP INDEX IF EXISTS idx_groups_name_trgm`.execute(db); + await sql`DROP INDEX IF EXISTS idx_page_history_page_id`.execute(db); + await sql`DROP INDEX IF EXISTS idx_comments_page_id_id`.execute(db); + await sql`DROP INDEX IF EXISTS idx_page_access_workspace_id`.execute(db); + + // Restore the original two-arg (dictionary-named) f_unaccent body. + await sql` + CREATE OR REPLACE FUNCTION f_unaccent(text) + RETURNS text + LANGUAGE sql + IMMUTABLE PARALLEL SAFE STRICT + AS $func$ + SELECT unaccent('unaccent', $1); + $func$ + `.execute(db); +} diff --git a/apps/server/src/database/repos/page/page-permission.repo.ts b/apps/server/src/database/repos/page/page-permission.repo.ts index f753526c..38e03b4e 100644 --- a/apps/server/src/database/repos/page/page-permission.repo.ts +++ b/apps/server/src/database/repos/page/page-permission.repo.ts @@ -657,8 +657,9 @@ export class PagePermissionRepo { pageIds: string[]; userId: string; spaceId?: string; + workspaceId?: string | null; }): Promise { - const { pageIds, userId, spaceId } = opts; + const { pageIds, userId, spaceId, workspaceId } = opts; if (pageIds.length === 0) return []; if (spaceId) { @@ -666,6 +667,17 @@ export class PagePermissionRepo { if (!hasRestrictions) { return pageIds; } + } else if (workspaceId) { + // #348 — whole-workspace callers (no spaceId: favorites, notifications, + // recent, created-by, global search) skip the recursive-ancestor CTE + anti + // -join entirely when the workspace has ZERO restricted pages. When any + // restriction DOES exist, fall through to the identical CTE below, so + // behavior is unchanged whenever restrictions are present. + const hasRestrictions = + await this.hasRestrictedPagesInWorkspace(workspaceId); + if (!hasRestrictions) { + return pageIds; + } } const results = await this.db @@ -903,6 +915,39 @@ export class PagePermissionRepo { return Boolean(result?.exists); } + /** + * Workspace-level analogue of hasRestrictedPagesInSpace: does ANY page in the + * whole workspace carry a restriction? Lets whole-workspace access filters + * short-circuit the recursive-ancestor CTE when nothing is restricted at all. + * + * UNCACHED (like the sibling hasRestrictedPagesInSpace) — a single cheap + * `EXISTS(pageAccess WHERE workspaceId=?)` per call. This is an ACCESS-CONTROL + * gate on whole-workspace list endpoints, so it must never go stale: caching it + * (even 5s) reintroduced a leak the space-path never had — a concurrent + * whole-workspace read in the insert->commit window of the FIRST restricted page + * could re-populate `false` under withCache (read-then-set, no del-during-read + * guard) and override the insert bust, leaking that page to unauthorized users + * for up to the TTL (#348 review F1). An uncached EXISTS removes both the + * cache/DB asymmetry with hasRestrictedPagesInSpace and that race; the space + * path already accepts this exact per-call cost. + */ + async hasRestrictedPagesInWorkspace(workspaceId: string): Promise { + const result = await this.db + .selectNoFrom((eb) => + eb + .exists( + eb + .selectFrom('pageAccess') + .select(sql`1`.as('one')) + .where('pageAccess.workspaceId', '=', workspaceId), + ) + .as('exists'), + ) + .executeTakeFirst(); + + return Boolean(result?.exists); + } + /** * Given a list of parent page IDs, return which ones have at least one accessible child. * Efficient batch query for sidebar hasChildren calculation. diff --git a/apps/server/src/database/repos/page/page.repo.ts b/apps/server/src/database/repos/page/page.repo.ts index c8626553..7fed922a 100644 --- a/apps/server/src/database/repos/page/page.repo.ts +++ b/apps/server/src/database/repos/page/page.repo.ts @@ -581,6 +581,9 @@ export class PageRepo { const query = this.db .selectFrom('pages') .select(this.baseFields) + // NOTE: `content` IS needed here — the trash UI reads page.content to render + // the deleted-page preview modal (trash.tsx handlePageClick -> + // TrashPageContentModal pageContent). Do NOT drop it (see #348 review F3). .select('content') .select((eb) => this.withSpace(eb)) .select((eb) => this.withDeletedBy(eb)) diff --git a/apps/server/src/database/repos/workspace/workspace.repo.ts b/apps/server/src/database/repos/workspace/workspace.repo.ts index 23ebcc67..89996766 100644 --- a/apps/server/src/database/repos/workspace/workspace.repo.ts +++ b/apps/server/src/database/repos/workspace/workspace.repo.ts @@ -1,4 +1,6 @@ -import { Injectable } from '@nestjs/common'; +import { Inject, Injectable } from '@nestjs/common'; +import { CACHE_MANAGER } from '@nestjs/cache-manager'; +import { Cache } from 'cache-manager'; import { InjectKysely } from 'nestjs-kysely'; import { KyselyDB, KyselyTransaction } from '../../types/kysely.types'; import { dbOrTx } from '../../utils'; @@ -9,6 +11,7 @@ import { } from '@docmost/db/types/entity.types'; import { ExpressionBuilder, sql } from 'kysely'; import { DB, Workspaces } from '@docmost/db/types/db'; +import { CacheKey } from '../../../common/helpers/cache-keys'; /** * Writable `settings.ai.provider` keys, enforced at this generic SQL layer. This @@ -61,7 +64,34 @@ export class WorkspaceRepo { 'temporaryNoteHours', 'isScimEnabled', ]; - constructor(@InjectKysely() private readonly db: KyselyDB) {} + constructor( + @InjectKysely() private readonly db: KyselyDB, + @Inject(CACHE_MANAGER) private readonly cacheManager: Cache, + ) {} + + /** + * #348 — bust the DomainMiddleware workspace caches after any workspace write. + * Deletes BOTH the self-hosted (constant) key and the cloud per-hostname key so + * a single implementation covers either deployment mode (the irrelevant key is a + * harmless no-op). Best-effort: a cache error must never fail the write, and a + * missed bust is bounded by WORKSPACE_CACHE_TTL_MS. Note: a hostname RENAME only + * busts the NEW hostname's key (the row returned here carries the new hostname); + * the old key expires via TTL. + */ + private async bustWorkspaceCache( + workspace?: Pick | undefined, + ): Promise { + try { + await this.cacheManager.del(CacheKey.WORKSPACE_SELF_HOSTED); + if (workspace?.hostname) { + await this.cacheManager.del( + CacheKey.WORKSPACE_BY_HOST(workspace.hostname), + ); + } + } catch { + // cache is best-effort; TTL is the backstop + } + } async findById( workspaceId: string, @@ -144,12 +174,14 @@ export class WorkspaceRepo { trx?: KyselyTransaction, ): Promise { const db = dbOrTx(this.db, trx); - return db + const workspace = await db .updateTable('workspaces') .set({ ...updatableWorkspace, updatedAt: new Date() }) .where('id', '=', workspaceId) .returning(this.baseFields) .executeTakeFirst(); + await this.bustWorkspaceCache(workspace); + return workspace; } async insertWorkspace( @@ -157,11 +189,14 @@ export class WorkspaceRepo { trx?: KyselyTransaction, ): Promise { const db = dbOrTx(this.db, trx); - return db + const workspace = await db .insertInto('workspaces') .values(insertableWorkspace) .returning(this.baseFields) .executeTakeFirst(); + // Bust the cached "not found" so a fresh install / new tenant is seen at once. + await this.bustWorkspaceCache(workspace); + return workspace; } async count(): Promise { @@ -203,7 +238,7 @@ export class WorkspaceRepo { trx?: KyselyTransaction, ) { const db = dbOrTx(this.db, trx); - return db + const workspace = await db .updateTable('workspaces') .set({ settings: sql`COALESCE(settings, '{}'::jsonb) @@ -214,6 +249,8 @@ export class WorkspaceRepo { .where('id', '=', workspaceId) .returning(this.baseFields) .executeTakeFirst(); + await this.bustWorkspaceCache(workspace); + return workspace; } async updateAiSettings( @@ -223,7 +260,7 @@ export class WorkspaceRepo { trx?: KyselyTransaction, ) { const db = dbOrTx(this.db, trx); - return db + const workspace = await db .updateTable('workspaces') .set({ settings: sql`COALESCE(settings, '{}'::jsonb) @@ -234,6 +271,8 @@ export class WorkspaceRepo { .where('id', '=', workspaceId) .returning(this.baseFields) .executeTakeFirst(); + await this.bustWorkspaceCache(workspace); + return workspace; } /** @@ -272,7 +311,7 @@ export class WorkspaceRepo { entries.flatMap(([k, v]) => [sql.lit(k), sql`${v}::text`]), )})` : sql`'{}'::jsonb`; - return db + const workspace = await db .updateTable('workspaces') .set({ settings: sql`COALESCE(settings, '{}'::jsonb) || jsonb_build_object( @@ -287,6 +326,8 @@ export class WorkspaceRepo { .where('id', '=', workspaceId) .returning(this.baseFields) .executeTakeFirst(); + await this.bustWorkspaceCache(workspace); + return workspace; } /** @@ -303,7 +344,7 @@ export class WorkspaceRepo { trx?: KyselyTransaction, ) { const db = dbOrTx(this.db, trx); - return db + const workspace = await db .updateTable('workspaces') .set({ settings: sql`COALESCE(settings, '{}'::jsonb) @@ -313,6 +354,8 @@ export class WorkspaceRepo { .where('id', '=', workspaceId) .returning(this.baseFields) .executeTakeFirst(); + await this.bustWorkspaceCache(workspace); + return workspace; } async updateSharingSettings( @@ -322,7 +365,7 @@ export class WorkspaceRepo { trx?: KyselyTransaction, ) { const db = dbOrTx(this.db, trx); - return db + const workspace = await db .updateTable('workspaces') .set({ settings: sql`COALESCE(settings, '{}'::jsonb) @@ -333,6 +376,8 @@ export class WorkspaceRepo { .where('id', '=', workspaceId) .returning(this.baseFields) .executeTakeFirst(); + await this.bustWorkspaceCache(workspace); + return workspace; } async updateTemplateSettings( @@ -342,7 +387,7 @@ export class WorkspaceRepo { trx?: KyselyTransaction, ) { const db = dbOrTx(this.db, trx); - return db + const workspace = await db .updateTable('workspaces') .set({ settings: sql`COALESCE(settings, '{}'::jsonb) @@ -353,6 +398,8 @@ export class WorkspaceRepo { .where('id', '=', workspaceId) .returning(this.baseFields) .executeTakeFirst(); + await this.bustWorkspaceCache(workspace); + return workspace; } } diff --git a/apps/server/test/integration/page-permission-workspace-filter.int-spec.ts b/apps/server/test/integration/page-permission-workspace-filter.int-spec.ts new file mode 100644 index 00000000..5a5fdd32 --- /dev/null +++ b/apps/server/test/integration/page-permission-workspace-filter.int-spec.ts @@ -0,0 +1,113 @@ +import { Kysely } from 'kysely'; +import { PagePermissionRepo } from '@docmost/db/repos/page/page-permission.repo'; +import { GroupRepo } from '@docmost/db/repos/group/group.repo'; +import { + getTestDb, + destroyTestDb, + createWorkspace, + createSpace, + createUser, + createPage, +} from './db'; + +/** + * #348 — the whole-workspace access-filter short-circuit is an ACCESS-CONTROL + * path, so it must produce the SAME result as the full recursive-ancestor CTE. + * + * filterAccessiblePageIds({ workspaceId }) (no spaceId — the favorites / + * notifications / recent / created-by / global-search callers) skips the CTE only + * when the workspace has ZERO restricted pages. A page is "restricted & + * inaccessible" when it (or an ancestor) has a `pageAccess` row and the user has + * no matching `pagePermissions`. Driven against real Postgres, asserts: + * 1. zero restrictions -> short-circuit returns the full input set; + * 2. a restriction present -> the CTE runs and drops the page the user can't + * reach while keeping the reachable ones (behavior unchanged); + * 3. inserting the FIRST pageAccess flips hasRestrictedPagesInWorkspace + * false -> true immediately (the 0->1 transition — now uncached, no stale + * window, review F1); it is scoped per workspace. + */ +describe('#348 filterAccessiblePageIds workspace short-circuit (real PG)', () => { + let db: Kysely; + let repo: PagePermissionRepo; + let workspaceId: string; + let otherWorkspaceId: string; + let userId: string; + let spaceId: string; + + beforeAll(async () => { + db = getTestDb(); + // hasRestrictedPagesInWorkspace is now uncached, and no other cached + // permission path is exercised here, so a no-op cache stub suffices. + const cacheStub = { + get: async () => undefined, + set: async () => undefined, + del: async () => undefined, + } as never; + repo = new PagePermissionRepo(db, new GroupRepo(db), cacheStub); + + const ws = await createWorkspace(db); + workspaceId = ws.id; + const other = await createWorkspace(db); + otherWorkspaceId = other.id; + const user = await createUser(db, workspaceId); + userId = user.id; + const space = await createSpace(db, workspaceId); + spaceId = space.id; + }); + + afterAll(async () => { + await destroyTestDb(); + }); + + it('zero restrictions: short-circuit returns the full input set', async () => { + const p1 = await createPage(db, { workspaceId, spaceId }); + const p2 = await createPage(db, { workspaceId, spaceId }); + + expect(await repo.hasRestrictedPagesInWorkspace(workspaceId)).toBe(false); + + const ids = [p1.id, p2.id]; + const filtered = await repo.filterAccessiblePageIds({ + pageIds: ids, + userId, + workspaceId, + }); + expect(new Set(filtered)).toEqual(new Set(ids)); + }); + + it('a restriction present: filters out the page the user cannot reach', async () => { + const openPage = await createPage(db, { workspaceId, spaceId }); + const restrictedPage = await createPage(db, { workspaceId, spaceId }); + + // Add a pageAccess row on restrictedPage with NO matching pagePermissions for + // `userId` → the CTE anti-join marks it inaccessible for this user. + await db + .insertInto('pageAccess') + .values({ + pageId: restrictedPage.id, + workspaceId, + spaceId, + accessLevel: 'read', + creatorId: userId, + }) + .execute(); + + // 0->1 transition is reflected immediately (uncached). + expect(await repo.hasRestrictedPagesInWorkspace(workspaceId)).toBe(true); + + const filtered = await repo.filterAccessiblePageIds({ + pageIds: [openPage.id, restrictedPage.id], + userId, + workspaceId, + }); + expect(filtered).toContain(openPage.id); + expect(filtered).not.toContain(restrictedPage.id); + }); + + it('hasRestrictedPagesInWorkspace is scoped per workspace', async () => { + // The other workspace has no pageAccess rows → still false, unaffected by the + // restriction added above in `workspaceId`. + expect(await repo.hasRestrictedPagesInWorkspace(otherWorkspaceId)).toBe( + false, + ); + }); +}); diff --git a/apps/server/test/integration/workspace-repo-update-setting.int-spec.ts b/apps/server/test/integration/workspace-repo-update-setting.int-spec.ts index f4589e1b..6b021e31 100644 --- a/apps/server/test/integration/workspace-repo-update-setting.int-spec.ts +++ b/apps/server/test/integration/workspace-repo-update-setting.int-spec.ts @@ -1,7 +1,25 @@ import { Kysely } from 'kysely'; import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; +import { CacheKey } from 'src/common/helpers/cache-keys'; import { getTestDb, destroyTestDb, createWorkspace } from './db'; +// A minimal Map-backed cache double with a working `del` (the previous `{}` stub +// made bustWorkspaceCache's `del` throw into its own try/catch, so the #348 +// invalidation was never actually exercised — review F6). +function makeCacheDouble() { + const store = new Map(); + return { + store, + get: async (k: string) => store.get(k), + set: async (k: string, v: unknown) => { + store.set(k, v); + }, + del: async (k: string) => { + store.delete(k); + }, + }; +} + /** * A — WorkspaceRepo.updateSetting jsonb-MERGE (the html-embed kill-switch * write-half). Setting a single top-level key must NOT clobber sibling @@ -15,7 +33,9 @@ describe('WorkspaceRepo.updateSetting (jsonb merge) [integration]', () => { beforeAll(() => { db = getTestDb(); // Repos are plain classes taking @InjectKysely() db — instantiate directly. - repo = new WorkspaceRepo(db as any); + // 2nd arg is CACHE_MANAGER (used only to bust the #348 workspace cache); a + // stub is fine here since bustWorkspaceCache is best-effort (try/catch). + repo = new WorkspaceRepo(db as any, {} as any); }); afterAll(async () => { @@ -58,3 +78,62 @@ describe('WorkspaceRepo.updateSetting (jsonb merge) [integration]', () => { expect(updated.settings).toEqual({ htmlEmbed: false }); }); }); + +/** + * #348 F6 — the DomainMiddleware workspace cache (WORKSPACE_SELF_HOSTED / + * WORKSPACE_BY_HOST, 15s TTL) caches security-relevant fields (enforceSso/ + * enforceMfa/status). Its correctness rests entirely on bustWorkspaceCache being + * called from every mutator. This exercises the real invalidation with a working + * cache double (not the {} stub, whose del throws-and-swallows): warm the cache + * like DomainMiddleware, mutate, and assert the busted key is gone so a stale + * workspace row can't outlive the mutation. + */ +describe('WorkspaceRepo bustWorkspaceCache invalidation [integration]', () => { + let db: Kysely; + + beforeAll(() => { + db = getTestDb(); + }); + afterAll(async () => { + await destroyTestDb(); + }); + + it('updateSetting busts the self-hosted workspace cache key', async () => { + const cache = makeCacheDouble(); + const repo = new WorkspaceRepo(db as any, cache as any); + const ws = await createWorkspace(db, { settings: {} }); + + // Warm the cache as DomainMiddleware would (self-hosted key). + cache.store.set(CacheKey.WORKSPACE_SELF_HOSTED, ws); + expect(cache.store.has(CacheKey.WORKSPACE_SELF_HOSTED)).toBe(true); + + await repo.updateSetting(ws.id, 'htmlEmbed', true); + + // The mutation must have invalidated the cached row. + expect(cache.store.has(CacheKey.WORKSPACE_SELF_HOSTED)).toBe(false); + }); + + it('updateSharingSettings busts the by-host workspace cache key too', async () => { + const cache = makeCacheDouble(); + const repo = new WorkspaceRepo(db as any, cache as any); + const ws = await createWorkspace(db, { settings: {} }); + // createWorkspace assigns a unique hostname; read it back for the by-host key. + const { hostname } = await db + .selectFrom('workspaces') + .select(['hostname']) + .where('id', '=', ws.id) + .executeTakeFirstOrThrow(); + + // Warm BOTH keys (self-hosted + by-host); the by-host bust needs the row's + // hostname, which the mutator returns from the DB. + cache.store.set(CacheKey.WORKSPACE_SELF_HOSTED, ws); + cache.store.set(CacheKey.WORKSPACE_BY_HOST(hostname as string), ws); + + await repo.updateSharingSettings(ws.id, 'allowInvite', true); + + expect(cache.store.has(CacheKey.WORKSPACE_SELF_HOSTED)).toBe(false); + expect(cache.store.has(CacheKey.WORKSPACE_BY_HOST(hostname as string))).toBe( + false, + ); + }); +});