From 6a052b88b496e15718f61db8c2d1bc6f28283df3 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:52:32 +0300 Subject: [PATCH 1/5] fix(html-embed): strip embeds at serve time on authenticated read paths (#28) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the workspace htmlEmbed kill-switch. The public-share path already strips at serve time when the toggle is OFF, but the authenticated read paths (/info and /history/info) returned page/history content with embeds intact, so a disabled feature kept executing for in-workspace view-only viewers until the page was next saved. Now both paths resolve the workspace toggle and run stripHtmlEmbedNodes when it's OFF (fail-closed on a missing workspace), before any markdown/html format conversion. Admin-authored content only — completeness, not privilege escalation. Injects WorkspaceRepo into PageController. Co-Authored-By: Claude Opus 4.8 --- apps/server/src/core/page/page.controller.ts | 28 ++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/apps/server/src/core/page/page.controller.ts b/apps/server/src/core/page/page.controller.ts index 968480c9..28ec083e 100644 --- a/apps/server/src/core/page/page.controller.ts +++ b/apps/server/src/core/page/page.controller.ts @@ -39,6 +39,11 @@ import { } from '../casl/interfaces/space-ability.type'; import SpaceAbilityFactory from '../casl/abilities/space-ability.factory'; import { PageRepo } from '@docmost/db/repos/page/page.repo'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; +import { + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../../common/helpers/prosemirror/html-embed.util'; import { RecentPageDto } from './dto/recent-page.dto'; import { CreatedByUserDto } from './dto/created-by-user.dto'; import { DuplicatePageDto } from './dto/duplicate-page.dto'; @@ -63,6 +68,7 @@ export class PageController { constructor( private readonly pageService: PageService, private readonly pageRepo: PageRepo, + private readonly workspaceRepo: WorkspaceRepo, private readonly pageHistoryService: PageHistoryService, private readonly spaceAbility: SpaceAbilityFactory, private readonly pageAccessService: PageAccessService, @@ -92,6 +98,18 @@ export class PageController { const permissions = { canEdit, hasRestriction }; + if (page.content) { + const workspace = await this.workspaceRepo.findById(page.workspaceId); + if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) { + // Kill-switch: when the workspace feature is OFF, never serve raw + // htmlEmbed nodes on the read path (mirrors the public-share strip), + // so disabling the feature is an immediate, total kill-switch and not + // dependent on the page being re-saved. Admin-authored content only. + // Fail-closed: a missing workspace resolves to OFF and is stripped. + page.content = stripHtmlEmbedNodes(page.content) as any; + } + } + if (dto.format && dto.format !== 'json' && page.content) { const contentOutput = dto.format === 'markdown' @@ -536,6 +554,16 @@ export class PageController { await this.pageAccessService.validateCanView(page, user); + if (history.content) { + const workspace = await this.workspaceRepo.findById(page.workspaceId); + if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) { + // Kill-switch: history snapshots are an authenticated read path too, so + // strip htmlEmbed when the workspace feature is OFF (same as /info and + // the public-share path). Fail-closed on a missing workspace. + history.content = stripHtmlEmbedNodes(history.content) as any; + } + } + return history; } -- 2.49.1 From 8ee4279d300c2d364d030ad8eea88b935bf076db Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:52:32 +0300 Subject: [PATCH 2/5] harden(html-embed): make stripHtmlEmbedNodes total with a root-type check (#30) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stripHtmlEmbedNodes only filtered children, so a (never-in-practice) bare htmlEmbed root node would be returned as-is. Add a defensive root check that returns an embed-free doc, making the helper total — it can never return a node for which hasHtmlEmbedNode is true. Adds a unit test for the root case. Co-Authored-By: Claude Opus 4.8 --- .../src/common/helpers/prosemirror/html-embed.spec.ts | 11 +++++++++++ .../src/common/helpers/prosemirror/html-embed.util.ts | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts index 6b07ec0b..b48e9e73 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts @@ -92,6 +92,17 @@ describe('stripHtmlEmbedNodes', () => { const result = stripHtmlEmbedNodes(doc); expect(result).toEqual(doc); }); + + it('neutralizes a root node that is itself an htmlEmbed', () => { + // Defensive: the PM root is always a `doc`, so this is unreachable in normal + // use, but the helper must still never return a bare htmlEmbed. + const root = { + type: 'htmlEmbed', + attrs: { source: '' }, + }; + const result = stripHtmlEmbedNodes(root); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); }); describe('canAuthorHtmlEmbed', () => { diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts index f1d0b6e5..aa5d579d 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts @@ -22,6 +22,15 @@ export function stripHtmlEmbedNodes(pmJson: T): T { const node = pmJson as unknown as JSONContent; + // Defensive root-type check: if the ROOT node is itself an htmlEmbed, the + // children-filtering below could never drop it, so a bare htmlEmbed would be + // returned as-is. This branch is unreachable in normal use (the PM document + // root is always a `doc`) and exists only to make the helper total — a bare + // htmlEmbed can never be returned by this function. + if (node.type === HTML_EMBED_NODE_NAME) { + return { type: 'doc', content: [] } as unknown as T; + } + if (Array.isArray(node.content)) { const filtered: JSONContent[] = []; for (const child of node.content) { -- 2.49.1 From 8191c37daa306ad1fd442108bb17db10542242f0 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 22:49:18 +0300 Subject: [PATCH 3/5] test(html-embed): real-execution gate tests for create/duplicate/import (#27) The create/duplicate/import gate tests asserted gate presence via brittle expect(SRC).toMatch(/regex/) over the source text plus a reimplemented applyGate() stand-in, so a refactor could break the real gate while they still passed. Rewrite both specs to execute the REAL methods (PageService.create / duplicatePage; ImportService.importPage; FileImportTaskService.processGenericImport) with each caller role and assert on the PERSISTED content via hasHtmlEmbedNode: member -> stripped, admin/owner+toggle ON -> preserved, toggle OFF -> stripped for everyone, unknown/missing role -> fail-closed. No source-regex assertions remain. Co-Authored-By: Claude Opus 4.8 --- .../page-service-html-embed-identity.spec.ts | 286 ++++++++++---- .../import-html-embed-identity.spec.ts | 349 ++++++++++++------ 2 files changed, 458 insertions(+), 177 deletions(-) diff --git a/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts index f23d565a..bc1b8254 100644 --- a/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts +++ b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts @@ -1,25 +1,31 @@ -import { readFileSync } from 'node:fs'; -import { join } from 'node:path'; -import { - hasHtmlEmbedNode, - htmlEmbedAllowed, - stripHtmlEmbedNodes, -} from '../../../common/helpers/prosemirror/html-embed.util'; +// Exercises the REAL PageService htmlEmbed admin gate on its two non-collab +// write paths: PageService.create() and PageService.duplicatePage(). Both build +// content/textContent/ydoc directly and persist, bypassing the collab +// onStoreDocument strip, so each must run the incoming document through the +// toggle-AND-admin gate (`htmlEmbedAllowed(featureEnabled, role)` -> if not +// allowed, `stripHtmlEmbedNodes`) BEFORE persisting. +// +// This spec constructs the REAL PageService with every constructor dep mocked, +// feeds content containing an `htmlEmbed`, calls the real method, and asserts on +// the PERSISTED content (captured at the repo insert / db insert boundary) that +// the embed was actually stripped (member/unknown role) or preserved +// (admin/owner + toggle ON). Mirrors the GOOD pattern in +// transclusion/spec/transclusion-unsync-html-embed.spec.ts. +// +// page.service.ts pulls in the collaboration gateway (a transitive ESM chain +// `lib0/decoding.js` that jest's transformIgnorePatterns does not transpile), so +// that single module is mocked away — it is never used on the create/duplicate +// gate paths. +jest.mock('../../../collaboration/collaboration.gateway', () => ({ + CollaborationGateway: class {}, +})); -// PageService.create() and duplicatePage() guards. -// -// page.service.ts cannot be unit-LOADED under the server's jest config (a -// transitive ESM dep, @sindresorhus/slugify, is not in transformIgnorePatterns), -// so we cover the two load-bearing properties at the strongest feasible layer: -// -// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact predicate -// each path applies: non-admin/unknown role -> strip, admin/owner -> keep. -// -// (2) IDENTITY — source-pin which role each path reads (create: the `callerRole` -// param threaded from the request; duplicate: `authUser.role`), so a -// refactor that drops the guard or reads the wrong role trips the test. -// This is what replaces the removed `applyAdminGate` stand-in for these -// two entrypoints. +import { PageService } from './page.service'; +import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util'; + +const WS = 'ws-1'; +const SPACE = 'space-1'; +const USER = 'u1'; const docWithEmbed = () => ({ type: 'doc', @@ -29,74 +35,206 @@ const docWithEmbed = () => ({ ], }); -// The real predicate both paths apply (see SECURITY blocks in page.service.ts): -// toggle AND admin. -function applyGate( - json: any, - featureEnabled: boolean, - role: string | null | undefined, -) { - if (!htmlEmbedAllowed(featureEnabled, role) && hasHtmlEmbedNode(json)) { - return stripHtmlEmbedNodes(json); - } - return json; +// Minimal chainable kysely stub. `nextPagePosition` (used by create) and +// duplicatePage's bulk insert go through `this.db`; only the calls those paths +// make need to resolve. `capturedInserts` collects every page row handed to +// `insertInto('pages').values(...)` so we can assert on the persisted content. +function buildDb(capturedInserts: any[]) { + const selectChain: any = { + select: () => selectChain, + selectAll: () => selectChain, + where: () => selectChain, + orderBy: () => selectChain, + limit: () => selectChain, + execute: async () => [], + executeTakeFirst: async () => undefined, + }; + const db: any = { + selectFrom: () => selectChain, + insertInto: (table: string) => ({ + values: (rows: any) => { + if (table === 'pages') { + for (const row of Array.isArray(rows) ? rows : [rows]) { + capturedInserts.push(row); + } + } + return { execute: async () => undefined }; + }, + }), + // executeTx -> db.transaction().execute(cb): run the callback with `db` + // itself acting as the transaction so any in-tx inserts are captured too. + transaction: () => ({ execute: async (cb: any) => cb(db) }), + }; + return db; } -describe('page create/duplicate gate decision (real helpers)', () => { - it('toggle ON + non-admin (member) strips', () => { - const result = applyGate(docWithEmbed(), true, 'member'); - expect(hasHtmlEmbedNode(result)).toBe(false); - expect(result.content).toHaveLength(1); - expect(result.content[0].content[0].text).toBe('body'); - }); +// Build the REAL PageService with all 13 constructor deps mocked. `featureEnabled` +// drives the workspace toggle the gate reads via workspaceRepo.findById. +function buildService(opts: { + featureEnabled: boolean; + capturedInserts: any[]; + rootPage?: any; // for duplicatePage +}) { + const { featureEnabled, capturedInserts } = opts; - it('toggle ON + unknown/empty role fails closed (strips)', () => { - for (const role of [null, undefined, 'viewer'] as const) { - expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, role))).toBe( - false, - ); - } - }); + const pageRepo: any = { + findById: jest.fn(async () => null), // no parent page in create tests + // create() persists here; capture the row so we can inspect content. + insertPage: jest.fn(async (row: any) => { + capturedInserts.push(row); + return { id: 'new-page', slugId: 'slug-1', ...row }; + }), + getPageAndDescendants: jest.fn(async () => [opts.rootPage].filter(Boolean)), + }; - it('toggle ON + admin/owner keep', () => { - expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'admin'))).toBe( - true, - ); - expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'owner'))).toBe( - true, + const pagePermissionRepo: any = { + // duplicatePage filters accessible pages; grant the root so it is copied. + filterAccessiblePageIds: jest.fn(async () => + opts.rootPage ? [opts.rootPage.id] : [], + ), + }; + + const workspaceRepo: any = { + findById: jest.fn(async () => ({ + id: WS, + settings: { htmlEmbed: featureEnabled }, + })), + }; + + const attachmentRepo: any = { findByIds: jest.fn(async () => []) }; + const storageService: any = { copy: jest.fn(async () => undefined) }; + const noopQueue: any = { add: jest.fn(async () => undefined) }; + const eventEmitter: any = { emit: jest.fn() }; + const collaborationGateway: any = {}; + const watcherService: any = {}; + // duplicatePage fires transclusion bulk inserts after persisting; they are + // best-effort (wrapped in try/catch) and irrelevant to the gate. + const transclusionService: any = { + insertTransclusionsForPages: jest.fn(async () => undefined), + insertReferencesForPages: jest.fn(async () => undefined), + insertTemplateReferencesForPages: jest.fn(async () => undefined), + }; + + const db = buildDb(capturedInserts); + + const service = new PageService( + pageRepo, + pagePermissionRepo, + attachmentRepo, + db, + storageService, + noopQueue, // attachmentQueue + noopQueue, // aiQueue + noopQueue, // generalQueue + eventEmitter, + collaborationGateway, + watcherService, + transclusionService, + workspaceRepo, + ); + return service; +} + +describe('PageService.create htmlEmbed admin gate (real code)', () => { + // Run create() and return the content actually persisted via insertPage. + async function persistedContent( + featureEnabled: boolean, + callerRole: string | null | undefined, + ) { + const capturedInserts: any[] = []; + const service = buildService({ featureEnabled, capturedInserts }); + await service.create( + USER, + WS, + { + spaceId: SPACE, + title: 'p', + // 'json' format is used as-is by parseProsemirrorContent (passed to the + // real jsonToNode schema validation), so hand it the PM-JSON object. + content: docWithEmbed(), + format: 'json' as any, + } as any, + callerRole, ); + expect(capturedInserts).toHaveLength(1); + return capturedInserts[0].content; + } + + it('toggle ON + member: persisted content has htmlEmbed stripped', async () => { + const content = await persistedContent(true, 'member'); + expect(hasHtmlEmbedNode(content)).toBe(false); + // Non-embed content survives. + expect(JSON.stringify(content)).toContain('body'); }); - it('toggle OFF strips for everyone (admin/owner/member)', () => { - for (const role of ['admin', 'owner', 'member'] as const) { - expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), false, role))).toBe( - false, - ); + it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true); + }); + + it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { + expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + for (const role of [undefined, null, 'viewer'] as const) { + expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false); } }); }); -const SRC = readFileSync(join(__dirname, 'page.service.ts'), 'utf-8'); +describe('PageService.duplicatePage htmlEmbed admin gate (real code)', () => { + // Duplicate a single source page that contains an embed and return the content + // persisted for the copy (captured at db.insertInto('pages').values(...)). + async function persistedContent( + featureEnabled: boolean, + role: string | null | undefined, + ) { + const rootPage: any = { + id: 'src-page', + slugId: 'src-slug', + title: 'Source', + icon: null, + position: 'a0', + spaceId: SPACE, + workspaceId: WS, + parentPageId: null, + content: docWithEmbed(), + }; + const capturedInserts: any[] = []; + const service = buildService({ featureEnabled, capturedInserts, rootPage }); + const authUser: any = { id: USER, workspaceId: WS, role }; + await service.duplicatePage(rootPage, undefined, authUser); + // The bulk insert is the page persist boundary; one source page -> one copy. + const pageRows = capturedInserts.filter((r) => r.content); + expect(pageRows.length).toBeGreaterThanOrEqual(1); + return pageRows[0].content; + } -describe('page create/duplicate gate identity is pinned (source contract)', () => { - it('create() gates on toggle AND the caller role param before deriving content/ydoc', () => { - // create() receives the caller's workspace role as `callerRole` and gates on - // the combined toggle-AND-admin predicate; the embed must be stripped BEFORE - // insertPage. - expect(SRC).toMatch( - /!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*callerRole\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/, - ); - expect(SRC).toContain('prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson)'); + it('toggle ON + member: persisted copy has htmlEmbed stripped', async () => { + const content = await persistedContent(true, 'member'); + expect(hasHtmlEmbedNode(content)).toBe(false); + expect(JSON.stringify(content)).toContain('body'); }); - it('duplicatePage() gates on toggle AND the duplicating user role (authUser.role)', () => { - expect(SRC).toMatch( - /!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*authUser\.role\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/, - ); + it('toggle ON + admin: persisted copy keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true); }); - it('both paths resolve the toggle from the workspace settings', () => { - expect(SRC).toContain('isHtmlEmbedFeatureEnabled('); - expect(SRC).toContain('this.workspaceRepo.findById('); + it('toggle ON + owner: persisted copy keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { + expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + for (const role of [undefined, null, 'viewer'] as const) { + expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false); + } }); }); diff --git a/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts index 603765fc..d2902be0 100644 --- a/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts +++ b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts @@ -1,123 +1,266 @@ -import { readFileSync } from 'node:fs'; -import { join } from 'node:path'; -import { - hasHtmlEmbedNode, - htmlEmbedAllowed, - stripHtmlEmbedNodes, -} from '../../../common/helpers/prosemirror/html-embed.util'; +// Exercises the REAL htmlEmbed admin gate on the two import write paths: +// +// (1) ImportService.importPage() — single .html/.md upload +// (2) FileImportTaskService.processGenericImport() — zip / multi-file import +// +// Both build content/textContent/ydoc directly and persist (bypassing the +// collab onStoreDocument strip), so each must run the imported document through +// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read +// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed, +// `stripHtmlEmbedNodes` BEFORE persisting. +// +// This spec constructs the REAL services with deps mocked, feeds an imported +// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed +// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED +// content (captured at the insert boundary) is stripped for a non-admin / +// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD +// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts. +// +// Three modules are mocked away because they pull transitive ESM deps that +// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the +// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via +// import-attachment). None of them participate in the gate decision: +// - import-formatter: contextless HTML cleanup + link rewriting; replaced with +// faithful passthroughs (the embed div has no href/iframe, so the real +// normalizer would leave it untouched anyway). +// - import-attachment: attachment rewriting; passthrough returns html as-is. +jest.mock('../../../collaboration/collaboration.gateway', () => ({ + CollaborationGateway: class {}, +})); +jest.mock('../utils/import-formatter', () => ({ + normalizeImportHtml: () => {}, + formatImportHtml: async (opts: any) => ({ + html: opts.html, + backlinks: [], + pageIcon: undefined, + }), +})); +jest.mock('./import-attachment.service', () => ({ + ImportAttachmentService: class {}, +})); -// FAIL-CLOSED IDENTITY for the import write paths. -// -// import.service / file-import-task.service cannot be unit-LOADED under the -// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in -// transformIgnorePatterns). So we cover the two load-bearing properties at the -// strongest feasible layer: -// -// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate -// predicate each entrypoint runs against the role resolved from -// userRepo.findById(...): a MISSING user (findById -> undefined) must fail -// closed (strip), and only 'admin'/'owner' keep the embed. -// -// (2) IDENTITY — source-pin which identity governs the gate so a refactor that -// swaps the lookup to the wrong user (e.g. the queue worker's caller) is -// caught: zip import resolves the role from `fileTask.creatorId`; single -// import from the request `userId`. NOT some ambient caller. -// -// If a guard is deleted/misplaced or the identity field changes, these break. +import { promises as fs } from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { ImportService } from './import.service'; +import { FileImportTaskService } from './file-import-task.service'; +import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util'; -const docWithEmbed = () => ({ - type: 'doc', - content: [ - { type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] }, - { type: 'htmlEmbed', attrs: { source: '' } }, - ], -}); +const WS = 'ws-1'; +const SPACE = 'space-1'; +const USER = 'importer-1'; -// The real predicate both import entrypoints apply (see the SECURITY blocks in -// import.service.ts and file-import-task.service.ts): resolve the importer via -// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`. -function applyImportGate( - json: any, - featureEnabled: boolean, - importingUser: { role?: string } | undefined, -) { - if ( - !htmlEmbedAllowed(featureEnabled, importingUser?.role) && - hasHtmlEmbedNode(json) - ) { - return stripHtmlEmbedNodes(json); - } - return json; +// HTML carrying the serialized htmlEmbed node. The REAL htmlToJson parses +// `
` into an htmlEmbed PM node +// (base64 below decodes to ``). +const HTML_WITH_EMBED = + '

imported body

' + + '
'; + +function workspaceRepoFor(featureEnabled: boolean) { + return { + findById: jest.fn(async () => ({ + id: WS, + settings: { htmlEmbed: featureEnabled }, + })), + }; } -describe('import gate fail-closed by toggle AND resolved-user role (real helpers)', () => { - it('toggle ON + missing user (userRepo.findById -> undefined) strips the embed', () => { - // findById returns undefined when the user/workspace pair does not resolve; - // undefined?.role is undefined -> htmlEmbedAllowed(true, undefined) === false. - const result = applyImportGate(docWithEmbed(), true, undefined); - expect(hasHtmlEmbedNode(result)).toBe(false); +// userRepo.findById resolves the importer's role (or undefined for a missing +// user -> fail closed). +function userRepoFor(user: { role?: string } | undefined) { + return { findById: jest.fn(async () => user) }; +} + +describe('ImportService.importPage htmlEmbed admin gate (real code)', () => { + // Run importPage with a single uploaded .html and return the persisted content + // captured at pageRepo.insertPage. + async function persistedContent( + featureEnabled: boolean, + user: { role?: string } | undefined, + ) { + const captured: any[] = []; + const pageRepo: any = { + insertPage: jest.fn(async (row: any) => { + captured.push(row); + return { id: 'p1', slugId: 's1', ...row }; + }), + }; + // db is only used for getNewPagePosition (a select chain). + const selectChain: any = { + select: () => selectChain, + where: () => selectChain, + orderBy: () => selectChain, + limit: () => selectChain, + executeTakeFirst: async () => undefined, + }; + const db: any = { selectFrom: () => selectChain }; + + const service = new ImportService( + pageRepo, + userRepoFor(user) as any, + { putBuffer: jest.fn() } as any, // storageService (unused on this path) + db, + { add: jest.fn() } as any, // fileTaskQueue (unused) + workspaceRepoFor(featureEnabled) as any, + ); + + const file: any = { + filename: 'doc.html', + toBuffer: async () => Buffer.from(HTML_WITH_EMBED, 'utf-8'), + }; + await service.importPage(Promise.resolve(file), USER, SPACE, WS); + expect(captured).toHaveLength(1); + return captured[0].content; + } + + it('toggle ON + member: persisted content has htmlEmbed stripped', async () => { + const content = await persistedContent(true, { role: 'member' }); + expect(hasHtmlEmbedNode(content)).toBe(false); + expect(JSON.stringify(content)).toContain('imported body'); }); - it("toggle ON + resolved role 'member' strips", () => { + it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe( + false, + ); + }); + + it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe( + true, + ); + }); + + it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe( + true, + ); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { expect( - hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'member' })), + hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })), ).toBe(false); }); - - it("toggle ON + resolved role 'admin' keeps the embed", () => { - expect( - hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'admin' })), - ).toBe(true); - }); - - it("toggle ON + resolved role 'owner' keeps the embed", () => { - expect( - hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'owner' })), - ).toBe(true); - }); - - it('toggle OFF strips for every role (admin/owner/member)', () => { - for (const role of ['admin', 'owner', 'member'] as const) { - expect( - hasHtmlEmbedNode(applyImportGate(docWithEmbed(), false, { role })), - ).toBe(false); - } - }); }); -// Source-pin the identity each entrypoint feeds to userRepo.findById. These are -// the lines that decide WHOSE role governs the gate; pinning them means a -// refactor that points the lookup at the wrong user trips the test. -const SRC_DIR = join(__dirname); +describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => { + let extractDir: string; -describe('import gate identity is pinned to the importer (source contract)', () => { - it('single import resolves the role from the request userId', () => { - const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8'); - // The role lookup must key on the request `userId`, then gate on the role. - expect(src).toMatch( - /this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/, - ); - expect(src).toMatch( - /htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*\)/, - ); - // And the toggle is resolved from the workspace settings. - expect(src).toContain('isHtmlEmbedFeatureEnabled('); - // And the gate uses the real strip helper. - expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)'); + beforeEach(async () => { + // Real temp dir holding a single .html page that carries the embed; the + // method reads it from disk via fs.readFile. + extractDir = await fs.mkdtemp(path.join(os.tmpdir(), 'html-embed-import-')); + await fs.writeFile(path.join(extractDir, 'page.html'), HTML_WITH_EMBED); }); - it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => { - const src = readFileSync( - join(SRC_DIR, 'file-import-task.service.ts'), - 'utf-8', + afterEach(async () => { + await fs.rm(extractDir, { recursive: true, force: true }); + }); + + // Run processGenericImport over the temp dir and return the content persisted + // for the imported page (captured at trx.insertInto('pages').values(...)). + async function persistedContent( + featureEnabled: boolean, + user: { role?: string } | undefined, + ) { + const captured: any[] = []; + const trxInsertChain = (table: string) => ({ + values: (row: any) => { + if (table === 'pages') captured.push(row); + return { execute: async () => undefined }; + }, + }); + const trx: any = { insertInto: trxInsertChain }; + const db: any = { + // spaces lookup at the top of processGenericImport + selectFrom: () => ({ + select: () => ({ + where: () => ({ executeTakeFirst: async () => ({ slug: 'sp' }) }), + }), + }), + // executeTx -> db.transaction().execute(cb) + transaction: () => ({ execute: async (cb: any) => cb(trx) }), + }; + + // importService stub: only the real, gate-relevant helpers are used. We give + // it the REAL implementations by delegating to a real ImportService for + // processHTML/extractTitleAndRemoveHeading/createYdoc so the embed parse and + // strip path runs for real. + const realImport = new ImportService( + {} as any, + {} as any, + {} as any, + {} as any, + {} as any, + {} as any, ); - expect(src).toMatch( - /this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/, + const importService: any = { + processHTML: (html: string) => realImport.processHTML(html), + extractTitleAndRemoveHeading: (s: any) => + realImport.extractTitleAndRemoveHeading(s), + createYdoc: (j: any) => realImport.createYdoc(j), + }; + + const importAttachmentService: any = { + // passthrough: no attachment rewriting, return html unchanged + processAttachments: jest.fn(async (opts: any) => opts.html), + }; + + const service = new FileImportTaskService( + { putBuffer: jest.fn() } as any, // storageService + importService, + { nextPagePosition: jest.fn(async () => 'a0') } as any, // pageService (position only) + { insertBacklink: jest.fn() } as any, // backlinkRepo + db, + importAttachmentService, + userRepoFor(user) as any, + workspaceRepoFor(featureEnabled) as any, + { emit: jest.fn() } as any, // eventEmitter + { logBatchWithContext: jest.fn() } as any, // auditService ); - expect(src).toMatch( - /importerCanAuthorHtmlEmbed\s*=\s*htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*,?\s*\)/, + + const fileTask: any = { + id: 'task-1', + creatorId: USER, + workspaceId: WS, + spaceId: SPACE, + source: 'generic', + }; + + await service.processGenericImport({ extractDir, fileTask }); + expect(captured.length).toBeGreaterThanOrEqual(1); + return captured[0].content; + } + + it('toggle ON + member: persisted page has htmlEmbed stripped', async () => { + const content = await persistedContent(true, { role: 'member' }); + expect(hasHtmlEmbedNode(content)).toBe(false); + expect(JSON.stringify(content)).toContain('imported body'); + }); + + it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe( + false, ); - expect(src).toContain('isHtmlEmbedFeatureEnabled('); - expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)'); + }); + + it('toggle ON + admin: persisted page keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe( + true, + ); + }); + + it('toggle ON + owner: persisted page keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe( + true, + ); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { + expect( + hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })), + ).toBe(false); }); }); -- 2.49.1 From b7ea8c850ebec2731716c9c6bb6726a90b101992 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 23:02:01 +0300 Subject: [PATCH 4/5] fix(html-embed): preserve admin's existing embed on a non-admin co-editor's store (#29) The collab persist strip keyed to the storing connection's user, so when a non-admin co-editor stored, it removed an admin's legitimately-authored embed too (data loss). Now: toggle OFF still strips all (feature disabled); toggle ON + non-admin storer strips only NEWLY-introduced embeds and preserves those already present in the persisted content (admin-vetted), via new helpers collectHtmlEmbedSources + stripDisallowedHtmlEmbedNodes (identity = attrs.source, already-vetted HTML). The ydoc reflect is now guarded by a deep-equal check so an unrelated non-admin edit that touches no new embed doesn't churn the doc. A non-admin still cannot add a new embed. Documents the allow-list TOCTOU (best-effort snapshot read outside the lock; converges on next store). Co-Authored-By: Claude Opus 4.8 --- .../persistence.extension.html-embed.spec.ts | 67 ++++++- .../extensions/persistence.extension.ts | 85 ++++++--- .../helpers/prosemirror/html-embed.spec.ts | 165 ++++++++++++++++++ .../helpers/prosemirror/html-embed.util.ts | 101 +++++++++++ 4 files changed, 394 insertions(+), 24 deletions(-) diff --git a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts index a78173a6..55f06ea9 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts @@ -3,6 +3,7 @@ import { TiptapTransformer } from '@hocuspocus/transformer'; import { PersistenceExtension } from './persistence.extension'; import { tiptapExtensions } from '../collaboration.util'; import { + collectHtmlEmbedSources, hasHtmlEmbedNode, HTML_EMBED_NODE_NAME, } from '../../common/helpers/prosemirror/html-embed.util'; @@ -121,7 +122,7 @@ function nodeTypeCounts(json: any): Record { * onStoreDocument to reach the strip + persist branch, and capture the content * that would be written to the page row. */ -function buildExtension(featureEnabled = true) { +function buildExtension(featureEnabled = true, priorContent?: any) { const captured: { content?: any } = {}; const existingPage = { @@ -131,7 +132,10 @@ function buildExtension(featureEnabled = true) { workspaceId: 'ws-1', creatorId: 'creator-1', contributorIds: [], - content: { type: 'doc', content: [] }, // differs from new content -> persist runs + // The currently-persisted content. Defaults to an empty doc (differs from + // new content -> persist runs); a test may pass a prior admin embed here to + // exercise the preserve-admin-embed branch. + content: priorContent ?? { type: 'doc', content: [] }, createdAt: new Date(), lastUpdatedSource: 'user', }; @@ -186,8 +190,9 @@ async function runStore( role: string | null | undefined, doc: Y.Doc, featureEnabled = true, + priorContent?: any, ) { - const { ext, captured } = buildExtension(featureEnabled); + const { ext, captured } = buildExtension(featureEnabled, priorContent); // hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has // none, so stub it (the post-persist broadcast is not under test here). (doc as any).broadcastStateless = () => undefined; @@ -268,6 +273,62 @@ describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)' ).toBe(false); }); + it('toggle ON + non-admin store: PRESERVES an admin embed already in the persisted content through an unrelated edit', async () => { + // Prior persisted content already holds an admin-authored embed. + const ADMIN_SOURCE = ''; + const prior = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + ], + }; + // A non-admin makes an UNRELATED edit (tweaks the paragraph) but the embed + // is still present in the merged doc. + const edited = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + ], + }; + + const captured = await runStore('member', buildYdoc(edited), true, prior); + expect(captured.content).toBeDefined(); + // The admin's pre-existing embed survives the non-admin store. + expect(collectHtmlEmbedSources(captured.content)).toEqual( + new Set([ADMIN_SOURCE]), + ); + }); + + it('toggle ON + non-admin store: strips a NEWLY-added embed while keeping the prior admin one', async () => { + const ADMIN_SOURCE = ''; + const prior = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + ], + }; + // Non-admin keeps the admin embed, makes an unrelated paragraph edit (so the + // store is not a no-op and is persisted), and ALSO adds a brand-new embed. + const edited = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: '' } }, + ], + }; + + const captured = await runStore('member', buildYdoc(edited), true, prior); + expect(captured.content).toBeDefined(); + // Only the admin-vetted source remains; the newly-introduced one is stripped. + expect(collectHtmlEmbedSources(captured.content)).toEqual( + new Set([ADMIN_SOURCE]), + ); + }); + it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => { const emptyDoc = buildYdoc({ type: 'doc', diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 23c94631..7c2ff963 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -40,9 +40,11 @@ import { } from '../constants'; import { TransclusionService } from '../../core/page/transclusion/transclusion.service'; import { + collectHtmlEmbedSources, hasHtmlEmbedNode, htmlEmbedAllowed, isHtmlEmbedFeatureEnabled, + stripDisallowedHtmlEmbedNodes, stripHtmlEmbedNodes, } from '../../common/helpers/prosemirror/html-embed.util'; import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; @@ -130,12 +132,31 @@ export class PersistenceExtension implements Extension { // every htmlEmbed node before it is written to the page row AND before the // ydoc state is re-encoded, so the node cannot be reintroduced by a // non-admin via the collab socket. - // NOTE (residual risk): the gate is keyed to the storing connection's user. - // If an admin already authored an htmlEmbed and a non-admin's later store - // does not touch it, this strip would remove the admin's embed on that - // non-admin store. This is intentionally conservative (fail closed): the - // admin re-adds/keeps the node on their own next edit. A future refinement - // could diff against the previously persisted admin-authored embeds. + // NOTE (defense-in-depth refinement, Gitea #29): the gate is keyed to the + // storing connection's user, but it no longer blindly strips EVERY embed on + // a non-admin store. We distinguish two cases inside the !allowed branch: + // - Feature toggle OFF => strip ALL embeds (the feature is disabled for + // everyone; existing embeds get cleaned up on the next save). + // - Toggle ON but the storer is a NON-admin => strip only NEWLY-introduced + // embeds and PRESERVE embeds already present in the currently-persisted + // page content (admin-authored, already vetted). So a non-admin still + // cannot ADD an embed, but an unrelated edit (e.g. a paragraph tweak) no + // longer destroys an admin's existing embed (the prior data-loss bug). + // The pre-existing-embed identity is the raw `attrs.source` (see + // collectHtmlEmbedSources). A non-admin who copies an existing admin embed's + // exact source elsewhere passes — acceptable, that HTML is already vetted. + // + // ACCEPTED RESIDUAL RISK (toggle-ON allow-list TOCTOU): the allow-list is a + // best-effort snapshot read OUTSIDE the locked transaction (the prior content + // is pre-read above, but inside executeTx the row is re-read withLock without + // recomputing the allow-list). A concurrent admin store that changes the + // persisted embeds between the pre-read and this write can make the preserve + // decision use a slightly stale snapshot — worst case one embed transiently + // kept or dropped; it converges on the next store, with no auth bypass or + // broader data loss. The race is accepted because it only affects concurrent + // authenticated editors on the (rare) toggle-ON non-admin path, converges on + // the next store, and the persisted row plus every share/readonly read path + // remain protected by the strip. // // ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in // the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs @@ -157,22 +178,44 @@ export class PersistenceExtension implements Extension { ); if (!htmlEmbedAllowed(htmlEmbedEnabled, context?.user?.role)) { if (hasHtmlEmbedNode(tiptapJson)) { - this.logger.warn( - `Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`, - ); - tiptapJson = stripHtmlEmbedNodes(tiptapJson); - // Reflect the stripped content back into the shared ydoc so the node is - // removed for all connected clients, not just the persisted row. - const fragment = document.getXmlFragment('default'); - if (fragment.length > 0) { - fragment.delete(0, fragment.length); + let strippedJson: typeof tiptapJson; + if (htmlEmbedEnabled === false) { + // Toggle OFF: feature disabled for everyone -> strip ALL embeds. + strippedJson = stripHtmlEmbedNodes(tiptapJson); + } else { + // Toggle ON, non-admin storer: preserve embeds already present in the + // currently-persisted (admin-vetted) page content; strip only the + // newly-introduced ones. Pre-read the prior content — a small extra + // query only on this rare non-admin + toggle-ON path. + const prior = await this.pageRepo.findById(pageId, { + includeContent: true, + }); + const allowed = collectHtmlEmbedSources(prior?.content); + strippedJson = stripDisallowedHtmlEmbedNodes(tiptapJson, allowed); + } + + // Only mutate the ydoc + log when the strip actually removed something; + // an unnecessary ydoc rewrite would churn the doc for all clients. With + // the toggle-ON branch a non-admin store that only touches admin-vetted + // embeds leaves the content unchanged here. + if (!isDeepStrictEqual(strippedJson, tiptapJson)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`, + ); + tiptapJson = strippedJson; + // Reflect the stripped content back into the shared ydoc so the node + // is removed for all connected clients, not just the persisted row. + const fragment = document.getXmlFragment('default'); + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + const cleanDoc = TiptapTransformer.toYdoc( + tiptapJson, + 'default', + tiptapExtensions, + ); + Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc)); } - const cleanDoc = TiptapTransformer.toYdoc( - tiptapJson, - 'default', - tiptapExtensions, - ); - Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc)); } } diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts index b48e9e73..4351673a 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts @@ -1,8 +1,10 @@ import { canAuthorHtmlEmbed, + collectHtmlEmbedSources, hasHtmlEmbedNode, htmlEmbedAllowed, isHtmlEmbedFeatureEnabled, + stripDisallowedHtmlEmbedNodes, stripHtmlEmbedNodes, } from './html-embed.util'; import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util'; @@ -105,6 +107,169 @@ describe('stripHtmlEmbedNodes', () => { }); }); +describe('collectHtmlEmbedSources', () => { + it('collects the source of every htmlEmbed node, including nested ones', () => { + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed', attrs: { source: 'top' } }, + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { type: 'htmlEmbed', attrs: { source: 'nested' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'x' }] }, + ], + }, + ], + }, + ], + }; + const sources = collectHtmlEmbedSources(doc); + expect(sources).toEqual(new Set(['top', 'nested'])); + }); + + it('returns an empty set for a doc with no embeds', () => { + const doc = { + type: 'doc', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }], + }; + expect(collectHtmlEmbedSources(doc).size).toBe(0); + }); + + it('gracefully skips embeds with absent attrs or non-string source', () => { + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed' }, // no attrs + { type: 'htmlEmbed', attrs: {} }, // no source + { type: 'htmlEmbed', attrs: { source: 42 } }, // non-string + { type: 'htmlEmbed', attrs: { source: '' } }, + ], + }; + expect(collectHtmlEmbedSources(doc)).toEqual(new Set([''])); + }); + + it('returns an empty set for non-object input', () => { + expect(collectHtmlEmbedSources(null).size).toBe(0); + expect(collectHtmlEmbedSources(undefined).size).toBe(0); + expect(collectHtmlEmbedSources('x' as any).size).toBe(0); + }); +}); + +describe('stripDisallowedHtmlEmbedNodes', () => { + it('keeps an embed whose source is allowed and removes the rest', () => { + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }, + ], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set([''])); + expect(collectHtmlEmbedSources(result)).toEqual(new Set([''])); + // The allowed embed and the paragraph survive; the new embed is gone. + expect(result.content).toHaveLength(2); + expect(result.content[0].attrs.source).toBe(''); + expect(result.content[1].type).toBe('paragraph'); + }); + + it('keeps BOTH embeds when two nodes share the same allowed source', () => { + // Source-identity semantics: identity is the raw `attrs.source`, so a + // non-admin who duplicates an existing admin-vetted source keeps both copies. + // This is intended — the raw HTML is already vetted, so a duplicate is safe. + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'mid' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set([''])); + expect(hasHtmlEmbedNode(result)).toBe(true); + const embeds = result.content.filter( + (n: any) => n.type === 'htmlEmbed', + ); + expect(embeds).toHaveLength(2); + expect(embeds.every((n: any) => n.attrs.source === '')).toBe(true); + }); + + it('removes a newly-introduced embed when nothing is allowed', () => { + const doc = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source: '' } }], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set()); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); + + it('filters nested embeds by the allow-list (e.g. inside columns)', () => { + const doc = { + type: 'doc', + content: [ + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], + }, + ], + }, + ], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set([''])); + const col = findFirstChild(result, 'column'); + expect(col.content).toHaveLength(1); + expect(col.content[0].attrs.source).toBe(''); + }); + + it('treats an embed with absent/non-string source as not allowed (stripped)', () => { + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed' }, + { type: 'htmlEmbed', attrs: {} }, + ], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set([''])); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); + + it('does not mutate the input document', () => { + const doc = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source: '' } }], + }; + stripDisallowedHtmlEmbedNodes(doc, new Set()); + expect(doc.content).toHaveLength(1); + expect(doc.content[0].type).toBe('htmlEmbed'); + }); + + it('neutralizes a root node that is itself a disallowed htmlEmbed', () => { + const root = { type: 'htmlEmbed', attrs: { source: '' } }; + const result = stripDisallowedHtmlEmbedNodes(root, new Set()); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); + + it('keeps a root node that is an allowed htmlEmbed (defensive branch)', () => { + const root = { type: 'htmlEmbed', attrs: { source: '' } }; + const result = stripDisallowedHtmlEmbedNodes(root, new Set([''])); + expect(collectHtmlEmbedSources(result)).toEqual(new Set([''])); + }); + + it('returns non-object input unchanged', () => { + expect(stripDisallowedHtmlEmbedNodes(null as any, new Set())).toBeNull(); + }); +}); + describe('canAuthorHtmlEmbed', () => { it('allows owner and admin', () => { expect(canAuthorHtmlEmbed('owner')).toBe(true); diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts index aa5d579d..146ea9d3 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts @@ -48,6 +48,107 @@ export function stripHtmlEmbedNodes(pmJson: T): T { return { ...node } as unknown as T; } +/** + * Walk the document and collect a stable identity for every `htmlEmbed` node. + * + * The identity is the node's `attrs.source` string — the raw HTML the embed + * renders. Two embeds that render the exact same HTML are treated as the same + * identity. Used by the collab persist path to know which embeds are ALREADY + * present in the currently-persisted (admin-vetted) page content, so a later + * non-admin store can strip only NEWLY-introduced embeds while preserving the + * pre-existing admin-authored ones. + * + * Absent attrs or a non-string/absent `source` are skipped gracefully (such a + * node contributes no identity to the set). + */ +export function collectHtmlEmbedSources(pmJson: unknown): Set { + const sources = new Set(); + + const walk = (node: unknown): void => { + if (!node || typeof node !== 'object') { + return; + } + const n = node as JSONContent; + if (n.type === HTML_EMBED_NODE_NAME) { + const source = (n.attrs as Record | undefined)?.source; + if (typeof source === 'string') { + sources.add(source); + } + } + if (Array.isArray(n.content)) { + for (const child of n.content) { + walk(child); + } + } + }; + + walk(pmJson); + return sources; +} + +/** + * Like {@link stripHtmlEmbedNodes}, but KEEP any `htmlEmbed` node whose + * `attrs.source` is in `allowedSources`; remove the rest. + * + * Used on the collab persist path when the feature toggle is ON but the storing + * user is a NON-admin: `allowedSources` is the set of embed sources already + * present in the currently-persisted page content (admin-authored, already + * vetted). A non-admin therefore cannot ADD a new embed, but their unrelated + * edit also cannot destroy an admin's existing one. + * + * NOTE: identity is the raw source string, so a non-admin who COPIES an existing + * admin embed's exact source into a NEW location passes this check. That is + * acceptable — the source is already admin-vetted content present in the doc; no + * new untrusted HTML is introduced. + * + * Returns a NEW document; the input is not mutated. Same defensive root-type + * check pattern as {@link stripHtmlEmbedNodes}. + */ +export function stripDisallowedHtmlEmbedNodes( + pmJson: T, + allowedSources: Set, +): T { + if (!pmJson || typeof pmJson !== 'object') { + return pmJson; + } + + const node = pmJson as unknown as JSONContent; + + // Defensive root-type check (mirrors stripHtmlEmbedNodes): if the ROOT node is + // itself an htmlEmbed and its source is NOT allowed, the children-filtering + // below could never drop it, so neutralize it here. Unreachable in normal use + // (the PM document root is always a `doc`). + if (node.type === HTML_EMBED_NODE_NAME) { + const source = (node.attrs as Record | undefined)?.source; + if (typeof source === 'string' && allowedSources.has(source)) { + return { ...node } as unknown as T; + } + return { type: 'doc', content: [] } as unknown as T; + } + + if (Array.isArray(node.content)) { + const filtered: JSONContent[] = []; + for (const child of node.content) { + // Drop a disallowed htmlEmbed child (newly introduced); keep an allowed + // one (already present in the persisted, admin-vetted content). + if (child && child.type === HTML_EMBED_NODE_NAME) { + const source = (child.attrs as Record | undefined) + ?.source; + if (typeof source === 'string' && allowedSources.has(source)) { + filtered.push({ ...child }); + } + continue; + } + // Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are + // also filtered by the same allow-list. + filtered.push(stripDisallowedHtmlEmbedNodes(child, allowedSources)); + } + return { ...node, content: filtered } as unknown as T; + } + + return { ...node } as unknown as T; +} + /** * Returns true if the document contains at least one `htmlEmbed` node anywhere * in its tree. Useful to decide whether a strip pass actually changed anything -- 2.49.1 From 424761753e395459b7a119b057c0be74e1be0360 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 23:20:02 +0300 Subject: [PATCH 5/5] fix(html-embed): shrink the collab broadcast window with an early onChange guard (#26) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A non-admin's transient htmlEmbed could execute in other open editors until the debounced (10s) onStoreDocument strip. Add a ~300ms onChange-debounced early strip (guardHtmlEmbed) that converges the shared ydoc for everyone far sooner. Safety-critical details: - Scheduled from onChange ONLY for non-admins AND only when the workspace toggle is ON (cached per-document in onLoadDocument), so the common toggle-OFF case does zero extra work. - guardHtmlEmbed does ALL async work (toggle + persisted allow-list read) FIRST, then performs fromYdoc -> strip -> fragment.delete -> applyUpdate in a single SYNCHRONOUS, await-free block, so no inbound Yjs update can interleave and a concurrent edit can never be clobbered. Bails if document.isDestroyed. - Reuses the #29 preserve logic (admin-vetted embeds survive; only the non-admin's new ones are stripped). Loop-safe (corrective update has null origin -> no reschedule; post-strip no embed -> cheap no-op). Per-document timer cleared on unload. onStoreDocument stays the authoritative backstop. The irreducible residual is only the very first inbound broadcast before the debounce fires — Hocuspocus exposes no synchronous beforeBroadcast filter. Co-Authored-By: Claude Opus 4.8 --- .../persistence.extension.html-embed.spec.ts | 115 +++++++++ .../extensions/persistence.extension.ts | 220 +++++++++++++++++- 2 files changed, 323 insertions(+), 12 deletions(-) diff --git a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts index 55f06ea9..7941a1e6 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts @@ -339,3 +339,118 @@ describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)' await expect(runStore('member', emptyDoc)).resolves.toBeDefined(); }); }); + +// Exercises the REAL early onChange guard (Gitea #26): guardHtmlEmbed converges +// the shared ydoc sub-second, before the 10s store debounce. We call it directly +// (it is the debounced timer body) and assert the ydoc fragment no longer yields +// an htmlEmbed for the non-admin's transient embed, while admin-vetted embeds +// already in the persisted content survive. +describe('PersistenceExtension.guardHtmlEmbed early onChange guard (real code)', () => { + async function runGuard( + role: string | null | undefined, + doc: Y.Doc, + featureEnabled = true, + priorContent?: any, + ) { + const { ext } = buildExtension(featureEnabled, priorContent); + await (ext as any).guardHtmlEmbed( + 'page-1', + doc, + { user: { id: 'u1', role, workspaceId: 'ws-1' } }, + ); + } + + it('toggle ON + non-admin: strips a newly-added embed from the shared ydoc', async () => { + // Prior persisted content has NO embed; the live doc has one a non-admin + // just added. + const doc = buildYdoc({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: '' } }, + ], + }); + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + true, + ); + + await runGuard('member', doc, true, { type: 'doc', content: [] }); + + // The shared ydoc fragment no longer yields any htmlEmbed. + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + false, + ); + }); + + it('toggle ON + non-admin: preserves a prior admin embed, strips the new one', async () => { + const ADMIN_SOURCE = ''; + const prior = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + ], + }; + // Live doc keeps the admin embed AND adds a brand-new one. + const doc = buildYdoc({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: '' } }, + ], + }); + + await runGuard('member', doc, true, prior); + + // Only the admin-vetted source survives in the shared ydoc. + expect( + collectHtmlEmbedSources(TiptapTransformer.fromYdoc(doc, 'default')), + ).toEqual(new Set([ADMIN_SOURCE])); + }); + + it('toggle OFF + non-admin: strips ALL embeds (allow-list is null)', async () => { + // Even an embed that matches the prior content is stripped when the toggle + // is OFF, because the OFF path passes allowed=null (strip everything) and + // never reads the prior content for an allow-list. + const SOURCE = ''; + const doc = buildYdoc({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } }, + ], + }); + await runGuard('member', doc, false, { + type: 'doc', + content: [{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } }], + }); + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + false, + ); + }); + + it('admin role: guard is a defensive no-op (embed preserved)', async () => { + const doc = buildYdoc({ + type: 'doc', + content: [ + { type: HTML_EMBED_NODE_NAME, attrs: { source: '' } }, + ], + }); + await runGuard('admin', doc, true, { type: 'doc', content: [] }); + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + true, + ); + }); + + it('no embed present: guard is a cheap no-op (loop-safe re-fire)', async () => { + const doc = buildYdoc({ + type: 'doc', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'plain' }] }], + }); + await runGuard('member', doc, true, { type: 'doc', content: [] }); + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + false, + ); + }); +}); diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 7c2ff963..b5dd49ed 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -40,6 +40,7 @@ import { } from '../constants'; import { TransclusionService } from '../../core/page/transclusion/transclusion.service'; import { + canAuthorHtmlEmbed, collectHtmlEmbedSources, hasHtmlEmbedNode, htmlEmbedAllowed, @@ -58,6 +59,21 @@ export class PersistenceExtension implements Extension { // coalescing window" per document and OR it across all edits in the window, // so the snapshot is marked 'agent' regardless of who wrote last. private agentTouched: Map = new Map(); + // Per-document debounce timers for the early htmlEmbed guard (Gitea #26). + // onChange schedules a short (~300ms) debounced strip that converges the + // shared ydoc for all connected clients well before the 10s store debounce, + // shrinking the pre-persist broadcast window of a non-admin's transient embed. + private htmlEmbedGuardTimers: Map = new Map(); + // Per-document cache of the workspace htmlEmbed toggle (Gitea #26). Populated + // in onLoadDocument (which already loads the page + has workspace context) and + // read in onChange to gate early-guard scheduling: when the toggle is OFF (the + // common default) we schedule NOTHING — no timer, no fromYdoc, no DB read — and + // rely on the onStoreDocument strip as the backstop (when OFF the embed does + // not execute in editable mode anyway). Cleared in afterUnloadDocument. + // STALENESS: if an admin flips the toggle ON mid-session this cache stays OFF + // until the document is reloaded, so the early guard won't schedule — accepted, + // the onStoreDocument backstop still strips on persist. + private htmlEmbedToggleByDoc: Map = new Map(); constructor( private readonly pageRepo: PageRepo, @@ -89,6 +105,23 @@ export class PersistenceExtension implements Extension { return; } + // Cache the workspace htmlEmbed toggle for this document (Gitea #26). We + // already have the page (hence its workspaceId) here, so resolve the toggle + // once and cache it keyed by documentName. onChange reads this to decide + // whether to schedule the early guard at all — when OFF we skip the guard + // entirely (no timer, no fromYdoc, no DB read). Cleared in + // afterUnloadDocument. See htmlEmbedToggleByDoc for the staleness note. + try { + const enabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(page.workspaceId))?.settings, + ); + this.htmlEmbedToggleByDoc.set(documentName, enabled); + } catch (err) { + // Fail OFF: if the toggle can't be resolved, never schedule the early + // guard; the onStoreDocument backstop still strips on persist. + this.htmlEmbedToggleByDoc.set(documentName, false); + } + if (page.ydoc) { this.logger.debug(`ydoc loaded from db: ${pageId}`); @@ -158,18 +191,25 @@ export class PersistenceExtension implements Extension { // the next store, and the persisted row plus every share/readonly read path // remain protected by the strip. // - // ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in - // the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs - // update to connected clients immediately, so a non-admin's transient - // htmlEmbed can execute in OTHER open editors' browsers in the brief window - // before this persist strips it. The exposure is limited to concurrent - // AUTHENTICATED space members who have the doc open with Edit rights - // (semi-trusted) — anonymous public-share/readonly viewers do NOT open a - // collab socket (ReadonlyPageEditor renders fetched, already-stripped - // content; HocuspocusProvider is only used by the authenticated editable - // page-editor), and the PERSISTED page row plus every share/readonly read - // path are protected by this strip. The window is therefore accepted rather - // than mitigated with an inbound beforeBroadcast strip. + // RESIDUAL RISK (pre-persist broadcast window) — NOW MITIGATED (Gitea #26): + // this strip runs in the debounced onStoreDocument (up to 10s), but + // hocuspocus broadcasts each inbound Yjs update to connected clients + // immediately, so a non-admin's transient htmlEmbed can execute in OTHER open + // editors' browsers in the window before this persist strips it. The exposure + // is limited to concurrent AUTHENTICATED space members who have the doc open + // with Edit rights (semi-trusted) — anonymous public-share/readonly viewers do + // NOT open a collab socket (ReadonlyPageEditor renders fetched, + // already-stripped content; HocuspocusProvider is only used by the + // authenticated editable page-editor), and the PERSISTED page row plus every + // share/readonly read path are protected by this strip. + // The window is now SHRUNK to sub-second by an onChange-debounced early guard + // (~300ms) — see guardHtmlEmbed() — which runs the SAME preserve/strip gate as + // this block and re-encodes the cleaned ydoc, converging the doc for all + // clients long before this 10s store debounce fires. This onStoreDocument + // strip remains the authoritative backstop for persistence. The irreducible + // residual is only the VERY FIRST inbound broadcast before the ~300ms debounce + // fires: hocuspocus exposes no synchronous beforeBroadcast filter to drop the + // node before that first relay, so it cannot be eliminated entirely. // Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature // toggle is ON and the storing user is an admin/owner. OFF (default) => // stripped for everyone (existing embeds get cleaned up on next save). @@ -389,12 +429,168 @@ export class PersistenceExtension implements Extension { if (data.context?.actor === 'agent') { this.agentTouched.set(documentName, true); } + + // Early htmlEmbed guard scheduling (Gitea #26). Schedule the short debounced + // guard ONLY when (a) this document's workspace toggle is cached ON and + // (b) the changing connection's user is a NON-admin (cannot author + // htmlEmbed). When the toggle is OFF/unknown we schedule NOTHING — no timer, + // no fromYdoc, no DB read — killing the OFF-case overhead (the common + // default); the onStoreDocument strip is the backstop and an OFF embed does + // not execute in editable mode anyway. We do NO expensive work here — we only + // (re)schedule the timer; the debounce coalesces rapid edits into a single + // guard check. + if ( + userId && + this.htmlEmbedToggleByDoc.get(documentName) === true && + !canAuthorHtmlEmbed(data.context?.user?.role) + ) { + const existing = this.htmlEmbedGuardTimers.get(documentName); + if (existing) { + clearTimeout(existing); + } + const timer = setTimeout(() => { + this.htmlEmbedGuardTimers.delete(documentName); + void this.guardHtmlEmbed(documentName, data.document, data.context); + }, 300); + this.htmlEmbedGuardTimers.set(documentName, timer); + } + } + + /** + * Early, onChange-debounced htmlEmbed strip (Gitea #26). Mirrors the + * onStoreDocument admin gate but runs ~300ms after a non-admin edit instead of + * waiting for the 10s store debounce, so a non-admin's transient embed is + * removed from the shared ydoc — and re-broadcast as cleaned state — for all + * connected clients in sub-second time. onStoreDocument remains the + * authoritative persistence backstop; this is an ADDITIONAL early pass. + * + * CONCURRENCY (the critical invariant): the Y.Doc mutation is a single + * SYNCHRONOUS block with NO `await` between the fromYdoc snapshot and the + * applyUpdate write. ALL async work (the workspace toggle lookup and the + * persisted-content read for the allow-list) happens FIRST, before that block. + * Because JS is single-threaded, a synchronous block cannot interleave with + * inbound Yjs update handlers, so a concurrent edit that lands while we await + * cannot be CLOBBERED: we re-snapshot the live doc only after all awaits, then + * delete + rebuild + applyUpdate without yielding. (An earlier version awaited + * DB reads BETWEEN the snapshot and the write, so a concurrent edit in that gap + * was lost — this restructure fixes that.) + * + * The allow-list is a best-effort snapshot read outside any lock (TOCTOU + * accepted, same as onStoreDocument): worst case one embed is transiently kept + * or dropped; it converges on the next guard/store, with no auth bypass. + * + * Loop-safety: the corrective applyUpdate has a null origin, so the re-fired + * onChange carries no userId and is not rescheduled; and after a strip no + * htmlEmbed remains, so a subsequent guard fire is a cheap no-op (the + * hasHtmlEmbedNode early-exit). NEVER throws — an unhandled rejection in a timer + * would crash the process — so the whole body is wrapped in try/catch. + */ + private async guardHtmlEmbed( + documentName: string, + document: Y.Doc, + context: any, + ): Promise { + // Defensive: ensure no stale timer entry survives for this document. + this.htmlEmbedGuardTimers.delete(documentName); + try { + // Re-check defensively: onChange only schedules for non-admins, but if an + // admin/owner somehow reaches here, the embed is authored content — do + // nothing (onStoreDocument's toggle-AND-admin gate handles persistence). + if (canAuthorHtmlEmbed(context?.user?.role)) { + return; + } + + // ---- ASYNC PHASE: do ALL awaits up front, before touching the ydoc. ---- + // Resolve the workspace toggle exactly as onStoreDocument does. When OFF we + // strip everything; when ON we use the preserve logic (keep admin-vetted + // embeds, strip only the non-admin's newly-introduced ones). + const enabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(context?.user?.workspaceId)) + ?.settings, + ); + + // The allow-list (admin-vetted sources already in the persisted content). + // null => strip ALL (toggle OFF). Read here, BEFORE the synchronous block, + // so no await sits between the doc snapshot and the doc write. + let allowed: Set | null = null; + if (enabled !== false) { + const prior = await this.pageRepo.findById(getPageId(documentName), { + includeContent: true, + }); + allowed = collectHtmlEmbedSources(prior?.content); + } + + // The awaits above may have let the document be unloaded/destroyed. If so, + // bail — mutating a destroyed doc is pointless and could throw (the + // try/catch is the ultimate safety net regardless). + if ((document as { isDestroyed?: boolean }).isDestroyed) { + return; + } + + // ---- SYNCHRONOUS PHASE: snapshot -> strip -> reflect, NO await here. ---- + // Because there is no await between fromYdoc and applyUpdate, no inbound + // Yjs update can interleave, so a concurrent edit cannot be lost. + const json = TiptapTransformer.fromYdoc(document, 'default'); + + // Cheap exit: nothing to guard if the doc has no embed at all. This is also + // why a post-strip re-fire is a no-op (loop-safe). + if (!hasHtmlEmbedNode(json)) { + return; + } + + const strippedJson = + allowed === null + ? stripHtmlEmbedNodes(json) + : stripDisallowedHtmlEmbedNodes(json, allowed); + + // Nothing was stripped (e.g. the only embed is an admin-vetted one) — do + // not churn the shared ydoc for all clients. + if (isDeepStrictEqual(strippedJson, json)) { + return; + } + + // Reflect the stripped content back into the shared ydoc EXACTLY as + // onStoreDocument does, so the node is removed for all connected clients, + // not just on the eventual persist. This re-encode broadcasts the cleaned + // state; after it hasHtmlEmbedNode is false, so any later guard fire is a + // cheap no-op (loop-safe). + const fragment = document.getXmlFragment('default'); + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + const cleanDoc = TiptapTransformer.toYdoc( + strippedJson, + 'default', + tiptapExtensions, + ); + Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc)); + + this.logger.warn( + `Stripping htmlEmbed node(s) via early onChange guard by user ${context?.user?.id} on ${documentName}`, + ); + } catch (err) { + // NEVER rethrow out of a timer-scheduled call. + this.logger.error( + `Early htmlEmbed guard failed on ${documentName}`, + err, + ); + } } async afterUnloadDocument(data: afterUnloadDocumentPayload) { const documentName = data.documentName; this.contributors.delete(documentName); this.agentTouched.delete(documentName); + // Drop the cached toggle for this document so a reload re-resolves it (and + // picks up a mid-session admin toggle flip). + this.htmlEmbedToggleByDoc.delete(documentName); + // Clear any pending early-guard timer so it cannot fire after the document + // is unloaded (leak / use-after-unload prevention). + const timer = this.htmlEmbedGuardTimers.get(documentName); + if (timer) { + clearTimeout(timer); + this.htmlEmbedGuardTimers.delete(documentName); + } } private consumeContributors(documentName: string): string[] { -- 2.49.1