From 859223db1a341596e32d7dd50674ff1aca43b41c Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:15:43 +0300 Subject: [PATCH 01/16] fix(page-templates): show a template marker icon in the page tree (#38) Template pages were toggleable but indistinguishable in the sidebar tree. Render an IconTemplate next to the title when node.isTemplate is true, wrapped in a Tooltip(label='Template') with an aria-label + role='img' for AT. The icon is a child of the row Link so clicks navigate as normal; pointer events stay enabled so the tooltip's hover handlers fire. Adds the 'Template' i18n key to en-US and ru-RU (other locales fall back to en-US). Co-Authored-By: Claude Opus 4.8 --- .../public/locales/en-US/translation.json | 1 + .../public/locales/ru-RU/translation.json | 1 + .../page/tree/components/space-tree-row.tsx | 22 ++++++++++++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index c04fc72d..651800ee 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -183,6 +183,7 @@ "Successfully imported": "Successfully imported", "Successfully restored": "Successfully restored", "System settings": "System settings", + "Template": "Template", "Templates": "Templates", "Theme": "Theme", "To change your email, you have to enter your password and new email.": "To change your email, you have to enter your password and new email.", diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index 25ff2530..238c42fd 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -183,6 +183,7 @@ "Successfully imported": "Успешно импортировано", "Successfully restored": "Успешно восстановлено", "System settings": "Системные настройки", + "Template": "Шаблон", "Templates": "Шаблоны", "Theme": "Тема", "To change your email, you have to enter your password and new email.": "Чтобы изменить электронную почту, вам нужно ввести пароль и новый адрес.", diff --git a/apps/client/src/features/page/tree/components/space-tree-row.tsx b/apps/client/src/features/page/tree/components/space-tree-row.tsx index df371498..c5c08b1c 100644 --- a/apps/client/src/features/page/tree/components/space-tree-row.tsx +++ b/apps/client/src/features/page/tree/components/space-tree-row.tsx @@ -2,13 +2,14 @@ import { useRef } from "react"; import { Link, useParams } from "react-router-dom"; import { useAtom } from "jotai"; import { useTranslation } from "react-i18next"; -import { ActionIcon, rem } from "@mantine/core"; +import { ActionIcon, rem, Tooltip } from "@mantine/core"; import { IconChevronDown, IconChevronRight, IconFileDescription, IconPlus, IconPointFilled, + IconTemplate, } from "@tabler/icons-react"; import EmojiPicker from "@/components/ui/emoji-picker.tsx"; @@ -171,6 +172,25 @@ export function SpaceTreeRow({ {node.name || t("untitled")} + {node.isTemplate === true && ( + + + + )} +
From c9eb495688b49c508afe423b8d77e55a436af4f6 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:21:32 +0300 Subject: [PATCH 02/16] fix(page-templates): clean up page-embed node chrome (#39) Two design problems on the whole-page embed (pageEmbed) node: - Double selection frame: the generic square cyan .ProseMirror-selectednode outline stacked on top of the rounded .includeWrap border. Add node-pageEmbed to the existing outline:none rule (already covering the transclusion nodes) so only the single rounded border remains. - Redundant 'open source' controls: the floating toolbar's external-link button duplicated the header badge title link. Remove the toolbar button; the badge title is now the single way to open the source (kept Refresh + ... menu). Also swap the badge fallback icon IconArrowsMaximize (read as 'expand') for a neutral IconFileText. Follow-ups from review: render the badge whenever the source resolves (so the only open-source link can't vanish when title+icon are empty), and label the link (title/aria-label) + add the 'Open source page' i18n key (en-US, ru-RU). Co-Authored-By: Claude Opus 4.8 --- .../public/locales/en-US/translation.json | 1 + .../public/locales/ru-RU/translation.json | 1 + .../components/page-embed/page-embed-view.tsx | 26 ++++++------------- .../transclusion/transclusion.module.css | 3 ++- 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index 651800ee..0f6a1a9f 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -474,6 +474,7 @@ "Make sub-pages public too": "Make sub-pages public too", "Allow search engines to index page": "Allow search engines to index page", "Open page": "Open page", + "Open source page": "Open source page", "Page": "Page", "Delete public share link": "Delete public share link", "Delete share": "Delete share", diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json index 238c42fd..ef5d3dc7 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -472,6 +472,7 @@ "Make sub-pages public too": "Сделать подстраницы тоже общедоступными", "Allow search engines to index page": "Разрешить поисковым системам индексировать страницу", "Open page": "Открыть страницу", + "Open source page": "Открыть исходную страницу", "Page": "Страница", "Delete public share link": "Удалить публичную ссылку", "Delete share": "Удалить общий доступ", diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx b/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx index 63890eec..707a051f 100644 --- a/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx +++ b/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx @@ -2,10 +2,9 @@ import { NodeViewProps, NodeViewWrapper } from "@tiptap/react"; import { ActionIcon, Menu, Tooltip } from "@mantine/core"; import { IconAlertTriangle, - IconArrowsMaximize, IconDots, - IconExternalLink, IconEyeOff, + IconFileText, IconInfoCircle, IconRefresh, IconRepeat, @@ -136,20 +135,6 @@ function PageEmbedBody({ - {sourceHref && ( - - - - - - )} @@ -170,13 +155,18 @@ function PageEmbedBody({ ) : null; const header = - sourceTitle || sourceIcon ? ( + // Render the badge whenever the source resolves (sourceHref), not only when + // it has a title/icon — the title link is now the single way to open the + // source, so it must not disappear when title and icon are both empty. + sourceTitle || sourceIcon || sourceHref ? (
- {sourceIcon ? `${sourceIcon} ` : } + {sourceIcon ? `${sourceIcon} ` : } {sourceHref ? ( {sourceTitle || t("Untitled")} diff --git a/apps/client/src/features/editor/components/transclusion/transclusion.module.css b/apps/client/src/features/editor/components/transclusion/transclusion.module.css index 4d8d321a..168da0c7 100644 --- a/apps/client/src/features/editor/components/transclusion/transclusion.module.css +++ b/apps/client/src/features/editor/components/transclusion/transclusion.module.css @@ -183,7 +183,8 @@ } :global(.react-renderer.node-transclusionSource.ProseMirror-selectednode), -:global(.react-renderer.node-transclusionReference.ProseMirror-selectednode) { +:global(.react-renderer.node-transclusionReference.ProseMirror-selectednode), +:global(.react-renderer.node-pageEmbed.ProseMirror-selectednode) { outline: none; } From b8655ae52c70cf391988fc9a974fc89139421b6e Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:26:42 +0300 Subject: [PATCH 03/16] fix(page-templates): make page-embed Refresh actually re-render (#40) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The read-only embed renderer mounts a Tiptap EditorProvider with the looked-up content, but Tiptap consumes the `content` option only at initial mount. After Refresh busted the lookup cache and re-fetched fresh content, the new content prop never reached the sub-editor, so the embed appeared not to update at all. Key PageEmbedContent on result.sourceUpdatedAt (the source page's updatedAt, already returned by the lookup and bumped on every persisted content change) so the component and its EditorProvider remount and apply the refreshed content when the source changes. Note: server-side freshness vs. live collab edits is bounded by the 10s persist debounce (collaboration.gateway.ts) — that separate limitation stays documented in #40 and is out of scope here; this commit fixes the client never re-rendering. Co-Authored-By: Claude Opus 4.8 --- .../editor/components/page-embed/page-embed-view.tsx | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx b/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx index 707a051f..b51607db 100644 --- a/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx +++ b/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx @@ -214,7 +214,17 @@ function PageEmbedBody({ sourcePageId={sourcePageId} hostPageId={hostPageId} > - + {/* + Tiptap's EditorProvider consumes `content` only at initial mount, so a + changed `content` prop (e.g. after Refresh re-fetches fresh content) + would not update the read-only sub-editor. Key on the source's + updatedAt to remount PageEmbedContent (and its inner EditorProvider) + whenever the source page changes, applying the refreshed content. + */} + ); } else if (result.status === "no_access") { From 4536d27ad2834a94e945e8935e948e61452b909a Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:42:30 +0300 Subject: [PATCH 04/16] fix(page-templates): never strand a page-embed id in-flight (#35) In the page-embed lookup flush(), the success branch cleared inFlightRef and resolved waiters only for ids present in the response items. A short/partial server response would leave a requested id stuck in inFlightRef forever (the subscribe/refresh path is guarded by !inFlightRef.has(id)) and its refresh() promise would never resolve. After processing returned items, also clear + resolve any requested id that wasn't returned, mirroring the catch branch. Cannot trigger under today's exact-mapping server contract; this is hardening. Co-Authored-By: Claude Opus 4.8 --- .../page-embed/page-embed-lookup-context.tsx | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-lookup-context.tsx b/apps/client/src/features/editor/components/page-embed/page-embed-lookup-context.tsx index aa2a8caf..d29c19dc 100644 --- a/apps/client/src/features/editor/components/page-embed/page-embed-lookup-context.tsx +++ b/apps/client/src/features/editor/components/page-embed/page-embed-lookup-context.tsx @@ -55,7 +55,9 @@ export function PageEmbedLookupProvider({ try { const { items } = await lookupTemplate({ sourcePageIds: ids }); + const returned = new Set(); for (const r of items) { + returned.add(r.sourcePageId); resultCacheRef.current.set(r.sourcePageId, r); inFlightRef.current.delete(r.sourcePageId); const subs = subscribersRef.current.get(r.sourcePageId); @@ -64,6 +66,17 @@ export function PageEmbedLookupProvider({ } resolveWaiters(r.sourcePageId); } + // Harden against a partial/short server response: any requested id not + // present in `items` would otherwise stay in `inFlightRef` forever + // (subscribe/refresh are guarded by `!inFlightRef.has(id)`) and its + // refresh() promise would never resolve. Clear + resolve those ids, + // mirroring the catch branch, so no id can be stranded in-flight. + for (const id of ids) { + if (!returned.has(id)) { + inFlightRef.current.delete(id); + resolveWaiters(id); + } + } } catch (err) { // Surface the failure: errors must never be swallowed silently. console.error("[pageEmbed] template lookup failed", err); From 22887c474a937f44163b8b986ddf3f67009057bd Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:42:30 +0300 Subject: [PATCH 05/16] chore(page-templates): tidy ts suppression in duplicatePage pageEmbed remap (#37) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace bare //@ts-ignore (no space, no reason) with // @ts-expect-error plus a reason on the pageEmbed sourcePageId reassignment, matching the codebase style. ProseMirror Attrs is read-only typed, so the reassignment genuinely errors — @ts-expect-error is valid here. Co-Authored-By: Claude Opus 4.8 --- apps/server/src/core/page/services/page.service.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/server/src/core/page/services/page.service.ts b/apps/server/src/core/page/services/page.service.ts index 5373801d..8e5c28b6 100644 --- a/apps/server/src/core/page/services/page.service.ts +++ b/apps/server/src/core/page/services/page.service.ts @@ -716,7 +716,7 @@ export class PageService { const sourcePageId = node.attrs.sourcePageId; if (sourcePageId && pageMap.has(sourcePageId)) { const mappedPage = pageMap.get(sourcePageId); - //@ts-ignore + // @ts-expect-error ProseMirror Attrs is read-only typed; reassigning sourcePageId to the duplicated page copy is intentional here node.attrs.sourcePageId = mappedPage.newPageId; } } From a15cccf5579a2c70cfe09c92df730aafa1615326 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:42:49 +0300 Subject: [PATCH 06/16] chore(page-templates): remove dead findReferencePageIdsBySource (#34) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'used in N pages' reverse-navigation method had zero callers in the merged PR #17 — unreachable, untested code. Remove it. The reverse-navigation feature can be (re)added with the method if/when it's actually built. Co-Authored-By: Claude Opus 4.8 --- .../page-template-references.repo.ts | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/apps/server/src/database/repos/page-template-references/page-template-references.repo.ts b/apps/server/src/database/repos/page-template-references/page-template-references.repo.ts index a678422f..8493e901 100644 --- a/apps/server/src/database/repos/page-template-references/page-template-references.repo.ts +++ b/apps/server/src/database/repos/page-template-references/page-template-references.repo.ts @@ -22,21 +22,6 @@ export class PageTemplateReferencesRepo { .execute(); } - async findReferencePageIdsBySource( - sourcePageId: string, - workspaceId: string, - trx?: KyselyTransaction, - ): Promise { - const rows = await dbOrTx(this.db, trx) - .selectFrom('pageTemplateReferences') - .select('referencePageId') - .distinct() - .where('workspaceId', '=', workspaceId) - .where('sourcePageId', '=', sourcePageId) - .execute(); - return rows.map((r) => r.referencePageId); - } - async insertMany( rows: InsertablePageTemplateReference[], trx?: KyselyTransaction, From 79d096ed7a091b3a7855ee9d39b9957e64fcdba5 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:42:49 +0300 Subject: [PATCH 07/16] fix(page-templates): defense-in-depth workspace checks (#36) Consistency hardening from #17 review (not currently exploitable): - toggleTemplate now explicitly rejects a page outside the caller's workspace (page.workspaceId !== user.workspaceId -> NotFound, avoiding existence leak) instead of relying solely on the space-membership model. - PageTemplateReferencesRepo.deleteByReferenceAndSources is now workspace-scoped (adds a workspaceId filter + param), matching the 'scope by workspaceId everywhere' invariant; the sole caller threads its workspaceId. The PAGE_TEMPLATE_THROTTLER limit is intentionally left as-is (the issue's throttle item was 'consider only'; no change without usage data). Co-Authored-By: Claude Opus 4.8 --- .../src/core/page/transclusion/page-template.controller.ts | 6 ++++++ .../src/core/page/transclusion/transclusion.service.ts | 1 + .../page-template-references.repo.ts | 3 +++ 3 files changed, 10 insertions(+) diff --git a/apps/server/src/core/page/transclusion/page-template.controller.ts b/apps/server/src/core/page/transclusion/page-template.controller.ts index 555a487f..db20ea42 100644 --- a/apps/server/src/core/page/transclusion/page-template.controller.ts +++ b/apps/server/src/core/page/transclusion/page-template.controller.ts @@ -67,6 +67,12 @@ export class PageTemplateController { throw new NotFoundException('Page not found'); } + if (page.workspaceId !== user.workspaceId) { + // Defense-in-depth: never act on a page outside the caller's workspace. + // Use NotFound (not Forbidden) to avoid leaking cross-workspace existence. + throw new NotFoundException('Page not found'); + } + await this.pageAccessService.validateCanEdit(page, user); const isTemplate = diff --git a/apps/server/src/core/page/transclusion/transclusion.service.ts b/apps/server/src/core/page/transclusion/transclusion.service.ts index f8f3b464..76bb8cfb 100644 --- a/apps/server/src/core/page/transclusion/transclusion.service.ts +++ b/apps/server/src/core/page/transclusion/transclusion.service.ts @@ -317,6 +317,7 @@ export class TransclusionService { if (toDelete.length > 0) { await this.pageTemplateReferencesRepo.deleteByReferenceAndSources( referencePageId, + workspaceId, toDelete, trx, ); diff --git a/apps/server/src/database/repos/page-template-references/page-template-references.repo.ts b/apps/server/src/database/repos/page-template-references/page-template-references.repo.ts index 8493e901..ac358bc6 100644 --- a/apps/server/src/database/repos/page-template-references/page-template-references.repo.ts +++ b/apps/server/src/database/repos/page-template-references/page-template-references.repo.ts @@ -38,12 +38,15 @@ export class PageTemplateReferencesRepo { async deleteByReferenceAndSources( referencePageId: string, + workspaceId: string, sourcePageIds: string[], trx?: KyselyTransaction, ): Promise { if (sourcePageIds.length === 0) return; await dbOrTx(this.db, trx) .deleteFrom('pageTemplateReferences') + // Defense-in-depth: scope deletes to the caller's workspace. + .where('workspaceId', '=', workspaceId) .where('referencePageId', '=', referencePageId) .where('sourcePageId', 'in', sourcePageIds) .execute(); From 6a052b88b496e15718f61db8c2d1bc6f28283df3 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:52:32 +0300 Subject: [PATCH 08/16] fix(html-embed): strip embeds at serve time on authenticated read paths (#28) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Completes the workspace htmlEmbed kill-switch. The public-share path already strips at serve time when the toggle is OFF, but the authenticated read paths (/info and /history/info) returned page/history content with embeds intact, so a disabled feature kept executing for in-workspace view-only viewers until the page was next saved. Now both paths resolve the workspace toggle and run stripHtmlEmbedNodes when it's OFF (fail-closed on a missing workspace), before any markdown/html format conversion. Admin-authored content only — completeness, not privilege escalation. Injects WorkspaceRepo into PageController. Co-Authored-By: Claude Opus 4.8 --- apps/server/src/core/page/page.controller.ts | 28 ++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/apps/server/src/core/page/page.controller.ts b/apps/server/src/core/page/page.controller.ts index 968480c9..28ec083e 100644 --- a/apps/server/src/core/page/page.controller.ts +++ b/apps/server/src/core/page/page.controller.ts @@ -39,6 +39,11 @@ import { } from '../casl/interfaces/space-ability.type'; import SpaceAbilityFactory from '../casl/abilities/space-ability.factory'; import { PageRepo } from '@docmost/db/repos/page/page.repo'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; +import { + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../../common/helpers/prosemirror/html-embed.util'; import { RecentPageDto } from './dto/recent-page.dto'; import { CreatedByUserDto } from './dto/created-by-user.dto'; import { DuplicatePageDto } from './dto/duplicate-page.dto'; @@ -63,6 +68,7 @@ export class PageController { constructor( private readonly pageService: PageService, private readonly pageRepo: PageRepo, + private readonly workspaceRepo: WorkspaceRepo, private readonly pageHistoryService: PageHistoryService, private readonly spaceAbility: SpaceAbilityFactory, private readonly pageAccessService: PageAccessService, @@ -92,6 +98,18 @@ export class PageController { const permissions = { canEdit, hasRestriction }; + if (page.content) { + const workspace = await this.workspaceRepo.findById(page.workspaceId); + if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) { + // Kill-switch: when the workspace feature is OFF, never serve raw + // htmlEmbed nodes on the read path (mirrors the public-share strip), + // so disabling the feature is an immediate, total kill-switch and not + // dependent on the page being re-saved. Admin-authored content only. + // Fail-closed: a missing workspace resolves to OFF and is stripped. + page.content = stripHtmlEmbedNodes(page.content) as any; + } + } + if (dto.format && dto.format !== 'json' && page.content) { const contentOutput = dto.format === 'markdown' @@ -536,6 +554,16 @@ export class PageController { await this.pageAccessService.validateCanView(page, user); + if (history.content) { + const workspace = await this.workspaceRepo.findById(page.workspaceId); + if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) { + // Kill-switch: history snapshots are an authenticated read path too, so + // strip htmlEmbed when the workspace feature is OFF (same as /info and + // the public-share path). Fail-closed on a missing workspace. + history.content = stripHtmlEmbedNodes(history.content) as any; + } + } + return history; } From 8ee4279d300c2d364d030ad8eea88b935bf076db Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 21:52:32 +0300 Subject: [PATCH 09/16] harden(html-embed): make stripHtmlEmbedNodes total with a root-type check (#30) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stripHtmlEmbedNodes only filtered children, so a (never-in-practice) bare htmlEmbed root node would be returned as-is. Add a defensive root check that returns an embed-free doc, making the helper total — it can never return a node for which hasHtmlEmbedNode is true. Adds a unit test for the root case. Co-Authored-By: Claude Opus 4.8 --- .../src/common/helpers/prosemirror/html-embed.spec.ts | 11 +++++++++++ .../src/common/helpers/prosemirror/html-embed.util.ts | 9 +++++++++ 2 files changed, 20 insertions(+) diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts index 6b07ec0b..b48e9e73 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts @@ -92,6 +92,17 @@ describe('stripHtmlEmbedNodes', () => { const result = stripHtmlEmbedNodes(doc); expect(result).toEqual(doc); }); + + it('neutralizes a root node that is itself an htmlEmbed', () => { + // Defensive: the PM root is always a `doc`, so this is unreachable in normal + // use, but the helper must still never return a bare htmlEmbed. + const root = { + type: 'htmlEmbed', + attrs: { source: '' }, + }; + const result = stripHtmlEmbedNodes(root); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); }); describe('canAuthorHtmlEmbed', () => { diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts index f1d0b6e5..aa5d579d 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts @@ -22,6 +22,15 @@ export function stripHtmlEmbedNodes(pmJson: T): T { const node = pmJson as unknown as JSONContent; + // Defensive root-type check: if the ROOT node is itself an htmlEmbed, the + // children-filtering below could never drop it, so a bare htmlEmbed would be + // returned as-is. This branch is unreachable in normal use (the PM document + // root is always a `doc`) and exists only to make the helper total — a bare + // htmlEmbed can never be returned by this function. + if (node.type === HTML_EMBED_NODE_NAME) { + return { type: 'doc', content: [] } as unknown as T; + } + if (Array.isArray(node.content)) { const filtered: JSONContent[] = []; for (const child of node.content) { From 4f46f91db419536f3c45a93c4e39518c91c970fb Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 22:22:56 +0300 Subject: [PATCH 10/16] test(page-templates): fix TransclusionService spec constructor arity The transclusion specs predated two added constructor params, so they failed to compile (TS2554: expected 11 args, got 10) and the suites couldn't run. Add the missing mock args: workspaceRepo (param 11) in the lookup/access specs, and pageTemplateReferencesRepo (param 4, which had shifted pageRepo into the wrong slot) in the unsync-html-embed spec. All three suites now compile and pass. Co-Authored-By: Claude Opus 4.8 --- .../core/page/transclusion/spec/page-template-access.spec.ts | 4 +++- .../core/page/transclusion/spec/page-template-lookup.spec.ts | 1 + .../transclusion/spec/transclusion-unsync-html-embed.spec.ts | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts index 3c497d80..1f16605b 100644 --- a/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts +++ b/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts @@ -68,6 +68,7 @@ describe('TransclusionService — template access core (real filter)', () => { {} as any, // attachmentRepo {} as any, // storageService {} as any, // pageAccessService + {} as any, // workspaceRepo ); return { service, db, pageRepo, spaceMemberRepo, pagePermissionRepo }; @@ -216,7 +217,8 @@ describe('TransclusionService.syncPageTemplateReferences — workspace scoping', {} as any, {} as any, {} as any, - {} as any, + {} as any, // pageAccessService + {} as any, // workspaceRepo ); return { service, insertMany, pageTemplateReferencesRepo }; diff --git a/apps/server/src/core/page/transclusion/spec/page-template-lookup.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-lookup.spec.ts index f62a047c..0ecd306e 100644 --- a/apps/server/src/core/page/transclusion/spec/page-template-lookup.spec.ts +++ b/apps/server/src/core/page/transclusion/spec/page-template-lookup.spec.ts @@ -34,6 +34,7 @@ describe('TransclusionService.lookupTemplate (access mapping)', () => { {} as any, // attachmentRepo {} as any, // storageService {} as any, // pageAccessService + {} as any, // workspaceRepo ); jest diff --git a/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts b/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts index 8ad13121..4d149369 100644 --- a/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts +++ b/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts @@ -56,6 +56,7 @@ function buildService(featureEnabled = true) { {} as any, // db (unused on this path) pageTransclusionsRepo as any, pageTransclusionReferencesRepo as any, + {} as any, // pageTemplateReferencesRepo (unused on this path) pageRepo as any, {} as any, // pagePermissionRepo (unused) {} as any, // spaceMemberRepo (unused) From 98769155d3bf5efdeafdcf54286ecb92953513e9 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 22:37:35 +0300 Subject: [PATCH 11/16] test(page-templates): cover client pageEmbed cycle/self-embed/depth guard (#31) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cycle/self-embed/depth guard (PAGE_EMBED_MAX_DEPTH=5) lives only on the client and is the sole protection against runaway nested rendering — and was untested. Extract the inline predicates into pure, behavior-identical exported helpers (isPageEmbedCycle, isPageEmbedTooDeep in the ancestry context; filterPageEmbedOptions in the picker) so they're unit-testable without mounting the heavy Tiptap NodeView, and add vitest coverage (20 tests): ancestry chain/ host accumulation, cycle (ancestor-in-chain + top-level self-embed), too-deep at the cap, and picker host-exclusion. Co-Authored-By: Claude Opus 4.8 --- .../page-embed-ancestry-context.test.tsx | 149 ++++++++++++++++++ .../page-embed-ancestry-context.tsx | 23 +++ .../page-embed/page-embed-picker.test.ts | 44 ++++++ .../page-embed/page-embed-picker.tsx | 16 +- .../components/page-embed/page-embed-view.tsx | 14 +- 5 files changed, 237 insertions(+), 9 deletions(-) create mode 100644 apps/client/src/features/editor/components/page-embed/page-embed-ancestry-context.test.tsx create mode 100644 apps/client/src/features/editor/components/page-embed/page-embed-picker.test.ts diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-ancestry-context.test.tsx b/apps/client/src/features/editor/components/page-embed/page-embed-ancestry-context.test.tsx new file mode 100644 index 00000000..867922d2 --- /dev/null +++ b/apps/client/src/features/editor/components/page-embed/page-embed-ancestry-context.test.tsx @@ -0,0 +1,149 @@ +import { describe, it, expect } from "vitest"; +import { render, screen } from "@testing-library/react"; +import { + PageEmbedAncestryProvider, + usePageEmbedAncestry, + isPageEmbedCycle, + isPageEmbedTooDeep, + PAGE_EMBED_MAX_DEPTH, +} from "./page-embed-ancestry-context"; + +/** + * Tiny probe that renders the current ancestry context as serialized data + * attributes so tests can assert the accumulated chain / threaded hostPageId + * without mounting the heavy Tiptap node view. + */ +function AncestryProbe({ testId = "probe" }: { testId?: string }) { + const { chain, hostPageId } = usePageEmbedAncestry(); + return ( + + ); +} + +describe("PageEmbedAncestryProvider", () => { + it("defaults to an empty chain and null host with no provider", () => { + render(); + const probe = screen.getByTestId("probe"); + expect(probe.getAttribute("data-chain")).toBe(""); + expect(probe.getAttribute("data-chain-length")).toBe("0"); + expect(probe.getAttribute("data-host")).toBe(""); + }); + + it("accumulates sourcePageId into the chain across nested providers", () => { + render( + + + + + + + , + ); + const probe = screen.getByTestId("probe"); + // Chain is built outermost -> innermost. + expect(probe.getAttribute("data-chain")).toBe("a,b,c"); + expect(probe.getAttribute("data-chain-length")).toBe("3"); + }); + + it("threads the host page id from the outermost provider down the tree", () => { + render( + + + + + , + ); + const probe = screen.getByTestId("probe"); + // The first host wins (parent.hostPageId ?? hostPageId); deeper hosts are + // ignored so the original host is preserved for self-embed detection. + expect(probe.getAttribute("data-host")).toBe("host-page"); + }); + + it("does not add an entry to the chain when sourcePageId is missing", () => { + render( + + + + + + + , + ); + const probe = screen.getByTestId("probe"); + // null / undefined sources are pass-through: chain stays ["a"], host kept. + expect(probe.getAttribute("data-chain")).toBe("a"); + expect(probe.getAttribute("data-host")).toBe("host"); + }); + + it("adopts a host provided only at a deeper level when the root had none", () => { + render( + + + + + , + ); + const probe = screen.getByTestId("probe"); + expect(probe.getAttribute("data-host")).toBe("late-host"); + }); +}); + +describe("isPageEmbedCycle", () => { + it("is false when the source is not in the chain and is not the host", () => { + expect(isPageEmbedCycle(["a", "b"], "host", "c")).toBe(false); + }); + + it("is true when the source is already present in the ancestor chain", () => { + expect(isPageEmbedCycle(["a", "b", "c"], "host", "b")).toBe(true); + }); + + it("is true for a top-level self-embed (host === source, empty chain)", () => { + expect(isPageEmbedCycle([], "self", "self")).toBe(true); + }); + + it("is true when the source equals the host even mid-chain", () => { + expect(isPageEmbedCycle(["x"], "self", "self")).toBe(true); + }); + + it("is false when there is no source id (nothing to embed yet)", () => { + expect(isPageEmbedCycle(["a"], "host", null)).toBe(false); + expect(isPageEmbedCycle([], "host", "")).toBe(false); + }); + + it("is false when host is null and source is not in the chain", () => { + expect(isPageEmbedCycle(["a", "b"], null, "c")).toBe(false); + }); +}); + +describe("isPageEmbedTooDeep", () => { + it("is false below the max depth", () => { + expect(isPageEmbedTooDeep([])).toBe(false); + expect( + isPageEmbedTooDeep(new Array(PAGE_EMBED_MAX_DEPTH - 1).fill("x")), + ).toBe(false); + }); + + it("is true once the chain length reaches the max depth", () => { + expect( + isPageEmbedTooDeep(new Array(PAGE_EMBED_MAX_DEPTH).fill("x")), + ).toBe(true); + }); + + it("is true when the chain length exceeds the max depth", () => { + expect( + isPageEmbedTooDeep(new Array(PAGE_EMBED_MAX_DEPTH + 3).fill("x")), + ).toBe(true); + }); + + it("guards at exactly PAGE_EMBED_MAX_DEPTH (=5)", () => { + // Pin the documented constant so an accidental change is caught. + expect(PAGE_EMBED_MAX_DEPTH).toBe(5); + expect(isPageEmbedTooDeep(["1", "2", "3", "4"])).toBe(false); + expect(isPageEmbedTooDeep(["1", "2", "3", "4", "5"])).toBe(true); + }); +}); diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-ancestry-context.tsx b/apps/client/src/features/editor/components/page-embed/page-embed-ancestry-context.tsx index c989ee21..cdd7f109 100644 --- a/apps/client/src/features/editor/components/page-embed/page-embed-ancestry-context.tsx +++ b/apps/client/src/features/editor/components/page-embed/page-embed-ancestry-context.tsx @@ -51,3 +51,26 @@ export function PageEmbedAncestryProvider({ export function usePageEmbedAncestry() { return useContext(PageEmbedAncestryContext); } + +/** + * Pure cycle predicate used by the page-embed node view. Returns true when the + * source page would recurse into itself: either it is already present in the + * ancestor chain, or it is the host page (top-level self-embed). Extracted so + * the anti-DoS guard can be unit-tested without mounting the Tiptap NodeView. + */ +export function isPageEmbedCycle( + chain: string[], + hostPageId: string | null, + sourcePageId: string | null, +): boolean { + if (!sourcePageId) return false; + return chain.includes(sourcePageId) || hostPageId === sourcePageId; +} + +/** + * Pure depth-limit predicate. Returns true once the ancestor chain has reached + * the hard cap, before a deeper nested editor is mounted. + */ +export function isPageEmbedTooDeep(chain: string[]): boolean { + return chain.length >= PAGE_EMBED_MAX_DEPTH; +} diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-picker.test.ts b/apps/client/src/features/editor/components/page-embed/page-embed-picker.test.ts new file mode 100644 index 00000000..257f09fd --- /dev/null +++ b/apps/client/src/features/editor/components/page-embed/page-embed-picker.test.ts @@ -0,0 +1,44 @@ +import { describe, it, expect } from "vitest"; +import { filterPageEmbedOptions } from "./page-embed-picker"; + +type Page = { id: string; title?: string }; + +describe("filterPageEmbedOptions", () => { + const pages: Page[] = [ + { id: "p1", title: "One" }, + { id: "host", title: "Host" }, + { id: "p2", title: "Two" }, + ]; + + it("excludes the host page from the options (self-embed guard)", () => { + const result = filterPageEmbedOptions(pages, "host"); + expect(result.map((p) => p.id)).toEqual(["p1", "p2"]); + }); + + it("keeps all pages when the host id matches nothing", () => { + const result = filterPageEmbedOptions(pages, "other"); + expect(result.map((p) => p.id)).toEqual(["p1", "host", "p2"]); + }); + + it("keeps all pages when no host id is provided", () => { + const result = filterPageEmbedOptions(pages, undefined); + expect(result.map((p) => p.id)).toEqual(["p1", "host", "p2"]); + }); + + it("drops nullish entries defensively", () => { + const dirty = [ + { id: "p1" }, + null as unknown as Page, + undefined as unknown as Page, + { id: "p2" }, + ]; + const result = filterPageEmbedOptions(dirty, "host"); + expect(result.map((p) => p.id)).toEqual(["p1", "p2"]); + }); + + it("returns an empty array for nullish input", () => { + expect( + filterPageEmbedOptions(null as unknown as Page[], "host"), + ).toEqual([]); + }); +}); diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-picker.tsx b/apps/client/src/features/editor/components/page-embed/page-embed-picker.tsx index 7648b05e..5e914a57 100644 --- a/apps/client/src/features/editor/components/page-embed/page-embed-picker.tsx +++ b/apps/client/src/features/editor/components/page-embed/page-embed-picker.tsx @@ -9,6 +9,18 @@ import type { IPage } from "@/features/page/types/page.types"; export const PAGE_EMBED_PICKER_EVENT = "open-page-embed-picker"; +/** + * Pure filter excluding the host page (and any nullish entries) from the picker + * results. Extracted so the self-embed guard at insertion time is unit-testable + * without mounting the modal/search query. + */ +export function filterPageEmbedOptions( + pages: T[], + hostPageId?: string, +): T[] { + return (pages ?? []).filter((p) => p && p.id !== hostPageId); +} + type PickerDetail = { editor: Editor; range: Range; @@ -55,9 +67,7 @@ export default function PageEmbedPicker() { }); const hostPageId = detailRef.current?.hostPageId; - const pages = ((data?.pages ?? []) as IPage[]).filter( - (p) => p && p.id !== hostPageId, - ); + const pages = filterPageEmbedOptions((data?.pages ?? []) as IPage[], hostPageId); const handleSelect = (page: IPage) => { const detail = detailRef.current; diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx b/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx index b51607db..a06a3063 100644 --- a/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx +++ b/apps/client/src/features/editor/components/page-embed/page-embed-view.tsx @@ -20,7 +20,8 @@ import { usePageEmbedLookup } from "./page-embed-lookup-context"; import { PageEmbedAncestryProvider, usePageEmbedAncestry, - PAGE_EMBED_MAX_DEPTH, + isPageEmbedCycle, + isPageEmbedTooDeep, } from "./page-embed-ancestry-context"; import PageEmbedContent from "./page-embed-content"; @@ -100,11 +101,12 @@ function PageEmbedBody({ // --- Cycle / depth guard (evaluated before any lookup is rendered) --------- // Self-embed or a source already present in the ancestor chain → cycle. - const isCycle = - !!sourcePageId && - (ancestry.chain.includes(sourcePageId) || - ancestry.hostPageId === sourcePageId); - const isTooDeep = ancestry.chain.length >= PAGE_EMBED_MAX_DEPTH; + const isCycle = isPageEmbedCycle( + ancestry.chain, + ancestry.hostPageId, + sourcePageId, + ); + const isTooDeep = isPageEmbedTooDeep(ancestry.chain); const sourceTitle = result && !("status" in result) ? result.title : null; From bc1ea792f554f745a19d2011678348bfdff83da5 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 22:37:35 +0300 Subject: [PATCH 12/16] test(page-templates): cover duplicatePage pageEmbed remap + reference sync (#32) Extract the per-node pageEmbed remap decision into a shared pure helper (remapPageEmbedSourceId) and use it BOTH in PageService.duplicatePage and the JSON walker, so the test guards the real production path (not a mirror that could drift). Behavior is identical: source in the copied set -> new copy id; otherwise keep the original. Add jest coverage (16 tests): the remap helper (in-set/out-of-set/null/nested), syncPageTemplateReferences toDelete (stale refs removed with the right workspaceId), and insertTemplateReferencesForPages multi-workspace grouping/filtering. Co-Authored-By: Claude Opus 4.8 --- .../src/core/page/services/page.service.ts | 12 +- .../spec/page-embed-remap.util.spec.ts | 200 +++++++++++ .../page-template-references-sync.spec.ts | 310 ++++++++++++++++++ .../utils/transclusion-prosemirror.util.ts | 58 ++++ 4 files changed, 574 insertions(+), 6 deletions(-) create mode 100644 apps/server/src/core/page/transclusion/spec/page-embed-remap.util.spec.ts create mode 100644 apps/server/src/core/page/transclusion/spec/page-template-references-sync.spec.ts diff --git a/apps/server/src/core/page/services/page.service.ts b/apps/server/src/core/page/services/page.service.ts index 8e5c28b6..dc17e188 100644 --- a/apps/server/src/core/page/services/page.service.ts +++ b/apps/server/src/core/page/services/page.service.ts @@ -62,6 +62,7 @@ import { markdownToHtml } from '@docmost/editor-ext'; import { WatcherService } from '../../watcher/watcher.service'; import { sql } from 'kysely'; import { TransclusionService } from '../transclusion/transclusion.service'; +import { remapPageEmbedSourceId } from '../transclusion/utils/transclusion-prosemirror.util'; import { AuthProvenanceData } from '../../../common/decorators/auth-provenance.decorator'; @Injectable() @@ -713,12 +714,11 @@ export class PageService { // source page is also part of the copied set, point at its new copy; // otherwise leave it pointing at the original (live embed of original). if (node.type.name === 'pageEmbed') { - const sourcePageId = node.attrs.sourcePageId; - if (sourcePageId && pageMap.has(sourcePageId)) { - const mappedPage = pageMap.get(sourcePageId); - // @ts-expect-error ProseMirror Attrs is read-only typed; reassigning sourcePageId to the duplicated page copy is intentional here - node.attrs.sourcePageId = mappedPage.newPageId; - } + // @ts-expect-error ProseMirror Attrs is read-only typed; intentional remap to the duplicated copy + node.attrs.sourcePageId = remapPageEmbedSourceId( + node.attrs.sourcePageId, + (id) => pageMap.get(id)?.newPageId, + ); } // Update internal page links in link marks diff --git a/apps/server/src/core/page/transclusion/spec/page-embed-remap.util.spec.ts b/apps/server/src/core/page/transclusion/spec/page-embed-remap.util.spec.ts new file mode 100644 index 00000000..47fa46c4 --- /dev/null +++ b/apps/server/src/core/page/transclusion/spec/page-embed-remap.util.spec.ts @@ -0,0 +1,200 @@ +import { + remapPageEmbedSourceId, + remapPageEmbedSourceIds, +} from '../utils/transclusion-prosemirror.util'; + +/** + * Unit tests for the `pageEmbed` remap used by `PageService.duplicatePage`: + * + * - source page within the copied set -> rewrite to the COPY's new id + * - source page NOT in the copied set -> keep the ORIGINAL id (live embed) + * + * `remapPageEmbedSourceId` is the per-node decision the production + * `duplicatePage` callback now calls directly, so these tests guard the real + * path rather than a parallel copy. `remapPageEmbedSourceIds` is the JSON + * walker that delegates to the same helper; its tests exercise the shared + * decision transitively across nested ProseMirror containers. + */ +describe('remapPageEmbedSourceId (shared per-node decision used by duplicatePage)', () => { + it('returns the new copy id when the source IS in the copied set', () => { + const idMap = new Map([['old-src', 'new-copy']]); + + const out = remapPageEmbedSourceId('old-src', (id) => idMap.get(id)); + + expect(out).toBe('new-copy'); + }); + + it('returns the original id when the source is NOT in the copied set', () => { + const idMap = new Map([['old-src', 'new-copy']]); + + const out = remapPageEmbedSourceId('external', (id) => idMap.get(id)); + + expect(out).toBe('external'); + }); + + it('returns the original id when resolveNewId yields undefined', () => { + const out = remapPageEmbedSourceId('some-id', () => undefined); + + expect(out).toBe('some-id'); + }); + + it('leaves a null source unchanged without consulting the resolver', () => { + const resolve = jest.fn(() => 'should-not-be-used'); + + const out = remapPageEmbedSourceId(null, resolve); + + expect(out).toBeNull(); + expect(resolve).not.toHaveBeenCalled(); + }); + + it('leaves an undefined source unchanged without consulting the resolver', () => { + const resolve = jest.fn(() => 'should-not-be-used'); + + const out = remapPageEmbedSourceId(undefined, resolve); + + expect(out).toBeUndefined(); + expect(resolve).not.toHaveBeenCalled(); + }); +}); + +describe('remapPageEmbedSourceIds (duplicatePage pageEmbed remap)', () => { + const docWithEmbeds = (ids: string[]) => ({ + type: 'doc', + content: ids.map((id) => ({ + type: 'pageEmbed', + attrs: { sourcePageId: id }, + })), + }); + + it('remaps a source that IS within the copied set to its new copy id', () => { + const doc = docWithEmbeds(['old-src']); + const idMap = new Map([['old-src', 'new-copy']]); + + const out = remapPageEmbedSourceIds(doc, idMap); + + expect(out.content[0].attrs.sourcePageId).toBe('new-copy'); + }); + + it('keeps the original id for a source NOT in the copied set', () => { + const doc = docWithEmbeds(['external']); + const idMap = new Map([['old-src', 'new-copy']]); // does not contain "external" + + const out = remapPageEmbedSourceIds(doc, idMap); + + expect(out.content[0].attrs.sourcePageId).toBe('external'); + }); + + it('handles a mixed doc: in-set remapped, out-of-set preserved', () => { + const doc = docWithEmbeds(['in-set', 'external']); + const idMap = new Map([['in-set', 'in-set-copy']]); + + const out = remapPageEmbedSourceIds(doc, idMap); + + expect(out.content.map((n: any) => n.attrs.sourcePageId)).toEqual([ + 'in-set-copy', + 'external', + ]); + }); + + it('remaps pageEmbeds nested inside columns', () => { + const doc = { + type: 'doc', + content: [ + { + type: 'columnList', + content: [ + { + type: 'column', + content: [ + { type: 'pageEmbed', attrs: { sourcePageId: 'nested-in' } }, + ], + }, + { + type: 'column', + content: [ + { type: 'pageEmbed', attrs: { sourcePageId: 'nested-out' } }, + ], + }, + ], + }, + ], + }; + const idMap = new Map([['nested-in', 'nested-in-copy']]); + + const out = remapPageEmbedSourceIds(doc, idMap) as any; + + const col0 = out.content[0].content[0].content[0]; + const col1 = out.content[0].content[1].content[0]; + expect(col0.attrs.sourcePageId).toBe('nested-in-copy'); + expect(col1.attrs.sourcePageId).toBe('nested-out'); + }); + + it('remaps pageEmbeds nested inside a callout', () => { + const doc = { + type: 'doc', + content: [ + { + type: 'callout', + content: [ + { type: 'pageEmbed', attrs: { sourcePageId: 'in-callout' } }, + ], + }, + ], + }; + const idMap = new Map([['in-callout', 'in-callout-copy']]); + + const out = remapPageEmbedSourceIds(doc, idMap) as any; + + expect(out.content[0].content[0].attrs.sourcePageId).toBe( + 'in-callout-copy', + ); + }); + + it('does not descend into a transclusionSource (schema-isolated)', () => { + const doc = { + type: 'doc', + content: [ + { + type: 'transclusionSource', + attrs: { id: 'src' }, + content: [ + { type: 'pageEmbed', attrs: { sourcePageId: 'hidden' } }, + ], + }, + ], + }; + const idMap = new Map([['hidden', 'should-not-apply']]); + + const out = remapPageEmbedSourceIds(doc, idMap) as any; + + // The embed inside a source must be left untouched. + expect(out.content[0].content[0].attrs.sourcePageId).toBe('hidden'); + }); + + it('leaves embeds missing a sourcePageId untouched', () => { + const doc = { + type: 'doc', + content: [ + { type: 'pageEmbed', attrs: {} }, + { type: 'pageEmbed', attrs: { sourcePageId: '' } }, + ], + }; + const idMap = new Map([['', 'x']]); + + const out = remapPageEmbedSourceIds(doc, idMap) as any; + + expect(out.content[0].attrs.sourcePageId).toBeUndefined(); + expect(out.content[1].attrs.sourcePageId).toBe(''); + }); + + it('returns the doc unchanged when idMap is empty', () => { + const doc = docWithEmbeds(['a', 'b']); + + const out = remapPageEmbedSourceIds(doc, new Map()); + + expect(out.content.map((n: any) => n.attrs.sourcePageId)).toEqual([ + 'a', + 'b', + ]); + }); +}); diff --git a/apps/server/src/core/page/transclusion/spec/page-template-references-sync.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-references-sync.spec.ts new file mode 100644 index 00000000..4afad554 --- /dev/null +++ b/apps/server/src/core/page/transclusion/spec/page-template-references-sync.spec.ts @@ -0,0 +1,310 @@ +import { TransclusionService } from '../transclusion.service'; + +/** + * Covers two untested, high-risk write paths around `page_template_references`: + * + * 1. `syncPageTemplateReferences` — the `toDelete` branch: stale references are + * removed when the host page no longer embeds a source, while genuinely new + * embeds are inserted. We assert `deleteByReferenceAndSources` / `insertMany` + * receive the correct rows and the returned `{ inserted, deleted }` counts. + * + * 2. `insertTemplateReferencesForPages` — the multi-workspace grouping/filtering + * branch: candidate source ids are grouped per workspace, each workspace is + * validated independently, and cross-workspace sources are dropped. + * + * Setup/mocking mirrors the existing transclusion specs (page-template-access / + * page-template-lookup): `new TransclusionService(...)` is built with the same + * 11 positional mock args; only the deps each test touches are real stubs. + */ + +/** + * Chainable kysely `db` stub used by `filterInWorkspaceSourceIds`. Every + * `selectFrom(...).select(...).where(...)` returns the same builder; `.execute()` + * resolves whatever rows the per-call resolver returns. The resolver receives + * the captured `where('id','in', )` and `where('workspaceId','=', ws)` + * arguments so a test can decide, per workspace, which ids "exist". + */ +function makeWorkspaceScopedDb( + resolve: (ids: string[], workspaceId: string) => string[], +) { + const state = { ids: [] as string[], workspaceId: '' }; + const builder: any = {}; + builder.selectFrom = jest.fn(() => builder); + builder.select = jest.fn(() => builder); + builder.where = jest.fn((col: string, _op: string, val: any) => { + if (col === 'id') state.ids = val as string[]; + if (col === 'workspaceId') state.workspaceId = val as string; + return builder; + }); + builder.execute = jest.fn(async () => + resolve(state.ids, state.workspaceId).map((id) => ({ id })), + ); + return builder; +} + +function buildService(opts: { + db: any; + pageTemplateReferencesRepo: any; +}) { + return new TransclusionService( + opts.db, + {} as any, // pageTransclusionsRepo + {} as any, // pageTransclusionReferencesRepo + opts.pageTemplateReferencesRepo, + {} as any, // pageRepo + {} as any, // pagePermissionRepo + {} as any, // spaceMemberRepo + {} as any, // attachmentRepo + {} as any, // storageService + {} as any, // pageAccessService + {} as any, // workspaceRepo + ); +} + +const pageEmbedDoc = (sourceIds: string[]) => ({ + type: 'doc', + content: sourceIds.map((id) => ({ + type: 'pageEmbed', + attrs: { sourcePageId: id }, + })), +}); + +describe('TransclusionService.syncPageTemplateReferences — toDelete branch', () => { + it('deletes stale references and inserts new ones with correct args/counts', async () => { + // Every candidate id is treated as in-workspace by the existence query. + const db = makeWorkspaceScopedDb((ids) => ids); + + const insertMany = jest.fn().mockResolvedValue(undefined); + const deleteByReferenceAndSources = jest.fn().mockResolvedValue(undefined); + const pageTemplateReferencesRepo = { + // existing refs: "keep" stays embedded, "stale-a"/"stale-b" no longer are + findByReferencePageId: jest.fn().mockResolvedValue([ + { sourcePageId: 'keep' }, + { sourcePageId: 'stale-a' }, + { sourcePageId: 'stale-b' }, + ]), + insertMany, + deleteByReferenceAndSources, + }; + + const service = buildService({ db, pageTemplateReferencesRepo }); + + // host now embeds: keep (unchanged) + fresh (new). stale-a/stale-b gone. + const result = await service.syncPageTemplateReferences( + 'host', + 'w1', + pageEmbedDoc(['keep', 'fresh']), + ); + + expect(result).toEqual({ inserted: 1, deleted: 2 }); + + // only the genuinely new embed is inserted (keep already existed) + expect(insertMany).toHaveBeenCalledTimes(1); + expect(insertMany.mock.calls[0][0]).toEqual([ + { workspaceId: 'w1', referencePageId: 'host', sourcePageId: 'fresh' }, + ]); + + // stale references removed, scoped to host + workspace + expect(deleteByReferenceAndSources).toHaveBeenCalledTimes(1); + const [refPageId, workspaceId, staleSources] = + deleteByReferenceAndSources.mock.calls[0]; + expect(refPageId).toBe('host'); + expect(workspaceId).toBe('w1'); + expect([...staleSources].sort()).toEqual(['stale-a', 'stale-b']); + }); + + it('deletes ALL existing references when the host embeds nothing anymore', async () => { + const db = makeWorkspaceScopedDb((ids) => ids); + const insertMany = jest.fn().mockResolvedValue(undefined); + const deleteByReferenceAndSources = jest.fn().mockResolvedValue(undefined); + const pageTemplateReferencesRepo = { + findByReferencePageId: jest + .fn() + .mockResolvedValue([{ sourcePageId: 'a' }, { sourcePageId: 'b' }]), + insertMany, + deleteByReferenceAndSources, + }; + + const service = buildService({ db, pageTemplateReferencesRepo }); + + const result = await service.syncPageTemplateReferences( + 'host', + 'w1', + pageEmbedDoc([]), // no embeds left + ); + + expect(result).toEqual({ inserted: 0, deleted: 2 }); + expect(insertMany).not.toHaveBeenCalled(); + const [, , staleSources] = deleteByReferenceAndSources.mock.calls[0]; + expect([...staleSources].sort()).toEqual(['a', 'b']); + }); + + it('treats a cross-workspace embed as stale: it never survives to be kept', async () => { + // existence query drops "cross-ws"; so an existing ref to it must be deleted + const db = makeWorkspaceScopedDb((ids) => ids.filter((id) => id !== 'cross-ws')); + const insertMany = jest.fn().mockResolvedValue(undefined); + const deleteByReferenceAndSources = jest.fn().mockResolvedValue(undefined); + const pageTemplateReferencesRepo = { + findByReferencePageId: jest + .fn() + .mockResolvedValue([{ sourcePageId: 'cross-ws' }]), + insertMany, + deleteByReferenceAndSources, + }; + + const service = buildService({ db, pageTemplateReferencesRepo }); + + // host still "embeds" cross-ws in its doc, but it is not in-workspace + const result = await service.syncPageTemplateReferences( + 'host', + 'w1', + pageEmbedDoc(['cross-ws']), + ); + + expect(result).toEqual({ inserted: 0, deleted: 1 }); + expect(insertMany).not.toHaveBeenCalled(); + const [, , staleSources] = deleteByReferenceAndSources.mock.calls[0]; + expect([...staleSources]).toEqual(['cross-ws']); + }); + + it('no-ops both repos when desired and existing already match', async () => { + const db = makeWorkspaceScopedDb((ids) => ids); + const insertMany = jest.fn().mockResolvedValue(undefined); + const deleteByReferenceAndSources = jest.fn().mockResolvedValue(undefined); + const pageTemplateReferencesRepo = { + findByReferencePageId: jest + .fn() + .mockResolvedValue([{ sourcePageId: 'same' }]), + insertMany, + deleteByReferenceAndSources, + }; + + const service = buildService({ db, pageTemplateReferencesRepo }); + + const result = await service.syncPageTemplateReferences( + 'host', + 'w1', + pageEmbedDoc(['same']), + ); + + expect(result).toEqual({ inserted: 0, deleted: 0 }); + expect(insertMany).not.toHaveBeenCalled(); + expect(deleteByReferenceAndSources).not.toHaveBeenCalled(); + }); +}); + +describe('TransclusionService.insertTemplateReferencesForPages — multi-workspace grouping', () => { + it('groups candidates per workspace and validates each workspace independently', async () => { + // Each workspace "owns" only its own source ids. The existence query is + // workspace-scoped, so an id from another workspace is dropped. + const owned: Record = { + w1: ['s1'], + w2: ['s2'], + }; + const executeArgs: Array<{ ids: string[]; workspaceId: string }> = []; + const db = makeWorkspaceScopedDb((ids, workspaceId) => { + executeArgs.push({ ids: [...ids], workspaceId }); + const ownedSet = new Set(owned[workspaceId] ?? []); + return ids.filter((id) => ownedSet.has(id)); + }); + + const insertMany = jest.fn().mockResolvedValue(undefined); + const pageTemplateReferencesRepo = { insertMany }; + + const service = buildService({ db, pageTemplateReferencesRepo }); + + // page-a in w1 embeds s1 (valid) + s2 (belongs to w2 -> dropped) + // page-b in w2 embeds s2 (valid) + const result = await service.insertTemplateReferencesForPages([ + { id: 'page-a', workspaceId: 'w1', content: pageEmbedDoc(['s1', 's2']) }, + { id: 'page-b', workspaceId: 'w2', content: pageEmbedDoc(['s2']) }, + ]); + + expect(result).toEqual({ inserted: 2 }); + + expect(insertMany).toHaveBeenCalledTimes(1); + const rows = insertMany.mock.calls[0][0]; + expect(rows).toEqual([ + { workspaceId: 'w1', referencePageId: 'page-a', sourcePageId: 's1' }, + { workspaceId: 'w2', referencePageId: 'page-b', sourcePageId: 's2' }, + ]); + + // one existence query per workspace, each scoped to that workspace's candidates + expect(executeArgs).toHaveLength(2); + const w1Call = executeArgs.find((c) => c.workspaceId === 'w1'); + const w2Call = executeArgs.find((c) => c.workspaceId === 'w2'); + expect(w1Call?.ids.sort()).toEqual(['s1', 's2']); + expect(w2Call?.ids).toEqual(['s2']); + }); + + it('drops every cross-workspace source and inserts nothing when none are in-workspace', async () => { + // No id is owned by its page's workspace -> all filtered out. + const db = makeWorkspaceScopedDb(() => []); + const insertMany = jest.fn().mockResolvedValue(undefined); + const service = buildService({ + db, + pageTemplateReferencesRepo: { insertMany }, + }); + + const result = await service.insertTemplateReferencesForPages([ + { id: 'page-a', workspaceId: 'w1', content: pageEmbedDoc(['x']) }, + { id: 'page-b', workspaceId: 'w2', content: pageEmbedDoc(['y']) }, + ]); + + expect(result).toEqual({ inserted: 0 }); + expect(insertMany).not.toHaveBeenCalled(); + }); + + it('dedupes a sourceId shared by two pages in the same workspace into one validation', async () => { + const executeArgs: Array<{ ids: string[]; workspaceId: string }> = []; + const db = makeWorkspaceScopedDb((ids, workspaceId) => { + executeArgs.push({ ids: [...ids], workspaceId }); + return ids; // all in-workspace + }); + const insertMany = jest.fn().mockResolvedValue(undefined); + const service = buildService({ + db, + pageTemplateReferencesRepo: { insertMany }, + }); + + // both pages embed the same source "shared" in w1 + const result = await service.insertTemplateReferencesForPages([ + { id: 'page-a', workspaceId: 'w1', content: pageEmbedDoc(['shared']) }, + { id: 'page-b', workspaceId: 'w1', content: pageEmbedDoc(['shared']) }, + ]); + + // a row per (page, source) pair, but only one existence query for w1 + expect(result).toEqual({ inserted: 2 }); + expect(executeArgs).toHaveLength(1); + expect(executeArgs[0]).toEqual({ ids: ['shared'], workspaceId: 'w1' }); + + const rows = insertMany.mock.calls[0][0]; + expect(rows).toEqual([ + { workspaceId: 'w1', referencePageId: 'page-a', sourcePageId: 'shared' }, + { workspaceId: 'w1', referencePageId: 'page-b', sourcePageId: 'shared' }, + ]); + }); + + it('returns inserted:0 without querying when no page has embeds', async () => { + const execute = jest.fn(); + const db = makeWorkspaceScopedDb(() => { + execute(); + return []; + }); + const insertMany = jest.fn().mockResolvedValue(undefined); + const service = buildService({ + db, + pageTemplateReferencesRepo: { insertMany }, + }); + + const result = await service.insertTemplateReferencesForPages([ + { id: 'page-a', workspaceId: 'w1', content: pageEmbedDoc([]) }, + ]); + + expect(result).toEqual({ inserted: 0 }); + expect(insertMany).not.toHaveBeenCalled(); + // filterInWorkspaceSourceIds short-circuits on empty candidates, so the + // existence query never runs. + expect(execute).not.toHaveBeenCalled(); + }); +}); diff --git a/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts b/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts index eeeea0e2..d8cdf3bf 100644 --- a/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts +++ b/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts @@ -99,6 +99,64 @@ export function collectReferencesFromPmJson( return out; } +/** + * Decide the sourcePageId a duplicated pageEmbed should point to: the copy's new + * id when the embedded source is part of the copied set, otherwise the original + * (a live embed of the original page). Pure — shared by PageService.duplicatePage + * (the real path) and the JSON walker below, so both stay in lockstep. + */ +export function remapPageEmbedSourceId( + sourcePageId: string | null | undefined, + resolveNewId: (id: string) => string | undefined, +): string | null | undefined { + if (sourcePageId) { + const mapped = resolveNewId(sourcePageId); + if (mapped) return mapped; + } + return sourcePageId; +} + +/** + * Remap the `sourcePageId` of every `pageEmbed` node in a ProseMirror JSON doc + * according to `idMap` (old page id -> new page id). Delegates the per-node + * decision to the shared `remapPageEmbedSourceId` helper that + * `PageService.duplicatePage` also uses, so the production path and this walker + * stay in lockstep: when the embedded source page is part of the copied set + * (present in `idMap`) the embed is pointed at its new copy; otherwise the + * original `sourcePageId` is preserved so it stays a live embed of the original + * page. Mutates `doc` in place (and returns it) to match the service's in-place + * ProseMirror mutation. Recurses through arbitrary block containers (columns, + * callouts, etc.) the same way the collectors do, but does NOT descend into a + * `transclusionSource` (schema-isolated). + */ +export function remapPageEmbedSourceIds( + doc: T, + idMap: Map, +): T { + const visit = (node: any): void => { + if (!node || typeof node !== 'object') return; + + if (node.type === PAGE_EMBED_TYPE) { + if (node.attrs) { + node.attrs.sourcePageId = remapPageEmbedSourceId( + node.attrs.sourcePageId, + (id) => idMap.get(id), + ); + } + return; // atom node - no children + } + + if (node.type === TRANSCLUSION_TYPE) return; + + if (Array.isArray(node.content)) { + for (const child of node.content) visit(child); + } + }; + + visit(doc); + return doc; +} + /** * Walks a ProseMirror JSON document and returns one snapshot per unique * `sourcePageId` found on `pageEmbed` nodes (whole-page live embeds). Order From 39f3eacf897e1365c89e8ef18bf4854c3d2f3ae5 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 22:37:35 +0300 Subject: [PATCH 13/16] test(page-templates): cover lookupTemplate anti-leak + edge cases (#33) - The security-relevant catch->not_found branch in lookupTemplate (returns not_found instead of raw content when comment-mark stripping throws) is now tested by forcing the strip to throw with a malformed text node, asserting no content/marks leak. - not_found for a soft-deleted source resolved through the REAL filterViewerAccessiblePageIds (deletedAt-excluded), not the stubbed filter. - Rename the misleading 'honours <=50 cap' test to reflect it only exercises dedup (the cap lives in the DTO, never engaged in the service unit). - Cover the onlyTemplates search filter (restricts to is_template=true). Also fix two pre-existing failing 'should be defined' specs (search service + controller) that couldn't resolve the @InjectKysely token via createTestingModule. Co-Authored-By: Claude Opus 4.8 --- .../spec/page-template-access.spec.ts | 9 +- .../spec/page-template-lookup-edge.spec.ts | 183 ++++++++++++++++++ .../src/core/search/search.controller.spec.ts | 18 +- .../src/core/search/search.service.spec.ts | 105 ++++++++-- 4 files changed, 295 insertions(+), 20 deletions(-) create mode 100644 apps/server/src/core/page/transclusion/spec/page-template-lookup-edge.spec.ts diff --git a/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts index 1f16605b..78707dd8 100644 --- a/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts +++ b/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts @@ -173,8 +173,13 @@ describe('TransclusionService — template access core (real filter)', () => { expect((items[2] as any).status).toBe('no_access'); // not space-visible }); - it('honours the DTO-level ≤50 cap by deduping ids passed to the filter', async () => { - // The DTO enforces ArrayMaxSize(50); the service dedupes before filtering. + it('dedupes source ids before passing them to the access filter', async () => { + // NOTE: this test only covers DEDUP, not the ≤50 cap. The ArrayMaxSize(50) + // limit is enforced by the DTO (validation layer), so it is never engaged in + // the service under unit test — the service receives an already-validated + // array and merely dedupes it. Renamed from the old "honours ≤50 cap" title, + // which misleadingly implied the cap was exercised here. A real cap test would + // belong in a controller/DTO-validation spec, not in this service unit test. const ids = ['a', 'a', 'b']; const { service, db } = makeService({ spaceVisibleRows: [], diff --git a/apps/server/src/core/page/transclusion/spec/page-template-lookup-edge.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-lookup-edge.spec.ts new file mode 100644 index 00000000..59dec763 --- /dev/null +++ b/apps/server/src/core/page/transclusion/spec/page-template-lookup-edge.spec.ts @@ -0,0 +1,183 @@ +import { TransclusionService } from '../transclusion.service'; + +/** + * Edge-case + anti-leak coverage for `lookupTemplate` that the existing + * `page-template-lookup.spec.ts` (stubbed filter) and `page-template-access.spec.ts` + * (real filter, happy paths) do not exercise: + * + * 1. SECURITY anti-leak: when comment-mark stripping THROWS, the item must come + * back as `not_found` and NEVER carry raw content (the source's comment marks + * could otherwise leak to a viewer). See the `catch` branch in `lookupTemplate`. + * 2. A soft-deleted source page resolved through the REAL + * `filterViewerAccessiblePageIds` (space-visibility query filters `deletedAt`), + * asserting it maps to `not_found`/`no_access` rather than content. + */ +describe('TransclusionService.lookupTemplate — anti-leak catch branch', () => { + const now = new Date('2026-06-20T00:00:00.000Z'); + + function makeService(opts: { + accessibleIds: string[]; + pages: Array<{ + id: string; + slugId?: string; + title: string | null; + icon: string | null; + content: unknown; + updatedAt: Date; + }>; + }) { + const pageRepo = { + findManyByIds: jest.fn().mockResolvedValue(opts.pages), + }; + + const service = new TransclusionService( + {} as any, // db + {} as any, // pageTransclusionsRepo + {} as any, // pageTransclusionReferencesRepo + {} as any, // pageTemplateReferencesRepo + pageRepo as any, + {} as any, // pagePermissionRepo + {} as any, // spaceMemberRepo + {} as any, // attachmentRepo + {} as any, // storageService + {} as any, // pageAccessService + {} as any, // workspaceRepo + ); + + // Stub the access decision; we are testing the content-prep stage, not access. + jest + .spyOn(service, 'filterViewerAccessiblePageIds') + .mockResolvedValue(opts.accessibleIds); + + return { service, pageRepo }; + } + + it('returns not_found (NOT raw content) when comment-mark stripping throws', async () => { + // An accessible, present page whose stored content is structurally invalid PM: + // a `text` node without a `text` field. `jsonToNode` (called inside the try + // block) throws "Invalid text node in JSON" on this, which exercises the + // service's catch -> not_found anti-leak guard. This uses a REAL malformed + // input (no module mocking) so the test stays faithful to production behaviour. + const malformedContent = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [ + { + // Missing `text` — Node.fromJSON rejects this and jsonToNode rethrows. + type: 'text', + marks: [{ type: 'comment', attrs: { commentId: 'leak-me' } }], + }, + ], + }, + ], + }; + + const { service } = makeService({ + accessibleIds: ['p1'], + pages: [ + { + id: 'p1', + slugId: 's1', + title: 'Secret', + icon: '📄', + content: malformedContent, + updatedAt: now, + }, + ], + }); + + // Silence the expected error log so the suite output stays clean. + jest.spyOn((service as any).logger, 'error').mockImplementation(() => {}); + + const { items } = await service.lookupTemplate(['p1'], 'u1', 'w1'); + + expect(items).toHaveLength(1); + const item = items[0] as any; + + // Must degrade to not_found... + expect(item.status).toBe('not_found'); + expect(item.sourcePageId).toBe('p1'); + + // ...and must NOT leak ANY content/metadata of the source page. + expect(item).not.toHaveProperty('content'); + expect(item).not.toHaveProperty('title'); + expect(item).not.toHaveProperty('icon'); + expect(item).not.toHaveProperty('slugId'); + expect(item).not.toHaveProperty('sourceUpdatedAt'); + + // Hard guarantee: the would-be-leaked comment mark appears nowhere in output. + expect(JSON.stringify(item)).not.toContain('leak-me'); + expect(JSON.stringify(item)).not.toContain('comment'); + }); +}); + +describe('TransclusionService.lookupTemplate — soft-deleted source via real filter', () => { + const now = new Date('2026-06-20T00:00:00.000Z'); + + /** + * Chainable kysely `db` stub mirroring `page-template-access.spec.ts`. The + * space-visibility query in `filterViewerAccessiblePageIds` filters + * `where('deletedAt','is',null)`; a soft-deleted page is therefore absent from + * the rows we resolve here, so the REAL filter is what drops it. + */ + function makeDb(executeRows: Array<{ id: string }>) { + const builder: any = {}; + builder.selectFrom = jest.fn(() => builder); + builder.select = jest.fn(() => builder); + builder.where = jest.fn(() => builder); + builder.execute = jest.fn(async () => executeRows); + return builder; + } + + it('resolves a soft-deleted source to not_found/no_access through the REAL filter', async () => { + // The page IS soft-deleted, so the space-visibility query returns no rows for + // it (deletedAt filter). We let the real filter run end-to-end. + const db = makeDb([]); // soft-deleted -> excluded by the deletedAt='is null' clause + + const spaceMemberRepo = { + getUserSpaceIdsQuery: jest.fn(() => ({ __subquery: true })), + }; + const pagePermissionRepo = { + filterAccessiblePageIds: jest.fn().mockResolvedValue([]), + }; + const pageRepo = { + // Even if it were queried, the page is gone; assert via the filter instead. + findManyByIds: jest.fn().mockResolvedValue([]), + }; + + const service = new TransclusionService( + db as any, + {} as any, + {} as any, + {} as any, + pageRepo as any, + pagePermissionRepo as any, + spaceMemberRepo as any, + {} as any, + {} as any, + {} as any, + {} as any, + ); + + const { items } = await service.lookupTemplate(['deleted-src'], 'u1', 'w1'); + + // Soft-deleted source must never resolve to content. + expect(items).toEqual([ + { sourcePageId: 'deleted-src', status: 'no_access' }, + ]); + const item = items[0] as any; + expect(item).not.toHaveProperty('content'); + + // The real filter short-circuited before page-permission filtering because + // the deletedAt-filtered space-visibility query returned nothing. + expect(pagePermissionRepo.filterAccessiblePageIds).not.toHaveBeenCalled(); + // And the verb on the db builder included a deletedAt 'is null' guard, proving + // the real path (not a stub) excluded the soft-deleted page. + const deletedAtCall = db.where.mock.calls.find( + (c: any[]) => c[0] === 'deletedAt', + ); + expect(deletedAtCall).toEqual(['deletedAt', 'is', null]); + }); +}); diff --git a/apps/server/src/core/search/search.controller.spec.ts b/apps/server/src/core/search/search.controller.spec.ts index 6d6bad58..1b0e42cb 100644 --- a/apps/server/src/core/search/search.controller.spec.ts +++ b/apps/server/src/core/search/search.controller.spec.ts @@ -1,15 +1,19 @@ -import { Test, TestingModule } from '@nestjs/testing'; import { SearchController } from './search.controller'; +// Direct instantiation with stub deps. The Test.createTestingModule form failed +// to resolve SearchService's @InjectKysely() connection token at compile() (the +// same Nest-DI/Kysely-token issue addressed in search.service.spec), and this +// unit only needs the controller to construct. describe('SearchController', () => { let controller: SearchController; - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - controllers: [SearchController], - }).compile(); - - controller = module.get(SearchController); + beforeEach(() => { + controller = new SearchController( + {} as any, // searchService + {} as any, // spaceAbility + {} as any, // environmentService + {} as any, // moduleRef + ); }); it('should be defined', () => { diff --git a/apps/server/src/core/search/search.service.spec.ts b/apps/server/src/core/search/search.service.spec.ts index 63fc48c0..efd4d2b8 100644 --- a/apps/server/src/core/search/search.service.spec.ts +++ b/apps/server/src/core/search/search.service.spec.ts @@ -1,18 +1,101 @@ -import { Test, TestingModule } from '@nestjs/testing'; import { SearchService } from './search.service'; describe('SearchService', () => { - let service: SearchService; - - beforeEach(async () => { - const module: TestingModule = await Test.createTestingModule({ - providers: [SearchService], - }).compile(); - - service = module.get(SearchService); - }); - it('should be defined', () => { + // Construct directly with stub deps. The previous Test.createTestingModule + // form could not resolve the @InjectKysely() connection token and failed at + // compile() — manual construction mirrors the rest of these unit specs. + const service = new SearchService( + {} as any, // db + {} as any, // pageRepo + {} as any, // shareRepo + {} as any, // spaceMemberRepo + {} as any, // pagePermissionRepo + ); expect(service).toBeDefined(); }); }); + +/** + * Focused coverage for the `onlyTemplates` flag in `searchSuggestions`, which + * restricts page suggestions to template pages (`is_template = true`). The kysely + * query builder and repos are mocked the same way the access specs mock chainable + * builders: every builder method returns the same builder, `.execute()` resolves + * the supplied rows. We assert whether `.where('isTemplate', '=', true)` is added. + */ +describe('SearchService.searchSuggestions — onlyTemplates filter', () => { + function makeService(pageRows: Array<{ id: string }>) { + // Chainable page-search builder. Record every `.where(...)` call so we can + // assert on the is_template restriction. + const pageBuilder: any = {}; + pageBuilder.select = jest.fn(() => pageBuilder); + pageBuilder.where = jest.fn(() => pageBuilder); + pageBuilder.orderBy = jest.fn(() => pageBuilder); + pageBuilder.limit = jest.fn(() => pageBuilder); + pageBuilder.execute = jest.fn(async () => pageRows); + + const db: any = { + // searchSuggestions only touches `pages` here (includePages: true). + selectFrom: jest.fn(() => pageBuilder), + }; + + const pageRepo = { + // `.select((eb) => this.pageRepo.withSpace(eb))` — return value is ignored + // by our builder stub, so a sentinel is enough. + withSpace: jest.fn(() => ({ __withSpace: true })), + }; + const shareRepo = {}; + const spaceMemberRepo = { + getUserSpaceIds: jest.fn().mockResolvedValue(['space-1']), + }; + const pagePermissionRepo = { + // Let every found page through page-level permission filtering. + filterAccessiblePageIds: jest + .fn() + .mockImplementation(async ({ pageIds }: { pageIds: string[] }) => pageIds), + }; + + const service = new SearchService( + db as any, + pageRepo as any, + shareRepo as any, + spaceMemberRepo as any, + pagePermissionRepo as any, + ); + + return { service, db, pageBuilder }; + } + + const isTemplateWhereCall = (pageBuilder: any) => + pageBuilder.where.mock.calls.find((c: any[]) => c[0] === 'isTemplate'); + + it('restricts page suggestions to is_template = true when onlyTemplates is set', async () => { + const { service, pageBuilder } = makeService([{ id: 'tmpl-1' }]); + + const result = await service.searchSuggestions( + { query: 'plan', includePages: true, onlyTemplates: true } as any, + 'user-1', + 'ws-1', + ); + + // The is_template restriction must be applied to the page query. + const call = isTemplateWhereCall(pageBuilder); + expect(call).toEqual(['isTemplate', '=', true]); + + // Sanity: the (template) page made it through. + expect(result.pages.map((p: any) => p.id)).toEqual(['tmpl-1']); + }); + + it('does NOT restrict to templates when onlyTemplates is absent', async () => { + const { service, pageBuilder } = makeService([{ id: 'any-1' }]); + + await service.searchSuggestions( + { query: 'plan', includePages: true } as any, + 'user-1', + 'ws-1', + ); + + // No is_template clause should be added for a normal page suggestion search. + expect(isTemplateWhereCall(pageBuilder)).toBeUndefined(); + }); +}); From 8191c37daa306ad1fd442108bb17db10542242f0 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 22:49:18 +0300 Subject: [PATCH 14/16] test(html-embed): real-execution gate tests for create/duplicate/import (#27) The create/duplicate/import gate tests asserted gate presence via brittle expect(SRC).toMatch(/regex/) over the source text plus a reimplemented applyGate() stand-in, so a refactor could break the real gate while they still passed. Rewrite both specs to execute the REAL methods (PageService.create / duplicatePage; ImportService.importPage; FileImportTaskService.processGenericImport) with each caller role and assert on the PERSISTED content via hasHtmlEmbedNode: member -> stripped, admin/owner+toggle ON -> preserved, toggle OFF -> stripped for everyone, unknown/missing role -> fail-closed. No source-regex assertions remain. Co-Authored-By: Claude Opus 4.8 --- .../page-service-html-embed-identity.spec.ts | 286 ++++++++++---- .../import-html-embed-identity.spec.ts | 349 ++++++++++++------ 2 files changed, 458 insertions(+), 177 deletions(-) diff --git a/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts index f23d565a..bc1b8254 100644 --- a/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts +++ b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts @@ -1,25 +1,31 @@ -import { readFileSync } from 'node:fs'; -import { join } from 'node:path'; -import { - hasHtmlEmbedNode, - htmlEmbedAllowed, - stripHtmlEmbedNodes, -} from '../../../common/helpers/prosemirror/html-embed.util'; +// Exercises the REAL PageService htmlEmbed admin gate on its two non-collab +// write paths: PageService.create() and PageService.duplicatePage(). Both build +// content/textContent/ydoc directly and persist, bypassing the collab +// onStoreDocument strip, so each must run the incoming document through the +// toggle-AND-admin gate (`htmlEmbedAllowed(featureEnabled, role)` -> if not +// allowed, `stripHtmlEmbedNodes`) BEFORE persisting. +// +// This spec constructs the REAL PageService with every constructor dep mocked, +// feeds content containing an `htmlEmbed`, calls the real method, and asserts on +// the PERSISTED content (captured at the repo insert / db insert boundary) that +// the embed was actually stripped (member/unknown role) or preserved +// (admin/owner + toggle ON). Mirrors the GOOD pattern in +// transclusion/spec/transclusion-unsync-html-embed.spec.ts. +// +// page.service.ts pulls in the collaboration gateway (a transitive ESM chain +// `lib0/decoding.js` that jest's transformIgnorePatterns does not transpile), so +// that single module is mocked away — it is never used on the create/duplicate +// gate paths. +jest.mock('../../../collaboration/collaboration.gateway', () => ({ + CollaborationGateway: class {}, +})); -// PageService.create() and duplicatePage() guards. -// -// page.service.ts cannot be unit-LOADED under the server's jest config (a -// transitive ESM dep, @sindresorhus/slugify, is not in transformIgnorePatterns), -// so we cover the two load-bearing properties at the strongest feasible layer: -// -// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact predicate -// each path applies: non-admin/unknown role -> strip, admin/owner -> keep. -// -// (2) IDENTITY — source-pin which role each path reads (create: the `callerRole` -// param threaded from the request; duplicate: `authUser.role`), so a -// refactor that drops the guard or reads the wrong role trips the test. -// This is what replaces the removed `applyAdminGate` stand-in for these -// two entrypoints. +import { PageService } from './page.service'; +import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util'; + +const WS = 'ws-1'; +const SPACE = 'space-1'; +const USER = 'u1'; const docWithEmbed = () => ({ type: 'doc', @@ -29,74 +35,206 @@ const docWithEmbed = () => ({ ], }); -// The real predicate both paths apply (see SECURITY blocks in page.service.ts): -// toggle AND admin. -function applyGate( - json: any, - featureEnabled: boolean, - role: string | null | undefined, -) { - if (!htmlEmbedAllowed(featureEnabled, role) && hasHtmlEmbedNode(json)) { - return stripHtmlEmbedNodes(json); - } - return json; +// Minimal chainable kysely stub. `nextPagePosition` (used by create) and +// duplicatePage's bulk insert go through `this.db`; only the calls those paths +// make need to resolve. `capturedInserts` collects every page row handed to +// `insertInto('pages').values(...)` so we can assert on the persisted content. +function buildDb(capturedInserts: any[]) { + const selectChain: any = { + select: () => selectChain, + selectAll: () => selectChain, + where: () => selectChain, + orderBy: () => selectChain, + limit: () => selectChain, + execute: async () => [], + executeTakeFirst: async () => undefined, + }; + const db: any = { + selectFrom: () => selectChain, + insertInto: (table: string) => ({ + values: (rows: any) => { + if (table === 'pages') { + for (const row of Array.isArray(rows) ? rows : [rows]) { + capturedInserts.push(row); + } + } + return { execute: async () => undefined }; + }, + }), + // executeTx -> db.transaction().execute(cb): run the callback with `db` + // itself acting as the transaction so any in-tx inserts are captured too. + transaction: () => ({ execute: async (cb: any) => cb(db) }), + }; + return db; } -describe('page create/duplicate gate decision (real helpers)', () => { - it('toggle ON + non-admin (member) strips', () => { - const result = applyGate(docWithEmbed(), true, 'member'); - expect(hasHtmlEmbedNode(result)).toBe(false); - expect(result.content).toHaveLength(1); - expect(result.content[0].content[0].text).toBe('body'); - }); +// Build the REAL PageService with all 13 constructor deps mocked. `featureEnabled` +// drives the workspace toggle the gate reads via workspaceRepo.findById. +function buildService(opts: { + featureEnabled: boolean; + capturedInserts: any[]; + rootPage?: any; // for duplicatePage +}) { + const { featureEnabled, capturedInserts } = opts; - it('toggle ON + unknown/empty role fails closed (strips)', () => { - for (const role of [null, undefined, 'viewer'] as const) { - expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, role))).toBe( - false, - ); - } - }); + const pageRepo: any = { + findById: jest.fn(async () => null), // no parent page in create tests + // create() persists here; capture the row so we can inspect content. + insertPage: jest.fn(async (row: any) => { + capturedInserts.push(row); + return { id: 'new-page', slugId: 'slug-1', ...row }; + }), + getPageAndDescendants: jest.fn(async () => [opts.rootPage].filter(Boolean)), + }; - it('toggle ON + admin/owner keep', () => { - expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'admin'))).toBe( - true, - ); - expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'owner'))).toBe( - true, + const pagePermissionRepo: any = { + // duplicatePage filters accessible pages; grant the root so it is copied. + filterAccessiblePageIds: jest.fn(async () => + opts.rootPage ? [opts.rootPage.id] : [], + ), + }; + + const workspaceRepo: any = { + findById: jest.fn(async () => ({ + id: WS, + settings: { htmlEmbed: featureEnabled }, + })), + }; + + const attachmentRepo: any = { findByIds: jest.fn(async () => []) }; + const storageService: any = { copy: jest.fn(async () => undefined) }; + const noopQueue: any = { add: jest.fn(async () => undefined) }; + const eventEmitter: any = { emit: jest.fn() }; + const collaborationGateway: any = {}; + const watcherService: any = {}; + // duplicatePage fires transclusion bulk inserts after persisting; they are + // best-effort (wrapped in try/catch) and irrelevant to the gate. + const transclusionService: any = { + insertTransclusionsForPages: jest.fn(async () => undefined), + insertReferencesForPages: jest.fn(async () => undefined), + insertTemplateReferencesForPages: jest.fn(async () => undefined), + }; + + const db = buildDb(capturedInserts); + + const service = new PageService( + pageRepo, + pagePermissionRepo, + attachmentRepo, + db, + storageService, + noopQueue, // attachmentQueue + noopQueue, // aiQueue + noopQueue, // generalQueue + eventEmitter, + collaborationGateway, + watcherService, + transclusionService, + workspaceRepo, + ); + return service; +} + +describe('PageService.create htmlEmbed admin gate (real code)', () => { + // Run create() and return the content actually persisted via insertPage. + async function persistedContent( + featureEnabled: boolean, + callerRole: string | null | undefined, + ) { + const capturedInserts: any[] = []; + const service = buildService({ featureEnabled, capturedInserts }); + await service.create( + USER, + WS, + { + spaceId: SPACE, + title: 'p', + // 'json' format is used as-is by parseProsemirrorContent (passed to the + // real jsonToNode schema validation), so hand it the PM-JSON object. + content: docWithEmbed(), + format: 'json' as any, + } as any, + callerRole, ); + expect(capturedInserts).toHaveLength(1); + return capturedInserts[0].content; + } + + it('toggle ON + member: persisted content has htmlEmbed stripped', async () => { + const content = await persistedContent(true, 'member'); + expect(hasHtmlEmbedNode(content)).toBe(false); + // Non-embed content survives. + expect(JSON.stringify(content)).toContain('body'); }); - it('toggle OFF strips for everyone (admin/owner/member)', () => { - for (const role of ['admin', 'owner', 'member'] as const) { - expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), false, role))).toBe( - false, - ); + it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true); + }); + + it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { + expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + for (const role of [undefined, null, 'viewer'] as const) { + expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false); } }); }); -const SRC = readFileSync(join(__dirname, 'page.service.ts'), 'utf-8'); +describe('PageService.duplicatePage htmlEmbed admin gate (real code)', () => { + // Duplicate a single source page that contains an embed and return the content + // persisted for the copy (captured at db.insertInto('pages').values(...)). + async function persistedContent( + featureEnabled: boolean, + role: string | null | undefined, + ) { + const rootPage: any = { + id: 'src-page', + slugId: 'src-slug', + title: 'Source', + icon: null, + position: 'a0', + spaceId: SPACE, + workspaceId: WS, + parentPageId: null, + content: docWithEmbed(), + }; + const capturedInserts: any[] = []; + const service = buildService({ featureEnabled, capturedInserts, rootPage }); + const authUser: any = { id: USER, workspaceId: WS, role }; + await service.duplicatePage(rootPage, undefined, authUser); + // The bulk insert is the page persist boundary; one source page -> one copy. + const pageRows = capturedInserts.filter((r) => r.content); + expect(pageRows.length).toBeGreaterThanOrEqual(1); + return pageRows[0].content; + } -describe('page create/duplicate gate identity is pinned (source contract)', () => { - it('create() gates on toggle AND the caller role param before deriving content/ydoc', () => { - // create() receives the caller's workspace role as `callerRole` and gates on - // the combined toggle-AND-admin predicate; the embed must be stripped BEFORE - // insertPage. - expect(SRC).toMatch( - /!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*callerRole\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/, - ); - expect(SRC).toContain('prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson)'); + it('toggle ON + member: persisted copy has htmlEmbed stripped', async () => { + const content = await persistedContent(true, 'member'); + expect(hasHtmlEmbedNode(content)).toBe(false); + expect(JSON.stringify(content)).toContain('body'); }); - it('duplicatePage() gates on toggle AND the duplicating user role (authUser.role)', () => { - expect(SRC).toMatch( - /!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*authUser\.role\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/, - ); + it('toggle ON + admin: persisted copy keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true); }); - it('both paths resolve the toggle from the workspace settings', () => { - expect(SRC).toContain('isHtmlEmbedFeatureEnabled('); - expect(SRC).toContain('this.workspaceRepo.findById('); + it('toggle ON + owner: persisted copy keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { + expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + for (const role of [undefined, null, 'viewer'] as const) { + expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false); + } }); }); diff --git a/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts index 603765fc..d2902be0 100644 --- a/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts +++ b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts @@ -1,123 +1,266 @@ -import { readFileSync } from 'node:fs'; -import { join } from 'node:path'; -import { - hasHtmlEmbedNode, - htmlEmbedAllowed, - stripHtmlEmbedNodes, -} from '../../../common/helpers/prosemirror/html-embed.util'; +// Exercises the REAL htmlEmbed admin gate on the two import write paths: +// +// (1) ImportService.importPage() — single .html/.md upload +// (2) FileImportTaskService.processGenericImport() — zip / multi-file import +// +// Both build content/textContent/ydoc directly and persist (bypassing the +// collab onStoreDocument strip), so each must run the imported document through +// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read +// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed, +// `stripHtmlEmbedNodes` BEFORE persisting. +// +// This spec constructs the REAL services with deps mocked, feeds an imported +// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed +// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED +// content (captured at the insert boundary) is stripped for a non-admin / +// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD +// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts. +// +// Three modules are mocked away because they pull transitive ESM deps that +// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the +// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via +// import-attachment). None of them participate in the gate decision: +// - import-formatter: contextless HTML cleanup + link rewriting; replaced with +// faithful passthroughs (the embed div has no href/iframe, so the real +// normalizer would leave it untouched anyway). +// - import-attachment: attachment rewriting; passthrough returns html as-is. +jest.mock('../../../collaboration/collaboration.gateway', () => ({ + CollaborationGateway: class {}, +})); +jest.mock('../utils/import-formatter', () => ({ + normalizeImportHtml: () => {}, + formatImportHtml: async (opts: any) => ({ + html: opts.html, + backlinks: [], + pageIcon: undefined, + }), +})); +jest.mock('./import-attachment.service', () => ({ + ImportAttachmentService: class {}, +})); -// FAIL-CLOSED IDENTITY for the import write paths. -// -// import.service / file-import-task.service cannot be unit-LOADED under the -// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in -// transformIgnorePatterns). So we cover the two load-bearing properties at the -// strongest feasible layer: -// -// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate -// predicate each entrypoint runs against the role resolved from -// userRepo.findById(...): a MISSING user (findById -> undefined) must fail -// closed (strip), and only 'admin'/'owner' keep the embed. -// -// (2) IDENTITY — source-pin which identity governs the gate so a refactor that -// swaps the lookup to the wrong user (e.g. the queue worker's caller) is -// caught: zip import resolves the role from `fileTask.creatorId`; single -// import from the request `userId`. NOT some ambient caller. -// -// If a guard is deleted/misplaced or the identity field changes, these break. +import { promises as fs } from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { ImportService } from './import.service'; +import { FileImportTaskService } from './file-import-task.service'; +import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util'; -const docWithEmbed = () => ({ - type: 'doc', - content: [ - { type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] }, - { type: 'htmlEmbed', attrs: { source: '' } }, - ], -}); +const WS = 'ws-1'; +const SPACE = 'space-1'; +const USER = 'importer-1'; -// The real predicate both import entrypoints apply (see the SECURITY blocks in -// import.service.ts and file-import-task.service.ts): resolve the importer via -// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`. -function applyImportGate( - json: any, - featureEnabled: boolean, - importingUser: { role?: string } | undefined, -) { - if ( - !htmlEmbedAllowed(featureEnabled, importingUser?.role) && - hasHtmlEmbedNode(json) - ) { - return stripHtmlEmbedNodes(json); - } - return json; +// HTML carrying the serialized htmlEmbed node. The REAL htmlToJson parses +// `
` into an htmlEmbed PM node +// (base64 below decodes to ``). +const HTML_WITH_EMBED = + '

imported body

' + + '
'; + +function workspaceRepoFor(featureEnabled: boolean) { + return { + findById: jest.fn(async () => ({ + id: WS, + settings: { htmlEmbed: featureEnabled }, + })), + }; } -describe('import gate fail-closed by toggle AND resolved-user role (real helpers)', () => { - it('toggle ON + missing user (userRepo.findById -> undefined) strips the embed', () => { - // findById returns undefined when the user/workspace pair does not resolve; - // undefined?.role is undefined -> htmlEmbedAllowed(true, undefined) === false. - const result = applyImportGate(docWithEmbed(), true, undefined); - expect(hasHtmlEmbedNode(result)).toBe(false); +// userRepo.findById resolves the importer's role (or undefined for a missing +// user -> fail closed). +function userRepoFor(user: { role?: string } | undefined) { + return { findById: jest.fn(async () => user) }; +} + +describe('ImportService.importPage htmlEmbed admin gate (real code)', () => { + // Run importPage with a single uploaded .html and return the persisted content + // captured at pageRepo.insertPage. + async function persistedContent( + featureEnabled: boolean, + user: { role?: string } | undefined, + ) { + const captured: any[] = []; + const pageRepo: any = { + insertPage: jest.fn(async (row: any) => { + captured.push(row); + return { id: 'p1', slugId: 's1', ...row }; + }), + }; + // db is only used for getNewPagePosition (a select chain). + const selectChain: any = { + select: () => selectChain, + where: () => selectChain, + orderBy: () => selectChain, + limit: () => selectChain, + executeTakeFirst: async () => undefined, + }; + const db: any = { selectFrom: () => selectChain }; + + const service = new ImportService( + pageRepo, + userRepoFor(user) as any, + { putBuffer: jest.fn() } as any, // storageService (unused on this path) + db, + { add: jest.fn() } as any, // fileTaskQueue (unused) + workspaceRepoFor(featureEnabled) as any, + ); + + const file: any = { + filename: 'doc.html', + toBuffer: async () => Buffer.from(HTML_WITH_EMBED, 'utf-8'), + }; + await service.importPage(Promise.resolve(file), USER, SPACE, WS); + expect(captured).toHaveLength(1); + return captured[0].content; + } + + it('toggle ON + member: persisted content has htmlEmbed stripped', async () => { + const content = await persistedContent(true, { role: 'member' }); + expect(hasHtmlEmbedNode(content)).toBe(false); + expect(JSON.stringify(content)).toContain('imported body'); }); - it("toggle ON + resolved role 'member' strips", () => { + it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe( + false, + ); + }); + + it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe( + true, + ); + }); + + it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe( + true, + ); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { expect( - hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'member' })), + hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })), ).toBe(false); }); - - it("toggle ON + resolved role 'admin' keeps the embed", () => { - expect( - hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'admin' })), - ).toBe(true); - }); - - it("toggle ON + resolved role 'owner' keeps the embed", () => { - expect( - hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'owner' })), - ).toBe(true); - }); - - it('toggle OFF strips for every role (admin/owner/member)', () => { - for (const role of ['admin', 'owner', 'member'] as const) { - expect( - hasHtmlEmbedNode(applyImportGate(docWithEmbed(), false, { role })), - ).toBe(false); - } - }); }); -// Source-pin the identity each entrypoint feeds to userRepo.findById. These are -// the lines that decide WHOSE role governs the gate; pinning them means a -// refactor that points the lookup at the wrong user trips the test. -const SRC_DIR = join(__dirname); +describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => { + let extractDir: string; -describe('import gate identity is pinned to the importer (source contract)', () => { - it('single import resolves the role from the request userId', () => { - const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8'); - // The role lookup must key on the request `userId`, then gate on the role. - expect(src).toMatch( - /this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/, - ); - expect(src).toMatch( - /htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*\)/, - ); - // And the toggle is resolved from the workspace settings. - expect(src).toContain('isHtmlEmbedFeatureEnabled('); - // And the gate uses the real strip helper. - expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)'); + beforeEach(async () => { + // Real temp dir holding a single .html page that carries the embed; the + // method reads it from disk via fs.readFile. + extractDir = await fs.mkdtemp(path.join(os.tmpdir(), 'html-embed-import-')); + await fs.writeFile(path.join(extractDir, 'page.html'), HTML_WITH_EMBED); }); - it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => { - const src = readFileSync( - join(SRC_DIR, 'file-import-task.service.ts'), - 'utf-8', + afterEach(async () => { + await fs.rm(extractDir, { recursive: true, force: true }); + }); + + // Run processGenericImport over the temp dir and return the content persisted + // for the imported page (captured at trx.insertInto('pages').values(...)). + async function persistedContent( + featureEnabled: boolean, + user: { role?: string } | undefined, + ) { + const captured: any[] = []; + const trxInsertChain = (table: string) => ({ + values: (row: any) => { + if (table === 'pages') captured.push(row); + return { execute: async () => undefined }; + }, + }); + const trx: any = { insertInto: trxInsertChain }; + const db: any = { + // spaces lookup at the top of processGenericImport + selectFrom: () => ({ + select: () => ({ + where: () => ({ executeTakeFirst: async () => ({ slug: 'sp' }) }), + }), + }), + // executeTx -> db.transaction().execute(cb) + transaction: () => ({ execute: async (cb: any) => cb(trx) }), + }; + + // importService stub: only the real, gate-relevant helpers are used. We give + // it the REAL implementations by delegating to a real ImportService for + // processHTML/extractTitleAndRemoveHeading/createYdoc so the embed parse and + // strip path runs for real. + const realImport = new ImportService( + {} as any, + {} as any, + {} as any, + {} as any, + {} as any, + {} as any, ); - expect(src).toMatch( - /this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/, + const importService: any = { + processHTML: (html: string) => realImport.processHTML(html), + extractTitleAndRemoveHeading: (s: any) => + realImport.extractTitleAndRemoveHeading(s), + createYdoc: (j: any) => realImport.createYdoc(j), + }; + + const importAttachmentService: any = { + // passthrough: no attachment rewriting, return html unchanged + processAttachments: jest.fn(async (opts: any) => opts.html), + }; + + const service = new FileImportTaskService( + { putBuffer: jest.fn() } as any, // storageService + importService, + { nextPagePosition: jest.fn(async () => 'a0') } as any, // pageService (position only) + { insertBacklink: jest.fn() } as any, // backlinkRepo + db, + importAttachmentService, + userRepoFor(user) as any, + workspaceRepoFor(featureEnabled) as any, + { emit: jest.fn() } as any, // eventEmitter + { logBatchWithContext: jest.fn() } as any, // auditService ); - expect(src).toMatch( - /importerCanAuthorHtmlEmbed\s*=\s*htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*,?\s*\)/, + + const fileTask: any = { + id: 'task-1', + creatorId: USER, + workspaceId: WS, + spaceId: SPACE, + source: 'generic', + }; + + await service.processGenericImport({ extractDir, fileTask }); + expect(captured.length).toBeGreaterThanOrEqual(1); + return captured[0].content; + } + + it('toggle ON + member: persisted page has htmlEmbed stripped', async () => { + const content = await persistedContent(true, { role: 'member' }); + expect(hasHtmlEmbedNode(content)).toBe(false); + expect(JSON.stringify(content)).toContain('imported body'); + }); + + it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe( + false, ); - expect(src).toContain('isHtmlEmbedFeatureEnabled('); - expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)'); + }); + + it('toggle ON + admin: persisted page keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe( + true, + ); + }); + + it('toggle ON + owner: persisted page keeps the htmlEmbed', async () => { + expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe( + true, + ); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { + expect( + hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })), + ).toBe(false); }); }); From b7ea8c850ebec2731716c9c6bb6726a90b101992 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 23:02:01 +0300 Subject: [PATCH 15/16] fix(html-embed): preserve admin's existing embed on a non-admin co-editor's store (#29) The collab persist strip keyed to the storing connection's user, so when a non-admin co-editor stored, it removed an admin's legitimately-authored embed too (data loss). Now: toggle OFF still strips all (feature disabled); toggle ON + non-admin storer strips only NEWLY-introduced embeds and preserves those already present in the persisted content (admin-vetted), via new helpers collectHtmlEmbedSources + stripDisallowedHtmlEmbedNodes (identity = attrs.source, already-vetted HTML). The ydoc reflect is now guarded by a deep-equal check so an unrelated non-admin edit that touches no new embed doesn't churn the doc. A non-admin still cannot add a new embed. Documents the allow-list TOCTOU (best-effort snapshot read outside the lock; converges on next store). Co-Authored-By: Claude Opus 4.8 --- .../persistence.extension.html-embed.spec.ts | 67 ++++++- .../extensions/persistence.extension.ts | 85 ++++++--- .../helpers/prosemirror/html-embed.spec.ts | 165 ++++++++++++++++++ .../helpers/prosemirror/html-embed.util.ts | 101 +++++++++++ 4 files changed, 394 insertions(+), 24 deletions(-) diff --git a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts index a78173a6..55f06ea9 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts @@ -3,6 +3,7 @@ import { TiptapTransformer } from '@hocuspocus/transformer'; import { PersistenceExtension } from './persistence.extension'; import { tiptapExtensions } from '../collaboration.util'; import { + collectHtmlEmbedSources, hasHtmlEmbedNode, HTML_EMBED_NODE_NAME, } from '../../common/helpers/prosemirror/html-embed.util'; @@ -121,7 +122,7 @@ function nodeTypeCounts(json: any): Record { * onStoreDocument to reach the strip + persist branch, and capture the content * that would be written to the page row. */ -function buildExtension(featureEnabled = true) { +function buildExtension(featureEnabled = true, priorContent?: any) { const captured: { content?: any } = {}; const existingPage = { @@ -131,7 +132,10 @@ function buildExtension(featureEnabled = true) { workspaceId: 'ws-1', creatorId: 'creator-1', contributorIds: [], - content: { type: 'doc', content: [] }, // differs from new content -> persist runs + // The currently-persisted content. Defaults to an empty doc (differs from + // new content -> persist runs); a test may pass a prior admin embed here to + // exercise the preserve-admin-embed branch. + content: priorContent ?? { type: 'doc', content: [] }, createdAt: new Date(), lastUpdatedSource: 'user', }; @@ -186,8 +190,9 @@ async function runStore( role: string | null | undefined, doc: Y.Doc, featureEnabled = true, + priorContent?: any, ) { - const { ext, captured } = buildExtension(featureEnabled); + const { ext, captured } = buildExtension(featureEnabled, priorContent); // hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has // none, so stub it (the post-persist broadcast is not under test here). (doc as any).broadcastStateless = () => undefined; @@ -268,6 +273,62 @@ describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)' ).toBe(false); }); + it('toggle ON + non-admin store: PRESERVES an admin embed already in the persisted content through an unrelated edit', async () => { + // Prior persisted content already holds an admin-authored embed. + const ADMIN_SOURCE = ''; + const prior = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + ], + }; + // A non-admin makes an UNRELATED edit (tweaks the paragraph) but the embed + // is still present in the merged doc. + const edited = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + ], + }; + + const captured = await runStore('member', buildYdoc(edited), true, prior); + expect(captured.content).toBeDefined(); + // The admin's pre-existing embed survives the non-admin store. + expect(collectHtmlEmbedSources(captured.content)).toEqual( + new Set([ADMIN_SOURCE]), + ); + }); + + it('toggle ON + non-admin store: strips a NEWLY-added embed while keeping the prior admin one', async () => { + const ADMIN_SOURCE = ''; + const prior = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + ], + }; + // Non-admin keeps the admin embed, makes an unrelated paragraph edit (so the + // store is not a no-op and is persisted), and ALSO adds a brand-new embed. + const edited = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: '' } }, + ], + }; + + const captured = await runStore('member', buildYdoc(edited), true, prior); + expect(captured.content).toBeDefined(); + // Only the admin-vetted source remains; the newly-introduced one is stripped. + expect(collectHtmlEmbedSources(captured.content)).toEqual( + new Set([ADMIN_SOURCE]), + ); + }); + it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => { const emptyDoc = buildYdoc({ type: 'doc', diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 23c94631..7c2ff963 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -40,9 +40,11 @@ import { } from '../constants'; import { TransclusionService } from '../../core/page/transclusion/transclusion.service'; import { + collectHtmlEmbedSources, hasHtmlEmbedNode, htmlEmbedAllowed, isHtmlEmbedFeatureEnabled, + stripDisallowedHtmlEmbedNodes, stripHtmlEmbedNodes, } from '../../common/helpers/prosemirror/html-embed.util'; import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; @@ -130,12 +132,31 @@ export class PersistenceExtension implements Extension { // every htmlEmbed node before it is written to the page row AND before the // ydoc state is re-encoded, so the node cannot be reintroduced by a // non-admin via the collab socket. - // NOTE (residual risk): the gate is keyed to the storing connection's user. - // If an admin already authored an htmlEmbed and a non-admin's later store - // does not touch it, this strip would remove the admin's embed on that - // non-admin store. This is intentionally conservative (fail closed): the - // admin re-adds/keeps the node on their own next edit. A future refinement - // could diff against the previously persisted admin-authored embeds. + // NOTE (defense-in-depth refinement, Gitea #29): the gate is keyed to the + // storing connection's user, but it no longer blindly strips EVERY embed on + // a non-admin store. We distinguish two cases inside the !allowed branch: + // - Feature toggle OFF => strip ALL embeds (the feature is disabled for + // everyone; existing embeds get cleaned up on the next save). + // - Toggle ON but the storer is a NON-admin => strip only NEWLY-introduced + // embeds and PRESERVE embeds already present in the currently-persisted + // page content (admin-authored, already vetted). So a non-admin still + // cannot ADD an embed, but an unrelated edit (e.g. a paragraph tweak) no + // longer destroys an admin's existing embed (the prior data-loss bug). + // The pre-existing-embed identity is the raw `attrs.source` (see + // collectHtmlEmbedSources). A non-admin who copies an existing admin embed's + // exact source elsewhere passes — acceptable, that HTML is already vetted. + // + // ACCEPTED RESIDUAL RISK (toggle-ON allow-list TOCTOU): the allow-list is a + // best-effort snapshot read OUTSIDE the locked transaction (the prior content + // is pre-read above, but inside executeTx the row is re-read withLock without + // recomputing the allow-list). A concurrent admin store that changes the + // persisted embeds between the pre-read and this write can make the preserve + // decision use a slightly stale snapshot — worst case one embed transiently + // kept or dropped; it converges on the next store, with no auth bypass or + // broader data loss. The race is accepted because it only affects concurrent + // authenticated editors on the (rare) toggle-ON non-admin path, converges on + // the next store, and the persisted row plus every share/readonly read path + // remain protected by the strip. // // ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in // the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs @@ -157,22 +178,44 @@ export class PersistenceExtension implements Extension { ); if (!htmlEmbedAllowed(htmlEmbedEnabled, context?.user?.role)) { if (hasHtmlEmbedNode(tiptapJson)) { - this.logger.warn( - `Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`, - ); - tiptapJson = stripHtmlEmbedNodes(tiptapJson); - // Reflect the stripped content back into the shared ydoc so the node is - // removed for all connected clients, not just the persisted row. - const fragment = document.getXmlFragment('default'); - if (fragment.length > 0) { - fragment.delete(0, fragment.length); + let strippedJson: typeof tiptapJson; + if (htmlEmbedEnabled === false) { + // Toggle OFF: feature disabled for everyone -> strip ALL embeds. + strippedJson = stripHtmlEmbedNodes(tiptapJson); + } else { + // Toggle ON, non-admin storer: preserve embeds already present in the + // currently-persisted (admin-vetted) page content; strip only the + // newly-introduced ones. Pre-read the prior content — a small extra + // query only on this rare non-admin + toggle-ON path. + const prior = await this.pageRepo.findById(pageId, { + includeContent: true, + }); + const allowed = collectHtmlEmbedSources(prior?.content); + strippedJson = stripDisallowedHtmlEmbedNodes(tiptapJson, allowed); + } + + // Only mutate the ydoc + log when the strip actually removed something; + // an unnecessary ydoc rewrite would churn the doc for all clients. With + // the toggle-ON branch a non-admin store that only touches admin-vetted + // embeds leaves the content unchanged here. + if (!isDeepStrictEqual(strippedJson, tiptapJson)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`, + ); + tiptapJson = strippedJson; + // Reflect the stripped content back into the shared ydoc so the node + // is removed for all connected clients, not just the persisted row. + const fragment = document.getXmlFragment('default'); + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + const cleanDoc = TiptapTransformer.toYdoc( + tiptapJson, + 'default', + tiptapExtensions, + ); + Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc)); } - const cleanDoc = TiptapTransformer.toYdoc( - tiptapJson, - 'default', - tiptapExtensions, - ); - Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc)); } } diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts index b48e9e73..4351673a 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts @@ -1,8 +1,10 @@ import { canAuthorHtmlEmbed, + collectHtmlEmbedSources, hasHtmlEmbedNode, htmlEmbedAllowed, isHtmlEmbedFeatureEnabled, + stripDisallowedHtmlEmbedNodes, stripHtmlEmbedNodes, } from './html-embed.util'; import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util'; @@ -105,6 +107,169 @@ describe('stripHtmlEmbedNodes', () => { }); }); +describe('collectHtmlEmbedSources', () => { + it('collects the source of every htmlEmbed node, including nested ones', () => { + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed', attrs: { source: 'top' } }, + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { type: 'htmlEmbed', attrs: { source: 'nested' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'x' }] }, + ], + }, + ], + }, + ], + }; + const sources = collectHtmlEmbedSources(doc); + expect(sources).toEqual(new Set(['top', 'nested'])); + }); + + it('returns an empty set for a doc with no embeds', () => { + const doc = { + type: 'doc', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }], + }; + expect(collectHtmlEmbedSources(doc).size).toBe(0); + }); + + it('gracefully skips embeds with absent attrs or non-string source', () => { + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed' }, // no attrs + { type: 'htmlEmbed', attrs: {} }, // no source + { type: 'htmlEmbed', attrs: { source: 42 } }, // non-string + { type: 'htmlEmbed', attrs: { source: '' } }, + ], + }; + expect(collectHtmlEmbedSources(doc)).toEqual(new Set([''])); + }); + + it('returns an empty set for non-object input', () => { + expect(collectHtmlEmbedSources(null).size).toBe(0); + expect(collectHtmlEmbedSources(undefined).size).toBe(0); + expect(collectHtmlEmbedSources('x' as any).size).toBe(0); + }); +}); + +describe('stripDisallowedHtmlEmbedNodes', () => { + it('keeps an embed whose source is allowed and removes the rest', () => { + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }, + ], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set([''])); + expect(collectHtmlEmbedSources(result)).toEqual(new Set([''])); + // The allowed embed and the paragraph survive; the new embed is gone. + expect(result.content).toHaveLength(2); + expect(result.content[0].attrs.source).toBe(''); + expect(result.content[1].type).toBe('paragraph'); + }); + + it('keeps BOTH embeds when two nodes share the same allowed source', () => { + // Source-identity semantics: identity is the raw `attrs.source`, so a + // non-admin who duplicates an existing admin-vetted source keeps both copies. + // This is intended — the raw HTML is already vetted, so a duplicate is safe. + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'mid' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set([''])); + expect(hasHtmlEmbedNode(result)).toBe(true); + const embeds = result.content.filter( + (n: any) => n.type === 'htmlEmbed', + ); + expect(embeds).toHaveLength(2); + expect(embeds.every((n: any) => n.attrs.source === '')).toBe(true); + }); + + it('removes a newly-introduced embed when nothing is allowed', () => { + const doc = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source: '' } }], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set()); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); + + it('filters nested embeds by the allow-list (e.g. inside columns)', () => { + const doc = { + type: 'doc', + content: [ + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], + }, + ], + }, + ], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set([''])); + const col = findFirstChild(result, 'column'); + expect(col.content).toHaveLength(1); + expect(col.content[0].attrs.source).toBe(''); + }); + + it('treats an embed with absent/non-string source as not allowed (stripped)', () => { + const doc = { + type: 'doc', + content: [ + { type: 'htmlEmbed' }, + { type: 'htmlEmbed', attrs: {} }, + ], + }; + const result = stripDisallowedHtmlEmbedNodes(doc, new Set([''])); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); + + it('does not mutate the input document', () => { + const doc = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source: '' } }], + }; + stripDisallowedHtmlEmbedNodes(doc, new Set()); + expect(doc.content).toHaveLength(1); + expect(doc.content[0].type).toBe('htmlEmbed'); + }); + + it('neutralizes a root node that is itself a disallowed htmlEmbed', () => { + const root = { type: 'htmlEmbed', attrs: { source: '' } }; + const result = stripDisallowedHtmlEmbedNodes(root, new Set()); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); + + it('keeps a root node that is an allowed htmlEmbed (defensive branch)', () => { + const root = { type: 'htmlEmbed', attrs: { source: '' } }; + const result = stripDisallowedHtmlEmbedNodes(root, new Set([''])); + expect(collectHtmlEmbedSources(result)).toEqual(new Set([''])); + }); + + it('returns non-object input unchanged', () => { + expect(stripDisallowedHtmlEmbedNodes(null as any, new Set())).toBeNull(); + }); +}); + describe('canAuthorHtmlEmbed', () => { it('allows owner and admin', () => { expect(canAuthorHtmlEmbed('owner')).toBe(true); diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts index aa5d579d..146ea9d3 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts @@ -48,6 +48,107 @@ export function stripHtmlEmbedNodes(pmJson: T): T { return { ...node } as unknown as T; } +/** + * Walk the document and collect a stable identity for every `htmlEmbed` node. + * + * The identity is the node's `attrs.source` string — the raw HTML the embed + * renders. Two embeds that render the exact same HTML are treated as the same + * identity. Used by the collab persist path to know which embeds are ALREADY + * present in the currently-persisted (admin-vetted) page content, so a later + * non-admin store can strip only NEWLY-introduced embeds while preserving the + * pre-existing admin-authored ones. + * + * Absent attrs or a non-string/absent `source` are skipped gracefully (such a + * node contributes no identity to the set). + */ +export function collectHtmlEmbedSources(pmJson: unknown): Set { + const sources = new Set(); + + const walk = (node: unknown): void => { + if (!node || typeof node !== 'object') { + return; + } + const n = node as JSONContent; + if (n.type === HTML_EMBED_NODE_NAME) { + const source = (n.attrs as Record | undefined)?.source; + if (typeof source === 'string') { + sources.add(source); + } + } + if (Array.isArray(n.content)) { + for (const child of n.content) { + walk(child); + } + } + }; + + walk(pmJson); + return sources; +} + +/** + * Like {@link stripHtmlEmbedNodes}, but KEEP any `htmlEmbed` node whose + * `attrs.source` is in `allowedSources`; remove the rest. + * + * Used on the collab persist path when the feature toggle is ON but the storing + * user is a NON-admin: `allowedSources` is the set of embed sources already + * present in the currently-persisted page content (admin-authored, already + * vetted). A non-admin therefore cannot ADD a new embed, but their unrelated + * edit also cannot destroy an admin's existing one. + * + * NOTE: identity is the raw source string, so a non-admin who COPIES an existing + * admin embed's exact source into a NEW location passes this check. That is + * acceptable — the source is already admin-vetted content present in the doc; no + * new untrusted HTML is introduced. + * + * Returns a NEW document; the input is not mutated. Same defensive root-type + * check pattern as {@link stripHtmlEmbedNodes}. + */ +export function stripDisallowedHtmlEmbedNodes( + pmJson: T, + allowedSources: Set, +): T { + if (!pmJson || typeof pmJson !== 'object') { + return pmJson; + } + + const node = pmJson as unknown as JSONContent; + + // Defensive root-type check (mirrors stripHtmlEmbedNodes): if the ROOT node is + // itself an htmlEmbed and its source is NOT allowed, the children-filtering + // below could never drop it, so neutralize it here. Unreachable in normal use + // (the PM document root is always a `doc`). + if (node.type === HTML_EMBED_NODE_NAME) { + const source = (node.attrs as Record | undefined)?.source; + if (typeof source === 'string' && allowedSources.has(source)) { + return { ...node } as unknown as T; + } + return { type: 'doc', content: [] } as unknown as T; + } + + if (Array.isArray(node.content)) { + const filtered: JSONContent[] = []; + for (const child of node.content) { + // Drop a disallowed htmlEmbed child (newly introduced); keep an allowed + // one (already present in the persisted, admin-vetted content). + if (child && child.type === HTML_EMBED_NODE_NAME) { + const source = (child.attrs as Record | undefined) + ?.source; + if (typeof source === 'string' && allowedSources.has(source)) { + filtered.push({ ...child }); + } + continue; + } + // Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are + // also filtered by the same allow-list. + filtered.push(stripDisallowedHtmlEmbedNodes(child, allowedSources)); + } + return { ...node, content: filtered } as unknown as T; + } + + return { ...node } as unknown as T; +} + /** * Returns true if the document contains at least one `htmlEmbed` node anywhere * in its tree. Useful to decide whether a strip pass actually changed anything From 424761753e395459b7a119b057c0be74e1be0360 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sat, 20 Jun 2026 23:20:02 +0300 Subject: [PATCH 16/16] fix(html-embed): shrink the collab broadcast window with an early onChange guard (#26) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A non-admin's transient htmlEmbed could execute in other open editors until the debounced (10s) onStoreDocument strip. Add a ~300ms onChange-debounced early strip (guardHtmlEmbed) that converges the shared ydoc for everyone far sooner. Safety-critical details: - Scheduled from onChange ONLY for non-admins AND only when the workspace toggle is ON (cached per-document in onLoadDocument), so the common toggle-OFF case does zero extra work. - guardHtmlEmbed does ALL async work (toggle + persisted allow-list read) FIRST, then performs fromYdoc -> strip -> fragment.delete -> applyUpdate in a single SYNCHRONOUS, await-free block, so no inbound Yjs update can interleave and a concurrent edit can never be clobbered. Bails if document.isDestroyed. - Reuses the #29 preserve logic (admin-vetted embeds survive; only the non-admin's new ones are stripped). Loop-safe (corrective update has null origin -> no reschedule; post-strip no embed -> cheap no-op). Per-document timer cleared on unload. onStoreDocument stays the authoritative backstop. The irreducible residual is only the very first inbound broadcast before the debounce fires — Hocuspocus exposes no synchronous beforeBroadcast filter. Co-Authored-By: Claude Opus 4.8 --- .../persistence.extension.html-embed.spec.ts | 115 +++++++++ .../extensions/persistence.extension.ts | 220 +++++++++++++++++- 2 files changed, 323 insertions(+), 12 deletions(-) diff --git a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts index 55f06ea9..7941a1e6 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts @@ -339,3 +339,118 @@ describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)' await expect(runStore('member', emptyDoc)).resolves.toBeDefined(); }); }); + +// Exercises the REAL early onChange guard (Gitea #26): guardHtmlEmbed converges +// the shared ydoc sub-second, before the 10s store debounce. We call it directly +// (it is the debounced timer body) and assert the ydoc fragment no longer yields +// an htmlEmbed for the non-admin's transient embed, while admin-vetted embeds +// already in the persisted content survive. +describe('PersistenceExtension.guardHtmlEmbed early onChange guard (real code)', () => { + async function runGuard( + role: string | null | undefined, + doc: Y.Doc, + featureEnabled = true, + priorContent?: any, + ) { + const { ext } = buildExtension(featureEnabled, priorContent); + await (ext as any).guardHtmlEmbed( + 'page-1', + doc, + { user: { id: 'u1', role, workspaceId: 'ws-1' } }, + ); + } + + it('toggle ON + non-admin: strips a newly-added embed from the shared ydoc', async () => { + // Prior persisted content has NO embed; the live doc has one a non-admin + // just added. + const doc = buildYdoc({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: '' } }, + ], + }); + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + true, + ); + + await runGuard('member', doc, true, { type: 'doc', content: [] }); + + // The shared ydoc fragment no longer yields any htmlEmbed. + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + false, + ); + }); + + it('toggle ON + non-admin: preserves a prior admin embed, strips the new one', async () => { + const ADMIN_SOURCE = ''; + const prior = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + ], + }; + // Live doc keeps the admin embed AND adds a brand-new one. + const doc = buildYdoc({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'intro' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: '' } }, + ], + }); + + await runGuard('member', doc, true, prior); + + // Only the admin-vetted source survives in the shared ydoc. + expect( + collectHtmlEmbedSources(TiptapTransformer.fromYdoc(doc, 'default')), + ).toEqual(new Set([ADMIN_SOURCE])); + }); + + it('toggle OFF + non-admin: strips ALL embeds (allow-list is null)', async () => { + // Even an embed that matches the prior content is stripped when the toggle + // is OFF, because the OFF path passes allowed=null (strip everything) and + // never reads the prior content for an allow-list. + const SOURCE = ''; + const doc = buildYdoc({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }, + { type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } }, + ], + }); + await runGuard('member', doc, false, { + type: 'doc', + content: [{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } }], + }); + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + false, + ); + }); + + it('admin role: guard is a defensive no-op (embed preserved)', async () => { + const doc = buildYdoc({ + type: 'doc', + content: [ + { type: HTML_EMBED_NODE_NAME, attrs: { source: '' } }, + ], + }); + await runGuard('admin', doc, true, { type: 'doc', content: [] }); + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + true, + ); + }); + + it('no embed present: guard is a cheap no-op (loop-safe re-fire)', async () => { + const doc = buildYdoc({ + type: 'doc', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'plain' }] }], + }); + await runGuard('member', doc, true, { type: 'doc', content: [] }); + expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe( + false, + ); + }); +}); diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index 7c2ff963..b5dd49ed 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -40,6 +40,7 @@ import { } from '../constants'; import { TransclusionService } from '../../core/page/transclusion/transclusion.service'; import { + canAuthorHtmlEmbed, collectHtmlEmbedSources, hasHtmlEmbedNode, htmlEmbedAllowed, @@ -58,6 +59,21 @@ export class PersistenceExtension implements Extension { // coalescing window" per document and OR it across all edits in the window, // so the snapshot is marked 'agent' regardless of who wrote last. private agentTouched: Map = new Map(); + // Per-document debounce timers for the early htmlEmbed guard (Gitea #26). + // onChange schedules a short (~300ms) debounced strip that converges the + // shared ydoc for all connected clients well before the 10s store debounce, + // shrinking the pre-persist broadcast window of a non-admin's transient embed. + private htmlEmbedGuardTimers: Map = new Map(); + // Per-document cache of the workspace htmlEmbed toggle (Gitea #26). Populated + // in onLoadDocument (which already loads the page + has workspace context) and + // read in onChange to gate early-guard scheduling: when the toggle is OFF (the + // common default) we schedule NOTHING — no timer, no fromYdoc, no DB read — and + // rely on the onStoreDocument strip as the backstop (when OFF the embed does + // not execute in editable mode anyway). Cleared in afterUnloadDocument. + // STALENESS: if an admin flips the toggle ON mid-session this cache stays OFF + // until the document is reloaded, so the early guard won't schedule — accepted, + // the onStoreDocument backstop still strips on persist. + private htmlEmbedToggleByDoc: Map = new Map(); constructor( private readonly pageRepo: PageRepo, @@ -89,6 +105,23 @@ export class PersistenceExtension implements Extension { return; } + // Cache the workspace htmlEmbed toggle for this document (Gitea #26). We + // already have the page (hence its workspaceId) here, so resolve the toggle + // once and cache it keyed by documentName. onChange reads this to decide + // whether to schedule the early guard at all — when OFF we skip the guard + // entirely (no timer, no fromYdoc, no DB read). Cleared in + // afterUnloadDocument. See htmlEmbedToggleByDoc for the staleness note. + try { + const enabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(page.workspaceId))?.settings, + ); + this.htmlEmbedToggleByDoc.set(documentName, enabled); + } catch (err) { + // Fail OFF: if the toggle can't be resolved, never schedule the early + // guard; the onStoreDocument backstop still strips on persist. + this.htmlEmbedToggleByDoc.set(documentName, false); + } + if (page.ydoc) { this.logger.debug(`ydoc loaded from db: ${pageId}`); @@ -158,18 +191,25 @@ export class PersistenceExtension implements Extension { // the next store, and the persisted row plus every share/readonly read path // remain protected by the strip. // - // ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in - // the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs - // update to connected clients immediately, so a non-admin's transient - // htmlEmbed can execute in OTHER open editors' browsers in the brief window - // before this persist strips it. The exposure is limited to concurrent - // AUTHENTICATED space members who have the doc open with Edit rights - // (semi-trusted) — anonymous public-share/readonly viewers do NOT open a - // collab socket (ReadonlyPageEditor renders fetched, already-stripped - // content; HocuspocusProvider is only used by the authenticated editable - // page-editor), and the PERSISTED page row plus every share/readonly read - // path are protected by this strip. The window is therefore accepted rather - // than mitigated with an inbound beforeBroadcast strip. + // RESIDUAL RISK (pre-persist broadcast window) — NOW MITIGATED (Gitea #26): + // this strip runs in the debounced onStoreDocument (up to 10s), but + // hocuspocus broadcasts each inbound Yjs update to connected clients + // immediately, so a non-admin's transient htmlEmbed can execute in OTHER open + // editors' browsers in the window before this persist strips it. The exposure + // is limited to concurrent AUTHENTICATED space members who have the doc open + // with Edit rights (semi-trusted) — anonymous public-share/readonly viewers do + // NOT open a collab socket (ReadonlyPageEditor renders fetched, + // already-stripped content; HocuspocusProvider is only used by the + // authenticated editable page-editor), and the PERSISTED page row plus every + // share/readonly read path are protected by this strip. + // The window is now SHRUNK to sub-second by an onChange-debounced early guard + // (~300ms) — see guardHtmlEmbed() — which runs the SAME preserve/strip gate as + // this block and re-encodes the cleaned ydoc, converging the doc for all + // clients long before this 10s store debounce fires. This onStoreDocument + // strip remains the authoritative backstop for persistence. The irreducible + // residual is only the VERY FIRST inbound broadcast before the ~300ms debounce + // fires: hocuspocus exposes no synchronous beforeBroadcast filter to drop the + // node before that first relay, so it cannot be eliminated entirely. // Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature // toggle is ON and the storing user is an admin/owner. OFF (default) => // stripped for everyone (existing embeds get cleaned up on next save). @@ -389,12 +429,168 @@ export class PersistenceExtension implements Extension { if (data.context?.actor === 'agent') { this.agentTouched.set(documentName, true); } + + // Early htmlEmbed guard scheduling (Gitea #26). Schedule the short debounced + // guard ONLY when (a) this document's workspace toggle is cached ON and + // (b) the changing connection's user is a NON-admin (cannot author + // htmlEmbed). When the toggle is OFF/unknown we schedule NOTHING — no timer, + // no fromYdoc, no DB read — killing the OFF-case overhead (the common + // default); the onStoreDocument strip is the backstop and an OFF embed does + // not execute in editable mode anyway. We do NO expensive work here — we only + // (re)schedule the timer; the debounce coalesces rapid edits into a single + // guard check. + if ( + userId && + this.htmlEmbedToggleByDoc.get(documentName) === true && + !canAuthorHtmlEmbed(data.context?.user?.role) + ) { + const existing = this.htmlEmbedGuardTimers.get(documentName); + if (existing) { + clearTimeout(existing); + } + const timer = setTimeout(() => { + this.htmlEmbedGuardTimers.delete(documentName); + void this.guardHtmlEmbed(documentName, data.document, data.context); + }, 300); + this.htmlEmbedGuardTimers.set(documentName, timer); + } + } + + /** + * Early, onChange-debounced htmlEmbed strip (Gitea #26). Mirrors the + * onStoreDocument admin gate but runs ~300ms after a non-admin edit instead of + * waiting for the 10s store debounce, so a non-admin's transient embed is + * removed from the shared ydoc — and re-broadcast as cleaned state — for all + * connected clients in sub-second time. onStoreDocument remains the + * authoritative persistence backstop; this is an ADDITIONAL early pass. + * + * CONCURRENCY (the critical invariant): the Y.Doc mutation is a single + * SYNCHRONOUS block with NO `await` between the fromYdoc snapshot and the + * applyUpdate write. ALL async work (the workspace toggle lookup and the + * persisted-content read for the allow-list) happens FIRST, before that block. + * Because JS is single-threaded, a synchronous block cannot interleave with + * inbound Yjs update handlers, so a concurrent edit that lands while we await + * cannot be CLOBBERED: we re-snapshot the live doc only after all awaits, then + * delete + rebuild + applyUpdate without yielding. (An earlier version awaited + * DB reads BETWEEN the snapshot and the write, so a concurrent edit in that gap + * was lost — this restructure fixes that.) + * + * The allow-list is a best-effort snapshot read outside any lock (TOCTOU + * accepted, same as onStoreDocument): worst case one embed is transiently kept + * or dropped; it converges on the next guard/store, with no auth bypass. + * + * Loop-safety: the corrective applyUpdate has a null origin, so the re-fired + * onChange carries no userId and is not rescheduled; and after a strip no + * htmlEmbed remains, so a subsequent guard fire is a cheap no-op (the + * hasHtmlEmbedNode early-exit). NEVER throws — an unhandled rejection in a timer + * would crash the process — so the whole body is wrapped in try/catch. + */ + private async guardHtmlEmbed( + documentName: string, + document: Y.Doc, + context: any, + ): Promise { + // Defensive: ensure no stale timer entry survives for this document. + this.htmlEmbedGuardTimers.delete(documentName); + try { + // Re-check defensively: onChange only schedules for non-admins, but if an + // admin/owner somehow reaches here, the embed is authored content — do + // nothing (onStoreDocument's toggle-AND-admin gate handles persistence). + if (canAuthorHtmlEmbed(context?.user?.role)) { + return; + } + + // ---- ASYNC PHASE: do ALL awaits up front, before touching the ydoc. ---- + // Resolve the workspace toggle exactly as onStoreDocument does. When OFF we + // strip everything; when ON we use the preserve logic (keep admin-vetted + // embeds, strip only the non-admin's newly-introduced ones). + const enabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(context?.user?.workspaceId)) + ?.settings, + ); + + // The allow-list (admin-vetted sources already in the persisted content). + // null => strip ALL (toggle OFF). Read here, BEFORE the synchronous block, + // so no await sits between the doc snapshot and the doc write. + let allowed: Set | null = null; + if (enabled !== false) { + const prior = await this.pageRepo.findById(getPageId(documentName), { + includeContent: true, + }); + allowed = collectHtmlEmbedSources(prior?.content); + } + + // The awaits above may have let the document be unloaded/destroyed. If so, + // bail — mutating a destroyed doc is pointless and could throw (the + // try/catch is the ultimate safety net regardless). + if ((document as { isDestroyed?: boolean }).isDestroyed) { + return; + } + + // ---- SYNCHRONOUS PHASE: snapshot -> strip -> reflect, NO await here. ---- + // Because there is no await between fromYdoc and applyUpdate, no inbound + // Yjs update can interleave, so a concurrent edit cannot be lost. + const json = TiptapTransformer.fromYdoc(document, 'default'); + + // Cheap exit: nothing to guard if the doc has no embed at all. This is also + // why a post-strip re-fire is a no-op (loop-safe). + if (!hasHtmlEmbedNode(json)) { + return; + } + + const strippedJson = + allowed === null + ? stripHtmlEmbedNodes(json) + : stripDisallowedHtmlEmbedNodes(json, allowed); + + // Nothing was stripped (e.g. the only embed is an admin-vetted one) — do + // not churn the shared ydoc for all clients. + if (isDeepStrictEqual(strippedJson, json)) { + return; + } + + // Reflect the stripped content back into the shared ydoc EXACTLY as + // onStoreDocument does, so the node is removed for all connected clients, + // not just on the eventual persist. This re-encode broadcasts the cleaned + // state; after it hasHtmlEmbedNode is false, so any later guard fire is a + // cheap no-op (loop-safe). + const fragment = document.getXmlFragment('default'); + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + const cleanDoc = TiptapTransformer.toYdoc( + strippedJson, + 'default', + tiptapExtensions, + ); + Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc)); + + this.logger.warn( + `Stripping htmlEmbed node(s) via early onChange guard by user ${context?.user?.id} on ${documentName}`, + ); + } catch (err) { + // NEVER rethrow out of a timer-scheduled call. + this.logger.error( + `Early htmlEmbed guard failed on ${documentName}`, + err, + ); + } } async afterUnloadDocument(data: afterUnloadDocumentPayload) { const documentName = data.documentName; this.contributors.delete(documentName); this.agentTouched.delete(documentName); + // Drop the cached toggle for this document so a reload re-resolves it (and + // picks up a mid-session admin toggle flip). + this.htmlEmbedToggleByDoc.delete(documentName); + // Clear any pending early-guard timer so it cannot fire after the document + // is unloaded (leak / use-after-unload prevention). + const timer = this.htmlEmbedGuardTimers.get(documentName); + if (timer) { + clearTimeout(timer); + this.htmlEmbedGuardTimers.delete(documentName); + } } private consumeContributors(documentName: string): string[] {