diff --git a/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css b/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css new file mode 100644 index 00000000..75304685 --- /dev/null +++ b/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css @@ -0,0 +1,43 @@ +.htmlEmbedNodeView { + position: relative; +} + +/* The container the raw source is injected into. */ +.htmlEmbedContent { + width: 100%; +} + +/* Edit affordance overlay, only shown while editing the document. */ +.htmlEmbedToolbar { + position: absolute; + top: 4px; + right: 4px; + z-index: 2; + opacity: 0; + transition: opacity 0.15s ease; +} + +.htmlEmbedNodeView:hover .htmlEmbedToolbar { + opacity: 1; +} + +/* Placeholder card shown when the source is empty (edit mode only). */ +.htmlEmbedPlaceholder { + display: flex; + align-items: center; + justify-content: center; + gap: 8px; + padding: 16px; + border: 1px dashed var(--mantine-color-gray-4); + border-radius: 8px; + color: var(--mantine-color-dimmed); + + @mixin dark { + border-color: var(--mantine-color-dark-3); + } +} + +.htmlEmbedSelected { + outline: 2px solid var(--mantine-color-blue-5); + border-radius: 8px; +} diff --git a/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx b/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx new file mode 100644 index 00000000..a46b383a --- /dev/null +++ b/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx @@ -0,0 +1,185 @@ +import { NodeViewProps, NodeViewWrapper } from "@tiptap/react"; +import React, { useCallback, useEffect, useRef, useState } from "react"; +import clsx from "clsx"; +import { + ActionIcon, + Button, + Group, + Modal, + Text, + Textarea, +} from "@mantine/core"; +import { IconCode, IconEdit } from "@tabler/icons-react"; +import { useTranslation } from "react-i18next"; +import { useAtomValue } from "jotai"; +import useUserRole from "@/hooks/use-user-role.tsx"; +import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts"; +import classes from "./html-embed-view.module.css"; + +/** + * Inject raw HTML (including ")} + styles={{ input: { fontFamily: "monospace" } }} + data-autofocus + /> + + + + + + + ); +} diff --git a/apps/client/src/features/editor/components/slash-menu/menu-items.ts b/apps/client/src/features/editor/components/slash-menu/menu-items.ts index 7f856755..bb52fbca 100644 --- a/apps/client/src/features/editor/components/slash-menu/menu-items.ts +++ b/apps/client/src/features/editor/components/slash-menu/menu-items.ts @@ -587,6 +587,22 @@ const CommandGroups: SlashMenuGroupedItemsType = { .insertColumns({ layout: "five_equal" }) .run(), }, + { + title: "HTML embed", + description: "Embed raw HTML, CSS and JavaScript (admins only).", + searchTerms: ["html", "css", "js", "javascript", "script", "tracker", "analytics", "raw", "embed"], + icon: IconCode, + adminOnly: true, + requiresHtmlEmbedFeature: true, + command: ({ editor, range }: CommandProps) => { + editor + .chain() + .focus() + .deleteRange(range) + .setHtmlEmbed({ source: "" }) + .run(); + }, + }, { title: "Iframe embed", description: "Embed any Iframe", @@ -744,6 +760,43 @@ const CommandGroups: SlashMenuGroupedItemsType = { ], }; +/** + * Read whether the current user is a workspace admin/owner from the persisted + * `currentUser` (the same payload `currentUserAtom` stores via localStorage). + * Used to hide admin-only slash items (e.g. raw HTML embed). This is a UI gate + * only; the server independently strips admin-only nodes from non-admin writes. + */ +function isCurrentUserAdmin(): boolean { + try { + const raw = localStorage.getItem("currentUser"); + if (!raw) return false; + const parsed = JSON.parse(raw); + const role = parsed?.user?.role; + return role === "owner" || role === "admin"; + } catch { + return false; + } +} + +/** + * Read the workspace-level HTML embed feature toggle from the persisted + * `currentUser` payload (the same localStorage entry `currentUserAtom` writes, + * carrying `workspace.settings`). ABSENT/false => OFF (the default). The slash + * `getSuggestionItems` is a plain function (no React/atom context), so we read + * the persisted state the same way `isCurrentUserAdmin()` does. UI gate only; + * the server independently strips htmlEmbed from every non-allowed write. + */ +function isHtmlEmbedFeatureEnabled(): boolean { + try { + const raw = localStorage.getItem("currentUser"); + if (!raw) return false; + const parsed = JSON.parse(raw); + return parsed?.workspace?.settings?.htmlEmbed === true; + } catch { + return false; + } +} + export const getSuggestionItems = ({ query, excludeItems, @@ -753,6 +806,8 @@ export const getSuggestionItems = ({ }): SlashMenuGroupedItemsType => { const search = query.toLowerCase(); const filteredGroups: SlashMenuGroupedItemsType = {}; + const isAdmin = isCurrentUserAdmin(); + const htmlEmbedFeatureEnabled = isHtmlEmbedFeatureEnabled(); const fuzzyMatch = (query: string, target: string) => { let queryIndex = 0; @@ -767,6 +822,11 @@ export const getSuggestionItems = ({ for (const [group, items] of Object.entries(CommandGroups)) { const filteredItems = items.filter((item) => { if (excludeItems?.has(item.title)) return false; + // Hide admin-only items (raw HTML embed) from non-admins. + if (item.adminOnly && !isAdmin) return false; + // Hide HTML-embed-gated items unless the workspace feature toggle is ON. + if (item.requiresHtmlEmbedFeature && !htmlEmbedFeatureEnabled) + return false; return ( fuzzyMatch(search, item.title) || item.description.toLowerCase().includes(search) || diff --git a/apps/client/src/features/editor/components/slash-menu/types.ts b/apps/client/src/features/editor/components/slash-menu/types.ts index cf5bd3e4..c650b605 100644 --- a/apps/client/src/features/editor/components/slash-menu/types.ts +++ b/apps/client/src/features/editor/components/slash-menu/types.ts @@ -21,6 +21,14 @@ export type SlashMenuItemType = { searchTerms: string[]; command: (props: CommandProps) => void; disable?: (editor: ReturnType) => boolean; + // When true, the item is only offered to workspace admins/owners. This is a + // UI convenience only — the real authoring gate is enforced server-side. + adminOnly?: boolean; + // When true, the item is hidden unless the workspace HTML embed feature toggle + // is ON. Combined with adminOnly, the item shows only for admins in workspaces + // where the feature is enabled. UI gate only — the server strips htmlEmbed on + // every write where the toggle is OFF or the user is not an admin. + requiresHtmlEmbedFeature?: boolean; }; export type SlashMenuGroupedItemsType = { diff --git a/apps/client/src/features/editor/extensions/extensions.ts b/apps/client/src/features/editor/extensions/extensions.ts index 87c7b9e5..edac4b26 100644 --- a/apps/client/src/features/editor/extensions/extensions.ts +++ b/apps/client/src/features/editor/extensions/extensions.ts @@ -41,6 +41,7 @@ import { Drawio, Excalidraw, Embed, + HtmlEmbed, TiptapPdf, PageBreak, SearchAndReplace, @@ -87,6 +88,7 @@ import CodeBlockView from "@/features/editor/components/code-block/code-block-vi import DrawioView from "../components/drawio/drawio-view"; import ExcalidrawView from "@/features/editor/components/excalidraw/excalidraw-view-lazy.tsx"; import EmbedView from "@/features/editor/components/embed/embed-view.tsx"; +import HtmlEmbedView from "@/features/editor/components/html-embed/html-embed-view.tsx"; import PdfView from "@/features/editor/components/pdf/pdf-view.tsx"; import SubpagesView from "@/features/editor/components/subpages/subpages-view.tsx"; import TransclusionView from "@/features/editor/components/transclusion/transclusion-view.tsx"; @@ -365,6 +367,13 @@ export const mainExtensions = [ Embed.configure({ view: EmbedView, }), + // Raw HTML/CSS/JS node (Variant C). The node is registered for ALL users so + // documents authored by admins render correctly for everyone; INSERTION is + // gated to admins in the slash menu, and the server strips the node from any + // non-admin write so a non-admin cannot persist it. + HtmlEmbed.configure({ + view: HtmlEmbedView, + }), TiptapPdf.configure({ view: PdfView, }), diff --git a/apps/client/src/features/workspace/components/settings/components/html-embed-settings.tsx b/apps/client/src/features/workspace/components/settings/components/html-embed-settings.tsx new file mode 100644 index 00000000..27a21a8e --- /dev/null +++ b/apps/client/src/features/workspace/components/settings/components/html-embed-settings.tsx @@ -0,0 +1,99 @@ +import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts"; +import { useAtom } from "jotai"; +import { useState } from "react"; +import { updateWorkspace } from "@/features/workspace/services/workspace-service.ts"; +import { Switch, Stack, Paper, Group, Text, List } from "@mantine/core"; +import { notifications } from "@mantine/notifications"; +import useUserRole from "@/hooks/use-user-role.tsx"; +import { useTranslation } from "react-i18next"; + +/** + * Admin toggle for the workspace HTML embed feature. + * + * SECURITY: when ON, workspace admins/owners can embed raw HTML/CSS/JS that + * EXECUTES in the wiki page origin for every reader (a deliberate stored-XSS + * surface, e.g. for analytics trackers). OFF by default. The server strips + * htmlEmbed nodes on every write where the toggle is OFF or the saver is not an + * admin, so this switch fully enables/disables the feature workspace-wide. + */ +export default function HtmlEmbedSettings() { + const { t } = useTranslation(); + const [workspace, setWorkspace] = useAtom(workspaceAtom); + const { isAdmin } = useUserRole(); + + const [checked, setChecked] = useState( + workspace?.settings?.htmlEmbed ?? false, + ); + const [isLoading, setIsLoading] = useState(false); + + async function handleToggle(value: boolean) { + setIsLoading(true); + const previous = checked; + setChecked(value); // optimistic update + try { + const updated = await updateWorkspace({ htmlEmbed: value }); + // Force settings.htmlEmbed to the new value so the atom is consistent even + // if the response shape omits it. + setWorkspace({ + ...updated, + settings: { + ...updated.settings, + htmlEmbed: value, + }, + }); + notifications.show({ message: t("Updated successfully") }); + } catch (err) { + console.log(err); + setChecked(previous); // revert on failure + notifications.show({ + message: t("Failed to update data"), + color: "red", + }); + } finally { + setIsLoading(false); + } + } + + return ( + + + + {t("HTML embed")} + + + {t("advanced")} + + + + + handleToggle(event.currentTarget.checked)} + /> + + + + {t( + "Only workspace admins/owners can insert HTML embeds. Members never can: the editor option is hidden for them and the server strips the embed on save at every write path.", + )} + + + {t( + "If a non-admin edits and saves a page that contains an admin's embed, that save strips the embed (fail-closed). An admin must re-add it.", + )} + + + {t( + "Turning this off strips existing embeds on their next save and immediately disables execution (existing embeds render as a disabled placeholder).", + )} + + + + + ); +} diff --git a/apps/client/src/features/workspace/types/workspace.types.ts b/apps/client/src/features/workspace/types/workspace.types.ts index 53d1d5ce..4447821c 100644 --- a/apps/client/src/features/workspace/types/workspace.types.ts +++ b/apps/client/src/features/workspace/types/workspace.types.ts @@ -30,6 +30,9 @@ export interface IWorkspace { restrictApiToAdmins?: boolean; allowMemberTemplates?: boolean; isScimEnabled?: boolean; + // Write-only field for updateWorkspace({ htmlEmbed }). Read state lives at + // settings.htmlEmbed. + htmlEmbed?: boolean; } export interface IWorkspaceSettings { @@ -37,6 +40,8 @@ export interface IWorkspaceSettings { sharing?: IWorkspaceSharingSettings; api?: IWorkspaceApiSettings; templates?: IWorkspaceTemplateSettings; + // Admin-only HTML embed feature toggle. ABSENT/false => OFF (default). + htmlEmbed?: boolean; } export interface IWorkspaceApiSettings { diff --git a/apps/client/src/pages/settings/workspace/workspace-settings.tsx b/apps/client/src/pages/settings/workspace/workspace-settings.tsx index bb759a9b..484ad860 100644 --- a/apps/client/src/pages/settings/workspace/workspace-settings.tsx +++ b/apps/client/src/pages/settings/workspace/workspace-settings.tsx @@ -1,6 +1,7 @@ import SettingsTitle from "@/components/settings/settings-title.tsx"; import WorkspaceNameForm from "@/features/workspace/components/settings/components/workspace-name-form"; import WorkspaceIcon from "@/features/workspace/components/settings/components/workspace-icon.tsx"; +import HtmlEmbedSettings from "@/features/workspace/components/settings/components/html-embed-settings.tsx"; import { useTranslation } from "react-i18next"; import { getAppName } from "@/lib/config.ts"; import { Helmet } from "react-helmet-async"; @@ -15,6 +16,7 @@ export default function WorkspaceSettings() { + ); } diff --git a/apps/server/src/collaboration/collaboration.handler.html-embed.spec.ts b/apps/server/src/collaboration/collaboration.handler.html-embed.spec.ts new file mode 100644 index 00000000..23b11ba9 --- /dev/null +++ b/apps/server/src/collaboration/collaboration.handler.html-embed.spec.ts @@ -0,0 +1,120 @@ +import * as Y from 'yjs'; +import { TiptapTransformer } from '@hocuspocus/transformer'; +import { CollaborationHandler } from './collaboration.handler'; +import { hasHtmlEmbedNode } from '../common/helpers/prosemirror/html-embed.util'; + +// Exercises the REAL CollaborationHandler.updatePageContent admin gate (the +// REST/MCP/AI content-update entrypoint, used by the page update endpoint and +// the MCP/AI agent). updatePageContent reads `user?.role` and strips htmlEmbed +// BEFORE handing the json to withYdocConnection. We stub only +// withYdocConnection (which would otherwise open a real hocuspocus connection): +// the role-extraction (`user?.role`) + strip that run upstream of it are REAL +// production code. The 'replace' branch then runs the production +// TiptapTransformer.toYdoc on the gated json against a real Y.Doc, which we +// decode back to JSON and assert on. This replaces the re-implemented +// `applyAdminGate` stand-in for this entrypoint. + +const docWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }, + { + type: 'columns', + content: [ + { + type: 'column', + attrs: { position: 'left' }, + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'inner' }] }, + ], + }, + { + type: 'column', + attrs: { position: 'right' }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'r' }] }, + ], + }, + ], + }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +/** + * Run the REAL updatePageContent('replace') with a stubbed withYdocConnection. + * The stub provides a real Y.Doc + recording fragment; the production fn calls + * TiptapTransformer.toYdoc() and applies it to the doc, so decoding + * the doc afterward yields exactly the gated content. + */ +async function gatedContentFor( + role: string | null | undefined, + featureEnabled = true, +) { + // Workspace settings read used by the toggle-AND-admin gate. + const workspaceRepo = { + findById: jest.fn(async () => ({ + id: 'ws-1', + settings: { htmlEmbed: featureEnabled }, + })), + }; + const handler = new CollaborationHandler(workspaceRepo as any); + const captureDoc = new Y.Doc(); + + jest + .spyOn(handler, 'withYdocConnection') + .mockImplementation(async (_hp, _name, _ctx, fn: any) => { + const fragment = captureDoc.getXmlFragment('default'); + // Mirror the real Document surface the fn touches. + const docLike: any = { + getXmlFragment: () => fragment, + }; + // The fn does: fragment.delete(0,len) then + // Y.applyUpdate(doc, encodeStateAsUpdate(toYdoc(gatedJson))). It calls + // Y.applyUpdate(doc, ...) — so docLike must be a real Y.Doc target. + fn(captureDoc); + }); + + const handlers = handler.getHandlers({} as any); + await handlers.updatePageContent('page-1', { + prosemirrorJson: docWithEmbed(), + operation: 'replace', + user: { id: 'u1', role, workspaceId: 'ws-1' } as any, + }); + + return TiptapTransformer.fromYdoc(captureDoc, 'default'); +} + +describe('CollaborationHandler.updatePageContent htmlEmbed admin gate (real code)', () => { + it('non-admin (member): every htmlEmbed (top-level + nested) stripped before the ydoc', async () => { + const gated = await gatedContentFor('member'); + expect(hasHtmlEmbedNode(gated)).toBe(false); + // Non-embed siblings survive. + const json = JSON.stringify(gated); + expect(json).toContain('keep'); + expect(json).toContain('inner'); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + for (const role of [undefined, null, 'viewer'] as const) { + expect(hasHtmlEmbedNode(await gatedContentFor(role))).toBe(false); + } + }); + + it('toggle ON + admin: htmlEmbed preserved', async () => { + expect(hasHtmlEmbedNode(await gatedContentFor('admin', true))).toBe(true); + }); + + it('toggle ON + owner: htmlEmbed preserved', async () => { + expect(hasHtmlEmbedNode(await gatedContentFor('owner', true))).toBe(true); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { + expect(hasHtmlEmbedNode(await gatedContentFor('admin', false))).toBe(false); + }); + + it('toggle OFF + member: stripped', async () => { + expect(hasHtmlEmbedNode(await gatedContentFor('member', false))).toBe(false); + }); +}); diff --git a/apps/server/src/collaboration/collaboration.handler.ts b/apps/server/src/collaboration/collaboration.handler.ts index 992f9b74..debfde60 100644 --- a/apps/server/src/collaboration/collaboration.handler.ts +++ b/apps/server/src/collaboration/collaboration.handler.ts @@ -8,6 +8,13 @@ import { import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util'; import * as Y from 'yjs'; import { User } from '@docmost/db/types/entity.types'; +import { + hasHtmlEmbedNode, + htmlEmbedAllowed, + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../common/helpers/prosemirror/html-embed.util'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; export type CollabEventHandlers = ReturnType< CollaborationHandler['getHandlers'] @@ -17,7 +24,7 @@ export type CollabEventHandlers = ReturnType< export class CollaborationHandler { private readonly logger = new Logger(CollaborationHandler.name); - constructor() {} + constructor(private readonly workspaceRepo: WorkspaceRepo) {} getHandlers(hocuspocus: Hocuspocus) { return { @@ -83,8 +90,31 @@ export class CollaborationHandler { user: User; }, ) => { - const { prosemirrorJson, operation, user } = payload; + const { operation, user } = payload; + let { prosemirrorJson } = payload; this.logger.debug('Updating page content via yjs', documentName); + + // SECURITY (Variant C admin gate, REST/MCP/AI write path): + // updatePageContent is the server-side entrypoint used by the REST page + // update endpoint and by the MCP/AI agent. Raw `htmlEmbed` nodes execute + // arbitrary JS in every reader's browser, so a NON-admin caller must not + // be able to persist them here. If the editing user is not a workspace + // admin/owner, strip every htmlEmbed node before it reaches the ydoc. + // Toggle-AND-admin gate: htmlEmbed survives only when the workspace + // feature toggle is ON and the editing user is an admin/owner. OFF + // (default) => stripped for everyone. + const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(user?.workspaceId))?.settings, + ); + if (!htmlEmbedAllowed(htmlEmbedEnabled, user?.role)) { + if (hasHtmlEmbedNode(prosemirrorJson)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from update by user ${user?.id} on ${documentName}`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } + } + await this.withYdocConnection( hocuspocus, documentName, diff --git a/apps/server/src/collaboration/collaboration.util.ts b/apps/server/src/collaboration/collaboration.util.ts index 554aa43b..d99636db 100644 --- a/apps/server/src/collaboration/collaboration.util.ts +++ b/apps/server/src/collaboration/collaboration.util.ts @@ -32,6 +32,7 @@ import { Drawio, Excalidraw, Embed, + HtmlEmbed, Mention, Subpages, Highlight, @@ -102,6 +103,10 @@ export const tiptapExtensions = [ Drawio, Excalidraw, Embed, + // Registered server-side so the node survives schema parsing/serialization. + // Authoring is gated to admins at the document WRITE paths (see + // stripHtmlEmbedNodes usage in persistence/page services), NOT here. + HtmlEmbed, Mention, Subpages, Columns, diff --git a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts new file mode 100644 index 00000000..a78173a6 --- /dev/null +++ b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts @@ -0,0 +1,280 @@ +import * as Y from 'yjs'; +import { TiptapTransformer } from '@hocuspocus/transformer'; +import { PersistenceExtension } from './persistence.extension'; +import { tiptapExtensions } from '../collaboration.util'; +import { + hasHtmlEmbedNode, + HTML_EMBED_NODE_NAME, +} from '../../common/helpers/prosemirror/html-embed.util'; + +// Exercises the REAL PersistenceExtension.onStoreDocument (the primary collab +// WebSocket write path) against a REAL ydoc, with thin repo/db/queue mocks. +// This replaces the prior re-implemented `applyAdminGate` stand-in for this +// entrypoint: if the role-extraction expression (`context?.user?.role`), the +// strip call, or the ydoc-rebuild branch is deleted/changed, these tests fail. + +const RICH_DOC = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [{ type: 'text', text: 'intro paragraph' }], + }, + { + type: 'columns', + content: [ + { + type: 'column', + attrs: { position: 'left' }, + content: [ + { + type: 'paragraph', + content: [ + { type: 'text', text: 'left col, mentioning ' }, + { + type: 'mention', + attrs: { + id: 'mention-1', + label: 'Alice', + entityType: 'user', + entityId: 'user-123', + creatorId: 'creator-1', + }, + }, + ], + }, + // Nested embed inside a column — must be stripped recursively. + { + type: HTML_EMBED_NODE_NAME, + attrs: { source: '' }, + }, + ], + }, + { + type: 'column', + attrs: { position: 'right' }, + content: [ + { + type: 'table', + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableHeader', + attrs: { colspan: 1, rowspan: 1 }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'H' }] }, + ], + }, + ], + }, + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + attrs: { colspan: 1, rowspan: 1 }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'cell' }] }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + // Top-level embed — must be stripped. + { + type: HTML_EMBED_NODE_NAME, + attrs: { source: '' }, + }, + { + type: 'paragraph', + content: [{ type: 'text', text: 'outro paragraph' }], + }, + ], +}; + +function buildYdoc(json: any): Y.Doc { + return TiptapTransformer.toYdoc(json, 'default', tiptapExtensions); +} + +// Count nodes by type across the whole tree (excludes htmlEmbed by listing it +// separately) so we can assert every OTHER node type survived the strip. +function nodeTypeCounts(json: any): Record { + const counts: Record = {}; + const walk = (n: any) => { + if (!n || typeof n !== 'object') return; + if (n.type) counts[n.type] = (counts[n.type] ?? 0) + 1; + if (Array.isArray(n.content)) n.content.forEach(walk); + }; + walk(json); + return counts; +} + +/** + * Construct a real PersistenceExtension with the minimum mocks needed for + * onStoreDocument to reach the strip + persist branch, and capture the content + * that would be written to the page row. + */ +function buildExtension(featureEnabled = true) { + const captured: { content?: any } = {}; + + const existingPage = { + id: 'page-1', + slugId: 'slug-1', + spaceId: 'space-1', + workspaceId: 'ws-1', + creatorId: 'creator-1', + contributorIds: [], + content: { type: 'doc', content: [] }, // differs from new content -> persist runs + createdAt: new Date(), + lastUpdatedSource: 'user', + }; + + const pageRepo = { + findById: jest.fn(async () => ({ ...existingPage })), + updatePage: jest.fn(async (values: any) => { + captured.content = values.content; + }), + }; + const pageHistoryRepo = { + findPageLastHistory: jest.fn(async () => null), + saveHistory: jest.fn(async () => undefined), + }; + // db.transaction().execute(cb) just runs the callback (no real DB). + const db = { + transaction: () => ({ + execute: (cb: any) => cb({} as any), + }), + }; + const noopQueue = { add: jest.fn(async () => undefined) } as any; + const collabHistory = { addContributors: jest.fn(async () => undefined) } as any; + const transclusionService = { + syncPageTransclusions: jest.fn(async () => undefined), + syncPageReferences: jest.fn(async () => undefined), + } as any; + + // Workspace settings read used by the toggle-AND-admin gate. + const workspaceRepo = { + findById: jest.fn(async () => ({ + id: 'ws-1', + settings: { htmlEmbed: featureEnabled }, + })), + }; + + const ext = new PersistenceExtension( + pageRepo as any, + pageHistoryRepo as any, + db as any, + noopQueue, + noopQueue, + noopQueue, + collabHistory, + transclusionService, + workspaceRepo as any, + ); + + return { ext, captured, pageRepo }; +} + +async function runStore( + role: string | null | undefined, + doc: Y.Doc, + featureEnabled = true, +) { + const { ext, captured } = buildExtension(featureEnabled); + // hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has + // none, so stub it (the post-persist broadcast is not under test here). + (doc as any).broadcastStateless = () => undefined; + await ext.onStoreDocument({ + documentName: 'page-1', + document: doc, + context: { user: { id: 'u1', role } }, + } as any); + return captured; +} + +describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)', () => { + it('non-admin store: strips EVERY htmlEmbed but preserves every other node', async () => { + const doc = buildYdoc(RICH_DOC); + const before = TiptapTransformer.fromYdoc(doc, 'default'); + expect(hasHtmlEmbedNode(before)).toBe(true); + const beforeCounts = nodeTypeCounts(before); + + const captured = await runStore('member', doc); + + expect(captured.content).toBeDefined(); + // htmlEmbed gone from the persisted content. + expect(hasHtmlEmbedNode(captured.content)).toBe(false); + + // Every non-embed node type is preserved with the SAME count (guards against + // data loss if a node were missing from tiptapExtensions and dropped on the + // toYdoc rebuild). + const afterCounts = nodeTypeCounts(captured.content); + for (const [type, count] of Object.entries(beforeCounts)) { + if (type === HTML_EMBED_NODE_NAME) continue; + expect(afterCounts[type]).toBe(count); + } + // The two embeds are gone. + expect(beforeCounts[HTML_EMBED_NODE_NAME]).toBe(2); + expect(afterCounts[HTML_EMBED_NODE_NAME]).toBeUndefined(); + + // The shared ydoc fragment was also rewritten clean (re-decode it). + const reDecoded = TiptapTransformer.fromYdoc(doc, 'default'); + expect(hasHtmlEmbedNode(reDecoded)).toBe(false); + }); + + it('toggle ON + admin store: htmlEmbed preserved in persisted content', async () => { + const captured = await runStore('admin', buildYdoc(RICH_DOC), true); + expect(captured.content).toBeDefined(); + expect(hasHtmlEmbedNode(captured.content)).toBe(true); + expect(nodeTypeCounts(captured.content)[HTML_EMBED_NODE_NAME]).toBe(2); + }); + + it('toggle ON + owner store: htmlEmbed preserved', async () => { + const captured = await runStore('owner', buildYdoc(RICH_DOC), true); + expect(hasHtmlEmbedNode(captured.content)).toBe(true); + }); + + it('toggle OFF + admin store: stripped (feature disabled for everyone)', async () => { + const captured = await runStore('admin', buildYdoc(RICH_DOC), false); + expect(hasHtmlEmbedNode(captured.content)).toBe(false); + }); + + it('toggle OFF + owner store: stripped', async () => { + const captured = await runStore('owner', buildYdoc(RICH_DOC), false); + expect(hasHtmlEmbedNode(captured.content)).toBe(false); + }); + + it('toggle OFF + member store: stripped', async () => { + const captured = await runStore('member', buildYdoc(RICH_DOC), false); + expect(hasHtmlEmbedNode(captured.content)).toBe(false); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + expect( + hasHtmlEmbedNode((await runStore(undefined, buildYdoc(RICH_DOC))).content), + ).toBe(false); + expect( + hasHtmlEmbedNode((await runStore(null, buildYdoc(RICH_DOC))).content), + ).toBe(false); + expect( + hasHtmlEmbedNode((await runStore('viewer', buildYdoc(RICH_DOC))).content), + ).toBe(false); + }); + + it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => { + const emptyDoc = buildYdoc({ + type: 'doc', + content: [{ type: 'paragraph' }], + }); + // Non-admin path with an empty/embed-free fragment must be a no-op strip, + // not throw. + await expect(runStore('member', emptyDoc)).resolves.toBeDefined(); + }); +}); diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index af4137d6..91331dec 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -39,6 +39,13 @@ import { HISTORY_INTERVAL, } from '../constants'; import { TransclusionService } from '../../core/page/transclusion/transclusion.service'; +import { + hasHtmlEmbedNode, + htmlEmbedAllowed, + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../../common/helpers/prosemirror/html-embed.util'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; @Injectable() export class PersistenceExtension implements Extension { @@ -59,6 +66,7 @@ export class PersistenceExtension implements Extension { @InjectQueue(QueueName.NOTIFICATION_QUEUE) private notificationQueue: Queue, private readonly collabHistory: CollabHistoryService, private readonly transclusionService: TransclusionService, + private readonly workspaceRepo: WorkspaceRepo, ) {} async onLoadDocument(data: onLoadDocumentPayload) { @@ -112,7 +120,62 @@ export class PersistenceExtension implements Extension { const pageId = getPageId(documentName); - const tiptapJson = TiptapTransformer.fromYdoc(document, 'default'); + let tiptapJson = TiptapTransformer.fromYdoc(document, 'default'); + + // SECURITY (Variant C admin gate, collab WebSocket write path): + // The persisted snapshot is the merged ydoc, which may contain an htmlEmbed + // node inserted by ANY connected editor. htmlEmbed renders raw, unsanitized + // JS in every reader's browser, so only workspace admins/owners may author + // it. When the user whose store triggers this persist is not an admin, strip + // every htmlEmbed node before it is written to the page row AND before the + // ydoc state is re-encoded, so the node cannot be reintroduced by a + // non-admin via the collab socket. + // NOTE (residual risk): the gate is keyed to the storing connection's user. + // If an admin already authored an htmlEmbed and a non-admin's later store + // does not touch it, this strip would remove the admin's embed on that + // non-admin store. This is intentionally conservative (fail closed): the + // admin re-adds/keeps the node on their own next edit. A future refinement + // could diff against the previously persisted admin-authored embeds. + // + // ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in + // the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs + // update to connected clients immediately, so a non-admin's transient + // htmlEmbed can execute in OTHER open editors' browsers in the brief window + // before this persist strips it. The exposure is limited to concurrent + // AUTHENTICATED space members who have the doc open with Edit rights + // (semi-trusted) — anonymous public-share/readonly viewers do NOT open a + // collab socket (ReadonlyPageEditor renders fetched, already-stripped + // content; HocuspocusProvider is only used by the authenticated editable + // page-editor), and the PERSISTED page row plus every share/readonly read + // path are protected by this strip. The window is therefore accepted rather + // than mitigated with an inbound beforeBroadcast strip. + // Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature + // toggle is ON and the storing user is an admin/owner. OFF (default) => + // stripped for everyone (existing embeds get cleaned up on next save). + const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(context?.user?.workspaceId))?.settings, + ); + if (!htmlEmbedAllowed(htmlEmbedEnabled, context?.user?.role)) { + if (hasHtmlEmbedNode(tiptapJson)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`, + ); + tiptapJson = stripHtmlEmbedNodes(tiptapJson); + // Reflect the stripped content back into the shared ydoc so the node is + // removed for all connected clients, not just the persisted row. + const fragment = document.getXmlFragment('default'); + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + const cleanDoc = TiptapTransformer.toYdoc( + tiptapJson, + 'default', + tiptapExtensions, + ); + Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc)); + } + } + const ydocState = Buffer.from(Y.encodeStateAsUpdate(document)); let textContent = null; diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts new file mode 100644 index 00000000..6b07ec0b --- /dev/null +++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts @@ -0,0 +1,214 @@ +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + htmlEmbedAllowed, + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from './html-embed.util'; +import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util'; +import { + decodeHtmlEmbedSource, + encodeHtmlEmbedSource, +} from '@docmost/editor-ext'; + +const findFirstChild = (json: any, type: string): any | undefined => { + if (!json || typeof json !== 'object') return undefined; + if (json.type === type) return json; + if (Array.isArray(json.content)) { + for (const child of json.content) { + const found = findFirstChild(child, type); + if (found) return found; + } + } + return undefined; +}; + +describe('stripHtmlEmbedNodes', () => { + it('removes a top-level htmlEmbed node', () => { + const doc = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'before' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'after' }] }, + ], + }; + + const result = stripHtmlEmbedNodes(doc); + expect(hasHtmlEmbedNode(result)).toBe(false); + // Other nodes are preserved. + expect(result.content).toHaveLength(2); + expect(result.content[0].content[0].text).toBe('before'); + expect(result.content[1].content[0].text).toBe('after'); + }); + + it('removes nested htmlEmbed nodes (e.g. inside columns)', () => { + const doc = { + type: 'doc', + content: [ + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { type: 'htmlEmbed', attrs: { source: 'x' } }, + { + type: 'paragraph', + content: [{ type: 'text', text: 'keep' }], + }, + ], + }, + ], + }, + ], + }; + + const result = stripHtmlEmbedNodes(doc); + expect(hasHtmlEmbedNode(result)).toBe(false); + const col = findFirstChild(result, 'column'); + expect(col.content).toHaveLength(1); + expect(col.content[0].type).toBe('paragraph'); + }); + + it('does not mutate the input document', () => { + const doc = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source: 'x' } }], + }; + stripHtmlEmbedNodes(doc); + expect(doc.content).toHaveLength(1); + expect(doc.content[0].type).toBe('htmlEmbed'); + }); + + it('leaves documents without htmlEmbed untouched', () => { + const doc = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }, + ], + }; + expect(hasHtmlEmbedNode(doc)).toBe(false); + const result = stripHtmlEmbedNodes(doc); + expect(result).toEqual(doc); + }); +}); + +describe('canAuthorHtmlEmbed', () => { + it('allows owner and admin', () => { + expect(canAuthorHtmlEmbed('owner')).toBe(true); + expect(canAuthorHtmlEmbed('admin')).toBe(true); + }); + it('denies member and unknown/empty roles', () => { + expect(canAuthorHtmlEmbed('member')).toBe(false); + expect(canAuthorHtmlEmbed(null)).toBe(false); + expect(canAuthorHtmlEmbed(undefined)).toBe(false); + expect(canAuthorHtmlEmbed('viewer')).toBe(false); + }); +}); + +describe('isHtmlEmbedFeatureEnabled', () => { + it('is true only when settings.htmlEmbed === true', () => { + expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: true })).toBe(true); + }); + it('defaults to false (absent / false / non-object)', () => { + expect(isHtmlEmbedFeatureEnabled({})).toBe(false); + expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: false })).toBe(false); + expect(isHtmlEmbedFeatureEnabled(null)).toBe(false); + expect(isHtmlEmbedFeatureEnabled(undefined)).toBe(false); + // Truthy-but-not-true values must NOT enable the feature. + expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: 'true' as any })).toBe(false); + }); +}); + +describe('htmlEmbedAllowed (toggle AND admin)', () => { + it('toggle OFF + admin/owner => not allowed (feature disabled for everyone)', () => { + expect(htmlEmbedAllowed(false, 'admin')).toBe(false); + expect(htmlEmbedAllowed(false, 'owner')).toBe(false); + }); + it('toggle OFF + member => not allowed', () => { + expect(htmlEmbedAllowed(false, 'member')).toBe(false); + }); + it('toggle ON + admin/owner => allowed', () => { + expect(htmlEmbedAllowed(true, 'admin')).toBe(true); + expect(htmlEmbedAllowed(true, 'owner')).toBe(true); + }); + it('toggle ON + member/unknown => not allowed', () => { + expect(htmlEmbedAllowed(true, 'member')).toBe(false); + expect(htmlEmbedAllowed(true, null)).toBe(false); + expect(htmlEmbedAllowed(true, undefined)).toBe(false); + expect(htmlEmbedAllowed(true, 'viewer')).toBe(false); + }); +}); + +// NOTE: a previous revision of this file re-implemented the write-path admin +// gate as a local `applyAdminGate` stand-in and asserted against THAT. A +// deleted/misplaced real guard would have kept those green. The stand-in is +// removed. The collab store, REST/MCP update, and transclusion-unsync paths are +// now tested against their REAL code in: +// - collaboration/extensions/persistence.extension.html-embed.spec.ts +// - collaboration/collaboration.handler.html-embed.spec.ts +// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts +// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup) +// - integrations/import/services/import-html-embed-identity.spec.ts (import) +// +// The case below stays here because it asserts a REAL parse path +// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a +// re-implemented gate. +describe('htmlEmbed smuggled via the markdown/html form (real parse + real helpers)', () => { + it('the parsed node is detected and stripped by the real helpers', () => { + // The markdown/html create formats decode to the same htmlEmbed node, so the + // gate (run on the parsed JSON) covers them identically. + const source = ''; + const encoded = encodeHtmlEmbedSource(source); + const html = `
`; + const parsed = htmlToJson(html); + expect(hasHtmlEmbedNode(parsed)).toBe(true); + + // A non-admin role gates to strip via the real helpers. + expect(canAuthorHtmlEmbed('member')).toBe(false); + const stripped = stripHtmlEmbedNodes(parsed); + expect(hasHtmlEmbedNode(stripped)).toBe(false); + }); +}); + +describe('htmlEmbed source base64 codec', () => { + it('round-trips arbitrary source including UTF-8', () => { + const source = ''; + const encoded = encodeHtmlEmbedSource(source); + expect(encoded).not.toContain('<'); + expect(decodeHtmlEmbedSource(encoded)).toBe(source); + }); +}); + +describe('htmlEmbed node HTML <-> JSON round-trip', () => { + it('preserves the raw source through HTML -> JSON', () => { + const source = ''; + const encoded = encodeHtmlEmbedSource(source); + const html = `
`; + + const json = htmlToJson(html); + const node = findFirstChild(json, 'htmlEmbed'); + expect(node).toBeDefined(); + expect(node.attrs.source).toBe(source); + }); + + it('round-trips JSON -> HTML -> JSON keeping the source', () => { + const source = '
raw & markup
'; + const json = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source } }], + }; + + const html = jsonToHtml(json); + // The static HTML carries the encoded source but does NOT inline the raw + // markup (it must not be an injection vector by itself). + expect(html).toContain('data-type="htmlEmbed"'); + expect(html).not.toContain('onclick'); + + const back = htmlToJson(html); + const node = findFirstChild(back, 'htmlEmbed'); + expect(node).toBeDefined(); + expect(node.attrs.source).toBe(source); + }); +}); diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts new file mode 100644 index 00000000..f1d0b6e5 --- /dev/null +++ b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts @@ -0,0 +1,102 @@ +import { JSONContent } from '@tiptap/core'; + +export const HTML_EMBED_NODE_NAME = 'htmlEmbed'; + +/** + * Recursively remove every `htmlEmbed` node from a ProseMirror JSON document. + * + * SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin + * (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to + * author it. This helper is the server-side enforcement primitive: every WRITE + * path that may persist content from a NON-admin caller must run the incoming + * document through this function so a non-admin cannot smuggle the node in via + * the collab socket, the REST/MCP/AI content-update path, paste, or import. + * + * Returns a NEW document; the input is not mutated. If the input is not a valid + * doc object it is returned unchanged (callers persist what they were given). + */ +export function stripHtmlEmbedNodes(pmJson: T): T { + if (!pmJson || typeof pmJson !== 'object') { + return pmJson; + } + + const node = pmJson as unknown as JSONContent; + + if (Array.isArray(node.content)) { + const filtered: JSONContent[] = []; + for (const child of node.content) { + // Drop any htmlEmbed child outright. + if (child && child.type === HTML_EMBED_NODE_NAME) { + continue; + } + // Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are + // also removed. + filtered.push(stripHtmlEmbedNodes(child)); + } + return { ...node, content: filtered } as unknown as T; + } + + return { ...node } as unknown as T; +} + +/** + * Returns true if the document contains at least one `htmlEmbed` node anywhere + * in its tree. Useful to decide whether a strip pass actually changed anything + * (e.g. for logging a rejected non-admin embed attempt). + */ +export function hasHtmlEmbedNode(pmJson: unknown): boolean { + if (!pmJson || typeof pmJson !== 'object') { + return false; + } + const node = pmJson as JSONContent; + if (node.type === HTML_EMBED_NODE_NAME) { + return true; + } + if (Array.isArray(node.content)) { + return node.content.some((child) => hasHtmlEmbedNode(child)); + } + return false; +} + +/** + * Map the workspace user role to whether it may author `htmlEmbed` nodes. + * Owners and admins are trusted; everyone else (member, and any unknown role) + * is not. Kept here so every write path shares one definition of "trusted". + */ +export function canAuthorHtmlEmbed(role: string | null | undefined): boolean { + return role === 'owner' || role === 'admin'; +} + +/** + * Combined write-path gate for the htmlEmbed feature. + * + * htmlEmbed is allowed in a document only when the workspace feature toggle is + * ON and the authoring/saving user is a workspace admin/owner. OFF (default) => + * stripped for EVERYONE, including admins (the feature is disabled). + * + * `featureEnabled` is read from the workspace settings for the relevant write + * (`workspace.settings?.htmlEmbed === true`). Every WRITE path that may persist + * htmlEmbed content must gate on this combined predicate, so that turning the + * toggle OFF strips existing embeds on the next save and prevents new ones from + * being persisted regardless of role. + */ +export function htmlEmbedAllowed( + featureEnabled: boolean, + role: string | null | undefined, +): boolean { + return featureEnabled === true && canAuthorHtmlEmbed(role); +} + +/** + * Read the workspace-level htmlEmbed feature toggle from a workspace's settings + * jsonb. ABSENT/non-true => OFF (the default). Kept here so every server write + * path resolves the toggle the same way. + */ +export function isHtmlEmbedFeatureEnabled( + settings: unknown | null | undefined, +): boolean { + if (!settings || typeof settings !== 'object') { + return false; + } + return (settings as Record).htmlEmbed === true; +} diff --git a/apps/server/src/core/page/page.controller.ts b/apps/server/src/core/page/page.controller.ts index fd5c866e..968480c9 100644 --- a/apps/server/src/core/page/page.controller.ts +++ b/apps/server/src/core/page/page.controller.ts @@ -237,6 +237,9 @@ export class PageController { user.id, workspace.id, createPageDto, + // Pass the caller's workspace role so create() can enforce the htmlEmbed + // admin gate (non-admins cannot author raw-JS embeds). + user.role, provenance, ); diff --git a/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts new file mode 100644 index 00000000..f23d565a --- /dev/null +++ b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts @@ -0,0 +1,102 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { + hasHtmlEmbedNode, + htmlEmbedAllowed, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; + +// PageService.create() and duplicatePage() guards. +// +// page.service.ts cannot be unit-LOADED under the server's jest config (a +// transitive ESM dep, @sindresorhus/slugify, is not in transformIgnorePatterns), +// so we cover the two load-bearing properties at the strongest feasible layer: +// +// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact predicate +// each path applies: non-admin/unknown role -> strip, admin/owner -> keep. +// +// (2) IDENTITY — source-pin which role each path reads (create: the `callerRole` +// param threaded from the request; duplicate: `authUser.role`), so a +// refactor that drops the guard or reads the wrong role trips the test. +// This is what replaces the removed `applyAdminGate` stand-in for these +// two entrypoints. + +const docWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'body' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +// The real predicate both paths apply (see SECURITY blocks in page.service.ts): +// toggle AND admin. +function applyGate( + json: any, + featureEnabled: boolean, + role: string | null | undefined, +) { + if (!htmlEmbedAllowed(featureEnabled, role) && hasHtmlEmbedNode(json)) { + return stripHtmlEmbedNodes(json); + } + return json; +} + +describe('page create/duplicate gate decision (real helpers)', () => { + it('toggle ON + non-admin (member) strips', () => { + const result = applyGate(docWithEmbed(), true, 'member'); + expect(hasHtmlEmbedNode(result)).toBe(false); + expect(result.content).toHaveLength(1); + expect(result.content[0].content[0].text).toBe('body'); + }); + + it('toggle ON + unknown/empty role fails closed (strips)', () => { + for (const role of [null, undefined, 'viewer'] as const) { + expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, role))).toBe( + false, + ); + } + }); + + it('toggle ON + admin/owner keep', () => { + expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'admin'))).toBe( + true, + ); + expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'owner'))).toBe( + true, + ); + }); + + it('toggle OFF strips for everyone (admin/owner/member)', () => { + for (const role of ['admin', 'owner', 'member'] as const) { + expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), false, role))).toBe( + false, + ); + } + }); +}); + +const SRC = readFileSync(join(__dirname, 'page.service.ts'), 'utf-8'); + +describe('page create/duplicate gate identity is pinned (source contract)', () => { + it('create() gates on toggle AND the caller role param before deriving content/ydoc', () => { + // create() receives the caller's workspace role as `callerRole` and gates on + // the combined toggle-AND-admin predicate; the embed must be stripped BEFORE + // insertPage. + expect(SRC).toMatch( + /!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*callerRole\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/, + ); + expect(SRC).toContain('prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson)'); + }); + + it('duplicatePage() gates on toggle AND the duplicating user role (authUser.role)', () => { + expect(SRC).toMatch( + /!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*authUser\.role\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/, + ); + }); + + it('both paths resolve the toggle from the workspace settings', () => { + expect(SRC).toContain('isHtmlEmbedFeatureEnabled('); + expect(SRC).toContain('this.workspaceRepo.findById('); + }); +}); diff --git a/apps/server/src/core/page/services/page.service.ts b/apps/server/src/core/page/services/page.service.ts index 27b394e7..0dc2276f 100644 --- a/apps/server/src/core/page/services/page.service.ts +++ b/apps/server/src/core/page/services/page.service.ts @@ -30,6 +30,13 @@ import { isAttachmentNode, removeMarkTypeFromDoc, } from '../../../common/helpers/prosemirror/utils'; +import { + hasHtmlEmbedNode, + htmlEmbedAllowed, + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; import { htmlToJson, jsonToNode, @@ -74,6 +81,7 @@ export class PageService { private collaborationGateway: CollaborationGateway, private readonly watcherService: WatcherService, private readonly transclusionService: TransclusionService, + private readonly workspaceRepo: WorkspaceRepo, ) {} async findById( @@ -93,6 +101,10 @@ export class PageService { userId: string, workspaceId: string, createPageDto: CreatePageDto, + // Workspace role of the caller. Used to enforce the htmlEmbed admin gate on + // the create write path (see below). Optional/typed loosely so unknown or + // missing roles fall through to the non-admin (strip) branch by default. + callerRole?: string | null, // Optional agent-edit provenance (from the signed access claim). When the // actor is 'agent', stamp the page's source marker so a freshly created page // shows it was created by the AI agent (§14 N2) — create goes through REST, @@ -123,11 +135,36 @@ export class PageService { let ydoc = undefined; if (createPageDto?.content && createPageDto?.format) { - const prosemirrorJson = await this.parseProsemirrorContent( + let prosemirrorJson = await this.parseProsemirrorContent( createPageDto.content, createPageDto.format, ); + // SECURITY (Variant C admin gate, plain page-create write path): + // create() builds content/textContent/ydoc directly and persists them via + // insertPage, bypassing the collab onStoreDocument strip. htmlEmbed renders + // raw, unsanitized JS in readers' browsers, so only workspace admins/owners + // may author it. The create controller requires only space Edit, so a + // regular member could otherwise POST a doc (json, or the markdown/html + // forms that parse to the same node) containing an + // htmlEmbed and store XSS for every reader. Strip every htmlEmbed node when + // the caller is not an admin, BEFORE deriving textContent/ydoc/insert. + // The gate is toggle-AND-admin: htmlEmbed survives only when the workspace + // feature toggle is ON and the caller is an admin/owner. OFF (default) => + // stripped for everyone. Cheap settings read keyed to the workspace. + const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(workspaceId))?.settings, + ); + if ( + !htmlEmbedAllowed(htmlEmbedEnabled, callerRole) && + hasHtmlEmbedNode(prosemirrorJson) + ) { + this.logger.warn( + `Stripping htmlEmbed node(s) from page creation by user ${userId} (space ${createPageDto.spaceId})`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } + content = prosemirrorJson; textContent = jsonToText(prosemirrorJson); ydoc = createYdocFromJson(prosemirrorJson); @@ -589,6 +626,12 @@ export class PageService { const attachmentMap = new Map(); + // Resolve the htmlEmbed toggle ONCE for the workspace; the per-page gate + // below is toggle-AND-admin (OFF default => stripped for everyone). + const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(rootPage.workspaceId))?.settings, + ); + const insertablePages: InsertablePage[] = await Promise.all( pages.map(async (page) => { const pageContent = getProsemirrorContent(page.content); @@ -688,7 +731,25 @@ export class PageService { } }); - const prosemirrorJson = prosemirrorDoc.toJSON(); + let prosemirrorJson = prosemirrorDoc.toJSON(); + + // SECURITY (Variant C admin gate, duplication write path): + // Duplication builds the ydoc directly and bypasses the collab + // onStoreDocument strip. htmlEmbed renders raw, unsanitized JS in + // readers' browsers, so only workspace admins/owners may author it. A + // non-admin with space Edit could otherwise duplicate an admin page + // that contains an embed into a new page authored by them. Strip every + // htmlEmbed node from each duplicated page when the duplicating user is + // not an admin, BEFORE computing textContent/ydoc/insert. + if ( + !htmlEmbedAllowed(htmlEmbedEnabled, authUser.role) && + hasHtmlEmbedNode(prosemirrorJson) + ) { + this.logger.warn( + `Stripping htmlEmbed node(s) from page duplication by user ${authUser.id} (source page ${page.id})`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } // Add "Copy of " prefix to the root page title only for duplicates in same space let title = page.title; diff --git a/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts b/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts new file mode 100644 index 00000000..8ad13121 --- /dev/null +++ b/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts @@ -0,0 +1,144 @@ +import { TransclusionService } from '../transclusion.service'; +import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util'; + +// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate. +// unsync returns a source snapshot the client materializes into the reference +// page; a non-admin must never receive an embed payload to re-persist. The gate +// reads `user.role` and strips before returning. All repos / access checks are +// mocked so the REAL gate logic runs end-to-end. Complements the existing +// transclusion specs (rewriteAttachmentsForUnsync, controller). + +const WS = 'ws-1'; +const REF_PAGE = 'ref-1'; +const SRC_PAGE = 'src-1'; +const TX_ID = 'tx-1'; + +const sourceContentWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +function buildService(featureEnabled = true) { + const pageRepo = { + findById: jest.fn(async (id: string) => ({ + id, + workspaceId: WS, + spaceId: 'space-1', + deletedAt: null, + })), + }; + const pageTransclusionsRepo = { + findByPageAndTransclusion: jest.fn(async () => ({ + content: sourceContentWithEmbed(), + })), + }; + const pageTransclusionReferencesRepo = { + deleteOne: jest.fn(async () => undefined), + }; + const attachmentRepo = { findByIds: jest.fn(async () => []) }; + const storageService = { copy: jest.fn(async () => undefined) }; + const pageAccessService = { + validateCanEdit: jest.fn(async () => undefined), + validateCanView: jest.fn(async () => undefined), + }; + // Workspace settings read used by the toggle-AND-admin gate. + const workspaceRepo = { + findById: jest.fn(async () => ({ + id: WS, + settings: { htmlEmbed: featureEnabled }, + })), + }; + + const service = new TransclusionService( + {} as any, // db (unused on this path) + pageTransclusionsRepo as any, + pageTransclusionReferencesRepo as any, + pageRepo as any, + {} as any, // pagePermissionRepo (unused) + {} as any, // spaceMemberRepo (unused) + attachmentRepo as any, + storageService as any, + pageAccessService as any, + workspaceRepo as any, + ); + return service; +} + +function userWithRole(role: string | null | undefined) { + return { id: 'u1', workspaceId: WS, role } as any; +} + +describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => { + it('non-admin (member): returned content has htmlEmbed stripped', async () => { + const service = buildService(); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole('member'), + ); + expect(hasHtmlEmbedNode(content)).toBe(false); + // Non-embed content is preserved. + expect(JSON.stringify(content)).toContain('snapshot body'); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + for (const role of [undefined, null, 'viewer'] as const) { + const service = buildService(); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole(role), + ); + expect(hasHtmlEmbedNode(content)).toBe(false); + } + }); + + it('toggle ON + admin: returned content keeps the htmlEmbed', async () => { + const service = buildService(true); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole('admin'), + ); + expect(hasHtmlEmbedNode(content)).toBe(true); + }); + + it('toggle ON + owner: returned content keeps the htmlEmbed', async () => { + const service = buildService(true); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole('owner'), + ); + expect(hasHtmlEmbedNode(content)).toBe(true); + }); + + it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => { + const service = buildService(false); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole('admin'), + ); + expect(hasHtmlEmbedNode(content)).toBe(false); + }); + + it('toggle OFF + member: stripped', async () => { + const service = buildService(false); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole('member'), + ); + expect(hasHtmlEmbedNode(content)).toBe(false); + }); +}); diff --git a/apps/server/src/core/page/transclusion/transclusion.service.ts b/apps/server/src/core/page/transclusion/transclusion.service.ts index e208707c..2fd92402 100644 --- a/apps/server/src/core/page/transclusion/transclusion.service.ts +++ b/apps/server/src/core/page/transclusion/transclusion.service.ts @@ -23,6 +23,13 @@ import { rewriteAttachmentsForUnsync } from './utils/transclusion-unsync.util'; import { TransclusionLookup } from './transclusion.types'; import { Page, User } from '@docmost/db/types/entity.types'; import { PageAccessService } from '../page-access/page-access.service'; +import { + hasHtmlEmbedNode, + htmlEmbedAllowed, + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; type ReferencingPageInfo = { id: string; @@ -47,6 +54,7 @@ export class TransclusionService { private readonly attachmentRepo: AttachmentRepo, private readonly storageService: StorageService, private readonly pageAccessService: PageAccessService, + private readonly workspaceRepo: WorkspaceRepo, ) {} async syncPageTransclusions( @@ -461,10 +469,12 @@ export class TransclusionService { throw new NotFoundException('Sync block not found'); } - const { content, copies } = rewriteAttachmentsForUnsync( + let content: unknown; + let copies: ReturnType['copies']; + ({ content, copies } = rewriteAttachmentsForUnsync( transclusion.content, () => uuid7(), - ); + )); if (copies.length > 0) { const oldIds = copies.map((c) => c.oldAttachmentId); @@ -513,6 +523,24 @@ export class TransclusionService { transclusionId, ); + // SECURITY (Variant C admin gate, transclusion unsync write path): + // The returned content is a source snapshot that the client materializes + // into the reference page via insertContentAt. The snapshot keeps any + // htmlEmbed verbatim, and unsync requires only space Edit/View. If the + // requesting user is not a workspace admin/owner, strip htmlEmbed nodes so a + // non-admin can never receive an embed payload to re-persist (the collab + // strip on the subsequent save is debounced/race-prone and must not be the + // only guard). Admin behavior is unchanged. + const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(user.workspaceId))?.settings, + ); + if (!htmlEmbedAllowed(htmlEmbedEnabled, user.role) && hasHtmlEmbedNode(content)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from transclusion unsync by user ${user.id} (reference page ${referencePageId}, source page ${sourcePageId})`, + ); + content = stripHtmlEmbedNodes(content); + } + return { content }; } } diff --git a/apps/server/src/core/share/share-html-embed.spec.ts b/apps/server/src/core/share/share-html-embed.spec.ts new file mode 100644 index 00000000..1bfeff1c --- /dev/null +++ b/apps/server/src/core/share/share-html-embed.spec.ts @@ -0,0 +1,133 @@ +import { ShareService } from './share.service'; +import { hasHtmlEmbedNode } from '../../common/helpers/prosemirror/html-embed.util'; + +// Exercises the REAL ShareService server-authoritative htmlEmbed kill-switch for +// shared content. An anonymous public-share viewer cannot read the per-workspace +// htmlEmbed toggle, so the SERVER must decide what to serve: when the toggle is +// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are kept so +// the read-only client executes them. All repos / token service are mocked so the +// real prepareContentForShare logic runs end-to-end via getSharedPage. + +const WS = 'ws-1'; +const PAGE = 'page-1'; + +const pageContentWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'shared body' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +function buildService(opts: { + // undefined => workspaceRepo.findById returns undefined (fail-closed case) + htmlEmbed?: boolean | undefined; + workspaceMissing?: boolean; +}) { + const shareRepo = { findById: jest.fn() }; + + const pageRepo = { + findById: jest.fn(async () => ({ + id: PAGE, + workspaceId: WS, + spaceId: 'space-1', + deletedAt: null, + content: pageContentWithEmbed(), + })), + }; + + const pagePermissionRepo = { + hasRestrictedAncestor: jest.fn(async () => false), + }; + + const tokenService = { + generateAttachmentToken: jest.fn(async () => 'tok'), + }; + + const workspaceRepo = { + findById: jest.fn(async () => + opts.workspaceMissing + ? undefined + : { id: WS, settings: { htmlEmbed: opts.htmlEmbed } }, + ), + }; + + const service = new ShareService( + shareRepo as any, + pageRepo as any, + pagePermissionRepo as any, + {} as any, // db (unused on this path) + tokenService as any, + {} as any, // transclusionService (unused) + workspaceRepo as any, + ); + + // getSharedPage resolves the share via getShareForPage (a raw db query). + // Stub it so we exercise prepareContentForShare deterministically. + jest + .spyOn(service, 'getShareForPage') + .mockResolvedValue({ pageId: PAGE, key: 'k', id: 's1' } as any); + + return { service, workspaceRepo }; +} + +describe('ShareService htmlEmbed server-authoritative kill-switch (real code)', () => { + it('toggle ON: shared content keeps the htmlEmbed (served to anonymous viewer)', async () => { + const { service } = buildService({ htmlEmbed: true }); + const { page } = await service.getSharedPage( + { pageId: PAGE } as any, + WS, + ); + expect(hasHtmlEmbedNode(page.content)).toBe(true); + expect(JSON.stringify(page.content)).toContain('shared body'); + }); + + it('toggle OFF: htmlEmbed stripped from shared content', async () => { + const { service } = buildService({ htmlEmbed: false }); + const { page } = await service.getSharedPage( + { pageId: PAGE } as any, + WS, + ); + expect(hasHtmlEmbedNode(page.content)).toBe(false); + // Non-embed content is preserved. + expect(JSON.stringify(page.content)).toContain('shared body'); + }); + + it('toggle ABSENT: defaults OFF and strips', async () => { + const { service } = buildService({ htmlEmbed: undefined }); + const { page } = await service.getSharedPage( + { pageId: PAGE } as any, + WS, + ); + expect(hasHtmlEmbedNode(page.content)).toBe(false); + }); + + it('workspace missing: fails closed (stripped)', async () => { + const { service } = buildService({ workspaceMissing: true }); + const { page } = await service.getSharedPage( + { pageId: PAGE } as any, + WS, + ); + expect(hasHtmlEmbedNode(page.content)).toBe(false); + }); + + it('updatePublicAttachments strips htmlEmbed when toggle OFF', async () => { + const { service } = buildService({ htmlEmbed: false }); + const out = await service.updatePublicAttachments({ + id: PAGE, + workspaceId: WS, + content: pageContentWithEmbed(), + } as any); + expect(hasHtmlEmbedNode(out)).toBe(false); + }); + + it('updatePublicAttachments keeps htmlEmbed when toggle ON', async () => { + const { service } = buildService({ htmlEmbed: true }); + const out = await service.updatePublicAttachments({ + id: PAGE, + workspaceId: WS, + content: pageContentWithEmbed(), + } as any); + expect(hasHtmlEmbedNode(out)).toBe(true); + }); +}); diff --git a/apps/server/src/core/share/share.service.ts b/apps/server/src/core/share/share.service.ts index 03ee3155..846cc96a 100644 --- a/apps/server/src/core/share/share.service.ts +++ b/apps/server/src/core/share/share.service.ts @@ -26,6 +26,11 @@ import { validate as isValidUUID } from 'uuid'; import { sql } from 'kysely'; import { TransclusionService } from '../page/transclusion/transclusion.service'; import { TransclusionLookup } from '../page/transclusion/transclusion.types'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; +import { + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../../common/helpers/prosemirror/html-embed.util'; @Injectable() export class ShareService { @@ -38,8 +43,22 @@ export class ShareService { @InjectKysely() private readonly db: KyselyDB, private readonly tokenService: TokenService, private readonly transclusionService: TransclusionService, + private readonly workspaceRepo: WorkspaceRepo, ) {} + /** + * Resolve whether the htmlEmbed feature toggle is ON for a workspace. + * Fail-closed: a missing workspace (or absent/non-true setting) => OFF, so + * share content gets the embed stripped when we can't positively confirm the + * feature is enabled. + */ + private async isHtmlEmbedEnabledForWorkspace( + workspaceId: string, + ): Promise { + const workspace = await this.workspaceRepo.findById(workspaceId); + return isHtmlEmbedFeatureEnabled(workspace?.settings); + } + async getShareTree(shareId: string, workspaceId: string) { const share = await this.shareRepo.findById(shareId); if (!share || share.workspaceId !== workspaceId) { @@ -360,6 +379,11 @@ export class ShareService { workspaceId, ); + // Resolve the workspace htmlEmbed toggle once for this share request; all + // transcluded items belong to the same workspace as the host share. + const htmlEmbedEnabled = + await this.isHtmlEmbedEnabledForWorkspace(workspaceId); + // Sanitize each item's content for public delivery // generate per-attachment tokens scoped to the source page // and strip comment marks. @@ -370,6 +394,7 @@ export class ShareService { item.content, item.sourcePageId, workspaceId, + htmlEmbedEnabled, ); return { ...item, content: doc?.toJSON() ?? item.content }; }), @@ -417,10 +442,14 @@ export class ShareService { } async updatePublicAttachments(page: Page): Promise { + const htmlEmbedEnabled = await this.isHtmlEmbedEnabledForWorkspace( + page.workspaceId, + ); const doc = await this.prepareContentForShare( page.content, page.id, page.workspaceId, + htmlEmbedEnabled, ); return doc?.toJSON() ?? page.content; } @@ -441,6 +470,13 @@ export class ShareService { * not leak structure (existence, location, count, resolved state, or * comment ids) to public viewers. * + * 3. Strip `htmlEmbed` nodes when the workspace feature toggle is OFF. This + * makes the toggle a SERVER-AUTHORITATIVE kill-switch for shared content: + * when OFF the embed is never served to the anonymous viewer (who can't + * read the per-workspace toggle), when ON the embed is served so the + * read-only client executes it. `htmlEmbedEnabled` is resolved fail-closed + * by the callers (missing workspace => OFF => strip). + * * Both share-content paths — the host page (`updatePublicAttachments`) and * the share-scoped transclusion lookup (`lookupTransclusionForShare`) — * call into this single helper so the two paths can never drift on @@ -450,8 +486,16 @@ export class ShareService { content: unknown, attachmentOwnerPageId: string, workspaceId: string, + htmlEmbedEnabled: boolean, ): Promise { - const pmJson = getProsemirrorContent(content); + let pmJson = getProsemirrorContent(content); + + // Kill-switch: when the workspace toggle is OFF, never serve htmlEmbed + // nodes to public viewers. Strip before tokenizing/serializing. + if (!htmlEmbedEnabled) { + pmJson = stripHtmlEmbedNodes(pmJson); + } + const attachmentIds = getAttachmentIds(pmJson); const tokenMap = new Map(); diff --git a/apps/server/src/core/workspace/dto/update-workspace.dto.ts b/apps/server/src/core/workspace/dto/update-workspace.dto.ts index c32e3697..1beb7ece 100644 --- a/apps/server/src/core/workspace/dto/update-workspace.dto.ts +++ b/apps/server/src/core/workspace/dto/update-workspace.dto.ts @@ -53,6 +53,12 @@ export class UpdateWorkspaceDto extends PartialType(CreateWorkspaceDto) { @IsBoolean() aiDictation: boolean; + // Workspace feature toggle for the admin-only HTML embed feature. Persisted at + // settings.htmlEmbed. ABSENT/false => OFF (default). + @IsOptional() + @IsBoolean() + htmlEmbed: boolean; + @IsOptional() @IsBoolean() aiPublicShareAssistant: boolean; diff --git a/apps/server/src/core/workspace/services/workspace.service.ts b/apps/server/src/core/workspace/services/workspace.service.ts index 94a86da2..deead2b8 100644 --- a/apps/server/src/core/workspace/services/workspace.service.ts +++ b/apps/server/src/core/workspace/services/workspace.service.ts @@ -511,6 +511,20 @@ export class WorkspaceService { ); } + if (typeof updateWorkspaceDto.htmlEmbed !== 'undefined') { + const prev = settingsBefore?.htmlEmbed ?? false; + if (prev !== updateWorkspaceDto.htmlEmbed) { + before.htmlEmbed = prev; + after.htmlEmbed = updateWorkspaceDto.htmlEmbed; + } + await this.workspaceRepo.updateSetting( + workspaceId, + 'htmlEmbed', + updateWorkspaceDto.htmlEmbed, + trx, + ); + } + if (typeof updateWorkspaceDto.aiPublicShareAssistant !== 'undefined') { const prev = settingsBefore?.ai?.publicShareAssistant ?? false; if (prev !== updateWorkspaceDto.aiPublicShareAssistant) { @@ -534,6 +548,7 @@ export class WorkspaceService { delete updateWorkspaceDto.allowMemberTemplates; delete updateWorkspaceDto.aiChat; delete updateWorkspaceDto.aiDictation; + delete updateWorkspaceDto.htmlEmbed; delete updateWorkspaceDto.aiPublicShareAssistant; await this.workspaceRepo.updateWorkspace( diff --git a/apps/server/src/database/repos/workspace/workspace.repo.ts b/apps/server/src/database/repos/workspace/workspace.repo.ts index 821120f7..ff45df3e 100644 --- a/apps/server/src/database/repos/workspace/workspace.repo.ts +++ b/apps/server/src/database/repos/workspace/workspace.repo.ts @@ -265,6 +265,32 @@ export class WorkspaceRepo { .executeTakeFirst(); } + /** + * Set a single scalar key at the TOP LEVEL of `settings` (e.g. + * `settings.htmlEmbed`). Mirrors `updateAiSettings`/`updateSharingSettings` + * but without a nested namespace object. `prefKey` comes from a fixed + * allowlist at the call site (inlined via `sql.raw`, never user input); the + * value is inlined via `sql.lit`. + */ + async updateSetting( + workspaceId: string, + prefKey: string, + prefValue: string | boolean, + trx?: KyselyTransaction, + ) { + const db = dbOrTx(this.db, trx); + return db + .updateTable('workspaces') + .set({ + settings: sql`COALESCE(settings, '{}'::jsonb) + || jsonb_build_object('${sql.raw(prefKey)}', ${sql.lit(prefValue)})`, + updatedAt: new Date(), + }) + .where('id', '=', workspaceId) + .returning(this.baseFields) + .executeTakeFirst(); + } + async updateSharingSettings( workspaceId: string, prefKey: string, diff --git a/apps/server/src/integrations/import/services/file-import-task.service.ts b/apps/server/src/integrations/import/services/file-import-task.service.ts index 218c75ca..c87d4d8f 100644 --- a/apps/server/src/integrations/import/services/file-import-task.service.ts +++ b/apps/server/src/integrations/import/services/file-import-task.service.ts @@ -20,6 +20,14 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered'; import { FileTask, InsertablePage } from '@docmost/db/types/entity.types'; import { markdownToHtml } from '@docmost/editor-ext'; import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils'; +import { + hasHtmlEmbedNode, + htmlEmbedAllowed, + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; +import { UserRepo } from '@docmost/db/repos/user/user.repo'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; import { formatImportHtml } from '../utils/import-formatter'; import { buildAttachmentCandidates, @@ -53,6 +61,8 @@ export class FileImportTaskService { private readonly backlinkRepo: BacklinkRepo, @InjectKysely() private readonly db: KyselyDB, private readonly importAttachmentService: ImportAttachmentService, + private readonly userRepo: UserRepo, + private readonly workspaceRepo: WorkspaceRepo, private eventEmitter: EventEmitter2, @Inject(AUDIT_SERVICE) private readonly auditService: IAuditService, ) {} @@ -149,6 +159,29 @@ export class FileImportTaskService { .where('id', '=', fileTask.spaceId) .executeTakeFirst(); + // SECURITY (Variant C admin gate, zip/multi-file import write path): + // An imported .html/.md file can carry an htmlEmbed marker (the node's + // serialized form), which would execute raw, unsanitized JS in readers' + // browsers. Only workspace admins/owners may author it. Resolve the + // importer's role ONCE here; each page's prosemirror JSON is run through the + // strip below before textContent/ydoc/insert when the importer is not an + // admin, so a non-admin cannot smuggle the node in via a zip import (which + // requires only space Edit). + const importingUser = await this.userRepo.findById( + fileTask.creatorId, + fileTask.workspaceId, + ); + // Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed + // survives only when the workspace feature toggle is ON and the importer is + // an admin/owner. OFF (default) => stripped for everyone. + const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(fileTask.workspaceId))?.settings, + ); + const importerCanAuthorHtmlEmbed = htmlEmbedAllowed( + htmlEmbedEnabled, + importingUser?.role, + ); + const pagesMap = new Map(); for (const absPath of allFiles) { @@ -496,9 +529,21 @@ export class FileImportTaskService { await this.importService.processHTML(html), ); - const { title, prosemirrorJson } = + let { title, prosemirrorJson } = this.importService.extractTitleAndRemoveHeading(pmState); + // SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages + // imported by a non-admin BEFORE computing textContent/ydoc/insert. + if ( + !importerCanAuthorHtmlEmbed && + hasHtmlEmbedNode(prosemirrorJson) + ) { + this.logger.warn( + `Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } + const insertablePage: InsertablePage = { id: page.id, slugId: page.slugId, diff --git a/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts new file mode 100644 index 00000000..603765fc --- /dev/null +++ b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts @@ -0,0 +1,123 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { + hasHtmlEmbedNode, + htmlEmbedAllowed, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; + +// FAIL-CLOSED IDENTITY for the import write paths. +// +// import.service / file-import-task.service cannot be unit-LOADED under the +// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in +// transformIgnorePatterns). So we cover the two load-bearing properties at the +// strongest feasible layer: +// +// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate +// predicate each entrypoint runs against the role resolved from +// userRepo.findById(...): a MISSING user (findById -> undefined) must fail +// closed (strip), and only 'admin'/'owner' keep the embed. +// +// (2) IDENTITY — source-pin which identity governs the gate so a refactor that +// swaps the lookup to the wrong user (e.g. the queue worker's caller) is +// caught: zip import resolves the role from `fileTask.creatorId`; single +// import from the request `userId`. NOT some ambient caller. +// +// If a guard is deleted/misplaced or the identity field changes, these break. + +const docWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +// The real predicate both import entrypoints apply (see the SECURITY blocks in +// import.service.ts and file-import-task.service.ts): resolve the importer via +// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`. +function applyImportGate( + json: any, + featureEnabled: boolean, + importingUser: { role?: string } | undefined, +) { + if ( + !htmlEmbedAllowed(featureEnabled, importingUser?.role) && + hasHtmlEmbedNode(json) + ) { + return stripHtmlEmbedNodes(json); + } + return json; +} + +describe('import gate fail-closed by toggle AND resolved-user role (real helpers)', () => { + it('toggle ON + missing user (userRepo.findById -> undefined) strips the embed', () => { + // findById returns undefined when the user/workspace pair does not resolve; + // undefined?.role is undefined -> htmlEmbedAllowed(true, undefined) === false. + const result = applyImportGate(docWithEmbed(), true, undefined); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); + + it("toggle ON + resolved role 'member' strips", () => { + expect( + hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'member' })), + ).toBe(false); + }); + + it("toggle ON + resolved role 'admin' keeps the embed", () => { + expect( + hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'admin' })), + ).toBe(true); + }); + + it("toggle ON + resolved role 'owner' keeps the embed", () => { + expect( + hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'owner' })), + ).toBe(true); + }); + + it('toggle OFF strips for every role (admin/owner/member)', () => { + for (const role of ['admin', 'owner', 'member'] as const) { + expect( + hasHtmlEmbedNode(applyImportGate(docWithEmbed(), false, { role })), + ).toBe(false); + } + }); +}); + +// Source-pin the identity each entrypoint feeds to userRepo.findById. These are +// the lines that decide WHOSE role governs the gate; pinning them means a +// refactor that points the lookup at the wrong user trips the test. +const SRC_DIR = join(__dirname); + +describe('import gate identity is pinned to the importer (source contract)', () => { + it('single import resolves the role from the request userId', () => { + const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8'); + // The role lookup must key on the request `userId`, then gate on the role. + expect(src).toMatch( + /this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/, + ); + expect(src).toMatch( + /htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*\)/, + ); + // And the toggle is resolved from the workspace settings. + expect(src).toContain('isHtmlEmbedFeatureEnabled('); + // And the gate uses the real strip helper. + expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)'); + }); + + it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => { + const src = readFileSync( + join(SRC_DIR, 'file-import-task.service.ts'), + 'utf-8', + ); + expect(src).toMatch( + /this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/, + ); + expect(src).toMatch( + /importerCanAuthorHtmlEmbed\s*=\s*htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*,?\s*\)/, + ); + expect(src).toContain('isHtmlEmbedFeatureEnabled('); + expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)'); + }); +}); diff --git a/apps/server/src/integrations/import/services/import.service.ts b/apps/server/src/integrations/import/services/import.service.ts index 9182dcf1..574a13ab 100644 --- a/apps/server/src/integrations/import/services/import.service.ts +++ b/apps/server/src/integrations/import/services/import.service.ts @@ -1,5 +1,13 @@ import { BadRequestException, Injectable, Logger } from '@nestjs/common'; import { PageRepo } from '@docmost/db/repos/page/page.repo'; +import { UserRepo } from '@docmost/db/repos/user/user.repo'; +import { + hasHtmlEmbedNode, + htmlEmbedAllowed, + isHtmlEmbedFeatureEnabled, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; +import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo'; import { MultipartFile } from '@fastify/multipart'; import * as path from 'path'; import { @@ -37,10 +45,12 @@ export class ImportService { constructor( private readonly pageRepo: PageRepo, + private readonly userRepo: UserRepo, private readonly storageService: StorageService, @InjectKysely() private readonly db: KyselyDB, @InjectQueue(QueueName.FILE_TASK_QUEUE) private readonly fileTaskQueue: Queue, + private readonly workspaceRepo: WorkspaceRepo, ) {} async importPage( @@ -83,8 +93,30 @@ export class ImportService { throw new BadRequestException(message); } - const { title, prosemirrorJson } = - this.extractTitleAndRemoveHeading(prosemirrorState); + const extracted = this.extractTitleAndRemoveHeading(prosemirrorState); + const title = extracted.title; + let prosemirrorJson = extracted.prosemirrorJson; + + // SECURITY (Variant C admin gate, import write path): + // An imported .html/.md file can carry an htmlEmbed marker (the node's + // serialized form), which would execute raw JS in readers' browsers. Only + // workspace admins/owners may author it, so strip htmlEmbed nodes from + // imports performed by a non-admin user. + if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) { + const importingUser = await this.userRepo.findById(userId, workspaceId); + // Toggle-AND-admin gate: htmlEmbed survives only when the workspace + // feature toggle is ON and the importer is an admin/owner. OFF (default) + // => stripped for everyone. + const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled( + (await this.workspaceRepo.findById(workspaceId))?.settings, + ); + if (!htmlEmbedAllowed(htmlEmbedEnabled, importingUser?.role)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from import by user ${userId}`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } + } const pageTitle = title || fileName; diff --git a/docs/arbitrary-html-embed-plan.md b/docs/arbitrary-html-embed-plan.md deleted file mode 100644 index e02466a8..00000000 --- a/docs/arbitrary-html-embed-plan.md +++ /dev/null @@ -1,95 +0,0 @@ -# Вставка произвольного HTML/CSS/JS в страницы — анализ и подходы - -> Статус: **черновик / обсуждение**. Решение по модели изоляции ещё не принято — см. раздел «Развилка». -> Исходный кейс: нужно вставлять трекер (счётчик аналитики) на вики-страницы. - -## 1. Почему «из коробки» произвольный HTML вставить нельзя - -Контент страницы в Docmost хранится не как HTML, а как **ProseMirror JSON** (документ TipTap, синхронизируется через Yjs). Любой путь, которым контент попадает в страницу — ручной ввод, вставка из буфера (paste), импорт Markdown/HTML — проходит парсинг строго по схеме редактора: - -`apps/server/src/common/helpers/prosemirror/html/generateJSON.ts:45` - -```ts -PMDOMParser.fromSchema(schema).parse(doc.body, options) -``` - -`PMDOMParser.fromSchema` оставляет только те теги, для которых в схеме есть нода/марк с правилом `parseHTML` (`p`, `h1–h6`, списки, `blockquote`, `code`/`pre`, `a`, `strong`/`em`, таблицы, картинки, callout и т.п.). Всё остальное — `
`, `