diff --git a/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css b/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css new file mode 100644 index 00000000..75304685 --- /dev/null +++ b/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css @@ -0,0 +1,43 @@ +.htmlEmbedNodeView { + position: relative; +} + +/* The container the raw source is injected into. */ +.htmlEmbedContent { + width: 100%; +} + +/* Edit affordance overlay, only shown while editing the document. */ +.htmlEmbedToolbar { + position: absolute; + top: 4px; + right: 4px; + z-index: 2; + opacity: 0; + transition: opacity 0.15s ease; +} + +.htmlEmbedNodeView:hover .htmlEmbedToolbar { + opacity: 1; +} + +/* Placeholder card shown when the source is empty (edit mode only). */ +.htmlEmbedPlaceholder { + display: flex; + align-items: center; + justify-content: center; + gap: 8px; + padding: 16px; + border: 1px dashed var(--mantine-color-gray-4); + border-radius: 8px; + color: var(--mantine-color-dimmed); + + @mixin dark { + border-color: var(--mantine-color-dark-3); + } +} + +.htmlEmbedSelected { + outline: 2px solid var(--mantine-color-blue-5); + border-radius: 8px; +} diff --git a/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx b/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx new file mode 100644 index 00000000..4d9a1bb5 --- /dev/null +++ b/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx @@ -0,0 +1,149 @@ +import { NodeViewProps, NodeViewWrapper } from "@tiptap/react"; +import React, { useCallback, useEffect, useRef, useState } from "react"; +import clsx from "clsx"; +import { + ActionIcon, + Button, + Group, + Modal, + Text, + Textarea, +} from "@mantine/core"; +import { IconCode, IconEdit } from "@tabler/icons-react"; +import { useTranslation } from "react-i18next"; +import useUserRole from "@/hooks/use-user-role.tsx"; +import classes from 
"./html-embed-view.module.css"; + +/** + * Inject raw HTML (including ")} + styles={{ input: { fontFamily: "monospace" } }} + data-autofocus + /> + + + + + + + ); +} diff --git a/apps/client/src/features/editor/components/slash-menu/menu-items.ts b/apps/client/src/features/editor/components/slash-menu/menu-items.ts index 7f856755..b2c5c33f 100644 --- a/apps/client/src/features/editor/components/slash-menu/menu-items.ts +++ b/apps/client/src/features/editor/components/slash-menu/menu-items.ts @@ -587,6 +587,21 @@ const CommandGroups: SlashMenuGroupedItemsType = { .insertColumns({ layout: "five_equal" }) .run(), }, + { + title: "HTML embed", + description: "Embed raw HTML, CSS and JavaScript (admins only).", + searchTerms: ["html", "css", "js", "javascript", "script", "tracker", "analytics", "raw", "embed"], + icon: IconCode, + adminOnly: true, + command: ({ editor, range }: CommandProps) => { + editor + .chain() + .focus() + .deleteRange(range) + .setHtmlEmbed({ source: "" }) + .run(); + }, + }, { title: "Iframe embed", description: "Embed any Iframe", @@ -744,6 +759,24 @@ const CommandGroups: SlashMenuGroupedItemsType = { ], }; +/** + * Read whether the current user is a workspace admin/owner from the persisted + * `currentUser` (the same payload `currentUserAtom` stores via localStorage). + * Used to hide admin-only slash items (e.g. raw HTML embed). This is a UI gate + * only; the server independently strips admin-only nodes from non-admin writes. 
+ */ +function isCurrentUserAdmin(): boolean { + try { + const raw = localStorage.getItem("currentUser"); + if (!raw) return false; + const parsed = JSON.parse(raw); + const role = parsed?.user?.role; + return role === "owner" || role === "admin"; + } catch { + return false; + } +} + export const getSuggestionItems = ({ query, excludeItems, @@ -753,6 +786,7 @@ export const getSuggestionItems = ({ }): SlashMenuGroupedItemsType => { const search = query.toLowerCase(); const filteredGroups: SlashMenuGroupedItemsType = {}; + const isAdmin = isCurrentUserAdmin(); const fuzzyMatch = (query: string, target: string) => { let queryIndex = 0; @@ -767,6 +801,8 @@ export const getSuggestionItems = ({ for (const [group, items] of Object.entries(CommandGroups)) { const filteredItems = items.filter((item) => { if (excludeItems?.has(item.title)) return false; + // Hide admin-only items (raw HTML embed) from non-admins. + if (item.adminOnly && !isAdmin) return false; return ( fuzzyMatch(search, item.title) || item.description.toLowerCase().includes(search) || diff --git a/apps/client/src/features/editor/components/slash-menu/types.ts b/apps/client/src/features/editor/components/slash-menu/types.ts index cf5bd3e4..2bd9a9f3 100644 --- a/apps/client/src/features/editor/components/slash-menu/types.ts +++ b/apps/client/src/features/editor/components/slash-menu/types.ts @@ -21,6 +21,9 @@ export type SlashMenuItemType = { searchTerms: string[]; command: (props: CommandProps) => void; disable?: (editor: ReturnType) => boolean; + // When true, the item is only offered to workspace admins/owners. This is a + // UI convenience only — the real authoring gate is enforced server-side. 
+ adminOnly?: boolean; }; export type SlashMenuGroupedItemsType = { diff --git a/apps/client/src/features/editor/extensions/extensions.ts b/apps/client/src/features/editor/extensions/extensions.ts index 87c7b9e5..edac4b26 100644 --- a/apps/client/src/features/editor/extensions/extensions.ts +++ b/apps/client/src/features/editor/extensions/extensions.ts @@ -41,6 +41,7 @@ import { Drawio, Excalidraw, Embed, + HtmlEmbed, TiptapPdf, PageBreak, SearchAndReplace, @@ -87,6 +88,7 @@ import CodeBlockView from "@/features/editor/components/code-block/code-block-vi import DrawioView from "../components/drawio/drawio-view"; import ExcalidrawView from "@/features/editor/components/excalidraw/excalidraw-view-lazy.tsx"; import EmbedView from "@/features/editor/components/embed/embed-view.tsx"; +import HtmlEmbedView from "@/features/editor/components/html-embed/html-embed-view.tsx"; import PdfView from "@/features/editor/components/pdf/pdf-view.tsx"; import SubpagesView from "@/features/editor/components/subpages/subpages-view.tsx"; import TransclusionView from "@/features/editor/components/transclusion/transclusion-view.tsx"; @@ -365,6 +367,13 @@ export const mainExtensions = [ Embed.configure({ view: EmbedView, }), + // Raw HTML/CSS/JS node (Variant C). The node is registered for ALL users so + // documents authored by admins render correctly for everyone; INSERTION is + // gated to admins in the slash menu, and the server strips the node from any + // non-admin write so a non-admin cannot persist it. 
+ HtmlEmbed.configure({ + view: HtmlEmbedView, + }), TiptapPdf.configure({ view: PdfView, }), diff --git a/apps/server/src/collaboration/collaboration.handler.ts b/apps/server/src/collaboration/collaboration.handler.ts index 992f9b74..fae7935d 100644 --- a/apps/server/src/collaboration/collaboration.handler.ts +++ b/apps/server/src/collaboration/collaboration.handler.ts @@ -8,6 +8,11 @@ import { import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util'; import * as Y from 'yjs'; import { User } from '@docmost/db/types/entity.types'; +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from '../common/helpers/prosemirror/html-embed.util'; export type CollabEventHandlers = ReturnType< CollaborationHandler['getHandlers'] @@ -83,8 +88,25 @@ export class CollaborationHandler { user: User; }, ) => { - const { prosemirrorJson, operation, user } = payload; + const { operation, user } = payload; + let { prosemirrorJson } = payload; this.logger.debug('Updating page content via yjs', documentName); + + // SECURITY (Variant C admin gate, REST/MCP/AI write path): + // updatePageContent is the server-side entrypoint used by the REST page + // update endpoint and by the MCP/AI agent. Raw `htmlEmbed` nodes execute + // arbitrary JS in every reader's browser, so a NON-admin caller must not + // be able to persist them here. If the editing user is not a workspace + // admin/owner, strip every htmlEmbed node before it reaches the ydoc. 
+ if (!canAuthorHtmlEmbed(user?.role)) { + if (hasHtmlEmbedNode(prosemirrorJson)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from non-admin update by user ${user?.id} on ${documentName}`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } + } + await this.withYdocConnection( hocuspocus, documentName, diff --git a/apps/server/src/collaboration/collaboration.util.ts b/apps/server/src/collaboration/collaboration.util.ts index 554aa43b..d99636db 100644 --- a/apps/server/src/collaboration/collaboration.util.ts +++ b/apps/server/src/collaboration/collaboration.util.ts @@ -32,6 +32,7 @@ import { Drawio, Excalidraw, Embed, + HtmlEmbed, Mention, Subpages, Highlight, @@ -102,6 +103,10 @@ export const tiptapExtensions = [ Drawio, Excalidraw, Embed, + // Registered server-side so the node survives schema parsing/serialization. + // Authoring is gated to admins at the document WRITE paths (see + // stripHtmlEmbedNodes usage in persistence/page services), NOT here. + HtmlEmbed, Mention, Subpages, Columns, diff --git a/apps/server/src/collaboration/extensions/persistence.extension.ts b/apps/server/src/collaboration/extensions/persistence.extension.ts index af4137d6..b9376d30 100644 --- a/apps/server/src/collaboration/extensions/persistence.extension.ts +++ b/apps/server/src/collaboration/extensions/persistence.extension.ts @@ -39,6 +39,11 @@ import { HISTORY_INTERVAL, } from '../constants'; import { TransclusionService } from '../../core/page/transclusion/transclusion.service'; +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from '../../common/helpers/prosemirror/html-embed.util'; @Injectable() export class PersistenceExtension implements Extension { @@ -112,7 +117,56 @@ export class PersistenceExtension implements Extension { const pageId = getPageId(documentName); - const tiptapJson = TiptapTransformer.fromYdoc(document, 'default'); + let tiptapJson = TiptapTransformer.fromYdoc(document, 'default'); + + // SECURITY 
(Variant C admin gate, collab WebSocket write path): + // The persisted snapshot is the merged ydoc, which may contain an htmlEmbed + // node inserted by ANY connected editor. htmlEmbed renders raw, unsanitized + // JS in every reader's browser, so only workspace admins/owners may author + // it. When the user whose store triggers this persist is not an admin, strip + // every htmlEmbed node before it is written to the page row AND before the + // ydoc state is re-encoded, so the node cannot be reintroduced by a + // non-admin via the collab socket. + // NOTE (residual risk): the gate is keyed to the storing connection's user. + // If an admin already authored an htmlEmbed and a non-admin's later store + // does not touch it, this strip would remove the admin's embed on that + // non-admin store. This is intentionally conservative (fail closed): the + // admin re-adds/keeps the node on their own next edit. A future refinement + // could diff against the previously persisted admin-authored embeds. + // + // ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in + // the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs + // update to connected clients immediately, so a non-admin's transient + // htmlEmbed can execute in OTHER open editors' browsers in the brief window + // before this persist strips it. The exposure is limited to concurrent + // AUTHENTICATED space members who have the doc open with Edit rights + // (semi-trusted) — anonymous public-share/readonly viewers do NOT open a + // collab socket (ReadonlyPageEditor renders fetched, already-stripped + // content; HocuspocusProvider is only used by the authenticated editable + // page-editor), and the PERSISTED page row plus every share/readonly read + // path are protected by this strip. The window is therefore accepted rather + // than mitigated with an inbound beforeBroadcast strip. 
+ if (!canAuthorHtmlEmbed(context?.user?.role)) { + if (hasHtmlEmbedNode(tiptapJson)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from non-admin collab store by user ${context?.user?.id} on ${documentName}`, + ); + tiptapJson = stripHtmlEmbedNodes(tiptapJson); + // Reflect the stripped content back into the shared ydoc so the node is + // removed for all connected clients, not just the persisted row. + const fragment = document.getXmlFragment('default'); + if (fragment.length > 0) { + fragment.delete(0, fragment.length); + } + const cleanDoc = TiptapTransformer.toYdoc( + tiptapJson, + 'default', + tiptapExtensions, + ); + Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc)); + } + } + const ydocState = Buffer.from(Y.encodeStateAsUpdate(document)); let textContent = null; diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts new file mode 100644 index 00000000..1a044a4e --- /dev/null +++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts @@ -0,0 +1,229 @@ +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from './html-embed.util'; +import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util'; +import { + decodeHtmlEmbedSource, + encodeHtmlEmbedSource, +} from '@docmost/editor-ext'; + +const findFirstChild = (json: any, type: string): any | undefined => { + if (!json || typeof json !== 'object') return undefined; + if (json.type === type) return json; + if (Array.isArray(json.content)) { + for (const child of json.content) { + const found = findFirstChild(child, type); + if (found) return found; + } + } + return undefined; +}; + +describe('stripHtmlEmbedNodes', () => { + it('removes a top-level htmlEmbed node', () => { + const doc = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'before' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: 
[{ type: 'text', text: 'after' }] }, + ], + }; + + const result = stripHtmlEmbedNodes(doc); + expect(hasHtmlEmbedNode(result)).toBe(false); + // Other nodes are preserved. + expect(result.content).toHaveLength(2); + expect(result.content[0].content[0].text).toBe('before'); + expect(result.content[1].content[0].text).toBe('after'); + }); + + it('removes nested htmlEmbed nodes (e.g. inside columns)', () => { + const doc = { + type: 'doc', + content: [ + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { type: 'htmlEmbed', attrs: { source: 'x' } }, + { + type: 'paragraph', + content: [{ type: 'text', text: 'keep' }], + }, + ], + }, + ], + }, + ], + }; + + const result = stripHtmlEmbedNodes(doc); + expect(hasHtmlEmbedNode(result)).toBe(false); + const col = findFirstChild(result, 'column'); + expect(col.content).toHaveLength(1); + expect(col.content[0].type).toBe('paragraph'); + }); + + it('does not mutate the input document', () => { + const doc = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source: 'x' } }], + }; + stripHtmlEmbedNodes(doc); + expect(doc.content).toHaveLength(1); + expect(doc.content[0].type).toBe('htmlEmbed'); + }); + + it('leaves documents without htmlEmbed untouched', () => { + const doc = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }, + ], + }; + expect(hasHtmlEmbedNode(doc)).toBe(false); + const result = stripHtmlEmbedNodes(doc); + expect(result).toEqual(doc); + }); +}); + +describe('canAuthorHtmlEmbed', () => { + it('allows owner and admin', () => { + expect(canAuthorHtmlEmbed('owner')).toBe(true); + expect(canAuthorHtmlEmbed('admin')).toBe(true); + }); + it('denies member and unknown/empty roles', () => { + expect(canAuthorHtmlEmbed('member')).toBe(false); + expect(canAuthorHtmlEmbed(null)).toBe(false); + expect(canAuthorHtmlEmbed(undefined)).toBe(false); + expect(canAuthorHtmlEmbed('viewer')).toBe(false); + }); +}); + +// Replicates the write-path decision 
used by every non-admin persistence guard +// (collab store, single import, zip import, duplication, transclusion unsync): +// if !canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json) -> strip, else keep. +const applyAdminGate = (json: any, role: string | null | undefined) => { + if (!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)) { + return stripHtmlEmbedNodes(json); + } + return json; +}; + +describe('admin-gate write-path decision (duplication / import / unsync)', () => { + const docWithEmbed = { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], + }; + + it('strips the embed for a non-admin (member) author', () => { + const result = applyAdminGate(docWithEmbed, 'member'); + expect(hasHtmlEmbedNode(result)).toBe(false); + expect(result.content).toHaveLength(1); + expect(result.content[0].content[0].text).toBe('keep'); + }); + + it('strips the embed for unknown/empty roles', () => { + expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, null))).toBe(false); + expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, undefined))).toBe( + false, + ); + expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'viewer'))).toBe( + false, + ); + }); + + it('keeps the embed for an admin author', () => { + const result = applyAdminGate(docWithEmbed, 'admin'); + expect(hasHtmlEmbedNode(result)).toBe(true); + expect(result).toBe(docWithEmbed); + }); + + it('keeps the embed for an owner author', () => { + const result = applyAdminGate(docWithEmbed, 'owner'); + expect(hasHtmlEmbedNode(result)).toBe(true); + }); + + it('strips nested embeds (subtree/column duplication) for a non-admin', () => { + const nested = { + type: 'doc', + content: [ + { + type: 'columns', + content: [ + { + type: 'column', + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'ok' }] }, + ], + }, + ], + }, + ], + }; + const result = 
applyAdminGate(nested, 'member'); + expect(hasHtmlEmbedNode(result)).toBe(false); + const col = findFirstChild(result, 'column'); + expect(col.content).toHaveLength(1); + expect(col.content[0].type).toBe('paragraph'); + }); + + it('leaves a non-admin doc without embeds untouched (no needless rewrite)', () => { + const clean = { + type: 'doc', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }], + }; + const result = applyAdminGate(clean, 'member'); + expect(result).toBe(clean); + }); +}); + +describe('htmlEmbed source base64 codec', () => { + it('round-trips arbitrary source including UTF-8', () => { + const source = ''; + const encoded = encodeHtmlEmbedSource(source); + expect(encoded).not.toContain('<'); + expect(decodeHtmlEmbedSource(encoded)).toBe(source); + }); +}); + +describe('htmlEmbed node HTML <-> JSON round-trip', () => { + it('preserves the raw source through HTML -> JSON', () => { + const source = ''; + const encoded = encodeHtmlEmbedSource(source); + const html = `
`; + + const json = htmlToJson(html); + const node = findFirstChild(json, 'htmlEmbed'); + expect(node).toBeDefined(); + expect(node.attrs.source).toBe(source); + }); + + it('round-trips JSON -> HTML -> JSON keeping the source', () => { + const source = '
raw & markup
'; + const json = { + type: 'doc', + content: [{ type: 'htmlEmbed', attrs: { source } }], + }; + + const html = jsonToHtml(json); + // The static HTML carries the encoded source but does NOT inline the raw + // markup (it must not be an injection vector by itself). + expect(html).toContain('data-type="htmlEmbed"'); + expect(html).not.toContain('onclick'); + + const back = htmlToJson(html); + const node = findFirstChild(back, 'htmlEmbed'); + expect(node).toBeDefined(); + expect(node.attrs.source).toBe(source); + }); +}); diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts new file mode 100644 index 00000000..5a521ba5 --- /dev/null +++ b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts @@ -0,0 +1,68 @@ +import { JSONContent } from '@tiptap/core'; + +export const HTML_EMBED_NODE_NAME = 'htmlEmbed'; + +/** + * Recursively remove every `htmlEmbed` node from a ProseMirror JSON document. + * + * SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin + * (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to + * author it. This helper is the server-side enforcement primitive: every WRITE + * path that may persist content from a NON-admin caller must run the incoming + * document through this function so a non-admin cannot smuggle the node in via + * the collab socket, the REST/MCP/AI content-update path, paste, or import. + * + * Returns a NEW document; the input is not mutated. If the input is not a valid + * doc object it is returned unchanged (callers persist what they were given). + */ +export function stripHtmlEmbedNodes(pmJson: T): T { + if (!pmJson || typeof pmJson !== 'object') { + return pmJson; + } + + const node = pmJson as unknown as JSONContent; + + if (Array.isArray(node.content)) { + const filtered: JSONContent[] = []; + for (const child of node.content) { + // Drop any htmlEmbed child outright. 
+ if (child && child.type === HTML_EMBED_NODE_NAME) { + continue; + } + // Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are + // also removed. + filtered.push(stripHtmlEmbedNodes(child)); + } + return { ...node, content: filtered } as unknown as T; + } + + return { ...node } as unknown as T; +} + +/** + * Returns true if the document contains at least one `htmlEmbed` node anywhere + * in its tree. Useful to decide whether a strip pass actually changed anything + * (e.g. for logging a rejected non-admin embed attempt). + */ +export function hasHtmlEmbedNode(pmJson: unknown): boolean { + if (!pmJson || typeof pmJson !== 'object') { + return false; + } + const node = pmJson as JSONContent; + if (node.type === HTML_EMBED_NODE_NAME) { + return true; + } + if (Array.isArray(node.content)) { + return node.content.some((child) => hasHtmlEmbedNode(child)); + } + return false; +} + +/** + * Map the workspace user role to whether it may author `htmlEmbed` nodes. + * Owners and admins are trusted; everyone else (member, and any unknown role) + * is not. Kept here so every write path shares one definition of "trusted". 
+ */ +export function canAuthorHtmlEmbed(role: string | null | undefined): boolean { + return role === 'owner' || role === 'admin'; +} diff --git a/apps/server/src/core/page/services/page.service.ts b/apps/server/src/core/page/services/page.service.ts index cc1dfb24..adc8b746 100644 --- a/apps/server/src/core/page/services/page.service.ts +++ b/apps/server/src/core/page/services/page.service.ts @@ -30,6 +30,11 @@ import { isAttachmentNode, removeMarkTypeFromDoc, } from '../../../common/helpers/prosemirror/utils'; +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; import { htmlToJson, jsonToNode, @@ -688,7 +693,25 @@ export class PageService { } }); - const prosemirrorJson = prosemirrorDoc.toJSON(); + let prosemirrorJson = prosemirrorDoc.toJSON(); + + // SECURITY (Variant C admin gate, duplication write path): + // Duplication builds the ydoc directly and bypasses the collab + // onStoreDocument strip. htmlEmbed renders raw, unsanitized JS in + // readers' browsers, so only workspace admins/owners may author it. A + // non-admin with space Edit could otherwise duplicate an admin page + // that contains an embed into a new page authored by them. Strip every + // htmlEmbed node from each duplicated page when the duplicating user is + // not an admin, BEFORE computing textContent/ydoc/insert. 
+ if ( + !canAuthorHtmlEmbed(authUser.role) && + hasHtmlEmbedNode(prosemirrorJson) + ) { + this.logger.warn( + `Stripping htmlEmbed node(s) from non-admin page duplication by user ${authUser.id} (source page ${page.id})`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } // Add "Copy of " prefix to the root page title only for duplicates in same space let title = page.title; diff --git a/apps/server/src/core/page/transclusion/transclusion.service.ts b/apps/server/src/core/page/transclusion/transclusion.service.ts index e208707c..73abb49b 100644 --- a/apps/server/src/core/page/transclusion/transclusion.service.ts +++ b/apps/server/src/core/page/transclusion/transclusion.service.ts @@ -23,6 +23,11 @@ import { rewriteAttachmentsForUnsync } from './utils/transclusion-unsync.util'; import { TransclusionLookup } from './transclusion.types'; import { Page, User } from '@docmost/db/types/entity.types'; import { PageAccessService } from '../page-access/page-access.service'; +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; type ReferencingPageInfo = { id: string; @@ -461,10 +466,12 @@ export class TransclusionService { throw new NotFoundException('Sync block not found'); } - const { content, copies } = rewriteAttachmentsForUnsync( + let content: unknown; + let copies: ReturnType['copies']; + ({ content, copies } = rewriteAttachmentsForUnsync( transclusion.content, () => uuid7(), - ); + )); if (copies.length > 0) { const oldIds = copies.map((c) => c.oldAttachmentId); @@ -513,6 +520,21 @@ export class TransclusionService { transclusionId, ); + // SECURITY (Variant C admin gate, transclusion unsync write path): + // The returned content is a source snapshot that the client materializes + // into the reference page via insertContentAt. The snapshot keeps any + // htmlEmbed verbatim, and unsync requires only space Edit/View. 
If the + // requesting user is not a workspace admin/owner, strip htmlEmbed nodes so a + // non-admin can never receive an embed payload to re-persist (the collab + // strip on the subsequent save is debounced/race-prone and must not be the + // only guard). Admin behavior is unchanged. + if (!canAuthorHtmlEmbed(user.role) && hasHtmlEmbedNode(content)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from non-admin transclusion unsync by user ${user.id} (reference page ${referencePageId}, source page ${sourcePageId})`, + ); + content = stripHtmlEmbedNodes(content); + } + return { content }; } } diff --git a/apps/server/src/integrations/import/services/file-import-task.service.ts b/apps/server/src/integrations/import/services/file-import-task.service.ts index 40525ddf..fe063597 100644 --- a/apps/server/src/integrations/import/services/file-import-task.service.ts +++ b/apps/server/src/integrations/import/services/file-import-task.service.ts @@ -20,6 +20,12 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered'; import { FileTask, InsertablePage } from '@docmost/db/types/entity.types'; import { markdownToHtml } from '@docmost/editor-ext'; import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils'; +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; +import { UserRepo } from '@docmost/db/repos/user/user.repo'; import { formatImportHtml } from '../utils/import-formatter'; import { buildAttachmentCandidates, @@ -53,6 +59,7 @@ export class FileImportTaskService { private readonly backlinkRepo: BacklinkRepo, @InjectKysely() private readonly db: KyselyDB, private readonly importAttachmentService: ImportAttachmentService, + private readonly userRepo: UserRepo, private eventEmitter: EventEmitter2, @Inject(AUDIT_SERVICE) private readonly auditService: IAuditService, ) {} @@ -149,6 +156,20 @@ export class FileImportTaskService { .where('id', 
'=', fileTask.spaceId) .executeTakeFirst(); + // SECURITY (Variant C admin gate, zip/multi-file import write path): + // An imported .html/.md file can carry an htmlEmbed marker (the node's + // serialized form), which would execute raw, unsanitized JS in readers' + // browsers. Only workspace admins/owners may author it. Resolve the + // importer's role ONCE here; each page's prosemirror JSON is run through the + // strip below before textContent/ydoc/insert when the importer is not an + // admin, so a non-admin cannot smuggle the node in via a zip import (which + // requires only space Edit). + const importingUser = await this.userRepo.findById( + fileTask.creatorId, + fileTask.workspaceId, + ); + const importerCanAuthorHtmlEmbed = canAuthorHtmlEmbed(importingUser?.role); + const pagesMap = new Map(); for (const absPath of allFiles) { @@ -496,9 +517,21 @@ export class FileImportTaskService { await this.importService.processHTML(html), ); - const { title, prosemirrorJson } = + let { title, prosemirrorJson } = this.importService.extractTitleAndRemoveHeading(pmState); + // SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages + // imported by a non-admin BEFORE computing textContent/ydoc/insert. 
+ if ( + !importerCanAuthorHtmlEmbed && + hasHtmlEmbedNode(prosemirrorJson) + ) { + this.logger.warn( + `Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } + const insertablePage: InsertablePage = { id: page.id, slugId: page.slugId, diff --git a/apps/server/src/integrations/import/services/import.service.ts b/apps/server/src/integrations/import/services/import.service.ts index 9182dcf1..46bd6ed2 100644 --- a/apps/server/src/integrations/import/services/import.service.ts +++ b/apps/server/src/integrations/import/services/import.service.ts @@ -1,5 +1,11 @@ import { BadRequestException, Injectable, Logger } from '@nestjs/common'; import { PageRepo } from '@docmost/db/repos/page/page.repo'; +import { UserRepo } from '@docmost/db/repos/user/user.repo'; +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; import { MultipartFile } from '@fastify/multipart'; import * as path from 'path'; import { @@ -37,6 +43,7 @@ export class ImportService { constructor( private readonly pageRepo: PageRepo, + private readonly userRepo: UserRepo, private readonly storageService: StorageService, @InjectKysely() private readonly db: KyselyDB, @InjectQueue(QueueName.FILE_TASK_QUEUE) @@ -83,8 +90,24 @@ export class ImportService { throw new BadRequestException(message); } - const { title, prosemirrorJson } = - this.extractTitleAndRemoveHeading(prosemirrorState); + const extracted = this.extractTitleAndRemoveHeading(prosemirrorState); + const title = extracted.title; + let prosemirrorJson = extracted.prosemirrorJson; + + // SECURITY (Variant C admin gate, import write path): + // An imported .html/.md file can carry an htmlEmbed marker (the node's + // serialized form), which would execute raw JS in readers' browsers. 
Only + // workspace admins/owners may author it, so strip htmlEmbed nodes from + // imports performed by a non-admin user. + if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) { + const importingUser = await this.userRepo.findById(userId, workspaceId); + if (!canAuthorHtmlEmbed(importingUser?.role)) { + this.logger.warn( + `Stripping htmlEmbed node(s) from non-admin import by user ${userId}`, + ); + prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson); + } + } const pageTitle = title || fileName; diff --git a/packages/editor-ext/src/index.ts b/packages/editor-ext/src/index.ts index 003d2288..b8cf918e 100644 --- a/packages/editor-ext/src/index.ts +++ b/packages/editor-ext/src/index.ts @@ -16,6 +16,7 @@ export * from "./lib/custom-code-block"; export * from "./lib/drawio"; export * from "./lib/excalidraw"; export * from "./lib/embed"; +export * from "./lib/html-embed/html-embed"; export * from "./lib/mention"; export * from "./lib/markdown"; export * from "./lib/search-and-replace"; diff --git a/packages/editor-ext/src/lib/html-embed/html-embed.ts b/packages/editor-ext/src/lib/html-embed/html-embed.ts new file mode 100644 index 00000000..d3d004a1 --- /dev/null +++ b/packages/editor-ext/src/lib/html-embed/html-embed.ts @@ -0,0 +1,138 @@ +import { Node, mergeAttributes } from "@tiptap/core"; +import { ReactNodeViewRenderer } from "@tiptap/react"; + +export interface HtmlEmbedOptions { + HTMLAttributes: Record; + view: any; +} + +export interface HtmlEmbedAttributes { + // Raw HTML/CSS/JS string that is injected verbatim into the wiki origin. + source?: string; +} + +declare module "@tiptap/core" { + interface Commands { + htmlEmbed: { + setHtmlEmbed: (attributes?: HtmlEmbedAttributes) => ReturnType; + }; + } +} + +/** + * Encode the raw source to base64 for the `data-source` attribute. + * + * The source is arbitrary HTML/CSS/JS. 
Storing it raw inside an HTML attribute + * would (a) require heavy escaping and (b) risk the parser interpreting markup + * inside the attribute. Base64 makes the round-trip HTML <-> ProseMirror JSON + * lossless and keeps the markup inert while it sits in the attribute. + * + * `encodeURIComponent`/`decodeURIComponent` wrap btoa/atob so that non-Latin1 + * (UTF-8) characters survive the base64 step. + */ +export function encodeHtmlEmbedSource(source: string): string { + if (!source) return ""; + try { + if (typeof btoa === "function") { + return btoa(encodeURIComponent(source)); + } + // Node fallback (server-side schema parsing has no global btoa). + return Buffer.from(encodeURIComponent(source), "utf-8").toString("base64"); + } catch { + // encodeURIComponent throws on malformed input (lone surrogates); the payload is then emitted empty, i.e. the source is dropped rather than stored raw. + return ""; + } +} + +export function decodeHtmlEmbedSource(encoded: string): string { + if (!encoded) return ""; + try { + if (typeof atob === "function") { + return decodeURIComponent(atob(encoded)); + } + // Node fallback. + return decodeURIComponent( + Buffer.from(encoded, "base64").toString("utf-8"), + ); + } catch { + return ""; + } +} + +export const HtmlEmbed = Node.create({ + name: "htmlEmbed", + inline: false, + group: "block", + // atom + isolating: the node has no editable ProseMirror children; its body + // is the opaque `source` string rendered by the NodeView. + atom: true, + isolating: true, + defining: true, + draggable: true, + + addOptions() { + return { + HTMLAttributes: {}, + view: null, + }; + }, + + addAttributes() { + return { + source: { + default: "", + // Decode the base64 payload back to the raw source on parse. + parseHTML: (element) => + decodeHtmlEmbedSource(element.getAttribute("data-source") || ""), + // Encode the raw source to base64 on render so it round-trips losslessly + // through the HTML <-> JSON conversions used by export/import/collab. 
+ renderHTML: (attributes: HtmlEmbedAttributes) => ({ + "data-source": encodeHtmlEmbedSource(attributes.source || ""), + }), + }, + }; + }, + + parseHTML() { + return [ + { + tag: `div[data-type="${this.name}"]`, + }, + ]; + }, + + renderHTML({ HTMLAttributes }) { + // The static HTML representation is just a marker div carrying the encoded + // source. The actual raw markup is NOT expanded here on purpose: the static + // generateHTML output (used for previews, search indexing, exports) must not + // itself become an injection vector. Only the client NodeView expands and + // executes the source. + return [ + "div", + mergeAttributes( + { "data-type": this.name }, + this.options.HTMLAttributes, + HTMLAttributes, + ), + ]; + }, + + addCommands() { + return { + setHtmlEmbed: + (attrs: HtmlEmbedAttributes) => + ({ commands }) => { + return commands.insertContent({ + type: this.name, + attrs: attrs, + }); + }, + }; + }, + + addNodeView() { + // Mark the editor as initialized before creating the React node view; presumably this makes @tiptap/react mount it synchronously (flushSync) - TODO confirm. + this.editor.isInitialized = true; + return ReactNodeViewRenderer(this.options.view); + }, +}); diff --git a/packages/editor-ext/src/lib/markdown/utils/html-embed.marked.ts b/packages/editor-ext/src/lib/markdown/utils/html-embed.marked.ts new file mode 100644 index 00000000..8333c3d7 --- /dev/null +++ b/packages/editor-ext/src/lib/markdown/utils/html-embed.marked.ts @@ -0,0 +1,41 @@ +import { Token } from "marked"; + +interface HtmlEmbedToken { + type: "htmlEmbed"; + raw: string; + encoded: string; +} + +/** + * Marked extension that rebuilds an `htmlEmbed` node from the HTML comment + * marker produced by the turndown rule (an HTML comment carrying the base64-encoded source). + * + * It emits the same marker div the node's `parseHTML` recognizes, so the + * pipeline MD -> HTML -> ProseMirror JSON restores the node (and its + * base64 `data-source`) exactly. We do NOT expand the raw markup here; the + * source stays base64-encoded in the attribute and is only executed by the + * client NodeView. 
+ */ +export const htmlEmbedExtension = { + name: "htmlEmbed", + level: "block" as const, + start(src: string) { + return src.indexOf("/; + const match = rule.exec(src); + + if (match) { + return { + type: "htmlEmbed", + raw: match[0], + encoded: match[1] ?? "", + }; + } + }, + renderer(token: Token) { + const htmlEmbedToken = token as HtmlEmbedToken; + return `
`; + }, +}; diff --git a/packages/editor-ext/src/lib/markdown/utils/marked.utils.ts b/packages/editor-ext/src/lib/markdown/utils/marked.utils.ts index 7556aa4f..58bb83f9 100644 --- a/packages/editor-ext/src/lib/markdown/utils/marked.utils.ts +++ b/packages/editor-ext/src/lib/markdown/utils/marked.utils.ts @@ -2,6 +2,7 @@ import { marked } from "marked"; import { calloutExtension } from "./callout.marked"; import { mathBlockExtension } from "./math-block.marked"; import { mathInlineExtension } from "./math-inline.marked"; +import { htmlEmbedExtension } from "./html-embed.marked"; marked.use({ renderer: { @@ -34,7 +35,12 @@ marked.use({ }); marked.use({ - extensions: [calloutExtension, mathBlockExtension, mathInlineExtension], + extensions: [ + calloutExtension, + mathBlockExtension, + mathInlineExtension, + htmlEmbedExtension, + ], }); marked.setOptions({ breaks: true }); diff --git a/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts b/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts index ebfc3423..449868f7 100644 --- a/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts +++ b/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts @@ -32,12 +32,39 @@ export function htmlToMarkdown(html: string): string { mathInline, mathBlock, iframeEmbed, + htmlEmbed, image, video, ]); return turndownService.turndown(html).replaceAll('
', ' '); } +/** + * Serialize the `htmlEmbed` node to Markdown. + * + * Markdown has no native representation for an arbitrary-HTML block, so we + * preserve the node losslessly as an HTML comment carrying the base64-encoded + * source (the same `data-source` payload the node stores). `markdownToHtml` + * recognizes the same marker and rebuilds the node, so the round-trip + * MD -> HTML -> JSON keeps the source intact. The comment also keeps the raw + * markup inert in the exported `.md` file (it does not render in plain Markdown + * viewers). + */ +function htmlEmbed(turndownService: _TurndownService) { + turndownService.addRule('htmlEmbed', { + filter: function (node: HTMLInputElement) { + return ( + node.nodeName === 'DIV' && + node.getAttribute('data-type') === 'htmlEmbed' + ); + }, + replacement: function (_content: string, node: HTMLInputElement) { + const encoded = node.getAttribute('data-source') || ''; + return `\n\n\n\n`; + }, + }); +} + function listParagraph(turndownService: _TurndownService) { turndownService.addRule('paragraph', { filter: ['p'],