Merge pull request 'feat(editor): admin-only raw HTML/CSS/JS embed (variant C)' (#16) from feat/html-embed-admin into develop
Some checks failed
Develop / build (push) Has been cancelled

This commit was merged in pull request #16.
This commit is contained in:
claude_code
2026-06-20 20:19:06 +03:00
39 changed files with 2419 additions and 109 deletions

View File

@@ -0,0 +1,43 @@
.htmlEmbedNodeView {
position: relative;
}
/* The container the raw source is injected into. */
.htmlEmbedContent {
width: 100%;
}
/* Edit affordance overlay, only shown while editing the document. */
.htmlEmbedToolbar {
position: absolute;
top: 4px;
right: 4px;
z-index: 2;
opacity: 0;
transition: opacity 0.15s ease;
}
.htmlEmbedNodeView:hover .htmlEmbedToolbar {
opacity: 1;
}
/* Placeholder card shown when the source is empty (edit mode only). */
.htmlEmbedPlaceholder {
display: flex;
align-items: center;
justify-content: center;
gap: 8px;
padding: 16px;
border: 1px dashed var(--mantine-color-gray-4);
border-radius: 8px;
color: var(--mantine-color-dimmed);
@mixin dark {
border-color: var(--mantine-color-dark-3);
}
}
.htmlEmbedSelected {
outline: 2px solid var(--mantine-color-blue-5);
border-radius: 8px;
}

View File

@@ -0,0 +1,185 @@
import { NodeViewProps, NodeViewWrapper } from "@tiptap/react";
import React, { useCallback, useEffect, useRef, useState } from "react";
import clsx from "clsx";
import {
ActionIcon,
Button,
Group,
Modal,
Text,
Textarea,
} from "@mantine/core";
import { IconCode, IconEdit } from "@tabler/icons-react";
import { useTranslation } from "react-i18next";
import { useAtomValue } from "jotai";
import useUserRole from "@/hooks/use-user-role.tsx";
import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
import classes from "./html-embed-view.module.css";
/**
* Inject raw HTML (including <script> tags) into `container`, executing any
* scripts.
*
* Setting `innerHTML` does NOT run inline or external <script> tags the browser
* parses that way: the HTML spec marks scripts inserted via innerHTML as
* "already started" so they never execute. To get the tracker/analytics
* use-case working we walk the freshly-parsed scripts and replace each with a
* brand-new <script> element copying its attributes and inline code. A
* programmatically created+inserted <script> DOES execute, so this restores
* normal script behaviour in the wiki origin (Variant C).
*/
function renderRawHtml(container: HTMLElement, source: string) {
// Clear any previous render (re-render on source change).
container.innerHTML = "";
if (!source) return;
container.innerHTML = source;
const scripts = Array.from(container.querySelectorAll("script"));
for (const oldScript of scripts) {
const newScript = document.createElement("script");
// Copy every attribute (src, type, async, defer, data-*, etc.).
for (const attr of Array.from(oldScript.attributes)) {
newScript.setAttribute(attr.name, attr.value);
}
// Copy inline code.
newScript.text = oldScript.textContent ?? "";
// Replacing the node in place triggers execution.
oldScript.parentNode?.replaceChild(newScript, oldScript);
}
}
export default function HtmlEmbedView(props: NodeViewProps) {
const { t } = useTranslation();
const { node, selected, updateAttributes, editor } = props;
const { source } = node.attrs as { source: string };
const { isAdmin } = useUserRole();
// Defense in depth: only execute the raw HTML/JS when the workspace HTML embed
// feature toggle is ON. When OFF (the default), we render a neutral disabled
// placeholder and inject nothing — so turning the feature off neutralizes
// existing embeds at render time as well as on the next server-side save.
const workspace = useAtomValue(workspaceAtom);
const htmlEmbedEnabled = workspace?.settings?.htmlEmbed === true;
// Execution policy split by editor mode:
// - READ-ONLY / public-share view: the SERVER already decided whether to
// include the embed (it strips htmlEmbed from shared content when the
// workspace toggle is OFF). An anonymous viewer has no workspace and thus
// reads `htmlEmbedEnabled` as false, so we must NOT gate execution on it
// here — we execute exactly the `source` the server chose to serve.
// - EDITABLE editor (admin authoring): keep gating on the per-workspace
// toggle so an admin sees the inert placeholder when the feature is OFF.
const shouldExecute = !editor.isEditable || htmlEmbedEnabled;
const contentRef = useRef<HTMLDivElement | null>(null);
const [modalOpen, setModalOpen] = useState(false);
const [draft, setDraft] = useState<string>(source || "");
// (Re)render the raw source whenever it changes. This runs in BOTH the
// editable editor and the read-only / public-share editor (same NodeView),
// so trackers fire for readers too — that is the intended behaviour. When the
// feature toggle is OFF we clear the container and inject/execute nothing.
useEffect(() => {
if (!contentRef.current) return;
if (shouldExecute) {
renderRawHtml(contentRef.current, source || "");
} else {
contentRef.current.innerHTML = "";
}
}, [source, shouldExecute]);
const openEditor = useCallback(() => {
setDraft(source || "");
setModalOpen(true);
}, [source]);
const onSave = useCallback(() => {
if (editor.isEditable) {
updateAttributes({ source: draft });
}
setModalOpen(false);
}, [draft, editor.isEditable, updateAttributes]);
// The edit affordance is only meaningful in edit mode, is restricted to admins
// (the server strips the node for non-admins anyway), and is offered only when
// the workspace feature toggle is ON.
const canEdit = editor.isEditable && isAdmin && htmlEmbedEnabled;
return (
<NodeViewWrapper
data-drag-handle
className={clsx(classes.htmlEmbedNodeView, {
[classes.htmlEmbedSelected]: selected,
})}
>
{canEdit && (
<div className={classes.htmlEmbedToolbar}>
<ActionIcon
variant="default"
size="sm"
aria-label={t("Edit HTML embed")}
onClick={openEditor}
>
<IconEdit size={16} />
</ActionIcon>
</div>
)}
{!shouldExecute ? (
// Feature disabled for this workspace AND we're in the editable editor:
// never inject/execute the source. Show a neutral placeholder so an
// existing embed is visibly inert for the authoring admin. Read-only /
// share viewers never hit this branch (`shouldExecute` is always true
// there) — they execute exactly the source the server chose to serve.
<div className={classes.htmlEmbedPlaceholder}>
<IconCode size={18} />
<Text size="sm">
{t("HTML embed is disabled in this workspace")}
</Text>
</div>
) : source ? (
// Raw HTML/CSS/JS rendered into the wiki origin. Scripts are re-created
// in renderRawHtml so they execute.
<div ref={contentRef} className={classes.htmlEmbedContent} />
) : canEdit ? (
<div className={classes.htmlEmbedPlaceholder} onClick={openEditor}>
<IconCode size={18} />
<Text size="sm">{t("Click to add HTML / CSS / JS")}</Text>
</div>
) : (
// Empty source, non-editor: render nothing visible.
<div ref={contentRef} className={classes.htmlEmbedContent} />
)}
<Modal
opened={modalOpen}
onClose={() => setModalOpen(false)}
title={t("Edit HTML embed")}
size="lg"
>
<Text size="xs" c="dimmed" mb="xs">
{t(
"This HTML/CSS/JS runs in the page origin for everyone who views it. Admins only.",
)}
</Text>
<Textarea
autosize
minRows={10}
maxRows={24}
value={draft}
onChange={(e) => setDraft(e.currentTarget.value)}
placeholder={t("<script>...</script>")}
styles={{ input: { fontFamily: "monospace" } }}
data-autofocus
/>
<Group justify="flex-end" mt="md">
<Button variant="default" onClick={() => setModalOpen(false)}>
{t("Cancel")}
</Button>
<Button onClick={onSave}>{t("Save")}</Button>
</Group>
</Modal>
</NodeViewWrapper>
);
}

View File

@@ -587,6 +587,22 @@ const CommandGroups: SlashMenuGroupedItemsType = {
.insertColumns({ layout: "five_equal" })
.run(),
},
{
title: "HTML embed",
description: "Embed raw HTML, CSS and JavaScript (admins only).",
searchTerms: ["html", "css", "js", "javascript", "script", "tracker", "analytics", "raw", "embed"],
icon: IconCode,
adminOnly: true,
requiresHtmlEmbedFeature: true,
command: ({ editor, range }: CommandProps) => {
editor
.chain()
.focus()
.deleteRange(range)
.setHtmlEmbed({ source: "" })
.run();
},
},
{
title: "Iframe embed",
description: "Embed any Iframe",
@@ -744,6 +760,43 @@ const CommandGroups: SlashMenuGroupedItemsType = {
],
};
/**
* Read whether the current user is a workspace admin/owner from the persisted
* `currentUser` (the same payload `currentUserAtom` stores via localStorage).
* Used to hide admin-only slash items (e.g. raw HTML embed). This is a UI gate
* only; the server independently strips admin-only nodes from non-admin writes.
*/
function isCurrentUserAdmin(): boolean {
try {
const raw = localStorage.getItem("currentUser");
if (!raw) return false;
const parsed = JSON.parse(raw);
const role = parsed?.user?.role;
return role === "owner" || role === "admin";
} catch {
return false;
}
}
/**
* Read the workspace-level HTML embed feature toggle from the persisted
* `currentUser` payload (the same localStorage entry `currentUserAtom` writes,
* carrying `workspace.settings`). ABSENT/false => OFF (the default). The slash
* `getSuggestionItems` is a plain function (no React/atom context), so we read
* the persisted state the same way `isCurrentUserAdmin()` does. UI gate only;
* the server independently strips htmlEmbed from every non-allowed write.
*/
function isHtmlEmbedFeatureEnabled(): boolean {
try {
const raw = localStorage.getItem("currentUser");
if (!raw) return false;
const parsed = JSON.parse(raw);
return parsed?.workspace?.settings?.htmlEmbed === true;
} catch {
return false;
}
}
export const getSuggestionItems = ({
query,
excludeItems,
@@ -753,6 +806,8 @@ export const getSuggestionItems = ({
}): SlashMenuGroupedItemsType => {
const search = query.toLowerCase();
const filteredGroups: SlashMenuGroupedItemsType = {};
const isAdmin = isCurrentUserAdmin();
const htmlEmbedFeatureEnabled = isHtmlEmbedFeatureEnabled();
const fuzzyMatch = (query: string, target: string) => {
let queryIndex = 0;
@@ -767,6 +822,11 @@ export const getSuggestionItems = ({
for (const [group, items] of Object.entries(CommandGroups)) {
const filteredItems = items.filter((item) => {
if (excludeItems?.has(item.title)) return false;
// Hide admin-only items (raw HTML embed) from non-admins.
if (item.adminOnly && !isAdmin) return false;
// Hide HTML-embed-gated items unless the workspace feature toggle is ON.
if (item.requiresHtmlEmbedFeature && !htmlEmbedFeatureEnabled)
return false;
return (
fuzzyMatch(search, item.title) ||
item.description.toLowerCase().includes(search) ||

View File

@@ -21,6 +21,14 @@ export type SlashMenuItemType = {
searchTerms: string[];
command: (props: CommandProps) => void;
disable?: (editor: ReturnType<typeof useEditor>) => boolean;
// When true, the item is only offered to workspace admins/owners. This is a
// UI convenience only — the real authoring gate is enforced server-side.
adminOnly?: boolean;
// When true, the item is hidden unless the workspace HTML embed feature toggle
// is ON. Combined with adminOnly, the item shows only for admins in workspaces
// where the feature is enabled. UI gate only — the server strips htmlEmbed on
// every write where the toggle is OFF or the user is not an admin.
requiresHtmlEmbedFeature?: boolean;
};
export type SlashMenuGroupedItemsType = {

View File

@@ -41,6 +41,7 @@ import {
Drawio,
Excalidraw,
Embed,
HtmlEmbed,
TiptapPdf,
PageBreak,
SearchAndReplace,
@@ -87,6 +88,7 @@ import CodeBlockView from "@/features/editor/components/code-block/code-block-vi
import DrawioView from "../components/drawio/drawio-view";
import ExcalidrawView from "@/features/editor/components/excalidraw/excalidraw-view-lazy.tsx";
import EmbedView from "@/features/editor/components/embed/embed-view.tsx";
import HtmlEmbedView from "@/features/editor/components/html-embed/html-embed-view.tsx";
import PdfView from "@/features/editor/components/pdf/pdf-view.tsx";
import SubpagesView from "@/features/editor/components/subpages/subpages-view.tsx";
import TransclusionView from "@/features/editor/components/transclusion/transclusion-view.tsx";
@@ -365,6 +367,13 @@ export const mainExtensions = [
Embed.configure({
view: EmbedView,
}),
// Raw HTML/CSS/JS node (Variant C). The node is registered for ALL users so
// documents authored by admins render correctly for everyone; INSERTION is
// gated to admins in the slash menu, and the server strips the node from any
// non-admin write so a non-admin cannot persist it.
HtmlEmbed.configure({
view: HtmlEmbedView,
}),
TiptapPdf.configure({
view: PdfView,
}),

View File

@@ -0,0 +1,99 @@
import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
import { useAtom } from "jotai";
import { useState } from "react";
import { updateWorkspace } from "@/features/workspace/services/workspace-service.ts";
import { Switch, Stack, Paper, Group, Text, List } from "@mantine/core";
import { notifications } from "@mantine/notifications";
import useUserRole from "@/hooks/use-user-role.tsx";
import { useTranslation } from "react-i18next";
/**
* Admin toggle for the workspace HTML embed feature.
*
* SECURITY: when ON, workspace admins/owners can embed raw HTML/CSS/JS that
* EXECUTES in the wiki page origin for every reader (a deliberate stored-XSS
* surface, e.g. for analytics trackers). OFF by default. The server strips
* htmlEmbed nodes on every write where the toggle is OFF or the saver is not an
* admin, so this switch fully enables/disables the feature workspace-wide.
*/
export default function HtmlEmbedSettings() {
const { t } = useTranslation();
const [workspace, setWorkspace] = useAtom(workspaceAtom);
const { isAdmin } = useUserRole();
const [checked, setChecked] = useState<boolean>(
workspace?.settings?.htmlEmbed ?? false,
);
const [isLoading, setIsLoading] = useState(false);
async function handleToggle(value: boolean) {
setIsLoading(true);
const previous = checked;
setChecked(value); // optimistic update
try {
const updated = await updateWorkspace({ htmlEmbed: value });
// Force settings.htmlEmbed to the new value so the atom is consistent even
// if the response shape omits it.
setWorkspace({
...updated,
settings: {
...updated.settings,
htmlEmbed: value,
},
});
notifications.show({ message: t("Updated successfully") });
} catch (err) {
console.log(err);
setChecked(previous); // revert on failure
notifications.show({
message: t("Failed to update data"),
color: "red",
});
} finally {
setIsLoading(false);
}
}
return (
<Stack mt="sm">
<Group justify="space-between" align="center">
<Text fw={700} size="lg">
{t("HTML embed")}
</Text>
<Text size="xs" c="dimmed" tt="uppercase" fw={600}>
{t("advanced")}
</Text>
</Group>
<Paper withBorder radius="md" p="lg">
<Switch
label={t("Enable HTML embed")}
description={t(
"Allow workspace admins to insert raw HTML/CSS/JavaScript that EXECUTES in the wiki page origin for everyone who views the page (a deliberate stored-XSS surface, e.g. for analytics trackers). Off by default.",
)}
checked={checked}
disabled={!isAdmin || isLoading}
onChange={(event) => handleToggle(event.currentTarget.checked)}
/>
<List size="xs" c="dimmed" mt="md" spacing={4}>
<List.Item>
{t(
"Only workspace admins/owners can insert HTML embeds. Members never can: the editor option is hidden for them and the server strips the embed on save at every write path.",
)}
</List.Item>
<List.Item>
{t(
"If a non-admin edits and saves a page that contains an admin's embed, that save strips the embed (fail-closed). An admin must re-add it.",
)}
</List.Item>
<List.Item>
{t(
"Turning this off strips existing embeds on their next save and immediately disables execution (existing embeds render as a disabled placeholder).",
)}
</List.Item>
</List>
</Paper>
</Stack>
);
}

View File

@@ -30,6 +30,9 @@ export interface IWorkspace {
restrictApiToAdmins?: boolean;
allowMemberTemplates?: boolean;
isScimEnabled?: boolean;
// Write-only field for updateWorkspace({ htmlEmbed }). Read state lives at
// settings.htmlEmbed.
htmlEmbed?: boolean;
}
export interface IWorkspaceSettings {
@@ -37,6 +40,8 @@ export interface IWorkspaceSettings {
sharing?: IWorkspaceSharingSettings;
api?: IWorkspaceApiSettings;
templates?: IWorkspaceTemplateSettings;
// Admin-only HTML embed feature toggle. ABSENT/false => OFF (default).
htmlEmbed?: boolean;
}
export interface IWorkspaceApiSettings {

View File

@@ -1,6 +1,7 @@
import SettingsTitle from "@/components/settings/settings-title.tsx";
import WorkspaceNameForm from "@/features/workspace/components/settings/components/workspace-name-form";
import WorkspaceIcon from "@/features/workspace/components/settings/components/workspace-icon.tsx";
import HtmlEmbedSettings from "@/features/workspace/components/settings/components/html-embed-settings.tsx";
import { useTranslation } from "react-i18next";
import { getAppName } from "@/lib/config.ts";
import { Helmet } from "react-helmet-async";
@@ -15,6 +16,7 @@ export default function WorkspaceSettings() {
<SettingsTitle title={t("General")} />
<WorkspaceIcon />
<WorkspaceNameForm />
<HtmlEmbedSettings />
</>
);
}

View File

@@ -0,0 +1,120 @@
import * as Y from 'yjs';
import { TiptapTransformer } from '@hocuspocus/transformer';
import { CollaborationHandler } from './collaboration.handler';
import { hasHtmlEmbedNode } from '../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL CollaborationHandler.updatePageContent admin gate (the
// REST/MCP/AI content-update entrypoint, used by the page update endpoint and
// the MCP/AI agent). updatePageContent reads `user?.role` and strips htmlEmbed
// BEFORE handing the json to withYdocConnection. We stub only
// withYdocConnection (which would otherwise open a real hocuspocus connection):
// the role-extraction (`user?.role`) + strip that run upstream of it are REAL
// production code. The 'replace' branch then runs the production
// TiptapTransformer.toYdoc on the gated json against a real Y.Doc, which we
// decode back to JSON and assert on. This replaces the re-implemented
// `applyAdminGate` stand-in for this entrypoint.
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
{
type: 'columns',
content: [
{
type: 'column',
attrs: { position: 'left' },
content: [
{ type: 'htmlEmbed', attrs: { source: '<script>nested</script>' } },
{ type: 'paragraph', content: [{ type: 'text', text: 'inner' }] },
],
},
{
type: 'column',
attrs: { position: 'right' },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'r' }] },
],
},
],
},
{ type: 'htmlEmbed', attrs: { source: '<script>top</script>' } },
],
});
/**
* Run the REAL updatePageContent('replace') with a stubbed withYdocConnection.
* The stub provides a real Y.Doc + recording fragment; the production fn calls
* TiptapTransformer.toYdoc(<gated json>) and applies it to the doc, so decoding
* the doc afterward yields exactly the gated content.
*/
async function gatedContentFor(
role: string | null | undefined,
featureEnabled = true,
) {
// Workspace settings read used by the toggle-AND-admin gate.
const workspaceRepo = {
findById: jest.fn(async () => ({
id: 'ws-1',
settings: { htmlEmbed: featureEnabled },
})),
};
const handler = new CollaborationHandler(workspaceRepo as any);
const captureDoc = new Y.Doc();
jest
.spyOn(handler, 'withYdocConnection')
.mockImplementation(async (_hp, _name, _ctx, fn: any) => {
const fragment = captureDoc.getXmlFragment('default');
// Mirror the real Document surface the fn touches.
const docLike: any = {
getXmlFragment: () => fragment,
};
// The fn does: fragment.delete(0,len) then
// Y.applyUpdate(doc, encodeStateAsUpdate(toYdoc(gatedJson))). It calls
// Y.applyUpdate(doc, ...) — so docLike must be a real Y.Doc target.
fn(captureDoc);
});
const handlers = handler.getHandlers({} as any);
await handlers.updatePageContent('page-1', {
prosemirrorJson: docWithEmbed(),
operation: 'replace',
user: { id: 'u1', role, workspaceId: 'ws-1' } as any,
});
return TiptapTransformer.fromYdoc(captureDoc, 'default');
}
describe('CollaborationHandler.updatePageContent htmlEmbed admin gate (real code)', () => {
it('non-admin (member): every htmlEmbed (top-level + nested) stripped before the ydoc', async () => {
const gated = await gatedContentFor('member');
expect(hasHtmlEmbedNode(gated)).toBe(false);
// Non-embed siblings survive.
const json = JSON.stringify(gated);
expect(json).toContain('keep');
expect(json).toContain('inner');
});
it('unknown/empty role: fails closed (stripped)', async () => {
for (const role of [undefined, null, 'viewer'] as const) {
expect(hasHtmlEmbedNode(await gatedContentFor(role))).toBe(false);
}
});
it('toggle ON + admin: htmlEmbed preserved', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('admin', true))).toBe(true);
});
it('toggle ON + owner: htmlEmbed preserved', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('owner', true))).toBe(true);
});
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('admin', false))).toBe(false);
});
it('toggle OFF + member: stripped', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('member', false))).toBe(false);
});
});

View File

@@ -8,6 +8,13 @@ import {
import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util';
import * as Y from 'yjs';
import { User } from '@docmost/db/types/entity.types';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
export type CollabEventHandlers = ReturnType<
CollaborationHandler['getHandlers']
@@ -17,7 +24,7 @@ export type CollabEventHandlers = ReturnType<
export class CollaborationHandler {
private readonly logger = new Logger(CollaborationHandler.name);
constructor() {}
constructor(private readonly workspaceRepo: WorkspaceRepo) {}
getHandlers(hocuspocus: Hocuspocus) {
return {
@@ -83,8 +90,31 @@ export class CollaborationHandler {
user: User;
},
) => {
const { prosemirrorJson, operation, user } = payload;
const { operation, user } = payload;
let { prosemirrorJson } = payload;
this.logger.debug('Updating page content via yjs', documentName);
// SECURITY (Variant C admin gate, REST/MCP/AI write path):
// updatePageContent is the server-side entrypoint used by the REST page
// update endpoint and by the MCP/AI agent. Raw `htmlEmbed` nodes execute
// arbitrary JS in every reader's browser, so a NON-admin caller must not
// be able to persist them here. If the editing user is not a workspace
// admin/owner, strip every htmlEmbed node before it reaches the ydoc.
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
// feature toggle is ON and the editing user is an admin/owner. OFF
// (default) => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(user?.workspaceId))?.settings,
);
if (!htmlEmbedAllowed(htmlEmbedEnabled, user?.role)) {
if (hasHtmlEmbedNode(prosemirrorJson)) {
this.logger.warn(
`Stripping htmlEmbed node(s) from update by user ${user?.id} on ${documentName}`,
);
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
}
}
await this.withYdocConnection(
hocuspocus,
documentName,

View File

@@ -32,6 +32,7 @@ import {
Drawio,
Excalidraw,
Embed,
HtmlEmbed,
Mention,
Subpages,
Highlight,
@@ -102,6 +103,10 @@ export const tiptapExtensions = [
Drawio,
Excalidraw,
Embed,
// Registered server-side so the node survives schema parsing/serialization.
// Authoring is gated to admins at the document WRITE paths (see
// stripHtmlEmbedNodes usage in persistence/page services), NOT here.
HtmlEmbed,
Mention,
Subpages,
Columns,

View File

@@ -0,0 +1,280 @@
import * as Y from 'yjs';
import { TiptapTransformer } from '@hocuspocus/transformer';
import { PersistenceExtension } from './persistence.extension';
import { tiptapExtensions } from '../collaboration.util';
import {
hasHtmlEmbedNode,
HTML_EMBED_NODE_NAME,
} from '../../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL PersistenceExtension.onStoreDocument (the primary collab
// WebSocket write path) against a REAL ydoc, with thin repo/db/queue mocks.
// This replaces the prior re-implemented `applyAdminGate` stand-in for this
// entrypoint: if the role-extraction expression (`context?.user?.role`), the
// strip call, or the ydoc-rebuild branch is deleted/changed, these tests fail.
const RICH_DOC = {
type: 'doc',
content: [
{
type: 'paragraph',
content: [{ type: 'text', text: 'intro paragraph' }],
},
{
type: 'columns',
content: [
{
type: 'column',
attrs: { position: 'left' },
content: [
{
type: 'paragraph',
content: [
{ type: 'text', text: 'left col, mentioning ' },
{
type: 'mention',
attrs: {
id: 'mention-1',
label: 'Alice',
entityType: 'user',
entityId: 'user-123',
creatorId: 'creator-1',
},
},
],
},
// Nested embed inside a column — must be stripped recursively.
{
type: HTML_EMBED_NODE_NAME,
attrs: { source: '<script>nested()</script>' },
},
],
},
{
type: 'column',
attrs: { position: 'right' },
content: [
{
type: 'table',
content: [
{
type: 'tableRow',
content: [
{
type: 'tableHeader',
attrs: { colspan: 1, rowspan: 1 },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'H' }] },
],
},
],
},
{
type: 'tableRow',
content: [
{
type: 'tableCell',
attrs: { colspan: 1, rowspan: 1 },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'cell' }] },
],
},
],
},
],
},
],
},
],
},
// Top-level embed — must be stripped.
{
type: HTML_EMBED_NODE_NAME,
attrs: { source: '<script>top()</script>' },
},
{
type: 'paragraph',
content: [{ type: 'text', text: 'outro paragraph' }],
},
],
};
function buildYdoc(json: any): Y.Doc {
return TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
}
// Count nodes by type across the whole tree (excludes htmlEmbed by listing it
// separately) so we can assert every OTHER node type survived the strip.
function nodeTypeCounts(json: any): Record<string, number> {
const counts: Record<string, number> = {};
const walk = (n: any) => {
if (!n || typeof n !== 'object') return;
if (n.type) counts[n.type] = (counts[n.type] ?? 0) + 1;
if (Array.isArray(n.content)) n.content.forEach(walk);
};
walk(json);
return counts;
}
/**
* Construct a real PersistenceExtension with the minimum mocks needed for
* onStoreDocument to reach the strip + persist branch, and capture the content
* that would be written to the page row.
*/
function buildExtension(featureEnabled = true) {
const captured: { content?: any } = {};
const existingPage = {
id: 'page-1',
slugId: 'slug-1',
spaceId: 'space-1',
workspaceId: 'ws-1',
creatorId: 'creator-1',
contributorIds: [],
content: { type: 'doc', content: [] }, // differs from new content -> persist runs
createdAt: new Date(),
lastUpdatedSource: 'user',
};
const pageRepo = {
findById: jest.fn(async () => ({ ...existingPage })),
updatePage: jest.fn(async (values: any) => {
captured.content = values.content;
}),
};
const pageHistoryRepo = {
findPageLastHistory: jest.fn(async () => null),
saveHistory: jest.fn(async () => undefined),
};
// db.transaction().execute(cb) just runs the callback (no real DB).
const db = {
transaction: () => ({
execute: (cb: any) => cb({} as any),
}),
};
const noopQueue = { add: jest.fn(async () => undefined) } as any;
const collabHistory = { addContributors: jest.fn(async () => undefined) } as any;
const transclusionService = {
syncPageTransclusions: jest.fn(async () => undefined),
syncPageReferences: jest.fn(async () => undefined),
} as any;
// Workspace settings read used by the toggle-AND-admin gate.
const workspaceRepo = {
findById: jest.fn(async () => ({
id: 'ws-1',
settings: { htmlEmbed: featureEnabled },
})),
};
const ext = new PersistenceExtension(
pageRepo as any,
pageHistoryRepo as any,
db as any,
noopQueue,
noopQueue,
noopQueue,
collabHistory,
transclusionService,
workspaceRepo as any,
);
return { ext, captured, pageRepo };
}
async function runStore(
role: string | null | undefined,
doc: Y.Doc,
featureEnabled = true,
) {
const { ext, captured } = buildExtension(featureEnabled);
// hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has
// none, so stub it (the post-persist broadcast is not under test here).
(doc as any).broadcastStateless = () => undefined;
await ext.onStoreDocument({
documentName: 'page-1',
document: doc,
context: { user: { id: 'u1', role } },
} as any);
return captured;
}
describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)', () => {
it('non-admin store: strips EVERY htmlEmbed but preserves every other node', async () => {
const doc = buildYdoc(RICH_DOC);
const before = TiptapTransformer.fromYdoc(doc, 'default');
expect(hasHtmlEmbedNode(before)).toBe(true);
const beforeCounts = nodeTypeCounts(before);
const captured = await runStore('member', doc);
expect(captured.content).toBeDefined();
// htmlEmbed gone from the persisted content.
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
// Every non-embed node type is preserved with the SAME count (guards against
// data loss if a node were missing from tiptapExtensions and dropped on the
// toYdoc rebuild).
const afterCounts = nodeTypeCounts(captured.content);
for (const [type, count] of Object.entries(beforeCounts)) {
if (type === HTML_EMBED_NODE_NAME) continue;
expect(afterCounts[type]).toBe(count);
}
// The two embeds are gone.
expect(beforeCounts[HTML_EMBED_NODE_NAME]).toBe(2);
expect(afterCounts[HTML_EMBED_NODE_NAME]).toBeUndefined();
// The shared ydoc fragment was also rewritten clean (re-decode it).
const reDecoded = TiptapTransformer.fromYdoc(doc, 'default');
expect(hasHtmlEmbedNode(reDecoded)).toBe(false);
});
it('toggle ON + admin store: htmlEmbed preserved in persisted content', async () => {
const captured = await runStore('admin', buildYdoc(RICH_DOC), true);
expect(captured.content).toBeDefined();
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
expect(nodeTypeCounts(captured.content)[HTML_EMBED_NODE_NAME]).toBe(2);
});
it('toggle ON + owner store: htmlEmbed preserved', async () => {
const captured = await runStore('owner', buildYdoc(RICH_DOC), true);
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
});
it('toggle OFF + admin store: stripped (feature disabled for everyone)', async () => {
const captured = await runStore('admin', buildYdoc(RICH_DOC), false);
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
});
it('toggle OFF + owner store: stripped', async () => {
const captured = await runStore('owner', buildYdoc(RICH_DOC), false);
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
});
it('toggle OFF + member store: stripped', async () => {
const captured = await runStore('member', buildYdoc(RICH_DOC), false);
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
});
it('unknown/empty role: fails closed (stripped)', async () => {
expect(
hasHtmlEmbedNode((await runStore(undefined, buildYdoc(RICH_DOC))).content),
).toBe(false);
expect(
hasHtmlEmbedNode((await runStore(null, buildYdoc(RICH_DOC))).content),
).toBe(false);
expect(
hasHtmlEmbedNode((await runStore('viewer', buildYdoc(RICH_DOC))).content),
).toBe(false);
});
it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => {
const emptyDoc = buildYdoc({
type: 'doc',
content: [{ type: 'paragraph' }],
});
// Non-admin path with an empty/embed-free fragment must be a no-op strip,
// not throw.
await expect(runStore('member', emptyDoc)).resolves.toBeDefined();
});
});

View File

@@ -39,6 +39,13 @@ import {
HISTORY_INTERVAL,
} from '../constants';
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
@Injectable()
export class PersistenceExtension implements Extension {
@@ -59,6 +66,7 @@ export class PersistenceExtension implements Extension {
@InjectQueue(QueueName.NOTIFICATION_QUEUE) private notificationQueue: Queue,
private readonly collabHistory: CollabHistoryService,
private readonly transclusionService: TransclusionService,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async onLoadDocument(data: onLoadDocumentPayload) {
@@ -112,7 +120,62 @@ export class PersistenceExtension implements Extension {
const pageId = getPageId(documentName);
const tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
let tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
// SECURITY (Variant C admin gate, collab WebSocket write path):
// The persisted snapshot is the merged ydoc, which may contain an htmlEmbed
// node inserted by ANY connected editor. htmlEmbed renders raw, unsanitized
// JS in every reader's browser, so only workspace admins/owners may author
// it. When the user whose store triggers this persist is not an admin, strip
// every htmlEmbed node before it is written to the page row AND before the
// ydoc state is re-encoded, so the node cannot be reintroduced by a
// non-admin via the collab socket.
// NOTE (residual risk): the gate is keyed to the storing connection's user.
// If an admin already authored an htmlEmbed and a non-admin's later store
// does not touch it, this strip would remove the admin's embed on that
// non-admin store. This is intentionally conservative (fail closed): the
// admin re-adds/keeps the node on their own next edit. A future refinement
// could diff against the previously persisted admin-authored embeds.
//
// ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in
// the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs
// update to connected clients immediately, so a non-admin's transient
// htmlEmbed can execute in OTHER open editors' browsers in the brief window
// before this persist strips it. The exposure is limited to concurrent
// AUTHENTICATED space members who have the doc open with Edit rights
// (semi-trusted) — anonymous public-share/readonly viewers do NOT open a
// collab socket (ReadonlyPageEditor renders fetched, already-stripped
// content; HocuspocusProvider is only used by the authenticated editable
// page-editor), and the PERSISTED page row plus every share/readonly read
// path are protected by this strip. The window is therefore accepted rather
// than mitigated with an inbound beforeBroadcast strip.
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature
// toggle is ON and the storing user is an admin/owner. OFF (default) =>
// stripped for everyone (existing embeds get cleaned up on next save).
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(context?.user?.workspaceId))?.settings,
);
if (!htmlEmbedAllowed(htmlEmbedEnabled, context?.user?.role)) {
if (hasHtmlEmbedNode(tiptapJson)) {
this.logger.warn(
`Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`,
);
tiptapJson = stripHtmlEmbedNodes(tiptapJson);
// Reflect the stripped content back into the shared ydoc so the node is
// removed for all connected clients, not just the persisted row.
const fragment = document.getXmlFragment('default');
if (fragment.length > 0) {
fragment.delete(0, fragment.length);
}
const cleanDoc = TiptapTransformer.toYdoc(
tiptapJson,
'default',
tiptapExtensions,
);
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
}
}
const ydocState = Buffer.from(Y.encodeStateAsUpdate(document));
let textContent = null;

View File

@@ -0,0 +1,214 @@
import {
canAuthorHtmlEmbed,
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from './html-embed.util';
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
import {
decodeHtmlEmbedSource,
encodeHtmlEmbedSource,
} from '@docmost/editor-ext';
const findFirstChild = (json: any, type: string): any | undefined => {
if (!json || typeof json !== 'object') return undefined;
if (json.type === type) return json;
if (Array.isArray(json.content)) {
for (const child of json.content) {
const found = findFirstChild(child, type);
if (found) return found;
}
}
return undefined;
};
describe('stripHtmlEmbedNodes', () => {
it('removes a top-level htmlEmbed node', () => {
const doc = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'before' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
{ type: 'paragraph', content: [{ type: 'text', text: 'after' }] },
],
};
const result = stripHtmlEmbedNodes(doc);
expect(hasHtmlEmbedNode(result)).toBe(false);
// Other nodes are preserved.
expect(result.content).toHaveLength(2);
expect(result.content[0].content[0].text).toBe('before');
expect(result.content[1].content[0].text).toBe('after');
});
it('removes nested htmlEmbed nodes (e.g. inside columns)', () => {
const doc = {
type: 'doc',
content: [
{
type: 'columns',
content: [
{
type: 'column',
content: [
{ type: 'htmlEmbed', attrs: { source: '<b>x</b>' } },
{
type: 'paragraph',
content: [{ type: 'text', text: 'keep' }],
},
],
},
],
},
],
};
const result = stripHtmlEmbedNodes(doc);
expect(hasHtmlEmbedNode(result)).toBe(false);
const col = findFirstChild(result, 'column');
expect(col.content).toHaveLength(1);
expect(col.content[0].type).toBe('paragraph');
});
it('does not mutate the input document', () => {
const doc = {
type: 'doc',
content: [{ type: 'htmlEmbed', attrs: { source: 'x' } }],
};
stripHtmlEmbedNodes(doc);
expect(doc.content).toHaveLength(1);
expect(doc.content[0].type).toBe('htmlEmbed');
});
it('leaves documents without htmlEmbed untouched', () => {
const doc = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
],
};
expect(hasHtmlEmbedNode(doc)).toBe(false);
const result = stripHtmlEmbedNodes(doc);
expect(result).toEqual(doc);
});
});
describe('canAuthorHtmlEmbed', () => {
it('allows owner and admin', () => {
expect(canAuthorHtmlEmbed('owner')).toBe(true);
expect(canAuthorHtmlEmbed('admin')).toBe(true);
});
it('denies member and unknown/empty roles', () => {
expect(canAuthorHtmlEmbed('member')).toBe(false);
expect(canAuthorHtmlEmbed(null)).toBe(false);
expect(canAuthorHtmlEmbed(undefined)).toBe(false);
expect(canAuthorHtmlEmbed('viewer')).toBe(false);
});
});
describe('isHtmlEmbedFeatureEnabled', () => {
it('is true only when settings.htmlEmbed === true', () => {
expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: true })).toBe(true);
});
it('defaults to false (absent / false / non-object)', () => {
expect(isHtmlEmbedFeatureEnabled({})).toBe(false);
expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: false })).toBe(false);
expect(isHtmlEmbedFeatureEnabled(null)).toBe(false);
expect(isHtmlEmbedFeatureEnabled(undefined)).toBe(false);
// Truthy-but-not-true values must NOT enable the feature.
expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: 'true' as any })).toBe(false);
});
});
describe('htmlEmbedAllowed (toggle AND admin)', () => {
it('toggle OFF + admin/owner => not allowed (feature disabled for everyone)', () => {
expect(htmlEmbedAllowed(false, 'admin')).toBe(false);
expect(htmlEmbedAllowed(false, 'owner')).toBe(false);
});
it('toggle OFF + member => not allowed', () => {
expect(htmlEmbedAllowed(false, 'member')).toBe(false);
});
it('toggle ON + admin/owner => allowed', () => {
expect(htmlEmbedAllowed(true, 'admin')).toBe(true);
expect(htmlEmbedAllowed(true, 'owner')).toBe(true);
});
it('toggle ON + member/unknown => not allowed', () => {
expect(htmlEmbedAllowed(true, 'member')).toBe(false);
expect(htmlEmbedAllowed(true, null)).toBe(false);
expect(htmlEmbedAllowed(true, undefined)).toBe(false);
expect(htmlEmbedAllowed(true, 'viewer')).toBe(false);
});
});
// NOTE: a previous revision of this file re-implemented the write-path admin
// gate as a local `applyAdminGate` stand-in and asserted against THAT. A
// deleted/misplaced real guard would have kept those green. The stand-in is
// removed. The collab store, REST/MCP update, and transclusion-unsync paths are
// now tested against their REAL code in:
// - collaboration/extensions/persistence.extension.html-embed.spec.ts
// - collaboration/collaboration.handler.html-embed.spec.ts
// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup)
// - integrations/import/services/import-html-embed-identity.spec.ts (import)
//
// The case below stays here because it asserts a REAL parse path
// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a
// re-implemented gate.
describe('htmlEmbed smuggled via the markdown/html <!--html-embed--> form (real parse + real helpers)', () => {
it('the parsed node is detected and stripped by the real helpers', () => {
// The markdown/html create formats decode to the same htmlEmbed node, so the
// gate (run on the parsed JSON) covers them identically.
const source = '<script>steal()</script>';
const encoded = encodeHtmlEmbedSource(source);
const html = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
const parsed = htmlToJson(html);
expect(hasHtmlEmbedNode(parsed)).toBe(true);
// A non-admin role gates to strip via the real helpers.
expect(canAuthorHtmlEmbed('member')).toBe(false);
const stripped = stripHtmlEmbedNodes(parsed);
expect(hasHtmlEmbedNode(stripped)).toBe(false);
});
});
describe('htmlEmbed source base64 codec', () => {
it('round-trips arbitrary source including UTF-8', () => {
const source = '<script>console.log("héllo → 世界")</script>';
const encoded = encodeHtmlEmbedSource(source);
expect(encoded).not.toContain('<');
expect(decodeHtmlEmbedSource(encoded)).toBe(source);
});
});
describe('htmlEmbed node HTML <-> JSON round-trip', () => {
it('preserves the raw source through HTML -> JSON', () => {
const source = '<script>track("page")</script><style>.a{color:red}</style>';
const encoded = encodeHtmlEmbedSource(source);
const html = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
const json = htmlToJson(html);
const node = findFirstChild(json, 'htmlEmbed');
expect(node).toBeDefined();
expect(node.attrs.source).toBe(source);
});
it('round-trips JSON -> HTML -> JSON keeping the source', () => {
const source = '<div onclick="x()">raw &amp; markup</div>';
const json = {
type: 'doc',
content: [{ type: 'htmlEmbed', attrs: { source } }],
};
const html = jsonToHtml(json);
// The static HTML carries the encoded source but does NOT inline the raw
// markup (it must not be an injection vector by itself).
expect(html).toContain('data-type="htmlEmbed"');
expect(html).not.toContain('onclick');
const back = htmlToJson(html);
const node = findFirstChild(back, 'htmlEmbed');
expect(node).toBeDefined();
expect(node.attrs.source).toBe(source);
});
});

View File

@@ -0,0 +1,102 @@
import { JSONContent } from '@tiptap/core';
export const HTML_EMBED_NODE_NAME = 'htmlEmbed';
/**
* Recursively remove every `htmlEmbed` node from a ProseMirror JSON document.
*
* SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin
* (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to
* author it. This helper is the server-side enforcement primitive: every WRITE
* path that may persist content from a NON-admin caller must run the incoming
* document through this function so a non-admin cannot smuggle the node in via
* the collab socket, the REST/MCP/AI content-update path, paste, or import.
*
* Returns a NEW document; the input is not mutated. If the input is not a valid
* doc object it is returned unchanged (callers persist what they were given).
*/
export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
if (!pmJson || typeof pmJson !== 'object') {
return pmJson;
}
const node = pmJson as unknown as JSONContent;
if (Array.isArray(node.content)) {
const filtered: JSONContent[] = [];
for (const child of node.content) {
// Drop any htmlEmbed child outright.
if (child && child.type === HTML_EMBED_NODE_NAME) {
continue;
}
// Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are
// also removed.
filtered.push(stripHtmlEmbedNodes(child));
}
return { ...node, content: filtered } as unknown as T;
}
return { ...node } as unknown as T;
}
/**
* Returns true if the document contains at least one `htmlEmbed` node anywhere
* in its tree. Useful to decide whether a strip pass actually changed anything
* (e.g. for logging a rejected non-admin embed attempt).
*/
export function hasHtmlEmbedNode(pmJson: unknown): boolean {
if (!pmJson || typeof pmJson !== 'object') {
return false;
}
const node = pmJson as JSONContent;
if (node.type === HTML_EMBED_NODE_NAME) {
return true;
}
if (Array.isArray(node.content)) {
return node.content.some((child) => hasHtmlEmbedNode(child));
}
return false;
}
/**
* Map the workspace user role to whether it may author `htmlEmbed` nodes.
* Owners and admins are trusted; everyone else (member, and any unknown role)
* is not. Kept here so every write path shares one definition of "trusted".
*/
export function canAuthorHtmlEmbed(role: string | null | undefined): boolean {
return role === 'owner' || role === 'admin';
}
/**
* Combined write-path gate for the htmlEmbed feature.
*
* htmlEmbed is allowed in a document only when the workspace feature toggle is
* ON and the authoring/saving user is a workspace admin/owner. OFF (default) =>
* stripped for EVERYONE, including admins (the feature is disabled).
*
* `featureEnabled` is read from the workspace settings for the relevant write
* (`workspace.settings?.htmlEmbed === true`). Every WRITE path that may persist
* htmlEmbed content must gate on this combined predicate, so that turning the
* toggle OFF strips existing embeds on the next save and prevents new ones from
* being persisted regardless of role.
*/
export function htmlEmbedAllowed(
featureEnabled: boolean,
role: string | null | undefined,
): boolean {
return featureEnabled === true && canAuthorHtmlEmbed(role);
}
/**
* Read the workspace-level htmlEmbed feature toggle from a workspace's settings
* jsonb. ABSENT/non-true => OFF (the default). Kept here so every server write
* path resolves the toggle the same way.
*/
export function isHtmlEmbedFeatureEnabled(
settings: unknown | null | undefined,
): boolean {
if (!settings || typeof settings !== 'object') {
return false;
}
return (settings as Record<string, unknown>).htmlEmbed === true;
}

View File

@@ -237,6 +237,9 @@ export class PageController {
user.id,
workspace.id,
createPageDto,
// Pass the caller's workspace role so create() can enforce the htmlEmbed
// admin gate (non-admins cannot author raw-JS embeds).
user.role,
provenance,
);

View File

@@ -0,0 +1,102 @@
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
// PageService.create() and duplicatePage() guards.
//
// page.service.ts cannot be unit-LOADED under the server's jest config (a
// transitive ESM dep, @sindresorhus/slugify, is not in transformIgnorePatterns),
// so we cover the two load-bearing properties at the strongest feasible layer:
//
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact predicate
// each path applies: non-admin/unknown role -> strip, admin/owner -> keep.
//
// (2) IDENTITY — source-pin which role each path reads (create: the `callerRole`
// param threaded from the request; duplicate: `authUser.role`), so a
// refactor that drops the guard or reads the wrong role trips the test.
// This is what replaces the removed `applyAdminGate` stand-in for these
// two entrypoints.
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
],
});
// The real predicate both paths apply (see SECURITY blocks in page.service.ts):
// toggle AND admin.
function applyGate(
json: any,
featureEnabled: boolean,
role: string | null | undefined,
) {
if (!htmlEmbedAllowed(featureEnabled, role) && hasHtmlEmbedNode(json)) {
return stripHtmlEmbedNodes(json);
}
return json;
}
describe('page create/duplicate gate decision (real helpers)', () => {
it('toggle ON + non-admin (member) strips', () => {
const result = applyGate(docWithEmbed(), true, 'member');
expect(hasHtmlEmbedNode(result)).toBe(false);
expect(result.content).toHaveLength(1);
expect(result.content[0].content[0].text).toBe('body');
});
it('toggle ON + unknown/empty role fails closed (strips)', () => {
for (const role of [null, undefined, 'viewer'] as const) {
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, role))).toBe(
false,
);
}
});
it('toggle ON + admin/owner keep', () => {
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'admin'))).toBe(
true,
);
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'owner'))).toBe(
true,
);
});
it('toggle OFF strips for everyone (admin/owner/member)', () => {
for (const role of ['admin', 'owner', 'member'] as const) {
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), false, role))).toBe(
false,
);
}
});
});
const SRC = readFileSync(join(__dirname, 'page.service.ts'), 'utf-8');
describe('page create/duplicate gate identity is pinned (source contract)', () => {
it('create() gates on toggle AND the caller role param before deriving content/ydoc', () => {
// create() receives the caller's workspace role as `callerRole` and gates on
// the combined toggle-AND-admin predicate; the embed must be stripped BEFORE
// insertPage.
expect(SRC).toMatch(
/!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*callerRole\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/,
);
expect(SRC).toContain('prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson)');
});
it('duplicatePage() gates on toggle AND the duplicating user role (authUser.role)', () => {
expect(SRC).toMatch(
/!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*authUser\.role\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/,
);
});
it('both paths resolve the toggle from the workspace settings', () => {
expect(SRC).toContain('isHtmlEmbedFeatureEnabled(');
expect(SRC).toContain('this.workspaceRepo.findById(');
});
});

View File

@@ -30,6 +30,13 @@ import {
isAttachmentNode,
removeMarkTypeFromDoc,
} from '../../../common/helpers/prosemirror/utils';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import {
htmlToJson,
jsonToNode,
@@ -74,6 +81,7 @@ export class PageService {
private collaborationGateway: CollaborationGateway,
private readonly watcherService: WatcherService,
private readonly transclusionService: TransclusionService,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async findById(
@@ -93,6 +101,10 @@ export class PageService {
userId: string,
workspaceId: string,
createPageDto: CreatePageDto,
// Workspace role of the caller. Used to enforce the htmlEmbed admin gate on
// the create write path (see below). Optional/typed loosely so unknown or
// missing roles fall through to the non-admin (strip) branch by default.
callerRole?: string | null,
// Optional agent-edit provenance (from the signed access claim). When the
// actor is 'agent', stamp the page's source marker so a freshly created page
// shows it was created by the AI agent (§14 N2) — create goes through REST,
@@ -123,11 +135,36 @@ export class PageService {
let ydoc = undefined;
if (createPageDto?.content && createPageDto?.format) {
const prosemirrorJson = await this.parseProsemirrorContent(
let prosemirrorJson = await this.parseProsemirrorContent(
createPageDto.content,
createPageDto.format,
);
// SECURITY (Variant C admin gate, plain page-create write path):
// create() builds content/textContent/ydoc directly and persists them via
// insertPage, bypassing the collab onStoreDocument strip. htmlEmbed renders
// raw, unsanitized JS in readers' browsers, so only workspace admins/owners
// may author it. The create controller requires only space Edit, so a
// regular member could otherwise POST a doc (json, or the markdown/html
// <!--html-embed:BASE64--> forms that parse to the same node) containing an
// htmlEmbed and store XSS for every reader. Strip every htmlEmbed node when
// the caller is not an admin, BEFORE deriving textContent/ydoc/insert.
// The gate is toggle-AND-admin: htmlEmbed survives only when the workspace
// feature toggle is ON and the caller is an admin/owner. OFF (default) =>
// stripped for everyone. Cheap settings read keyed to the workspace.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(workspaceId))?.settings,
);
if (
!htmlEmbedAllowed(htmlEmbedEnabled, callerRole) &&
hasHtmlEmbedNode(prosemirrorJson)
) {
this.logger.warn(
`Stripping htmlEmbed node(s) from page creation by user ${userId} (space ${createPageDto.spaceId})`,
);
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
}
content = prosemirrorJson;
textContent = jsonToText(prosemirrorJson);
ydoc = createYdocFromJson(prosemirrorJson);
@@ -589,6 +626,12 @@ export class PageService {
const attachmentMap = new Map<string, ICopyPageAttachment>();
// Resolve the htmlEmbed toggle ONCE for the workspace; the per-page gate
// below is toggle-AND-admin (OFF default => stripped for everyone).
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(rootPage.workspaceId))?.settings,
);
const insertablePages: InsertablePage[] = await Promise.all(
pages.map(async (page) => {
const pageContent = getProsemirrorContent(page.content);
@@ -688,7 +731,25 @@ export class PageService {
}
});
const prosemirrorJson = prosemirrorDoc.toJSON();
let prosemirrorJson = prosemirrorDoc.toJSON();
// SECURITY (Variant C admin gate, duplication write path):
// Duplication builds the ydoc directly and bypasses the collab
// onStoreDocument strip. htmlEmbed renders raw, unsanitized JS in
// readers' browsers, so only workspace admins/owners may author it. A
// non-admin with space Edit could otherwise duplicate an admin page
// that contains an embed into a new page authored by them. Strip every
// htmlEmbed node from each duplicated page when the duplicating user is
// not an admin, BEFORE computing textContent/ydoc/insert.
if (
!htmlEmbedAllowed(htmlEmbedEnabled, authUser.role) &&
hasHtmlEmbedNode(prosemirrorJson)
) {
this.logger.warn(
`Stripping htmlEmbed node(s) from page duplication by user ${authUser.id} (source page ${page.id})`,
);
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
}
// Add "Copy of " prefix to the root page title only for duplicates in same space
let title = page.title;

View File

@@ -0,0 +1,144 @@
import { TransclusionService } from '../transclusion.service';
import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate.
// unsync returns a source snapshot the client materializes into the reference
// page; a non-admin must never receive an embed payload to re-persist. The gate
// reads `user.role` and strips before returning. All repos / access checks are
// mocked so the REAL gate logic runs end-to-end. Complements the existing
// transclusion specs (rewriteAttachmentsForUnsync, controller).
const WS = 'ws-1';
const REF_PAGE = 'ref-1';
const SRC_PAGE = 'src-1';
const TX_ID = 'tx-1';
const sourceContentWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>steal()</script>' } },
],
});
function buildService(featureEnabled = true) {
const pageRepo = {
findById: jest.fn(async (id: string) => ({
id,
workspaceId: WS,
spaceId: 'space-1',
deletedAt: null,
})),
};
const pageTransclusionsRepo = {
findByPageAndTransclusion: jest.fn(async () => ({
content: sourceContentWithEmbed(),
})),
};
const pageTransclusionReferencesRepo = {
deleteOne: jest.fn(async () => undefined),
};
const attachmentRepo = { findByIds: jest.fn(async () => []) };
const storageService = { copy: jest.fn(async () => undefined) };
const pageAccessService = {
validateCanEdit: jest.fn(async () => undefined),
validateCanView: jest.fn(async () => undefined),
};
// Workspace settings read used by the toggle-AND-admin gate.
const workspaceRepo = {
findById: jest.fn(async () => ({
id: WS,
settings: { htmlEmbed: featureEnabled },
})),
};
const service = new TransclusionService(
{} as any, // db (unused on this path)
pageTransclusionsRepo as any,
pageTransclusionReferencesRepo as any,
pageRepo as any,
{} as any, // pagePermissionRepo (unused)
{} as any, // spaceMemberRepo (unused)
attachmentRepo as any,
storageService as any,
pageAccessService as any,
workspaceRepo as any,
);
return service;
}
function userWithRole(role: string | null | undefined) {
return { id: 'u1', workspaceId: WS, role } as any;
}
describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => {
it('non-admin (member): returned content has htmlEmbed stripped', async () => {
const service = buildService();
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('member'),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
// Non-embed content is preserved.
expect(JSON.stringify(content)).toContain('snapshot body');
});
it('unknown/empty role: fails closed (stripped)', async () => {
for (const role of [undefined, null, 'viewer'] as const) {
const service = buildService();
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole(role),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
}
});
it('toggle ON + admin: returned content keeps the htmlEmbed', async () => {
const service = buildService(true);
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('admin'),
);
expect(hasHtmlEmbedNode(content)).toBe(true);
});
it('toggle ON + owner: returned content keeps the htmlEmbed', async () => {
const service = buildService(true);
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('owner'),
);
expect(hasHtmlEmbedNode(content)).toBe(true);
});
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
const service = buildService(false);
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('admin'),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
});
it('toggle OFF + member: stripped', async () => {
const service = buildService(false);
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('member'),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
});
});

View File

@@ -23,6 +23,13 @@ import { rewriteAttachmentsForUnsync } from './utils/transclusion-unsync.util';
import { TransclusionLookup } from './transclusion.types';
import { Page, User } from '@docmost/db/types/entity.types';
import { PageAccessService } from '../page-access/page-access.service';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
type ReferencingPageInfo = {
id: string;
@@ -47,6 +54,7 @@ export class TransclusionService {
private readonly attachmentRepo: AttachmentRepo,
private readonly storageService: StorageService,
private readonly pageAccessService: PageAccessService,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async syncPageTransclusions(
@@ -461,10 +469,12 @@ export class TransclusionService {
throw new NotFoundException('Sync block not found');
}
const { content, copies } = rewriteAttachmentsForUnsync(
let content: unknown;
let copies: ReturnType<typeof rewriteAttachmentsForUnsync>['copies'];
({ content, copies } = rewriteAttachmentsForUnsync(
transclusion.content,
() => uuid7(),
);
));
if (copies.length > 0) {
const oldIds = copies.map((c) => c.oldAttachmentId);
@@ -513,6 +523,24 @@ export class TransclusionService {
transclusionId,
);
// SECURITY (Variant C admin gate, transclusion unsync write path):
// The returned content is a source snapshot that the client materializes
// into the reference page via insertContentAt. The snapshot keeps any
// htmlEmbed verbatim, and unsync requires only space Edit/View. If the
// requesting user is not a workspace admin/owner, strip htmlEmbed nodes so a
// non-admin can never receive an embed payload to re-persist (the collab
// strip on the subsequent save is debounced/race-prone and must not be the
// only guard). Admin behavior is unchanged.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(user.workspaceId))?.settings,
);
if (!htmlEmbedAllowed(htmlEmbedEnabled, user.role) && hasHtmlEmbedNode(content)) {
this.logger.warn(
`Stripping htmlEmbed node(s) from transclusion unsync by user ${user.id} (reference page ${referencePageId}, source page ${sourcePageId})`,
);
content = stripHtmlEmbedNodes(content);
}
return { content };
}
}

View File

@@ -0,0 +1,133 @@
import { ShareService } from './share.service';
import { hasHtmlEmbedNode } from '../../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL ShareService server-authoritative htmlEmbed kill-switch for
// shared content. An anonymous public-share viewer cannot read the per-workspace
// htmlEmbed toggle, so the SERVER must decide what to serve: when the toggle is
// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are kept so
// the read-only client executes them. All repos / token service are mocked so the
// real prepareContentForShare logic runs end-to-end via getSharedPage.
const WS = 'ws-1';
const PAGE = 'page-1';
const pageContentWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'shared body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>track()</script>' } },
],
});
function buildService(opts: {
// undefined => workspaceRepo.findById returns undefined (fail-closed case)
htmlEmbed?: boolean | undefined;
workspaceMissing?: boolean;
}) {
const shareRepo = { findById: jest.fn() };
const pageRepo = {
findById: jest.fn(async () => ({
id: PAGE,
workspaceId: WS,
spaceId: 'space-1',
deletedAt: null,
content: pageContentWithEmbed(),
})),
};
const pagePermissionRepo = {
hasRestrictedAncestor: jest.fn(async () => false),
};
const tokenService = {
generateAttachmentToken: jest.fn(async () => 'tok'),
};
const workspaceRepo = {
findById: jest.fn(async () =>
opts.workspaceMissing
? undefined
: { id: WS, settings: { htmlEmbed: opts.htmlEmbed } },
),
};
const service = new ShareService(
shareRepo as any,
pageRepo as any,
pagePermissionRepo as any,
{} as any, // db (unused on this path)
tokenService as any,
{} as any, // transclusionService (unused)
workspaceRepo as any,
);
// getSharedPage resolves the share via getShareForPage (a raw db query).
// Stub it so we exercise prepareContentForShare deterministically.
jest
.spyOn(service, 'getShareForPage')
.mockResolvedValue({ pageId: PAGE, key: 'k', id: 's1' } as any);
return { service, workspaceRepo };
}
describe('ShareService htmlEmbed server-authoritative kill-switch (real code)', () => {
it('toggle ON: shared content keeps the htmlEmbed (served to anonymous viewer)', async () => {
const { service } = buildService({ htmlEmbed: true });
const { page } = await service.getSharedPage(
{ pageId: PAGE } as any,
WS,
);
expect(hasHtmlEmbedNode(page.content)).toBe(true);
expect(JSON.stringify(page.content)).toContain('shared body');
});
it('toggle OFF: htmlEmbed stripped from shared content', async () => {
const { service } = buildService({ htmlEmbed: false });
const { page } = await service.getSharedPage(
{ pageId: PAGE } as any,
WS,
);
expect(hasHtmlEmbedNode(page.content)).toBe(false);
// Non-embed content is preserved.
expect(JSON.stringify(page.content)).toContain('shared body');
});
it('toggle ABSENT: defaults OFF and strips', async () => {
const { service } = buildService({ htmlEmbed: undefined });
const { page } = await service.getSharedPage(
{ pageId: PAGE } as any,
WS,
);
expect(hasHtmlEmbedNode(page.content)).toBe(false);
});
it('workspace missing: fails closed (stripped)', async () => {
const { service } = buildService({ workspaceMissing: true });
const { page } = await service.getSharedPage(
{ pageId: PAGE } as any,
WS,
);
expect(hasHtmlEmbedNode(page.content)).toBe(false);
});
it('updatePublicAttachments strips htmlEmbed when toggle OFF', async () => {
const { service } = buildService({ htmlEmbed: false });
const out = await service.updatePublicAttachments({
id: PAGE,
workspaceId: WS,
content: pageContentWithEmbed(),
} as any);
expect(hasHtmlEmbedNode(out)).toBe(false);
});
it('updatePublicAttachments keeps htmlEmbed when toggle ON', async () => {
const { service } = buildService({ htmlEmbed: true });
const out = await service.updatePublicAttachments({
id: PAGE,
workspaceId: WS,
content: pageContentWithEmbed(),
} as any);
expect(hasHtmlEmbedNode(out)).toBe(true);
});
});

View File

@@ -26,6 +26,11 @@ import { validate as isValidUUID } from 'uuid';
import { sql } from 'kysely';
import { TransclusionService } from '../page/transclusion/transclusion.service';
import { TransclusionLookup } from '../page/transclusion/transclusion.types';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import {
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../common/helpers/prosemirror/html-embed.util';
@Injectable()
export class ShareService {
@@ -38,8 +43,22 @@ export class ShareService {
@InjectKysely() private readonly db: KyselyDB,
private readonly tokenService: TokenService,
private readonly transclusionService: TransclusionService,
private readonly workspaceRepo: WorkspaceRepo,
) {}
/**
* Resolve whether the htmlEmbed feature toggle is ON for a workspace.
* Fail-closed: a missing workspace (or absent/non-true setting) => OFF, so
* share content gets the embed stripped when we can't positively confirm the
* feature is enabled.
*/
private async isHtmlEmbedEnabledForWorkspace(
workspaceId: string,
): Promise<boolean> {
const workspace = await this.workspaceRepo.findById(workspaceId);
return isHtmlEmbedFeatureEnabled(workspace?.settings);
}
async getShareTree(shareId: string, workspaceId: string) {
const share = await this.shareRepo.findById(shareId);
if (!share || share.workspaceId !== workspaceId) {
@@ -360,6 +379,11 @@ export class ShareService {
workspaceId,
);
// Resolve the workspace htmlEmbed toggle once for this share request; all
// transcluded items belong to the same workspace as the host share.
const htmlEmbedEnabled =
await this.isHtmlEmbedEnabledForWorkspace(workspaceId);
// Sanitize each item's content for public delivery
// generate per-attachment tokens scoped to the source page
// and strip comment marks.
@@ -370,6 +394,7 @@ export class ShareService {
item.content,
item.sourcePageId,
workspaceId,
htmlEmbedEnabled,
);
return { ...item, content: doc?.toJSON() ?? item.content };
}),
@@ -417,10 +442,14 @@ export class ShareService {
}
async updatePublicAttachments(page: Page): Promise<any> {
const htmlEmbedEnabled = await this.isHtmlEmbedEnabledForWorkspace(
page.workspaceId,
);
const doc = await this.prepareContentForShare(
page.content,
page.id,
page.workspaceId,
htmlEmbedEnabled,
);
return doc?.toJSON() ?? page.content;
}
@@ -441,6 +470,13 @@ export class ShareService {
* not leak structure (existence, location, count, resolved state, or
* comment ids) to public viewers.
*
* 3. Strip `htmlEmbed` nodes when the workspace feature toggle is OFF. This
* makes the toggle a SERVER-AUTHORITATIVE kill-switch for shared content:
* when OFF the embed is never served to the anonymous viewer (who can't
* read the per-workspace toggle), when ON the embed is served so the
* read-only client executes it. `htmlEmbedEnabled` is resolved fail-closed
* by the callers (missing workspace => OFF => strip).
*
* Both share-content paths — the host page (`updatePublicAttachments`) and
* the share-scoped transclusion lookup (`lookupTransclusionForShare`) —
* call into this single helper so the two paths can never drift on
@@ -450,8 +486,16 @@ export class ShareService {
content: unknown,
attachmentOwnerPageId: string,
workspaceId: string,
htmlEmbedEnabled: boolean,
): Promise<Node | null> {
const pmJson = getProsemirrorContent(content);
let pmJson = getProsemirrorContent(content);
// Kill-switch: when the workspace toggle is OFF, never serve htmlEmbed
// nodes to public viewers. Strip before tokenizing/serializing.
if (!htmlEmbedEnabled) {
pmJson = stripHtmlEmbedNodes(pmJson);
}
const attachmentIds = getAttachmentIds(pmJson);
const tokenMap = new Map<string, string>();

View File

@@ -53,6 +53,12 @@ export class UpdateWorkspaceDto extends PartialType(CreateWorkspaceDto) {
@IsBoolean()
aiDictation: boolean;
// Workspace feature toggle for the admin-only HTML embed feature. Persisted at
// settings.htmlEmbed. ABSENT/false => OFF (default).
@IsOptional()
@IsBoolean()
htmlEmbed: boolean;
@IsOptional()
@IsBoolean()
aiPublicShareAssistant: boolean;

View File

@@ -511,6 +511,20 @@ export class WorkspaceService {
);
}
if (typeof updateWorkspaceDto.htmlEmbed !== 'undefined') {
const prev = settingsBefore?.htmlEmbed ?? false;
if (prev !== updateWorkspaceDto.htmlEmbed) {
before.htmlEmbed = prev;
after.htmlEmbed = updateWorkspaceDto.htmlEmbed;
}
await this.workspaceRepo.updateSetting(
workspaceId,
'htmlEmbed',
updateWorkspaceDto.htmlEmbed,
trx,
);
}
if (typeof updateWorkspaceDto.aiPublicShareAssistant !== 'undefined') {
const prev = settingsBefore?.ai?.publicShareAssistant ?? false;
if (prev !== updateWorkspaceDto.aiPublicShareAssistant) {
@@ -534,6 +548,7 @@ export class WorkspaceService {
delete updateWorkspaceDto.allowMemberTemplates;
delete updateWorkspaceDto.aiChat;
delete updateWorkspaceDto.aiDictation;
delete updateWorkspaceDto.htmlEmbed;
delete updateWorkspaceDto.aiPublicShareAssistant;
await this.workspaceRepo.updateWorkspace(

View File

@@ -265,6 +265,32 @@ export class WorkspaceRepo {
.executeTakeFirst();
}
/**
* Set a single scalar key at the TOP LEVEL of `settings` (e.g.
* `settings.htmlEmbed`). Mirrors `updateAiSettings`/`updateSharingSettings`
* but without a nested namespace object. `prefKey` comes from a fixed
* allowlist at the call site (inlined via `sql.raw`, never user input); the
* value is inlined via `sql.lit`.
*/
async updateSetting(
workspaceId: string,
prefKey: string,
prefValue: string | boolean,
trx?: KyselyTransaction,
) {
const db = dbOrTx(this.db, trx);
return db
.updateTable('workspaces')
.set({
settings: sql`COALESCE(settings, '{}'::jsonb)
|| jsonb_build_object('${sql.raw(prefKey)}', ${sql.lit(prefValue)})`,
updatedAt: new Date(),
})
.where('id', '=', workspaceId)
.returning(this.baseFields)
.executeTakeFirst();
}
async updateSharingSettings(
workspaceId: string,
prefKey: string,

View File

@@ -20,6 +20,14 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
import { markdownToHtml } from '@docmost/editor-ext';
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { formatImportHtml } from '../utils/import-formatter';
import {
buildAttachmentCandidates,
@@ -53,6 +61,8 @@ export class FileImportTaskService {
private readonly backlinkRepo: BacklinkRepo,
@InjectKysely() private readonly db: KyselyDB,
private readonly importAttachmentService: ImportAttachmentService,
private readonly userRepo: UserRepo,
private readonly workspaceRepo: WorkspaceRepo,
private eventEmitter: EventEmitter2,
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
) {}
@@ -149,6 +159,29 @@ export class FileImportTaskService {
.where('id', '=', fileTask.spaceId)
.executeTakeFirst();
// SECURITY (Variant C admin gate, zip/multi-file import write path):
// An imported .html/.md file can carry an htmlEmbed marker (the node's
// serialized form), which would execute raw, unsanitized JS in readers'
// browsers. Only workspace admins/owners may author it. Resolve the
// importer's role ONCE here; each page's prosemirror JSON is run through the
// strip below before textContent/ydoc/insert when the importer is not an
// admin, so a non-admin cannot smuggle the node in via a zip import (which
// requires only space Edit).
const importingUser = await this.userRepo.findById(
fileTask.creatorId,
fileTask.workspaceId,
);
// Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed
// survives only when the workspace feature toggle is ON and the importer is
// an admin/owner. OFF (default) => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(fileTask.workspaceId))?.settings,
);
const importerCanAuthorHtmlEmbed = htmlEmbedAllowed(
htmlEmbedEnabled,
importingUser?.role,
);
const pagesMap = new Map<string, ImportPageNode>();
for (const absPath of allFiles) {
@@ -496,9 +529,21 @@ export class FileImportTaskService {
await this.importService.processHTML(html),
);
const { title, prosemirrorJson } =
let { title, prosemirrorJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
// SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
// imported by a non-admin BEFORE computing textContent/ydoc/insert.
if (
!importerCanAuthorHtmlEmbed &&
hasHtmlEmbedNode(prosemirrorJson)
) {
this.logger.warn(
`Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
);
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
}
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,

View File

@@ -0,0 +1,123 @@
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
// FAIL-CLOSED IDENTITY for the import write paths.
//
// import.service / file-import-task.service cannot be unit-LOADED under the
// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in
// transformIgnorePatterns). So we cover the two load-bearing properties at the
// strongest feasible layer:
//
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate
// predicate each entrypoint runs against the role resolved from
// userRepo.findById(...): a MISSING user (findById -> undefined) must fail
// closed (strip), and only 'admin'/'owner' keep the embed.
//
// (2) IDENTITY — source-pin which identity governs the gate so a refactor that
// swaps the lookup to the wrong user (e.g. the queue worker's caller) is
// caught: zip import resolves the role from `fileTask.creatorId`; single
// import from the request `userId`. NOT some ambient caller.
//
// If a guard is deleted/misplaced or the identity field changes, these break.
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
],
});
// The real predicate both import entrypoints apply (see the SECURITY blocks in
// import.service.ts and file-import-task.service.ts): resolve the importer via
// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`.
function applyImportGate(
json: any,
featureEnabled: boolean,
importingUser: { role?: string } | undefined,
) {
if (
!htmlEmbedAllowed(featureEnabled, importingUser?.role) &&
hasHtmlEmbedNode(json)
) {
return stripHtmlEmbedNodes(json);
}
return json;
}
describe('import gate fail-closed by toggle AND resolved-user role (real helpers)', () => {
it('toggle ON + missing user (userRepo.findById -> undefined) strips the embed', () => {
// findById returns undefined when the user/workspace pair does not resolve;
// undefined?.role is undefined -> htmlEmbedAllowed(true, undefined) === false.
const result = applyImportGate(docWithEmbed(), true, undefined);
expect(hasHtmlEmbedNode(result)).toBe(false);
});
it("toggle ON + resolved role 'member' strips", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'member' })),
).toBe(false);
});
it("toggle ON + resolved role 'admin' keeps the embed", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'admin' })),
).toBe(true);
});
it("toggle ON + resolved role 'owner' keeps the embed", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'owner' })),
).toBe(true);
});
it('toggle OFF strips for every role (admin/owner/member)', () => {
for (const role of ['admin', 'owner', 'member'] as const) {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), false, { role })),
).toBe(false);
}
});
});
// Source-pin the identity each entrypoint feeds to userRepo.findById. These are
// the lines that decide WHOSE role governs the gate; pinning them means a
// refactor that points the lookup at the wrong user trips the test.
const SRC_DIR = join(__dirname);
describe('import gate identity is pinned to the importer (source contract)', () => {
it('single import resolves the role from the request userId', () => {
const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8');
// The role lookup must key on the request `userId`, then gate on the role.
expect(src).toMatch(
/this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/,
);
expect(src).toMatch(
/htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*\)/,
);
// And the toggle is resolved from the workspace settings.
expect(src).toContain('isHtmlEmbedFeatureEnabled(');
// And the gate uses the real strip helper.
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
});
it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => {
const src = readFileSync(
join(SRC_DIR, 'file-import-task.service.ts'),
'utf-8',
);
expect(src).toMatch(
/this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/,
);
expect(src).toMatch(
/importerCanAuthorHtmlEmbed\s*=\s*htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*,?\s*\)/,
);
expect(src).toContain('isHtmlEmbedFeatureEnabled(');
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
});
});

View File

@@ -1,5 +1,13 @@
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { MultipartFile } from '@fastify/multipart';
import * as path from 'path';
import {
@@ -37,10 +45,12 @@ export class ImportService {
constructor(
private readonly pageRepo: PageRepo,
private readonly userRepo: UserRepo,
private readonly storageService: StorageService,
@InjectKysely() private readonly db: KyselyDB,
@InjectQueue(QueueName.FILE_TASK_QUEUE)
private readonly fileTaskQueue: Queue,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async importPage(
@@ -83,8 +93,30 @@ export class ImportService {
throw new BadRequestException(message);
}
const { title, prosemirrorJson } =
this.extractTitleAndRemoveHeading(prosemirrorState);
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
const title = extracted.title;
let prosemirrorJson = extracted.prosemirrorJson;
// SECURITY (Variant C admin gate, import write path):
// An imported .html/.md file can carry an htmlEmbed marker (the node's
// serialized form), which would execute raw JS in readers' browsers. Only
// workspace admins/owners may author it, so strip htmlEmbed nodes from
// imports performed by a non-admin user.
if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
const importingUser = await this.userRepo.findById(userId, workspaceId);
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
// feature toggle is ON and the importer is an admin/owner. OFF (default)
// => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(workspaceId))?.settings,
);
if (!htmlEmbedAllowed(htmlEmbedEnabled, importingUser?.role)) {
this.logger.warn(
`Stripping htmlEmbed node(s) from import by user ${userId}`,
);
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
}
}
const pageTitle = title || fileName;

View File

@@ -1,95 +0,0 @@
# Вставка произвольного HTML/CSS/JS в страницы — анализ и подходы
> Статус: **черновик / обсуждение**. Решение по модели изоляции ещё не принято — см. раздел «Развилка».
> Исходный кейс: нужно вставлять трекер (счётчик аналитики) на вики-страницы.
## 1. Почему «из коробки» произвольный HTML вставить нельзя
Контент страницы в Docmost хранится не как HTML, а как **ProseMirror JSON** (документ TipTap, синхронизируется через Yjs). Любой путь, которым контент попадает в страницу — ручной ввод, вставка из буфера (paste), импорт Markdown/HTML — проходит парсинг строго по схеме редактора:
`apps/server/src/common/helpers/prosemirror/html/generateJSON.ts:45`
```ts
PMDOMParser.fromSchema(schema).parse(doc.body, options)
```
`PMDOMParser.fromSchema` оставляет только те теги, для которых в схеме есть нода/марк с правилом `parseHTML` (`p`, `h1–h6`, списки, `blockquote`, `code`/`pre`, `a`, `strong`/`em`, таблицы, картинки, callout и т.п.). Всё остальное — `<div>`, `<style>`, `<script>`, инлайн-стили, кастомные теги — **молча отбрасывается**, выживает максимум текст внутри.
- Ноды «сырой HTML» в схеме нет (`rawHtml`/`htmlNode` в `packages/editor-ext/src/lib` отсутствуют).
- `marked` сам по себе HTML пропускает насквозь (санитайзер не подключён в `packages/editor-ext/src/lib/markdown/utils/marked.utils.ts`), но это неважно — финальным фильтром выступает схема ProseMirror на следующем шаге.
- Единственное, что отдалённо похоже на «вставку HTML» — embed-нода (`packages/editor-ext/src/lib/embed.ts`), но это `<iframe>` на URL известных провайдеров с санитизацией ссылки, а не произвольная разметка.
**Вывод:** чтобы получить произвольный HTML на странице, нужно добавлять в схему редактора отдельную ноду со своим `parseHTML`/`renderHTML` и собственным рендерингом.
## 2. Механика: как добавить такую ноду (одинаково для любого варианта)
По образцу `packages/editor-ext/src/lib/excalidraw.ts`:
1. **Новая нода** в `packages/editor-ext/src/lib/html-embed/`:
`Node.create({ name: 'htmlEmbed', group: 'block', atom: true, isolating: true })`.
Атрибут `source` (сырой HTML/CSS/JS строкой) с `parseHTML`/`renderHTML` через `data-`-атрибут или base64, чтобы корректно гонялось туда-обратно через HTML↔JSON. Экспорт добавить в `packages/editor-ext/src/index.ts`.
2. **Регистрация в ДВУХ схемах** (иначе сервер вырежет ноду при сохранении/коллаборации):
- клиент: `apps/client/src/features/editor/extensions/extensions.ts`
- сервер: `tiptapExtensions` в `apps/server/src/collaboration/collaboration.util.ts:58`
3. **React NodeView** на клиенте — то, что реально показывает контент. Здесь и зарыта безопасность (см. развилку).
4. **Markdown-сериализация** (turndown/marked в `packages/editor-ext/src/lib/markdown`) — если нода должна выживать при импорте/экспорте Markdown, иначе там она потеряется.
5. **UI вставки** — slash-команда/кнопка тулбара + модалка с редактором кода.
## 3. Развилка: модель изоляции (ключевое решение)
«Произвольный JS» в многопользовательской вики — это не фича рендеринга, а **модель доверия**. От выбора зависит весь NodeView и безопасность всего инстанса.
### Вариант A — Sandboxed iframe
Контент кладётся в `<iframe sandbox="allow-scripts" srcdoc="...">`.
- JS/CSS работают, но изолированы: нет доступа к DOM вики, кукам, токену сессии, localStorage.
- Безопасно, stored-XSS закрыт. Так делают HTML-эмбеды в Notion/Confluence.
- Минусы: скрипт не может управлять самой страницей; авто-высоту приходится решать через `postMessage`.
### Вариант B — Raw-инъекция в DOM страницы
`dangerouslySetInnerHTML` + выполнение `<script>`.
- Полная власть: скрипт выполняется в origin вики, может всё.
- Это **stored-XSS by design**: скрипт любого автора выполняется в браузере каждого читателя с его сессией → кража токенов, захват аккаунтов.
- Допустимо только на доверенном/одно-пользовательском инстансе.
### Вариант C — Raw-инъекция, но admin-only
Полная мощь raw-инъекции, но вставка такой ноды разрешена только админам/доверенным ролям; обычные авторы её добавлять не могут. Компромисс между мощью и риском.
## 4. Заработает ли трекер в песочнице? — НЕТ (для настоящего трекера)
Без `allow-same-origin` у iframe **opaque origin** (`null`). Из этого следуют ограничения, ломающие именно трекеры:
| Что делает трекер | В sandbox (`allow-scripts`) |
|---|---|
| Загрузить внешний `<script src>` и выполнить | ✅ работает |
| Отправить запрос/пиксель/`sendBeacon` на свой сервер | ✅ работает (fire-and-forget) |
| Поставить куку (`document.cookie`) | ❌ блокируется/кидает ошибку |
| `localStorage` / `sessionStorage` | ❌ `SecurityError` в opaque origin |
| Прочитать URL / referrer / title **самой вики-страницы** | ❌ видит только `about:srcdoc`, не родителя |
| Достучаться до DOM страницы (`window.parent`) | ❌ запрещено sandbox'ом |
**Итог:** GA4 / Яндекс.Метрика / Matomo внутри песочницы либо упадут на попытке поставить `_ga`/`_ym`, либо отправят «хит», где страница = `about:srcdoc`, а уникальный посетитель не сохраняется → данные мусорные. Песочница и «считать саму страницу» — взаимоисключающие вещи by design.
Добавлять `allow-same-origin` вместе с `allow-scripts` как «компромисс» нельзя: при одинаковом origin это снимает песочницу полностью (предупреждение MDN) — то есть это та же raw-инъекция окольным путём.
## 5. Что это значит для выбора
- **Цель — аналитика самих вики-страниц** (посещения, поведение, уники) → нужен скрипт в origin вики = **raw-инъекция**. Песочница тут бесполезна в принципе. Риск — stored-XSS, поэтому разумно держать это под **admin-only** (вариант C).
- **Цель — самодостаточный встроенный виджет** (калькулятор, демка, виджет без кук и без доступа к родителю) → песочницы (вариант A) хватает.
## 6. Возможные направления решения (выбрать позже)
1. **Admin-only raw-инъекция** — нода `htmlEmbed` с полным выполнением скрипта в origin вики; вставка только для админов/доверенных ролей. Трекер работает полноценно (куки, уники, URL страницы). Компромисс мощь/риск.
2. **Raw-инъекция без ограничений** — любой автор может вставить произвольный JS. Максимум гибкости, но stored-XSS для всех читателей. ОК только если все редакторы полностью доверенные.
3. **Узкая фича «только трекер», без произвольного JS** — вместо универсальной HTML-ноды поле в настройках для ID счётчика (GA/Метрика), сниппет вставляется в шаблон страницы. Безопасно и решает именно задачу трекинга.
4. **Sandboxed iframe (вариант A)** — для встраиваемых виджетов; для аналитики самих страниц не годится.
---
### Ссылки на код
- Парсинг по схеме (фильтр HTML): `apps/server/src/common/helpers/prosemirror/html/generateJSON.ts`
- Серверный список расширений: `apps/server/src/collaboration/collaboration.util.ts:58`
- Клиентский список расширений: `apps/client/src/features/editor/extensions/extensions.ts`
- Реестр нод editor-ext: `packages/editor-ext/src/index.ts`
- Образец кастомной ноды: `packages/editor-ext/src/lib/excalidraw.ts`
- Образец iframe-ноды: `packages/editor-ext/src/lib/embed.ts`
- Markdown ↔ HTML: `packages/editor-ext/src/lib/markdown/`

75
docs/html-embed-admin.md Normal file
View File

@@ -0,0 +1,75 @@
# HTML embed (admin-only) — workspace feature toggle
The `htmlEmbed` node lets a workspace **admin/owner** embed raw HTML/CSS/JS that
**executes in the wiki page origin** for everyone who views the page. This is a
deliberate stored-XSS surface (e.g. for analytics trackers / third-party
widgets). It is gated behind a per-workspace feature toggle that is **OFF by
default**.
## Behavior
- **OFF by default.** A fresh/unconfigured workspace has the feature disabled
(`settings.htmlEmbed` absent or `false`). With the toggle OFF, htmlEmbed is
stripped on every save for **everyone, including admins** — the feature is
fully disabled.
- **Only admins/owners can insert.** When the toggle is ON, the `/html` slash
item (and the embed editor) is offered only to workspace admins/owners.
Members never see it. The gate is **toggle AND admin**.
- **Server strips on every write path (fail-closed).** The UI gate is a
convenience only. The server independently strips htmlEmbed nodes from every
write where the gate is not satisfied. If a **non-admin** edits and saves a
page that contains an admin's embed, that save **strips the embed** — the
admin must re-add it. Same if the toggle is OFF.
- **Turning the toggle OFF neutralizes existing embeds.** Existing embeds are
stripped on their next save (collab store / REST / etc.), and the client
NodeView stops executing them immediately, rendering a disabled placeholder
instead (defense in depth at render time).
## Storage
The toggle lives in the workspace `settings` jsonb at the top level:
`settings.htmlEmbed` (boolean). ABSENT/`false` => OFF.
- Update field: `htmlEmbed: boolean` on `UpdateWorkspaceDto`.
- Persisted by `WorkspaceService.update` via
`WorkspaceRepo.updateSetting(workspaceId, 'htmlEmbed', value)` (top-level
scalar settings key; analogous to `updateAiSettings`). The change is
audit-logged like the AI toggles.
## Server gate
`apps/server/src/common/helpers/prosemirror/html-embed.util.ts`:
```ts
// Allowed only when the workspace feature toggle is ON and the user is admin/owner.
export function htmlEmbedAllowed(featureEnabled: boolean, role): boolean {
return featureEnabled === true && canAuthorHtmlEmbed(role);
}
// settings.htmlEmbed === true (ABSENT/non-true => OFF).
export function isHtmlEmbedFeatureEnabled(settings): boolean { ... }
```
Every write-path gate site reads the workspace's setting
(`workspace.settings?.htmlEmbed === true`, via `WorkspaceRepo.findById`) and
applies `!htmlEmbedAllowed(featureEnabled, role)` before persisting. The 7 sites:
- `core/page/services/page.service.ts``create()` and `duplicatePage()`
- `collaboration/extensions/persistence.extension.ts` — collab store
- `collaboration/collaboration.handler.ts` — REST/MCP/AI content update
- `integrations/import/services/import.service.ts` — single import
- `integrations/import/services/file-import-task.service.ts` — zip import
- `core/page/transclusion/transclusion.service.ts` — transclusion unsync
## Client
- Slash menu: the `/html` item carries `requiresHtmlEmbedFeature: true` and
`adminOnly: true`; it is hidden unless the persisted
`workspace.settings.htmlEmbed === true` AND the user is admin. The slash
function reads the toggle from the persisted `currentUser` localStorage entry
(same mechanism as `isCurrentUserAdmin()`).
- NodeView (`html-embed-view.tsx`): only executes the raw HTML/JS when the
toggle is ON; otherwise renders a neutral "HTML embed is disabled in this
workspace" placeholder and injects nothing.
- Admin UI: a Switch in **Workspace Settings → General** (`HtmlEmbedSettings`)
toggles the feature with an optimistic `updateWorkspace({ htmlEmbed })`, with a
description documenting the security implications.

View File

@@ -4,7 +4,9 @@
"private": true,
"scripts": {
"build": "tsc --build",
"dev": "tsc --watch"
"dev": "tsc --watch",
"test": "vitest run",
"test:watch": "vitest"
},
"main": "dist/index.js",
"module": "./src/index.ts",

View File

@@ -16,6 +16,7 @@ export * from "./lib/custom-code-block";
export * from "./lib/drawio";
export * from "./lib/excalidraw";
export * from "./lib/embed";
export * from "./lib/html-embed/html-embed";
export * from "./lib/mention";
export * from "./lib/markdown";
export * from "./lib/search-and-replace";

View File

@@ -0,0 +1,138 @@
import { Node, mergeAttributes } from "@tiptap/core";
import { ReactNodeViewRenderer } from "@tiptap/react";
export interface HtmlEmbedOptions {
HTMLAttributes: Record<string, any>;
view: any;
}
export interface HtmlEmbedAttributes {
// Raw HTML/CSS/JS string that is injected verbatim into the wiki origin.
source?: string;
}
declare module "@tiptap/core" {
interface Commands<ReturnType> {
htmlEmbed: {
setHtmlEmbed: (attributes?: HtmlEmbedAttributes) => ReturnType;
};
}
}
/**
* Encode the raw source to base64 for the `data-source` attribute.
*
* The source is arbitrary HTML/CSS/JS. Storing it raw inside an HTML attribute
* would (a) require heavy escaping and (b) risk the parser interpreting markup
* inside the attribute. Base64 makes the round-trip HTML <-> ProseMirror JSON
* lossless and keeps the markup inert while it sits in the attribute.
*
* `encodeURIComponent`/`decodeURIComponent` wrap btoa/atob so that non-Latin1
* (UTF-8) characters survive the base64 step.
*/
export function encodeHtmlEmbedSource(source: string): string {
if (!source) return "";
try {
if (typeof btoa === "function") {
return btoa(encodeURIComponent(source));
}
// Node fallback (server-side schema parsing has no global btoa).
return Buffer.from(encodeURIComponent(source), "utf-8").toString("base64");
} catch {
// Never swallow silently in a way that loses data: fall back to raw.
return "";
}
}
export function decodeHtmlEmbedSource(encoded: string): string {
if (!encoded) return "";
try {
if (typeof atob === "function") {
return decodeURIComponent(atob(encoded));
}
// Node fallback.
return decodeURIComponent(
Buffer.from(encoded, "base64").toString("utf-8"),
);
} catch {
return "";
}
}
export const HtmlEmbed = Node.create<HtmlEmbedOptions>({
name: "htmlEmbed",
inline: false,
group: "block",
// atom + isolating: the node has no editable ProseMirror children; its body
// is the opaque `source` string rendered by the NodeView.
atom: true,
isolating: true,
defining: true,
draggable: true,
addOptions() {
return {
HTMLAttributes: {},
view: null,
};
},
addAttributes() {
return {
source: {
default: "",
// Decode the base64 payload back to the raw source on parse.
parseHTML: (element) =>
decodeHtmlEmbedSource(element.getAttribute("data-source") || ""),
// Encode the raw source to base64 on render so it round-trips losslessly
// through the HTML <-> JSON conversions used by export/import/collab.
renderHTML: (attributes: HtmlEmbedAttributes) => ({
"data-source": encodeHtmlEmbedSource(attributes.source || ""),
}),
},
};
},
parseHTML() {
return [
{
tag: `div[data-type="${this.name}"]`,
},
];
},
renderHTML({ HTMLAttributes }) {
// The static HTML representation is just a marker div carrying the encoded
// source. The actual raw markup is NOT expanded here on purpose: the static
// generateHTML output (used for previews, search indexing, exports) must not
// itself become an injection vector. Only the client NodeView expands and
// executes the source.
return [
"div",
mergeAttributes(
{ "data-type": this.name },
this.options.HTMLAttributes,
HTMLAttributes,
),
];
},
addCommands() {
return {
setHtmlEmbed:
(attrs: HtmlEmbedAttributes) =>
({ commands }) => {
return commands.insertContent({
type: this.name,
attrs: attrs,
});
},
};
},
addNodeView() {
// Force the react node view to render immediately using flush sync.
this.editor.isInitialized = true;
return ReactNodeViewRenderer(this.options.view);
},
});

View File

@@ -0,0 +1,112 @@
import { describe, it, expect } from "vitest";
import { markdownToHtml, htmlToMarkdown } from "./index";
import {
encodeHtmlEmbedSource,
decodeHtmlEmbedSource,
} from "../html-embed/html-embed";
// SECURITY (Variant C admin gate, import attack surface).
//
// The markdown import path is the only write path where an htmlEmbed reaches
// the server purely from file bytes (no editor / collab socket). The marked
// tokenizer in `html-embed.marked.ts` and the turndown rule in
// `turndown.utils.ts` are what materialize the `<!--html-embed:BASE64-->`
// marker into the `<div data-type="htmlEmbed" data-source="BASE64">` element
// that the server then parses into an htmlEmbed node and the admin gate strips.
//
// If either the tokenizer regex or the turndown rule shape drifts, the marker
// would either (a) stop becoming an htmlEmbed node (silently dropping admin
// content) or (b) become some OTHER tag the server's `hasHtmlEmbedNode` no
// longer recognizes (a strip bypass). These tests pin the marker <-> embed-div
// contract that the server-side strip relies on. editor-ext had ZERO tests
// before this file; this adds the runner + the round-trip coverage.
// The server parses the embed div by matching `data-type="htmlEmbed"` and
// decoding `data-source`; mirror that here so the assertion is exactly what the
// real `htmlToJson` -> htmlEmbed node parse depends on (the node's parseHTML in
// html-embed.ts uses the same selector + decodeHtmlEmbedSource).
const EMBED_DIV_RE = /<div[^>]*\bdata-type="htmlEmbed"[^>]*>/;
function extractEmbedSource(html: string): string | undefined {
const div = EMBED_DIV_RE.exec(html);
if (!div) return undefined;
const enc = /data-source="([^"]*)"/.exec(div[0]);
if (!enc) return undefined;
return decodeHtmlEmbedSource(enc[1]);
}
// Replicates the server's `hasHtmlEmbedNode` decision against the embed *div*
// (the HTML form the server immediately converts to JSON). If this matches, the
// server's JSON-level `hasHtmlEmbedNode` will too, because htmlToJson maps this
// exact div to an htmlEmbed node.
function htmlHasHtmlEmbed(html: string): boolean {
return EMBED_DIV_RE.test(html);
}
describe("markdown <!--html-embed--> import round-trip", () => {
const source = "<script>x</script>";
it("markdownToHtml turns the marker into an htmlEmbed div carrying the source", async () => {
const md = "<!--html-embed:" + encodeHtmlEmbedSource(source) + "-->";
const html = await markdownToHtml(md);
// The marker became the embed div the server recognizes as an htmlEmbed
// node (so the server's hasHtmlEmbedNode would match it after htmlToJson).
expect(htmlHasHtmlEmbed(html)).toBe(true);
// The decoded source is the original script, intact.
expect(extractEmbedSource(html)).toBe(source);
// The raw script is NOT inlined into the HTML — it stays base64 in the
// attribute (the marker itself must not be a direct injection vector).
expect(html).not.toContain("<script>x</script>");
});
it("preserves UTF-8 / special chars in the embedded source", async () => {
const utf8 = '<script>console.log("héllo → 世界")</script>';
const md = "<!--html-embed:" + encodeHtmlEmbedSource(utf8) + "-->";
const html = await markdownToHtml(md);
expect(htmlHasHtmlEmbed(html)).toBe(true);
expect(extractEmbedSource(html)).toBe(utf8);
});
it("an empty marker still produces an htmlEmbed div (empty source)", async () => {
const html = await markdownToHtml("<!--html-embed:-->");
expect(htmlHasHtmlEmbed(html)).toBe(true);
expect(extractEmbedSource(html)).toBe("");
});
it("round-trips htmlToMarkdown -> markdownToHtml preserving the embed marker", async () => {
const encoded = encodeHtmlEmbedSource(source);
// NOTE: turndown drops a *blank* (childless) element before any custom rule
// runs, and the htmlEmbed div is normally childless. The export pipeline
// therefore must give the rule a non-blank div to fire on; we add an inert
// text child here to exercise the real turndown htmlEmbed rule. (A blank
// embed div serializing to "" is asserted separately below as a documented
// edge so this contract drift is visible.)
const startHtml = `<div data-type="htmlEmbed" data-source="${encoded}">x</div>`;
// Export to markdown: the turndown rule emits the <!--html-embed:..-->
// marker (lossless, inert in plain markdown viewers).
const md = htmlToMarkdown(startHtml);
expect(md).toContain("<!--html-embed:" + encoded + "-->");
// Re-import: the marker round-trips back into an embed div with the same
// decoded source — this is the marker <-> embed-div contract the server's
// import strip depends on.
const html = await markdownToHtml(md);
expect(htmlHasHtmlEmbed(html)).toBe(true);
expect(extractEmbedSource(html)).toBe(source);
});
it("documents that a BLANK embed div serializes to empty markdown (turndown drops childless blocks)", () => {
const encoded = encodeHtmlEmbedSource(source);
const blank = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
// This pins current behavior so a future change to the turndown rule (e.g.
// making it fire on blank nodes) is caught rather than silently shipping.
expect(htmlToMarkdown(blank)).toBe("");
});
it("the base64 codec itself round-trips (no '<' leaks into the attribute)", () => {
const encoded = encodeHtmlEmbedSource(source);
expect(encoded).not.toContain("<");
expect(decodeHtmlEmbedSource(encoded)).toBe(source);
});
});

View File

@@ -0,0 +1,41 @@
import { Token } from "marked";
interface HtmlEmbedToken {
type: "htmlEmbed";
raw: string;
encoded: string;
}
/**
* Marked extension that rebuilds an `htmlEmbed` node from the HTML comment
* marker produced by the turndown rule (`<!--html-embed:<base64>-->`).
*
* It emits the same marker div the node's `parseHTML` recognizes, so the
* pipeline MD -> HTML -> ProseMirror JSON restores the node (and its
* base64 `data-source`) exactly. We do NOT expand the raw markup here; the
* source stays base64-encoded in the attribute and is only executed by the
* client NodeView.
*/
export const htmlEmbedExtension = {
name: "htmlEmbed",
level: "block" as const,
start(src: string) {
return src.indexOf("<!--html-embed:");
},
tokenizer(src: string): HtmlEmbedToken | undefined {
const rule = /^<!--html-embed:([A-Za-z0-9+/=]*)-->/;
const match = rule.exec(src);
if (match) {
return {
type: "htmlEmbed",
raw: match[0],
encoded: match[1] ?? "",
};
}
},
renderer(token: Token) {
const htmlEmbedToken = token as HtmlEmbedToken;
return `<div data-type="htmlEmbed" data-source="${htmlEmbedToken.encoded}"></div>`;
},
};

View File

@@ -2,6 +2,7 @@ import { marked } from "marked";
import { calloutExtension } from "./callout.marked";
import { mathBlockExtension } from "./math-block.marked";
import { mathInlineExtension } from "./math-inline.marked";
import { htmlEmbedExtension } from "./html-embed.marked";
marked.use({
renderer: {
@@ -34,7 +35,12 @@ marked.use({
});
marked.use({
extensions: [calloutExtension, mathBlockExtension, mathInlineExtension],
extensions: [
calloutExtension,
mathBlockExtension,
mathInlineExtension,
htmlEmbedExtension,
],
});
marked.setOptions({ breaks: true });

View File

@@ -32,12 +32,39 @@ export function htmlToMarkdown(html: string): string {
mathInline,
mathBlock,
iframeEmbed,
htmlEmbed,
image,
video,
]);
return turndownService.turndown(html).replaceAll('<br>', ' ');
}
/**
* Serialize the `htmlEmbed` node to Markdown.
*
* Markdown has no native representation for an arbitrary-HTML block, so we
* preserve the node losslessly as an HTML comment carrying the base64-encoded
* source (the same `data-source` payload the node stores). `markdownToHtml`
* recognizes the same marker and rebuilds the node, so the round-trip
* MD -> HTML -> JSON keeps the source intact. The comment also keeps the raw
* markup inert in the exported `.md` file (it does not render in plain Markdown
* viewers).
*/
function htmlEmbed(turndownService: _TurndownService) {
turndownService.addRule('htmlEmbed', {
filter: function (node: HTMLInputElement) {
return (
node.nodeName === 'DIV' &&
node.getAttribute('data-type') === 'htmlEmbed'
);
},
replacement: function (_content: string, node: HTMLInputElement) {
const encoded = node.getAttribute('data-source') || '';
return `\n\n<!--html-embed:${encoded}-->\n\n`;
},
});
}
function listParagraph(turndownService: _TurndownService) {
turndownService.addRule('paragraph', {
filter: ['p'],

View File

@@ -11,6 +11,7 @@
"jsx": "react-jsx",
"sourceMap": true,
"outDir": "./dist",
"rootDir": "./src",
"baseUrl": "./",
"incremental": true,
"skipLibCheck": true,
@@ -19,5 +20,7 @@
"strictBindCallApply": false,
"forceConsistentCasingInFileNames": false,
"noFallthroughCasesInSwitch": false
}
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "src/**/*.spec.ts", "src/**/*.test.ts"]
}

View File

@@ -0,0 +1,13 @@
import { defineConfig } from "vitest/config";
// Minimal vitest setup for @docmost/editor-ext (mirrors apps/client's config,
// trimmed to what the markdown/html-embed round-trip tests need). The markdown
// utils run in plain Node (marked + turndown), so no jsdom/react plugin is
// required here.
export default defineConfig({
test: {
environment: "node",
globals: true,
include: ["src/**/*.{test,spec}.ts"],
},
});