feat(editor): admin-only raw HTML/CSS/JS embed node
Adds an htmlEmbed block node that renders and executes raw HTML/CSS/JS in the wiki origin (e.g. an analytics tracker) — the owner-chosen variant C. Because this is stored-XSS by design, only workspace admins/owners may persist such a node; everyone executes it when reading.

- Node (editor-ext): htmlEmbed atom/isolating block; source stored base64 in data-source for lossless HTML<->JSON round-trip. renderHTML emits only the encoded marker (never inlines raw markup), so generateHTML/export/search are not themselves injection vectors. Registered in BOTH client extensions and server tiptapExtensions. Markdown round-trip via an <!--html-embed:b64--> comment (turndown) + a marked rule.
- Client NodeView: injects source and re-creates <script> elements so they actually run; edit modal; renders in read-only/share too. Slash item is admin-gated (adminOnly filtered by the user's workspace role).
- SERVER ENFORCEMENT (the real control — UI gating alone is insufficient): stripHtmlEmbedNodes() removes htmlEmbed from any document persisted by a non-admin, applied at every write path that introduces content from an untrusted author: collab onStoreDocument, REST/MCP/AI updatePageContent, single-file import, zip/multi-file import, page duplication, and transclusion unsync. Page restore introduces no new content. Public share/readonly viewers render fetched (already-stripped) content and do NOT open a collab socket, so the only residual is a transient broadcast window to concurrent authenticated editors (documented).

Implements docs/arbitrary-html-embed-plan.md (variant C).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,43 @@
|
|||||||
|
/*
 * Styles for the htmlEmbed NodeView: the wrapper, the injected-content
 * container, the hover toolbar, the empty-source placeholder and the
 * selected-state outline.
 */

/* Wrapper element; positioned so the toolbar can be placed absolutely inside it. */
.htmlEmbedNodeView {
  position: relative;
}

/* The container the raw source is injected into. */
.htmlEmbedContent {
  width: 100%;
}

/* Edit affordance overlay, only shown while editing the document. */
/* Pinned to the wrapper's top-right corner and invisible until hovered. */
.htmlEmbedToolbar {
  position: absolute;
  top: 4px;
  right: 4px;
  z-index: 2;
  opacity: 0;
  transition: opacity 0.15s ease;
}

/* Fade the toolbar in while the pointer is over the embed. */
.htmlEmbedNodeView:hover .htmlEmbedToolbar {
  opacity: 1;
}

/* Placeholder card shown when the source is empty (edit mode only). */
.htmlEmbedPlaceholder {
  display: flex;
  align-items: center;
  justify-content: center;
  gap: 8px;
  padding: 16px;
  border: 1px dashed var(--mantine-color-gray-4);
  border-radius: 8px;
  color: var(--mantine-color-dimmed);

  /* Dark-scheme border override; `dark` is presumably the Mantine PostCSS mixin — confirm. */
  @mixin dark {
    border-color: var(--mantine-color-dark-3);
  }
}

/* Outline applied to the wrapper while the node is selected in the editor. */
.htmlEmbedSelected {
  outline: 2px solid var(--mantine-color-blue-5);
  border-radius: 8px;
}
|
||||||
@@ -0,0 +1,149 @@
|
|||||||
|
import { NodeViewProps, NodeViewWrapper } from "@tiptap/react";
|
||||||
|
import React, { useCallback, useEffect, useRef, useState } from "react";
|
||||||
|
import clsx from "clsx";
|
||||||
|
import {
|
||||||
|
ActionIcon,
|
||||||
|
Button,
|
||||||
|
Group,
|
||||||
|
Modal,
|
||||||
|
Text,
|
||||||
|
Textarea,
|
||||||
|
} from "@mantine/core";
|
||||||
|
import { IconCode, IconEdit } from "@tabler/icons-react";
|
||||||
|
import { useTranslation } from "react-i18next";
|
||||||
|
import useUserRole from "@/hooks/use-user-role.tsx";
|
||||||
|
import classes from "./html-embed-view.module.css";
|
||||||
|
|
||||||
|
/**
 * Inject raw HTML (including <script> tags) into `container`, executing any
 * scripts.
 *
 * Setting `innerHTML` does NOT run the <script> tags it parses: the HTML spec
 * marks them as "already started", so they never execute. To make the
 * tracker/analytics use-case work, every freshly-parsed script is replaced by
 * a brand-new <script> element carrying the same attributes and inline code.
 * A programmatically created and inserted <script> DOES execute, which
 * restores normal script behaviour in the wiki origin (Variant C).
 *
 * Script elements created with `document.createElement` default to async
 * loading, unlike parser-inserted ones. Without correction, two external
 * scripts in the same embed (e.g. a library followed by a plugin) could run
 * in whichever order the network delivers them. Unless the author explicitly
 * wrote `async`, the clone therefore gets `async = false`, so external scripts
 * execute in insertion (source) order.
 *
 * NOTE(review): inline scripts still execute immediately on insertion, so an
 * inline script that depends on a preceding external script may still run
 * before that script has loaded — confirm whether that case must be handled.
 *
 * @param container Element whose content is replaced by the rendered source.
 * @param source Raw HTML/CSS/JS to inject; an empty string just clears the
 *   container.
 */
function renderRawHtml(container: HTMLElement, source: string) {
  // Clear any previous render (re-render on source change).
  container.innerHTML = "";
  if (!source) return;

  container.innerHTML = source;

  // Snapshot the list first: the loop replaces nodes while iterating.
  const scripts = Array.from(container.querySelectorAll("script"));
  for (const oldScript of scripts) {
    const newScript = document.createElement("script");

    // Copy every attribute (src, type, async, defer, data-*, etc.) and note
    // whether the author asked for async loading.
    let authorWantsAsync = false;
    for (const attr of Array.from(oldScript.attributes)) {
      newScript.setAttribute(attr.name, attr.value);
      if (attr.name.toLowerCase() === "async") {
        authorWantsAsync = true;
      }
    }

    // Dynamically created scripts are async by default; opt out so external
    // scripts keep their source order unless `async` was explicitly written.
    if (!authorWantsAsync) {
      newScript.async = false;
    }

    // Copy inline code.
    newScript.text = oldScript.textContent ?? "";

    // Replacing the node in place triggers execution.
    oldScript.parentNode?.replaceChild(newScript, oldScript);
  }
}
|
||||||
|
|
||||||
|
/**
 * React NodeView for the `htmlEmbed` node.
 *
 * Whenever the node's `source` attribute changes, the raw source is rendered
 * into the content container through `renderRawHtml`. The same view is used
 * in editable and non-editable editors, so embedded scripts run for readers
 * as well. Users who may edit (editable editor AND admin role) additionally
 * get a hover toolbar, a click-to-add placeholder while the source is empty,
 * and a modal with a textarea for changing the source.
 *
 * @param props Tiptap NodeView props; `node`, `selected`, `updateAttributes`
 *   and `editor` are read.
 */
export default function HtmlEmbedView(props: NodeViewProps) {
  const { t } = useTranslation();
  const { node, selected, updateAttributes, editor } = props;
  const { source } = node.attrs as { source: string };
  const { isAdmin } = useUserRole();

  const hostRef = useRef<HTMLDivElement | null>(null);
  const [isModalOpen, setIsModalOpen] = useState(false);
  const [pendingSource, setPendingSource] = useState<string>(source || "");

  // Re-inject the raw source each time it changes. Nothing happens while the
  // content container is not mounted (i.e. the placeholder is showing).
  useEffect(() => {
    const host = hostRef.current;
    if (!host) return;
    renderRawHtml(host, source || "");
  }, [source]);

  // Seed the textarea with the current source every time the modal opens.
  const openEditor = useCallback(() => {
    setPendingSource(source || "");
    setIsModalOpen(true);
  }, [source]);

  // Persist the draft only when the editor is editable; always close.
  const onSave = useCallback(() => {
    if (editor.isEditable) {
      updateAttributes({ source: pendingSource });
    }
    setIsModalOpen(false);
  }, [pendingSource, editor.isEditable, updateAttributes]);

  const closeModal = () => setIsModalOpen(false);

  // Editing affordances require edit mode AND an admin; the server strips the
  // node from non-admin writes regardless.
  const canEdit = editor.isEditable && isAdmin;
  const showPlaceholder = !source && canEdit;

  // Either the click-to-add placeholder (empty source, can edit) or the
  // container the raw HTML/CSS/JS is injected into. With an empty source and
  // no edit rights the container is rendered but stays empty.
  const body = showPlaceholder ? (
    <div className={classes.htmlEmbedPlaceholder} onClick={openEditor}>
      <IconCode size={18} />
      <Text size="sm">{t("Click to add HTML / CSS / JS")}</Text>
    </div>
  ) : (
    <div ref={hostRef} className={classes.htmlEmbedContent} />
  );

  return (
    <NodeViewWrapper
      data-drag-handle
      className={clsx(
        classes.htmlEmbedNodeView,
        selected && classes.htmlEmbedSelected,
      )}
    >
      {canEdit && (
        <div className={classes.htmlEmbedToolbar}>
          <ActionIcon
            variant="default"
            size="sm"
            aria-label={t("Edit HTML embed")}
            onClick={openEditor}
          >
            <IconEdit size={16} />
          </ActionIcon>
        </div>
      )}

      {body}

      <Modal
        opened={isModalOpen}
        onClose={closeModal}
        title={t("Edit HTML embed")}
        size="lg"
      >
        <Text size="xs" c="dimmed" mb="xs">
          {t(
            "This HTML/CSS/JS runs in the page origin for everyone who views it. Admins only.",
          )}
        </Text>
        <Textarea
          autosize
          minRows={10}
          maxRows={24}
          value={pendingSource}
          onChange={(event) => setPendingSource(event.currentTarget.value)}
          placeholder={t("<script>...</script>")}
          styles={{ input: { fontFamily: "monospace" } }}
          data-autofocus
        />
        <Group justify="flex-end" mt="md">
          <Button variant="default" onClick={closeModal}>
            {t("Cancel")}
          </Button>
          <Button onClick={onSave}>{t("Save")}</Button>
        </Group>
      </Modal>
    </NodeViewWrapper>
  );
}
|
||||||
@@ -587,6 +587,21 @@ const CommandGroups: SlashMenuGroupedItemsType = {
|
|||||||
.insertColumns({ layout: "five_equal" })
|
.insertColumns({ layout: "five_equal" })
|
||||||
.run(),
|
.run(),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
title: "HTML embed",
|
||||||
|
description: "Embed raw HTML, CSS and JavaScript (admins only).",
|
||||||
|
searchTerms: ["html", "css", "js", "javascript", "script", "tracker", "analytics", "raw", "embed"],
|
||||||
|
icon: IconCode,
|
||||||
|
adminOnly: true,
|
||||||
|
command: ({ editor, range }: CommandProps) => {
|
||||||
|
editor
|
||||||
|
.chain()
|
||||||
|
.focus()
|
||||||
|
.deleteRange(range)
|
||||||
|
.setHtmlEmbed({ source: "" })
|
||||||
|
.run();
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
title: "Iframe embed",
|
title: "Iframe embed",
|
||||||
description: "Embed any Iframe",
|
description: "Embed any Iframe",
|
||||||
@@ -744,6 +759,24 @@ const CommandGroups: SlashMenuGroupedItemsType = {
|
|||||||
],
|
],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Tell whether the persisted `currentUser` entry in localStorage describes a
 * workspace owner or admin (the same payload `currentUserAtom` stores).
 *
 * Only used to hide admin-only slash items such as the raw HTML embed. This
 * is a UI gate; the server independently strips admin-only nodes from
 * non-admin writes.
 *
 * @returns `true` when the stored `user.role` is `"owner"` or `"admin"`;
 *   `false` when the entry is missing, unparsable, or holds any other role.
 */
function isCurrentUserAdmin(): boolean {
  const adminRoles = ["owner", "admin"];
  try {
    const serialized = localStorage.getItem("currentUser");
    if (!serialized) {
      return false;
    }
    const storedRole = JSON.parse(serialized)?.user?.role;
    return adminRoles.includes(storedRole);
  } catch {
    // Storage unavailable or malformed JSON: treat as non-admin.
    return false;
  }
}
|
||||||
|
|
||||||
export const getSuggestionItems = ({
|
export const getSuggestionItems = ({
|
||||||
query,
|
query,
|
||||||
excludeItems,
|
excludeItems,
|
||||||
@@ -753,6 +786,7 @@ export const getSuggestionItems = ({
|
|||||||
}): SlashMenuGroupedItemsType => {
|
}): SlashMenuGroupedItemsType => {
|
||||||
const search = query.toLowerCase();
|
const search = query.toLowerCase();
|
||||||
const filteredGroups: SlashMenuGroupedItemsType = {};
|
const filteredGroups: SlashMenuGroupedItemsType = {};
|
||||||
|
const isAdmin = isCurrentUserAdmin();
|
||||||
|
|
||||||
const fuzzyMatch = (query: string, target: string) => {
|
const fuzzyMatch = (query: string, target: string) => {
|
||||||
let queryIndex = 0;
|
let queryIndex = 0;
|
||||||
@@ -767,6 +801,8 @@ export const getSuggestionItems = ({
|
|||||||
for (const [group, items] of Object.entries(CommandGroups)) {
|
for (const [group, items] of Object.entries(CommandGroups)) {
|
||||||
const filteredItems = items.filter((item) => {
|
const filteredItems = items.filter((item) => {
|
||||||
if (excludeItems?.has(item.title)) return false;
|
if (excludeItems?.has(item.title)) return false;
|
||||||
|
// Hide admin-only items (raw HTML embed) from non-admins.
|
||||||
|
if (item.adminOnly && !isAdmin) return false;
|
||||||
return (
|
return (
|
||||||
fuzzyMatch(search, item.title) ||
|
fuzzyMatch(search, item.title) ||
|
||||||
item.description.toLowerCase().includes(search) ||
|
item.description.toLowerCase().includes(search) ||
|
||||||
|
|||||||
@@ -21,6 +21,9 @@ export type SlashMenuItemType = {
|
|||||||
searchTerms: string[];
|
searchTerms: string[];
|
||||||
command: (props: CommandProps) => void;
|
command: (props: CommandProps) => void;
|
||||||
disable?: (editor: ReturnType<typeof useEditor>) => boolean;
|
disable?: (editor: ReturnType<typeof useEditor>) => boolean;
|
||||||
|
// When true, the item is only offered to workspace admins/owners. This is a
|
||||||
|
// UI convenience only — the real authoring gate is enforced server-side.
|
||||||
|
adminOnly?: boolean;
|
||||||
};
|
};
|
||||||
|
|
||||||
export type SlashMenuGroupedItemsType = {
|
export type SlashMenuGroupedItemsType = {
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ import {
|
|||||||
Drawio,
|
Drawio,
|
||||||
Excalidraw,
|
Excalidraw,
|
||||||
Embed,
|
Embed,
|
||||||
|
HtmlEmbed,
|
||||||
TiptapPdf,
|
TiptapPdf,
|
||||||
PageBreak,
|
PageBreak,
|
||||||
SearchAndReplace,
|
SearchAndReplace,
|
||||||
@@ -87,6 +88,7 @@ import CodeBlockView from "@/features/editor/components/code-block/code-block-vi
|
|||||||
import DrawioView from "../components/drawio/drawio-view";
|
import DrawioView from "../components/drawio/drawio-view";
|
||||||
import ExcalidrawView from "@/features/editor/components/excalidraw/excalidraw-view-lazy.tsx";
|
import ExcalidrawView from "@/features/editor/components/excalidraw/excalidraw-view-lazy.tsx";
|
||||||
import EmbedView from "@/features/editor/components/embed/embed-view.tsx";
|
import EmbedView from "@/features/editor/components/embed/embed-view.tsx";
|
||||||
|
import HtmlEmbedView from "@/features/editor/components/html-embed/html-embed-view.tsx";
|
||||||
import PdfView from "@/features/editor/components/pdf/pdf-view.tsx";
|
import PdfView from "@/features/editor/components/pdf/pdf-view.tsx";
|
||||||
import SubpagesView from "@/features/editor/components/subpages/subpages-view.tsx";
|
import SubpagesView from "@/features/editor/components/subpages/subpages-view.tsx";
|
||||||
import TransclusionView from "@/features/editor/components/transclusion/transclusion-view.tsx";
|
import TransclusionView from "@/features/editor/components/transclusion/transclusion-view.tsx";
|
||||||
@@ -365,6 +367,13 @@ export const mainExtensions = [
|
|||||||
Embed.configure({
|
Embed.configure({
|
||||||
view: EmbedView,
|
view: EmbedView,
|
||||||
}),
|
}),
|
||||||
|
// Raw HTML/CSS/JS node (Variant C). The node is registered for ALL users so
|
||||||
|
// documents authored by admins render correctly for everyone; INSERTION is
|
||||||
|
// gated to admins in the slash menu, and the server strips the node from any
|
||||||
|
// non-admin write so a non-admin cannot persist it.
|
||||||
|
HtmlEmbed.configure({
|
||||||
|
view: HtmlEmbedView,
|
||||||
|
}),
|
||||||
TiptapPdf.configure({
|
TiptapPdf.configure({
|
||||||
view: PdfView,
|
view: PdfView,
|
||||||
}),
|
}),
|
||||||
|
|||||||
@@ -8,6 +8,11 @@ import {
|
|||||||
import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util';
|
import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util';
|
||||||
import * as Y from 'yjs';
|
import * as Y from 'yjs';
|
||||||
import { User } from '@docmost/db/types/entity.types';
|
import { User } from '@docmost/db/types/entity.types';
|
||||||
|
import {
|
||||||
|
canAuthorHtmlEmbed,
|
||||||
|
hasHtmlEmbedNode,
|
||||||
|
stripHtmlEmbedNodes,
|
||||||
|
} from '../common/helpers/prosemirror/html-embed.util';
|
||||||
|
|
||||||
export type CollabEventHandlers = ReturnType<
|
export type CollabEventHandlers = ReturnType<
|
||||||
CollaborationHandler['getHandlers']
|
CollaborationHandler['getHandlers']
|
||||||
@@ -83,8 +88,25 @@ export class CollaborationHandler {
|
|||||||
user: User;
|
user: User;
|
||||||
},
|
},
|
||||||
) => {
|
) => {
|
||||||
const { prosemirrorJson, operation, user } = payload;
|
const { operation, user } = payload;
|
||||||
|
let { prosemirrorJson } = payload;
|
||||||
this.logger.debug('Updating page content via yjs', documentName);
|
this.logger.debug('Updating page content via yjs', documentName);
|
||||||
|
|
||||||
|
// SECURITY (Variant C admin gate, REST/MCP/AI write path):
|
||||||
|
// updatePageContent is the server-side entrypoint used by the REST page
|
||||||
|
// update endpoint and by the MCP/AI agent. Raw `htmlEmbed` nodes execute
|
||||||
|
// arbitrary JS in every reader's browser, so a NON-admin caller must not
|
||||||
|
// be able to persist them here. If the editing user is not a workspace
|
||||||
|
// admin/owner, strip every htmlEmbed node before it reaches the ydoc.
|
||||||
|
if (!canAuthorHtmlEmbed(user?.role)) {
|
||||||
|
if (hasHtmlEmbedNode(prosemirrorJson)) {
|
||||||
|
this.logger.warn(
|
||||||
|
`Stripping htmlEmbed node(s) from non-admin update by user ${user?.id} on ${documentName}`,
|
||||||
|
);
|
||||||
|
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
await this.withYdocConnection(
|
await this.withYdocConnection(
|
||||||
hocuspocus,
|
hocuspocus,
|
||||||
documentName,
|
documentName,
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ import {
|
|||||||
Drawio,
|
Drawio,
|
||||||
Excalidraw,
|
Excalidraw,
|
||||||
Embed,
|
Embed,
|
||||||
|
HtmlEmbed,
|
||||||
Mention,
|
Mention,
|
||||||
Subpages,
|
Subpages,
|
||||||
Highlight,
|
Highlight,
|
||||||
@@ -102,6 +103,10 @@ export const tiptapExtensions = [
|
|||||||
Drawio,
|
Drawio,
|
||||||
Excalidraw,
|
Excalidraw,
|
||||||
Embed,
|
Embed,
|
||||||
|
// Registered server-side so the node survives schema parsing/serialization.
|
||||||
|
// Authoring is gated to admins at the document WRITE paths (see
|
||||||
|
// stripHtmlEmbedNodes usage in persistence/page services), NOT here.
|
||||||
|
HtmlEmbed,
|
||||||
Mention,
|
Mention,
|
||||||
Subpages,
|
Subpages,
|
||||||
Columns,
|
Columns,
|
||||||
|
|||||||
@@ -39,6 +39,11 @@ import {
|
|||||||
HISTORY_INTERVAL,
|
HISTORY_INTERVAL,
|
||||||
} from '../constants';
|
} from '../constants';
|
||||||
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
||||||
|
import {
|
||||||
|
canAuthorHtmlEmbed,
|
||||||
|
hasHtmlEmbedNode,
|
||||||
|
stripHtmlEmbedNodes,
|
||||||
|
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class PersistenceExtension implements Extension {
|
export class PersistenceExtension implements Extension {
|
||||||
@@ -112,7 +117,56 @@ export class PersistenceExtension implements Extension {
|
|||||||
|
|
||||||
const pageId = getPageId(documentName);
|
const pageId = getPageId(documentName);
|
||||||
|
|
||||||
const tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
|
let tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
|
||||||
|
|
||||||
|
// SECURITY (Variant C admin gate, collab WebSocket write path):
|
||||||
|
// The persisted snapshot is the merged ydoc, which may contain an htmlEmbed
|
||||||
|
// node inserted by ANY connected editor. htmlEmbed renders raw, unsanitized
|
||||||
|
// JS in every reader's browser, so only workspace admins/owners may author
|
||||||
|
// it. When the user whose store triggers this persist is not an admin, strip
|
||||||
|
// every htmlEmbed node before it is written to the page row AND before the
|
||||||
|
// ydoc state is re-encoded, so the node cannot be reintroduced by a
|
||||||
|
// non-admin via the collab socket.
|
||||||
|
// NOTE (residual risk): the gate is keyed to the storing connection's user.
|
||||||
|
// If an admin already authored an htmlEmbed and a non-admin's later store
|
||||||
|
// does not touch it, this strip would remove the admin's embed on that
|
||||||
|
// non-admin store. This is intentionally conservative (fail closed): the
|
||||||
|
// admin re-adds/keeps the node on their own next edit. A future refinement
|
||||||
|
// could diff against the previously persisted admin-authored embeds.
|
||||||
|
//
|
||||||
|
// ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in
|
||||||
|
// the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs
|
||||||
|
// update to connected clients immediately, so a non-admin's transient
|
||||||
|
// htmlEmbed can execute in OTHER open editors' browsers in the brief window
|
||||||
|
// before this persist strips it. The exposure is limited to concurrent
|
||||||
|
// AUTHENTICATED space members who have the doc open with Edit rights
|
||||||
|
// (semi-trusted) — anonymous public-share/readonly viewers do NOT open a
|
||||||
|
// collab socket (ReadonlyPageEditor renders fetched, already-stripped
|
||||||
|
// content; HocuspocusProvider is only used by the authenticated editable
|
||||||
|
// page-editor), and the PERSISTED page row plus every share/readonly read
|
||||||
|
// path are protected by this strip. The window is therefore accepted rather
|
||||||
|
// than mitigated with an inbound beforeBroadcast strip.
|
||||||
|
if (!canAuthorHtmlEmbed(context?.user?.role)) {
|
||||||
|
if (hasHtmlEmbedNode(tiptapJson)) {
|
||||||
|
this.logger.warn(
|
||||||
|
`Stripping htmlEmbed node(s) from non-admin collab store by user ${context?.user?.id} on ${documentName}`,
|
||||||
|
);
|
||||||
|
tiptapJson = stripHtmlEmbedNodes(tiptapJson);
|
||||||
|
// Reflect the stripped content back into the shared ydoc so the node is
|
||||||
|
// removed for all connected clients, not just the persisted row.
|
||||||
|
const fragment = document.getXmlFragment('default');
|
||||||
|
if (fragment.length > 0) {
|
||||||
|
fragment.delete(0, fragment.length);
|
||||||
|
}
|
||||||
|
const cleanDoc = TiptapTransformer.toYdoc(
|
||||||
|
tiptapJson,
|
||||||
|
'default',
|
||||||
|
tiptapExtensions,
|
||||||
|
);
|
||||||
|
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const ydocState = Buffer.from(Y.encodeStateAsUpdate(document));
|
const ydocState = Buffer.from(Y.encodeStateAsUpdate(document));
|
||||||
|
|
||||||
let textContent = null;
|
let textContent = null;
|
||||||
|
|||||||
229
apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
Normal file
229
apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
Normal file
@@ -0,0 +1,229 @@
|
|||||||
|
import {
|
||||||
|
canAuthorHtmlEmbed,
|
||||||
|
hasHtmlEmbedNode,
|
||||||
|
stripHtmlEmbedNodes,
|
||||||
|
} from './html-embed.util';
|
||||||
|
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
|
||||||
|
import {
|
||||||
|
decodeHtmlEmbedSource,
|
||||||
|
encodeHtmlEmbedSource,
|
||||||
|
} from '@docmost/editor-ext';
|
||||||
|
|
||||||
|
/**
 * Pre-order search through a ProseMirror-style JSON tree.
 *
 * Returns the first node (the root included) whose `type` equals `type`,
 * descending through `content` arrays in document order, or `undefined` when
 * nothing matches or the input is not an object.
 */
const findFirstChild = (json: any, type: string): any | undefined => {
  // Explicit stack; children are pushed in reverse so they pop in order.
  const pending: any[] = [json];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== 'object') continue;
    if (current.type === type) return current;
    if (Array.isArray(current.content)) {
      for (let i = current.content.length - 1; i >= 0; i--) {
        pending.push(current.content[i]);
      }
    }
  }
  return undefined;
};
|
||||||
|
|
||||||
|
// Unit tests for stripHtmlEmbedNodes: removal at the top level and inside
// nested containers, input immutability, and pass-through of clean documents.
describe('stripHtmlEmbedNodes', () => {
  it('removes a top-level htmlEmbed node', () => {
    const doc = {
      type: 'doc',
      content: [
        { type: 'paragraph', content: [{ type: 'text', text: 'before' }] },
        { type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
        { type: 'paragraph', content: [{ type: 'text', text: 'after' }] },
      ],
    };

    const result = stripHtmlEmbedNodes(doc);
    expect(hasHtmlEmbedNode(result)).toBe(false);
    // Other nodes are preserved.
    expect(result.content).toHaveLength(2);
    expect(result.content[0].content[0].text).toBe('before');
    expect(result.content[1].content[0].text).toBe('after');
  });

  it('removes nested htmlEmbed nodes (e.g. inside columns)', () => {
    const doc = {
      type: 'doc',
      content: [
        {
          type: 'columns',
          content: [
            {
              type: 'column',
              content: [
                { type: 'htmlEmbed', attrs: { source: '<b>x</b>' } },
                {
                  type: 'paragraph',
                  content: [{ type: 'text', text: 'keep' }],
                },
              ],
            },
          ],
        },
      ],
    };

    const result = stripHtmlEmbedNodes(doc);
    expect(hasHtmlEmbedNode(result)).toBe(false);
    // Only the sibling paragraph remains inside the column.
    const col = findFirstChild(result, 'column');
    expect(col.content).toHaveLength(1);
    expect(col.content[0].type).toBe('paragraph');
  });

  it('does not mutate the input document', () => {
    const doc = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source: 'x' } }],
    };
    // The return value is deliberately ignored; only the input is inspected.
    stripHtmlEmbedNodes(doc);
    expect(doc.content).toHaveLength(1);
    expect(doc.content[0].type).toBe('htmlEmbed');
  });

  it('leaves documents without htmlEmbed untouched', () => {
    const doc = {
      type: 'doc',
      content: [
        { type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
      ],
    };
    expect(hasHtmlEmbedNode(doc)).toBe(false);
    const result = stripHtmlEmbedNodes(doc);
    // Structural equality only; the result may be a fresh copy.
    expect(result).toEqual(doc);
  });
});
|
||||||
|
|
||||||
|
// Unit tests for the role predicate: only 'owner' and 'admin' may author
// htmlEmbed; every other role value, including null/undefined, is denied.
describe('canAuthorHtmlEmbed', () => {
  it('allows owner and admin', () => {
    expect(canAuthorHtmlEmbed('owner')).toBe(true);
    expect(canAuthorHtmlEmbed('admin')).toBe(true);
  });
  it('denies member and unknown/empty roles', () => {
    expect(canAuthorHtmlEmbed('member')).toBe(false);
    expect(canAuthorHtmlEmbed(null)).toBe(false);
    expect(canAuthorHtmlEmbed(undefined)).toBe(false);
    expect(canAuthorHtmlEmbed('viewer')).toBe(false);
  });
});
|
||||||
|
|
||||||
|
/**
 * Test-local copy of the decision every non-admin persistence guard makes
 * (collab store, single import, zip import, duplication, transclusion unsync):
 * strip only when the role may not author embeds AND the document actually
 * contains one; otherwise hand back the very same object.
 */
const applyAdminGate = (json: any, role: string | null | undefined) => {
  const mustStrip = !canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json);
  return mustStrip ? stripHtmlEmbedNodes(json) : json;
};
|
||||||
|
|
||||||
|
// Tests for the combined gate (applyAdminGate): non-admin and unknown roles
// get embeds stripped, admins/owners keep them, and documents without embeds
// are returned as the same object reference.
describe('admin-gate write-path decision (duplication / import / unsync)', () => {
  // Shared fixture: one paragraph to keep plus one embed to strip.
  const docWithEmbed = {
    type: 'doc',
    content: [
      { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
      { type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
    ],
  };

  it('strips the embed for a non-admin (member) author', () => {
    const result = applyAdminGate(docWithEmbed, 'member');
    expect(hasHtmlEmbedNode(result)).toBe(false);
    expect(result.content).toHaveLength(1);
    expect(result.content[0].content[0].text).toBe('keep');
  });

  it('strips the embed for unknown/empty roles', () => {
    expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, null))).toBe(false);
    expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, undefined))).toBe(
      false,
    );
    expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'viewer'))).toBe(
      false,
    );
  });

  it('keeps the embed for an admin author', () => {
    const result = applyAdminGate(docWithEmbed, 'admin');
    expect(hasHtmlEmbedNode(result)).toBe(true);
    // Identity check: admins get the original object back, not a copy.
    expect(result).toBe(docWithEmbed);
  });

  it('keeps the embed for an owner author', () => {
    const result = applyAdminGate(docWithEmbed, 'owner');
    expect(hasHtmlEmbedNode(result)).toBe(true);
  });

  it('strips nested embeds (subtree/column duplication) for a non-admin', () => {
    const nested = {
      type: 'doc',
      content: [
        {
          type: 'columns',
          content: [
            {
              type: 'column',
              content: [
                { type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
                { type: 'paragraph', content: [{ type: 'text', text: 'ok' }] },
              ],
            },
          ],
        },
      ],
    };
    const result = applyAdminGate(nested, 'member');
    expect(hasHtmlEmbedNode(result)).toBe(false);
    const col = findFirstChild(result, 'column');
    expect(col.content).toHaveLength(1);
    expect(col.content[0].type).toBe('paragraph');
  });

  it('leaves a non-admin doc without embeds untouched (no needless rewrite)', () => {
    const clean = {
      type: 'doc',
      content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
    };
    const result = applyAdminGate(clean, 'member');
    // Same reference: the gate must not copy when there is nothing to strip.
    expect(result).toBe(clean);
  });
});
|
||||||
|
|
||||||
|
// Tests for the encode/decode helpers imported from @docmost/editor-ext: the
// encoded form must contain no '<' and must decode back to the exact source,
// including non-ASCII characters.
describe('htmlEmbed source base64 codec', () => {
  it('round-trips arbitrary source including UTF-8', () => {
    const source = '<script>console.log("héllo → 世界")</script>';
    const encoded = encodeHtmlEmbedSource(source);
    expect(encoded).not.toContain('<');
    expect(decodeHtmlEmbedSource(encoded)).toBe(source);
  });
});
|
||||||
|
|
||||||
|
// Tests that the htmlEmbed node survives conversion between static HTML and
// ProseMirror JSON (htmlToJson / jsonToHtml) with its source intact, and that
// the generated HTML does not inline the raw markup.
describe('htmlEmbed node HTML <-> JSON round-trip', () => {
  it('preserves the raw source through HTML -> JSON', () => {
    const source = '<script>track("page")</script><style>.a{color:red}</style>';
    const encoded = encodeHtmlEmbedSource(source);
    // Hand-built static markup: marker element carrying the encoded source.
    const html = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;

    const json = htmlToJson(html);
    const node = findFirstChild(json, 'htmlEmbed');
    expect(node).toBeDefined();
    expect(node.attrs.source).toBe(source);
  });

  it('round-trips JSON -> HTML -> JSON keeping the source', () => {
    const source = '<div onclick="x()">raw & markup</div>';
    const json = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source } }],
    };

    const html = jsonToHtml(json);
    // The static HTML carries the encoded source but does NOT inline the raw
    // markup (it must not be an injection vector by itself).
    expect(html).toContain('data-type="htmlEmbed"');
    expect(html).not.toContain('onclick');

    const back = htmlToJson(html);
    const node = findFirstChild(back, 'htmlEmbed');
    expect(node).toBeDefined();
    expect(node.attrs.source).toBe(source);
  });
});
|
||||||
@@ -0,0 +1,68 @@
|
|||||||
|
import { JSONContent } from '@tiptap/core';
|
||||||
|
|
||||||
|
/** ProseMirror node type name of the raw HTML/CSS/JS embed block. */
export const HTML_EMBED_NODE_NAME = 'htmlEmbed';

/**
 * Recursively remove every `htmlEmbed` node from a ProseMirror JSON document
 * or fragment.
 *
 * SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin
 * (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to
 * author it. This helper is the server-side enforcement primitive: every WRITE
 * path that may persist content from a NON-admin caller must run the incoming
 * document through this function so a non-admin cannot smuggle the node in via
 * the collab socket, the REST/MCP/AI content-update path, paste, or import.
 *
 * Accepted shapes:
 * - a node object (`{ type, content? }`): its `content` array is filtered
 *   recursively;
 * - an array of nodes (a fragment): treated like a `content` array, so the
 *   result is still an array rather than an index-keyed object.
 *
 * A root node that is itself an `htmlEmbed` has no parent to be dropped from
 * and is returned as a shallow copy; callers that can receive a bare node must
 * reject it themselves.
 *
 * @param pmJson Document, node, or array of nodes to clean.
 * @returns A NEW value of the same shape with the embeds removed; the input is
 *   not mutated. Non-object inputs (`null`, `undefined`, primitives) are
 *   returned as-is.
 */
export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
  if (!pmJson || typeof pmJson !== 'object') {
    return pmJson;
  }

  // A bare fragment must stay an array; spreading it into an object literal
  // would corrupt the payload.
  if (Array.isArray(pmJson)) {
    return withoutHtmlEmbedNodes(pmJson) as unknown as T;
  }

  const node = pmJson as unknown as JSONContent;
  if (!Array.isArray(node.content)) {
    return { ...node } as unknown as T;
  }

  return {
    ...node,
    content: withoutHtmlEmbedNodes(node.content),
  } as unknown as T;
}

/**
 * Copy a list of sibling nodes, dropping `htmlEmbed` entries and recursing into
 * the remaining ones so nested embeds (e.g. inside columns/callouts) are
 * removed as well.
 */
function withoutHtmlEmbedNodes(children: readonly unknown[]): JSONContent[] {
  const kept: JSONContent[] = [];
  for (const child of children) {
    const isEmbed =
      !!child &&
      typeof child === 'object' &&
      (child as JSONContent).type === HTML_EMBED_NODE_NAME;
    if (isEmbed) {
      continue;
    }
    kept.push(stripHtmlEmbedNodes(child) as JSONContent);
  }
  return kept;
}

/**
 * Returns true if the value contains at least one `htmlEmbed` node anywhere in
 * its tree. Useful to decide whether a strip pass would change anything (e.g.
 * for logging a rejected non-admin embed attempt).
 *
 * Accepts the same shapes as `stripHtmlEmbedNodes`: a node object (the node
 * itself and its `content` descendants are checked) or an array of nodes.
 * Non-object inputs yield false.
 */
export function hasHtmlEmbedNode(pmJson: unknown): boolean {
  if (!pmJson || typeof pmJson !== 'object') {
    return false;
  }

  // Fragment: any element (or descendant of one) counts.
  if (Array.isArray(pmJson)) {
    return pmJson.some((child) => hasHtmlEmbedNode(child));
  }

  const node = pmJson as JSONContent;
  if (node.type === HTML_EMBED_NODE_NAME) {
    return true;
  }

  return (
    Array.isArray(node.content) &&
    node.content.some((child) => hasHtmlEmbedNode(child))
  );
}
|
||||||
|
|
||||||
|
/**
 * Decide whether a workspace role is trusted to author `htmlEmbed` nodes.
 *
 * Only the exact role strings `'owner'` and `'admin'` qualify. Every other
 * value — `'member'`, an unrecognized role, `null`, or `undefined` — is treated
 * as untrusted. All write paths are expected to share this single definition.
 *
 * @param role Workspace role of the acting user, if known.
 * @returns true when the role may persist `htmlEmbed` nodes.
 */
export function canAuthorHtmlEmbed(role: string | null | undefined): boolean {
  switch (role) {
    case 'owner':
    case 'admin':
      return true;
    default:
      return false;
  }
}
|
||||||
@@ -30,6 +30,11 @@ import {
|
|||||||
isAttachmentNode,
|
isAttachmentNode,
|
||||||
removeMarkTypeFromDoc,
|
removeMarkTypeFromDoc,
|
||||||
} from '../../../common/helpers/prosemirror/utils';
|
} from '../../../common/helpers/prosemirror/utils';
|
||||||
|
import {
|
||||||
|
canAuthorHtmlEmbed,
|
||||||
|
hasHtmlEmbedNode,
|
||||||
|
stripHtmlEmbedNodes,
|
||||||
|
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||||
import {
|
import {
|
||||||
htmlToJson,
|
htmlToJson,
|
||||||
jsonToNode,
|
jsonToNode,
|
||||||
@@ -688,7 +693,25 @@ export class PageService {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
const prosemirrorJson = prosemirrorDoc.toJSON();
|
let prosemirrorJson = prosemirrorDoc.toJSON();
|
||||||
|
|
||||||
|
// SECURITY (Variant C admin gate, duplication write path):
|
||||||
|
// Duplication builds the ydoc directly and bypasses the collab
|
||||||
|
// onStoreDocument strip. htmlEmbed renders raw, unsanitized JS in
|
||||||
|
// readers' browsers, so only workspace admins/owners may author it. A
|
||||||
|
// non-admin with space Edit could otherwise duplicate an admin page
|
||||||
|
// that contains an embed into a new page authored by them. Strip every
|
||||||
|
// htmlEmbed node from each duplicated page when the duplicating user is
|
||||||
|
// not an admin, BEFORE computing textContent/ydoc/insert.
|
||||||
|
if (
|
||||||
|
!canAuthorHtmlEmbed(authUser.role) &&
|
||||||
|
hasHtmlEmbedNode(prosemirrorJson)
|
||||||
|
) {
|
||||||
|
this.logger.warn(
|
||||||
|
`Stripping htmlEmbed node(s) from non-admin page duplication by user ${authUser.id} (source page ${page.id})`,
|
||||||
|
);
|
||||||
|
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
|
||||||
|
}
|
||||||
|
|
||||||
// Add "Copy of " prefix to the root page title only for duplicates in same space
|
// Add "Copy of " prefix to the root page title only for duplicates in same space
|
||||||
let title = page.title;
|
let title = page.title;
|
||||||
|
|||||||
@@ -23,6 +23,11 @@ import { rewriteAttachmentsForUnsync } from './utils/transclusion-unsync.util';
|
|||||||
import { TransclusionLookup } from './transclusion.types';
|
import { TransclusionLookup } from './transclusion.types';
|
||||||
import { Page, User } from '@docmost/db/types/entity.types';
|
import { Page, User } from '@docmost/db/types/entity.types';
|
||||||
import { PageAccessService } from '../page-access/page-access.service';
|
import { PageAccessService } from '../page-access/page-access.service';
|
||||||
|
import {
|
||||||
|
canAuthorHtmlEmbed,
|
||||||
|
hasHtmlEmbedNode,
|
||||||
|
stripHtmlEmbedNodes,
|
||||||
|
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||||
|
|
||||||
type ReferencingPageInfo = {
|
type ReferencingPageInfo = {
|
||||||
id: string;
|
id: string;
|
||||||
@@ -461,10 +466,12 @@ export class TransclusionService {
|
|||||||
throw new NotFoundException('Sync block not found');
|
throw new NotFoundException('Sync block not found');
|
||||||
}
|
}
|
||||||
|
|
||||||
const { content, copies } = rewriteAttachmentsForUnsync(
|
let content: unknown;
|
||||||
|
let copies: ReturnType<typeof rewriteAttachmentsForUnsync>['copies'];
|
||||||
|
({ content, copies } = rewriteAttachmentsForUnsync(
|
||||||
transclusion.content,
|
transclusion.content,
|
||||||
() => uuid7(),
|
() => uuid7(),
|
||||||
);
|
));
|
||||||
|
|
||||||
if (copies.length > 0) {
|
if (copies.length > 0) {
|
||||||
const oldIds = copies.map((c) => c.oldAttachmentId);
|
const oldIds = copies.map((c) => c.oldAttachmentId);
|
||||||
@@ -513,6 +520,21 @@ export class TransclusionService {
|
|||||||
transclusionId,
|
transclusionId,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// SECURITY (Variant C admin gate, transclusion unsync write path):
|
||||||
|
// The returned content is a source snapshot that the client materializes
|
||||||
|
// into the reference page via insertContentAt. The snapshot keeps any
|
||||||
|
// htmlEmbed verbatim, and unsync requires only space Edit/View. If the
|
||||||
|
// requesting user is not a workspace admin/owner, strip htmlEmbed nodes so a
|
||||||
|
// non-admin can never receive an embed payload to re-persist (the collab
|
||||||
|
// strip on the subsequent save is debounced/race-prone and must not be the
|
||||||
|
// only guard). Admin behavior is unchanged.
|
||||||
|
if (!canAuthorHtmlEmbed(user.role) && hasHtmlEmbedNode(content)) {
|
||||||
|
this.logger.warn(
|
||||||
|
`Stripping htmlEmbed node(s) from non-admin transclusion unsync by user ${user.id} (reference page ${referencePageId}, source page ${sourcePageId})`,
|
||||||
|
);
|
||||||
|
content = stripHtmlEmbedNodes(content);
|
||||||
|
}
|
||||||
|
|
||||||
return { content };
|
return { content };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,6 +20,12 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
|||||||
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
|
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
|
||||||
import { markdownToHtml } from '@docmost/editor-ext';
|
import { markdownToHtml } from '@docmost/editor-ext';
|
||||||
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
|
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
|
||||||
|
import {
|
||||||
|
canAuthorHtmlEmbed,
|
||||||
|
hasHtmlEmbedNode,
|
||||||
|
stripHtmlEmbedNodes,
|
||||||
|
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||||
|
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||||
import { formatImportHtml } from '../utils/import-formatter';
|
import { formatImportHtml } from '../utils/import-formatter';
|
||||||
import {
|
import {
|
||||||
buildAttachmentCandidates,
|
buildAttachmentCandidates,
|
||||||
@@ -53,6 +59,7 @@ export class FileImportTaskService {
|
|||||||
private readonly backlinkRepo: BacklinkRepo,
|
private readonly backlinkRepo: BacklinkRepo,
|
||||||
@InjectKysely() private readonly db: KyselyDB,
|
@InjectKysely() private readonly db: KyselyDB,
|
||||||
private readonly importAttachmentService: ImportAttachmentService,
|
private readonly importAttachmentService: ImportAttachmentService,
|
||||||
|
private readonly userRepo: UserRepo,
|
||||||
private eventEmitter: EventEmitter2,
|
private eventEmitter: EventEmitter2,
|
||||||
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
|
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
|
||||||
) {}
|
) {}
|
||||||
@@ -149,6 +156,20 @@ export class FileImportTaskService {
|
|||||||
.where('id', '=', fileTask.spaceId)
|
.where('id', '=', fileTask.spaceId)
|
||||||
.executeTakeFirst();
|
.executeTakeFirst();
|
||||||
|
|
||||||
|
// SECURITY (Variant C admin gate, zip/multi-file import write path):
|
||||||
|
// An imported .html/.md file can carry an htmlEmbed marker (the node's
|
||||||
|
// serialized form), which would execute raw, unsanitized JS in readers'
|
||||||
|
// browsers. Only workspace admins/owners may author it. Resolve the
|
||||||
|
// importer's role ONCE here; each page's prosemirror JSON is run through the
|
||||||
|
// strip below before textContent/ydoc/insert when the importer is not an
|
||||||
|
// admin, so a non-admin cannot smuggle the node in via a zip import (which
|
||||||
|
// requires only space Edit).
|
||||||
|
const importingUser = await this.userRepo.findById(
|
||||||
|
fileTask.creatorId,
|
||||||
|
fileTask.workspaceId,
|
||||||
|
);
|
||||||
|
const importerCanAuthorHtmlEmbed = canAuthorHtmlEmbed(importingUser?.role);
|
||||||
|
|
||||||
const pagesMap = new Map<string, ImportPageNode>();
|
const pagesMap = new Map<string, ImportPageNode>();
|
||||||
|
|
||||||
for (const absPath of allFiles) {
|
for (const absPath of allFiles) {
|
||||||
@@ -496,9 +517,21 @@ export class FileImportTaskService {
|
|||||||
await this.importService.processHTML(html),
|
await this.importService.processHTML(html),
|
||||||
);
|
);
|
||||||
|
|
||||||
const { title, prosemirrorJson } =
|
let { title, prosemirrorJson } =
|
||||||
this.importService.extractTitleAndRemoveHeading(pmState);
|
this.importService.extractTitleAndRemoveHeading(pmState);
|
||||||
|
|
||||||
|
// SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
|
||||||
|
// imported by a non-admin BEFORE computing textContent/ydoc/insert.
|
||||||
|
if (
|
||||||
|
!importerCanAuthorHtmlEmbed &&
|
||||||
|
hasHtmlEmbedNode(prosemirrorJson)
|
||||||
|
) {
|
||||||
|
this.logger.warn(
|
||||||
|
`Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
|
||||||
|
);
|
||||||
|
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
|
||||||
|
}
|
||||||
|
|
||||||
const insertablePage: InsertablePage = {
|
const insertablePage: InsertablePage = {
|
||||||
id: page.id,
|
id: page.id,
|
||||||
slugId: page.slugId,
|
slugId: page.slugId,
|
||||||
|
|||||||
@@ -1,5 +1,11 @@
|
|||||||
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
|
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
|
||||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||||
|
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||||
|
import {
|
||||||
|
canAuthorHtmlEmbed,
|
||||||
|
hasHtmlEmbedNode,
|
||||||
|
stripHtmlEmbedNodes,
|
||||||
|
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||||
import { MultipartFile } from '@fastify/multipart';
|
import { MultipartFile } from '@fastify/multipart';
|
||||||
import * as path from 'path';
|
import * as path from 'path';
|
||||||
import {
|
import {
|
||||||
@@ -37,6 +43,7 @@ export class ImportService {
|
|||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private readonly pageRepo: PageRepo,
|
private readonly pageRepo: PageRepo,
|
||||||
|
private readonly userRepo: UserRepo,
|
||||||
private readonly storageService: StorageService,
|
private readonly storageService: StorageService,
|
||||||
@InjectKysely() private readonly db: KyselyDB,
|
@InjectKysely() private readonly db: KyselyDB,
|
||||||
@InjectQueue(QueueName.FILE_TASK_QUEUE)
|
@InjectQueue(QueueName.FILE_TASK_QUEUE)
|
||||||
@@ -83,8 +90,24 @@ export class ImportService {
|
|||||||
throw new BadRequestException(message);
|
throw new BadRequestException(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
const { title, prosemirrorJson } =
|
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
|
||||||
this.extractTitleAndRemoveHeading(prosemirrorState);
|
const title = extracted.title;
|
||||||
|
let prosemirrorJson = extracted.prosemirrorJson;
|
||||||
|
|
||||||
|
// SECURITY (Variant C admin gate, import write path):
|
||||||
|
// An imported .html/.md file can carry an htmlEmbed marker (the node's
|
||||||
|
// serialized form), which would execute raw JS in readers' browsers. Only
|
||||||
|
// workspace admins/owners may author it, so strip htmlEmbed nodes from
|
||||||
|
// imports performed by a non-admin user.
|
||||||
|
if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
|
||||||
|
const importingUser = await this.userRepo.findById(userId, workspaceId);
|
||||||
|
if (!canAuthorHtmlEmbed(importingUser?.role)) {
|
||||||
|
this.logger.warn(
|
||||||
|
`Stripping htmlEmbed node(s) from non-admin import by user ${userId}`,
|
||||||
|
);
|
||||||
|
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const pageTitle = title || fileName;
|
const pageTitle = title || fileName;
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ export * from "./lib/custom-code-block";
|
|||||||
export * from "./lib/drawio";
|
export * from "./lib/drawio";
|
||||||
export * from "./lib/excalidraw";
|
export * from "./lib/excalidraw";
|
||||||
export * from "./lib/embed";
|
export * from "./lib/embed";
|
||||||
|
export * from "./lib/html-embed/html-embed";
|
||||||
export * from "./lib/mention";
|
export * from "./lib/mention";
|
||||||
export * from "./lib/markdown";
|
export * from "./lib/markdown";
|
||||||
export * from "./lib/search-and-replace";
|
export * from "./lib/search-and-replace";
|
||||||
|
|||||||
138
packages/editor-ext/src/lib/html-embed/html-embed.ts
Normal file
138
packages/editor-ext/src/lib/html-embed/html-embed.ts
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
import { Node, mergeAttributes } from "@tiptap/core";
|
||||||
|
import { ReactNodeViewRenderer } from "@tiptap/react";
|
||||||
|
|
||||||
|
/** Configuration accepted by the `htmlEmbed` node extension. */
export interface HtmlEmbedOptions {
  /** Extra attributes merged into the marker `<div>` emitted by `renderHTML`. */
  HTMLAttributes: Record<string, any>;
  /** Component handed to `ReactNodeViewRenderer` when the node view is built. */
  view: any;
}
|
||||||
|
|
||||||
|
/** Attributes stored on an `htmlEmbed` node. */
export interface HtmlEmbedAttributes {
  /** Raw HTML/CSS/JS string that is injected verbatim into the wiki origin. */
  source?: string;
}
|
||||||
|
|
||||||
|
// Registers the node's command on Tiptap's chainable `Commands` interface.
declare module "@tiptap/core" {
  interface Commands<ReturnType> {
    htmlEmbed: {
      /** Insert an `htmlEmbed` node carrying the given attributes. */
      setHtmlEmbed: (attributes?: HtmlEmbedAttributes) => ReturnType;
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * Encode the raw source to base64 for the `data-source` attribute.
 *
 * The source is arbitrary HTML/CSS/JS. Storing it raw inside an HTML attribute
 * would (a) require heavy escaping and (b) risk the parser interpreting markup
 * inside the attribute. Base64 makes the round-trip HTML <-> ProseMirror JSON
 * lossless and keeps the markup inert while it sits in the attribute.
 *
 * The string is percent-encoded with `encodeURIComponent` before the base64
 * step so that non-Latin1 (UTF-8) characters survive `btoa`.
 *
 * Lone UTF-16 surrogates cannot be percent-encoded (`encodeURIComponent`
 * throws `URIError` on them), so each one is replaced with U+FFFD first. This
 * keeps the rest of the source instead of discarding the whole payload.
 *
 * @param source Raw embed source; may be empty.
 * @returns The base64 payload, or `""` for empty input or if encoding fails
 *   unexpectedly.
 */
export function encodeHtmlEmbedSource(source: string): string {
  if (!source) return "";

  // A high surrogate not followed by a low one, or a low surrogate not
  // preceded by a high one, is ill-formed UTF-16.
  const wellFormed = source.replace(
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
    "\uFFFD",
  );

  try {
    const percentEncoded = encodeURIComponent(wellFormed);
    if (typeof btoa === "function") {
      return btoa(percentEncoded);
    }
    // Fallback for runtimes without a global btoa.
    return Buffer.from(percentEncoded, "utf-8").toString("base64");
  } catch {
    // Deliberately NOT falling back to the raw source: unencoded markup must
    // never reach the attribute. An empty payload is the safe failure mode.
    return "";
  }
}
|
||||||
|
|
||||||
|
/**
 * Decode a `data-source` payload back to the raw embed source.
 *
 * Reverses the encoder's two steps: base64-decode, then `decodeURIComponent`.
 *
 * @param encoded Base64 payload read from the attribute; may be empty.
 * @returns The raw source, or `""` when the payload is empty or cannot be
 *   decoded (invalid base64 or malformed percent-encoding).
 */
export function decodeHtmlEmbedSource(encoded: string): string {
  if (!encoded) {
    return "";
  }

  try {
    // Use the global atob when present; otherwise fall back to Buffer.
    const percentEncoded =
      typeof atob === "function"
        ? atob(encoded)
        : Buffer.from(encoded, "base64").toString("utf-8");
    return decodeURIComponent(percentEncoded);
  } catch {
    return "";
  }
}
|
||||||
|
|
||||||
|
/**
 * Tiptap block node that stores an opaque raw HTML/CSS/JS `source` string.
 *
 * In static HTML the node is a marker `<div data-type="htmlEmbed">` whose
 * `data-source` attribute holds the base64-encoded source; the raw markup is
 * never inlined there. Rendering inside the editor is delegated to the React
 * component supplied through the `view` option.
 */
export const HtmlEmbed = Node.create<HtmlEmbedOptions>({
  name: "htmlEmbed",
  group: "block",
  inline: false,
  // The node has no editable ProseMirror children: its body is the `source`
  // attribute, so it is an isolating atom.
  atom: true,
  isolating: true,
  defining: true,
  draggable: true,

  addOptions() {
    return { HTMLAttributes: {}, view: null };
  },

  addAttributes() {
    return {
      source: {
        default: "",
        // Parsing: base64 payload in `data-source` -> raw source string.
        parseHTML: (element) => {
          const payload = element.getAttribute("data-source") || "";
          return decodeHtmlEmbedSource(payload);
        },
        // Rendering: raw source string -> base64 payload, so the value survives
        // HTML <-> JSON conversion unchanged.
        renderHTML: (attributes: HtmlEmbedAttributes) => {
          const payload = encodeHtmlEmbedSource(attributes.source || "");
          return { "data-source": payload };
        },
      },
    };
  },

  parseHTML() {
    return [{ tag: `div[data-type="${this.name}"]` }];
  },

  renderHTML({ HTMLAttributes }) {
    // Only the marker div with its encoded attribute is emitted. The source is
    // intentionally not expanded into child markup, so static output is not an
    // injection vector on its own.
    const markerAttributes = mergeAttributes(
      { "data-type": this.name },
      this.options.HTMLAttributes,
      HTMLAttributes,
    );
    return ["div", markerAttributes];
  },

  addCommands() {
    return {
      setHtmlEmbed:
        (attrs: HtmlEmbedAttributes) =>
        ({ commands }) =>
          commands.insertContent({ type: this.name, attrs }),
    };
  },

  addNodeView() {
    // Flag the editor as initialized before creating the renderer so the React
    // node view renders immediately (flush sync).
    this.editor.isInitialized = true;
    return ReactNodeViewRenderer(this.options.view);
  },
});
|
||||||
@@ -0,0 +1,41 @@
|
|||||||
|
import { Token } from "marked";
|
||||||
|
|
||||||
|
/** Token produced for one `<!--html-embed:…-->` marker. */
interface HtmlEmbedToken {
  type: "htmlEmbed";
  /** Exact marker text consumed from the input. */
  raw: string;
  /** Base64 payload found between the marker prefix and `-->`. */
  encoded: string;
}

/**
 * Marked block extension that turns the Markdown marker
 * `<!--html-embed:<base64>-->` back into the `htmlEmbed` marker div.
 *
 * The rendered HTML is `<div data-type="htmlEmbed" data-source="…">`, carrying
 * the payload unchanged in `data-source`. The payload is never decoded or
 * expanded here; it stays base64 in the attribute.
 */
export const htmlEmbedExtension = {
  name: "htmlEmbed",
  level: "block" as const,

  /** Index of the next potential marker in `src` (-1 when there is none). */
  start(src: string) {
    return src.indexOf("<!--html-embed:");
  },

  /**
   * Recognize a marker at the very start of `src`. The payload may only
   * contain base64 alphabet characters; anything else is not tokenized.
   */
  tokenizer(src: string): HtmlEmbedToken | undefined {
    const found = /^<!--html-embed:([A-Za-z0-9+/=]*)-->/.exec(src);
    if (!found) {
      return undefined;
    }

    const [raw, payload] = found;
    return { type: "htmlEmbed", raw, encoded: payload ?? "" };
  },

  /** Emit the marker div for a token created by `tokenizer`. */
  renderer(token: Token) {
    const { encoded } = token as HtmlEmbedToken;
    return `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
  },
};
|
||||||
@@ -2,6 +2,7 @@ import { marked } from "marked";
|
|||||||
import { calloutExtension } from "./callout.marked";
|
import { calloutExtension } from "./callout.marked";
|
||||||
import { mathBlockExtension } from "./math-block.marked";
|
import { mathBlockExtension } from "./math-block.marked";
|
||||||
import { mathInlineExtension } from "./math-inline.marked";
|
import { mathInlineExtension } from "./math-inline.marked";
|
||||||
|
import { htmlEmbedExtension } from "./html-embed.marked";
|
||||||
|
|
||||||
marked.use({
|
marked.use({
|
||||||
renderer: {
|
renderer: {
|
||||||
@@ -34,7 +35,12 @@ marked.use({
|
|||||||
});
|
});
|
||||||
|
|
||||||
marked.use({
|
marked.use({
|
||||||
extensions: [calloutExtension, mathBlockExtension, mathInlineExtension],
|
extensions: [
|
||||||
|
calloutExtension,
|
||||||
|
mathBlockExtension,
|
||||||
|
mathInlineExtension,
|
||||||
|
htmlEmbedExtension,
|
||||||
|
],
|
||||||
});
|
});
|
||||||
|
|
||||||
marked.setOptions({ breaks: true });
|
marked.setOptions({ breaks: true });
|
||||||
|
|||||||
@@ -32,12 +32,39 @@ export function htmlToMarkdown(html: string): string {
|
|||||||
mathInline,
|
mathInline,
|
||||||
mathBlock,
|
mathBlock,
|
||||||
iframeEmbed,
|
iframeEmbed,
|
||||||
|
htmlEmbed,
|
||||||
image,
|
image,
|
||||||
video,
|
video,
|
||||||
]);
|
]);
|
||||||
return turndownService.turndown(html).replaceAll('<br>', ' ');
|
return turndownService.turndown(html).replaceAll('<br>', ' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Serialize the `htmlEmbed` node to Markdown.
 *
 * Markdown has no native representation for an arbitrary-HTML block, so the
 * node is written as an HTML comment carrying its base64 `data-source` payload:
 * `<!--html-embed:<base64>-->`, surrounded by blank lines. The comment keeps
 * the raw markup inert in the exported `.md` file.
 *
 * SECURITY: the attribute value is copied into an HTML comment, so it is only
 * accepted when it consists solely of base64 alphabet characters. Any other
 * value (for example one containing `-->`, which would terminate the comment
 * and expose live markup) is replaced by an empty payload.
 *
 * @param turndownService Turndown instance the rule is registered on.
 */
function htmlEmbed(turndownService: _TurndownService) {
  // Standard base64 alphabet plus padding; the empty string is allowed.
  const base64Payload = /^[A-Za-z0-9+/=]*$/;

  turndownService.addRule('htmlEmbed', {
    filter: function (node: HTMLInputElement) {
      return (
        node.nodeName === 'DIV' &&
        node.getAttribute('data-type') === 'htmlEmbed'
      );
    },
    replacement: function (_content: string, node: HTMLInputElement) {
      const attributeValue = node.getAttribute('data-source') || '';
      // Never let non-base64 text reach the comment body.
      const encoded = base64Payload.test(attributeValue) ? attributeValue : '';
      return `\n\n<!--html-embed:${encoded}-->\n\n`;
    },
  });
}
|
||||||
|
|
||||||
function listParagraph(turndownService: _TurndownService) {
|
function listParagraph(turndownService: _TurndownService) {
|
||||||
turndownService.addRule('paragraph', {
|
turndownService.addRule('paragraph', {
|
||||||
filter: ['p'],
|
filter: ['p'],
|
||||||
|
|||||||
Reference in New Issue
Block a user