Adds an htmlEmbed block node that renders and executes raw HTML/CSS/JS in the wiki origin (e.g. an analytics tracker) — the owner-chosen variant C. Because this is stored-XSS by design, only workspace admins/owners may get such a node persisted; everyone executes it when reading.

- Node (editor-ext): htmlEmbed atom/isolating block; source stored base64 in data-source for lossless HTML<->JSON round-trip. renderHTML emits only the encoded marker (never inlines raw markup), so generateHTML/export/search are not themselves injection vectors. Registered in BOTH client extensions and server tiptapExtensions. Markdown round-trip via an <!--html-embed:b64--> comment (turndown) + a marked rule.
- Client NodeView: injects source and re-creates <script> elements so they actually run; edit modal; renders in read-only/share too. Slash item is admin-gated (adminOnly filtered by the user's workspace role).
- SERVER ENFORCEMENT (the real control — UI gating alone is insufficient): stripHtmlEmbedNodes() removes htmlEmbed from any document persisted by a non-admin, applied at every write path that introduces content from an untrusted author: collab onStoreDocument, REST/MCP/AI updatePageContent, single-file import, zip/multi-file import, page duplication, and transclusion unsync. Page restore introduces no new content. Public share/readonly viewers render fetched (already-stripped) content and do NOT open a collab socket, so the only residual is a transient broadcast window to concurrent authenticated editors (documented).

Implements docs/arbitrary-html-embed-plan.md (variant C).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
69 lines
2.4 KiB
TypeScript
69 lines
2.4 KiB
TypeScript
import { JSONContent } from '@tiptap/core';
|
|
|
|
/**
 * Value of the `type` field that identifies a raw HTML embed node in
 * ProseMirror JSON. The helpers in this module compare node types against this
 * constant, so it is the single place the node name is spelled out here.
 */
export const HTML_EMBED_NODE_NAME = 'htmlEmbed';
|
|
|
|
/**
 * Recursively remove every `htmlEmbed` node from a ProseMirror JSON document.
 *
 * SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin
 * (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to
 * author it. This helper is the server-side enforcement primitive: every WRITE
 * path that may persist content from a NON-admin caller must run the incoming
 * document through this function so a non-admin cannot smuggle the node in via
 * the collab socket, the REST/MCP/AI content-update path, paste, or import.
 *
 * Input handling:
 * - Node object: returned as a shallow copy. If it has a `content` array, that
 *   array is replaced by a new one with every `htmlEmbed` entry dropped and
 *   every remaining entry processed recursively.
 * - Array of nodes (a bare fragment, or an array nested inside `content`):
 *   returned as a NEW array filtered the same way. It is never spread into a
 *   plain object, which would both corrupt it and leave embeds in place.
 * - Anything else (null, undefined, primitives): returned unchanged.
 *
 * The input is never mutated. Copies are shallow: values other than `content`
 * (e.g. `attrs`, `marks`) are shared by reference with the input.
 *
 * CAVEAT: only array ENTRIES can be dropped. A single node object passed in
 * whose own `type` is `htmlEmbed` has no parent array to be removed from and is
 * returned (as a copy) as-is. Callers that may hold a bare node rather than a
 * full document should pass it wrapped in an array.
 *
 * @param pmJson Document node, node array, or arbitrary value to sanitize.
 * @returns A sanitized copy with the same overall shape as the input.
 */
export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
  if (!pmJson || typeof pmJson !== 'object') {
    return pmJson;
  }

  if (Array.isArray(pmJson)) {
    const kept: unknown[] = [];
    for (const child of pmJson as readonly unknown[]) {
      // Drop any htmlEmbed entry outright.
      if (child && (child as { type?: unknown }).type === HTML_EMBED_NODE_NAME) {
        continue;
      }
      // Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are
      // also removed. Non-object entries come back unchanged.
      kept.push(stripHtmlEmbedNodes(child));
    }
    return kept as unknown as T;
  }

  const node = pmJson as unknown as JSONContent;

  if (!Array.isArray(node.content)) {
    // Leaf node (or malformed `content`): nothing to filter, just copy.
    return { ...node } as unknown as T;
  }

  // The array branch above does the filtering and the recursion.
  return {
    ...node,
    content: stripHtmlEmbedNodes(node.content),
  } as unknown as T;
}
|
|
|
|
/**
 * Returns true if the given ProseMirror JSON contains at least one `htmlEmbed`
 * node anywhere in its tree. Useful to decide whether content carries an embed
 * at all (e.g. for logging a rejected non-admin embed attempt).
 *
 * Input handling:
 * - Node object: true if its own `type` is `htmlEmbed`, or if any entry of its
 *   `content` array (searched recursively) contains one.
 * - Array of nodes (a bare fragment): true if any entry contains one. Without
 *   this branch an array would always report false, because it has neither a
 *   `type` nor a `content` property.
 * - Anything else (null, undefined, primitives): false.
 *
 * @param pmJson Document node, node array, or arbitrary value to inspect.
 * @returns Whether an `htmlEmbed` node was found.
 */
export function hasHtmlEmbedNode(pmJson: unknown): boolean {
  if (!pmJson || typeof pmJson !== 'object') {
    return false;
  }

  if (Array.isArray(pmJson)) {
    return pmJson.some((child) => hasHtmlEmbedNode(child));
  }

  const { type, content } = pmJson as { type?: unknown; content?: unknown };

  if (type === HTML_EMBED_NODE_NAME) {
    return true;
  }

  // Only an array-valued `content` is searched; any other shape holds no
  // child nodes to inspect.
  return (
    Array.isArray(content) && content.some((child) => hasHtmlEmbedNode(child))
  );
}
|
|
|
|
/**
 * Decide whether a workspace role is trusted to author `htmlEmbed` nodes.
 *
 * Only the exact strings `'owner'` and `'admin'` qualify. Every other value,
 * including `'member'`, unrecognized roles, differently-cased spellings,
 * `null` and `undefined`, is treated as untrusted. Keeping the rule here gives
 * every write path one shared definition of "trusted".
 *
 * @param role Workspace role of the caller, if known.
 * @returns `true` when the role may author `htmlEmbed` nodes.
 */
export function canAuthorHtmlEmbed(role: string | null | undefined): boolean {
  switch (role) {
    case 'owner':
    case 'admin':
      return true;
    default:
      return false;
  }
}
|