feat(editor): admin-only raw HTML/CSS/JS embed node
Adds an htmlEmbed block node that renders and executes raw HTML/CSS/JS in the wiki origin (e.g. an analytics tracker) — the owner-chosen variant C. Because this is stored-XSS by design, only workspace admins/owners may get such a node persisted; everyone executes it when reading. - Node (editor-ext): htmlEmbed atom/isolating block; source stored base64 in data-source for lossless HTML<->JSON round-trip. renderHTML emits only the encoded marker (never inlines raw markup), so generateHTML/export/search are not themselves injection vectors. Registered in BOTH client extensions and server tiptapExtensions. Markdown round-trip via an <!--html-embed:b64--> comment (turndown) + a marked rule. - Client NodeView: injects source and re-creates <script> elements so they actually run; edit modal; renders in read-only/share too. Slash item is admin-gated (adminOnly filtered by the user's workspace role). - SERVER ENFORCEMENT (the real control — UI gating alone is insufficient): stripHtmlEmbedNodes() removes htmlEmbed from any document persisted by a non-admin, applied at every write path that introduces content from an untrusted author: collab onStoreDocument, REST/MCP/AI updatePageContent, single-file import, zip/multi-file import, page duplication, and transclusion unsync. Page restore introduces no new content. Public share/readonly viewers render fetched (already-stripped) content and do NOT open a collab socket, so the only residual is a transient broadcast window to concurrent authenticated editors (documented). Implements docs/arbitrary-html-embed-plan.md (variant C). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,7 @@ export * from "./lib/custom-code-block";
|
||||
export * from "./lib/drawio";
|
||||
export * from "./lib/excalidraw";
|
||||
export * from "./lib/embed";
|
||||
export * from "./lib/html-embed/html-embed";
|
||||
export * from "./lib/mention";
|
||||
export * from "./lib/markdown";
|
||||
export * from "./lib/search-and-replace";
|
||||
|
||||
138
packages/editor-ext/src/lib/html-embed/html-embed.ts
Normal file
138
packages/editor-ext/src/lib/html-embed/html-embed.ts
Normal file
@@ -0,0 +1,138 @@
|
||||
import { Node, mergeAttributes } from "@tiptap/core";
|
||||
import { ReactNodeViewRenderer } from "@tiptap/react";
|
||||
|
||||
/**
 * Options accepted by the `htmlEmbed` node extension.
 */
export interface HtmlEmbedOptions {
  /** Extra attributes merged into the marker `<div>` emitted by `renderHTML`. */
  HTMLAttributes: Record<string, any>;

  /** Component passed to `ReactNodeViewRenderer`; the extension defaults it to `null`. */
  view: any;
}
|
||||
|
||||
/**
 * Attributes carried by an `htmlEmbed` node.
 */
export interface HtmlEmbedAttributes {
  /**
   * The raw HTML/CSS/JS string. It is injected verbatim into the wiki origin,
   * so it must be treated as executable content.
   */
  source?: string;
}
|
||||
|
||||
// Module augmentation: registers the `htmlEmbed` command group on tiptap's
// `Commands` interface so `setHtmlEmbed` is typed on the editor command API.
declare module "@tiptap/core" {
  interface Commands<ReturnType> {
    htmlEmbed: {
      /** Insert an `htmlEmbed` node built from the given attributes. */
      setHtmlEmbed: (attributes?: HtmlEmbedAttributes) => ReturnType;
    };
  }
}
|
||||
|
||||
// Matches UTF-16 surrogate code units that are not part of a valid pair:
// a high surrogate not followed by a low one, or a low surrogate not preceded
// by a high one. `encodeURIComponent` throws `URIError` on such input.
const LONE_SURROGATE_PATTERN =
  /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g;

/**
 * Encode the raw source to base64 for the `data-source` attribute.
 *
 * The source is arbitrary HTML/CSS/JS. Storing it raw inside an HTML attribute
 * would (a) require heavy escaping and (b) risk the parser interpreting markup
 * inside the attribute. Base64 makes the round-trip HTML <-> ProseMirror JSON
 * lossless and keeps the markup inert while it sits in the attribute.
 *
 * The source is percent-encoded with `encodeURIComponent` before the base64
 * step so that non-Latin1 (UTF-8) characters survive `btoa`.
 *
 * Lone (unpaired) surrogates cannot be percent-encoded. They are replaced with
 * U+FFFD so that one malformed code unit does not discard the whole source.
 *
 * @param source Raw embed source; an empty string yields an empty string.
 * @returns The base64 payload, or `""` if encoding fails unexpectedly.
 */
export function encodeHtmlEmbedSource(source: string): string {
  if (!source) return "";

  try {
    const wellFormed = source.replace(LONE_SURROGATE_PATTERN, "\uFFFD");
    const percentEncoded = encodeURIComponent(wellFormed);

    if (typeof btoa === "function") {
      return btoa(percentEncoded);
    }
    // Node fallback (server-side schema parsing has no global btoa).
    return Buffer.from(percentEncoded, "utf-8").toString("base64");
  } catch {
    // Defensive only: after the surrogate replacement the input is pure ASCII
    // by the time it reaches base64, so this is not expected to trigger. An
    // empty payload is returned rather than raw markup, which must never be
    // placed in the attribute unencoded.
    return "";
  }
}
|
||||
|
||||
/**
 * Decode a base64 `data-source` payload back into the raw embed source.
 *
 * The payload is base64-decoded (via `atob` when available, otherwise via
 * Node's `Buffer`) and then percent-decoded with `decodeURIComponent`.
 *
 * @param encoded Base64 payload read from the `data-source` attribute.
 * @returns The raw source, or `""` when the payload is empty or cannot be decoded.
 */
export function decodeHtmlEmbedSource(encoded: string): string {
  if (encoded) {
    try {
      const percentEncoded =
        typeof atob === "function"
          ? atob(encoded)
          : // Node fallback.
            Buffer.from(encoded, "base64").toString("utf-8");
      return decodeURIComponent(percentEncoded);
    } catch {
      // Malformed base64 or percent-encoding: fall through to the empty result.
    }
  }
  return "";
}
|
||||
|
||||
/**
 * Tiptap node for a raw HTML/CSS/JS embed.
 *
 * The node's only attribute is `source`. In static HTML it is represented by a
 * marker `<div data-type="htmlEmbed" data-source="<base64>">`; the raw markup
 * is never written into that output.
 */
export const HtmlEmbed = Node.create<HtmlEmbedOptions>({
  name: "htmlEmbed",

  group: "block",
  inline: false,
  draggable: true,
  defining: true,
  // atom + isolating: there are no editable ProseMirror children. The body is
  // the opaque `source` string, which the NodeView is responsible for showing.
  atom: true,
  isolating: true,

  addOptions() {
    return { HTMLAttributes: {}, view: null };
  },

  addAttributes() {
    return {
      source: {
        default: "",
        // Parse: read the base64 payload and turn it back into raw source.
        parseHTML: (element) => {
          const payload = element.getAttribute("data-source") || "";
          return decodeHtmlEmbedSource(payload);
        },
        // Render: base64-encode the raw source so it round-trips losslessly
        // through the HTML <-> JSON conversions used by export/import/collab.
        renderHTML: (attributes: HtmlEmbedAttributes) => {
          const rawSource = attributes.source || "";
          return { "data-source": encodeHtmlEmbedSource(rawSource) };
        },
      },
    };
  },

  parseHTML() {
    const selector = `div[data-type="${this.name}"]`;
    return [{ tag: selector }];
  },

  renderHTML({ HTMLAttributes }) {
    // Only a marker div carrying the encoded source is emitted. Expanding the
    // raw markup here is deliberately avoided: static generateHTML output
    // (previews, search indexing, exports) must not become an injection
    // vector. Expansion and execution are left to the client NodeView.
    const markerAttributes = mergeAttributes(
      { "data-type": this.name },
      this.options.HTMLAttributes,
      HTMLAttributes,
    );
    return ["div", markerAttributes];
  },

  addCommands() {
    return {
      setHtmlEmbed:
        (attrs: HtmlEmbedAttributes) =>
        ({ commands }) =>
          commands.insertContent({ type: this.name, attrs }),
    };
  },

  addNodeView() {
    // Flag the editor as initialized before creating the React node view so
    // that the view renders immediately (flush sync).
    this.editor.isInitialized = true;
    return ReactNodeViewRenderer(this.options.view);
  },
});
|
||||
@@ -0,0 +1,41 @@
|
||||
import { Token } from "marked";
|
||||
|
||||
/**
 * Token emitted by the `htmlEmbed` marked tokenizer for one
 * `<!--html-embed:…-->` marker.
 */
interface HtmlEmbedToken {
  type: "htmlEmbed";
  /** The full marker text that was matched. */
  raw: string;
  /** The base64 payload captured from inside the marker (may be empty). */
  encoded: string;
}
|
||||
|
||||
/**
 * Marked extension that turns the HTML comment marker written by the turndown
 * rule (`<!--html-embed:<base64>-->`) back into an `htmlEmbed` marker div.
 *
 * The rendered div has the `data-type`/`data-source` shape that the node's
 * `parseHTML` looks for, so MD -> HTML -> ProseMirror JSON restores the node
 * together with its base64 payload. The payload is passed through untouched:
 * nothing is decoded or expanded here, and execution is left to the client
 * NodeView.
 */
export const htmlEmbedExtension = {
  name: "htmlEmbed",
  level: "block" as const,

  /** Index of the next marker in `src`, or -1 when there is none. */
  start(src: string) {
    return src.indexOf("<!--html-embed:");
  },

  /** Build a token when `src` begins with a marker whose payload is base64-only. */
  tokenizer(src: string): HtmlEmbedToken | undefined {
    const found = /^<!--html-embed:([A-Za-z0-9+/=]*)-->/.exec(src);
    if (!found) return undefined;

    const [raw, payload] = found;
    return { type: "htmlEmbed", raw, encoded: payload ?? "" };
  },

  /** Emit the marker div carrying the still-encoded payload. */
  renderer(token: Token) {
    const { encoded } = token as HtmlEmbedToken;
    return `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
  },
};
|
||||
@@ -2,6 +2,7 @@ import { marked } from "marked";
|
||||
import { calloutExtension } from "./callout.marked";
|
||||
import { mathBlockExtension } from "./math-block.marked";
|
||||
import { mathInlineExtension } from "./math-inline.marked";
|
||||
import { htmlEmbedExtension } from "./html-embed.marked";
|
||||
|
||||
marked.use({
|
||||
renderer: {
|
||||
@@ -34,7 +35,12 @@ marked.use({
|
||||
});
|
||||
|
||||
// Register the custom marked extensions, including the `htmlEmbed` rule that
// recognizes the `<!--html-embed:…-->` comment marker.
// The object must declare `extensions` exactly once: a duplicate key is a
// TypeScript error and the earlier value would be discarded anyway.
marked.use({
  extensions: [
    calloutExtension,
    mathBlockExtension,
    mathInlineExtension,
    htmlEmbedExtension,
  ],
});
|
||||
|
||||
marked.setOptions({ breaks: true });
|
||||
|
||||
@@ -32,12 +32,39 @@ export function htmlToMarkdown(html: string): string {
|
||||
mathInline,
|
||||
mathBlock,
|
||||
iframeEmbed,
|
||||
htmlEmbed,
|
||||
image,
|
||||
video,
|
||||
]);
|
||||
return turndownService.turndown(html).replaceAll('<br>', ' ');
|
||||
}
|
||||
|
||||
/**
 * Turndown rule that serializes an `htmlEmbed` marker div to Markdown.
 *
 * Markdown has no construct for an arbitrary-HTML block, so the node is written
 * as an HTML comment that carries the div's `data-source` value unchanged
 * (`<!--html-embed:<payload>-->`). The Markdown-to-HTML side recognizes the
 * same marker and rebuilds the node, which keeps the source intact across
 * MD -> HTML -> JSON. Being a comment, the payload also stays inert in the
 * exported `.md` file and does not render in plain Markdown viewers.
 */
function htmlEmbed(turndownService: _TurndownService) {
  turndownService.addRule('htmlEmbed', {
    // Only `<div data-type="htmlEmbed">` elements are handled by this rule.
    filter: (node: HTMLInputElement) => {
      if (node.nodeName !== 'DIV') return false;
      return node.getAttribute('data-type') === 'htmlEmbed';
    },
    // The element's content is ignored; only the encoded attribute is kept.
    replacement: (_content: string, node: HTMLInputElement) => {
      const payload = node.getAttribute('data-source') || '';
      return '\n\n<!--html-embed:' + payload + '-->\n\n';
    },
  });
}
|
||||
|
||||
function listParagraph(turndownService: _TurndownService) {
|
||||
turndownService.addRule('paragraph', {
|
||||
filter: ['p'],
|
||||
|
||||
Reference in New Issue
Block a user