feat(editor): admin-only raw HTML/CSS/JS embed node

Adds an htmlEmbed block node that renders and executes raw HTML/CSS/JS in the
wiki origin (e.g. an analytics tracker) — the owner-chosen variant C. Because
this is stored-XSS by design, only workspace admins/owners may get such a node
persisted; everyone executes it when reading.

- Node (editor-ext): htmlEmbed atom/isolating block; source stored base64 in
  data-source for lossless HTML<->JSON round-trip. renderHTML emits only the
  encoded marker (never inlines raw markup), so generateHTML/export/search are
  not themselves injection vectors. Registered in BOTH client extensions and
  server tiptapExtensions. Markdown round-trip via an <!--html-embed:b64-->
  comment (turndown) + a marked rule.
- Client NodeView: injects source and re-creates <script> elements so they
  actually run; edit modal; renders in read-only/share too. Slash item is
  admin-gated (adminOnly filtered by the user's workspace role).
- SERVER ENFORCEMENT (the real control — UI gating alone is insufficient):
  stripHtmlEmbedNodes() removes htmlEmbed from any document persisted by a
  non-admin, applied at every write path that introduces content from an
  untrusted author: collab onStoreDocument, REST/MCP/AI updatePageContent,
  single-file import, zip/multi-file import, page duplication, and transclusion
  unsync. Page restore introduces no new content. Public share/readonly viewers
  render fetched (already-stripped) content and do NOT open a collab socket, so
  the only residual is a transient broadcast window to concurrent authenticated
  editors (documented).

Implements docs/arbitrary-html-embed-plan.md (variant C).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-20 08:54:54 +03:00
parent c8af637654
commit bd28dbfe2b
19 changed files with 941 additions and 9 deletions

View File

@@ -16,6 +16,7 @@ export * from "./lib/custom-code-block";
export * from "./lib/drawio";
export * from "./lib/excalidraw";
export * from "./lib/embed";
export * from "./lib/html-embed/html-embed";
export * from "./lib/mention";
export * from "./lib/markdown";
export * from "./lib/search-and-replace";

View File

@@ -0,0 +1,138 @@
import { Node, mergeAttributes } from "@tiptap/core";
import { ReactNodeViewRenderer } from "@tiptap/react";
/**
 * Configuration accepted by the `htmlEmbed` node extension.
 */
export interface HtmlEmbedOptions {
// Extra attributes merged into the marker <div> emitted by the node's renderHTML.
HTMLAttributes: Record<string, any>;
// Component handed to ReactNodeViewRenderer in addNodeView; the node's
// addOptions defaults it to null, so the host is expected to supply it.
view: any;
}
/**
 * Attributes stored on an `htmlEmbed` node.
 */
export interface HtmlEmbedAttributes {
// Raw HTML/CSS/JS string that is injected verbatim into the wiki origin.
// Held decoded on the node; it is base64-encoded into the `data-source`
// attribute only when the node is rendered to static HTML.
source?: string;
}
// Module augmentation: registers the `setHtmlEmbed` command on tiptap's
// `Commands` interface so it is typed on `editor.commands` / command chains.
declare module "@tiptap/core" {
interface Commands<ReturnType> {
htmlEmbed: {
// Inserts an `htmlEmbed` node carrying the given attributes.
setHtmlEmbed: (attributes?: HtmlEmbedAttributes) => ReturnType;
};
}
}
/**
 * Replace unpaired UTF-16 surrogates with U+FFFD so the string can be passed
 * to `encodeURIComponent`, which throws `URIError` on them.
 */
function replaceLoneSurrogates(text: string): string {
  return text.replace(
    /[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?<![\uD800-\uDBFF])[\uDC00-\uDFFF]/g,
    "\uFFFD",
  );
}

/**
 * Encode the raw source to base64 for the `data-source` attribute.
 *
 * The source is arbitrary HTML/CSS/JS. Storing it raw inside an HTML attribute
 * would (a) require heavy escaping and (b) risk the parser interpreting markup
 * inside the attribute. Base64 makes the round-trip HTML <-> ProseMirror JSON
 * lossless and keeps the markup inert while it sits in the attribute.
 *
 * The source is percent-encoded with `encodeURIComponent` before the base64
 * step so that non-Latin1 (UTF-8) characters survive btoa/atob.
 *
 * @param source Raw embed source; an empty string encodes to an empty string.
 * @returns The base64 payload. A source containing unpaired surrogates (which
 *   `encodeURIComponent` rejects) is encoded with only those code units
 *   replaced by U+FFFD, instead of the whole source being dropped. `""` is
 *   returned only if no base64 encoder is available at all.
 */
export function encodeHtmlEmbedSource(source: string): string {
  if (!source) return "";
  try {
    let percentEncoded: string;
    try {
      percentEncoded = encodeURIComponent(source);
    } catch {
      // URIError: the source holds a lone surrogate. Losing that single code
      // unit is far better than persisting an empty embed.
      percentEncoded = encodeURIComponent(replaceLoneSurrogates(source));
    }
    // `percentEncoded` is pure ASCII from here on, so btoa cannot throw on it.
    if (typeof btoa === "function") {
      return btoa(percentEncoded);
    }
    // Node fallback (server-side schema parsing has no global btoa).
    return Buffer.from(percentEncoded, "utf-8").toString("base64");
  } catch {
    // Last resort (no encoder available): keep the never-throws contract.
    return "";
  }
}
/**
 * Decode a base64 `data-source` payload back to the raw embed source.
 *
 * Reverses the encoding step: base64-decode first, then undo the
 * percent-encoding with `decodeURIComponent`.
 *
 * @param encoded Base64 payload read from the `data-source` attribute.
 * @returns The raw source, or `""` when the payload is empty or cannot be
 *   decoded (invalid base64 or malformed percent-encoding).
 */
export function decodeHtmlEmbedSource(encoded: string): string {
  if (!encoded) {
    return "";
  }

  try {
    // Prefer the global atob; fall back to Buffer where it is unavailable.
    const percentEncoded =
      typeof atob === "function"
        ? atob(encoded)
        : Buffer.from(encoded, "base64").toString("utf-8");
    return decodeURIComponent(percentEncoded);
  } catch {
    return "";
  }
}
/**
 * Tiptap block node holding an opaque raw HTML/CSS/JS `source` string.
 *
 * Its static HTML form is a marker `<div data-type="htmlEmbed">` whose
 * `data-source` attribute carries the base64-encoded source; the raw markup is
 * never expanded by `renderHTML`. Display is delegated to the React node view
 * supplied through `options.view`.
 */
export const HtmlEmbed = Node.create<HtmlEmbedOptions>({
  name: "htmlEmbed",

  group: "block",
  inline: false,

  // atom + isolating: there are no editable ProseMirror children; the body is
  // the opaque `source` string rendered by the NodeView.
  atom: true,
  isolating: true,
  defining: true,
  draggable: true,

  addOptions() {
    const defaults: HtmlEmbedOptions = {
      HTMLAttributes: {},
      view: null,
    };
    return defaults;
  },

  addAttributes() {
    return {
      source: {
        default: "",
        // Parse: base64 payload in `data-source` -> raw source.
        parseHTML: (element) => {
          const encoded = element.getAttribute("data-source") || "";
          return decodeHtmlEmbedSource(encoded);
        },
        // Render: raw source -> base64 payload, so the value round-trips
        // through the HTML <-> JSON conversions used by export/import/collab.
        renderHTML: (attributes: HtmlEmbedAttributes) => {
          const encoded = encodeHtmlEmbedSource(attributes.source || "");
          return { "data-source": encoded };
        },
      },
    };
  },

  parseHTML() {
    const selector = `div[data-type="${this.name}"]`;
    return [{ tag: selector }];
  },

  renderHTML({ HTMLAttributes }) {
    // Only the marker div with the encoded source is emitted. The raw markup
    // is deliberately NOT expanded: static generateHTML output (previews,
    // search indexing, exports) must not become an injection vector. Only the
    // client NodeView expands and executes the source.
    const markerAttributes = mergeAttributes(
      { "data-type": this.name },
      this.options.HTMLAttributes,
      HTMLAttributes,
    );
    return ["div", markerAttributes];
  },

  addCommands() {
    return {
      setHtmlEmbed:
        (attrs: HtmlEmbedAttributes) =>
        ({ commands }) =>
          commands.insertContent({ type: this.name, attrs }),
    };
  },

  addNodeView() {
    // Mark the editor as initialized before building the React node view; the
    // original note says this forces the view to render immediately (flush
    // sync). The assignment must stay ahead of the renderer call.
    this.editor.isInitialized = true;
    return ReactNodeViewRenderer(this.options.view);
  },
});

View File

@@ -0,0 +1,41 @@
import { Token } from "marked";
/**
 * Token produced by the html-embed marked tokenizer.
 */
interface HtmlEmbedToken {
// Discriminator identifying the token.
type: "htmlEmbed";
// The full matched comment marker text.
raw: string;
// The base64 payload captured from inside the marker (may be empty).
encoded: string;
}
/**
 * Marked block extension that turns the `<!--html-embed:<base64>-->` comment
 * marker (as written by the turndown rule) back into an `htmlEmbed` node.
 *
 * The renderer outputs the marker div that the node's `parseHTML` matches, so
 * MD -> HTML -> ProseMirror JSON restores the node together with its base64
 * `data-source`. The raw markup is never expanded here: the payload stays
 * base64-encoded in the attribute and is only executed by the client NodeView.
 */
export const htmlEmbedExtension = {
  name: "htmlEmbed",
  level: "block" as const,

  /** Index of the next marker in `src`, or -1 when there is none. */
  start(src: string) {
    const markerPrefix = "<!--html-embed:";
    return src.indexOf(markerPrefix);
  },

  /**
   * Match a marker at the very start of `src`; the payload is restricted to
   * base64 characters. Returns `undefined` when `src` does not begin with one.
   */
  tokenizer(src: string): HtmlEmbedToken | undefined {
    const found = src.match(/^<!--html-embed:([A-Za-z0-9+/=]*)-->/);
    if (!found) {
      return undefined;
    }
    return {
      type: "htmlEmbed",
      raw: found[0],
      encoded: found[1] ?? "",
    };
  },

  /** Emit the marker div carrying the still-encoded payload. */
  renderer(token: Token) {
    const { encoded } = token as HtmlEmbedToken;
    return `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
  },
};

View File

@@ -2,6 +2,7 @@ import { marked } from "marked";
import { calloutExtension } from "./callout.marked";
import { mathBlockExtension } from "./math-block.marked";
import { mathInlineExtension } from "./math-inline.marked";
import { htmlEmbedExtension } from "./html-embed.marked";
marked.use({
renderer: {
@@ -34,7 +35,12 @@ marked.use({
});
marked.use({
extensions: [calloutExtension, mathBlockExtension, mathInlineExtension],
extensions: [
calloutExtension,
mathBlockExtension,
mathInlineExtension,
htmlEmbedExtension,
],
});
marked.setOptions({ breaks: true });

View File

@@ -32,12 +32,39 @@ export function htmlToMarkdown(html: string): string {
mathInline,
mathBlock,
iframeEmbed,
htmlEmbed,
image,
video,
]);
return turndownService.turndown(html).replaceAll('<br>', ' ');
}
/**
 * Serialize the `htmlEmbed` node to Markdown.
 *
 * Markdown has no native representation for an arbitrary-HTML block, so the
 * node is preserved as an HTML comment carrying the base64-encoded source
 * (the node's `data-source` payload). The Markdown import side recognizes the
 * same marker and rebuilds the node, so the source survives the round-trip.
 * The comment also keeps the raw markup inert in the exported `.md` file (it
 * does not render in plain Markdown viewers).
 *
 * The payload is written only when it consists solely of base64 characters.
 * `data-source` is read from whatever HTML is being converted, and a crafted
 * value containing `-->` would otherwise close the comment early and spill
 * raw markup into the Markdown. A non-base64 payload could not be re-imported
 * anyway, so it is replaced by an empty one.
 *
 * @param turndownService Turndown instance the rule is registered on.
 */
function htmlEmbed(turndownService: _TurndownService) {
  // Same alphabet the `<!--html-embed:...-->` marker tokenizer accepts.
  const base64Payload = /^[A-Za-z0-9+/=]*$/;

  turndownService.addRule('htmlEmbed', {
    filter: function (node: HTMLInputElement) {
      return (
        node.nodeName === 'DIV' &&
        node.getAttribute('data-type') === 'htmlEmbed'
      );
    },
    replacement: function (_content: string, node: HTMLInputElement) {
      const encoded = node.getAttribute('data-source') || '';
      const payload = base64Payload.test(encoded) ? encoded : '';
      return `\n\n<!--html-embed:${payload}-->\n\n`;
    },
  });
}
function listParagraph(turndownService: _TurndownService) {
turndownService.addRule('paragraph', {
filter: ['p'],