feat(editor): admin-only raw HTML/CSS/JS embed node

Adds an htmlEmbed block node that renders and executes raw HTML/CSS/JS in the
wiki origin (e.g. an analytics tracker) — the owner-chosen variant C. Because
this is stored XSS by design, only workspace admins/owners may persist such a
node; every reader of the page executes it.

- Node (editor-ext): htmlEmbed atom/isolating block; source stored base64 in
  data-source for lossless HTML<->JSON round-trip. renderHTML emits only the
  encoded marker (never inlines raw markup), so generateHTML/export/search are
  not themselves injection vectors. Registered in BOTH client extensions and
  server tiptapExtensions. Markdown round-trip via an <!--html-embed:b64-->
  comment (turndown) + a marked rule.
- Client NodeView: injects source and re-creates <script> elements so they
  actually run; edit modal; renders in read-only/share too. Slash item is
  admin-gated (adminOnly filtered by the user's workspace role).
- SERVER ENFORCEMENT (the real control — UI gating alone is insufficient):
  stripHtmlEmbedNodes() removes htmlEmbed from any document persisted by a
  non-admin, applied at every write path that introduces content from an
  untrusted author: collab onStoreDocument, REST/MCP/AI updatePageContent,
  single-file import, zip/multi-file import, page duplication, and transclusion
  unsync. Page restore introduces no new content. Public share/readonly viewers
  render fetched (already-stripped) content and do NOT open a collab socket, so
  the only residual risk is a transient broadcast window to concurrent
  authenticated editors (documented).

Implements docs/arbitrary-html-embed-plan.md (variant C).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-20 08:54:54 +03:00
parent c8af637654
commit bd28dbfe2b
19 changed files with 941 additions and 9 deletions

View File

@@ -0,0 +1,229 @@
import {
canAuthorHtmlEmbed,
hasHtmlEmbedNode,
stripHtmlEmbedNodes,
} from './html-embed.util';
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
import {
decodeHtmlEmbedSource,
encodeHtmlEmbedSource,
} from '@docmost/editor-ext';
/**
 * Test helper: depth-first, pre-order search of a ProseMirror-style JSON tree.
 *
 * @param json root of the tree to search (any value; non-objects never match)
 * @param type node `type` to look for
 * @returns the first node (in document order) whose `type` equals `type`, or
 *   `undefined` when there is none
 */
const findFirstChild = (json: any, type: string): any | undefined => {
  // Explicit stack instead of recursion; children are pushed in reverse so
  // they are popped in their original left-to-right order.
  const pending: any[] = [json];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== 'object') {
      continue;
    }
    if (current.type === type) {
      return current;
    }
    const children = current.content;
    if (Array.isArray(children)) {
      for (let i = children.length - 1; i >= 0; i -= 1) {
        pending.push(children[i]);
      }
    }
  }
  return undefined;
};
describe('stripHtmlEmbedNodes', () => {
  // Fixture builders keep each case focused on the tree shape under test.
  // Typed as `any` so deep property access on the results stays unchecked,
  // as it is for the loosely-typed documents the function handles.
  const paragraph = (text: string): any => ({
    type: 'paragraph',
    content: [{ type: 'text', text }],
  });
  const embed = (source: string): any => ({
    type: 'htmlEmbed',
    attrs: { source },
  });

  it('removes a top-level htmlEmbed node', () => {
    const input = {
      type: 'doc',
      content: [
        paragraph('before'),
        embed('<script>alert(1)</script>'),
        paragraph('after'),
      ],
    };

    const stripped = stripHtmlEmbedNodes(input);

    expect(hasHtmlEmbedNode(stripped)).toBe(false);
    // The surrounding paragraphs survive, in their original order.
    expect(stripped.content).toHaveLength(2);
    expect(stripped.content[0].content[0].text).toBe('before');
    expect(stripped.content[1].content[0].text).toBe('after');
  });

  it('removes nested htmlEmbed nodes (e.g. inside columns)', () => {
    const column = {
      type: 'column',
      content: [embed('<b>x</b>'), paragraph('keep')],
    };
    const input = {
      type: 'doc',
      content: [{ type: 'columns', content: [column] }],
    };

    const stripped = stripHtmlEmbedNodes(input);

    expect(hasHtmlEmbedNode(stripped)).toBe(false);
    const strippedColumn = findFirstChild(stripped, 'column');
    expect(strippedColumn.content).toHaveLength(1);
    expect(strippedColumn.content[0].type).toBe('paragraph');
  });

  it('does not mutate the input document', () => {
    const input = { type: 'doc', content: [embed('x')] };

    stripHtmlEmbedNodes(input);

    // The original tree still holds its embed after the call.
    expect(input.content).toHaveLength(1);
    expect(input.content[0].type).toBe('htmlEmbed');
  });

  it('leaves documents without htmlEmbed untouched', () => {
    const input = { type: 'doc', content: [paragraph('hi')] };

    expect(hasHtmlEmbedNode(input)).toBe(false);
    const stripped = stripHtmlEmbedNodes(input);
    expect(stripped).toEqual(input);
  });
});
describe('canAuthorHtmlEmbed', () => {
  it('allows owner and admin', () => {
    for (const trustedRole of ['owner', 'admin']) {
      expect(canAuthorHtmlEmbed(trustedRole)).toBe(true);
    }
  });

  it('denies member and unknown/empty roles', () => {
    // Anything that is not exactly owner/admin must be rejected, including
    // absent roles and role names the helper does not know.
    const untrustedRoles: Array<string | null | undefined> = [
      'member',
      null,
      undefined,
      'viewer',
    ];
    for (const untrustedRole of untrustedRoles) {
      expect(canAuthorHtmlEmbed(untrustedRole)).toBe(false);
    }
  });
});
// Test-local copy of the decision the non-admin persistence guards make
// (collab store, single import, zip import, duplication, transclusion unsync):
// strip only when the author may not write embeds AND the document has one;
// in every other case hand back the very same object.
const applyAdminGate = (json: any, role: string | null | undefined) => {
  // `&&` short-circuits, so the tree is only scanned for untrusted roles.
  const mustStrip = !canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json);
  return mustStrip ? stripHtmlEmbedNodes(json) : json;
};
describe('admin-gate write-path decision (duplication / import / unsync)', () => {
  const paragraph = (text: string): any => ({
    type: 'paragraph',
    content: [{ type: 'text', text }],
  });
  const embed = (source: string): any => ({
    type: 'htmlEmbed',
    attrs: { source },
  });

  // Shared fixture: one ordinary paragraph followed by one embed.
  const docWithEmbed = {
    type: 'doc',
    content: [paragraph('keep'), embed('<script>alert(1)</script>')],
  };

  // True when the gated result for `role` still carries an embed.
  const embedSurvivesFor = (role: string | null | undefined): boolean =>
    hasHtmlEmbedNode(applyAdminGate(docWithEmbed, role));

  it('strips the embed for a non-admin (member) author', () => {
    const gated = applyAdminGate(docWithEmbed, 'member');

    expect(hasHtmlEmbedNode(gated)).toBe(false);
    expect(gated.content).toHaveLength(1);
    expect(gated.content[0].content[0].text).toBe('keep');
  });

  it('strips the embed for unknown/empty roles', () => {
    expect(embedSurvivesFor(null)).toBe(false);
    expect(embedSurvivesFor(undefined)).toBe(false);
    expect(embedSurvivesFor('viewer')).toBe(false);
  });

  it('keeps the embed for an admin author', () => {
    const gated = applyAdminGate(docWithEmbed, 'admin');

    expect(hasHtmlEmbedNode(gated)).toBe(true);
    // Trusted authors get the identical object back, not a copy.
    expect(gated).toBe(docWithEmbed);
  });

  it('keeps the embed for an owner author', () => {
    expect(embedSurvivesFor('owner')).toBe(true);
  });

  it('strips nested embeds (subtree/column duplication) for a non-admin', () => {
    const column = {
      type: 'column',
      content: [embed('<script>x</script>'), paragraph('ok')],
    };
    const nested = {
      type: 'doc',
      content: [{ type: 'columns', content: [column] }],
    };

    const gated = applyAdminGate(nested, 'member');

    expect(hasHtmlEmbedNode(gated)).toBe(false);
    const gatedColumn = findFirstChild(gated, 'column');
    expect(gatedColumn.content).toHaveLength(1);
    expect(gatedColumn.content[0].type).toBe('paragraph');
  });

  it('leaves a non-admin doc without embeds untouched (no needless rewrite)', () => {
    const clean = { type: 'doc', content: [paragraph('hi')] };

    // Identity, not just equality: nothing to strip means no copy is made.
    expect(applyAdminGate(clean, 'member')).toBe(clean);
  });
});
describe('htmlEmbed source base64 codec', () => {
  it('round-trips arbitrary source including UTF-8', () => {
    // Mixes markup, an accented Latin letter, an arrow and CJK characters.
    const original = '<script>console.log("héllo → 世界")</script>';

    const wire = encodeHtmlEmbedSource(original);
    const restored = decodeHtmlEmbedSource(wire);

    // The encoded form must not carry a literal tag opener.
    expect(wire).not.toContain('<');
    expect(restored).toBe(original);
  });
});
describe('htmlEmbed node HTML <-> JSON round-trip', () => {
  // Parses `html` and returns the first htmlEmbed node of the resulting JSON.
  const parseEmbed = (html: string): any =>
    findFirstChild(htmlToJson(html), 'htmlEmbed');

  it('preserves the raw source through HTML -> JSON', () => {
    const source = '<script>track("page")</script><style>.a{color:red}</style>';
    const markup = `<div data-type="htmlEmbed" data-source="${encodeHtmlEmbedSource(source)}"></div>`;

    const embedNode = parseEmbed(markup);

    expect(embedNode).toBeDefined();
    expect(embedNode.attrs.source).toBe(source);
  });

  it('round-trips JSON -> HTML -> JSON keeping the source', () => {
    const source = '<div onclick="x()">raw &amp; markup</div>';
    const startDoc = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source } }],
    };

    const staticHtml = jsonToHtml(startDoc);

    // The serialized HTML must carry the node marker but never the raw
    // markup itself, otherwise the static output would be an injection
    // vector on its own.
    expect(staticHtml).toContain('data-type="htmlEmbed"');
    expect(staticHtml).not.toContain('onclick');

    const embedNode = parseEmbed(staticHtml);
    expect(embedNode).toBeDefined();
    expect(embedNode.attrs.source).toBe(source);
  });
});

View File

@@ -0,0 +1,68 @@
import { JSONContent } from '@tiptap/core';
export const HTML_EMBED_NODE_NAME = 'htmlEmbed';
/**
 * Recursively remove every `htmlEmbed` node from a ProseMirror JSON document.
 *
 * SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin
 * (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to
 * author it. This helper is the server-side enforcement primitive: every WRITE
 * path that may persist content from a NON-admin caller must run the incoming
 * document through this function so a non-admin cannot smuggle the node in via
 * the collab socket, the REST/MCP/AI content-update path, paste, or import.
 *
 * The function fails closed for every input shape, so the result never
 * contains an `htmlEmbed` node reachable through `content`:
 * - a node with a `content` array: embed children are dropped, the remaining
 *   children are processed recursively;
 * - a bare array of nodes (a fragment): filtered the same way, and an array
 *   is returned;
 * - a root node that is itself an `htmlEmbed`: replaced by an empty paragraph,
 *   because there is no parent to drop it from.
 *   NOTE(review): assumes the schema accepts an empty paragraph wherever an
 *   `htmlEmbed` block is allowed — confirm against the editor schema.
 *
 * @param pmJson ProseMirror JSON node, fragment array, or any other value
 * @returns a NEW value for object/array inputs (shallow copies along the
 *   stripped path; the input is never mutated). `null`, `undefined` and
 *   primitives are returned as-is.
 */
export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
  if (!pmJson || typeof pmJson !== 'object') {
    return pmJson;
  }
  // A fragment passed as a plain array: filter it and keep it an array
  // (spreading it into an object would turn it into `{0: …, 1: …}`).
  if (Array.isArray(pmJson)) {
    return withoutHtmlEmbedChildren(pmJson) as unknown as T;
  }
  const node = pmJson as unknown as JSONContent;
  // The root itself is an embed: there is no parent array to remove it from,
  // so neutralize it instead of handing the raw source back to the caller.
  if (node.type === HTML_EMBED_NODE_NAME) {
    return { type: 'paragraph' } as unknown as T;
  }
  if (Array.isArray(node.content)) {
    return {
      ...node,
      content: withoutHtmlEmbedChildren(node.content),
    } as unknown as T;
  }
  return { ...node } as unknown as T;
}

/**
 * Copy `children` without any direct `htmlEmbed` entries, stripping each
 * surviving child recursively so nested embeds (e.g. inside columns/callouts)
 * are removed as well.
 */
function withoutHtmlEmbedChildren(children: readonly unknown[]): unknown[] {
  const kept: unknown[] = [];
  for (const child of children) {
    const isEmbed =
      !!child &&
      typeof child === 'object' &&
      (child as { type?: unknown }).type === HTML_EMBED_NODE_NAME;
    if (isEmbed) {
      continue;
    }
    kept.push(stripHtmlEmbedNodes(child));
  }
  return kept;
}
/**
 * Reports whether an `htmlEmbed` node occurs anywhere in a ProseMirror JSON
 * tree: the root itself, or any node reachable through nested `content`
 * arrays. Callers can use it to tell whether a strip pass would change the
 * document (e.g. to log a rejected non-admin embed attempt).
 *
 * @param pmJson root node to inspect; `null`, `undefined` and primitives
 *   yield `false`
 * @returns `true` as soon as one `htmlEmbed` node is found, else `false`
 */
export function hasHtmlEmbedNode(pmJson: unknown): boolean {
  // Explicit work list instead of recursion. Only existence matters, so the
  // visiting order is irrelevant to the result.
  const pending: unknown[] = [pmJson];
  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== 'object') {
      continue;
    }
    const node = current as JSONContent;
    if (node.type === HTML_EMBED_NODE_NAME) {
      return true;
    }
    if (Array.isArray(node.content)) {
      for (const child of node.content) {
        pending.push(child);
      }
    }
  }
  return false;
}
/**
 * Single shared definition of which workspace roles are trusted to author
 * `htmlEmbed` nodes, so every write path applies the same rule.
 *
 * @param role workspace role of the author; may be absent
 * @returns `true` only for exactly `'owner'` or `'admin'`; `false` for
 *   `'member'`, any other string, `null` and `undefined`
 */
export function canAuthorHtmlEmbed(role: string | null | undefined): boolean {
  switch (role) {
    case 'owner':
    case 'admin':
      return true;
    default:
      // Deny by default: unknown or missing roles are never trusted.
      return false;
  }
}