feat(html-embed): sandbox the embed block; split trusted trackers into an admin field

Convert the htmlEmbed node from same-origin raw-HTML execution to a sandboxed
iframe (sandbox="allow-scripts allow-popups allow-forms", no allow-same-origin,
srcdoc) with postMessage auto-resize (validated by event.source) and an optional
manual height attr. The block now runs in an opaque origin and cannot reach the
viewer's cookies/session/API, so it is safe for any member.

Because the block is now harmless, remove the entire admin/role gating apparatus:
drop htmlEmbedAllowed/canAuthorHtmlEmbed/stripDisallowedHtmlEmbedNodes/
collectHtmlEmbedSources and every role-based strip on the write paths (collab
REST/MCP + socket, page create/duplicate, import x2, transclusion unsync), along
with the now-unused WorkspaceRepo/UserRepo injections and the PageService.create
callerRole param. Keep one strip: prepareContentForShare still removes htmlEmbed
on the anonymous public-share read path when the workspace master toggle is OFF.

The workspace settings.htmlEmbed toggle is now a plain feature switch (gates the
slash-menu and share rendering); when ON the block is available to all members.

Add settings.trackerHead: an admin-only raw HTML/JS analytics snippet injected
verbatim into the <head> of public share pages only (ShareSeoController), for
trackers that genuinely need same-origin. Admin-gated via the existing CASL
Manage/Settings ability; never injected into the authenticated app shell.

Closes security-review findings #1, #2, #4, #5, #10 (and #3 as a security issue).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-21 02:48:41 +03:00
parent b98c9d51c6
commit 81823fce1e
35 changed files with 482 additions and 1387 deletions

View File

@@ -20,14 +20,6 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
import { markdownToHtml } from '@docmost/editor-ext';
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { formatImportHtml } from '../utils/import-formatter';
import {
buildAttachmentCandidates,
@@ -61,8 +53,6 @@ export class FileImportTaskService {
private readonly backlinkRepo: BacklinkRepo,
@InjectKysely() private readonly db: KyselyDB,
private readonly importAttachmentService: ImportAttachmentService,
private readonly userRepo: UserRepo,
private readonly workspaceRepo: WorkspaceRepo,
private eventEmitter: EventEmitter2,
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
) {}
@@ -159,29 +149,6 @@ export class FileImportTaskService {
.where('id', '=', fileTask.spaceId)
.executeTakeFirst();
// SECURITY (Variant C admin gate, zip/multi-file import write path):
// An imported .html/.md file can carry an htmlEmbed marker (the node's
// serialized form), which would execute raw, unsanitized JS in readers'
// browsers. Only workspace admins/owners may author it. Resolve the
// importer's role ONCE here; each page's prosemirror JSON is run through the
// strip below before textContent/ydoc/insert when the importer is not an
// admin, so a non-admin cannot smuggle the node in via a zip import (which
// requires only space Edit).
const importingUser = await this.userRepo.findById(
fileTask.creatorId,
fileTask.workspaceId,
);
// Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed
// survives only when the workspace feature toggle is ON and the importer is
// an admin/owner. OFF (default) => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(fileTask.workspaceId))?.settings,
);
const importerCanAuthorHtmlEmbed = htmlEmbedAllowed(
htmlEmbedEnabled,
importingUser?.role,
);
const pagesMap = new Map<string, ImportPageNode>();
for (const absPath of allFiles) {
@@ -529,21 +496,9 @@ export class FileImportTaskService {
await this.importService.processHTML(html),
);
let { title, prosemirrorJson } =
const { title, prosemirrorJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
// SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
// imported by a non-admin BEFORE computing textContent/ydoc/insert.
if (
!importerCanAuthorHtmlEmbed &&
hasHtmlEmbedNode(prosemirrorJson)
) {
this.logger.warn(
`Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
);
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
}
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,

View File

@@ -1,123 +0,0 @@
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
// FAIL-CLOSED IDENTITY for the import write paths.
//
// import.service / file-import-task.service cannot be unit-LOADED under the
// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in
// transformIgnorePatterns). So we cover the two load-bearing properties at the
// strongest feasible layer:
//
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate
// predicate each entrypoint runs against the role resolved from
// userRepo.findById(...): a MISSING user (findById -> undefined) must fail
// closed (strip), and only 'admin'/'owner' keep the embed.
//
// (2) IDENTITY — source-pin which identity governs the gate so a refactor that
// swaps the lookup to the wrong user (e.g. the queue worker's caller) is
// caught: zip import resolves the role from `fileTask.creatorId`; single
// import from the request `userId`. NOT some ambient caller.
//
// If a guard is deleted/misplaced or the identity field changes, these break.
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
],
});
// The real predicate both import entrypoints apply (see the SECURITY blocks in
// import.service.ts and file-import-task.service.ts): resolve the importer via
// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`.
function applyImportGate(
json: any,
featureEnabled: boolean,
importingUser: { role?: string } | undefined,
) {
if (
!htmlEmbedAllowed(featureEnabled, importingUser?.role) &&
hasHtmlEmbedNode(json)
) {
return stripHtmlEmbedNodes(json);
}
return json;
}
describe('import gate fail-closed by toggle AND resolved-user role (real helpers)', () => {
it('toggle ON + missing user (userRepo.findById -> undefined) strips the embed', () => {
// findById returns undefined when the user/workspace pair does not resolve;
// undefined?.role is undefined -> htmlEmbedAllowed(true, undefined) === false.
const result = applyImportGate(docWithEmbed(), true, undefined);
expect(hasHtmlEmbedNode(result)).toBe(false);
});
it("toggle ON + resolved role 'member' strips", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'member' })),
).toBe(false);
});
it("toggle ON + resolved role 'admin' keeps the embed", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'admin' })),
).toBe(true);
});
it("toggle ON + resolved role 'owner' keeps the embed", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'owner' })),
).toBe(true);
});
it('toggle OFF strips for every role (admin/owner/member)', () => {
for (const role of ['admin', 'owner', 'member'] as const) {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), false, { role })),
).toBe(false);
}
});
});
// Source-pin the identity each entrypoint feeds to userRepo.findById. These are
// the lines that decide WHOSE role governs the gate; pinning them means a
// refactor that points the lookup at the wrong user trips the test.
const SRC_DIR = join(__dirname);
describe('import gate identity is pinned to the importer (source contract)', () => {
it('single import resolves the role from the request userId', () => {
const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8');
// The role lookup must key on the request `userId`, then gate on the role.
expect(src).toMatch(
/this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/,
);
expect(src).toMatch(
/htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*\)/,
);
// And the toggle is resolved from the workspace settings.
expect(src).toContain('isHtmlEmbedFeatureEnabled(');
// And the gate uses the real strip helper.
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
});
it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => {
const src = readFileSync(
join(SRC_DIR, 'file-import-task.service.ts'),
'utf-8',
);
expect(src).toMatch(
/this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/,
);
expect(src).toMatch(
/importerCanAuthorHtmlEmbed\s*=\s*htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*,?\s*\)/,
);
expect(src).toContain('isHtmlEmbedFeatureEnabled(');
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
});
});

View File

@@ -1,13 +1,5 @@
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import {
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { MultipartFile } from '@fastify/multipart';
import * as path from 'path';
import {
@@ -45,12 +37,10 @@ export class ImportService {
constructor(
private readonly pageRepo: PageRepo,
private readonly userRepo: UserRepo,
private readonly storageService: StorageService,
@InjectKysely() private readonly db: KyselyDB,
@InjectQueue(QueueName.FILE_TASK_QUEUE)
private readonly fileTaskQueue: Queue,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async importPage(
@@ -95,28 +85,7 @@ export class ImportService {
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
const title = extracted.title;
let prosemirrorJson = extracted.prosemirrorJson;
// SECURITY (Variant C admin gate, import write path):
// An imported .html/.md file can carry an htmlEmbed marker (the node's
// serialized form), which would execute raw JS in readers' browsers. Only
// workspace admins/owners may author it, so strip htmlEmbed nodes from
// imports performed by a non-admin user.
if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
const importingUser = await this.userRepo.findById(userId, workspaceId);
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
// feature toggle is ON and the importer is an admin/owner. OFF (default)
// => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(workspaceId))?.settings,
);
if (!htmlEmbedAllowed(htmlEmbedEnabled, importingUser?.role)) {
this.logger.warn(
`Stripping htmlEmbed node(s) from import by user ${userId}`,
);
prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson);
}
}
const prosemirrorJson = extracted.prosemirrorJson;
const pageTitle = title || fileName;