Files
gitmost/apps/server/src/integrations/import/services/import.service.ts
claude_code 81823fce1e feat(html-embed): sandbox the embed block; split trusted trackers into an admin field
Convert the htmlEmbed node from same-origin raw-HTML execution to a sandboxed
iframe (sandbox="allow-scripts allow-popups allow-forms", no allow-same-origin,
srcdoc) with postMessage auto-resize (validated by event.source) and an optional
manual height attr. The block now runs in an opaque origin and cannot reach the
viewer's cookies/session/API, so it is safe for any member.

Because the block is now harmless, remove the entire admin/role gating apparatus:
drop htmlEmbedAllowed/canAuthorHtmlEmbed/stripDisallowedHtmlEmbedNodes/
collectHtmlEmbedSources and every role-based strip on the write paths (collab
REST/MCP + socket, page create/duplicate, import x2, transclusion unsync), along
with the now-unused WorkspaceRepo/UserRepo injections and the PageService.create
callerRole param. Keep one strip: prepareContentForShare still removes htmlEmbed
on the anonymous public-share read path when the workspace master toggle is OFF.

The workspace settings.htmlEmbed toggle is now a plain feature switch (gates the
slash-menu and share rendering); when ON the block is available to all members.

Add settings.trackerHead: an admin-only raw HTML/JS analytics snippet injected
verbatim into the <head> of public share pages only (ShareSeoController), for
trackers that genuinely need same-origin. Admin-gated via the existing CASL
Manage/Settings ability; never injected into the authenticated app shell.

Closes security-review findings #1, #2, #4, #5, #10 (and #3 as a security issue).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-21 02:48:41 +03:00

267 lines
7.5 KiB
TypeScript

import { BadRequestException, Injectable, Logger } from '@nestjs/common';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { MultipartFile } from '@fastify/multipart';
import * as path from 'path';
import {
htmlToJson,
jsonToText,
tiptapExtensions,
} from '../../../collaboration/collaboration.util';
import { InjectKysely } from 'nestjs-kysely';
import { KyselyDB } from '@docmost/db/types/kysely.types';
import {
generateSlugId,
sanitizeFileName,
createByteCountingStream,
} from '../../../common/helpers';
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { TiptapTransformer } from '@hocuspocus/transformer';
import * as Y from 'yjs';
import { markdownToHtml } from '@docmost/editor-ext';
import {
FileTaskStatus,
FileTaskType,
getFileTaskFolderPath,
} from '../utils/file.utils';
import { v7 as uuid7 } from 'uuid';
import { StorageService } from '../../storage/storage.service';
import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueJob, QueueName } from '../../queue/constants';
import { load } from 'cheerio';
import { normalizeImportHtml } from '../utils/import-formatter';
/**
 * Imports user-supplied files into a space.
 *
 * Single Markdown/HTML files are converted to ProseMirror JSON and inserted
 * as a page right away (`importPage`). Zip archives are uploaded to storage
 * and handed to the file-task queue for later processing (`importZip`).
 */
@Injectable()
export class ImportService {
  private readonly logger = new Logger(ImportService.name);

  constructor(
    private readonly pageRepo: PageRepo,
    private readonly storageService: StorageService,
    @InjectKysely() private readonly db: KyselyDB,
    @InjectQueue(QueueName.FILE_TASK_QUEUE)
    private readonly fileTaskQueue: Queue,
  ) {}

  /**
   * Converts an uploaded `.md` or `.html` file into a page and inserts it at
   * the end of the space's root-level pages.
   *
   * The page title is the text of a leading level-1 heading when present,
   * otherwise the sanitized file name without its extension.
   *
   * @param filePromise - Promise resolving to the uploaded multipart file.
   * @param userId - Recorded as the page creator and last updater.
   * @param spaceId - Space that receives the page.
   * @param workspaceId - Workspace that owns the space.
   * @returns The row returned by `pageRepo.insertPage`.
   * @throws BadRequestException when the content cannot be converted (this
   *   includes unsupported extensions) or when the insert fails.
   */
  async importPage(
    filePromise: Promise<MultipartFile>,
    userId: string,
    spaceId: string,
    workspaceId: string,
  ) {
    const file = await filePromise;
    const fileBuffer = await file.toBuffer();

    // path.basename() matches the suffix case-sensitively, so strip the
    // extension exactly as it was typed and lower-case it only for comparisons.
    const rawExtension = path.extname(file.filename);
    const fileExtension = rawExtension.toLowerCase();
    const fileName = sanitizeFileName(
      path.basename(file.filename, rawExtension),
    );

    // Buffer#toString keeps a leading UTF-8 BOM; drop it so it does not end
    // up as content in front of the first heading.
    const fileContent = fileBuffer.toString().replace(/^\uFEFF/, '');

    let prosemirrorState = null;
    let createdPage = null;

    try {
      if (fileExtension === '.md') {
        prosemirrorState = await this.processMarkdown(fileContent);
      } else if (fileExtension === '.html') {
        prosemirrorState = await this.processHTML(fileContent);
      }
    } catch (err) {
      // Surface the real cause instead of a generic mask, so the failure is
      // diagnosable from the HTTP response (project convention: never swallow).
      const reason = this.describeError(err);
      this.logger.error(`Error processing file content: ${reason}`, err);
      throw new BadRequestException(
        `Error processing file content: ${reason}`,
      );
    }

    // Also reached for extensions other than .md/.html, which leave the
    // state unset.
    if (!prosemirrorState) {
      const message = 'Failed to create ProseMirror state';
      this.logger.error(message);
      throw new BadRequestException(message);
    }

    const { title, prosemirrorJson } =
      this.extractTitleAndRemoveHeading(prosemirrorState);
    const pageTitle = title || fileName;

    if (!prosemirrorJson) {
      return createdPage;
    }

    try {
      const pagePosition = await this.getNewPagePosition(spaceId);
      createdPage = await this.pageRepo.insertPage({
        slugId: generateSlugId(),
        title: pageTitle,
        content: prosemirrorJson,
        textContent: jsonToText(prosemirrorJson),
        ydoc: await this.createYdoc(prosemirrorJson),
        position: pagePosition,
        spaceId: spaceId,
        creatorId: userId,
        workspaceId: workspaceId,
        lastUpdatedById: userId,
      });
      // Log the title that was actually stored; `title` is null whenever the
      // file name was used as the fallback.
      this.logger.debug(
        `Successfully imported "${pageTitle}${fileExtension}". ID: ${createdPage.id} - SlugId: ${createdPage.slugId}`,
      );
    } catch (err) {
      const reason = this.describeError(err);
      this.logger.error(`Failed to create imported page: ${reason}`, err);
      throw new BadRequestException(
        `Failed to create imported page: ${reason}`,
      );
    }

    return createdPage;
  }

  /**
   * Converts Markdown to HTML with `markdownToHtml` and then runs the HTML
   * import path on the result.
   *
   * @param markdownInput - Raw Markdown source.
   * @returns Whatever `processHTML` returns for the generated HTML.
   */
  async processMarkdown(markdownInput: string): Promise<any> {
    const html = await markdownToHtml(markdownInput);
    return this.processHTML(html);
  }

  /**
   * Loads the HTML with cheerio, passes the document root through
   * `normalizeImportHtml`, and converts the resulting markup with
   * `htmlToJson`.
   *
   * @param htmlInput - Raw HTML source.
   * @returns The value produced by `htmlToJson`.
   */
  async processHTML(htmlInput: string): Promise<any> {
    const $ = load(htmlInput);
    normalizeImportHtml($, $.root());
    return htmlToJson($.html() || '');
  }

  /**
   * Encodes ProseMirror JSON as a Yjs document update.
   *
   * @param prosemirrorJson - Document JSON; a falsy value yields `null`.
   * @returns The encoded state as a Buffer, or `null` when no JSON was given.
   */
  async createYdoc(prosemirrorJson: any): Promise<Buffer | null> {
    if (!prosemirrorJson) {
      return null;
    }

    const ydoc = TiptapTransformer.toYdoc(
      prosemirrorJson,
      'default',
      tiptapExtensions,
    );
    // Encode once; the result is the only thing this method needs.
    return Buffer.from(Y.encodeStateAsUpdate(ydoc));
  }

  /**
   * Splits a leading level-1 heading off a document and returns its text as
   * the title.
   *
   * The input object is not modified: the returned document carries its own
   * copy of the top-level `content` array. When removing the heading leaves
   * the document empty, a single empty paragraph is inserted.
   *
   * @param prosemirrorState - Document JSON with an optional `content` array.
   * @returns `title` (or `null` when there is no leading H1 or it has no
   *   text) and `prosemirrorJson`, the document without that heading.
   */
  extractTitleAndRemoveHeading(prosemirrorState: any) {
    let title: string | null = null;
    // Work on a copy so the caller's document keeps its heading.
    const content: any[] = [...(prosemirrorState.content ?? [])];
    const firstNode = content[0];

    if (firstNode?.type === 'heading' && firstNode.attrs?.level === 1) {
      // A heading may hold several inline nodes (e.g. plain + bold text);
      // join every text fragment instead of keeping only the first one.
      const inlineNodes: any[] = Array.isArray(firstNode.content)
        ? firstNode.content
        : [];
      const headingText = inlineNodes
        .map((node) => (typeof node?.text === 'string' ? node.text : ''))
        .join('');
      title = headingText.length > 0 ? headingText : null;
      content.shift();
    }

    // ensure at least one paragraph
    if (content.length === 0) {
      content.push({
        type: 'paragraph',
        content: [],
      });
    }

    return {
      title,
      prosemirrorJson: {
        ...prosemirrorState,
        content,
      },
    };
  }

  /**
   * Computes a fractional-index position that sorts after the last sibling.
   *
   * Siblings are the pages of `spaceId` that share `parentPageId`, or the
   * pages without a parent when `parentPageId` is omitted.
   *
   * @param spaceId - Space whose pages are inspected.
   * @param parentPageId - Optional parent page; omit for root-level pages.
   * @returns A key after the current last sibling, or an initial key when
   *   there are no siblings.
   */
  async getNewPagePosition(
    spaceId: string,
    parentPageId?: string,
  ): Promise<string> {
    const baseQuery = this.db
      .selectFrom('pages')
      .select(['id', 'position'])
      .where('spaceId', '=', spaceId)
      .orderBy('position', (ob) => ob.collate('C').desc())
      .limit(1);

    const siblingQuery = parentPageId
      ? baseQuery.where('parentPageId', '=', parentPageId)
      : baseQuery.where('parentPageId', 'is', null);

    const lastPage = await siblingQuery.executeTakeFirst();
    if (lastPage) {
      return generateJitteredKeyBetween(lastPage.position, null);
    }
    return generateJitteredKeyBetween(null, null);
  }

  /**
   * Uploads an archive to storage, records a file task for it, and enqueues
   * an import job for that task.
   *
   * @param filePromise - Promise resolving to the uploaded multipart file.
   * @param source - Stored in the task's `source` column.
   * @param userId - Recorded as the task creator.
   * @param spaceId - Space the import targets.
   * @param workspaceId - Workspace that owns the space; also part of the
   *   storage path.
   * @returns The inserted `fileTasks` row.
   */
  async importZip(
    filePromise: Promise<MultipartFile>,
    source: string,
    userId: string,
    spaceId: string,
    workspaceId: string,
  ) {
    const file = await filePromise;

    // Strip the extension as typed (path.basename is case-sensitive) and
    // re-append it lower-cased, so "Archive.ZIP" is stored as "Archive.zip"
    // rather than "Archive.ZIP.zip".
    const rawExtension = path.extname(file.filename);
    const fileExtension = rawExtension.toLowerCase();
    const fileName = sanitizeFileName(
      path.basename(file.filename, rawExtension),
    );
    const fileNameWithExt = fileName + fileExtension;

    const fileTaskId = uuid7();
    const filePath = `${getFileTaskFolderPath(FileTaskType.Import, workspaceId)}/${fileTaskId}/${fileNameWithExt}`;

    // upload file; the byte count is read only after the upload has finished
    const { stream, getBytesRead } = createByteCountingStream(file.file);
    await this.storageService.upload(filePath, stream);
    const fileSize = getBytesRead();

    const fileTask = await this.db
      .insertInto('fileTasks')
      .values({
        id: fileTaskId,
        type: FileTaskType.Import,
        source: source,
        status: FileTaskStatus.Processing,
        fileName: fileNameWithExt,
        filePath: filePath,
        fileSize: fileSize,
        fileExt: 'zip',
        creatorId: userId,
        spaceId: spaceId,
        workspaceId: workspaceId,
      })
      .returningAll()
      .executeTakeFirst();

    await this.fileTaskQueue.add(QueueJob.IMPORT_TASK, {
      fileTaskId: fileTaskId,
    });

    return fileTask;
  }

  /** Renders a caught value as `Name: message`, or its string form when it is not an Error. */
  private describeError(err: unknown): string {
    return err instanceof Error ? `${err.name}: ${err.message}` : String(err);
  }
}