Merge remote-tracking branch 'gitea/develop' into fix/review-batch-2
# Conflicts: # AGENTS.md # CHANGELOG.md # README.md # apps/server/src/collaboration/collaboration.handler.ts # apps/server/src/common/helpers/prosemirror/html-embed.spec.ts # apps/server/src/common/helpers/prosemirror/html-embed.util.ts # apps/server/src/core/ai-chat/public-share-chat.service.ts # apps/server/src/core/ai-chat/public-share-chat.spec.ts # apps/server/src/core/ai-chat/public-share-workspace-limiter.ts # apps/server/src/core/page/services/page.service.ts # apps/server/src/core/page/transclusion/transclusion.service.ts # apps/server/src/integrations/import/services/file-import-task.service.ts # apps/server/src/integrations/import/services/import.service.ts
This commit is contained in:
@@ -3,6 +3,7 @@ import { InjectQueue } from '@nestjs/bullmq';
|
||||
import { Queue } from 'bullmq';
|
||||
import { QueueName, QueueJob } from '../queue/constants';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo';
|
||||
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
|
||||
import { PageEmbeddingRepo } from '@docmost/db/repos/ai-chat/page-embedding.repo';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
@@ -49,6 +50,7 @@ export interface UpdateAiSettingsInput {
|
||||
export class AiSettingsService {
|
||||
constructor(
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private readonly aiAgentRoleRepo: AiAgentRoleRepo,
|
||||
private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
|
||||
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
|
||||
private readonly pageRepo: PageRepo,
|
||||
@@ -110,6 +112,26 @@ export class AiSettingsService {
|
||||
return settings?.ai?.publicShareAssistant === true;
|
||||
}
|
||||
|
||||
/**
 * Look up the display name the anonymous public-share widget should show for
 * the assistant, in place of the generic "AI agent" label.
 *
 * The name is taken from the agent role configured as the public-share
 * assistant's identity. `null` is returned when no role id is configured,
 * when the referenced role is missing or disabled, or when its name is blank
 * after trimming (built-in persona → the client falls back to "AI agent").
 * Mirrors the role resolution in PublicShareChatService.resolveShareRole.
 *
 * @param workspaceId Workspace whose AI settings and roles are consulted.
 * @returns The trimmed role name, or `null` when no usable name exists.
 */
async resolvePublicShareAssistantName(
  workspaceId: string,
): Promise<string | null> {
  const settings = await this.resolve(workspaceId);
  const configuredRoleId = settings?.publicShareAssistantRoleId;
  if (!configuredRoleId) {
    return null;
  }

  const agentRole = await this.aiAgentRoleRepo.findById(
    configuredRoleId,
    workspaceId,
  );
  if (!(agentRole && agentRole.enabled)) {
    return null;
  }

  // A whitespace-only (or absent) name counts as "no name".
  const trimmedName = agentRole.name?.trim();
  if (trimmedName) {
    return trimmedName;
  }
  return null;
}
|
||||
|
||||
/** Read the stored non-secret provider settings for a workspace. */
|
||||
private async readProvider(
|
||||
workspaceId: string,
|
||||
|
||||
@@ -20,12 +20,6 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
||||
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
|
||||
import { markdownToHtml } from '@docmost/editor-ext';
|
||||
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { formatImportHtml } from '../utils/import-formatter';
|
||||
import {
|
||||
buildAttachmentCandidates,
|
||||
@@ -59,8 +53,6 @@ export class FileImportTaskService {
|
||||
private readonly backlinkRepo: BacklinkRepo,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
private readonly importAttachmentService: ImportAttachmentService,
|
||||
private readonly userRepo: UserRepo,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private eventEmitter: EventEmitter2,
|
||||
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
|
||||
) {}
|
||||
@@ -157,25 +149,6 @@ export class FileImportTaskService {
|
||||
.where('id', '=', fileTask.spaceId)
|
||||
.executeTakeFirst();
|
||||
|
||||
// SECURITY (Variant C admin gate, zip/multi-file import write path):
|
||||
// An imported .html/.md file can carry an htmlEmbed marker (the node's
|
||||
// serialized form), which would execute raw, unsanitized JS in readers'
|
||||
// browsers. Only workspace admins/owners may author it. Resolve the
|
||||
// importer's role ONCE here; each page's prosemirror JSON is run through the
|
||||
// strip below before textContent/ydoc/insert when the importer is not an
|
||||
// admin, so a non-admin cannot smuggle the node in via a zip import (which
|
||||
// requires only space Edit).
|
||||
const importingUser = await this.userRepo.findById(
|
||||
fileTask.creatorId,
|
||||
fileTask.workspaceId,
|
||||
);
|
||||
// Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed
|
||||
// survives only when the workspace feature toggle is ON and the importer is
|
||||
// an admin/owner. OFF (default) => stripped for everyone.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(fileTask.workspaceId))?.settings,
|
||||
);
|
||||
|
||||
const pagesMap = new Map<string, ImportPageNode>();
|
||||
|
||||
for (const absPath of allFiles) {
|
||||
@@ -523,22 +496,9 @@ export class FileImportTaskService {
|
||||
await this.importService.processHTML(html),
|
||||
);
|
||||
|
||||
let { title, prosemirrorJson } =
|
||||
const { title, prosemirrorJson } =
|
||||
this.importService.extractTitleAndRemoveHeading(pmState);
|
||||
|
||||
// SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
|
||||
// imported by a non-admin BEFORE computing textContent/ydoc/insert.
|
||||
// Gate (featureEnabled AND admin) is resolved once above and recomputed
|
||||
// by the helper from the same htmlEmbedEnabled + importer role.
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: importingUser?.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
|
||||
),
|
||||
});
|
||||
|
||||
const insertablePage: InsertablePage = {
|
||||
id: page.id,
|
||||
slugId: page.slugId,
|
||||
|
||||
@@ -1,266 +0,0 @@
|
||||
// Exercises the REAL htmlEmbed admin gate on the two import write paths:
|
||||
//
|
||||
// (1) ImportService.importPage() — single .html/.md upload
|
||||
// (2) FileImportTaskService.processGenericImport() — zip / multi-file import
|
||||
//
|
||||
// Both build content/textContent/ydoc directly and persist (bypassing the
|
||||
// collab onStoreDocument strip), so each must run the imported document through
|
||||
// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read
|
||||
// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed,
|
||||
// `stripHtmlEmbedNodes` BEFORE persisting.
|
||||
//
|
||||
// This spec constructs the REAL services with deps mocked, feeds an imported
|
||||
// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed
|
||||
// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED
|
||||
// content (captured at the insert boundary) is stripped for a non-admin /
|
||||
// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD
|
||||
// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts.
|
||||
//
|
||||
// Three modules are mocked away because they pull transitive ESM deps that
// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the
// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via
// import-attachment). None of them participates in the gate decision:
//   - collaboration.gateway: replaced with an empty `CollaborationGateway`
//     class stub.
//   - import-formatter: contextless HTML cleanup + link rewriting; replaced with
//     faithful passthroughs (the embed div has no href/iframe, so the real
//     normalizer would leave it untouched anyway).
//   - import-attachment: attachment rewriting; passthrough returns html as-is.
|
||||
// Collaboration gateway: swapped for an empty class stub.
jest.mock('../../../collaboration/collaboration.gateway', () => {
  class CollaborationGateway {}
  return { CollaborationGateway };
});

// Import formatter: a no-op normalizer plus a formatter that echoes the input
// html and reports no backlinks and no page icon.
jest.mock('../utils/import-formatter', () => {
  function normalizeImportHtml(): void {}

  async function formatImportHtml(opts: any) {
    return {
      html: opts.html,
      backlinks: [],
      pageIcon: undefined,
    };
  }

  return { normalizeImportHtml, formatImportHtml };
});

// Import attachment service: swapped for an empty class stub.
jest.mock('./import-attachment.service', () => {
  class ImportAttachmentService {}
  return { ImportAttachmentService };
});
|
||||
|
||||
import { promises as fs } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { ImportService } from './import.service';
|
||||
import { FileImportTaskService } from './file-import-task.service';
|
||||
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Fixture identifiers shared by every case in this spec.
const WS = 'ws-1';
const SPACE = 'space-1';
const USER = 'importer-1';

// Imported HTML fixture: a plain paragraph followed by the serialized htmlEmbed
// node. The REAL htmlToJson parses
// `<div data-type="htmlEmbed" data-source="BASE64">` into an htmlEmbed PM node;
// the base64 payload used here decodes to `<script>x</script>`.
const HTML_WITH_EMBED = [
  '<p>imported body</p>',
  '<div data-type="htmlEmbed" data-source="PHNjcmlwdD54PC9zY3JpcHQ+"></div>',
].join('');
|
||||
|
||||
/**
 * Build a WorkspaceRepo stand-in for the specs.
 *
 * Its `findById` is a jest mock that ignores its arguments and resolves to a
 * fresh workspace object with id `WS` and `settings.htmlEmbed` set to the given
 * flag.
 *
 * @param featureEnabled Value to expose as the workspace's `htmlEmbed` setting.
 */
function workspaceRepoFor(featureEnabled: boolean) {
  const findById = jest.fn(async () => {
    const settings = { htmlEmbed: featureEnabled };
    return { id: WS, settings };
  });
  return { findById };
}
|
||||
|
||||
// UserRepo stand-in: `findById` is a jest mock that always resolves to the
// supplied importer record, whose `role` the gate reads. Passing `undefined`
// models a missing user, which the gate must treat as not allowed (fail closed).
function userRepoFor(user: { role?: string } | undefined) {
  const findById = jest.fn(async () => user);
  return { findById };
}
|
||||
|
||||
describe('ImportService.importPage htmlEmbed admin gate (real code)', () => {
  type Importer = { role?: string } | undefined;

  /**
   * Drive the real importPage with one uploaded .html file and return the
   * `content` of the single row captured at pageRepo.insertPage.
   */
  const persistedContent = async (featureEnabled: boolean, user: Importer) => {
    const insertedRows: any[] = [];
    const insertPage = jest.fn(async (row: any) => {
      insertedRows.push(row);
      return { id: 'p1', slugId: 's1', ...row };
    });
    const pageRepo: any = { insertPage };

    // Chainable select stub (db is only used for getNewPagePosition): every
    // builder step returns the chain itself and the terminal call finds no row.
    const selectChain: any = { executeTakeFirst: async () => undefined };
    for (const step of ['select', 'where', 'orderBy', 'limit']) {
      selectChain[step] = () => selectChain;
    }
    const db: any = { selectFrom: () => selectChain };

    const storageService: any = { putBuffer: jest.fn() }; // unused on this path
    const fileTaskQueue: any = { add: jest.fn() }; // unused

    const service = new ImportService(
      pageRepo,
      userRepoFor(user) as any,
      storageService,
      db,
      fileTaskQueue,
      workspaceRepoFor(featureEnabled) as any,
    );

    // Minimal multipart-file stand-in carrying the embed fixture.
    const upload: any = {
      filename: 'doc.html',
      toBuffer: async () => Buffer.from(HTML_WITH_EMBED, 'utf-8'),
    };

    await service.importPage(Promise.resolve(upload), USER, SPACE, WS);

    expect(insertedRows).toHaveLength(1);
    const [firstRow] = insertedRows;
    return firstRow.content;
  };

  it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
    const stored = await persistedContent(true, { role: 'member' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
    // The rest of the document must survive the strip.
    expect(JSON.stringify(stored)).toContain('imported body');
  });

  it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => {
    const stored = await persistedContent(true, undefined);
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });

  it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
    const stored = await persistedContent(true, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(true);
  });

  it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
    const stored = await persistedContent(true, { role: 'owner' });
    expect(hasHtmlEmbedNode(stored)).toBe(true);
  });

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    const stored = await persistedContent(false, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });
});
|
||||
|
||||
describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => {
  type Importer = { role?: string } | undefined;

  let extractDir: string;

  beforeEach(async () => {
    // Fresh real temp dir per test holding a single .html page that carries the
    // embed; the method under test reads it from disk.
    const dirPrefix = path.join(os.tmpdir(), 'html-embed-import-');
    extractDir = await fs.mkdtemp(dirPrefix);
    const pageFile = path.join(extractDir, 'page.html');
    await fs.writeFile(pageFile, HTML_WITH_EMBED);
  });

  afterEach(async () => {
    await fs.rm(extractDir, { recursive: true, force: true });
  });

  /**
   * Run processGenericImport over the temp dir and return the `content` of the
   * first row captured at trx.insertInto('pages').values(...).
   */
  const persistedContent = async (featureEnabled: boolean, user: Importer) => {
    const pageRows: any[] = [];

    // Transaction stub: records rows inserted into 'pages'; every insert
    // resolves to undefined.
    const trx: any = {
      insertInto: (table: string) => ({
        values: (row: any) => {
          if (table === 'pages') {
            pageRows.push(row);
          }
          return { execute: async () => undefined };
        },
      }),
    };

    const db: any = {
      // spaces lookup at the top of processGenericImport
      selectFrom: () => ({
        select: () => ({
          where: () => ({
            executeTakeFirst: async () => ({ slug: 'sp' }),
          }),
        }),
      }),
      // executeTx -> db.transaction().execute(cb)
      transaction: () => ({
        execute: async (cb: any) => cb(trx),
      }),
    };

    // A real ImportService built on empty deps; only its
    // processHTML / extractTitleAndRemoveHeading / createYdoc helpers are
    // delegated to, so the embed parse runs for real.
    const emptyDep = (): any => ({});
    const realImport = new ImportService(
      emptyDep(),
      emptyDep(),
      emptyDep(),
      emptyDep(),
      emptyDep(),
      emptyDep(),
    );
    const importService: any = {
      processHTML: (html: string) => realImport.processHTML(html),
      extractTitleAndRemoveHeading: (state: any) =>
        realImport.extractTitleAndRemoveHeading(state),
      createYdoc: (json: any) => realImport.createYdoc(json),
    };

    // passthrough: no attachment rewriting, return html unchanged
    const importAttachmentService: any = {
      processAttachments: jest.fn(async (opts: any) => opts.html),
    };

    const storageService: any = { putBuffer: jest.fn() };
    const pageService: any = { nextPagePosition: jest.fn(async () => 'a0') }; // position only
    const backlinkRepo: any = { insertBacklink: jest.fn() };
    const eventEmitter: any = { emit: jest.fn() };
    const auditService: any = { logBatchWithContext: jest.fn() };

    const service = new FileImportTaskService(
      storageService,
      importService,
      pageService,
      backlinkRepo,
      db,
      importAttachmentService,
      userRepoFor(user) as any,
      workspaceRepoFor(featureEnabled) as any,
      eventEmitter,
      auditService,
    );

    const fileTask: any = {
      id: 'task-1',
      creatorId: USER,
      workspaceId: WS,
      spaceId: SPACE,
      source: 'generic',
    };

    await service.processGenericImport({ extractDir, fileTask });

    expect(pageRows.length).toBeGreaterThanOrEqual(1);
    const [firstRow] = pageRows;
    return firstRow.content;
  };

  it('toggle ON + member: persisted page has htmlEmbed stripped', async () => {
    const stored = await persistedContent(true, { role: 'member' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
    // The rest of the document must survive the strip.
    expect(JSON.stringify(stored)).toContain('imported body');
  });

  it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => {
    const stored = await persistedContent(true, undefined);
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });

  it('toggle ON + admin: persisted page keeps the htmlEmbed', async () => {
    const stored = await persistedContent(true, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(true);
  });

  it('toggle ON + owner: persisted page keeps the htmlEmbed', async () => {
    const stored = await persistedContent(true, { role: 'owner' });
    expect(hasHtmlEmbedNode(stored)).toBe(true);
  });

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    const stored = await persistedContent(false, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });
});
|
||||
@@ -1,12 +1,5 @@
|
||||
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import {
|
||||
hasHtmlEmbedNode,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { MultipartFile } from '@fastify/multipart';
|
||||
import * as path from 'path';
|
||||
import {
|
||||
@@ -44,12 +37,10 @@ export class ImportService {
|
||||
|
||||
constructor(
|
||||
private readonly pageRepo: PageRepo,
|
||||
private readonly userRepo: UserRepo,
|
||||
private readonly storageService: StorageService,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
@InjectQueue(QueueName.FILE_TASK_QUEUE)
|
||||
private readonly fileTaskQueue: Queue,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async importPage(
|
||||
@@ -94,32 +85,7 @@ export class ImportService {
|
||||
|
||||
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
|
||||
const title = extracted.title;
|
||||
let prosemirrorJson = extracted.prosemirrorJson;
|
||||
|
||||
// SECURITY (Variant C admin gate, import write path):
|
||||
// An imported .html/.md file can carry an htmlEmbed marker (the node's
|
||||
// serialized form), which would execute raw JS in readers' browsers. Only
|
||||
// workspace admins/owners may author it, so strip htmlEmbed nodes from
|
||||
// imports performed by a non-admin user.
|
||||
// Outer has-check first so the user/workspace lookups below run only when an
|
||||
// embed is actually present (the common case carries none).
|
||||
if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
|
||||
const importingUser = await this.userRepo.findById(userId, workspaceId);
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
|
||||
// feature toggle is ON and the importer is an admin/owner. OFF (default)
|
||||
// => stripped for everyone.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(workspaceId))?.settings,
|
||||
);
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: importingUser?.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from import by user ${userId}`,
|
||||
),
|
||||
});
|
||||
}
|
||||
const prosemirrorJson = extracted.prosemirrorJson;
|
||||
|
||||
const pageTitle = title || fileName;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user