Merge remote-tracking branch 'gitea/develop' into fix/review-batch-2

# Conflicts:
#	AGENTS.md
#	CHANGELOG.md
#	README.md
#	apps/server/src/collaboration/collaboration.handler.ts
#	apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
#	apps/server/src/common/helpers/prosemirror/html-embed.util.ts
#	apps/server/src/core/ai-chat/public-share-chat.service.ts
#	apps/server/src/core/ai-chat/public-share-chat.spec.ts
#	apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
#	apps/server/src/core/page/services/page.service.ts
#	apps/server/src/core/page/transclusion/transclusion.service.ts
#	apps/server/src/integrations/import/services/file-import-task.service.ts
#	apps/server/src/integrations/import/services/import.service.ts
This commit is contained in:
claude code agent 227
2026-06-21 05:32:44 +03:00
65 changed files with 1448 additions and 2927 deletions

View File

@@ -3,6 +3,7 @@ import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueName, QueueJob } from '../queue/constants';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo';
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
import { PageEmbeddingRepo } from '@docmost/db/repos/ai-chat/page-embedding.repo';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
@@ -49,6 +50,7 @@ export interface UpdateAiSettingsInput {
export class AiSettingsService {
constructor(
private readonly workspaceRepo: WorkspaceRepo,
private readonly aiAgentRoleRepo: AiAgentRoleRepo,
private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
private readonly pageRepo: PageRepo,
@@ -110,6 +112,26 @@ export class AiSettingsService {
return settings?.ai?.publicShareAssistant === true;
}
/**
 * Look up the display name the anonymous public-share widget should show for
 * the assistant, so messages carry the persona name rather than the generic
 * "AI agent" label.
 *
 * Resolves to null (the client then falls back to "AI agent") when:
 *  - the resolved settings carry no `publicShareAssistantRoleId`,
 *  - the referenced role cannot be found in this workspace or is not enabled,
 *  - the role's name is missing or blank after trimming.
 *
 * Intended to mirror the role resolution in
 * PublicShareChatService.resolveShareRole.
 *
 * @param workspaceId Workspace whose public-share assistant is being labelled.
 * @returns The trimmed role name, or null when no usable name exists.
 */
async resolvePublicShareAssistantName(
  workspaceId: string,
): Promise<string | null> {
  const settings = await this.resolve(workspaceId);
  const roleId = settings?.publicShareAssistantRoleId;

  if (roleId) {
    const role = await this.aiAgentRoleRepo.findById(roleId, workspaceId);
    if (role?.enabled) {
      const trimmed = role.name?.trim();
      if (trimmed) return trimmed;
    }
  }

  return null;
}
/** Read the stored non-secret provider settings for a workspace. */
private async readProvider(
workspaceId: string,

View File

@@ -20,12 +20,6 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
import { markdownToHtml } from '@docmost/editor-ext';
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
import {
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedIfNotAllowed,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { formatImportHtml } from '../utils/import-formatter';
import {
buildAttachmentCandidates,
@@ -59,8 +53,6 @@ export class FileImportTaskService {
private readonly backlinkRepo: BacklinkRepo,
@InjectKysely() private readonly db: KyselyDB,
private readonly importAttachmentService: ImportAttachmentService,
private readonly userRepo: UserRepo,
private readonly workspaceRepo: WorkspaceRepo,
private eventEmitter: EventEmitter2,
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
) {}
@@ -157,25 +149,6 @@ export class FileImportTaskService {
.where('id', '=', fileTask.spaceId)
.executeTakeFirst();
// SECURITY (Variant C admin gate, zip/multi-file import write path):
// An imported .html/.md file can carry an htmlEmbed marker (the node's
// serialized form), which would execute raw, unsanitized JS in readers'
// browsers. Only workspace admins/owners may author it. Resolve the
// importer's role ONCE here; each page's prosemirror JSON is run through the
// strip below before textContent/ydoc/insert when the importer is not an
// admin, so a non-admin cannot smuggle the node in via a zip import (which
// requires only space Edit).
const importingUser = await this.userRepo.findById(
fileTask.creatorId,
fileTask.workspaceId,
);
// Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed
// survives only when the workspace feature toggle is ON and the importer is
// an admin/owner. OFF (default) => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(fileTask.workspaceId))?.settings,
);
const pagesMap = new Map<string, ImportPageNode>();
for (const absPath of allFiles) {
@@ -523,22 +496,9 @@ export class FileImportTaskService {
await this.importService.processHTML(html),
);
let { title, prosemirrorJson } =
const { title, prosemirrorJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
// SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
// imported by a non-admin BEFORE computing textContent/ydoc/insert.
// Gate (featureEnabled AND admin) is resolved once above and recomputed
// by the helper from the same htmlEmbedEnabled + importer role.
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
featureEnabled: htmlEmbedEnabled,
role: importingUser?.role,
onStrip: () =>
this.logger.warn(
`Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
),
});
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,

View File

@@ -1,266 +0,0 @@
// Exercises the REAL htmlEmbed admin gate on the two import write paths:
//
// (1) ImportService.importPage() — single .html/.md upload
// (2) FileImportTaskService.processGenericImport() — zip / multi-file import
//
// Both build content/textContent/ydoc directly and persist (bypassing the
// collab onStoreDocument strip), so each must run the imported document through
// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read
// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed,
// `stripHtmlEmbedNodes` BEFORE persisting.
//
// This spec constructs the REAL services with deps mocked, feeds an imported
// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed
// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED
// content (captured at the insert boundary) is stripped for a non-admin /
// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD
// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts.
//
// Three modules are mocked away because they pull transitive ESM deps that
// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the
// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via
// import-attachment). None of them participate in the gate decision:
// - import-formatter: contextless HTML cleanup + link rewriting; replaced with
// faithful passthroughs (the embed div has no href/iframe, so the real
// normalizer would leave it untouched anyway).
// - import-attachment: attachment rewriting; passthrough returns html as-is.
// Stand-in for the collaboration gateway module: exports an empty class under
// the same name, so importing it here does not load the real gateway.
jest.mock('../../../collaboration/collaboration.gateway', () => ({
CollaborationGateway: class {},
}));
// Stand-in for the import formatter: normalizeImportHtml is a no-op and
// formatImportHtml hands back the input html untouched, with an empty backlink
// list and no page icon.
jest.mock('../utils/import-formatter', () => ({
normalizeImportHtml: () => {},
formatImportHtml: async (opts: any) => ({
html: opts.html,
backlinks: [],
pageIcon: undefined,
}),
}));
// Stand-in for the import attachment service module: only an empty class is
// exported; the specs below pass their own stub object where one is needed.
jest.mock('./import-attachment.service', () => ({
ImportAttachmentService: class {},
}));
import { promises as fs } from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { ImportService } from './import.service';
import { FileImportTaskService } from './file-import-task.service';
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
// Fixed workspace / space / importer identifiers used by every case below.
const WS = 'ws-1';
const SPACE = 'space-1';
const USER = 'importer-1';

// Imported document: one ordinary paragraph followed by the serialized form of
// an htmlEmbed node (`<div data-type="htmlEmbed" data-source="BASE64">`), which
// the real htmlToJson is expected to parse into an htmlEmbed PM node. The
// base64 payload decodes to `<script>x</script>`.
const HTML_WITH_EMBED = [
  '<p>imported body</p>',
  '<div data-type="htmlEmbed" data-source="PHNjcmlwdD54PC9zY3JpcHQ+"></div>',
].join('');
/**
 * Builds a WorkspaceRepo stand-in for the specs.
 *
 * Its `findById` is a jest mock that ignores its arguments and resolves to a
 * fresh workspace object with id `WS` and `settings.htmlEmbed` set to the
 * requested toggle state.
 *
 * @param featureEnabled Value to expose as the workspace's `htmlEmbed` setting.
 */
function workspaceRepoFor(featureEnabled: boolean) {
  const loadWorkspace = async () => {
    const settings = { htmlEmbed: featureEnabled };
    return { id: WS, settings };
  };
  return { findById: jest.fn(loadWorkspace) };
}
/**
 * Builds a UserRepo stand-in for the specs.
 *
 * Its `findById` is a jest mock that ignores its arguments and resolves to the
 * given `user`. Passing `undefined` models a missing importer, which the gate
 * under test is expected to treat as not allowed (fail closed).
 *
 * @param user The record (with optional `role`) to resolve, or undefined.
 */
function userRepoFor(user: { role?: string } | undefined) {
  const findById = jest.fn(async () => user);
  return { findById };
}
describe('ImportService.importPage htmlEmbed admin gate (real code)', () => {
  /**
   * Drives the real ImportService.importPage with one uploaded `doc.html` that
   * carries the embed, and resolves to the `content` of the single row handed
   * to pageRepo.insertPage.
   */
  const importAndCaptureContent = async (
    featureEnabled: boolean,
    user: { role?: string } | undefined,
  ) => {
    const insertedRows: any[] = [];

    // Record every inserted row and echo it back with fixed ids.
    const insertPage = jest.fn(async (row: any) => {
      insertedRows.push(row);
      return { id: 'p1', slugId: 's1', ...row };
    });
    const pageRepo: any = { insertPage };

    // Self-returning query-builder stub whose terminal call yields undefined
    // (the original note says db is only touched by getNewPagePosition).
    const query: any = {};
    const chain = () => query;
    Object.assign(query, {
      select: chain,
      where: chain,
      orderBy: chain,
      limit: chain,
      executeTakeFirst: async () => undefined,
    });
    const db: any = { selectFrom: chain };

    const userRepo = userRepoFor(user);
    const workspaceRepo = workspaceRepoFor(featureEnabled);
    const storageService = { putBuffer: jest.fn() }; // not exercised here
    const fileTaskQueue = { add: jest.fn() }; // not exercised here

    const service = new ImportService(
      pageRepo,
      userRepo as any,
      storageService as any,
      db,
      fileTaskQueue as any,
      workspaceRepo as any,
    );

    // Minimal multipart-file shape: a name plus a buffer accessor.
    const upload: any = {
      filename: 'doc.html',
      toBuffer: async () => Buffer.from(HTML_WITH_EMBED, 'utf-8'),
    };

    await service.importPage(Promise.resolve(upload), USER, SPACE, WS);

    expect(insertedRows).toHaveLength(1);
    const [row] = insertedRows;
    return row.content;
  };

  it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
    const stored = await importAndCaptureContent(true, { role: 'member' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
    // The ordinary paragraph must survive the strip.
    expect(JSON.stringify(stored)).toContain('imported body');
  });

  it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => {
    const stored = await importAndCaptureContent(true, undefined);
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });

  it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
    const stored = await importAndCaptureContent(true, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(true);
  });

  it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
    const stored = await importAndCaptureContent(true, { role: 'owner' });
    expect(hasHtmlEmbedNode(stored)).toBe(true);
  });

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    const stored = await importAndCaptureContent(false, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });
});
describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => {
  let extractDir: string;

  // Each case gets its own temp directory containing a single `page.html` with
  // the embed; it is handed to processGenericImport as the extract directory.
  beforeEach(async () => {
    const prefix = path.join(os.tmpdir(), 'html-embed-import-');
    extractDir = await fs.mkdtemp(prefix);
    const pageFile = path.join(extractDir, 'page.html');
    await fs.writeFile(pageFile, HTML_WITH_EMBED);
  });

  afterEach(async () => {
    const removal = { recursive: true, force: true };
    await fs.rm(extractDir, removal);
  });

  /**
   * Drives the real FileImportTaskService.processGenericImport over the temp
   * dir and resolves to the `content` of the first row inserted into `pages`
   * through the transaction stub.
   */
  const importAndCaptureContent = async (
    featureEnabled: boolean,
    user: { role?: string } | undefined,
  ) => {
    const pageRows: any[] = [];

    // Transaction stub: remembers rows written to `pages`, ignores other tables.
    const trx: any = {
      insertInto: (table: string) => ({
        values: (row: any) => {
          if (table === 'pages') pageRows.push(row);
          return { execute: async () => undefined };
        },
      }),
    };

    // db stub: a select chain that resolves to a space slug, plus a transaction
    // runner that invokes the callback with the stub above.
    const whereStep = () => ({ executeTakeFirst: async () => ({ slug: 'sp' }) });
    const selectStep = () => ({ where: whereStep });
    const db: any = {
      selectFrom: () => ({ select: selectStep }),
      transaction: () => ({ execute: async (cb: any) => cb(trx) }),
    };

    // A real ImportService built on empty deps; only its processHTML,
    // extractTitleAndRemoveHeading and createYdoc are forwarded to, so parsing
    // of the embed runs through real code.
    const emptyDep = () => ({}) as any;
    const realImport = new ImportService(
      emptyDep(),
      emptyDep(),
      emptyDep(),
      emptyDep(),
      emptyDep(),
      emptyDep(),
    );
    const importService: any = {
      processHTML: (source: string) => realImport.processHTML(source),
      extractTitleAndRemoveHeading: (state: any) =>
        realImport.extractTitleAndRemoveHeading(state),
      createYdoc: (json: any) => realImport.createYdoc(json),
    };

    // Attachment handling is a passthrough: the html comes back unchanged.
    const importAttachmentService: any = {
      processAttachments: jest.fn(async (opts: any) => opts.html),
    };

    const storageService = { putBuffer: jest.fn() };
    const pageService = { nextPagePosition: jest.fn(async () => 'a0') };
    const backlinkRepo = { insertBacklink: jest.fn() };
    const userRepo = userRepoFor(user);
    const workspaceRepo = workspaceRepoFor(featureEnabled);
    const eventEmitter = { emit: jest.fn() };
    const auditService = { logBatchWithContext: jest.fn() };

    const service = new FileImportTaskService(
      storageService as any,
      importService,
      pageService as any,
      backlinkRepo as any,
      db,
      importAttachmentService,
      userRepo as any,
      workspaceRepo as any,
      eventEmitter as any,
      auditService as any,
    );

    const fileTask: any = {
      id: 'task-1',
      creatorId: USER,
      workspaceId: WS,
      spaceId: SPACE,
      source: 'generic',
    };

    await service.processGenericImport({ extractDir, fileTask });

    expect(pageRows.length).toBeGreaterThanOrEqual(1);
    const [firstPage] = pageRows;
    return firstPage.content;
  };

  it('toggle ON + member: persisted page has htmlEmbed stripped', async () => {
    const stored = await importAndCaptureContent(true, { role: 'member' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
    // The ordinary paragraph must survive the strip.
    expect(JSON.stringify(stored)).toContain('imported body');
  });

  it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => {
    const stored = await importAndCaptureContent(true, undefined);
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });

  it('toggle ON + admin: persisted page keeps the htmlEmbed', async () => {
    const stored = await importAndCaptureContent(true, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(true);
  });

  it('toggle ON + owner: persisted page keeps the htmlEmbed', async () => {
    const stored = await importAndCaptureContent(true, { role: 'owner' });
    expect(hasHtmlEmbedNode(stored)).toBe(true);
  });

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    const stored = await importAndCaptureContent(false, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });
});

View File

@@ -1,12 +1,5 @@
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import {
hasHtmlEmbedNode,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedIfNotAllowed,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { MultipartFile } from '@fastify/multipart';
import * as path from 'path';
import {
@@ -44,12 +37,10 @@ export class ImportService {
constructor(
private readonly pageRepo: PageRepo,
private readonly userRepo: UserRepo,
private readonly storageService: StorageService,
@InjectKysely() private readonly db: KyselyDB,
@InjectQueue(QueueName.FILE_TASK_QUEUE)
private readonly fileTaskQueue: Queue,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async importPage(
@@ -94,32 +85,7 @@ export class ImportService {
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
const title = extracted.title;
let prosemirrorJson = extracted.prosemirrorJson;
// SECURITY (Variant C admin gate, import write path):
// An imported .html/.md file can carry an htmlEmbed marker (the node's
// serialized form), which would execute raw JS in readers' browsers. Only
// workspace admins/owners may author it, so strip htmlEmbed nodes from
// imports performed by a non-admin user.
// Outer has-check first so the user/workspace lookups below run only when an
// embed is actually present (the common case carries none).
if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
const importingUser = await this.userRepo.findById(userId, workspaceId);
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
// feature toggle is ON and the importer is an admin/owner. OFF (default)
// => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(workspaceId))?.settings,
);
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
featureEnabled: htmlEmbedEnabled,
role: importingUser?.role,
onStrip: () =>
this.logger.warn(
`Stripping htmlEmbed node(s) from import by user ${userId}`,
),
});
}
const prosemirrorJson = extracted.prosemirrorJson;
const pageTitle = title || fileName;