Merge remote-tracking branch 'gitea/develop' into fix/review-batch-2

# Conflicts:
#	AGENTS.md
#	CHANGELOG.md
#	README.md
#	apps/server/src/collaboration/collaboration.handler.ts
#	apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
#	apps/server/src/common/helpers/prosemirror/html-embed.util.ts
#	apps/server/src/core/ai-chat/public-share-chat.service.ts
#	apps/server/src/core/ai-chat/public-share-chat.spec.ts
#	apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
#	apps/server/src/core/page/services/page.service.ts
#	apps/server/src/core/page/transclusion/transclusion.service.ts
#	apps/server/src/integrations/import/services/file-import-task.service.ts
#	apps/server/src/integrations/import/services/import.service.ts
This commit is contained in:
claude code agent 227
2026-06-21 05:32:44 +03:00
65 changed files with 1448 additions and 2927 deletions

View File

@@ -1,120 +0,0 @@
import * as Y from 'yjs';
import { TiptapTransformer } from '@hocuspocus/transformer';
import { CollaborationHandler } from './collaboration.handler';
import { hasHtmlEmbedNode } from '../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL CollaborationHandler.updatePageContent admin gate (the
// REST/MCP/AI content-update entrypoint, used by the page update endpoint and
// the MCP/AI agent). updatePageContent reads `user?.role` and strips htmlEmbed
// BEFORE handing the json to withYdocConnection. We stub only
// withYdocConnection (which would otherwise open a real hocuspocus connection):
// the role-extraction (`user?.role`) + strip that run upstream of it are REAL
// production code. The 'replace' branch then runs the production
// TiptapTransformer.toYdoc on the gated json against a real Y.Doc, which we
// decode back to JSON and assert on. This replaces the re-implemented
// `applyAdminGate` stand-in for this entrypoint.
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
{
type: 'columns',
content: [
{
type: 'column',
attrs: { position: 'left' },
content: [
{ type: 'htmlEmbed', attrs: { source: '<script>nested</script>' } },
{ type: 'paragraph', content: [{ type: 'text', text: 'inner' }] },
],
},
{
type: 'column',
attrs: { position: 'right' },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'r' }] },
],
},
],
},
{ type: 'htmlEmbed', attrs: { source: '<script>top</script>' } },
],
});
/**
* Run the REAL updatePageContent('replace') with a stubbed withYdocConnection.
* The stub provides a real Y.Doc + recording fragment; the production fn calls
* TiptapTransformer.toYdoc(<gated json>) and applies it to the doc, so decoding
* the doc afterward yields exactly the gated content.
*/
async function gatedContentFor(
role: string | null | undefined,
featureEnabled = true,
) {
// Workspace settings read used by the toggle-AND-admin gate.
const workspaceRepo = {
findById: jest.fn(async () => ({
id: 'ws-1',
settings: { htmlEmbed: featureEnabled },
})),
};
const handler = new CollaborationHandler(workspaceRepo as any);
const captureDoc = new Y.Doc();
jest
.spyOn(handler, 'withYdocConnection')
.mockImplementation(async (_hp, _name, _ctx, fn: any) => {
const fragment = captureDoc.getXmlFragment('default');
// Mirror the real Document surface the fn touches.
const docLike: any = {
getXmlFragment: () => fragment,
};
// The fn does: fragment.delete(0,len) then
// Y.applyUpdate(doc, encodeStateAsUpdate(toYdoc(gatedJson))). It calls
// Y.applyUpdate(doc, ...) — so docLike must be a real Y.Doc target.
fn(captureDoc);
});
const handlers = handler.getHandlers({} as any);
await handlers.updatePageContent('page-1', {
prosemirrorJson: docWithEmbed(),
operation: 'replace',
user: { id: 'u1', role, workspaceId: 'ws-1' } as any,
});
return TiptapTransformer.fromYdoc(captureDoc, 'default');
}
describe('CollaborationHandler.updatePageContent htmlEmbed admin gate (real code)', () => {
it('non-admin (member): every htmlEmbed (top-level + nested) stripped before the ydoc', async () => {
const gated = await gatedContentFor('member');
expect(hasHtmlEmbedNode(gated)).toBe(false);
// Non-embed siblings survive.
const json = JSON.stringify(gated);
expect(json).toContain('keep');
expect(json).toContain('inner');
});
it('unknown/empty role: fails closed (stripped)', async () => {
for (const role of [undefined, null, 'viewer'] as const) {
expect(hasHtmlEmbedNode(await gatedContentFor(role))).toBe(false);
}
});
it('toggle ON + admin: htmlEmbed preserved', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('admin', true))).toBe(true);
});
it('toggle ON + owner: htmlEmbed preserved', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('owner', true))).toBe(true);
});
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('admin', false))).toBe(false);
});
it('toggle OFF + member: stripped', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('member', false))).toBe(false);
});
});

View File

@@ -8,11 +8,6 @@ import {
import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util';
import * as Y from 'yjs';
import { User } from '@docmost/db/types/entity.types';
import {
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedIfNotAllowed,
} from '../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
export type CollabEventHandlers = ReturnType<
CollaborationHandler['getHandlers']
@@ -22,8 +17,6 @@ export type CollabEventHandlers = ReturnType<
export class CollaborationHandler {
private readonly logger = new Logger(CollaborationHandler.name);
constructor(private readonly workspaceRepo: WorkspaceRepo) {}
getHandlers(hocuspocus: Hocuspocus) {
return {
alterState: async (documentName: string, payload: { pageId: string }) => {
@@ -89,30 +82,9 @@ export class CollaborationHandler {
},
) => {
const { operation, user } = payload;
let { prosemirrorJson } = payload;
const { prosemirrorJson } = payload;
this.logger.debug('Updating page content via yjs', documentName);
// SECURITY (Variant C admin gate, REST/MCP/AI write path):
// updatePageContent is the server-side entrypoint used by the REST page
// update endpoint and by the MCP/AI agent. Raw `htmlEmbed` nodes execute
// arbitrary JS in every reader's browser, so a NON-admin caller must not
// be able to persist them here. If the editing user is not a workspace
// admin/owner, strip every htmlEmbed node before it reaches the ydoc.
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
// feature toggle is ON and the editing user is an admin/owner. OFF
// (default) => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(user?.workspaceId))?.settings,
);
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
featureEnabled: htmlEmbedEnabled,
role: user?.role,
onStrip: () =>
this.logger.warn(
`Stripping htmlEmbed node(s) from update by user ${user?.id} on ${documentName}`,
),
});
await this.withYdocConnection(
hocuspocus,
documentName,

View File

@@ -1,456 +0,0 @@
import * as Y from 'yjs';
import { TiptapTransformer } from '@hocuspocus/transformer';
import { PersistenceExtension } from './persistence.extension';
import { tiptapExtensions } from '../collaboration.util';
import {
collectHtmlEmbedSources,
hasHtmlEmbedNode,
HTML_EMBED_NODE_NAME,
} from '../../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL PersistenceExtension.onStoreDocument (the primary collab
// WebSocket write path) against a REAL ydoc, with thin repo/db/queue mocks.
// This replaces the prior re-implemented `applyAdminGate` stand-in for this
// entrypoint: if the role-extraction expression (`context?.user?.role`), the
// strip call, or the ydoc-rebuild branch is deleted/changed, these tests fail.
const RICH_DOC = {
type: 'doc',
content: [
{
type: 'paragraph',
content: [{ type: 'text', text: 'intro paragraph' }],
},
{
type: 'columns',
content: [
{
type: 'column',
attrs: { position: 'left' },
content: [
{
type: 'paragraph',
content: [
{ type: 'text', text: 'left col, mentioning ' },
{
type: 'mention',
attrs: {
id: 'mention-1',
label: 'Alice',
entityType: 'user',
entityId: 'user-123',
creatorId: 'creator-1',
},
},
],
},
// Nested embed inside a column — must be stripped recursively.
{
type: HTML_EMBED_NODE_NAME,
attrs: { source: '<script>nested()</script>' },
},
],
},
{
type: 'column',
attrs: { position: 'right' },
content: [
{
type: 'table',
content: [
{
type: 'tableRow',
content: [
{
type: 'tableHeader',
attrs: { colspan: 1, rowspan: 1 },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'H' }] },
],
},
],
},
{
type: 'tableRow',
content: [
{
type: 'tableCell',
attrs: { colspan: 1, rowspan: 1 },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'cell' }] },
],
},
],
},
],
},
],
},
],
},
// Top-level embed — must be stripped.
{
type: HTML_EMBED_NODE_NAME,
attrs: { source: '<script>top()</script>' },
},
{
type: 'paragraph',
content: [{ type: 'text', text: 'outro paragraph' }],
},
],
};
function buildYdoc(json: any): Y.Doc {
return TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
}
// Count nodes by type across the whole tree (excludes htmlEmbed by listing it
// separately) so we can assert every OTHER node type survived the strip.
function nodeTypeCounts(json: any): Record<string, number> {
const counts: Record<string, number> = {};
const walk = (n: any) => {
if (!n || typeof n !== 'object') return;
if (n.type) counts[n.type] = (counts[n.type] ?? 0) + 1;
if (Array.isArray(n.content)) n.content.forEach(walk);
};
walk(json);
return counts;
}
/**
* Construct a real PersistenceExtension with the minimum mocks needed for
* onStoreDocument to reach the strip + persist branch, and capture the content
* that would be written to the page row.
*/
function buildExtension(featureEnabled = true, priorContent?: any) {
const captured: { content?: any } = {};
const existingPage = {
id: 'page-1',
slugId: 'slug-1',
spaceId: 'space-1',
workspaceId: 'ws-1',
creatorId: 'creator-1',
contributorIds: [],
// The currently-persisted content. Defaults to an empty doc (differs from
// new content -> persist runs); a test may pass a prior admin embed here to
// exercise the preserve-admin-embed branch.
content: priorContent ?? { type: 'doc', content: [] },
createdAt: new Date(),
lastUpdatedSource: 'user',
};
const pageRepo = {
findById: jest.fn(async () => ({ ...existingPage })),
updatePage: jest.fn(async (values: any) => {
captured.content = values.content;
}),
};
const pageHistoryRepo = {
findPageLastHistory: jest.fn(async () => null),
saveHistory: jest.fn(async () => undefined),
};
// db.transaction().execute(cb) just runs the callback (no real DB).
const db = {
transaction: () => ({
execute: (cb: any) => cb({} as any),
}),
};
const noopQueue = { add: jest.fn(async () => undefined) } as any;
const collabHistory = { addContributors: jest.fn(async () => undefined) } as any;
const transclusionService = {
syncPageTransclusions: jest.fn(async () => undefined),
syncPageReferences: jest.fn(async () => undefined),
} as any;
// Workspace settings read used by the toggle-AND-admin gate.
const workspaceRepo = {
findById: jest.fn(async () => ({
id: 'ws-1',
settings: { htmlEmbed: featureEnabled },
})),
};
const ext = new PersistenceExtension(
pageRepo as any,
pageHistoryRepo as any,
db as any,
noopQueue,
noopQueue,
noopQueue,
collabHistory,
transclusionService,
workspaceRepo as any,
);
return { ext, captured, pageRepo };
}
async function runStore(
role: string | null | undefined,
doc: Y.Doc,
featureEnabled = true,
priorContent?: any,
) {
const { ext, captured } = buildExtension(featureEnabled, priorContent);
// hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has
// none, so stub it (the post-persist broadcast is not under test here).
(doc as any).broadcastStateless = () => undefined;
await ext.onStoreDocument({
documentName: 'page-1',
document: doc,
context: { user: { id: 'u1', role } },
} as any);
return captured;
}
describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)', () => {
it('non-admin store: strips EVERY htmlEmbed but preserves every other node', async () => {
const doc = buildYdoc(RICH_DOC);
const before = TiptapTransformer.fromYdoc(doc, 'default');
expect(hasHtmlEmbedNode(before)).toBe(true);
const beforeCounts = nodeTypeCounts(before);
const captured = await runStore('member', doc);
expect(captured.content).toBeDefined();
// htmlEmbed gone from the persisted content.
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
// Every non-embed node type is preserved with the SAME count (guards against
// data loss if a node were missing from tiptapExtensions and dropped on the
// toYdoc rebuild).
const afterCounts = nodeTypeCounts(captured.content);
for (const [type, count] of Object.entries(beforeCounts)) {
if (type === HTML_EMBED_NODE_NAME) continue;
expect(afterCounts[type]).toBe(count);
}
// The two embeds are gone.
expect(beforeCounts[HTML_EMBED_NODE_NAME]).toBe(2);
expect(afterCounts[HTML_EMBED_NODE_NAME]).toBeUndefined();
// The shared ydoc fragment was also rewritten clean (re-decode it).
const reDecoded = TiptapTransformer.fromYdoc(doc, 'default');
expect(hasHtmlEmbedNode(reDecoded)).toBe(false);
});
it('toggle ON + admin store: htmlEmbed preserved in persisted content', async () => {
const captured = await runStore('admin', buildYdoc(RICH_DOC), true);
expect(captured.content).toBeDefined();
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
expect(nodeTypeCounts(captured.content)[HTML_EMBED_NODE_NAME]).toBe(2);
});
it('toggle ON + owner store: htmlEmbed preserved', async () => {
const captured = await runStore('owner', buildYdoc(RICH_DOC), true);
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
});
it('toggle OFF + admin store: stripped (feature disabled for everyone)', async () => {
const captured = await runStore('admin', buildYdoc(RICH_DOC), false);
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
});
it('toggle OFF + owner store: stripped', async () => {
const captured = await runStore('owner', buildYdoc(RICH_DOC), false);
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
});
it('toggle OFF + member store: stripped', async () => {
const captured = await runStore('member', buildYdoc(RICH_DOC), false);
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
});
it('unknown/empty role: fails closed (stripped)', async () => {
expect(
hasHtmlEmbedNode((await runStore(undefined, buildYdoc(RICH_DOC))).content),
).toBe(false);
expect(
hasHtmlEmbedNode((await runStore(null, buildYdoc(RICH_DOC))).content),
).toBe(false);
expect(
hasHtmlEmbedNode((await runStore('viewer', buildYdoc(RICH_DOC))).content),
).toBe(false);
});
it('toggle ON + non-admin store: PRESERVES an admin embed already in the persisted content through an unrelated edit', async () => {
// Prior persisted content already holds an admin-authored embed.
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
const prior = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
],
};
// A non-admin makes an UNRELATED edit (tweaks the paragraph) but the embed
// is still present in the merged doc.
const edited = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
],
};
const captured = await runStore('member', buildYdoc(edited), true, prior);
expect(captured.content).toBeDefined();
// The admin's pre-existing embed survives the non-admin store.
expect(collectHtmlEmbedSources(captured.content)).toEqual(
new Set([ADMIN_SOURCE]),
);
});
it('toggle ON + non-admin store: strips a NEWLY-added embed while keeping the prior admin one', async () => {
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
const prior = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
],
};
// Non-admin keeps the admin embed, makes an unrelated paragraph edit (so the
// store is not a no-op and is persisted), and ALSO adds a brand-new embed.
const edited = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
],
};
const captured = await runStore('member', buildYdoc(edited), true, prior);
expect(captured.content).toBeDefined();
// Only the admin-vetted source remains; the newly-introduced one is stripped.
expect(collectHtmlEmbedSources(captured.content)).toEqual(
new Set([ADMIN_SOURCE]),
);
});
it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => {
const emptyDoc = buildYdoc({
type: 'doc',
content: [{ type: 'paragraph' }],
});
// Non-admin path with an empty/embed-free fragment must be a no-op strip,
// not throw.
await expect(runStore('member', emptyDoc)).resolves.toBeDefined();
});
});
// Exercises the REAL early onChange guard (Gitea #26): guardHtmlEmbed converges
// the shared ydoc sub-second, before the 10s store debounce. We call it directly
// (it is the debounced timer body) and assert the ydoc fragment no longer yields
// an htmlEmbed for the non-admin's transient embed, while admin-vetted embeds
// already in the persisted content survive.
describe('PersistenceExtension.guardHtmlEmbed early onChange guard (real code)', () => {
async function runGuard(
role: string | null | undefined,
doc: Y.Doc,
featureEnabled = true,
priorContent?: any,
) {
const { ext } = buildExtension(featureEnabled, priorContent);
await (ext as any).guardHtmlEmbed(
'page-1',
doc,
{ user: { id: 'u1', role, workspaceId: 'ws-1' } },
);
}
it('toggle ON + non-admin: strips a newly-added embed from the shared ydoc', async () => {
// Prior persisted content has NO embed; the live doc has one a non-admin
// just added.
const doc = buildYdoc({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
],
});
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
true,
);
await runGuard('member', doc, true, { type: 'doc', content: [] });
// The shared ydoc fragment no longer yields any htmlEmbed.
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
false,
);
});
it('toggle ON + non-admin: preserves a prior admin embed, strips the new one', async () => {
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
const prior = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
],
};
// Live doc keeps the admin embed AND adds a brand-new one.
const doc = buildYdoc({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
],
});
await runGuard('member', doc, true, prior);
// Only the admin-vetted source survives in the shared ydoc.
expect(
collectHtmlEmbedSources(TiptapTransformer.fromYdoc(doc, 'default')),
).toEqual(new Set([ADMIN_SOURCE]));
});
it('toggle OFF + non-admin: strips ALL embeds (allow-list is null)', async () => {
// Even an embed that matches the prior content is stripped when the toggle
// is OFF, because the OFF path passes allowed=null (strip everything) and
// never reads the prior content for an allow-list.
const SOURCE = '<script>any()</script>';
const doc = buildYdoc({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } },
],
});
await runGuard('member', doc, false, {
type: 'doc',
content: [{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } }],
});
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
false,
);
});
it('admin role: guard is a defensive no-op (embed preserved)', async () => {
const doc = buildYdoc({
type: 'doc',
content: [
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>ok()</script>' } },
],
});
await runGuard('admin', doc, true, { type: 'doc', content: [] });
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
true,
);
});
it('no embed present: guard is a cheap no-op (loop-safe re-fire)', async () => {
const doc = buildYdoc({
type: 'doc',
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'plain' }] }],
});
await runGuard('member', doc, true, { type: 'doc', content: [] });
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
false,
);
});
});

View File

@@ -39,16 +39,6 @@ import {
HISTORY_INTERVAL,
} from '../constants';
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
import {
canAuthorHtmlEmbed,
collectHtmlEmbedSources,
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripDisallowedHtmlEmbedNodes,
stripHtmlEmbedNodes,
} from '../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
@Injectable()
export class PersistenceExtension implements Extension {
@@ -59,21 +49,6 @@ export class PersistenceExtension implements Extension {
// coalescing window" per document and OR it across all edits in the window,
// so the snapshot is marked 'agent' regardless of who wrote last.
private agentTouched: Map<string, boolean> = new Map();
// Per-document debounce timers for the early htmlEmbed guard (Gitea #26).
// onChange schedules a short (~300ms) debounced strip that converges the
// shared ydoc for all connected clients well before the 10s store debounce,
// shrinking the pre-persist broadcast window of a non-admin's transient embed.
private htmlEmbedGuardTimers: Map<string, NodeJS.Timeout> = new Map();
// Per-document cache of the workspace htmlEmbed toggle (Gitea #26). Populated
// in onLoadDocument (which already loads the page + has workspace context) and
// read in onChange to gate early-guard scheduling: when the toggle is OFF (the
// common default) we schedule NOTHING — no timer, no fromYdoc, no DB read — and
// rely on the onStoreDocument strip as the backstop (when OFF the embed does
// not execute in editable mode anyway). Cleared in afterUnloadDocument.
// STALENESS: if an admin flips the toggle ON mid-session this cache stays OFF
// until the document is reloaded, so the early guard won't schedule — accepted,
// the onStoreDocument backstop still strips on persist.
private htmlEmbedToggleByDoc: Map<string, boolean> = new Map();
constructor(
private readonly pageRepo: PageRepo,
@@ -84,7 +59,6 @@ export class PersistenceExtension implements Extension {
@InjectQueue(QueueName.NOTIFICATION_QUEUE) private notificationQueue: Queue,
private readonly collabHistory: CollabHistoryService,
private readonly transclusionService: TransclusionService,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async onLoadDocument(data: onLoadDocumentPayload) {
@@ -105,23 +79,6 @@ export class PersistenceExtension implements Extension {
return;
}
// Cache the workspace htmlEmbed toggle for this document (Gitea #26). We
// already have the page (hence its workspaceId) here, so resolve the toggle
// once and cache it keyed by documentName. onChange reads this to decide
// whether to schedule the early guard at all — when OFF we skip the guard
// entirely (no timer, no fromYdoc, no DB read). Cleared in
// afterUnloadDocument. See htmlEmbedToggleByDoc for the staleness note.
try {
const enabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(page.workspaceId))?.settings,
);
this.htmlEmbedToggleByDoc.set(documentName, enabled);
} catch (err) {
// Fail OFF: if the toggle can't be resolved, never schedule the early
// guard; the onStoreDocument backstop still strips on persist.
this.htmlEmbedToggleByDoc.set(documentName, false);
}
if (page.ydoc) {
this.logger.debug(`ydoc loaded from db: ${pageId}`);
@@ -155,109 +112,7 @@ export class PersistenceExtension implements Extension {
const pageId = getPageId(documentName);
let tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
// SECURITY (Variant C admin gate, collab WebSocket write path):
// The persisted snapshot is the merged ydoc, which may contain an htmlEmbed
// node inserted by ANY connected editor. htmlEmbed renders raw, unsanitized
// JS in every reader's browser, so only workspace admins/owners may author
// it. When the user whose store triggers this persist is not an admin, strip
// every htmlEmbed node before it is written to the page row AND before the
// ydoc state is re-encoded, so the node cannot be reintroduced by a
// non-admin via the collab socket.
// NOTE (defense-in-depth refinement, Gitea #29): the gate is keyed to the
// storing connection's user, but it no longer blindly strips EVERY embed on
// a non-admin store. We distinguish two cases inside the !allowed branch:
// - Feature toggle OFF => strip ALL embeds (the feature is disabled for
// everyone; existing embeds get cleaned up on the next save).
// - Toggle ON but the storer is a NON-admin => strip only NEWLY-introduced
// embeds and PRESERVE embeds already present in the currently-persisted
// page content (admin-authored, already vetted). So a non-admin still
// cannot ADD an embed, but an unrelated edit (e.g. a paragraph tweak) no
// longer destroys an admin's existing embed (the prior data-loss bug).
// The pre-existing-embed identity is the raw `attrs.source` (see
// collectHtmlEmbedSources). A non-admin who copies an existing admin embed's
// exact source elsewhere passes — acceptable, that HTML is already vetted.
//
// ACCEPTED RESIDUAL RISK (toggle-ON allow-list TOCTOU): the allow-list is a
// best-effort snapshot read OUTSIDE the locked transaction (the prior content
// is pre-read above, but inside executeTx the row is re-read withLock without
// recomputing the allow-list). A concurrent admin store that changes the
// persisted embeds between the pre-read and this write can make the preserve
// decision use a slightly stale snapshot — worst case one embed transiently
// kept or dropped; it converges on the next store, with no auth bypass or
// broader data loss. The race is accepted because it only affects concurrent
// authenticated editors on the (rare) toggle-ON non-admin path, converges on
// the next store, and the persisted row plus every share/readonly read path
// remain protected by the strip.
//
// RESIDUAL RISK (pre-persist broadcast window) — NOW MITIGATED (Gitea #26):
// this strip runs in the debounced onStoreDocument (up to 10s), but
// hocuspocus broadcasts each inbound Yjs update to connected clients
// immediately, so a non-admin's transient htmlEmbed can execute in OTHER open
// editors' browsers in the window before this persist strips it. The exposure
// is limited to concurrent AUTHENTICATED space members who have the doc open
// with Edit rights (semi-trusted) — anonymous public-share/readonly viewers do
// NOT open a collab socket (ReadonlyPageEditor renders fetched,
// already-stripped content; HocuspocusProvider is only used by the
// authenticated editable page-editor), and the PERSISTED page row plus every
// share/readonly read path are protected by this strip.
// The window is now SHRUNK to sub-second by an onChange-debounced early guard
// (~300ms) — see guardHtmlEmbed() — which runs the SAME preserve/strip gate as
// this block and re-encodes the cleaned ydoc, converging the doc for all
// clients long before this 10s store debounce fires. This onStoreDocument
// strip remains the authoritative backstop for persistence. The irreducible
// residual is only the VERY FIRST inbound broadcast before the ~300ms debounce
// fires: hocuspocus exposes no synchronous beforeBroadcast filter to drop the
// node before that first relay, so it cannot be eliminated entirely.
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature
// toggle is ON and the storing user is an admin/owner. OFF (default) =>
// stripped for everyone (existing embeds get cleaned up on next save).
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(context?.user?.workspaceId))?.settings,
);
if (!htmlEmbedAllowed(htmlEmbedEnabled, context?.user?.role)) {
if (hasHtmlEmbedNode(tiptapJson)) {
let strippedJson: typeof tiptapJson;
if (htmlEmbedEnabled === false) {
// Toggle OFF: feature disabled for everyone -> strip ALL embeds.
strippedJson = stripHtmlEmbedNodes(tiptapJson);
} else {
// Toggle ON, non-admin storer: preserve embeds already present in the
// currently-persisted (admin-vetted) page content; strip only the
// newly-introduced ones. Pre-read the prior content — a small extra
// query only on this rare non-admin + toggle-ON path.
const prior = await this.pageRepo.findById(pageId, {
includeContent: true,
});
const allowed = collectHtmlEmbedSources(prior?.content);
strippedJson = stripDisallowedHtmlEmbedNodes(tiptapJson, allowed);
}
// Only mutate the ydoc + log when the strip actually removed something;
// an unnecessary ydoc rewrite would churn the doc for all clients. With
// the toggle-ON branch a non-admin store that only touches admin-vetted
// embeds leaves the content unchanged here.
if (!isDeepStrictEqual(strippedJson, tiptapJson)) {
this.logger.warn(
`Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`,
);
tiptapJson = strippedJson;
// Reflect the stripped content back into the shared ydoc so the node
// is removed for all connected clients, not just the persisted row.
const fragment = document.getXmlFragment('default');
if (fragment.length > 0) {
fragment.delete(0, fragment.length);
}
const cleanDoc = TiptapTransformer.toYdoc(
tiptapJson,
'default',
tiptapExtensions,
);
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
}
}
}
const tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
const ydocState = Buffer.from(Y.encodeStateAsUpdate(document));
@@ -429,168 +284,12 @@ export class PersistenceExtension implements Extension {
if (data.context?.actor === 'agent') {
this.agentTouched.set(documentName, true);
}
// Early htmlEmbed guard scheduling (Gitea #26). Schedule the short debounced
// guard ONLY when (a) this document's workspace toggle is cached ON and
// (b) the changing connection's user is a NON-admin (cannot author
// htmlEmbed). When the toggle is OFF/unknown we schedule NOTHING — no timer,
// no fromYdoc, no DB read — killing the OFF-case overhead (the common
// default); the onStoreDocument strip is the backstop and an OFF embed does
// not execute in editable mode anyway. We do NO expensive work here — we only
// (re)schedule the timer; the debounce coalesces rapid edits into a single
// guard check.
if (
userId &&
this.htmlEmbedToggleByDoc.get(documentName) === true &&
!canAuthorHtmlEmbed(data.context?.user?.role)
) {
const existing = this.htmlEmbedGuardTimers.get(documentName);
if (existing) {
clearTimeout(existing);
}
const timer = setTimeout(() => {
this.htmlEmbedGuardTimers.delete(documentName);
void this.guardHtmlEmbed(documentName, data.document, data.context);
}, 300);
this.htmlEmbedGuardTimers.set(documentName, timer);
}
}
/**
* Early, onChange-debounced htmlEmbed strip (Gitea #26). Mirrors the
* onStoreDocument admin gate but runs ~300ms after a non-admin edit instead of
* waiting for the 10s store debounce, so a non-admin's transient embed is
* removed from the shared ydoc — and re-broadcast as cleaned state — for all
* connected clients in sub-second time. onStoreDocument remains the
* authoritative persistence backstop; this is an ADDITIONAL early pass.
*
* CONCURRENCY (the critical invariant): the Y.Doc mutation is a single
* SYNCHRONOUS block with NO `await` between the fromYdoc snapshot and the
* applyUpdate write. ALL async work (the workspace toggle lookup and the
* persisted-content read for the allow-list) happens FIRST, before that block.
* Because JS is single-threaded, a synchronous block cannot interleave with
* inbound Yjs update handlers, so a concurrent edit that lands while we await
* cannot be CLOBBERED: we re-snapshot the live doc only after all awaits, then
* delete + rebuild + applyUpdate without yielding. (An earlier version awaited
* DB reads BETWEEN the snapshot and the write, so a concurrent edit in that gap
* was lost — this restructure fixes that.)
*
* The allow-list is a best-effort snapshot read outside any lock (TOCTOU
* accepted, same as onStoreDocument): worst case one embed is transiently kept
* or dropped; it converges on the next guard/store, with no auth bypass.
*
* Loop-safety: the corrective applyUpdate has a null origin, so the re-fired
* onChange carries no userId and is not rescheduled; and after a strip no
* htmlEmbed remains, so a subsequent guard fire is a cheap no-op (the
* hasHtmlEmbedNode early-exit). NEVER throws — an unhandled rejection in a timer
* would crash the process — so the whole body is wrapped in try/catch.
*/
private async guardHtmlEmbed(
documentName: string,
document: Y.Doc,
context: any,
): Promise<void> {
// Defensive: ensure no stale timer entry survives for this document.
this.htmlEmbedGuardTimers.delete(documentName);
try {
// Re-check defensively: onChange only schedules for non-admins, but if an
// admin/owner somehow reaches here, the embed is authored content — do
// nothing (onStoreDocument's toggle-AND-admin gate handles persistence).
if (canAuthorHtmlEmbed(context?.user?.role)) {
return;
}
// ---- ASYNC PHASE: do ALL awaits up front, before touching the ydoc. ----
// Resolve the workspace toggle exactly as onStoreDocument does. When OFF we
// strip everything; when ON we use the preserve logic (keep admin-vetted
// embeds, strip only the non-admin's newly-introduced ones).
const enabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(context?.user?.workspaceId))
?.settings,
);
// The allow-list (admin-vetted sources already in the persisted content).
// null => strip ALL (toggle OFF). Read here, BEFORE the synchronous block,
// so no await sits between the doc snapshot and the doc write.
let allowed: Set<string> | null = null;
if (enabled !== false) {
const prior = await this.pageRepo.findById(getPageId(documentName), {
includeContent: true,
});
allowed = collectHtmlEmbedSources(prior?.content);
}
// The awaits above may have let the document be unloaded/destroyed. If so,
// bail — mutating a destroyed doc is pointless and could throw (the
// try/catch is the ultimate safety net regardless).
if ((document as { isDestroyed?: boolean }).isDestroyed) {
return;
}
// ---- SYNCHRONOUS PHASE: snapshot -> strip -> reflect, NO await here. ----
// Because there is no await between fromYdoc and applyUpdate, no inbound
// Yjs update can interleave, so a concurrent edit cannot be lost.
const json = TiptapTransformer.fromYdoc(document, 'default');
// Cheap exit: nothing to guard if the doc has no embed at all. This is also
// why a post-strip re-fire is a no-op (loop-safe).
if (!hasHtmlEmbedNode(json)) {
return;
}
const strippedJson =
allowed === null
? stripHtmlEmbedNodes(json)
: stripDisallowedHtmlEmbedNodes(json, allowed);
// Nothing was stripped (e.g. the only embed is an admin-vetted one) — do
// not churn the shared ydoc for all clients.
if (isDeepStrictEqual(strippedJson, json)) {
return;
}
// Reflect the stripped content back into the shared ydoc EXACTLY as
// onStoreDocument does, so the node is removed for all connected clients,
// not just on the eventual persist. This re-encode broadcasts the cleaned
// state; after it hasHtmlEmbedNode is false, so any later guard fire is a
// cheap no-op (loop-safe).
const fragment = document.getXmlFragment('default');
if (fragment.length > 0) {
fragment.delete(0, fragment.length);
}
const cleanDoc = TiptapTransformer.toYdoc(
strippedJson,
'default',
tiptapExtensions,
);
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
this.logger.warn(
`Stripping htmlEmbed node(s) via early onChange guard by user ${context?.user?.id} on ${documentName}`,
);
} catch (err) {
// NEVER rethrow out of a timer-scheduled call.
this.logger.error(
`Early htmlEmbed guard failed on ${documentName}`,
err,
);
}
}
async afterUnloadDocument(data: afterUnloadDocumentPayload) {
const documentName = data.documentName;
this.contributors.delete(documentName);
this.agentTouched.delete(documentName);
// Drop the cached toggle for this document so a reload re-resolves it (and
// picks up a mid-session admin toggle flip).
this.htmlEmbedToggleByDoc.delete(documentName);
// Clear any pending early-guard timer so it cannot fire after the document
// is unloaded (leak / use-after-unload prevention).
const timer = this.htmlEmbedGuardTimers.get(documentName);
if (timer) {
clearTimeout(timer);
this.htmlEmbedGuardTimers.delete(documentName);
}
}
private consumeContributors(documentName: string): string[] {

View File

@@ -3,20 +3,17 @@ import { htmlToJson } from '../../../collaboration/collaboration.util';
import { hasHtmlEmbedNode, stripHtmlEmbedNodes } from './html-embed.util';
/**
* CONTRACT (security): an attacker who controls imported markdown/HTML could try
* to smuggle an htmlEmbed in the *serialized* DOM form —
* CONTRACT: imported markdown/HTML can carry an htmlEmbed in the *serialized*
* DOM form —
* <div data-type="htmlEmbed" data-source="...">
* — directly, bypassing the editor's `<!--html-embed:-->` comment marker.
*
* This exercises the REAL server import conversion path that ImportService uses
* The block renders inside a sandboxed iframe, so this is not an XSS surface;
* this exercises the REAL server import conversion path that ImportService uses
* (`markdownToHtml` then `htmlToJson`; `processHTML` adds only a cheerio
* link/iframe normalize pass which does not touch htmlEmbed divs) and asserts
* the ACTUAL behaviour so we know whether the strip gate can be bypassed.
*
* FINDING (documented): the raw embed div DOES round-trip through marked +
* htmlToJson into a real `htmlEmbed` node, so `hasHtmlEmbedNode` returns true and
* `stripHtmlEmbedNodes` removes it. The serialized-form bypass is therefore
* detectable and STRIPPABLE — the write-path gate covers it.
* that such a node is DETECTED and STRIPPABLE — so the share read path's
* master-toggle strip can remove it when the workspace toggle is OFF.
*/
describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTML', () => {
it('round-trips through markdownToHtml -> htmlToJson and is DETECTED (base64 data-source)', async () => {
@@ -38,7 +35,7 @@ describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTM
// The div parses into a real htmlEmbed node carrying the decoded source.
expect(hasHtmlEmbedNode(json)).toBe(true);
// Because it is detected, the write-path gate can strip it for non-admins.
// Because it is detected, the share master-toggle strip can remove it.
const stripped = stripHtmlEmbedNodes(json);
expect(hasHtmlEmbedNode(stripped)).toBe(false);
// Surrounding non-embed content is retained.

View File

@@ -1,11 +1,6 @@
import {
canAuthorHtmlEmbed,
collectHtmlEmbedSources,
hasHtmlEmbedNode,
htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
stripDisallowedHtmlEmbedNodes,
stripHtmlEmbedIfNotAllowed,
stripHtmlEmbedNodes,
} from './html-embed.util';
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
@@ -96,17 +91,6 @@ describe('stripHtmlEmbedNodes', () => {
expect(result).toEqual(doc);
});
it('neutralizes a root node that is itself an htmlEmbed', () => {
// Defensive: the PM root is always a `doc`, so this is unreachable in normal
// use, but the helper must still never return a bare htmlEmbed.
const root = {
type: 'htmlEmbed',
attrs: { source: '<script>alert(1)</script>' },
};
const result = stripHtmlEmbedNodes(root);
expect(hasHtmlEmbedNode(result)).toBe(false);
});
it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
// htmlEmbed sits as a sibling of a paragraph, nested four containers deep.
const doc = {
@@ -172,169 +156,6 @@ describe('stripHtmlEmbedNodes', () => {
});
});
describe('collectHtmlEmbedSources', () => {
it('collects the source of every htmlEmbed node, including nested ones', () => {
const doc = {
type: 'doc',
content: [
{ type: 'htmlEmbed', attrs: { source: '<b>top</b>' } },
{
type: 'columns',
content: [
{
type: 'column',
content: [
{ type: 'htmlEmbed', attrs: { source: '<i>nested</i>' } },
{ type: 'paragraph', content: [{ type: 'text', text: 'x' }] },
],
},
],
},
],
};
const sources = collectHtmlEmbedSources(doc);
expect(sources).toEqual(new Set(['<b>top</b>', '<i>nested</i>']));
});
it('returns an empty set for a doc with no embeds', () => {
const doc = {
type: 'doc',
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
};
expect(collectHtmlEmbedSources(doc).size).toBe(0);
});
it('gracefully skips embeds with absent attrs or non-string source', () => {
const doc = {
type: 'doc',
content: [
{ type: 'htmlEmbed' }, // no attrs
{ type: 'htmlEmbed', attrs: {} }, // no source
{ type: 'htmlEmbed', attrs: { source: 42 } }, // non-string
{ type: 'htmlEmbed', attrs: { source: '<ok/>' } },
],
};
expect(collectHtmlEmbedSources(doc)).toEqual(new Set(['<ok/>']));
});
it('returns an empty set for non-object input', () => {
expect(collectHtmlEmbedSources(null).size).toBe(0);
expect(collectHtmlEmbedSources(undefined).size).toBe(0);
expect(collectHtmlEmbedSources('x' as any).size).toBe(0);
});
});
describe('stripDisallowedHtmlEmbedNodes', () => {
it('keeps an embed whose source is allowed and removes the rest', () => {
const doc = {
type: 'doc',
content: [
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
{ type: 'htmlEmbed', attrs: { source: '<new-evil/>' } },
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
],
};
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
expect(collectHtmlEmbedSources(result)).toEqual(new Set(['<vetted/>']));
// The allowed embed and the paragraph survive; the new embed is gone.
expect(result.content).toHaveLength(2);
expect(result.content[0].attrs.source).toBe('<vetted/>');
expect(result.content[1].type).toBe('paragraph');
});
it('keeps BOTH embeds when two nodes share the same allowed source', () => {
// Source-identity semantics: identity is the raw `attrs.source`, so a
// non-admin who duplicates an existing admin-vetted source keeps both copies.
// This is intended — the raw HTML is already vetted, so a duplicate is safe.
const doc = {
type: 'doc',
content: [
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
{ type: 'paragraph', content: [{ type: 'text', text: 'mid' }] },
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
],
};
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
expect(hasHtmlEmbedNode(result)).toBe(true);
const embeds = result.content.filter(
(n: any) => n.type === 'htmlEmbed',
);
expect(embeds).toHaveLength(2);
expect(embeds.every((n: any) => n.attrs.source === '<vetted/>')).toBe(true);
});
it('removes a newly-introduced embed when nothing is allowed', () => {
const doc = {
type: 'doc',
content: [{ type: 'htmlEmbed', attrs: { source: '<new/>' } }],
};
const result = stripDisallowedHtmlEmbedNodes(doc, new Set());
expect(hasHtmlEmbedNode(result)).toBe(false);
});
it('filters nested embeds by the allow-list (e.g. inside columns)', () => {
const doc = {
type: 'doc',
content: [
{
type: 'columns',
content: [
{
type: 'column',
content: [
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
{ type: 'htmlEmbed', attrs: { source: '<new/>' } },
],
},
],
},
],
};
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
const col = findFirstChild(result, 'column');
expect(col.content).toHaveLength(1);
expect(col.content[0].attrs.source).toBe('<vetted/>');
});
it('treats an embed with absent/non-string source as not allowed (stripped)', () => {
const doc = {
type: 'doc',
content: [
{ type: 'htmlEmbed' },
{ type: 'htmlEmbed', attrs: {} },
],
};
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
expect(hasHtmlEmbedNode(result)).toBe(false);
});
it('does not mutate the input document', () => {
const doc = {
type: 'doc',
content: [{ type: 'htmlEmbed', attrs: { source: '<new/>' } }],
};
stripDisallowedHtmlEmbedNodes(doc, new Set());
expect(doc.content).toHaveLength(1);
expect(doc.content[0].type).toBe('htmlEmbed');
});
it('neutralizes a root node that is itself a disallowed htmlEmbed', () => {
const root = { type: 'htmlEmbed', attrs: { source: '<new/>' } };
const result = stripDisallowedHtmlEmbedNodes(root, new Set());
expect(hasHtmlEmbedNode(result)).toBe(false);
});
it('keeps a root node that is an allowed htmlEmbed (defensive branch)', () => {
const root = { type: 'htmlEmbed', attrs: { source: '<vetted/>' } };
const result = stripDisallowedHtmlEmbedNodes(root, new Set(['<vetted/>']));
expect(collectHtmlEmbedSources(result)).toEqual(new Set(['<vetted/>']));
});
it('returns non-object input unchanged', () => {
expect(stripDisallowedHtmlEmbedNodes(null as any, new Set())).toBeNull();
});
});
describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
const rootEmbed = { type: 'htmlEmbed', attrs: { source: '<script>r</script>' } };
@@ -367,19 +188,6 @@ describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
});
});
describe('canAuthorHtmlEmbed', () => {
it('allows owner and admin', () => {
expect(canAuthorHtmlEmbed('owner')).toBe(true);
expect(canAuthorHtmlEmbed('admin')).toBe(true);
});
it('denies member and unknown/empty roles', () => {
expect(canAuthorHtmlEmbed('member')).toBe(false);
expect(canAuthorHtmlEmbed(null)).toBe(false);
expect(canAuthorHtmlEmbed(undefined)).toBe(false);
expect(canAuthorHtmlEmbed('viewer')).toBe(false);
});
});
describe('isHtmlEmbedFeatureEnabled', () => {
it('is true only when settings.htmlEmbed === true', () => {
expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: true })).toBe(true);
@@ -394,165 +202,22 @@ describe('isHtmlEmbedFeatureEnabled', () => {
});
});
describe('htmlEmbedAllowed (toggle AND admin)', () => {
it('toggle OFF + admin/owner => not allowed (feature disabled for everyone)', () => {
expect(htmlEmbedAllowed(false, 'admin')).toBe(false);
expect(htmlEmbedAllowed(false, 'owner')).toBe(false);
});
it('toggle OFF + member => not allowed', () => {
expect(htmlEmbedAllowed(false, 'member')).toBe(false);
});
it('toggle ON + admin/owner => allowed', () => {
expect(htmlEmbedAllowed(true, 'admin')).toBe(true);
expect(htmlEmbedAllowed(true, 'owner')).toBe(true);
});
it('toggle ON + member/unknown => not allowed', () => {
expect(htmlEmbedAllowed(true, 'member')).toBe(false);
expect(htmlEmbedAllowed(true, null)).toBe(false);
expect(htmlEmbedAllowed(true, undefined)).toBe(false);
expect(htmlEmbedAllowed(true, 'viewer')).toBe(false);
});
});
// The shared write-path strip ritual extracted from the 5 plain call-sites
// (collab handler, page create/duplicate, import, file-import-task,
// transclusion-unsync). Tested here once instead of being re-verified in each
// call-site's spec.
describe('stripHtmlEmbedIfNotAllowed (shared write-path gate)', () => {
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>x()</script>' } },
],
});
const docWithoutEmbed = () => ({
type: 'doc',
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }],
});
it('keeps the doc unchanged when feature is ON and role is admin (allowed)', () => {
const json = docWithEmbed();
const onStrip = jest.fn();
const result = stripHtmlEmbedIfNotAllowed(json, {
featureEnabled: true,
role: 'admin',
onStrip,
});
// Allowed => same reference returned, embed preserved, no side-effect.
expect(result).toBe(json);
expect(hasHtmlEmbedNode(result)).toBe(true);
expect(onStrip).not.toHaveBeenCalled();
});
it('keeps the doc unchanged for an owner when feature is ON (allowed)', () => {
const json = docWithEmbed();
const onStrip = jest.fn();
const result = stripHtmlEmbedIfNotAllowed(json, {
featureEnabled: true,
role: 'owner',
onStrip,
});
expect(result).toBe(json);
expect(hasHtmlEmbedNode(result)).toBe(true);
expect(onStrip).not.toHaveBeenCalled();
});
it('strips the embed when the feature is OFF (even for an admin)', () => {
const json = docWithEmbed();
const onStrip = jest.fn();
const result = stripHtmlEmbedIfNotAllowed(json, {
featureEnabled: false,
role: 'admin',
onStrip,
});
expect(hasHtmlEmbedNode(result)).toBe(false);
expect(onStrip).toHaveBeenCalledTimes(1);
});
it('strips the embed for a non-admin when the feature is ON', () => {
const json = docWithEmbed();
const onStrip = jest.fn();
const result = stripHtmlEmbedIfNotAllowed(json, {
featureEnabled: true,
role: 'member',
onStrip,
});
expect(hasHtmlEmbedNode(result)).toBe(false);
expect(onStrip).toHaveBeenCalledTimes(1);
});
it('strips the embed for a null/undefined role when the feature is ON', () => {
for (const role of [null, undefined]) {
const onStrip = jest.fn();
const result = stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
featureEnabled: true,
role,
onStrip,
});
expect(hasHtmlEmbedNode(result)).toBe(false);
expect(onStrip).toHaveBeenCalledTimes(1);
}
});
it('returns input unchanged and does NOT call onStrip when no embed is present', () => {
const json = docWithoutEmbed();
const onStrip = jest.fn();
// Not allowed (feature OFF), but there is nothing to strip.
const result = stripHtmlEmbedIfNotAllowed(json, {
featureEnabled: false,
role: 'member',
onStrip,
});
expect(result).toBe(json);
expect(onStrip).not.toHaveBeenCalled();
});
it('calls onStrip exactly once per strip', () => {
const onStrip = jest.fn();
stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
featureEnabled: false,
role: 'member',
onStrip,
});
expect(onStrip).toHaveBeenCalledTimes(1);
});
it('works without an onStrip callback (optional)', () => {
const result = stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
featureEnabled: false,
role: 'member',
});
expect(hasHtmlEmbedNode(result)).toBe(false);
});
});
// NOTE: a previous revision of this file re-implemented the write-path admin
// gate as a local `applyAdminGate` stand-in and asserted against THAT. A
// deleted/misplaced real guard would have kept those green. The stand-in is
// removed. The collab store, REST/MCP update, and transclusion-unsync paths are
// now tested against their REAL code in:
// - collaboration/extensions/persistence.extension.html-embed.spec.ts
// - collaboration/collaboration.handler.html-embed.spec.ts
// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup)
// - integrations/import/services/import-html-embed-identity.spec.ts (import)
// The htmlEmbed node renders inside a sandboxed iframe, so the per-write role
// gate has been removed. `stripHtmlEmbedNodes` + `isHtmlEmbedFeatureEnabled`
// remain ONLY to honor the workspace master toggle on the anonymous public-share
// read path — tested against the real share code in:
// - core/share/share-html-embed.spec.ts
//
// The case below stays here because it asserts a REAL parse path
// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a
// re-implemented gate.
describe('htmlEmbed smuggled via the markdown/html <!--html-embed--> form (real parse + real helpers)', () => {
it('the parsed node is detected and stripped by the real helpers', () => {
// The markdown/html create formats decode to the same htmlEmbed node, so the
// gate (run on the parsed JSON) covers them identically.
const source = '<script>steal()</script>';
// The case below asserts that the REAL parse path (htmlToJson, the markdown/html
// form) produces an htmlEmbed node the master-toggle strip can detect & remove.
describe('htmlEmbed via the markdown/html form (real parse + real strip helper)', () => {
it('the parsed node is detected and stripped by the real helper', () => {
const source = '<script>track()</script>';
const encoded = encodeHtmlEmbedSource(source);
const html = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
const parsed = htmlToJson(html);
expect(hasHtmlEmbedNode(parsed)).toBe(true);
// A non-admin role gates to strip via the real helpers.
expect(canAuthorHtmlEmbed('member')).toBe(false);
const stripped = stripHtmlEmbedNodes(parsed);
expect(hasHtmlEmbedNode(stripped)).toBe(false);
});

View File

@@ -5,12 +5,12 @@ export const HTML_EMBED_NODE_NAME = 'htmlEmbed';
/**
* Recursively remove every `htmlEmbed` node from a ProseMirror JSON document.
*
* SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin
* (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to
* author it. This helper is the server-side enforcement primitive: every WRITE
* path that may persist content from a NON-admin caller must run the incoming
* document through this function so a non-admin cannot smuggle the node in via
* the collab socket, the REST/MCP/AI content-update path, paste, or import.
* The `htmlEmbed` node renders inside a SANDBOXED iframe (no `allow-same-origin`)
* on the client, so its content cannot touch the viewer's session/cookies/API —
* it is NOT a stored-XSS surface. This helper is retained ONLY to honor the
* workspace master toggle (`settings.htmlEmbed`) on the anonymous public-share
* read path: an anonymous viewer cannot read the workspace toggle, so the server
* strips the block when the toggle is OFF before serving shared content.
*
* Returns a NEW document; the input is not mutated. If the input is not a valid
* doc object it is returned unchanged (callers persist what they were given).
@@ -22,15 +22,6 @@ export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
const node = pmJson as unknown as JSONContent;
// Defensive root-type check: if the ROOT node is itself an htmlEmbed, the
// children-filtering below could never drop it, so a bare htmlEmbed would be
// returned as-is. This branch is unreachable in normal use (the PM document
// root is always a `doc`) and exists only to make the helper total — a bare
// htmlEmbed can never be returned by this function.
if (node.type === HTML_EMBED_NODE_NAME) {
return { type: 'doc', content: [] } as unknown as T;
}
if (Array.isArray(node.content)) {
const filtered: JSONContent[] = [];
for (const child of node.content) {
@@ -48,111 +39,12 @@ export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
return { ...node } as unknown as T;
}
/**
* Walk the document and collect a stable identity for every `htmlEmbed` node.
*
* The identity is the node's `attrs.source` string — the raw HTML the embed
* renders. Two embeds that render the exact same HTML are treated as the same
* identity. Used by the collab persist path to know which embeds are ALREADY
* present in the currently-persisted (admin-vetted) page content, so a later
* non-admin store can strip only NEWLY-introduced embeds while preserving the
* pre-existing admin-authored ones.
*
* Absent attrs or a non-string/absent `source` are skipped gracefully (such a
* node contributes no identity to the set).
*/
export function collectHtmlEmbedSources(pmJson: unknown): Set<string> {
const sources = new Set<string>();
const walk = (node: unknown): void => {
if (!node || typeof node !== 'object') {
return;
}
const n = node as JSONContent;
if (n.type === HTML_EMBED_NODE_NAME) {
const source = (n.attrs as Record<string, unknown> | undefined)?.source;
if (typeof source === 'string') {
sources.add(source);
}
}
if (Array.isArray(n.content)) {
for (const child of n.content) {
walk(child);
}
}
};
walk(pmJson);
return sources;
}
/**
* Like {@link stripHtmlEmbedNodes}, but KEEP any `htmlEmbed` node whose
* `attrs.source` is in `allowedSources`; remove the rest.
*
* Used on the collab persist path when the feature toggle is ON but the storing
* user is a NON-admin: `allowedSources` is the set of embed sources already
* present in the currently-persisted page content (admin-authored, already
* vetted). A non-admin therefore cannot ADD a new embed, but their unrelated
* edit also cannot destroy an admin's existing one.
*
* NOTE: identity is the raw source string, so a non-admin who COPIES an existing
* admin embed's exact source into a NEW location passes this check. That is
* acceptable — the source is already admin-vetted content present in the doc; no
* new untrusted HTML is introduced.
*
* Returns a NEW document; the input is not mutated. Same defensive root-type
* check pattern as {@link stripHtmlEmbedNodes}.
*/
export function stripDisallowedHtmlEmbedNodes<T = JSONContent>(
pmJson: T,
allowedSources: Set<string>,
): T {
if (!pmJson || typeof pmJson !== 'object') {
return pmJson;
}
const node = pmJson as unknown as JSONContent;
// Defensive root-type check (mirrors stripHtmlEmbedNodes): if the ROOT node is
// itself an htmlEmbed and its source is NOT allowed, the children-filtering
// below could never drop it, so neutralize it here. Unreachable in normal use
// (the PM document root is always a `doc`).
if (node.type === HTML_EMBED_NODE_NAME) {
const source = (node.attrs as Record<string, unknown> | undefined)?.source;
if (typeof source === 'string' && allowedSources.has(source)) {
return { ...node } as unknown as T;
}
return { type: 'doc', content: [] } as unknown as T;
}
if (Array.isArray(node.content)) {
const filtered: JSONContent[] = [];
for (const child of node.content) {
// Drop a disallowed htmlEmbed child (newly introduced); keep an allowed
// one (already present in the persisted, admin-vetted content).
if (child && child.type === HTML_EMBED_NODE_NAME) {
const source = (child.attrs as Record<string, unknown> | undefined)
?.source;
if (typeof source === 'string' && allowedSources.has(source)) {
filtered.push({ ...child });
}
continue;
}
// Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are
// also filtered by the same allow-list.
filtered.push(stripDisallowedHtmlEmbedNodes(child, allowedSources));
}
return { ...node, content: filtered } as unknown as T;
}
return { ...node } as unknown as T;
}
/**
* Returns true if the document contains at least one `htmlEmbed` node anywhere
* in its tree. Useful to decide whether a strip pass actually changed anything
* (e.g. for logging a rejected non-admin embed attempt).
* in its tree. Useful to decide whether a strip pass on the share read path
* actually changed anything. After the write-path role gate removal this is no
* longer called by production code; it is retained as a test-only assertion
* helper (and a detection primitive should a future read path need it).
*/
export function hasHtmlEmbedNode(pmJson: unknown): boolean {
if (!pmJson || typeof pmJson !== 'object') {
@@ -169,62 +61,9 @@ export function hasHtmlEmbedNode(pmJson: unknown): boolean {
}
/**
* Map the workspace user role to whether it may author `htmlEmbed` nodes.
* Owners and admins are trusted; everyone else (member, and any unknown role)
* is not. Kept here so every write path shares one definition of "trusted".
*/
export function canAuthorHtmlEmbed(role: string | null | undefined): boolean {
return role === 'owner' || role === 'admin';
}
/**
* Combined write-path gate for the htmlEmbed feature.
*
* htmlEmbed is allowed in a document only when the workspace feature toggle is
* ON and the authoring/saving user is a workspace admin/owner. OFF (default) =>
* stripped for EVERYONE, including admins (the feature is disabled).
*
* `featureEnabled` is read from the workspace settings for the relevant write
* (`workspace.settings?.htmlEmbed === true`). Every WRITE path that may persist
* htmlEmbed content must gate on this combined predicate, so that turning the
* toggle OFF strips existing embeds on the next save and prevents new ones from
* being persisted regardless of role.
*/
export function htmlEmbedAllowed(
featureEnabled: boolean,
role: string | null | undefined,
): boolean {
return featureEnabled === true && canAuthorHtmlEmbed(role);
}
/**
* Strip htmlEmbed nodes unless the (feature-enabled AND role-allowed) gate
* passes. Returns the possibly-stripped doc. The caller resolves featureEnabled
* (from workspace settings) and role (actor) itself — those legitimately differ
* per call-site (e.g. share path uses role=null) — this helper owns only the
* has-check + AND + strip + optional onStrip callback.
*
* Centralizes the 4-step write-path ritual (resolve role -> resolve
* featureEnabled -> htmlEmbedAllowed AND -> stripHtmlEmbedNodes) so the plain
* strip-all call-sites share one tested decision. Sites with CUSTOM strip logic
* (e.g. the collab persist path's preserve-admin variant) keep their own code.
*/
export function stripHtmlEmbedIfNotAllowed<T>(
json: T,
opts: { featureEnabled: boolean; role: string | null | undefined; onStrip?: () => void },
): T {
if (htmlEmbedAllowed(opts.featureEnabled, opts.role)) return json;
if (hasHtmlEmbedNode(json)) {
opts.onStrip?.();
return stripHtmlEmbedNodes(json);
}
return json;
}
/**
* Read the workspace-level htmlEmbed feature toggle from a workspace's settings
* jsonb. ABSENT/non-true => OFF (the default). Kept here so every server write
* path resolves the toggle the same way.
* Read the workspace-level htmlEmbed master toggle from a workspace's settings
* jsonb. ABSENT/non-true => OFF (the default). Kept here so the share read path
* resolves the toggle the same way it is persisted.
*/
export function isHtmlEmbedFeatureEnabled(
settings: unknown | null | undefined,

View File

@@ -65,21 +65,19 @@ export const MAX_SHARE_MESSAGES = 30;
export const MAX_SHARE_MESSAGE_CHARS = 8000;
/**
* Default per-request output cap for the anonymous share assistant. Bounds the
* tokens a single anonymous request can generate; worst case = steps x this.
*/
export const SHARE_AI_MAX_OUTPUT_TOKENS = 512;
/**
* Read the per-request output cap from the environment (overridable seam),
* falling back to the sane default. A non-positive / unparseable value uses the
* default. Mirrors resolveShareAiWorkspaceMax().
* Per-request output-token ceiling for the anonymous assistant. `streamText`
* runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted
* request is bounded by (steps × this). The per-workspace cap bounds the COUNT
* of calls; this bounds the SIZE of each, so a single anonymous call cannot run
* up the provider bill even if the per-IP throttle is evaded. Env-overridable
* seam; a non-positive or unparseable value falls back to the default.
*/
export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;
export function resolveShareAiMaxOutputTokens(): number {
const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
return Number.isFinite(raw) && raw > 0
? Math.floor(raw)
: SHARE_AI_MAX_OUTPUT_TOKENS;
: SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
}
/**
@@ -225,8 +223,8 @@ export class PublicShareChatService {
tools,
// Bound the agent loop for anonymous callers.
stopWhen: stepCountIs(5),
// Bounds per-request output so one anonymous request can't run up the
// provider bill; worst case = steps x this.
// Cap per-request output so one anonymous call cannot run up the provider
// bill even if the per-IP throttle is evaded; worst case = steps × this.
maxOutputTokens: resolveShareAiMaxOutputTokens(),
abortSignal: signal,
onError: ({ error }) => {

View File

@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
import {
PublicShareChatService,
filterShareTranscript,
resolveShareAiMaxOutputTokens,
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
} from './public-share-chat.service';
import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
import {
@@ -400,6 +402,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
});
});
describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS';
const original = process.env[ENV];
afterEach(() => {
if (original === undefined) delete process.env[ENV];
else process.env[ENV] = original;
});
it('falls back to the default when unset', () => {
delete process.env[ENV];
expect(resolveShareAiMaxOutputTokens()).toBe(
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
);
expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
});
it('uses (and floors) a valid positive value from the env', () => {
process.env[ENV] = '1024.9';
expect(resolveShareAiMaxOutputTokens()).toBe(1024);
});
it('falls back to the default for zero, a negative, or a non-numeric value', () => {
process.env[ENV] = '0';
expect(resolveShareAiMaxOutputTokens()).toBe(
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
);
process.env[ENV] = '-5';
expect(resolveShareAiMaxOutputTokens()).toBe(
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
);
process.env[ENV] = 'not-a-number';
expect(resolveShareAiMaxOutputTokens()).toBe(
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
);
});
});
describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
it('allows up to the cap within a window, then 429s (returns false)', async () => {
const limiter = makeLimiter(3, 60_000, () => 1_000);
@@ -482,9 +522,11 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
});
it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
// FAIL CLOSED (#62): if Redis is down we cannot prove the workspace is under
// its cap, so DENY (the controller 429s) rather than admit an unmetered,
// billable anonymous call. The feature is optional, so denial is harmless.
// The per-workspace cap is the COST backstop for an OPTIONAL anonymous
// assistant. If Redis is unavailable we cannot prove the workspace is under
// its cap, so we DENY (controller 429s) rather than admit an unmetered,
// billable call — a brief Redis blip disabling the assistant is safer than
// an unbounded provider bill.
const failingRedis = {
eval: () => Promise.reject(new Error('redis down')),
} as unknown as import('ioredis').Redis;

View File

@@ -99,11 +99,11 @@ export class PublicShareWorkspaceLimiter {
/**
* Account one call for `key`. Returns true if it is within the cap (allowed),
* false if the cap over the trailing window is exceeded (caller must 429).
* On a Redis failure we FAIL CLOSED (return false): if Redis is down we cannot
* prove the workspace is under its cap, so we DENY rather than admit an
* unmetered, billable anonymous call. The feature is optional, so the
* temporary denial is harmless. (Operators wanting a tighter steady-state cap
* can lower the default via SHARE_AI_WORKSPACE_MAX_PER_HOUR, e.g. =100.)
* On a Redis failure we FAIL CLOSED (return false): this cap is the COST
* backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
* cannot prove the workspace is under its cap and therefore DENY rather than
* admit an unmetered, billable anonymous call. A transient Redis blip briefly
* disabling the assistant is preferable to an unbounded provider bill.
*/
async tryConsume(key: string): Promise<boolean> {
const t = this.now();
@@ -122,9 +122,11 @@ export class PublicShareWorkspaceLimiter {
);
return admitted === 1;
} catch (err) {
// FAIL CLOSED: if Redis is down we cannot prove the workspace is under its
// cap, so DENY (controller 429s) rather than admit an unmetered, billable
// anonymous call. The feature is optional, so denial is harmless.
// FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
// under its cap, so we DENY (the controller 429s) rather than admit an
// unmetered, billable anonymous call. The assistant is optional, so a
// transient Redis blip briefly disabling it is the safer failure mode than
// an unbounded provider bill.
this.logger.error(
`share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
err as Error,

View File

@@ -10,7 +10,6 @@ describe('PageController', () => {
controller = new PageController(
{} as any, // pageService
{} as any, // pageRepo
{} as any, // workspaceRepo
{} as any, // pageHistoryService
{} as any, // spaceAbility
{} as any, // pageAccessService

View File

@@ -39,11 +39,6 @@ import {
} from '../casl/interfaces/space-ability.type';
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import {
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedNodes,
} from '../../common/helpers/prosemirror/html-embed.util';
import { RecentPageDto } from './dto/recent-page.dto';
import { CreatedByUserDto } from './dto/created-by-user.dto';
import { DuplicatePageDto } from './dto/duplicate-page.dto';
@@ -68,7 +63,6 @@ export class PageController {
constructor(
private readonly pageService: PageService,
private readonly pageRepo: PageRepo,
private readonly workspaceRepo: WorkspaceRepo,
private readonly pageHistoryService: PageHistoryService,
private readonly spaceAbility: SpaceAbilityFactory,
private readonly pageAccessService: PageAccessService,
@@ -98,18 +92,6 @@ export class PageController {
const permissions = { canEdit, hasRestriction };
if (page.content) {
const workspace = await this.workspaceRepo.findById(page.workspaceId);
if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
// Kill-switch: when the workspace feature is OFF, never serve raw
// htmlEmbed nodes on the read path (mirrors the public-share strip),
// so disabling the feature is an immediate, total kill-switch and not
// dependent on the page being re-saved. Admin-authored content only.
// Fail-closed: a missing workspace resolves to OFF and is stripped.
page.content = stripHtmlEmbedNodes(page.content) as any;
}
}
if (dto.format && dto.format !== 'json' && page.content) {
const contentOutput =
dto.format === 'markdown'
@@ -255,9 +237,6 @@ export class PageController {
user.id,
workspace.id,
createPageDto,
// Pass the caller's workspace role so create() can enforce the htmlEmbed
// admin gate (non-admins cannot author raw-JS embeds).
user.role,
provenance,
);
@@ -554,16 +533,6 @@ export class PageController {
await this.pageAccessService.validateCanView(page, user);
if (history.content) {
const workspace = await this.workspaceRepo.findById(page.workspaceId);
if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
// Kill-switch: history snapshots are an authenticated read path too, so
// strip htmlEmbed when the workspace feature is OFF (same as /info and
// the public-share path). Fail-closed on a missing workspace.
history.content = stripHtmlEmbedNodes(history.content) as any;
}
}
return history;
}

View File

@@ -1,240 +0,0 @@
// Exercises the REAL PageService htmlEmbed admin gate on its two non-collab
// write paths: PageService.create() and PageService.duplicatePage(). Both build
// content/textContent/ydoc directly and persist, bypassing the collab
// onStoreDocument strip, so each must run the incoming document through the
// toggle-AND-admin gate (`htmlEmbedAllowed(featureEnabled, role)` -> if not
// allowed, `stripHtmlEmbedNodes`) BEFORE persisting.
//
// This spec constructs the REAL PageService with every constructor dep mocked,
// feeds content containing an `htmlEmbed`, calls the real method, and asserts on
// the PERSISTED content (captured at the repo insert / db insert boundary) that
// the embed was actually stripped (member/unknown role) or preserved
// (admin/owner + toggle ON). Mirrors the GOOD pattern in
// transclusion/spec/transclusion-unsync-html-embed.spec.ts.
//
// page.service.ts pulls in the collaboration gateway (a transitive ESM chain
// `lib0/decoding.js` that jest's transformIgnorePatterns does not transpile), so
// that single module is mocked away — it is never used on the create/duplicate
// gate paths.
jest.mock('../../../collaboration/collaboration.gateway', () => ({
CollaborationGateway: class {},
}));
import { PageService } from './page.service';
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
const WS = 'ws-1';
const SPACE = 'space-1';
const USER = 'u1';
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
],
});
// Minimal chainable kysely stub. `nextPagePosition` (used by create) and
// duplicatePage's bulk insert go through `this.db`; only the calls those paths
// make need to resolve. `capturedInserts` collects every page row handed to
// `insertInto('pages').values(...)` so we can assert on the persisted content.
function buildDb(capturedInserts: any[]) {
const selectChain: any = {
select: () => selectChain,
selectAll: () => selectChain,
where: () => selectChain,
orderBy: () => selectChain,
limit: () => selectChain,
execute: async () => [],
executeTakeFirst: async () => undefined,
};
const db: any = {
selectFrom: () => selectChain,
insertInto: (table: string) => ({
values: (rows: any) => {
if (table === 'pages') {
for (const row of Array.isArray(rows) ? rows : [rows]) {
capturedInserts.push(row);
}
}
return { execute: async () => undefined };
},
}),
// executeTx -> db.transaction().execute(cb): run the callback with `db`
// itself acting as the transaction so any in-tx inserts are captured too.
transaction: () => ({ execute: async (cb: any) => cb(db) }),
};
return db;
}
// Build the REAL PageService with all 13 constructor deps mocked. `featureEnabled`
// drives the workspace toggle the gate reads via workspaceRepo.findById.
function buildService(opts: {
featureEnabled: boolean;
capturedInserts: any[];
rootPage?: any; // for duplicatePage
}) {
const { featureEnabled, capturedInserts } = opts;
const pageRepo: any = {
findById: jest.fn(async () => null), // no parent page in create tests
// create() persists here; capture the row so we can inspect content.
insertPage: jest.fn(async (row: any) => {
capturedInserts.push(row);
return { id: 'new-page', slugId: 'slug-1', ...row };
}),
getPageAndDescendants: jest.fn(async () => [opts.rootPage].filter(Boolean)),
};
const pagePermissionRepo: any = {
// duplicatePage filters accessible pages; grant the root so it is copied.
filterAccessiblePageIds: jest.fn(async () =>
opts.rootPage ? [opts.rootPage.id] : [],
),
};
const workspaceRepo: any = {
findById: jest.fn(async () => ({
id: WS,
settings: { htmlEmbed: featureEnabled },
})),
};
const attachmentRepo: any = { findByIds: jest.fn(async () => []) };
const storageService: any = { copy: jest.fn(async () => undefined) };
const noopQueue: any = { add: jest.fn(async () => undefined) };
const eventEmitter: any = { emit: jest.fn() };
const collaborationGateway: any = {};
const watcherService: any = {};
// duplicatePage fires transclusion bulk inserts after persisting; they are
// best-effort (wrapped in try/catch) and irrelevant to the gate.
const transclusionService: any = {
insertTransclusionsForPages: jest.fn(async () => undefined),
insertReferencesForPages: jest.fn(async () => undefined),
insertTemplateReferencesForPages: jest.fn(async () => undefined),
};
const db = buildDb(capturedInserts);
const service = new PageService(
pageRepo,
pagePermissionRepo,
attachmentRepo,
db,
storageService,
noopQueue, // attachmentQueue
noopQueue, // aiQueue
noopQueue, // generalQueue
eventEmitter,
collaborationGateway,
watcherService,
transclusionService,
workspaceRepo,
);
return service;
}
describe('PageService.create htmlEmbed admin gate (real code)', () => {
// Run create() and return the content actually persisted via insertPage.
async function persistedContent(
featureEnabled: boolean,
callerRole: string | null | undefined,
) {
const capturedInserts: any[] = [];
const service = buildService({ featureEnabled, capturedInserts });
await service.create(
USER,
WS,
{
spaceId: SPACE,
title: 'p',
// 'json' format is used as-is by parseProsemirrorContent (passed to the
// real jsonToNode schema validation), so hand it the PM-JSON object.
content: docWithEmbed(),
format: 'json' as any,
} as any,
callerRole,
);
expect(capturedInserts).toHaveLength(1);
return capturedInserts[0].content;
}
it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
const content = await persistedContent(true, 'member');
expect(hasHtmlEmbedNode(content)).toBe(false);
// Non-embed content survives.
expect(JSON.stringify(content)).toContain('body');
});
it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true);
});
it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true);
});
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false);
});
it('unknown/empty role: fails closed (stripped)', async () => {
for (const role of [undefined, null, 'viewer'] as const) {
expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false);
}
});
});
describe('PageService.duplicatePage htmlEmbed admin gate (real code)', () => {
// Duplicate a single source page that contains an embed and return the content
// persisted for the copy (captured at db.insertInto('pages').values(...)).
async function persistedContent(
featureEnabled: boolean,
role: string | null | undefined,
) {
const rootPage: any = {
id: 'src-page',
slugId: 'src-slug',
title: 'Source',
icon: null,
position: 'a0',
spaceId: SPACE,
workspaceId: WS,
parentPageId: null,
content: docWithEmbed(),
};
const capturedInserts: any[] = [];
const service = buildService({ featureEnabled, capturedInserts, rootPage });
const authUser: any = { id: USER, workspaceId: WS, role };
await service.duplicatePage(rootPage, undefined, authUser);
// The bulk insert is the page persist boundary; one source page -> one copy.
const pageRows = capturedInserts.filter((r) => r.content);
expect(pageRows.length).toBeGreaterThanOrEqual(1);
return pageRows[0].content;
}
it('toggle ON + member: persisted copy has htmlEmbed stripped', async () => {
const content = await persistedContent(true, 'member');
expect(hasHtmlEmbedNode(content)).toBe(false);
expect(JSON.stringify(content)).toContain('body');
});
it('toggle ON + admin: persisted copy keeps the htmlEmbed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true);
});
it('toggle ON + owner: persisted copy keeps the htmlEmbed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true);
});
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false);
});
it('unknown/empty role: fails closed (stripped)', async () => {
for (const role of [undefined, null, 'viewer'] as const) {
expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false);
}
});
});

View File

@@ -20,7 +20,6 @@ describe('PageService', () => {
{} as any, // collaborationGateway
{} as any, // watcherService
{} as any, // transclusionService
{} as any, // workspaceRepo
);
});

View File

@@ -31,11 +31,6 @@ import {
isAttachmentNode,
removeMarkTypeFromDoc,
} from '../../../common/helpers/prosemirror/utils';
import {
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedIfNotAllowed,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import {
htmlToJson,
jsonToNode,
@@ -81,7 +76,6 @@ export class PageService {
private collaborationGateway: CollaborationGateway,
private readonly watcherService: WatcherService,
private readonly transclusionService: TransclusionService,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async findById(
@@ -101,10 +95,6 @@ export class PageService {
userId: string,
workspaceId: string,
createPageDto: CreatePageDto,
// Workspace role of the caller. Used to enforce the htmlEmbed admin gate on
// the create write path (see below). Optional/typed loosely so unknown or
// missing roles fall through to the non-admin (strip) branch by default.
callerRole?: string | null,
// Optional agent-edit provenance (from the signed access claim). When the
// actor is 'agent', stamp the page's source marker so a freshly created page
// shows it was created by the AI agent (§14 N2) — create goes through REST,
@@ -135,35 +125,11 @@ export class PageService {
let ydoc = undefined;
if (createPageDto?.content && createPageDto?.format) {
let prosemirrorJson = await this.parseProsemirrorContent(
const prosemirrorJson = await this.parseProsemirrorContent(
createPageDto.content,
createPageDto.format,
);
// SECURITY (Variant C admin gate, plain page-create write path):
// create() builds content/textContent/ydoc directly and persists them via
// insertPage, bypassing the collab onStoreDocument strip. htmlEmbed renders
// raw, unsanitized JS in readers' browsers, so only workspace admins/owners
// may author it. The create controller requires only space Edit, so a
// regular member could otherwise POST a doc (json, or the markdown/html
// <!--html-embed:BASE64--> forms that parse to the same node) containing an
// htmlEmbed and store XSS for every reader. Strip every htmlEmbed node when
// the caller is not an admin, BEFORE deriving textContent/ydoc/insert.
// The gate is toggle-AND-admin: htmlEmbed survives only when the workspace
// feature toggle is ON and the caller is an admin/owner. OFF (default) =>
// stripped for everyone. Cheap settings read keyed to the workspace.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(workspaceId))?.settings,
);
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
featureEnabled: htmlEmbedEnabled,
role: callerRole,
onStrip: () =>
this.logger.warn(
`Stripping htmlEmbed node(s) from page creation by user ${userId} (space ${createPageDto.spaceId})`,
),
});
content = prosemirrorJson;
textContent = jsonToText(prosemirrorJson);
ydoc = createYdocFromJson(prosemirrorJson);
@@ -653,12 +619,6 @@ export class PageService {
const attachmentMap = new Map<string, ICopyPageAttachment>();
// Resolve the htmlEmbed toggle ONCE for the workspace; the per-page gate
// below is toggle-AND-admin (OFF default => stripped for everyone).
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(rootPage.workspaceId))?.settings,
);
const insertablePages: InsertablePage[] = await Promise.all(
pages.map(async (page) => {
const pageContent = getProsemirrorContent(page.content);
@@ -769,24 +729,7 @@ export class PageService {
}
});
let prosemirrorJson = prosemirrorDoc.toJSON();
// SECURITY (Variant C admin gate, duplication write path):
// Duplication builds the ydoc directly and bypasses the collab
// onStoreDocument strip. htmlEmbed renders raw, unsanitized JS in
// readers' browsers, so only workspace admins/owners may author it. A
// non-admin with space Edit could otherwise duplicate an admin page
// that contains an embed into a new page authored by them. Strip every
// htmlEmbed node from each duplicated page when the duplicating user is
// not an admin, BEFORE computing textContent/ydoc/insert.
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
featureEnabled: htmlEmbedEnabled,
role: authUser.role,
onStrip: () =>
this.logger.warn(
`Stripping htmlEmbed node(s) from page duplication by user ${authUser.id} (source page ${page.id})`,
),
});
const prosemirrorJson = prosemirrorDoc.toJSON();
// Add "Copy of " prefix to the root page title only for duplicates in same space
let title = page.title;

View File

@@ -68,7 +68,6 @@ describe('TransclusionService — template access core (real filter)', () => {
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
{} as any, // workspaceRepo
);
return { service, db, pageRepo, spaceMemberRepo, pagePermissionRepo };
@@ -227,7 +226,6 @@ describe('TransclusionService.filterViewerAccessiblePageIds — AND ordering (co
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
{} as any, // workspaceRepo
);
return { service, filterAccessiblePageIds };
@@ -324,7 +322,6 @@ describe('TransclusionService.syncPageTemplateReferences — workspace scoping',
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
{} as any, // workspaceRepo
);
return {
@@ -471,7 +468,6 @@ describe('TransclusionService.insertTemplateReferencesForPages — per-workspace
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
{} as any, // workspaceRepo
);
return { service, insertMany };
}

View File

@@ -41,7 +41,6 @@ describe('TransclusionService.lookupTemplate — anti-leak catch branch', () =>
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
{} as any, // workspaceRepo
);
// Stub the access decision; we are testing the content-prep stage, not access.
@@ -158,7 +157,6 @@ describe('TransclusionService.lookupTemplate — soft-deleted source via real fi
{} as any,
{} as any,
{} as any,
{} as any,
);
const { items } = await service.lookupTemplate(['deleted-src'], 'u1', 'w1');

View File

@@ -35,7 +35,6 @@ describe('TransclusionService.lookupTemplate (access mapping)', () => {
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
{} as any, // workspaceRepo
);
jest

View File

@@ -57,7 +57,6 @@ function buildService(opts: {
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
{} as any, // workspaceRepo
);
}

View File

@@ -1,145 +0,0 @@
import { TransclusionService } from '../transclusion.service';
import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate.
// unsync returns a source snapshot the client materializes into the reference
// page; a non-admin must never receive an embed payload to re-persist. The gate
// reads `user.role` and strips before returning. All repos / access checks are
// mocked so the REAL gate logic runs end-to-end. Complements the existing
// transclusion specs (rewriteAttachmentsForUnsync, controller).
const WS = 'ws-1';
const REF_PAGE = 'ref-1';
const SRC_PAGE = 'src-1';
const TX_ID = 'tx-1';
const sourceContentWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>steal()</script>' } },
],
});
function buildService(featureEnabled = true) {
const pageRepo = {
findById: jest.fn(async (id: string) => ({
id,
workspaceId: WS,
spaceId: 'space-1',
deletedAt: null,
})),
};
const pageTransclusionsRepo = {
findByPageAndTransclusion: jest.fn(async () => ({
content: sourceContentWithEmbed(),
})),
};
const pageTransclusionReferencesRepo = {
deleteOne: jest.fn(async () => undefined),
};
const attachmentRepo = { findByIds: jest.fn(async () => []) };
const storageService = { copy: jest.fn(async () => undefined) };
const pageAccessService = {
validateCanEdit: jest.fn(async () => undefined),
validateCanView: jest.fn(async () => undefined),
};
// Workspace settings read used by the toggle-AND-admin gate.
const workspaceRepo = {
findById: jest.fn(async () => ({
id: WS,
settings: { htmlEmbed: featureEnabled },
})),
};
const service = new TransclusionService(
{} as any, // db (unused on this path)
pageTransclusionsRepo as any,
pageTransclusionReferencesRepo as any,
{} as any, // pageTemplateReferencesRepo (unused on this path)
pageRepo as any,
{} as any, // pagePermissionRepo (unused)
{} as any, // spaceMemberRepo (unused)
attachmentRepo as any,
storageService as any,
pageAccessService as any,
workspaceRepo as any,
);
return service;
}
function userWithRole(role: string | null | undefined) {
return { id: 'u1', workspaceId: WS, role } as any;
}
describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => {
it('non-admin (member): returned content has htmlEmbed stripped', async () => {
const service = buildService();
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('member'),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
// Non-embed content is preserved.
expect(JSON.stringify(content)).toContain('snapshot body');
});
it('unknown/empty role: fails closed (stripped)', async () => {
for (const role of [undefined, null, 'viewer'] as const) {
const service = buildService();
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole(role),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
}
});
it('toggle ON + admin: returned content keeps the htmlEmbed', async () => {
const service = buildService(true);
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('admin'),
);
expect(hasHtmlEmbedNode(content)).toBe(true);
});
it('toggle ON + owner: returned content keeps the htmlEmbed', async () => {
const service = buildService(true);
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('owner'),
);
expect(hasHtmlEmbedNode(content)).toBe(true);
});
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
const service = buildService(false);
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('admin'),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
});
it('toggle OFF + member: stripped', async () => {
const service = buildService(false);
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('member'),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
});
});

View File

@@ -33,11 +33,6 @@ import {
import { jsonToNode } from '../../../collaboration/collaboration.util';
import { Page, User } from '@docmost/db/types/entity.types';
import { PageAccessService } from '../page-access/page-access.service';
import {
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedIfNotAllowed,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
type ReferencingPageInfo = {
id: string;
@@ -63,7 +58,6 @@ export class TransclusionService {
private readonly attachmentRepo: AttachmentRepo,
private readonly storageService: StorageService,
private readonly pageAccessService: PageAccessService,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async syncPageTransclusions(
@@ -773,26 +767,6 @@ export class TransclusionService {
transclusionId,
);
// SECURITY (Variant C admin gate, transclusion unsync write path):
// The returned content is a source snapshot that the client materializes
// into the reference page via insertContentAt. The snapshot keeps any
// htmlEmbed verbatim, and unsync requires only space Edit/View. If the
// requesting user is not a workspace admin/owner, strip htmlEmbed nodes so a
// non-admin can never receive an embed payload to re-persist (the collab
// strip on the subsequent save is debounced/race-prone and must not be the
// only guard). Admin behavior is unchanged.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(user.workspaceId))?.settings,
);
content = stripHtmlEmbedIfNotAllowed(content, {
featureEnabled: htmlEmbedEnabled,
role: user.role,
onStrip: () =>
this.logger.warn(
`Stripping htmlEmbed node(s) from transclusion unsync by user ${user.id} (reference page ${referencePageId}, source page ${sourcePageId})`,
),
});
return { content };
}
}

View File

@@ -1,12 +1,14 @@
import { ShareService } from './share.service';
import { hasHtmlEmbedNode } from '../../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL ShareService server-authoritative htmlEmbed kill-switch for
// shared content. An anonymous public-share viewer cannot read the per-workspace
// htmlEmbed toggle, so the SERVER must decide what to serve: when the toggle is
// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are kept so
// the read-only client executes them. All repos / token service are mocked so the
// real prepareContentForShare logic runs end-to-end via getSharedPage.
// Exercises the REAL ShareService server-authoritative htmlEmbed master toggle
// for shared content. The block renders inside a sandboxed iframe (harmless), so
// this is NOT an XSS guard — it is the master-toggle enforcement for anonymous
// shares: an anonymous public-share viewer cannot read the per-workspace
// htmlEmbed toggle, so the SERVER must decide what to serve. When the toggle is
// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are served
// and rendered in their sandboxed frame. All repos / token service are mocked so
// the real prepareContentForShare logic runs end-to-end via getSharedPage.
const WS = 'ws-1';
const PAGE = 'page-1';

View File

@@ -1,4 +1,4 @@
import { Controller, Get, Param, Req, Res } from '@nestjs/common';
import { Controller, Get, Logger, Param, Req, Res } from '@nestjs/common';
import { ShareService } from './share.service';
import { FastifyReply, FastifyRequest } from 'fastify';
import { join } from 'path';
@@ -11,6 +11,8 @@ import { htmlEscape } from '../../common/helpers/html-escaper';
@Controller('share')
export class ShareSeoController {
private readonly logger = new Logger(ShareSeoController.name);
constructor(
private readonly shareService: ShareService,
private workspaceRepo: WorkspaceRepo,
@@ -84,10 +86,34 @@ export class ShareSeoController {
.join('\n ');
const html = fs.readFileSync(indexFilePath, 'utf8');
const transformedHtml = html
let transformedHtml = html
.replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
.replace(metaTagVar, metaTags);
// Deliberate same-origin tracker surface: this is the ONE place where an
// admin-authored analytics/tracker snippet (settings.trackerHead) is
// injected verbatim into the page origin. It is admin-only (writable only
// via the admin-gated workspace settings) and applies to PUBLIC SHARE
// pages only. It is trusted content, so it is NOT escaped. The htmlEmbed
// block itself is sandboxed and is the safe surface for everyone else.
const trackerHead = (workspace?.settings as any)?.trackerHead;
if (typeof trackerHead === 'string' && trackerHead.trim().length > 0) {
if (transformedHtml.includes('</head>')) {
// Function replacer: the snippet is admin-authored trusted content and
// must be injected verbatim. A string replacement would interpret `$&`,
// `$'`, `` $` `` and `$$` inside it as substitution patterns and mangle
// the tracker; a function return value is inserted literally.
transformedHtml = transformedHtml.replace(
'</head>',
() => `${trackerHead}\n</head>`,
);
} else {
this.logger.warn(
'trackerHead is configured but no </head> marker was found in the share index HTML; tracker snippet was not injected.',
);
}
}
res.type('text/html').send(transformedHtml);
}
}

View File

@@ -87,9 +87,16 @@ export class ShareController {
workspace.id,
);
// Resolve the identity name only when the assistant is enabled, so the
// anonymous widget can label messages with the configured persona name.
const aiAssistantName = aiAssistant
? await this.aiSettings.resolvePublicShareAssistantName(workspace.id)
: null;
return {
...shareData,
aiAssistant,
aiAssistantName,
features: this.licenseCheckService.resolveFeatures(
workspace.licenseKey,
workspace.plan,

View File

@@ -524,12 +524,14 @@ export class ShareService {
* not leak structure (existence, location, count, resolved state, or
* comment ids) to public viewers.
*
* 3. Strip `htmlEmbed` nodes when the workspace feature toggle is OFF. This
* makes the toggle a SERVER-AUTHORITATIVE kill-switch for shared content:
* when OFF the embed is never served to the anonymous viewer (who can't
* read the per-workspace toggle), when ON the embed is served so the
* read-only client executes it. `htmlEmbedEnabled` is resolved fail-closed
* by the callers (missing workspace => OFF => strip).
* 3. Strip `htmlEmbed` nodes when the workspace master toggle is OFF. The
* block renders inside a sandboxed iframe on the client (harmless, no
* same-origin access), so this is NOT an XSS guard — it is the
* SERVER-AUTHORITATIVE enforcement of the workspace master toggle for
* anonymous shares: an anonymous viewer cannot read the per-workspace
* toggle, so when OFF the block is never served, and when ON it is served
* and rendered in its sandboxed frame. `htmlEmbedEnabled` is resolved
* fail-closed by the callers (missing workspace => OFF => strip).
*
* Both share-content paths — the host page (`updatePublicAttachments`) and
* the share-scoped transclusion lookup (`lookupTransclusionForShare`) —
@@ -544,8 +546,9 @@ export class ShareService {
): Promise<Node | null> {
let pmJson = getProsemirrorContent(content);
// Kill-switch: when the workspace toggle is OFF, never serve htmlEmbed
// nodes to public viewers. Strip before tokenizing/serializing.
// Master-toggle enforcement: when the workspace toggle is OFF, never serve
// htmlEmbed nodes to anonymous public viewers (who cannot read the toggle).
// Strip before tokenizing/serializing.
if (!htmlEmbedEnabled) {
pmJson = stripHtmlEmbedNodes(pmJson);
}

View File

@@ -5,6 +5,8 @@ import {
IsBoolean,
IsInt,
IsOptional,
IsString,
MaxLength,
Min,
} from 'class-validator';
@@ -53,12 +55,22 @@ export class UpdateWorkspaceDto extends PartialType(CreateWorkspaceDto) {
@IsBoolean()
aiDictation: boolean;
// Workspace feature toggle for the admin-only HTML embed feature. Persisted at
// settings.htmlEmbed. ABSENT/false => OFF (default).
// Workspace master toggle that enables/disables the HTML embed block type.
// Persisted at settings.htmlEmbed. ABSENT/false => OFF (default). The block
// itself renders in a sandboxed iframe, so this is a feature switch, not a
// security gate.
@IsOptional()
@IsBoolean()
htmlEmbed: boolean;
// Admin-only analytics/tracker snippet (raw HTML/JS) injected verbatim into
// the <head> of PUBLIC SHARE pages only (same-origin). Persisted at
// settings.trackerHead. Admin-authored trusted content.
@IsOptional()
@IsString()
@MaxLength(20000)
trackerHead?: string;
@IsOptional()
@IsBoolean()
aiPublicShareAssistant: boolean;

View File

@@ -108,4 +108,38 @@ describe('WorkspaceService.update — htmlEmbed toggle persistence (real code)',
expect(logged.changes.before.htmlEmbed).toBe(false);
expect(logged.changes.after.htmlEmbed).toBe(true);
});
it('persists trackerHead via updateSetting with the trackerHead key', async () => {
const { service, updateSetting } = buildService({});
await service.update('w1', { trackerHead: '<script>ga()</script>' } as any);
expect(updateSetting).toHaveBeenCalledWith(
'w1',
'trackerHead',
'<script>ga()</script>',
expect.anything(),
);
});
it('does NOT call updateSetting when trackerHead is undefined in the dto', async () => {
const { service, updateSetting } = buildService({});
await service.update('w1', { name: 'New name' } as any);
expect(updateSetting).not.toHaveBeenCalled();
});
it('audits the trackerHead change (before/after) when the value changes', async () => {
const { service, auditService } = buildService({
settingsBefore: { trackerHead: '' },
});
await service.update('w1', { trackerHead: '<script>m()</script>' } as any);
expect(auditService.log).toHaveBeenCalledTimes(1);
const logged = auditService.log.mock.calls[0][0];
expect(logged.changes.before.trackerHead).toBe('');
expect(logged.changes.after.trackerHead).toBe('<script>m()</script>');
});
});

View File

@@ -525,6 +525,22 @@ export class WorkspaceService {
);
}
if (typeof updateWorkspaceDto.trackerHead !== 'undefined') {
// Admin-only analytics/tracker snippet injected into the <head> of
// public share pages (same-origin). Persisted at settings.trackerHead.
const prev = (settingsBefore as any)?.trackerHead ?? '';
if (prev !== updateWorkspaceDto.trackerHead) {
before.trackerHead = prev;
after.trackerHead = updateWorkspaceDto.trackerHead;
}
await this.workspaceRepo.updateSetting(
workspaceId,
'trackerHead',
updateWorkspaceDto.trackerHead,
trx,
);
}
if (typeof updateWorkspaceDto.aiPublicShareAssistant !== 'undefined') {
const prev = settingsBefore?.ai?.publicShareAssistant ?? false;
if (prev !== updateWorkspaceDto.aiPublicShareAssistant) {
@@ -549,6 +565,7 @@ export class WorkspaceService {
delete updateWorkspaceDto.aiChat;
delete updateWorkspaceDto.aiDictation;
delete updateWorkspaceDto.htmlEmbed;
delete updateWorkspaceDto.trackerHead;
delete updateWorkspaceDto.aiPublicShareAssistant;
await this.workspaceRepo.updateWorkspace(

View File

@@ -3,6 +3,7 @@ import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueName, QueueJob } from '../queue/constants';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo';
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
import { PageEmbeddingRepo } from '@docmost/db/repos/ai-chat/page-embedding.repo';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
@@ -49,6 +50,7 @@ export interface UpdateAiSettingsInput {
export class AiSettingsService {
constructor(
private readonly workspaceRepo: WorkspaceRepo,
private readonly aiAgentRoleRepo: AiAgentRoleRepo,
private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
private readonly pageRepo: PageRepo,
@@ -110,6 +112,26 @@ export class AiSettingsService {
return settings?.ai?.publicShareAssistant === true;
}
/**
* Resolve the display name of the agent role acting as the public-share
* assistant's identity, so the anonymous widget can label messages with the
* persona name instead of the generic "AI agent". Returns null when no role
* is configured, or the referenced role is missing/disabled (built-in persona
* → the client falls back to "AI agent"). Mirrors the role resolution in
* PublicShareChatService.resolveShareRole.
*/
async resolvePublicShareAssistantName(
workspaceId: string,
): Promise<string | null> {
const resolved = await this.resolve(workspaceId);
const roleId = resolved?.publicShareAssistantRoleId;
if (!roleId) return null;
const role = await this.aiAgentRoleRepo.findById(roleId, workspaceId);
if (!role || !role.enabled) return null;
const name = role.name?.trim();
return name ? name : null;
}
/** Read the stored non-secret provider settings for a workspace. */
private async readProvider(
workspaceId: string,

View File

@@ -20,12 +20,6 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
import { markdownToHtml } from '@docmost/editor-ext';
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
import {
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedIfNotAllowed,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { formatImportHtml } from '../utils/import-formatter';
import {
buildAttachmentCandidates,
@@ -59,8 +53,6 @@ export class FileImportTaskService {
private readonly backlinkRepo: BacklinkRepo,
@InjectKysely() private readonly db: KyselyDB,
private readonly importAttachmentService: ImportAttachmentService,
private readonly userRepo: UserRepo,
private readonly workspaceRepo: WorkspaceRepo,
private eventEmitter: EventEmitter2,
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
) {}
@@ -157,25 +149,6 @@ export class FileImportTaskService {
.where('id', '=', fileTask.spaceId)
.executeTakeFirst();
// SECURITY (Variant C admin gate, zip/multi-file import write path):
// An imported .html/.md file can carry an htmlEmbed marker (the node's
// serialized form), which would execute raw, unsanitized JS in readers'
// browsers. Only workspace admins/owners may author it. Resolve the
// importer's role ONCE here; each page's prosemirror JSON is run through the
// strip below before textContent/ydoc/insert when the importer is not an
// admin, so a non-admin cannot smuggle the node in via a zip import (which
// requires only space Edit).
const importingUser = await this.userRepo.findById(
fileTask.creatorId,
fileTask.workspaceId,
);
// Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed
// survives only when the workspace feature toggle is ON and the importer is
// an admin/owner. OFF (default) => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(fileTask.workspaceId))?.settings,
);
const pagesMap = new Map<string, ImportPageNode>();
for (const absPath of allFiles) {
@@ -523,22 +496,9 @@ export class FileImportTaskService {
await this.importService.processHTML(html),
);
let { title, prosemirrorJson } =
const { title, prosemirrorJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
// SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
// imported by a non-admin BEFORE computing textContent/ydoc/insert.
// Gate (featureEnabled AND admin) is resolved once above and recomputed
// by the helper from the same htmlEmbedEnabled + importer role.
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
featureEnabled: htmlEmbedEnabled,
role: importingUser?.role,
onStrip: () =>
this.logger.warn(
`Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
),
});
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,

View File

@@ -1,266 +0,0 @@
// Exercises the REAL htmlEmbed admin gate on the two import write paths:
//
// (1) ImportService.importPage() — single .html/.md upload
// (2) FileImportTaskService.processGenericImport() — zip / multi-file import
//
// Both build content/textContent/ydoc directly and persist (bypassing the
// collab onStoreDocument strip), so each must run the imported document through
// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read
// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed,
// `stripHtmlEmbedNodes` BEFORE persisting.
//
// This spec constructs the REAL services with deps mocked, feeds an imported
// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed
// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED
// content (captured at the insert boundary) is stripped for a non-admin /
// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD
// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts.
//
// Three modules are mocked away because they pull transitive ESM deps that
// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the
// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via
// import-attachment). None of them participate in the gate decision:
// - import-formatter: contextless HTML cleanup + link rewriting; replaced with
// faithful passthroughs (the embed div has no href/iframe, so the real
// normalizer would leave it untouched anyway).
// - import-attachment: attachment rewriting; passthrough returns html as-is.
jest.mock('../../../collaboration/collaboration.gateway', () => ({
CollaborationGateway: class {},
}));
jest.mock('../utils/import-formatter', () => ({
normalizeImportHtml: () => {},
formatImportHtml: async (opts: any) => ({
html: opts.html,
backlinks: [],
pageIcon: undefined,
}),
}));
jest.mock('./import-attachment.service', () => ({
ImportAttachmentService: class {},
}));
import { promises as fs } from 'node:fs';
import * as os from 'node:os';
import * as path from 'node:path';
import { ImportService } from './import.service';
import { FileImportTaskService } from './file-import-task.service';
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
const WS = 'ws-1';
const SPACE = 'space-1';
const USER = 'importer-1';
// HTML carrying the serialized htmlEmbed node. The REAL htmlToJson parses
// `<div data-type="htmlEmbed" data-source="BASE64">` into an htmlEmbed PM node
// (base64 below decodes to `<script>x</script>`).
const HTML_WITH_EMBED =
'<p>imported body</p>' +
'<div data-type="htmlEmbed" data-source="PHNjcmlwdD54PC9zY3JpcHQ+"></div>';
function workspaceRepoFor(featureEnabled: boolean) {
return {
findById: jest.fn(async () => ({
id: WS,
settings: { htmlEmbed: featureEnabled },
})),
};
}
// userRepo.findById resolves the importer's role (or undefined for a missing
// user -> fail closed).
function userRepoFor(user: { role?: string } | undefined) {
return { findById: jest.fn(async () => user) };
}
describe('ImportService.importPage htmlEmbed admin gate (real code)', () => {
// Run importPage with a single uploaded .html and return the persisted content
// captured at pageRepo.insertPage.
async function persistedContent(
featureEnabled: boolean,
user: { role?: string } | undefined,
) {
const captured: any[] = [];
const pageRepo: any = {
insertPage: jest.fn(async (row: any) => {
captured.push(row);
return { id: 'p1', slugId: 's1', ...row };
}),
};
// db is only used for getNewPagePosition (a select chain).
const selectChain: any = {
select: () => selectChain,
where: () => selectChain,
orderBy: () => selectChain,
limit: () => selectChain,
executeTakeFirst: async () => undefined,
};
const db: any = { selectFrom: () => selectChain };
const service = new ImportService(
pageRepo,
userRepoFor(user) as any,
{ putBuffer: jest.fn() } as any, // storageService (unused on this path)
db,
{ add: jest.fn() } as any, // fileTaskQueue (unused)
workspaceRepoFor(featureEnabled) as any,
);
const file: any = {
filename: 'doc.html',
toBuffer: async () => Buffer.from(HTML_WITH_EMBED, 'utf-8'),
};
await service.importPage(Promise.resolve(file), USER, SPACE, WS);
expect(captured).toHaveLength(1);
return captured[0].content;
}
it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
const content = await persistedContent(true, { role: 'member' });
expect(hasHtmlEmbedNode(content)).toBe(false);
expect(JSON.stringify(content)).toContain('imported body');
});
it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe(
false,
);
});
it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe(
true,
);
});
it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe(
true,
);
});
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
expect(
hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })),
).toBe(false);
});
});
describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => {
let extractDir: string;
beforeEach(async () => {
// Real temp dir holding a single .html page that carries the embed; the
// method reads it from disk via fs.readFile.
extractDir = await fs.mkdtemp(path.join(os.tmpdir(), 'html-embed-import-'));
await fs.writeFile(path.join(extractDir, 'page.html'), HTML_WITH_EMBED);
});
afterEach(async () => {
await fs.rm(extractDir, { recursive: true, force: true });
});
// Run processGenericImport over the temp dir and return the content persisted
// for the imported page (captured at trx.insertInto('pages').values(...)).
async function persistedContent(
featureEnabled: boolean,
user: { role?: string } | undefined,
) {
const captured: any[] = [];
const trxInsertChain = (table: string) => ({
values: (row: any) => {
if (table === 'pages') captured.push(row);
return { execute: async () => undefined };
},
});
const trx: any = { insertInto: trxInsertChain };
const db: any = {
// spaces lookup at the top of processGenericImport
selectFrom: () => ({
select: () => ({
where: () => ({ executeTakeFirst: async () => ({ slug: 'sp' }) }),
}),
}),
// executeTx -> db.transaction().execute(cb)
transaction: () => ({ execute: async (cb: any) => cb(trx) }),
};
// importService stub: only the real, gate-relevant helpers are used. We give
// it the REAL implementations by delegating to a real ImportService for
// processHTML/extractTitleAndRemoveHeading/createYdoc so the embed parse and
// strip path runs for real.
const realImport = new ImportService(
{} as any,
{} as any,
{} as any,
{} as any,
{} as any,
{} as any,
);
const importService: any = {
processHTML: (html: string) => realImport.processHTML(html),
extractTitleAndRemoveHeading: (s: any) =>
realImport.extractTitleAndRemoveHeading(s),
createYdoc: (j: any) => realImport.createYdoc(j),
};
const importAttachmentService: any = {
// passthrough: no attachment rewriting, return html unchanged
processAttachments: jest.fn(async (opts: any) => opts.html),
};
const service = new FileImportTaskService(
{ putBuffer: jest.fn() } as any, // storageService
importService,
{ nextPagePosition: jest.fn(async () => 'a0') } as any, // pageService (position only)
{ insertBacklink: jest.fn() } as any, // backlinkRepo
db,
importAttachmentService,
userRepoFor(user) as any,
workspaceRepoFor(featureEnabled) as any,
{ emit: jest.fn() } as any, // eventEmitter
{ logBatchWithContext: jest.fn() } as any, // auditService
);
const fileTask: any = {
id: 'task-1',
creatorId: USER,
workspaceId: WS,
spaceId: SPACE,
source: 'generic',
};
await service.processGenericImport({ extractDir, fileTask });
expect(captured.length).toBeGreaterThanOrEqual(1);
return captured[0].content;
}
it('toggle ON + member: persisted page has htmlEmbed stripped', async () => {
const content = await persistedContent(true, { role: 'member' });
expect(hasHtmlEmbedNode(content)).toBe(false);
expect(JSON.stringify(content)).toContain('imported body');
});
it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe(
false,
);
});
it('toggle ON + admin: persisted page keeps the htmlEmbed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe(
true,
);
});
it('toggle ON + owner: persisted page keeps the htmlEmbed', async () => {
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe(
true,
);
});
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
expect(
hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })),
).toBe(false);
});
});

View File

@@ -1,12 +1,5 @@
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
import { UserRepo } from '@docmost/db/repos/user/user.repo';
import {
hasHtmlEmbedNode,
isHtmlEmbedFeatureEnabled,
stripHtmlEmbedIfNotAllowed,
} from '../../../common/helpers/prosemirror/html-embed.util';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { MultipartFile } from '@fastify/multipart';
import * as path from 'path';
import {
@@ -44,12 +37,10 @@ export class ImportService {
constructor(
private readonly pageRepo: PageRepo,
private readonly userRepo: UserRepo,
private readonly storageService: StorageService,
@InjectKysely() private readonly db: KyselyDB,
@InjectQueue(QueueName.FILE_TASK_QUEUE)
private readonly fileTaskQueue: Queue,
private readonly workspaceRepo: WorkspaceRepo,
) {}
async importPage(
@@ -94,32 +85,7 @@ export class ImportService {
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
const title = extracted.title;
let prosemirrorJson = extracted.prosemirrorJson;
// SECURITY (Variant C admin gate, import write path):
// An imported .html/.md file can carry an htmlEmbed marker (the node's
// serialized form), which would execute raw JS in readers' browsers. Only
// workspace admins/owners may author it, so strip htmlEmbed nodes from
// imports performed by a non-admin user.
// Outer has-check first so the user/workspace lookups below run only when an
// embed is actually present (the common case carries none).
if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
const importingUser = await this.userRepo.findById(userId, workspaceId);
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
// feature toggle is ON and the importer is an admin/owner. OFF (default)
// => stripped for everyone.
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
(await this.workspaceRepo.findById(workspaceId))?.settings,
);
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
featureEnabled: htmlEmbedEnabled,
role: importingUser?.role,
onStrip: () =>
this.logger.warn(
`Stripping htmlEmbed node(s) from import by user ${userId}`,
),
});
}
const prosemirrorJson = extracted.prosemirrorJson;
const pageTitle = title || fileName;