Merge remote-tracking branch 'gitea/develop' into fix/review-batch-2
# Conflicts: # AGENTS.md # CHANGELOG.md # README.md # apps/server/src/collaboration/collaboration.handler.ts # apps/server/src/common/helpers/prosemirror/html-embed.spec.ts # apps/server/src/common/helpers/prosemirror/html-embed.util.ts # apps/server/src/core/ai-chat/public-share-chat.service.ts # apps/server/src/core/ai-chat/public-share-chat.spec.ts # apps/server/src/core/ai-chat/public-share-workspace-limiter.ts # apps/server/src/core/page/services/page.service.ts # apps/server/src/core/page/transclusion/transclusion.service.ts # apps/server/src/integrations/import/services/file-import-task.service.ts # apps/server/src/integrations/import/services/import.service.ts
This commit is contained in:
@@ -1,120 +0,0 @@
|
||||
import * as Y from 'yjs';
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import { CollaborationHandler } from './collaboration.handler';
|
||||
import { hasHtmlEmbedNode } from '../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL CollaborationHandler.updatePageContent admin gate (the
|
||||
// REST/MCP/AI content-update entrypoint, used by the page update endpoint and
|
||||
// the MCP/AI agent). updatePageContent reads `user?.role` and strips htmlEmbed
|
||||
// BEFORE handing the json to withYdocConnection. We stub only
|
||||
// withYdocConnection (which would otherwise open a real hocuspocus connection):
|
||||
// the role-extraction (`user?.role`) + strip that run upstream of it are REAL
|
||||
// production code. The 'replace' branch then runs the production
|
||||
// TiptapTransformer.toYdoc on the gated json against a real Y.Doc, which we
|
||||
// decode back to JSON and assert on. This replaces the re-implemented
|
||||
// `applyAdminGate` stand-in for this entrypoint.
|
||||
|
||||
const docWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
attrs: { position: 'left' },
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>nested</script>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'inner' }] },
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'column',
|
||||
attrs: { position: 'right' },
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'r' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>top</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
/**
|
||||
* Run the REAL updatePageContent('replace') with a stubbed withYdocConnection.
|
||||
* The stub provides a real Y.Doc + recording fragment; the production fn calls
|
||||
* TiptapTransformer.toYdoc(<gated json>) and applies it to the doc, so decoding
|
||||
* the doc afterward yields exactly the gated content.
|
||||
*/
|
||||
async function gatedContentFor(
|
||||
role: string | null | undefined,
|
||||
featureEnabled = true,
|
||||
) {
|
||||
// Workspace settings read used by the toggle-AND-admin gate.
|
||||
const workspaceRepo = {
|
||||
findById: jest.fn(async () => ({
|
||||
id: 'ws-1',
|
||||
settings: { htmlEmbed: featureEnabled },
|
||||
})),
|
||||
};
|
||||
const handler = new CollaborationHandler(workspaceRepo as any);
|
||||
const captureDoc = new Y.Doc();
|
||||
|
||||
jest
|
||||
.spyOn(handler, 'withYdocConnection')
|
||||
.mockImplementation(async (_hp, _name, _ctx, fn: any) => {
|
||||
const fragment = captureDoc.getXmlFragment('default');
|
||||
// Mirror the real Document surface the fn touches.
|
||||
const docLike: any = {
|
||||
getXmlFragment: () => fragment,
|
||||
};
|
||||
// The fn does: fragment.delete(0,len) then
|
||||
// Y.applyUpdate(doc, encodeStateAsUpdate(toYdoc(gatedJson))). It calls
|
||||
// Y.applyUpdate(doc, ...) — so docLike must be a real Y.Doc target.
|
||||
fn(captureDoc);
|
||||
});
|
||||
|
||||
const handlers = handler.getHandlers({} as any);
|
||||
await handlers.updatePageContent('page-1', {
|
||||
prosemirrorJson: docWithEmbed(),
|
||||
operation: 'replace',
|
||||
user: { id: 'u1', role, workspaceId: 'ws-1' } as any,
|
||||
});
|
||||
|
||||
return TiptapTransformer.fromYdoc(captureDoc, 'default');
|
||||
}
|
||||
|
||||
describe('CollaborationHandler.updatePageContent htmlEmbed admin gate (real code)', () => {
|
||||
it('non-admin (member): every htmlEmbed (top-level + nested) stripped before the ydoc', async () => {
|
||||
const gated = await gatedContentFor('member');
|
||||
expect(hasHtmlEmbedNode(gated)).toBe(false);
|
||||
// Non-embed siblings survive.
|
||||
const json = JSON.stringify(gated);
|
||||
expect(json).toContain('keep');
|
||||
expect(json).toContain('inner');
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
expect(hasHtmlEmbedNode(await gatedContentFor(role))).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it('toggle ON + admin: htmlEmbed preserved', async () => {
|
||||
expect(hasHtmlEmbedNode(await gatedContentFor('admin', true))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: htmlEmbed preserved', async () => {
|
||||
expect(hasHtmlEmbedNode(await gatedContentFor('owner', true))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(hasHtmlEmbedNode(await gatedContentFor('admin', false))).toBe(false);
|
||||
});
|
||||
|
||||
it('toggle OFF + member: stripped', async () => {
|
||||
expect(hasHtmlEmbedNode(await gatedContentFor('member', false))).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -8,11 +8,6 @@ import {
|
||||
import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util';
|
||||
import * as Y from 'yjs';
|
||||
import { User } from '@docmost/db/types/entity.types';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
|
||||
export type CollabEventHandlers = ReturnType<
|
||||
CollaborationHandler['getHandlers']
|
||||
@@ -22,8 +17,6 @@ export type CollabEventHandlers = ReturnType<
|
||||
export class CollaborationHandler {
|
||||
private readonly logger = new Logger(CollaborationHandler.name);
|
||||
|
||||
constructor(private readonly workspaceRepo: WorkspaceRepo) {}
|
||||
|
||||
getHandlers(hocuspocus: Hocuspocus) {
|
||||
return {
|
||||
alterState: async (documentName: string, payload: { pageId: string }) => {
|
||||
@@ -89,30 +82,9 @@ export class CollaborationHandler {
|
||||
},
|
||||
) => {
|
||||
const { operation, user } = payload;
|
||||
let { prosemirrorJson } = payload;
|
||||
const { prosemirrorJson } = payload;
|
||||
this.logger.debug('Updating page content via yjs', documentName);
|
||||
|
||||
// SECURITY (Variant C admin gate, REST/MCP/AI write path):
|
||||
// updatePageContent is the server-side entrypoint used by the REST page
|
||||
// update endpoint and by the MCP/AI agent. Raw `htmlEmbed` nodes execute
|
||||
// arbitrary JS in every reader's browser, so a NON-admin caller must not
|
||||
// be able to persist them here. If the editing user is not a workspace
|
||||
// admin/owner, strip every htmlEmbed node before it reaches the ydoc.
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
|
||||
// feature toggle is ON and the editing user is an admin/owner. OFF
|
||||
// (default) => stripped for everyone.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(user?.workspaceId))?.settings,
|
||||
);
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: user?.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from update by user ${user?.id} on ${documentName}`,
|
||||
),
|
||||
});
|
||||
|
||||
await this.withYdocConnection(
|
||||
hocuspocus,
|
||||
documentName,
|
||||
|
||||
@@ -1,456 +0,0 @@
|
||||
import * as Y from 'yjs';
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import { PersistenceExtension } from './persistence.extension';
|
||||
import { tiptapExtensions } from '../collaboration.util';
|
||||
import {
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
HTML_EMBED_NODE_NAME,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL PersistenceExtension.onStoreDocument (the primary collab
|
||||
// WebSocket write path) against a REAL ydoc, with thin repo/db/queue mocks.
|
||||
// This replaces the prior re-implemented `applyAdminGate` stand-in for this
|
||||
// entrypoint: if the role-extraction expression (`context?.user?.role`), the
|
||||
// strip call, or the ydoc-rebuild branch is deleted/changed, these tests fail.
|
||||
|
||||
const RICH_DOC = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'text', text: 'intro paragraph' }],
|
||||
},
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
attrs: { position: 'left' },
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [
|
||||
{ type: 'text', text: 'left col, mentioning ' },
|
||||
{
|
||||
type: 'mention',
|
||||
attrs: {
|
||||
id: 'mention-1',
|
||||
label: 'Alice',
|
||||
entityType: 'user',
|
||||
entityId: 'user-123',
|
||||
creatorId: 'creator-1',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Nested embed inside a column — must be stripped recursively.
|
||||
{
|
||||
type: HTML_EMBED_NODE_NAME,
|
||||
attrs: { source: '<script>nested()</script>' },
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'column',
|
||||
attrs: { position: 'right' },
|
||||
content: [
|
||||
{
|
||||
type: 'table',
|
||||
content: [
|
||||
{
|
||||
type: 'tableRow',
|
||||
content: [
|
||||
{
|
||||
type: 'tableHeader',
|
||||
attrs: { colspan: 1, rowspan: 1 },
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'H' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'tableRow',
|
||||
content: [
|
||||
{
|
||||
type: 'tableCell',
|
||||
attrs: { colspan: 1, rowspan: 1 },
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'cell' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
// Top-level embed — must be stripped.
|
||||
{
|
||||
type: HTML_EMBED_NODE_NAME,
|
||||
attrs: { source: '<script>top()</script>' },
|
||||
},
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'text', text: 'outro paragraph' }],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
function buildYdoc(json: any): Y.Doc {
|
||||
return TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
|
||||
}
|
||||
|
||||
// Count nodes by type across the whole tree (excludes htmlEmbed by listing it
|
||||
// separately) so we can assert every OTHER node type survived the strip.
|
||||
function nodeTypeCounts(json: any): Record<string, number> {
|
||||
const counts: Record<string, number> = {};
|
||||
const walk = (n: any) => {
|
||||
if (!n || typeof n !== 'object') return;
|
||||
if (n.type) counts[n.type] = (counts[n.type] ?? 0) + 1;
|
||||
if (Array.isArray(n.content)) n.content.forEach(walk);
|
||||
};
|
||||
walk(json);
|
||||
return counts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a real PersistenceExtension with the minimum mocks needed for
|
||||
* onStoreDocument to reach the strip + persist branch, and capture the content
|
||||
* that would be written to the page row.
|
||||
*/
|
||||
function buildExtension(featureEnabled = true, priorContent?: any) {
|
||||
const captured: { content?: any } = {};
|
||||
|
||||
const existingPage = {
|
||||
id: 'page-1',
|
||||
slugId: 'slug-1',
|
||||
spaceId: 'space-1',
|
||||
workspaceId: 'ws-1',
|
||||
creatorId: 'creator-1',
|
||||
contributorIds: [],
|
||||
// The currently-persisted content. Defaults to an empty doc (differs from
|
||||
// new content -> persist runs); a test may pass a prior admin embed here to
|
||||
// exercise the preserve-admin-embed branch.
|
||||
content: priorContent ?? { type: 'doc', content: [] },
|
||||
createdAt: new Date(),
|
||||
lastUpdatedSource: 'user',
|
||||
};
|
||||
|
||||
const pageRepo = {
|
||||
findById: jest.fn(async () => ({ ...existingPage })),
|
||||
updatePage: jest.fn(async (values: any) => {
|
||||
captured.content = values.content;
|
||||
}),
|
||||
};
|
||||
const pageHistoryRepo = {
|
||||
findPageLastHistory: jest.fn(async () => null),
|
||||
saveHistory: jest.fn(async () => undefined),
|
||||
};
|
||||
// db.transaction().execute(cb) just runs the callback (no real DB).
|
||||
const db = {
|
||||
transaction: () => ({
|
||||
execute: (cb: any) => cb({} as any),
|
||||
}),
|
||||
};
|
||||
const noopQueue = { add: jest.fn(async () => undefined) } as any;
|
||||
const collabHistory = { addContributors: jest.fn(async () => undefined) } as any;
|
||||
const transclusionService = {
|
||||
syncPageTransclusions: jest.fn(async () => undefined),
|
||||
syncPageReferences: jest.fn(async () => undefined),
|
||||
} as any;
|
||||
|
||||
// Workspace settings read used by the toggle-AND-admin gate.
|
||||
const workspaceRepo = {
|
||||
findById: jest.fn(async () => ({
|
||||
id: 'ws-1',
|
||||
settings: { htmlEmbed: featureEnabled },
|
||||
})),
|
||||
};
|
||||
|
||||
const ext = new PersistenceExtension(
|
||||
pageRepo as any,
|
||||
pageHistoryRepo as any,
|
||||
db as any,
|
||||
noopQueue,
|
||||
noopQueue,
|
||||
noopQueue,
|
||||
collabHistory,
|
||||
transclusionService,
|
||||
workspaceRepo as any,
|
||||
);
|
||||
|
||||
return { ext, captured, pageRepo };
|
||||
}
|
||||
|
||||
async function runStore(
|
||||
role: string | null | undefined,
|
||||
doc: Y.Doc,
|
||||
featureEnabled = true,
|
||||
priorContent?: any,
|
||||
) {
|
||||
const { ext, captured } = buildExtension(featureEnabled, priorContent);
|
||||
// hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has
|
||||
// none, so stub it (the post-persist broadcast is not under test here).
|
||||
(doc as any).broadcastStateless = () => undefined;
|
||||
await ext.onStoreDocument({
|
||||
documentName: 'page-1',
|
||||
document: doc,
|
||||
context: { user: { id: 'u1', role } },
|
||||
} as any);
|
||||
return captured;
|
||||
}
|
||||
|
||||
describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)', () => {
|
||||
it('non-admin store: strips EVERY htmlEmbed but preserves every other node', async () => {
|
||||
const doc = buildYdoc(RICH_DOC);
|
||||
const before = TiptapTransformer.fromYdoc(doc, 'default');
|
||||
expect(hasHtmlEmbedNode(before)).toBe(true);
|
||||
const beforeCounts = nodeTypeCounts(before);
|
||||
|
||||
const captured = await runStore('member', doc);
|
||||
|
||||
expect(captured.content).toBeDefined();
|
||||
// htmlEmbed gone from the persisted content.
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
|
||||
|
||||
// Every non-embed node type is preserved with the SAME count (guards against
|
||||
// data loss if a node were missing from tiptapExtensions and dropped on the
|
||||
// toYdoc rebuild).
|
||||
const afterCounts = nodeTypeCounts(captured.content);
|
||||
for (const [type, count] of Object.entries(beforeCounts)) {
|
||||
if (type === HTML_EMBED_NODE_NAME) continue;
|
||||
expect(afterCounts[type]).toBe(count);
|
||||
}
|
||||
// The two embeds are gone.
|
||||
expect(beforeCounts[HTML_EMBED_NODE_NAME]).toBe(2);
|
||||
expect(afterCounts[HTML_EMBED_NODE_NAME]).toBeUndefined();
|
||||
|
||||
// The shared ydoc fragment was also rewritten clean (re-decode it).
|
||||
const reDecoded = TiptapTransformer.fromYdoc(doc, 'default');
|
||||
expect(hasHtmlEmbedNode(reDecoded)).toBe(false);
|
||||
});
|
||||
|
||||
it('toggle ON + admin store: htmlEmbed preserved in persisted content', async () => {
|
||||
const captured = await runStore('admin', buildYdoc(RICH_DOC), true);
|
||||
expect(captured.content).toBeDefined();
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
|
||||
expect(nodeTypeCounts(captured.content)[HTML_EMBED_NODE_NAME]).toBe(2);
|
||||
});
|
||||
|
||||
it('toggle ON + owner store: htmlEmbed preserved', async () => {
|
||||
const captured = await runStore('owner', buildYdoc(RICH_DOC), true);
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin store: stripped (feature disabled for everyone)', async () => {
|
||||
const captured = await runStore('admin', buildYdoc(RICH_DOC), false);
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
|
||||
});
|
||||
|
||||
it('toggle OFF + owner store: stripped', async () => {
|
||||
const captured = await runStore('owner', buildYdoc(RICH_DOC), false);
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
|
||||
});
|
||||
|
||||
it('toggle OFF + member store: stripped', async () => {
|
||||
const captured = await runStore('member', buildYdoc(RICH_DOC), false);
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode((await runStore(undefined, buildYdoc(RICH_DOC))).content),
|
||||
).toBe(false);
|
||||
expect(
|
||||
hasHtmlEmbedNode((await runStore(null, buildYdoc(RICH_DOC))).content),
|
||||
).toBe(false);
|
||||
expect(
|
||||
hasHtmlEmbedNode((await runStore('viewer', buildYdoc(RICH_DOC))).content),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it('toggle ON + non-admin store: PRESERVES an admin embed already in the persisted content through an unrelated edit', async () => {
|
||||
// Prior persisted content already holds an admin-authored embed.
|
||||
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
|
||||
const prior = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
// A non-admin makes an UNRELATED edit (tweaks the paragraph) but the embed
|
||||
// is still present in the merged doc.
|
||||
const edited = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
|
||||
const captured = await runStore('member', buildYdoc(edited), true, prior);
|
||||
expect(captured.content).toBeDefined();
|
||||
// The admin's pre-existing embed survives the non-admin store.
|
||||
expect(collectHtmlEmbedSources(captured.content)).toEqual(
|
||||
new Set([ADMIN_SOURCE]),
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + non-admin store: strips a NEWLY-added embed while keeping the prior admin one', async () => {
|
||||
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
|
||||
const prior = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
// Non-admin keeps the admin embed, makes an unrelated paragraph edit (so the
|
||||
// store is not a no-op and is persisted), and ALSO adds a brand-new embed.
|
||||
const edited = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
|
||||
],
|
||||
};
|
||||
|
||||
const captured = await runStore('member', buildYdoc(edited), true, prior);
|
||||
expect(captured.content).toBeDefined();
|
||||
// Only the admin-vetted source remains; the newly-introduced one is stripped.
|
||||
expect(collectHtmlEmbedSources(captured.content)).toEqual(
|
||||
new Set([ADMIN_SOURCE]),
|
||||
);
|
||||
});
|
||||
|
||||
it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => {
|
||||
const emptyDoc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph' }],
|
||||
});
|
||||
// Non-admin path with an empty/embed-free fragment must be a no-op strip,
|
||||
// not throw.
|
||||
await expect(runStore('member', emptyDoc)).resolves.toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises the REAL early onChange guard (Gitea #26): guardHtmlEmbed converges
|
||||
// the shared ydoc sub-second, before the 10s store debounce. We call it directly
|
||||
// (it is the debounced timer body) and assert the ydoc fragment no longer yields
|
||||
// an htmlEmbed for the non-admin's transient embed, while admin-vetted embeds
|
||||
// already in the persisted content survive.
|
||||
describe('PersistenceExtension.guardHtmlEmbed early onChange guard (real code)', () => {
|
||||
async function runGuard(
|
||||
role: string | null | undefined,
|
||||
doc: Y.Doc,
|
||||
featureEnabled = true,
|
||||
priorContent?: any,
|
||||
) {
|
||||
const { ext } = buildExtension(featureEnabled, priorContent);
|
||||
await (ext as any).guardHtmlEmbed(
|
||||
'page-1',
|
||||
doc,
|
||||
{ user: { id: 'u1', role, workspaceId: 'ws-1' } },
|
||||
);
|
||||
}
|
||||
|
||||
it('toggle ON + non-admin: strips a newly-added embed from the shared ydoc', async () => {
|
||||
// Prior persisted content has NO embed; the live doc has one a non-admin
|
||||
// just added.
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
|
||||
],
|
||||
});
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
true,
|
||||
);
|
||||
|
||||
await runGuard('member', doc, true, { type: 'doc', content: [] });
|
||||
|
||||
// The shared ydoc fragment no longer yields any htmlEmbed.
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + non-admin: preserves a prior admin embed, strips the new one', async () => {
|
||||
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
|
||||
const prior = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
// Live doc keeps the admin embed AND adds a brand-new one.
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
await runGuard('member', doc, true, prior);
|
||||
|
||||
// Only the admin-vetted source survives in the shared ydoc.
|
||||
expect(
|
||||
collectHtmlEmbedSources(TiptapTransformer.fromYdoc(doc, 'default')),
|
||||
).toEqual(new Set([ADMIN_SOURCE]));
|
||||
});
|
||||
|
||||
it('toggle OFF + non-admin: strips ALL embeds (allow-list is null)', async () => {
|
||||
// Even an embed that matches the prior content is stripped when the toggle
|
||||
// is OFF, because the OFF path passes allowed=null (strip everything) and
|
||||
// never reads the prior content for an allow-list.
|
||||
const SOURCE = '<script>any()</script>';
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } },
|
||||
],
|
||||
});
|
||||
await runGuard('member', doc, false, {
|
||||
type: 'doc',
|
||||
content: [{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } }],
|
||||
});
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('admin role: guard is a defensive no-op (embed preserved)', async () => {
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>ok()</script>' } },
|
||||
],
|
||||
});
|
||||
await runGuard('admin', doc, true, { type: 'doc', content: [] });
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('no embed present: guard is a cheap no-op (loop-safe re-fire)', async () => {
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'plain' }] }],
|
||||
});
|
||||
await runGuard('member', doc, true, { type: 'doc', content: [] });
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -39,16 +39,6 @@ import {
|
||||
HISTORY_INTERVAL,
|
||||
} from '../constants';
|
||||
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripDisallowedHtmlEmbedNodes,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
|
||||
@Injectable()
|
||||
export class PersistenceExtension implements Extension {
|
||||
@@ -59,21 +49,6 @@ export class PersistenceExtension implements Extension {
|
||||
// coalescing window" per document and OR it across all edits in the window,
|
||||
// so the snapshot is marked 'agent' regardless of who wrote last.
|
||||
private agentTouched: Map<string, boolean> = new Map();
|
||||
// Per-document debounce timers for the early htmlEmbed guard (Gitea #26).
|
||||
// onChange schedules a short (~300ms) debounced strip that converges the
|
||||
// shared ydoc for all connected clients well before the 10s store debounce,
|
||||
// shrinking the pre-persist broadcast window of a non-admin's transient embed.
|
||||
private htmlEmbedGuardTimers: Map<string, NodeJS.Timeout> = new Map();
|
||||
// Per-document cache of the workspace htmlEmbed toggle (Gitea #26). Populated
|
||||
// in onLoadDocument (which already loads the page + has workspace context) and
|
||||
// read in onChange to gate early-guard scheduling: when the toggle is OFF (the
|
||||
// common default) we schedule NOTHING — no timer, no fromYdoc, no DB read — and
|
||||
// rely on the onStoreDocument strip as the backstop (when OFF the embed does
|
||||
// not execute in editable mode anyway). Cleared in afterUnloadDocument.
|
||||
// STALENESS: if an admin flips the toggle ON mid-session this cache stays OFF
|
||||
// until the document is reloaded, so the early guard won't schedule — accepted,
|
||||
// the onStoreDocument backstop still strips on persist.
|
||||
private htmlEmbedToggleByDoc: Map<string, boolean> = new Map();
|
||||
|
||||
constructor(
|
||||
private readonly pageRepo: PageRepo,
|
||||
@@ -84,7 +59,6 @@ export class PersistenceExtension implements Extension {
|
||||
@InjectQueue(QueueName.NOTIFICATION_QUEUE) private notificationQueue: Queue,
|
||||
private readonly collabHistory: CollabHistoryService,
|
||||
private readonly transclusionService: TransclusionService,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async onLoadDocument(data: onLoadDocumentPayload) {
|
||||
@@ -105,23 +79,6 @@ export class PersistenceExtension implements Extension {
|
||||
return;
|
||||
}
|
||||
|
||||
// Cache the workspace htmlEmbed toggle for this document (Gitea #26). We
|
||||
// already have the page (hence its workspaceId) here, so resolve the toggle
|
||||
// once and cache it keyed by documentName. onChange reads this to decide
|
||||
// whether to schedule the early guard at all — when OFF we skip the guard
|
||||
// entirely (no timer, no fromYdoc, no DB read). Cleared in
|
||||
// afterUnloadDocument. See htmlEmbedToggleByDoc for the staleness note.
|
||||
try {
|
||||
const enabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(page.workspaceId))?.settings,
|
||||
);
|
||||
this.htmlEmbedToggleByDoc.set(documentName, enabled);
|
||||
} catch (err) {
|
||||
// Fail OFF: if the toggle can't be resolved, never schedule the early
|
||||
// guard; the onStoreDocument backstop still strips on persist.
|
||||
this.htmlEmbedToggleByDoc.set(documentName, false);
|
||||
}
|
||||
|
||||
if (page.ydoc) {
|
||||
this.logger.debug(`ydoc loaded from db: ${pageId}`);
|
||||
|
||||
@@ -155,109 +112,7 @@ export class PersistenceExtension implements Extension {
|
||||
|
||||
const pageId = getPageId(documentName);
|
||||
|
||||
let tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
|
||||
|
||||
// SECURITY (Variant C admin gate, collab WebSocket write path):
|
||||
// The persisted snapshot is the merged ydoc, which may contain an htmlEmbed
|
||||
// node inserted by ANY connected editor. htmlEmbed renders raw, unsanitized
|
||||
// JS in every reader's browser, so only workspace admins/owners may author
|
||||
// it. When the user whose store triggers this persist is not an admin, strip
|
||||
// every htmlEmbed node before it is written to the page row AND before the
|
||||
// ydoc state is re-encoded, so the node cannot be reintroduced by a
|
||||
// non-admin via the collab socket.
|
||||
// NOTE (defense-in-depth refinement, Gitea #29): the gate is keyed to the
|
||||
// storing connection's user, but it no longer blindly strips EVERY embed on
|
||||
// a non-admin store. We distinguish two cases inside the !allowed branch:
|
||||
// - Feature toggle OFF => strip ALL embeds (the feature is disabled for
|
||||
// everyone; existing embeds get cleaned up on the next save).
|
||||
// - Toggle ON but the storer is a NON-admin => strip only NEWLY-introduced
|
||||
// embeds and PRESERVE embeds already present in the currently-persisted
|
||||
// page content (admin-authored, already vetted). So a non-admin still
|
||||
// cannot ADD an embed, but an unrelated edit (e.g. a paragraph tweak) no
|
||||
// longer destroys an admin's existing embed (the prior data-loss bug).
|
||||
// The pre-existing-embed identity is the raw `attrs.source` (see
|
||||
// collectHtmlEmbedSources). A non-admin who copies an existing admin embed's
|
||||
// exact source elsewhere passes — acceptable, that HTML is already vetted.
|
||||
//
|
||||
// ACCEPTED RESIDUAL RISK (toggle-ON allow-list TOCTOU): the allow-list is a
|
||||
// best-effort snapshot read OUTSIDE the locked transaction (the prior content
|
||||
// is pre-read above, but inside executeTx the row is re-read withLock without
|
||||
// recomputing the allow-list). A concurrent admin store that changes the
|
||||
// persisted embeds between the pre-read and this write can make the preserve
|
||||
// decision use a slightly stale snapshot — worst case one embed transiently
|
||||
// kept or dropped; it converges on the next store, with no auth bypass or
|
||||
// broader data loss. The race is accepted because it only affects concurrent
|
||||
// authenticated editors on the (rare) toggle-ON non-admin path, converges on
|
||||
// the next store, and the persisted row plus every share/readonly read path
|
||||
// remain protected by the strip.
|
||||
//
|
||||
// RESIDUAL RISK (pre-persist broadcast window) — NOW MITIGATED (Gitea #26):
|
||||
// this strip runs in the debounced onStoreDocument (up to 10s), but
|
||||
// hocuspocus broadcasts each inbound Yjs update to connected clients
|
||||
// immediately, so a non-admin's transient htmlEmbed can execute in OTHER open
|
||||
// editors' browsers in the window before this persist strips it. The exposure
|
||||
// is limited to concurrent AUTHENTICATED space members who have the doc open
|
||||
// with Edit rights (semi-trusted) — anonymous public-share/readonly viewers do
|
||||
// NOT open a collab socket (ReadonlyPageEditor renders fetched,
|
||||
// already-stripped content; HocuspocusProvider is only used by the
|
||||
// authenticated editable page-editor), and the PERSISTED page row plus every
|
||||
// share/readonly read path are protected by this strip.
|
||||
// The window is now SHRUNK to sub-second by an onChange-debounced early guard
|
||||
// (~300ms) — see guardHtmlEmbed() — which runs the SAME preserve/strip gate as
|
||||
// this block and re-encodes the cleaned ydoc, converging the doc for all
|
||||
// clients long before this 10s store debounce fires. This onStoreDocument
|
||||
// strip remains the authoritative backstop for persistence. The irreducible
|
||||
// residual is only the VERY FIRST inbound broadcast before the ~300ms debounce
|
||||
// fires: hocuspocus exposes no synchronous beforeBroadcast filter to drop the
|
||||
// node before that first relay, so it cannot be eliminated entirely.
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature
|
||||
// toggle is ON and the storing user is an admin/owner. OFF (default) =>
|
||||
// stripped for everyone (existing embeds get cleaned up on next save).
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(context?.user?.workspaceId))?.settings,
|
||||
);
|
||||
if (!htmlEmbedAllowed(htmlEmbedEnabled, context?.user?.role)) {
|
||||
if (hasHtmlEmbedNode(tiptapJson)) {
|
||||
let strippedJson: typeof tiptapJson;
|
||||
if (htmlEmbedEnabled === false) {
|
||||
// Toggle OFF: feature disabled for everyone -> strip ALL embeds.
|
||||
strippedJson = stripHtmlEmbedNodes(tiptapJson);
|
||||
} else {
|
||||
// Toggle ON, non-admin storer: preserve embeds already present in the
|
||||
// currently-persisted (admin-vetted) page content; strip only the
|
||||
// newly-introduced ones. Pre-read the prior content — a small extra
|
||||
// query only on this rare non-admin + toggle-ON path.
|
||||
const prior = await this.pageRepo.findById(pageId, {
|
||||
includeContent: true,
|
||||
});
|
||||
const allowed = collectHtmlEmbedSources(prior?.content);
|
||||
strippedJson = stripDisallowedHtmlEmbedNodes(tiptapJson, allowed);
|
||||
}
|
||||
|
||||
// Only mutate the ydoc + log when the strip actually removed something;
|
||||
// an unnecessary ydoc rewrite would churn the doc for all clients. With
|
||||
// the toggle-ON branch a non-admin store that only touches admin-vetted
|
||||
// embeds leaves the content unchanged here.
|
||||
if (!isDeepStrictEqual(strippedJson, tiptapJson)) {
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`,
|
||||
);
|
||||
tiptapJson = strippedJson;
|
||||
// Reflect the stripped content back into the shared ydoc so the node
|
||||
// is removed for all connected clients, not just the persisted row.
|
||||
const fragment = document.getXmlFragment('default');
|
||||
if (fragment.length > 0) {
|
||||
fragment.delete(0, fragment.length);
|
||||
}
|
||||
const cleanDoc = TiptapTransformer.toYdoc(
|
||||
tiptapJson,
|
||||
'default',
|
||||
tiptapExtensions,
|
||||
);
|
||||
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
|
||||
}
|
||||
}
|
||||
}
|
||||
const tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
|
||||
|
||||
const ydocState = Buffer.from(Y.encodeStateAsUpdate(document));
|
||||
|
||||
@@ -429,168 +284,12 @@ export class PersistenceExtension implements Extension {
|
||||
if (data.context?.actor === 'agent') {
|
||||
this.agentTouched.set(documentName, true);
|
||||
}
|
||||
|
||||
// Early htmlEmbed guard scheduling (Gitea #26). Schedule the short debounced
|
||||
// guard ONLY when (a) this document's workspace toggle is cached ON and
|
||||
// (b) the changing connection's user is a NON-admin (cannot author
|
||||
// htmlEmbed). When the toggle is OFF/unknown we schedule NOTHING — no timer,
|
||||
// no fromYdoc, no DB read — killing the OFF-case overhead (the common
|
||||
// default); the onStoreDocument strip is the backstop and an OFF embed does
|
||||
// not execute in editable mode anyway. We do NO expensive work here — we only
|
||||
// (re)schedule the timer; the debounce coalesces rapid edits into a single
|
||||
// guard check.
|
||||
if (
|
||||
userId &&
|
||||
this.htmlEmbedToggleByDoc.get(documentName) === true &&
|
||||
!canAuthorHtmlEmbed(data.context?.user?.role)
|
||||
) {
|
||||
const existing = this.htmlEmbedGuardTimers.get(documentName);
|
||||
if (existing) {
|
||||
clearTimeout(existing);
|
||||
}
|
||||
const timer = setTimeout(() => {
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
void this.guardHtmlEmbed(documentName, data.document, data.context);
|
||||
}, 300);
|
||||
this.htmlEmbedGuardTimers.set(documentName, timer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Early, onChange-debounced htmlEmbed strip (Gitea #26). Mirrors the
|
||||
* onStoreDocument admin gate but runs ~300ms after a non-admin edit instead of
|
||||
* waiting for the 10s store debounce, so a non-admin's transient embed is
|
||||
* removed from the shared ydoc — and re-broadcast as cleaned state — for all
|
||||
* connected clients in sub-second time. onStoreDocument remains the
|
||||
* authoritative persistence backstop; this is an ADDITIONAL early pass.
|
||||
*
|
||||
* CONCURRENCY (the critical invariant): the Y.Doc mutation is a single
|
||||
* SYNCHRONOUS block with NO `await` between the fromYdoc snapshot and the
|
||||
* applyUpdate write. ALL async work (the workspace toggle lookup and the
|
||||
* persisted-content read for the allow-list) happens FIRST, before that block.
|
||||
* Because JS is single-threaded, a synchronous block cannot interleave with
|
||||
* inbound Yjs update handlers, so a concurrent edit that lands while we await
|
||||
* cannot be CLOBBERED: we re-snapshot the live doc only after all awaits, then
|
||||
* delete + rebuild + applyUpdate without yielding. (An earlier version awaited
|
||||
* DB reads BETWEEN the snapshot and the write, so a concurrent edit in that gap
|
||||
* was lost — this restructure fixes that.)
|
||||
*
|
||||
* The allow-list is a best-effort snapshot read outside any lock (TOCTOU
|
||||
* accepted, same as onStoreDocument): worst case one embed is transiently kept
|
||||
* or dropped; it converges on the next guard/store, with no auth bypass.
|
||||
*
|
||||
* Loop-safety: the corrective applyUpdate has a null origin, so the re-fired
|
||||
* onChange carries no userId and is not rescheduled; and after a strip no
|
||||
* htmlEmbed remains, so a subsequent guard fire is a cheap no-op (the
|
||||
* hasHtmlEmbedNode early-exit). NEVER throws — an unhandled rejection in a timer
|
||||
* would crash the process — so the whole body is wrapped in try/catch.
|
||||
*/
|
||||
private async guardHtmlEmbed(
|
||||
documentName: string,
|
||||
document: Y.Doc,
|
||||
context: any,
|
||||
): Promise<void> {
|
||||
// Defensive: ensure no stale timer entry survives for this document.
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
try {
|
||||
// Re-check defensively: onChange only schedules for non-admins, but if an
|
||||
// admin/owner somehow reaches here, the embed is authored content — do
|
||||
// nothing (onStoreDocument's toggle-AND-admin gate handles persistence).
|
||||
if (canAuthorHtmlEmbed(context?.user?.role)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// ---- ASYNC PHASE: do ALL awaits up front, before touching the ydoc. ----
|
||||
// Resolve the workspace toggle exactly as onStoreDocument does. When OFF we
|
||||
// strip everything; when ON we use the preserve logic (keep admin-vetted
|
||||
// embeds, strip only the non-admin's newly-introduced ones).
|
||||
const enabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(context?.user?.workspaceId))
|
||||
?.settings,
|
||||
);
|
||||
|
||||
// The allow-list (admin-vetted sources already in the persisted content).
|
||||
// null => strip ALL (toggle OFF). Read here, BEFORE the synchronous block,
|
||||
// so no await sits between the doc snapshot and the doc write.
|
||||
let allowed: Set<string> | null = null;
|
||||
if (enabled !== false) {
|
||||
const prior = await this.pageRepo.findById(getPageId(documentName), {
|
||||
includeContent: true,
|
||||
});
|
||||
allowed = collectHtmlEmbedSources(prior?.content);
|
||||
}
|
||||
|
||||
// The awaits above may have let the document be unloaded/destroyed. If so,
|
||||
// bail — mutating a destroyed doc is pointless and could throw (the
|
||||
// try/catch is the ultimate safety net regardless).
|
||||
if ((document as { isDestroyed?: boolean }).isDestroyed) {
|
||||
return;
|
||||
}
|
||||
|
||||
// ---- SYNCHRONOUS PHASE: snapshot -> strip -> reflect, NO await here. ----
|
||||
// Because there is no await between fromYdoc and applyUpdate, no inbound
|
||||
// Yjs update can interleave, so a concurrent edit cannot be lost.
|
||||
const json = TiptapTransformer.fromYdoc(document, 'default');
|
||||
|
||||
// Cheap exit: nothing to guard if the doc has no embed at all. This is also
|
||||
// why a post-strip re-fire is a no-op (loop-safe).
|
||||
if (!hasHtmlEmbedNode(json)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const strippedJson =
|
||||
allowed === null
|
||||
? stripHtmlEmbedNodes(json)
|
||||
: stripDisallowedHtmlEmbedNodes(json, allowed);
|
||||
|
||||
// Nothing was stripped (e.g. the only embed is an admin-vetted one) — do
|
||||
// not churn the shared ydoc for all clients.
|
||||
if (isDeepStrictEqual(strippedJson, json)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Reflect the stripped content back into the shared ydoc EXACTLY as
|
||||
// onStoreDocument does, so the node is removed for all connected clients,
|
||||
// not just on the eventual persist. This re-encode broadcasts the cleaned
|
||||
// state; after it hasHtmlEmbedNode is false, so any later guard fire is a
|
||||
// cheap no-op (loop-safe).
|
||||
const fragment = document.getXmlFragment('default');
|
||||
if (fragment.length > 0) {
|
||||
fragment.delete(0, fragment.length);
|
||||
}
|
||||
const cleanDoc = TiptapTransformer.toYdoc(
|
||||
strippedJson,
|
||||
'default',
|
||||
tiptapExtensions,
|
||||
);
|
||||
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
|
||||
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) via early onChange guard by user ${context?.user?.id} on ${documentName}`,
|
||||
);
|
||||
} catch (err) {
|
||||
// NEVER rethrow out of a timer-scheduled call.
|
||||
this.logger.error(
|
||||
`Early htmlEmbed guard failed on ${documentName}`,
|
||||
err,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async afterUnloadDocument(data: afterUnloadDocumentPayload) {
|
||||
const documentName = data.documentName;
|
||||
this.contributors.delete(documentName);
|
||||
this.agentTouched.delete(documentName);
|
||||
// Drop the cached toggle for this document so a reload re-resolves it (and
|
||||
// picks up a mid-session admin toggle flip).
|
||||
this.htmlEmbedToggleByDoc.delete(documentName);
|
||||
// Clear any pending early-guard timer so it cannot fire after the document
|
||||
// is unloaded (leak / use-after-unload prevention).
|
||||
const timer = this.htmlEmbedGuardTimers.get(documentName);
|
||||
if (timer) {
|
||||
clearTimeout(timer);
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
}
|
||||
}
|
||||
|
||||
private consumeContributors(documentName: string): string[] {
|
||||
|
||||
@@ -3,20 +3,17 @@ import { htmlToJson } from '../../../collaboration/collaboration.util';
|
||||
import { hasHtmlEmbedNode, stripHtmlEmbedNodes } from './html-embed.util';
|
||||
|
||||
/**
|
||||
* CONTRACT (security): an attacker who controls imported markdown/HTML could try
|
||||
* to smuggle an htmlEmbed in the *serialized* DOM form —
|
||||
* CONTRACT: imported markdown/HTML can carry an htmlEmbed in the *serialized*
|
||||
* DOM form —
|
||||
* <div data-type="htmlEmbed" data-source="...">
|
||||
* — directly, bypassing the editor's `<!--html-embed:-->` comment marker.
|
||||
*
|
||||
* This exercises the REAL server import conversion path that ImportService uses
|
||||
* The block renders inside a sandboxed iframe, so this is not an XSS surface;
|
||||
* this exercises the REAL server import conversion path that ImportService uses
|
||||
* (`markdownToHtml` then `htmlToJson`; `processHTML` adds only a cheerio
|
||||
* link/iframe normalize pass which does not touch htmlEmbed divs) and asserts
|
||||
* the ACTUAL behaviour so we know whether the strip gate can be bypassed.
|
||||
*
|
||||
* FINDING (documented): the raw embed div DOES round-trip through marked +
|
||||
* htmlToJson into a real `htmlEmbed` node, so `hasHtmlEmbedNode` returns true and
|
||||
* `stripHtmlEmbedNodes` removes it. The serialized-form bypass is therefore
|
||||
* detectable and STRIPPABLE — the write-path gate covers it.
|
||||
* that such a node is DETECTED and STRIPPABLE — so the share read path's
|
||||
* master-toggle strip can remove it when the workspace toggle is OFF.
|
||||
*/
|
||||
describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTML', () => {
|
||||
it('round-trips through markdownToHtml -> htmlToJson and is DETECTED (base64 data-source)', async () => {
|
||||
@@ -38,7 +35,7 @@ describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTM
|
||||
// The div parses into a real htmlEmbed node carrying the decoded source.
|
||||
expect(hasHtmlEmbedNode(json)).toBe(true);
|
||||
|
||||
// Because it is detected, the write-path gate can strip it for non-admins.
|
||||
// Because it is detected, the share master-toggle strip can remove it.
|
||||
const stripped = stripHtmlEmbedNodes(json);
|
||||
expect(hasHtmlEmbedNode(stripped)).toBe(false);
|
||||
// Surrounding non-embed content is retained.
|
||||
|
||||
@@ -1,11 +1,6 @@
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripDisallowedHtmlEmbedNodes,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
stripHtmlEmbedNodes,
|
||||
} from './html-embed.util';
|
||||
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
|
||||
@@ -96,17 +91,6 @@ describe('stripHtmlEmbedNodes', () => {
|
||||
expect(result).toEqual(doc);
|
||||
});
|
||||
|
||||
it('neutralizes a root node that is itself an htmlEmbed', () => {
|
||||
// Defensive: the PM root is always a `doc`, so this is unreachable in normal
|
||||
// use, but the helper must still never return a bare htmlEmbed.
|
||||
const root = {
|
||||
type: 'htmlEmbed',
|
||||
attrs: { source: '<script>alert(1)</script>' },
|
||||
};
|
||||
const result = stripHtmlEmbedNodes(root);
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
|
||||
// htmlEmbed sits as a sibling of a paragraph, nested four containers deep.
|
||||
const doc = {
|
||||
@@ -172,169 +156,6 @@ describe('stripHtmlEmbedNodes', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('collectHtmlEmbedSources', () => {
|
||||
it('collects the source of every htmlEmbed node, including nested ones', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<b>top</b>' } },
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<i>nested</i>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'x' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const sources = collectHtmlEmbedSources(doc);
|
||||
expect(sources).toEqual(new Set(['<b>top</b>', '<i>nested</i>']));
|
||||
});
|
||||
|
||||
it('returns an empty set for a doc with no embeds', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
|
||||
};
|
||||
expect(collectHtmlEmbedSources(doc).size).toBe(0);
|
||||
});
|
||||
|
||||
it('gracefully skips embeds with absent attrs or non-string source', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed' }, // no attrs
|
||||
{ type: 'htmlEmbed', attrs: {} }, // no source
|
||||
{ type: 'htmlEmbed', attrs: { source: 42 } }, // non-string
|
||||
{ type: 'htmlEmbed', attrs: { source: '<ok/>' } },
|
||||
],
|
||||
};
|
||||
expect(collectHtmlEmbedSources(doc)).toEqual(new Set(['<ok/>']));
|
||||
});
|
||||
|
||||
it('returns an empty set for non-object input', () => {
|
||||
expect(collectHtmlEmbedSources(null).size).toBe(0);
|
||||
expect(collectHtmlEmbedSources(undefined).size).toBe(0);
|
||||
expect(collectHtmlEmbedSources('x' as any).size).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('stripDisallowedHtmlEmbedNodes', () => {
|
||||
it('keeps an embed whose source is allowed and removes the rest', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<new-evil/>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
|
||||
],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
|
||||
expect(collectHtmlEmbedSources(result)).toEqual(new Set(['<vetted/>']));
|
||||
// The allowed embed and the paragraph survive; the new embed is gone.
|
||||
expect(result.content).toHaveLength(2);
|
||||
expect(result.content[0].attrs.source).toBe('<vetted/>');
|
||||
expect(result.content[1].type).toBe('paragraph');
|
||||
});
|
||||
|
||||
it('keeps BOTH embeds when two nodes share the same allowed source', () => {
|
||||
// Source-identity semantics: identity is the raw `attrs.source`, so a
|
||||
// non-admin who duplicates an existing admin-vetted source keeps both copies.
|
||||
// This is intended — the raw HTML is already vetted, so a duplicate is safe.
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'mid' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
|
||||
],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
|
||||
expect(hasHtmlEmbedNode(result)).toBe(true);
|
||||
const embeds = result.content.filter(
|
||||
(n: any) => n.type === 'htmlEmbed',
|
||||
);
|
||||
expect(embeds).toHaveLength(2);
|
||||
expect(embeds.every((n: any) => n.attrs.source === '<vetted/>')).toBe(true);
|
||||
});
|
||||
|
||||
it('removes a newly-introduced embed when nothing is allowed', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'htmlEmbed', attrs: { source: '<new/>' } }],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set());
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('filters nested embeds by the allow-list (e.g. inside columns)', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<new/>' } },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
|
||||
const col = findFirstChild(result, 'column');
|
||||
expect(col.content).toHaveLength(1);
|
||||
expect(col.content[0].attrs.source).toBe('<vetted/>');
|
||||
});
|
||||
|
||||
it('treats an embed with absent/non-string source as not allowed (stripped)', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed' },
|
||||
{ type: 'htmlEmbed', attrs: {} },
|
||||
],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('does not mutate the input document', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'htmlEmbed', attrs: { source: '<new/>' } }],
|
||||
};
|
||||
stripDisallowedHtmlEmbedNodes(doc, new Set());
|
||||
expect(doc.content).toHaveLength(1);
|
||||
expect(doc.content[0].type).toBe('htmlEmbed');
|
||||
});
|
||||
|
||||
it('neutralizes a root node that is itself a disallowed htmlEmbed', () => {
|
||||
const root = { type: 'htmlEmbed', attrs: { source: '<new/>' } };
|
||||
const result = stripDisallowedHtmlEmbedNodes(root, new Set());
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('keeps a root node that is an allowed htmlEmbed (defensive branch)', () => {
|
||||
const root = { type: 'htmlEmbed', attrs: { source: '<vetted/>' } };
|
||||
const result = stripDisallowedHtmlEmbedNodes(root, new Set(['<vetted/>']));
|
||||
expect(collectHtmlEmbedSources(result)).toEqual(new Set(['<vetted/>']));
|
||||
});
|
||||
|
||||
it('returns non-object input unchanged', () => {
|
||||
expect(stripDisallowedHtmlEmbedNodes(null as any, new Set())).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
|
||||
it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
|
||||
const rootEmbed = { type: 'htmlEmbed', attrs: { source: '<script>r</script>' } };
|
||||
@@ -367,19 +188,6 @@ describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('canAuthorHtmlEmbed', () => {
|
||||
it('allows owner and admin', () => {
|
||||
expect(canAuthorHtmlEmbed('owner')).toBe(true);
|
||||
expect(canAuthorHtmlEmbed('admin')).toBe(true);
|
||||
});
|
||||
it('denies member and unknown/empty roles', () => {
|
||||
expect(canAuthorHtmlEmbed('member')).toBe(false);
|
||||
expect(canAuthorHtmlEmbed(null)).toBe(false);
|
||||
expect(canAuthorHtmlEmbed(undefined)).toBe(false);
|
||||
expect(canAuthorHtmlEmbed('viewer')).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('isHtmlEmbedFeatureEnabled', () => {
|
||||
it('is true only when settings.htmlEmbed === true', () => {
|
||||
expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: true })).toBe(true);
|
||||
@@ -394,165 +202,22 @@ describe('isHtmlEmbedFeatureEnabled', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('htmlEmbedAllowed (toggle AND admin)', () => {
|
||||
it('toggle OFF + admin/owner => not allowed (feature disabled for everyone)', () => {
|
||||
expect(htmlEmbedAllowed(false, 'admin')).toBe(false);
|
||||
expect(htmlEmbedAllowed(false, 'owner')).toBe(false);
|
||||
});
|
||||
it('toggle OFF + member => not allowed', () => {
|
||||
expect(htmlEmbedAllowed(false, 'member')).toBe(false);
|
||||
});
|
||||
it('toggle ON + admin/owner => allowed', () => {
|
||||
expect(htmlEmbedAllowed(true, 'admin')).toBe(true);
|
||||
expect(htmlEmbedAllowed(true, 'owner')).toBe(true);
|
||||
});
|
||||
it('toggle ON + member/unknown => not allowed', () => {
|
||||
expect(htmlEmbedAllowed(true, 'member')).toBe(false);
|
||||
expect(htmlEmbedAllowed(true, null)).toBe(false);
|
||||
expect(htmlEmbedAllowed(true, undefined)).toBe(false);
|
||||
expect(htmlEmbedAllowed(true, 'viewer')).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// The shared write-path strip ritual extracted from the 5 plain call-sites
|
||||
// (collab handler, page create/duplicate, import, file-import-task,
|
||||
// transclusion-unsync). Tested here once instead of being re-verified in each
|
||||
// call-site's spec.
|
||||
describe('stripHtmlEmbedIfNotAllowed (shared write-path gate)', () => {
|
||||
const docWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>x()</script>' } },
|
||||
],
|
||||
});
|
||||
const docWithoutEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }],
|
||||
});
|
||||
|
||||
it('keeps the doc unchanged when feature is ON and role is admin (allowed)', () => {
|
||||
const json = docWithEmbed();
|
||||
const onStrip = jest.fn();
|
||||
const result = stripHtmlEmbedIfNotAllowed(json, {
|
||||
featureEnabled: true,
|
||||
role: 'admin',
|
||||
onStrip,
|
||||
});
|
||||
// Allowed => same reference returned, embed preserved, no side-effect.
|
||||
expect(result).toBe(json);
|
||||
expect(hasHtmlEmbedNode(result)).toBe(true);
|
||||
expect(onStrip).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('keeps the doc unchanged for an owner when feature is ON (allowed)', () => {
|
||||
const json = docWithEmbed();
|
||||
const onStrip = jest.fn();
|
||||
const result = stripHtmlEmbedIfNotAllowed(json, {
|
||||
featureEnabled: true,
|
||||
role: 'owner',
|
||||
onStrip,
|
||||
});
|
||||
expect(result).toBe(json);
|
||||
expect(hasHtmlEmbedNode(result)).toBe(true);
|
||||
expect(onStrip).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('strips the embed when the feature is OFF (even for an admin)', () => {
|
||||
const json = docWithEmbed();
|
||||
const onStrip = jest.fn();
|
||||
const result = stripHtmlEmbedIfNotAllowed(json, {
|
||||
featureEnabled: false,
|
||||
role: 'admin',
|
||||
onStrip,
|
||||
});
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
expect(onStrip).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('strips the embed for a non-admin when the feature is ON', () => {
|
||||
const json = docWithEmbed();
|
||||
const onStrip = jest.fn();
|
||||
const result = stripHtmlEmbedIfNotAllowed(json, {
|
||||
featureEnabled: true,
|
||||
role: 'member',
|
||||
onStrip,
|
||||
});
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
expect(onStrip).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('strips the embed for a null/undefined role when the feature is ON', () => {
|
||||
for (const role of [null, undefined]) {
|
||||
const onStrip = jest.fn();
|
||||
const result = stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
|
||||
featureEnabled: true,
|
||||
role,
|
||||
onStrip,
|
||||
});
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
expect(onStrip).toHaveBeenCalledTimes(1);
|
||||
}
|
||||
});
|
||||
|
||||
it('returns input unchanged and does NOT call onStrip when no embed is present', () => {
|
||||
const json = docWithoutEmbed();
|
||||
const onStrip = jest.fn();
|
||||
// Not allowed (feature OFF), but there is nothing to strip.
|
||||
const result = stripHtmlEmbedIfNotAllowed(json, {
|
||||
featureEnabled: false,
|
||||
role: 'member',
|
||||
onStrip,
|
||||
});
|
||||
expect(result).toBe(json);
|
||||
expect(onStrip).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('calls onStrip exactly once per strip', () => {
|
||||
const onStrip = jest.fn();
|
||||
stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
|
||||
featureEnabled: false,
|
||||
role: 'member',
|
||||
onStrip,
|
||||
});
|
||||
expect(onStrip).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('works without an onStrip callback (optional)', () => {
|
||||
const result = stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
|
||||
featureEnabled: false,
|
||||
role: 'member',
|
||||
});
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// NOTE: a previous revision of this file re-implemented the write-path admin
|
||||
// gate as a local `applyAdminGate` stand-in and asserted against THAT. A
|
||||
// deleted/misplaced real guard would have kept those green. The stand-in is
|
||||
// removed. The collab store, REST/MCP update, and transclusion-unsync paths are
|
||||
// now tested against their REAL code in:
|
||||
// - collaboration/extensions/persistence.extension.html-embed.spec.ts
|
||||
// - collaboration/collaboration.handler.html-embed.spec.ts
|
||||
// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
|
||||
// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup)
|
||||
// - integrations/import/services/import-html-embed-identity.spec.ts (import)
|
||||
// The htmlEmbed node renders inside a sandboxed iframe, so the per-write role
|
||||
// gate has been removed. `stripHtmlEmbedNodes` + `isHtmlEmbedFeatureEnabled`
|
||||
// remain ONLY to honor the workspace master toggle on the anonymous public-share
|
||||
// read path — tested against the real share code in:
|
||||
// - core/share/share-html-embed.spec.ts
|
||||
//
|
||||
// The case below stays here because it asserts a REAL parse path
|
||||
// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a
|
||||
// re-implemented gate.
|
||||
describe('htmlEmbed smuggled via the markdown/html <!--html-embed--> form (real parse + real helpers)', () => {
|
||||
it('the parsed node is detected and stripped by the real helpers', () => {
|
||||
// The markdown/html create formats decode to the same htmlEmbed node, so the
|
||||
// gate (run on the parsed JSON) covers them identically.
|
||||
const source = '<script>steal()</script>';
|
||||
// The case below asserts that the REAL parse path (htmlToJson, the markdown/html
|
||||
// form) produces an htmlEmbed node the master-toggle strip can detect & remove.
|
||||
describe('htmlEmbed via the markdown/html form (real parse + real strip helper)', () => {
|
||||
it('the parsed node is detected and stripped by the real helper', () => {
|
||||
const source = '<script>track()</script>';
|
||||
const encoded = encodeHtmlEmbedSource(source);
|
||||
const html = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
|
||||
const parsed = htmlToJson(html);
|
||||
expect(hasHtmlEmbedNode(parsed)).toBe(true);
|
||||
|
||||
// A non-admin role gates to strip via the real helpers.
|
||||
expect(canAuthorHtmlEmbed('member')).toBe(false);
|
||||
const stripped = stripHtmlEmbedNodes(parsed);
|
||||
expect(hasHtmlEmbedNode(stripped)).toBe(false);
|
||||
});
|
||||
|
||||
@@ -5,12 +5,12 @@ export const HTML_EMBED_NODE_NAME = 'htmlEmbed';
|
||||
/**
|
||||
* Recursively remove every `htmlEmbed` node from a ProseMirror JSON document.
|
||||
*
|
||||
* SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin
|
||||
* (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to
|
||||
* author it. This helper is the server-side enforcement primitive: every WRITE
|
||||
* path that may persist content from a NON-admin caller must run the incoming
|
||||
* document through this function so a non-admin cannot smuggle the node in via
|
||||
* the collab socket, the REST/MCP/AI content-update path, paste, or import.
|
||||
* The `htmlEmbed` node renders inside a SANDBOXED iframe (no `allow-same-origin`)
|
||||
* on the client, so its content cannot touch the viewer's session/cookies/API —
|
||||
* it is NOT a stored-XSS surface. This helper is retained ONLY to honor the
|
||||
* workspace master toggle (`settings.htmlEmbed`) on the anonymous public-share
|
||||
* read path: an anonymous viewer cannot read the workspace toggle, so the server
|
||||
* strips the block when the toggle is OFF before serving shared content.
|
||||
*
|
||||
* Returns a NEW document; the input is not mutated. If the input is not a valid
|
||||
* doc object it is returned unchanged (callers persist what they were given).
|
||||
@@ -22,15 +22,6 @@ export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
|
||||
|
||||
const node = pmJson as unknown as JSONContent;
|
||||
|
||||
// Defensive root-type check: if the ROOT node is itself an htmlEmbed, the
|
||||
// children-filtering below could never drop it, so a bare htmlEmbed would be
|
||||
// returned as-is. This branch is unreachable in normal use (the PM document
|
||||
// root is always a `doc`) and exists only to make the helper total — a bare
|
||||
// htmlEmbed can never be returned by this function.
|
||||
if (node.type === HTML_EMBED_NODE_NAME) {
|
||||
return { type: 'doc', content: [] } as unknown as T;
|
||||
}
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
const filtered: JSONContent[] = [];
|
||||
for (const child of node.content) {
|
||||
@@ -48,111 +39,12 @@ export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
|
||||
return { ...node } as unknown as T;
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk the document and collect a stable identity for every `htmlEmbed` node.
|
||||
*
|
||||
* The identity is the node's `attrs.source` string — the raw HTML the embed
|
||||
* renders. Two embeds that render the exact same HTML are treated as the same
|
||||
* identity. Used by the collab persist path to know which embeds are ALREADY
|
||||
* present in the currently-persisted (admin-vetted) page content, so a later
|
||||
* non-admin store can strip only NEWLY-introduced embeds while preserving the
|
||||
* pre-existing admin-authored ones.
|
||||
*
|
||||
* Absent attrs or a non-string/absent `source` are skipped gracefully (such a
|
||||
* node contributes no identity to the set).
|
||||
*/
|
||||
export function collectHtmlEmbedSources(pmJson: unknown): Set<string> {
|
||||
const sources = new Set<string>();
|
||||
|
||||
const walk = (node: unknown): void => {
|
||||
if (!node || typeof node !== 'object') {
|
||||
return;
|
||||
}
|
||||
const n = node as JSONContent;
|
||||
if (n.type === HTML_EMBED_NODE_NAME) {
|
||||
const source = (n.attrs as Record<string, unknown> | undefined)?.source;
|
||||
if (typeof source === 'string') {
|
||||
sources.add(source);
|
||||
}
|
||||
}
|
||||
if (Array.isArray(n.content)) {
|
||||
for (const child of n.content) {
|
||||
walk(child);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
walk(pmJson);
|
||||
return sources;
|
||||
}
|
||||
|
||||
/**
|
||||
* Like {@link stripHtmlEmbedNodes}, but KEEP any `htmlEmbed` node whose
|
||||
* `attrs.source` is in `allowedSources`; remove the rest.
|
||||
*
|
||||
* Used on the collab persist path when the feature toggle is ON but the storing
|
||||
* user is a NON-admin: `allowedSources` is the set of embed sources already
|
||||
* present in the currently-persisted page content (admin-authored, already
|
||||
* vetted). A non-admin therefore cannot ADD a new embed, but their unrelated
|
||||
* edit also cannot destroy an admin's existing one.
|
||||
*
|
||||
* NOTE: identity is the raw source string, so a non-admin who COPIES an existing
|
||||
* admin embed's exact source into a NEW location passes this check. That is
|
||||
* acceptable — the source is already admin-vetted content present in the doc; no
|
||||
* new untrusted HTML is introduced.
|
||||
*
|
||||
* Returns a NEW document; the input is not mutated. Same defensive root-type
|
||||
* check pattern as {@link stripHtmlEmbedNodes}.
|
||||
*/
|
||||
export function stripDisallowedHtmlEmbedNodes<T = JSONContent>(
|
||||
pmJson: T,
|
||||
allowedSources: Set<string>,
|
||||
): T {
|
||||
if (!pmJson || typeof pmJson !== 'object') {
|
||||
return pmJson;
|
||||
}
|
||||
|
||||
const node = pmJson as unknown as JSONContent;
|
||||
|
||||
// Defensive root-type check (mirrors stripHtmlEmbedNodes): if the ROOT node is
|
||||
// itself an htmlEmbed and its source is NOT allowed, the children-filtering
|
||||
// below could never drop it, so neutralize it here. Unreachable in normal use
|
||||
// (the PM document root is always a `doc`).
|
||||
if (node.type === HTML_EMBED_NODE_NAME) {
|
||||
const source = (node.attrs as Record<string, unknown> | undefined)?.source;
|
||||
if (typeof source === 'string' && allowedSources.has(source)) {
|
||||
return { ...node } as unknown as T;
|
||||
}
|
||||
return { type: 'doc', content: [] } as unknown as T;
|
||||
}
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
const filtered: JSONContent[] = [];
|
||||
for (const child of node.content) {
|
||||
// Drop a disallowed htmlEmbed child (newly introduced); keep an allowed
|
||||
// one (already present in the persisted, admin-vetted content).
|
||||
if (child && child.type === HTML_EMBED_NODE_NAME) {
|
||||
const source = (child.attrs as Record<string, unknown> | undefined)
|
||||
?.source;
|
||||
if (typeof source === 'string' && allowedSources.has(source)) {
|
||||
filtered.push({ ...child });
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are
|
||||
// also filtered by the same allow-list.
|
||||
filtered.push(stripDisallowedHtmlEmbedNodes(child, allowedSources));
|
||||
}
|
||||
return { ...node, content: filtered } as unknown as T;
|
||||
}
|
||||
|
||||
return { ...node } as unknown as T;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the document contains at least one `htmlEmbed` node anywhere
|
||||
* in its tree. Useful to decide whether a strip pass actually changed anything
|
||||
* (e.g. for logging a rejected non-admin embed attempt).
|
||||
* in its tree. Useful to decide whether a strip pass on the share read path
|
||||
* actually changed anything. After the write-path role gate removal this is no
|
||||
* longer called by production code; it is retained as a test-only assertion
|
||||
* helper (and a detection primitive should a future read path need it).
|
||||
*/
|
||||
export function hasHtmlEmbedNode(pmJson: unknown): boolean {
|
||||
if (!pmJson || typeof pmJson !== 'object') {
|
||||
@@ -169,62 +61,9 @@ export function hasHtmlEmbedNode(pmJson: unknown): boolean {
|
||||
}
|
||||
|
||||
/**
|
||||
* Map the workspace user role to whether it may author `htmlEmbed` nodes.
|
||||
* Owners and admins are trusted; everyone else (member, and any unknown role)
|
||||
* is not. Kept here so every write path shares one definition of "trusted".
|
||||
*/
|
||||
export function canAuthorHtmlEmbed(role: string | null | undefined): boolean {
|
||||
return role === 'owner' || role === 'admin';
|
||||
}
|
||||
|
||||
/**
|
||||
* Combined write-path gate for the htmlEmbed feature.
|
||||
*
|
||||
* htmlEmbed is allowed in a document only when the workspace feature toggle is
|
||||
* ON and the authoring/saving user is a workspace admin/owner. OFF (default) =>
|
||||
* stripped for EVERYONE, including admins (the feature is disabled).
|
||||
*
|
||||
* `featureEnabled` is read from the workspace settings for the relevant write
|
||||
* (`workspace.settings?.htmlEmbed === true`). Every WRITE path that may persist
|
||||
* htmlEmbed content must gate on this combined predicate, so that turning the
|
||||
* toggle OFF strips existing embeds on the next save and prevents new ones from
|
||||
* being persisted regardless of role.
|
||||
*/
|
||||
export function htmlEmbedAllowed(
|
||||
featureEnabled: boolean,
|
||||
role: string | null | undefined,
|
||||
): boolean {
|
||||
return featureEnabled === true && canAuthorHtmlEmbed(role);
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip htmlEmbed nodes unless the (feature-enabled AND role-allowed) gate
|
||||
* passes. Returns the possibly-stripped doc. The caller resolves featureEnabled
|
||||
* (from workspace settings) and role (actor) itself — those legitimately differ
|
||||
* per call-site (e.g. share path uses role=null) — this helper owns only the
|
||||
* has-check + AND + strip + optional onStrip callback.
|
||||
*
|
||||
* Centralizes the 4-step write-path ritual (resolve role -> resolve
|
||||
* featureEnabled -> htmlEmbedAllowed AND -> stripHtmlEmbedNodes) so the plain
|
||||
* strip-all call-sites share one tested decision. Sites with CUSTOM strip logic
|
||||
* (e.g. the collab persist path's preserve-admin variant) keep their own code.
|
||||
*/
|
||||
export function stripHtmlEmbedIfNotAllowed<T>(
|
||||
json: T,
|
||||
opts: { featureEnabled: boolean; role: string | null | undefined; onStrip?: () => void },
|
||||
): T {
|
||||
if (htmlEmbedAllowed(opts.featureEnabled, opts.role)) return json;
|
||||
if (hasHtmlEmbedNode(json)) {
|
||||
opts.onStrip?.();
|
||||
return stripHtmlEmbedNodes(json);
|
||||
}
|
||||
return json;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the workspace-level htmlEmbed feature toggle from a workspace's settings
|
||||
* jsonb. ABSENT/non-true => OFF (the default). Kept here so every server write
|
||||
* path resolves the toggle the same way.
|
||||
* Read the workspace-level htmlEmbed master toggle from a workspace's settings
|
||||
* jsonb. ABSENT/non-true => OFF (the default). Kept here so the share read path
|
||||
* resolves the toggle the same way it is persisted.
|
||||
*/
|
||||
export function isHtmlEmbedFeatureEnabled(
|
||||
settings: unknown | null | undefined,
|
||||
|
||||
@@ -65,21 +65,19 @@ export const MAX_SHARE_MESSAGES = 30;
|
||||
export const MAX_SHARE_MESSAGE_CHARS = 8000;
|
||||
|
||||
/**
|
||||
* Default per-request output cap for the anonymous share assistant. Bounds the
|
||||
* tokens a single anonymous request can generate; worst case = steps x this.
|
||||
*/
|
||||
export const SHARE_AI_MAX_OUTPUT_TOKENS = 512;
|
||||
|
||||
/**
|
||||
* Read the per-request output cap from the environment (overridable seam),
|
||||
* falling back to the sane default. A non-positive / unparseable value uses the
|
||||
* default. Mirrors resolveShareAiWorkspaceMax().
|
||||
* Per-request output-token ceiling for the anonymous assistant. `streamText`
|
||||
* runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted
|
||||
* request is bounded by (steps × this). The per-workspace cap bounds the COUNT
|
||||
* of calls; this bounds the SIZE of each, so a single anonymous call cannot run
|
||||
* up the provider bill even if the per-IP throttle is evaded. Env-overridable
|
||||
* seam; a non-positive or unparseable value falls back to the default.
|
||||
*/
|
||||
export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;
|
||||
export function resolveShareAiMaxOutputTokens(): number {
|
||||
const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
|
||||
return Number.isFinite(raw) && raw > 0
|
||||
? Math.floor(raw)
|
||||
: SHARE_AI_MAX_OUTPUT_TOKENS;
|
||||
: SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -225,8 +223,8 @@ export class PublicShareChatService {
|
||||
tools,
|
||||
// Bound the agent loop for anonymous callers.
|
||||
stopWhen: stepCountIs(5),
|
||||
// Bounds per-request output so one anonymous request can't run up the
|
||||
// provider bill; worst case = steps x this.
|
||||
// Cap per-request output so one anonymous call cannot run up the provider
|
||||
// bill even if the per-IP throttle is evaded; worst case = steps × this.
|
||||
maxOutputTokens: resolveShareAiMaxOutputTokens(),
|
||||
abortSignal: signal,
|
||||
onError: ({ error }) => {
|
||||
|
||||
@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
|
||||
import {
|
||||
PublicShareChatService,
|
||||
filterShareTranscript,
|
||||
resolveShareAiMaxOutputTokens,
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
} from './public-share-chat.service';
|
||||
import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
|
||||
import {
|
||||
@@ -400,6 +402,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
|
||||
});
|
||||
});
|
||||
|
||||
describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
|
||||
const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS';
|
||||
const original = process.env[ENV];
|
||||
|
||||
afterEach(() => {
|
||||
if (original === undefined) delete process.env[ENV];
|
||||
else process.env[ENV] = original;
|
||||
});
|
||||
|
||||
it('falls back to the default when unset', () => {
|
||||
delete process.env[ENV];
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
);
|
||||
expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
|
||||
});
|
||||
|
||||
it('uses (and floors) a valid positive value from the env', () => {
|
||||
process.env[ENV] = '1024.9';
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(1024);
|
||||
});
|
||||
|
||||
it('falls back to the default for zero, a negative, or a non-numeric value', () => {
|
||||
process.env[ENV] = '0';
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
);
|
||||
process.env[ENV] = '-5';
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
);
|
||||
process.env[ENV] = 'not-a-number';
|
||||
expect(resolveShareAiMaxOutputTokens()).toBe(
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
|
||||
it('allows up to the cap within a window, then 429s (returns false)', async () => {
|
||||
const limiter = makeLimiter(3, 60_000, () => 1_000);
|
||||
@@ -482,9 +522,11 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
|
||||
});
|
||||
|
||||
it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
|
||||
// FAIL CLOSED (#62): if Redis is down we cannot prove the workspace is under
|
||||
// its cap, so DENY (the controller 429s) rather than admit an unmetered,
|
||||
// billable anonymous call. The feature is optional, so denial is harmless.
|
||||
// The per-workspace cap is the COST backstop for an OPTIONAL anonymous
|
||||
// assistant. If Redis is unavailable we cannot prove the workspace is under
|
||||
// its cap, so we DENY (controller 429s) rather than admit an unmetered,
|
||||
// billable call — a brief Redis blip disabling the assistant is safer than
|
||||
// an unbounded provider bill.
|
||||
const failingRedis = {
|
||||
eval: () => Promise.reject(new Error('redis down')),
|
||||
} as unknown as import('ioredis').Redis;
|
||||
|
||||
@@ -99,11 +99,11 @@ export class PublicShareWorkspaceLimiter {
|
||||
/**
|
||||
* Account one call for `key`. Returns true if it is within the cap (allowed),
|
||||
* false if the cap over the trailing window is exceeded (caller must 429).
|
||||
* On a Redis failure we FAIL CLOSED (return false): if Redis is down we cannot
|
||||
* prove the workspace is under its cap, so we DENY rather than admit an
|
||||
* unmetered, billable anonymous call. The feature is optional, so the
|
||||
* temporary denial is harmless. (Operators wanting a tighter steady-state cap
|
||||
* can lower the default via SHARE_AI_WORKSPACE_MAX_PER_HOUR, e.g. =100.)
|
||||
* On a Redis failure we FAIL CLOSED (return false): this cap is the COST
|
||||
* backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
|
||||
* cannot prove the workspace is under its cap and therefore DENY rather than
|
||||
* admit an unmetered, billable anonymous call. A transient Redis blip briefly
|
||||
* disabling the assistant is preferable to an unbounded provider bill.
|
||||
*/
|
||||
async tryConsume(key: string): Promise<boolean> {
|
||||
const t = this.now();
|
||||
@@ -122,9 +122,11 @@ export class PublicShareWorkspaceLimiter {
|
||||
);
|
||||
return admitted === 1;
|
||||
} catch (err) {
|
||||
// FAIL CLOSED: if Redis is down we cannot prove the workspace is under its
|
||||
// cap, so DENY (controller 429s) rather than admit an unmetered, billable
|
||||
// anonymous call. The feature is optional, so denial is harmless.
|
||||
// FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
|
||||
// under its cap, so we DENY (the controller 429s) rather than admit an
|
||||
// unmetered, billable anonymous call. The assistant is optional, so a
|
||||
// transient Redis blip briefly disabling it is the safer failure mode than
|
||||
// an unbounded provider bill.
|
||||
this.logger.error(
|
||||
`share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
|
||||
err as Error,
|
||||
|
||||
@@ -10,7 +10,6 @@ describe('PageController', () => {
|
||||
controller = new PageController(
|
||||
{} as any, // pageService
|
||||
{} as any, // pageRepo
|
||||
{} as any, // workspaceRepo
|
||||
{} as any, // pageHistoryService
|
||||
{} as any, // spaceAbility
|
||||
{} as any, // pageAccessService
|
||||
|
||||
@@ -39,11 +39,6 @@ import {
|
||||
} from '../casl/interfaces/space-ability.type';
|
||||
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
import { RecentPageDto } from './dto/recent-page.dto';
|
||||
import { CreatedByUserDto } from './dto/created-by-user.dto';
|
||||
import { DuplicatePageDto } from './dto/duplicate-page.dto';
|
||||
@@ -68,7 +63,6 @@ export class PageController {
|
||||
constructor(
|
||||
private readonly pageService: PageService,
|
||||
private readonly pageRepo: PageRepo,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private readonly pageHistoryService: PageHistoryService,
|
||||
private readonly spaceAbility: SpaceAbilityFactory,
|
||||
private readonly pageAccessService: PageAccessService,
|
||||
@@ -98,18 +92,6 @@ export class PageController {
|
||||
|
||||
const permissions = { canEdit, hasRestriction };
|
||||
|
||||
if (page.content) {
|
||||
const workspace = await this.workspaceRepo.findById(page.workspaceId);
|
||||
if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
|
||||
// Kill-switch: when the workspace feature is OFF, never serve raw
|
||||
// htmlEmbed nodes on the read path (mirrors the public-share strip),
|
||||
// so disabling the feature is an immediate, total kill-switch and not
|
||||
// dependent on the page being re-saved. Admin-authored content only.
|
||||
// Fail-closed: a missing workspace resolves to OFF and is stripped.
|
||||
page.content = stripHtmlEmbedNodes(page.content) as any;
|
||||
}
|
||||
}
|
||||
|
||||
if (dto.format && dto.format !== 'json' && page.content) {
|
||||
const contentOutput =
|
||||
dto.format === 'markdown'
|
||||
@@ -255,9 +237,6 @@ export class PageController {
|
||||
user.id,
|
||||
workspace.id,
|
||||
createPageDto,
|
||||
// Pass the caller's workspace role so create() can enforce the htmlEmbed
|
||||
// admin gate (non-admins cannot author raw-JS embeds).
|
||||
user.role,
|
||||
provenance,
|
||||
);
|
||||
|
||||
@@ -554,16 +533,6 @@ export class PageController {
|
||||
|
||||
await this.pageAccessService.validateCanView(page, user);
|
||||
|
||||
if (history.content) {
|
||||
const workspace = await this.workspaceRepo.findById(page.workspaceId);
|
||||
if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
|
||||
// Kill-switch: history snapshots are an authenticated read path too, so
|
||||
// strip htmlEmbed when the workspace feature is OFF (same as /info and
|
||||
// the public-share path). Fail-closed on a missing workspace.
|
||||
history.content = stripHtmlEmbedNodes(history.content) as any;
|
||||
}
|
||||
}
|
||||
|
||||
return history;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,240 +0,0 @@
|
||||
// Exercises the REAL PageService htmlEmbed admin gate on its two non-collab
|
||||
// write paths: PageService.create() and PageService.duplicatePage(). Both build
|
||||
// content/textContent/ydoc directly and persist, bypassing the collab
|
||||
// onStoreDocument strip, so each must run the incoming document through the
|
||||
// toggle-AND-admin gate (`htmlEmbedAllowed(featureEnabled, role)` -> if not
|
||||
// allowed, `stripHtmlEmbedNodes`) BEFORE persisting.
|
||||
//
|
||||
// This spec constructs the REAL PageService with every constructor dep mocked,
|
||||
// feeds content containing an `htmlEmbed`, calls the real method, and asserts on
|
||||
// the PERSISTED content (captured at the repo insert / db insert boundary) that
|
||||
// the embed was actually stripped (member/unknown role) or preserved
|
||||
// (admin/owner + toggle ON). Mirrors the GOOD pattern in
|
||||
// transclusion/spec/transclusion-unsync-html-embed.spec.ts.
|
||||
//
|
||||
// page.service.ts pulls in the collaboration gateway (a transitive ESM chain
|
||||
// `lib0/decoding.js` that jest's transformIgnorePatterns does not transpile), so
|
||||
// that single module is mocked away — it is never used on the create/duplicate
|
||||
// gate paths.
|
||||
jest.mock('../../../collaboration/collaboration.gateway', () => ({
|
||||
CollaborationGateway: class {},
|
||||
}));
|
||||
|
||||
import { PageService } from './page.service';
|
||||
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
const WS = 'ws-1';
|
||||
const SPACE = 'space-1';
|
||||
const USER = 'u1';
|
||||
|
||||
const docWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'body' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
// Minimal chainable kysely stub. `nextPagePosition` (used by create) and
|
||||
// duplicatePage's bulk insert go through `this.db`; only the calls those paths
|
||||
// make need to resolve. `capturedInserts` collects every page row handed to
|
||||
// `insertInto('pages').values(...)` so we can assert on the persisted content.
|
||||
function buildDb(capturedInserts: any[]) {
|
||||
const selectChain: any = {
|
||||
select: () => selectChain,
|
||||
selectAll: () => selectChain,
|
||||
where: () => selectChain,
|
||||
orderBy: () => selectChain,
|
||||
limit: () => selectChain,
|
||||
execute: async () => [],
|
||||
executeTakeFirst: async () => undefined,
|
||||
};
|
||||
const db: any = {
|
||||
selectFrom: () => selectChain,
|
||||
insertInto: (table: string) => ({
|
||||
values: (rows: any) => {
|
||||
if (table === 'pages') {
|
||||
for (const row of Array.isArray(rows) ? rows : [rows]) {
|
||||
capturedInserts.push(row);
|
||||
}
|
||||
}
|
||||
return { execute: async () => undefined };
|
||||
},
|
||||
}),
|
||||
// executeTx -> db.transaction().execute(cb): run the callback with `db`
|
||||
// itself acting as the transaction so any in-tx inserts are captured too.
|
||||
transaction: () => ({ execute: async (cb: any) => cb(db) }),
|
||||
};
|
||||
return db;
|
||||
}
|
||||
|
||||
// Build the REAL PageService with all 13 constructor deps mocked. `featureEnabled`
|
||||
// drives the workspace toggle the gate reads via workspaceRepo.findById.
|
||||
function buildService(opts: {
|
||||
featureEnabled: boolean;
|
||||
capturedInserts: any[];
|
||||
rootPage?: any; // for duplicatePage
|
||||
}) {
|
||||
const { featureEnabled, capturedInserts } = opts;
|
||||
|
||||
const pageRepo: any = {
|
||||
findById: jest.fn(async () => null), // no parent page in create tests
|
||||
// create() persists here; capture the row so we can inspect content.
|
||||
insertPage: jest.fn(async (row: any) => {
|
||||
capturedInserts.push(row);
|
||||
return { id: 'new-page', slugId: 'slug-1', ...row };
|
||||
}),
|
||||
getPageAndDescendants: jest.fn(async () => [opts.rootPage].filter(Boolean)),
|
||||
};
|
||||
|
||||
const pagePermissionRepo: any = {
|
||||
// duplicatePage filters accessible pages; grant the root so it is copied.
|
||||
filterAccessiblePageIds: jest.fn(async () =>
|
||||
opts.rootPage ? [opts.rootPage.id] : [],
|
||||
),
|
||||
};
|
||||
|
||||
const workspaceRepo: any = {
|
||||
findById: jest.fn(async () => ({
|
||||
id: WS,
|
||||
settings: { htmlEmbed: featureEnabled },
|
||||
})),
|
||||
};
|
||||
|
||||
const attachmentRepo: any = { findByIds: jest.fn(async () => []) };
|
||||
const storageService: any = { copy: jest.fn(async () => undefined) };
|
||||
const noopQueue: any = { add: jest.fn(async () => undefined) };
|
||||
const eventEmitter: any = { emit: jest.fn() };
|
||||
const collaborationGateway: any = {};
|
||||
const watcherService: any = {};
|
||||
// duplicatePage fires transclusion bulk inserts after persisting; they are
|
||||
// best-effort (wrapped in try/catch) and irrelevant to the gate.
|
||||
const transclusionService: any = {
|
||||
insertTransclusionsForPages: jest.fn(async () => undefined),
|
||||
insertReferencesForPages: jest.fn(async () => undefined),
|
||||
insertTemplateReferencesForPages: jest.fn(async () => undefined),
|
||||
};
|
||||
|
||||
const db = buildDb(capturedInserts);
|
||||
|
||||
const service = new PageService(
|
||||
pageRepo,
|
||||
pagePermissionRepo,
|
||||
attachmentRepo,
|
||||
db,
|
||||
storageService,
|
||||
noopQueue, // attachmentQueue
|
||||
noopQueue, // aiQueue
|
||||
noopQueue, // generalQueue
|
||||
eventEmitter,
|
||||
collaborationGateway,
|
||||
watcherService,
|
||||
transclusionService,
|
||||
workspaceRepo,
|
||||
);
|
||||
return service;
|
||||
}
|
||||
|
||||
describe('PageService.create htmlEmbed admin gate (real code)', () => {
|
||||
// Run create() and return the content actually persisted via insertPage.
|
||||
async function persistedContent(
|
||||
featureEnabled: boolean,
|
||||
callerRole: string | null | undefined,
|
||||
) {
|
||||
const capturedInserts: any[] = [];
|
||||
const service = buildService({ featureEnabled, capturedInserts });
|
||||
await service.create(
|
||||
USER,
|
||||
WS,
|
||||
{
|
||||
spaceId: SPACE,
|
||||
title: 'p',
|
||||
// 'json' format is used as-is by parseProsemirrorContent (passed to the
|
||||
// real jsonToNode schema validation), so hand it the PM-JSON object.
|
||||
content: docWithEmbed(),
|
||||
format: 'json' as any,
|
||||
} as any,
|
||||
callerRole,
|
||||
);
|
||||
expect(capturedInserts).toHaveLength(1);
|
||||
return capturedInserts[0].content;
|
||||
}
|
||||
|
||||
it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
|
||||
const content = await persistedContent(true, 'member');
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
// Non-embed content survives.
|
||||
expect(JSON.stringify(content)).toContain('body');
|
||||
});
|
||||
|
||||
it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false);
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('PageService.duplicatePage htmlEmbed admin gate (real code)', () => {
|
||||
// Duplicate a single source page that contains an embed and return the content
|
||||
// persisted for the copy (captured at db.insertInto('pages').values(...)).
|
||||
async function persistedContent(
|
||||
featureEnabled: boolean,
|
||||
role: string | null | undefined,
|
||||
) {
|
||||
const rootPage: any = {
|
||||
id: 'src-page',
|
||||
slugId: 'src-slug',
|
||||
title: 'Source',
|
||||
icon: null,
|
||||
position: 'a0',
|
||||
spaceId: SPACE,
|
||||
workspaceId: WS,
|
||||
parentPageId: null,
|
||||
content: docWithEmbed(),
|
||||
};
|
||||
const capturedInserts: any[] = [];
|
||||
const service = buildService({ featureEnabled, capturedInserts, rootPage });
|
||||
const authUser: any = { id: USER, workspaceId: WS, role };
|
||||
await service.duplicatePage(rootPage, undefined, authUser);
|
||||
// The bulk insert is the page persist boundary; one source page -> one copy.
|
||||
const pageRows = capturedInserts.filter((r) => r.content);
|
||||
expect(pageRows.length).toBeGreaterThanOrEqual(1);
|
||||
return pageRows[0].content;
|
||||
}
|
||||
|
||||
it('toggle ON + member: persisted copy has htmlEmbed stripped', async () => {
|
||||
const content = await persistedContent(true, 'member');
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
expect(JSON.stringify(content)).toContain('body');
|
||||
});
|
||||
|
||||
it('toggle ON + admin: persisted copy keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: persisted copy keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false);
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false);
|
||||
}
|
||||
});
|
||||
});
|
||||
@@ -20,7 +20,6 @@ describe('PageService', () => {
|
||||
{} as any, // collaborationGateway
|
||||
{} as any, // watcherService
|
||||
{} as any, // transclusionService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
});
|
||||
|
||||
|
||||
@@ -31,11 +31,6 @@ import {
|
||||
isAttachmentNode,
|
||||
removeMarkTypeFromDoc,
|
||||
} from '../../../common/helpers/prosemirror/utils';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import {
|
||||
htmlToJson,
|
||||
jsonToNode,
|
||||
@@ -81,7 +76,6 @@ export class PageService {
|
||||
private collaborationGateway: CollaborationGateway,
|
||||
private readonly watcherService: WatcherService,
|
||||
private readonly transclusionService: TransclusionService,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async findById(
|
||||
@@ -101,10 +95,6 @@ export class PageService {
|
||||
userId: string,
|
||||
workspaceId: string,
|
||||
createPageDto: CreatePageDto,
|
||||
// Workspace role of the caller. Used to enforce the htmlEmbed admin gate on
|
||||
// the create write path (see below). Optional/typed loosely so unknown or
|
||||
// missing roles fall through to the non-admin (strip) branch by default.
|
||||
callerRole?: string | null,
|
||||
// Optional agent-edit provenance (from the signed access claim). When the
|
||||
// actor is 'agent', stamp the page's source marker so a freshly created page
|
||||
// shows it was created by the AI agent (§14 N2) — create goes through REST,
|
||||
@@ -135,35 +125,11 @@ export class PageService {
|
||||
let ydoc = undefined;
|
||||
|
||||
if (createPageDto?.content && createPageDto?.format) {
|
||||
let prosemirrorJson = await this.parseProsemirrorContent(
|
||||
const prosemirrorJson = await this.parseProsemirrorContent(
|
||||
createPageDto.content,
|
||||
createPageDto.format,
|
||||
);
|
||||
|
||||
// SECURITY (Variant C admin gate, plain page-create write path):
|
||||
// create() builds content/textContent/ydoc directly and persists them via
|
||||
// insertPage, bypassing the collab onStoreDocument strip. htmlEmbed renders
|
||||
// raw, unsanitized JS in readers' browsers, so only workspace admins/owners
|
||||
// may author it. The create controller requires only space Edit, so a
|
||||
// regular member could otherwise POST a doc (json, or the markdown/html
|
||||
// <!--html-embed:BASE64--> forms that parse to the same node) containing an
|
||||
// htmlEmbed and store XSS for every reader. Strip every htmlEmbed node when
|
||||
// the caller is not an admin, BEFORE deriving textContent/ydoc/insert.
|
||||
// The gate is toggle-AND-admin: htmlEmbed survives only when the workspace
|
||||
// feature toggle is ON and the caller is an admin/owner. OFF (default) =>
|
||||
// stripped for everyone. Cheap settings read keyed to the workspace.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(workspaceId))?.settings,
|
||||
);
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: callerRole,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from page creation by user ${userId} (space ${createPageDto.spaceId})`,
|
||||
),
|
||||
});
|
||||
|
||||
content = prosemirrorJson;
|
||||
textContent = jsonToText(prosemirrorJson);
|
||||
ydoc = createYdocFromJson(prosemirrorJson);
|
||||
@@ -653,12 +619,6 @@ export class PageService {
|
||||
|
||||
const attachmentMap = new Map<string, ICopyPageAttachment>();
|
||||
|
||||
// Resolve the htmlEmbed toggle ONCE for the workspace; the per-page gate
|
||||
// below is toggle-AND-admin (OFF default => stripped for everyone).
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(rootPage.workspaceId))?.settings,
|
||||
);
|
||||
|
||||
const insertablePages: InsertablePage[] = await Promise.all(
|
||||
pages.map(async (page) => {
|
||||
const pageContent = getProsemirrorContent(page.content);
|
||||
@@ -769,24 +729,7 @@ export class PageService {
|
||||
}
|
||||
});
|
||||
|
||||
let prosemirrorJson = prosemirrorDoc.toJSON();
|
||||
|
||||
// SECURITY (Variant C admin gate, duplication write path):
|
||||
// Duplication builds the ydoc directly and bypasses the collab
|
||||
// onStoreDocument strip. htmlEmbed renders raw, unsanitized JS in
|
||||
// readers' browsers, so only workspace admins/owners may author it. A
|
||||
// non-admin with space Edit could otherwise duplicate an admin page
|
||||
// that contains an embed into a new page authored by them. Strip every
|
||||
// htmlEmbed node from each duplicated page when the duplicating user is
|
||||
// not an admin, BEFORE computing textContent/ydoc/insert.
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: authUser.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from page duplication by user ${authUser.id} (source page ${page.id})`,
|
||||
),
|
||||
});
|
||||
const prosemirrorJson = prosemirrorDoc.toJSON();
|
||||
|
||||
// Add "Copy of " prefix to the root page title only for duplicates in same space
|
||||
let title = page.title;
|
||||
|
||||
@@ -68,7 +68,6 @@ describe('TransclusionService — template access core (real filter)', () => {
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
return { service, db, pageRepo, spaceMemberRepo, pagePermissionRepo };
|
||||
@@ -227,7 +226,6 @@ describe('TransclusionService.filterViewerAccessiblePageIds — AND ordering (co
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
return { service, filterAccessiblePageIds };
|
||||
@@ -324,7 +322,6 @@ describe('TransclusionService.syncPageTemplateReferences — workspace scoping',
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -471,7 +468,6 @@ describe('TransclusionService.insertTemplateReferencesForPages — per-workspace
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
return { service, insertMany };
|
||||
}
|
||||
|
||||
@@ -41,7 +41,6 @@ describe('TransclusionService.lookupTemplate — anti-leak catch branch', () =>
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
// Stub the access decision; we are testing the content-prep stage, not access.
|
||||
@@ -158,7 +157,6 @@ describe('TransclusionService.lookupTemplate — soft-deleted source via real fi
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
);
|
||||
|
||||
const { items } = await service.lookupTemplate(['deleted-src'], 'u1', 'w1');
|
||||
|
||||
@@ -35,7 +35,6 @@ describe('TransclusionService.lookupTemplate (access mapping)', () => {
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
jest
|
||||
|
||||
@@ -57,7 +57,6 @@ function buildService(opts: {
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,145 +0,0 @@
|
||||
import { TransclusionService } from '../transclusion.service';
|
||||
import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate.
|
||||
// unsync returns a source snapshot the client materializes into the reference
|
||||
// page; a non-admin must never receive an embed payload to re-persist. The gate
|
||||
// reads `user.role` and strips before returning. All repos / access checks are
|
||||
// mocked so the REAL gate logic runs end-to-end. Complements the existing
|
||||
// transclusion specs (rewriteAttachmentsForUnsync, controller).
|
||||
|
||||
const WS = 'ws-1';
|
||||
const REF_PAGE = 'ref-1';
|
||||
const SRC_PAGE = 'src-1';
|
||||
const TX_ID = 'tx-1';
|
||||
|
||||
const sourceContentWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>steal()</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
function buildService(featureEnabled = true) {
|
||||
const pageRepo = {
|
||||
findById: jest.fn(async (id: string) => ({
|
||||
id,
|
||||
workspaceId: WS,
|
||||
spaceId: 'space-1',
|
||||
deletedAt: null,
|
||||
})),
|
||||
};
|
||||
const pageTransclusionsRepo = {
|
||||
findByPageAndTransclusion: jest.fn(async () => ({
|
||||
content: sourceContentWithEmbed(),
|
||||
})),
|
||||
};
|
||||
const pageTransclusionReferencesRepo = {
|
||||
deleteOne: jest.fn(async () => undefined),
|
||||
};
|
||||
const attachmentRepo = { findByIds: jest.fn(async () => []) };
|
||||
const storageService = { copy: jest.fn(async () => undefined) };
|
||||
const pageAccessService = {
|
||||
validateCanEdit: jest.fn(async () => undefined),
|
||||
validateCanView: jest.fn(async () => undefined),
|
||||
};
|
||||
// Workspace settings read used by the toggle-AND-admin gate.
|
||||
const workspaceRepo = {
|
||||
findById: jest.fn(async () => ({
|
||||
id: WS,
|
||||
settings: { htmlEmbed: featureEnabled },
|
||||
})),
|
||||
};
|
||||
|
||||
const service = new TransclusionService(
|
||||
{} as any, // db (unused on this path)
|
||||
pageTransclusionsRepo as any,
|
||||
pageTransclusionReferencesRepo as any,
|
||||
{} as any, // pageTemplateReferencesRepo (unused on this path)
|
||||
pageRepo as any,
|
||||
{} as any, // pagePermissionRepo (unused)
|
||||
{} as any, // spaceMemberRepo (unused)
|
||||
attachmentRepo as any,
|
||||
storageService as any,
|
||||
pageAccessService as any,
|
||||
workspaceRepo as any,
|
||||
);
|
||||
return service;
|
||||
}
|
||||
|
||||
function userWithRole(role: string | null | undefined) {
|
||||
return { id: 'u1', workspaceId: WS, role } as any;
|
||||
}
|
||||
|
||||
describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => {
|
||||
it('non-admin (member): returned content has htmlEmbed stripped', async () => {
|
||||
const service = buildService();
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('member'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
// Non-embed content is preserved.
|
||||
expect(JSON.stringify(content)).toContain('snapshot body');
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
const service = buildService();
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole(role),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it('toggle ON + admin: returned content keeps the htmlEmbed', async () => {
|
||||
const service = buildService(true);
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('admin'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: returned content keeps the htmlEmbed', async () => {
|
||||
const service = buildService(true);
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('owner'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
const service = buildService(false);
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('admin'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
});
|
||||
|
||||
it('toggle OFF + member: stripped', async () => {
|
||||
const service = buildService(false);
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('member'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -33,11 +33,6 @@ import {
|
||||
import { jsonToNode } from '../../../collaboration/collaboration.util';
|
||||
import { Page, User } from '@docmost/db/types/entity.types';
|
||||
import { PageAccessService } from '../page-access/page-access.service';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
|
||||
type ReferencingPageInfo = {
|
||||
id: string;
|
||||
@@ -63,7 +58,6 @@ export class TransclusionService {
|
||||
private readonly attachmentRepo: AttachmentRepo,
|
||||
private readonly storageService: StorageService,
|
||||
private readonly pageAccessService: PageAccessService,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async syncPageTransclusions(
|
||||
@@ -773,26 +767,6 @@ export class TransclusionService {
|
||||
transclusionId,
|
||||
);
|
||||
|
||||
// SECURITY (Variant C admin gate, transclusion unsync write path):
|
||||
// The returned content is a source snapshot that the client materializes
|
||||
// into the reference page via insertContentAt. The snapshot keeps any
|
||||
// htmlEmbed verbatim, and unsync requires only space Edit/View. If the
|
||||
// requesting user is not a workspace admin/owner, strip htmlEmbed nodes so a
|
||||
// non-admin can never receive an embed payload to re-persist (the collab
|
||||
// strip on the subsequent save is debounced/race-prone and must not be the
|
||||
// only guard). Admin behavior is unchanged.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(user.workspaceId))?.settings,
|
||||
);
|
||||
content = stripHtmlEmbedIfNotAllowed(content, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: user.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from transclusion unsync by user ${user.id} (reference page ${referencePageId}, source page ${sourcePageId})`,
|
||||
),
|
||||
});
|
||||
|
||||
return { content };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
import { ShareService } from './share.service';
|
||||
import { hasHtmlEmbedNode } from '../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL ShareService server-authoritative htmlEmbed kill-switch for
|
||||
// shared content. An anonymous public-share viewer cannot read the per-workspace
|
||||
// htmlEmbed toggle, so the SERVER must decide what to serve: when the toggle is
|
||||
// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are kept so
|
||||
// the read-only client executes them. All repos / token service are mocked so the
|
||||
// real prepareContentForShare logic runs end-to-end via getSharedPage.
|
||||
// Exercises the REAL ShareService server-authoritative htmlEmbed master toggle
|
||||
// for shared content. The block renders inside a sandboxed iframe (harmless), so
|
||||
// this is NOT an XSS guard — it is the master-toggle enforcement for anonymous
|
||||
// shares: an anonymous public-share viewer cannot read the per-workspace
|
||||
// htmlEmbed toggle, so the SERVER must decide what to serve. When the toggle is
|
||||
// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are served
|
||||
// and rendered in their sandboxed frame. All repos / token service are mocked so
|
||||
// the real prepareContentForShare logic runs end-to-end via getSharedPage.
|
||||
|
||||
const WS = 'ws-1';
|
||||
const PAGE = 'page-1';
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Controller, Get, Param, Req, Res } from '@nestjs/common';
|
||||
import { Controller, Get, Logger, Param, Req, Res } from '@nestjs/common';
|
||||
import { ShareService } from './share.service';
|
||||
import { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { join } from 'path';
|
||||
@@ -11,6 +11,8 @@ import { htmlEscape } from '../../common/helpers/html-escaper';
|
||||
|
||||
@Controller('share')
|
||||
export class ShareSeoController {
|
||||
private readonly logger = new Logger(ShareSeoController.name);
|
||||
|
||||
constructor(
|
||||
private readonly shareService: ShareService,
|
||||
private workspaceRepo: WorkspaceRepo,
|
||||
@@ -84,10 +86,34 @@ export class ShareSeoController {
|
||||
.join('\n ');
|
||||
|
||||
const html = fs.readFileSync(indexFilePath, 'utf8');
|
||||
const transformedHtml = html
|
||||
let transformedHtml = html
|
||||
.replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
|
||||
.replace(metaTagVar, metaTags);
|
||||
|
||||
// Deliberate same-origin tracker surface: this is the ONE place where an
|
||||
// admin-authored analytics/tracker snippet (settings.trackerHead) is
|
||||
// injected verbatim into the page origin. It is admin-only (writable only
|
||||
// via the admin-gated workspace settings) and applies to PUBLIC SHARE
|
||||
// pages only. It is trusted content, so it is NOT escaped. The htmlEmbed
|
||||
// block itself is sandboxed and is the safe surface for everyone else.
|
||||
const trackerHead = (workspace?.settings as any)?.trackerHead;
|
||||
if (typeof trackerHead === 'string' && trackerHead.trim().length > 0) {
|
||||
if (transformedHtml.includes('</head>')) {
|
||||
// Function replacer: the snippet is admin-authored trusted content and
|
||||
// must be injected verbatim. A string replacement would interpret `$&`,
|
||||
// `$'`, `` $` `` and `$$` inside it as substitution patterns and mangle
|
||||
// the tracker; a function return value is inserted literally.
|
||||
transformedHtml = transformedHtml.replace(
|
||||
'</head>',
|
||||
() => `${trackerHead}\n</head>`,
|
||||
);
|
||||
} else {
|
||||
this.logger.warn(
|
||||
'trackerHead is configured but no </head> marker was found in the share index HTML; tracker snippet was not injected.',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
res.type('text/html').send(transformedHtml);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,9 +87,16 @@ export class ShareController {
|
||||
workspace.id,
|
||||
);
|
||||
|
||||
// Resolve the identity name only when the assistant is enabled, so the
|
||||
// anonymous widget can label messages with the configured persona name.
|
||||
const aiAssistantName = aiAssistant
|
||||
? await this.aiSettings.resolvePublicShareAssistantName(workspace.id)
|
||||
: null;
|
||||
|
||||
return {
|
||||
...shareData,
|
||||
aiAssistant,
|
||||
aiAssistantName,
|
||||
features: this.licenseCheckService.resolveFeatures(
|
||||
workspace.licenseKey,
|
||||
workspace.plan,
|
||||
|
||||
@@ -524,12 +524,14 @@ export class ShareService {
|
||||
* not leak structure (existence, location, count, resolved state, or
|
||||
* comment ids) to public viewers.
|
||||
*
|
||||
* 3. Strip `htmlEmbed` nodes when the workspace feature toggle is OFF. This
|
||||
* makes the toggle a SERVER-AUTHORITATIVE kill-switch for shared content:
|
||||
* when OFF the embed is never served to the anonymous viewer (who can't
|
||||
* read the per-workspace toggle), when ON the embed is served so the
|
||||
* read-only client executes it. `htmlEmbedEnabled` is resolved fail-closed
|
||||
* by the callers (missing workspace => OFF => strip).
|
||||
* 3. Strip `htmlEmbed` nodes when the workspace master toggle is OFF. The
|
||||
* block renders inside a sandboxed iframe on the client (harmless, no
|
||||
* same-origin access), so this is NOT an XSS guard — it is the
|
||||
* SERVER-AUTHORITATIVE enforcement of the workspace master toggle for
|
||||
* anonymous shares: an anonymous viewer cannot read the per-workspace
|
||||
* toggle, so when OFF the block is never served, and when ON it is served
|
||||
* and rendered in its sandboxed frame. `htmlEmbedEnabled` is resolved
|
||||
* fail-closed by the callers (missing workspace => OFF => strip).
|
||||
*
|
||||
* Both share-content paths — the host page (`updatePublicAttachments`) and
|
||||
* the share-scoped transclusion lookup (`lookupTransclusionForShare`) —
|
||||
@@ -544,8 +546,9 @@ export class ShareService {
|
||||
): Promise<Node | null> {
|
||||
let pmJson = getProsemirrorContent(content);
|
||||
|
||||
// Kill-switch: when the workspace toggle is OFF, never serve htmlEmbed
|
||||
// nodes to public viewers. Strip before tokenizing/serializing.
|
||||
// Master-toggle enforcement: when the workspace toggle is OFF, never serve
|
||||
// htmlEmbed nodes to anonymous public viewers (who cannot read the toggle).
|
||||
// Strip before tokenizing/serializing.
|
||||
if (!htmlEmbedEnabled) {
|
||||
pmJson = stripHtmlEmbedNodes(pmJson);
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@ import {
|
||||
IsBoolean,
|
||||
IsInt,
|
||||
IsOptional,
|
||||
IsString,
|
||||
MaxLength,
|
||||
Min,
|
||||
} from 'class-validator';
|
||||
|
||||
@@ -53,12 +55,22 @@ export class UpdateWorkspaceDto extends PartialType(CreateWorkspaceDto) {
|
||||
@IsBoolean()
|
||||
aiDictation: boolean;
|
||||
|
||||
// Workspace feature toggle for the admin-only HTML embed feature. Persisted at
|
||||
// settings.htmlEmbed. ABSENT/false => OFF (default).
|
||||
// Workspace master toggle that enables/disables the HTML embed block type.
|
||||
// Persisted at settings.htmlEmbed. ABSENT/false => OFF (default). The block
|
||||
// itself renders in a sandboxed iframe, so this is a feature switch, not a
|
||||
// security gate.
|
||||
@IsOptional()
|
||||
@IsBoolean()
|
||||
htmlEmbed: boolean;
|
||||
|
||||
// Admin-only analytics/tracker snippet (raw HTML/JS) injected verbatim into
|
||||
// the <head> of PUBLIC SHARE pages only (same-origin). Persisted at
|
||||
// settings.trackerHead. Admin-authored trusted content.
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
@MaxLength(20000)
|
||||
trackerHead?: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsBoolean()
|
||||
aiPublicShareAssistant: boolean;
|
||||
|
||||
@@ -108,4 +108,38 @@ describe('WorkspaceService.update — htmlEmbed toggle persistence (real code)',
|
||||
expect(logged.changes.before.htmlEmbed).toBe(false);
|
||||
expect(logged.changes.after.htmlEmbed).toBe(true);
|
||||
});
|
||||
|
||||
it('persists trackerHead via updateSetting with the trackerHead key', async () => {
|
||||
const { service, updateSetting } = buildService({});
|
||||
|
||||
await service.update('w1', { trackerHead: '<script>ga()</script>' } as any);
|
||||
|
||||
expect(updateSetting).toHaveBeenCalledWith(
|
||||
'w1',
|
||||
'trackerHead',
|
||||
'<script>ga()</script>',
|
||||
expect.anything(),
|
||||
);
|
||||
});
|
||||
|
||||
it('does NOT call updateSetting when trackerHead is undefined in the dto', async () => {
|
||||
const { service, updateSetting } = buildService({});
|
||||
|
||||
await service.update('w1', { name: 'New name' } as any);
|
||||
|
||||
expect(updateSetting).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('audits the trackerHead change (before/after) when the value changes', async () => {
|
||||
const { service, auditService } = buildService({
|
||||
settingsBefore: { trackerHead: '' },
|
||||
});
|
||||
|
||||
await service.update('w1', { trackerHead: '<script>m()</script>' } as any);
|
||||
|
||||
expect(auditService.log).toHaveBeenCalledTimes(1);
|
||||
const logged = auditService.log.mock.calls[0][0];
|
||||
expect(logged.changes.before.trackerHead).toBe('');
|
||||
expect(logged.changes.after.trackerHead).toBe('<script>m()</script>');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -525,6 +525,22 @@ export class WorkspaceService {
|
||||
);
|
||||
}
|
||||
|
||||
if (typeof updateWorkspaceDto.trackerHead !== 'undefined') {
|
||||
// Admin-only analytics/tracker snippet injected into the <head> of
|
||||
// public share pages (same-origin). Persisted at settings.trackerHead.
|
||||
const prev = (settingsBefore as any)?.trackerHead ?? '';
|
||||
if (prev !== updateWorkspaceDto.trackerHead) {
|
||||
before.trackerHead = prev;
|
||||
after.trackerHead = updateWorkspaceDto.trackerHead;
|
||||
}
|
||||
await this.workspaceRepo.updateSetting(
|
||||
workspaceId,
|
||||
'trackerHead',
|
||||
updateWorkspaceDto.trackerHead,
|
||||
trx,
|
||||
);
|
||||
}
|
||||
|
||||
if (typeof updateWorkspaceDto.aiPublicShareAssistant !== 'undefined') {
|
||||
const prev = settingsBefore?.ai?.publicShareAssistant ?? false;
|
||||
if (prev !== updateWorkspaceDto.aiPublicShareAssistant) {
|
||||
@@ -549,6 +565,7 @@ export class WorkspaceService {
|
||||
delete updateWorkspaceDto.aiChat;
|
||||
delete updateWorkspaceDto.aiDictation;
|
||||
delete updateWorkspaceDto.htmlEmbed;
|
||||
delete updateWorkspaceDto.trackerHead;
|
||||
delete updateWorkspaceDto.aiPublicShareAssistant;
|
||||
|
||||
await this.workspaceRepo.updateWorkspace(
|
||||
|
||||
@@ -3,6 +3,7 @@ import { InjectQueue } from '@nestjs/bullmq';
|
||||
import { Queue } from 'bullmq';
|
||||
import { QueueName, QueueJob } from '../queue/constants';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo';
|
||||
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
|
||||
import { PageEmbeddingRepo } from '@docmost/db/repos/ai-chat/page-embedding.repo';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
@@ -49,6 +50,7 @@ export interface UpdateAiSettingsInput {
|
||||
export class AiSettingsService {
|
||||
constructor(
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private readonly aiAgentRoleRepo: AiAgentRoleRepo,
|
||||
private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
|
||||
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
|
||||
private readonly pageRepo: PageRepo,
|
||||
@@ -110,6 +112,26 @@ export class AiSettingsService {
|
||||
return settings?.ai?.publicShareAssistant === true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the display name of the agent role acting as the public-share
|
||||
* assistant's identity, so the anonymous widget can label messages with the
|
||||
* persona name instead of the generic "AI agent". Returns null when no role
|
||||
* is configured, or the referenced role is missing/disabled (built-in persona
|
||||
* → the client falls back to "AI agent"). Mirrors the role resolution in
|
||||
* PublicShareChatService.resolveShareRole.
|
||||
*/
|
||||
async resolvePublicShareAssistantName(
|
||||
workspaceId: string,
|
||||
): Promise<string | null> {
|
||||
const resolved = await this.resolve(workspaceId);
|
||||
const roleId = resolved?.publicShareAssistantRoleId;
|
||||
if (!roleId) return null;
|
||||
const role = await this.aiAgentRoleRepo.findById(roleId, workspaceId);
|
||||
if (!role || !role.enabled) return null;
|
||||
const name = role.name?.trim();
|
||||
return name ? name : null;
|
||||
}
|
||||
|
||||
/** Read the stored non-secret provider settings for a workspace. */
|
||||
private async readProvider(
|
||||
workspaceId: string,
|
||||
|
||||
@@ -20,12 +20,6 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
||||
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
|
||||
import { markdownToHtml } from '@docmost/editor-ext';
|
||||
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { formatImportHtml } from '../utils/import-formatter';
|
||||
import {
|
||||
buildAttachmentCandidates,
|
||||
@@ -59,8 +53,6 @@ export class FileImportTaskService {
|
||||
private readonly backlinkRepo: BacklinkRepo,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
private readonly importAttachmentService: ImportAttachmentService,
|
||||
private readonly userRepo: UserRepo,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private eventEmitter: EventEmitter2,
|
||||
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
|
||||
) {}
|
||||
@@ -157,25 +149,6 @@ export class FileImportTaskService {
|
||||
.where('id', '=', fileTask.spaceId)
|
||||
.executeTakeFirst();
|
||||
|
||||
// SECURITY (Variant C admin gate, zip/multi-file import write path):
|
||||
// An imported .html/.md file can carry an htmlEmbed marker (the node's
|
||||
// serialized form), which would execute raw, unsanitized JS in readers'
|
||||
// browsers. Only workspace admins/owners may author it. Resolve the
|
||||
// importer's role ONCE here; each page's prosemirror JSON is run through the
|
||||
// strip below before textContent/ydoc/insert when the importer is not an
|
||||
// admin, so a non-admin cannot smuggle the node in via a zip import (which
|
||||
// requires only space Edit).
|
||||
const importingUser = await this.userRepo.findById(
|
||||
fileTask.creatorId,
|
||||
fileTask.workspaceId,
|
||||
);
|
||||
// Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed
|
||||
// survives only when the workspace feature toggle is ON and the importer is
|
||||
// an admin/owner. OFF (default) => stripped for everyone.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(fileTask.workspaceId))?.settings,
|
||||
);
|
||||
|
||||
const pagesMap = new Map<string, ImportPageNode>();
|
||||
|
||||
for (const absPath of allFiles) {
|
||||
@@ -523,22 +496,9 @@ export class FileImportTaskService {
|
||||
await this.importService.processHTML(html),
|
||||
);
|
||||
|
||||
let { title, prosemirrorJson } =
|
||||
const { title, prosemirrorJson } =
|
||||
this.importService.extractTitleAndRemoveHeading(pmState);
|
||||
|
||||
// SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
|
||||
// imported by a non-admin BEFORE computing textContent/ydoc/insert.
|
||||
// Gate (featureEnabled AND admin) is resolved once above and recomputed
|
||||
// by the helper from the same htmlEmbedEnabled + importer role.
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: importingUser?.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
|
||||
),
|
||||
});
|
||||
|
||||
const insertablePage: InsertablePage = {
|
||||
id: page.id,
|
||||
slugId: page.slugId,
|
||||
|
||||
@@ -1,266 +0,0 @@
|
||||
// Exercises the REAL htmlEmbed admin gate on the two import write paths:
|
||||
//
|
||||
// (1) ImportService.importPage() — single .html/.md upload
|
||||
// (2) FileImportTaskService.processGenericImport() — zip / multi-file import
|
||||
//
|
||||
// Both build content/textContent/ydoc directly and persist (bypassing the
|
||||
// collab onStoreDocument strip), so each must run the imported document through
|
||||
// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read
|
||||
// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed,
|
||||
// `stripHtmlEmbedNodes` BEFORE persisting.
|
||||
//
|
||||
// This spec constructs the REAL services with deps mocked, feeds an imported
|
||||
// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed
|
||||
// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED
|
||||
// content (captured at the insert boundary) is stripped for a non-admin /
|
||||
// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD
|
||||
// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts.
|
||||
//
|
||||
// Three modules are mocked away because they pull transitive ESM deps that
|
||||
// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the
|
||||
// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via
|
||||
// import-attachment). None of them participate in the gate decision:
|
||||
// - import-formatter: contextless HTML cleanup + link rewriting; replaced with
|
||||
// faithful passthroughs (the embed div has no href/iframe, so the real
|
||||
// normalizer would leave it untouched anyway).
|
||||
// - import-attachment: attachment rewriting; passthrough returns html as-is.
|
||||
jest.mock('../../../collaboration/collaboration.gateway', () => ({
|
||||
CollaborationGateway: class {},
|
||||
}));
|
||||
jest.mock('../utils/import-formatter', () => ({
|
||||
normalizeImportHtml: () => {},
|
||||
formatImportHtml: async (opts: any) => ({
|
||||
html: opts.html,
|
||||
backlinks: [],
|
||||
pageIcon: undefined,
|
||||
}),
|
||||
}));
|
||||
jest.mock('./import-attachment.service', () => ({
|
||||
ImportAttachmentService: class {},
|
||||
}));
|
||||
|
||||
import { promises as fs } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { ImportService } from './import.service';
|
||||
import { FileImportTaskService } from './file-import-task.service';
|
||||
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
const WS = 'ws-1';
|
||||
const SPACE = 'space-1';
|
||||
const USER = 'importer-1';
|
||||
|
||||
// HTML carrying the serialized htmlEmbed node. The REAL htmlToJson parses
|
||||
// `<div data-type="htmlEmbed" data-source="BASE64">` into an htmlEmbed PM node
|
||||
// (base64 below decodes to `<script>x</script>`).
|
||||
const HTML_WITH_EMBED =
|
||||
'<p>imported body</p>' +
|
||||
'<div data-type="htmlEmbed" data-source="PHNjcmlwdD54PC9zY3JpcHQ+"></div>';
|
||||
|
||||
function workspaceRepoFor(featureEnabled: boolean) {
|
||||
return {
|
||||
findById: jest.fn(async () => ({
|
||||
id: WS,
|
||||
settings: { htmlEmbed: featureEnabled },
|
||||
})),
|
||||
};
|
||||
}
|
||||
|
||||
// userRepo.findById resolves the importer's role (or undefined for a missing
|
||||
// user -> fail closed).
|
||||
function userRepoFor(user: { role?: string } | undefined) {
|
||||
return { findById: jest.fn(async () => user) };
|
||||
}
|
||||
|
||||
describe('ImportService.importPage htmlEmbed admin gate (real code)', () => {
|
||||
// Run importPage with a single uploaded .html and return the persisted content
|
||||
// captured at pageRepo.insertPage.
|
||||
async function persistedContent(
|
||||
featureEnabled: boolean,
|
||||
user: { role?: string } | undefined,
|
||||
) {
|
||||
const captured: any[] = [];
|
||||
const pageRepo: any = {
|
||||
insertPage: jest.fn(async (row: any) => {
|
||||
captured.push(row);
|
||||
return { id: 'p1', slugId: 's1', ...row };
|
||||
}),
|
||||
};
|
||||
// db is only used for getNewPagePosition (a select chain).
|
||||
const selectChain: any = {
|
||||
select: () => selectChain,
|
||||
where: () => selectChain,
|
||||
orderBy: () => selectChain,
|
||||
limit: () => selectChain,
|
||||
executeTakeFirst: async () => undefined,
|
||||
};
|
||||
const db: any = { selectFrom: () => selectChain };
|
||||
|
||||
const service = new ImportService(
|
||||
pageRepo,
|
||||
userRepoFor(user) as any,
|
||||
{ putBuffer: jest.fn() } as any, // storageService (unused on this path)
|
||||
db,
|
||||
{ add: jest.fn() } as any, // fileTaskQueue (unused)
|
||||
workspaceRepoFor(featureEnabled) as any,
|
||||
);
|
||||
|
||||
const file: any = {
|
||||
filename: 'doc.html',
|
||||
toBuffer: async () => Buffer.from(HTML_WITH_EMBED, 'utf-8'),
|
||||
};
|
||||
await service.importPage(Promise.resolve(file), USER, SPACE, WS);
|
||||
expect(captured).toHaveLength(1);
|
||||
return captured[0].content;
|
||||
}
|
||||
|
||||
it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
|
||||
const content = await persistedContent(true, { role: 'member' });
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
expect(JSON.stringify(content)).toContain('imported body');
|
||||
});
|
||||
|
||||
it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => {
|
||||
let extractDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
// Real temp dir holding a single .html page that carries the embed; the
|
||||
// method reads it from disk via fs.readFile.
|
||||
extractDir = await fs.mkdtemp(path.join(os.tmpdir(), 'html-embed-import-'));
|
||||
await fs.writeFile(path.join(extractDir, 'page.html'), HTML_WITH_EMBED);
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await fs.rm(extractDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
// Run processGenericImport over the temp dir and return the content persisted
|
||||
// for the imported page (captured at trx.insertInto('pages').values(...)).
|
||||
async function persistedContent(
|
||||
featureEnabled: boolean,
|
||||
user: { role?: string } | undefined,
|
||||
) {
|
||||
const captured: any[] = [];
|
||||
const trxInsertChain = (table: string) => ({
|
||||
values: (row: any) => {
|
||||
if (table === 'pages') captured.push(row);
|
||||
return { execute: async () => undefined };
|
||||
},
|
||||
});
|
||||
const trx: any = { insertInto: trxInsertChain };
|
||||
const db: any = {
|
||||
// spaces lookup at the top of processGenericImport
|
||||
selectFrom: () => ({
|
||||
select: () => ({
|
||||
where: () => ({ executeTakeFirst: async () => ({ slug: 'sp' }) }),
|
||||
}),
|
||||
}),
|
||||
// executeTx -> db.transaction().execute(cb)
|
||||
transaction: () => ({ execute: async (cb: any) => cb(trx) }),
|
||||
};
|
||||
|
||||
// importService stub: only the real, gate-relevant helpers are used. We give
|
||||
// it the REAL implementations by delegating to a real ImportService for
|
||||
// processHTML/extractTitleAndRemoveHeading/createYdoc so the embed parse and
|
||||
// strip path runs for real.
|
||||
const realImport = new ImportService(
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
);
|
||||
const importService: any = {
|
||||
processHTML: (html: string) => realImport.processHTML(html),
|
||||
extractTitleAndRemoveHeading: (s: any) =>
|
||||
realImport.extractTitleAndRemoveHeading(s),
|
||||
createYdoc: (j: any) => realImport.createYdoc(j),
|
||||
};
|
||||
|
||||
const importAttachmentService: any = {
|
||||
// passthrough: no attachment rewriting, return html unchanged
|
||||
processAttachments: jest.fn(async (opts: any) => opts.html),
|
||||
};
|
||||
|
||||
const service = new FileImportTaskService(
|
||||
{ putBuffer: jest.fn() } as any, // storageService
|
||||
importService,
|
||||
{ nextPagePosition: jest.fn(async () => 'a0') } as any, // pageService (position only)
|
||||
{ insertBacklink: jest.fn() } as any, // backlinkRepo
|
||||
db,
|
||||
importAttachmentService,
|
||||
userRepoFor(user) as any,
|
||||
workspaceRepoFor(featureEnabled) as any,
|
||||
{ emit: jest.fn() } as any, // eventEmitter
|
||||
{ logBatchWithContext: jest.fn() } as any, // auditService
|
||||
);
|
||||
|
||||
const fileTask: any = {
|
||||
id: 'task-1',
|
||||
creatorId: USER,
|
||||
workspaceId: WS,
|
||||
spaceId: SPACE,
|
||||
source: 'generic',
|
||||
};
|
||||
|
||||
await service.processGenericImport({ extractDir, fileTask });
|
||||
expect(captured.length).toBeGreaterThanOrEqual(1);
|
||||
return captured[0].content;
|
||||
}
|
||||
|
||||
it('toggle ON + member: persisted page has htmlEmbed stripped', async () => {
|
||||
const content = await persistedContent(true, { role: 'member' });
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
expect(JSON.stringify(content)).toContain('imported body');
|
||||
});
|
||||
|
||||
it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + admin: persisted page keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: persisted page keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -1,12 +1,5 @@
|
||||
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import {
|
||||
hasHtmlEmbedNode,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { MultipartFile } from '@fastify/multipart';
|
||||
import * as path from 'path';
|
||||
import {
|
||||
@@ -44,12 +37,10 @@ export class ImportService {
|
||||
|
||||
constructor(
|
||||
private readonly pageRepo: PageRepo,
|
||||
private readonly userRepo: UserRepo,
|
||||
private readonly storageService: StorageService,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
@InjectQueue(QueueName.FILE_TASK_QUEUE)
|
||||
private readonly fileTaskQueue: Queue,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async importPage(
|
||||
@@ -94,32 +85,7 @@ export class ImportService {
|
||||
|
||||
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
|
||||
const title = extracted.title;
|
||||
let prosemirrorJson = extracted.prosemirrorJson;
|
||||
|
||||
// SECURITY (Variant C admin gate, import write path):
|
||||
// An imported .html/.md file can carry an htmlEmbed marker (the node's
|
||||
// serialized form), which would execute raw JS in readers' browsers. Only
|
||||
// workspace admins/owners may author it, so strip htmlEmbed nodes from
|
||||
// imports performed by a non-admin user.
|
||||
// Outer has-check first so the user/workspace lookups below run only when an
|
||||
// embed is actually present (the common case carries none).
|
||||
if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
|
||||
const importingUser = await this.userRepo.findById(userId, workspaceId);
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
|
||||
// feature toggle is ON and the importer is an admin/owner. OFF (default)
|
||||
// => stripped for everyone.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(workspaceId))?.settings,
|
||||
);
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: importingUser?.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from import by user ${userId}`,
|
||||
),
|
||||
});
|
||||
}
|
||||
const prosemirrorJson = extracted.prosemirrorJson;
|
||||
|
||||
const pageTitle = title || fileName;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user