diff --git a/apps/server/package.json b/apps/server/package.json index 10fa66c1..67e2b50a 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -195,7 +195,8 @@ "moduleNameMapper": { "^@docmost/db/(.*)$": "/database/$1", "^@docmost/transactional/(.*)$": "/integrations/transactional/$1", - "^@docmost/ee/(.*)$": "/ee/$1" + "^@docmost/ee/(.*)$": "/ee/$1", + "^src/(.*)$": "/$1" } } } diff --git a/apps/server/src/collaboration/collaboration.handler.html-embed.spec.ts b/apps/server/src/collaboration/collaboration.handler.html-embed.spec.ts new file mode 100644 index 00000000..4431fa67 --- /dev/null +++ b/apps/server/src/collaboration/collaboration.handler.html-embed.spec.ts @@ -0,0 +1,102 @@ +import * as Y from 'yjs'; +import { TiptapTransformer } from '@hocuspocus/transformer'; +import { CollaborationHandler } from './collaboration.handler'; +import { hasHtmlEmbedNode } from '../common/helpers/prosemirror/html-embed.util'; + +// Exercises the REAL CollaborationHandler.updatePageContent admin gate (the +// REST/MCP/AI content-update entrypoint, used by the page update endpoint and +// the MCP/AI agent). updatePageContent reads `user?.role` and strips htmlEmbed +// BEFORE handing the json to withYdocConnection. We stub only +// withYdocConnection (which would otherwise open a real hocuspocus connection): +// the role-extraction (`user?.role`) + strip that run upstream of it are REAL +// production code. The 'replace' branch then runs the production +// TiptapTransformer.toYdoc on the gated json against a real Y.Doc, which we +// decode back to JSON and assert on. This replaces the re-implemented +// `applyAdminGate` stand-in for this entrypoint. + +const docWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }, + { + type: 'columns', + content: [ + { + type: 'column', + attrs: { position: 'left' }, + content: [ + { type: 'htmlEmbed', attrs: { source: '' } }, + { type: 'paragraph', content: [{ type: 'text', text: 'inner' }] }, + ], + }, + { + type: 'column', + attrs: { position: 'right' }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'r' }] }, + ], + }, + ], + }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +/** + * Run the REAL updatePageContent('replace') with a stubbed withYdocConnection. + * The stub provides a real Y.Doc + recording fragment; the production fn calls + * TiptapTransformer.toYdoc() and applies it to the doc, so decoding + * the doc afterward yields exactly the gated content. + */ +async function gatedContentFor(role: string | null | undefined) { + const handler = new CollaborationHandler(); + const captureDoc = new Y.Doc(); + + jest + .spyOn(handler, 'withYdocConnection') + .mockImplementation(async (_hp, _name, _ctx, fn: any) => { + const fragment = captureDoc.getXmlFragment('default'); + // Mirror the real Document surface the fn touches. + const docLike: any = { + getXmlFragment: () => fragment, + }; + // The fn does: fragment.delete(0,len) then + // Y.applyUpdate(doc, encodeStateAsUpdate(toYdoc(gatedJson))). It calls + // Y.applyUpdate(doc, ...) — so docLike must be a real Y.Doc target. + fn(captureDoc); + }); + + const handlers = handler.getHandlers({} as any); + await handlers.updatePageContent('page-1', { + prosemirrorJson: docWithEmbed(), + operation: 'replace', + user: { id: 'u1', role } as any, + }); + + return TiptapTransformer.fromYdoc(captureDoc, 'default'); +} + +describe('CollaborationHandler.updatePageContent htmlEmbed admin gate (real code)', () => { + it('non-admin (member): every htmlEmbed (top-level + nested) stripped before the ydoc', async () => { + const gated = await gatedContentFor('member'); + expect(hasHtmlEmbedNode(gated)).toBe(false); + // Non-embed siblings survive. + const json = JSON.stringify(gated); + expect(json).toContain('keep'); + expect(json).toContain('inner'); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + for (const role of [undefined, null, 'viewer'] as const) { + expect(hasHtmlEmbedNode(await gatedContentFor(role))).toBe(false); + } + }); + + it('admin: htmlEmbed preserved', async () => { + expect(hasHtmlEmbedNode(await gatedContentFor('admin'))).toBe(true); + }); + + it('owner: htmlEmbed preserved', async () => { + expect(hasHtmlEmbedNode(await gatedContentFor('owner'))).toBe(true); + }); +}); diff --git a/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts new file mode 100644 index 00000000..79f2dcbc --- /dev/null +++ b/apps/server/src/collaboration/extensions/persistence.extension.html-embed.spec.ts @@ -0,0 +1,252 @@ +import * as Y from 'yjs'; +import { TiptapTransformer } from '@hocuspocus/transformer'; +import { PersistenceExtension } from './persistence.extension'; +import { tiptapExtensions } from '../collaboration.util'; +import { + hasHtmlEmbedNode, + HTML_EMBED_NODE_NAME, +} from '../../common/helpers/prosemirror/html-embed.util'; + +// Exercises the REAL PersistenceExtension.onStoreDocument (the primary collab +// WebSocket write path) against a REAL ydoc, with thin repo/db/queue mocks. +// This replaces the prior re-implemented `applyAdminGate` stand-in for this +// entrypoint: if the role-extraction expression (`context?.user?.role`), the +// strip call, or the ydoc-rebuild branch is deleted/changed, these tests fail. + +const RICH_DOC = { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [{ type: 'text', text: 'intro paragraph' }], + }, + { + type: 'columns', + content: [ + { + type: 'column', + attrs: { position: 'left' }, + content: [ + { + type: 'paragraph', + content: [ + { type: 'text', text: 'left col, mentioning ' }, + { + type: 'mention', + attrs: { + id: 'mention-1', + label: 'Alice', + entityType: 'user', + entityId: 'user-123', + creatorId: 'creator-1', + }, + }, + ], + }, + // Nested embed inside a column — must be stripped recursively. + { + type: HTML_EMBED_NODE_NAME, + attrs: { source: '' }, + }, + ], + }, + { + type: 'column', + attrs: { position: 'right' }, + content: [ + { + type: 'table', + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableHeader', + attrs: { colspan: 1, rowspan: 1 }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'H' }] }, + ], + }, + ], + }, + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + attrs: { colspan: 1, rowspan: 1 }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'cell' }] }, + ], + }, + ], + }, + ], + }, + ], + }, + ], + }, + // Top-level embed — must be stripped. + { + type: HTML_EMBED_NODE_NAME, + attrs: { source: '' }, + }, + { + type: 'paragraph', + content: [{ type: 'text', text: 'outro paragraph' }], + }, + ], +}; + +function buildYdoc(json: any): Y.Doc { + return TiptapTransformer.toYdoc(json, 'default', tiptapExtensions); +} + +// Count nodes by type across the whole tree (excludes htmlEmbed by listing it +// separately) so we can assert every OTHER node type survived the strip. +function nodeTypeCounts(json: any): Record { + const counts: Record = {}; + const walk = (n: any) => { + if (!n || typeof n !== 'object') return; + if (n.type) counts[n.type] = (counts[n.type] ?? 0) + 1; + if (Array.isArray(n.content)) n.content.forEach(walk); + }; + walk(json); + return counts; +} + +/** + * Construct a real PersistenceExtension with the minimum mocks needed for + * onStoreDocument to reach the strip + persist branch, and capture the content + * that would be written to the page row. + */ +function buildExtension() { + const captured: { content?: any } = {}; + + const existingPage = { + id: 'page-1', + slugId: 'slug-1', + spaceId: 'space-1', + workspaceId: 'ws-1', + creatorId: 'creator-1', + contributorIds: [], + content: { type: 'doc', content: [] }, // differs from new content -> persist runs + createdAt: new Date(), + lastUpdatedSource: 'user', + }; + + const pageRepo = { + findById: jest.fn(async () => ({ ...existingPage })), + updatePage: jest.fn(async (values: any) => { + captured.content = values.content; + }), + }; + const pageHistoryRepo = { + findPageLastHistory: jest.fn(async () => null), + saveHistory: jest.fn(async () => undefined), + }; + // db.transaction().execute(cb) just runs the callback (no real DB). + const db = { + transaction: () => ({ + execute: (cb: any) => cb({} as any), + }), + }; + const noopQueue = { add: jest.fn(async () => undefined) } as any; + const collabHistory = { addContributors: jest.fn(async () => undefined) } as any; + const transclusionService = { + syncPageTransclusions: jest.fn(async () => undefined), + syncPageReferences: jest.fn(async () => undefined), + } as any; + + const ext = new PersistenceExtension( + pageRepo as any, + pageHistoryRepo as any, + db as any, + noopQueue, + noopQueue, + noopQueue, + collabHistory, + transclusionService, + ); + + return { ext, captured, pageRepo }; +} + +async function runStore(role: string | null | undefined, doc: Y.Doc) { + const { ext, captured } = buildExtension(); + // hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has + // none, so stub it (the post-persist broadcast is not under test here). + (doc as any).broadcastStateless = () => undefined; + await ext.onStoreDocument({ + documentName: 'page-1', + document: doc, + context: { user: { id: 'u1', role } }, + } as any); + return captured; +} + +describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)', () => { + it('non-admin store: strips EVERY htmlEmbed but preserves every other node', async () => { + const doc = buildYdoc(RICH_DOC); + const before = TiptapTransformer.fromYdoc(doc, 'default'); + expect(hasHtmlEmbedNode(before)).toBe(true); + const beforeCounts = nodeTypeCounts(before); + + const captured = await runStore('member', doc); + + expect(captured.content).toBeDefined(); + // htmlEmbed gone from the persisted content. + expect(hasHtmlEmbedNode(captured.content)).toBe(false); + + // Every non-embed node type is preserved with the SAME count (guards against + // data loss if a node were missing from tiptapExtensions and dropped on the + // toYdoc rebuild). + const afterCounts = nodeTypeCounts(captured.content); + for (const [type, count] of Object.entries(beforeCounts)) { + if (type === HTML_EMBED_NODE_NAME) continue; + expect(afterCounts[type]).toBe(count); + } + // The two embeds are gone. + expect(beforeCounts[HTML_EMBED_NODE_NAME]).toBe(2); + expect(afterCounts[HTML_EMBED_NODE_NAME]).toBeUndefined(); + + // The shared ydoc fragment was also rewritten clean (re-decode it). + const reDecoded = TiptapTransformer.fromYdoc(doc, 'default'); + expect(hasHtmlEmbedNode(reDecoded)).toBe(false); + }); + + it('admin store: htmlEmbed preserved in persisted content', async () => { + const captured = await runStore('admin', buildYdoc(RICH_DOC)); + expect(captured.content).toBeDefined(); + expect(hasHtmlEmbedNode(captured.content)).toBe(true); + expect(nodeTypeCounts(captured.content)[HTML_EMBED_NODE_NAME]).toBe(2); + }); + + it('owner store: htmlEmbed preserved', async () => { + const captured = await runStore('owner', buildYdoc(RICH_DOC)); + expect(hasHtmlEmbedNode(captured.content)).toBe(true); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + expect( + hasHtmlEmbedNode((await runStore(undefined, buildYdoc(RICH_DOC))).content), + ).toBe(false); + expect( + hasHtmlEmbedNode((await runStore(null, buildYdoc(RICH_DOC))).content), + ).toBe(false); + expect( + hasHtmlEmbedNode((await runStore('viewer', buildYdoc(RICH_DOC))).content), + ).toBe(false); + }); + + it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => { + const emptyDoc = buildYdoc({ + type: 'doc', + content: [{ type: 'paragraph' }], + }); + // Non-admin path with an empty/embed-free fragment must be a no-op strip, + // not throw. + await expect(runStore('member', emptyDoc)).resolves.toBeDefined(); + }); +}); diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts index ec215c87..4bedc075 100644 --- a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts +++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts @@ -105,135 +105,34 @@ describe('canAuthorHtmlEmbed', () => { }); }); -// Replicates the write-path decision used by every non-admin persistence guard -// (page create, collab store, single import, zip import, duplication, -// transclusion unsync): -// if !canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json) -> strip, else keep. -const applyAdminGate = (json: any, role: string | null | undefined) => { - if (!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)) { - return stripHtmlEmbedNodes(json); - } - return json; -}; - -describe('admin-gate write-path decision (duplication / import / unsync)', () => { - const docWithEmbed = { - type: 'doc', - content: [ - { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }, - { type: 'htmlEmbed', attrs: { source: '' } }, - ], - }; - - it('strips the embed for a non-admin (member) author', () => { - const result = applyAdminGate(docWithEmbed, 'member'); - expect(hasHtmlEmbedNode(result)).toBe(false); - expect(result.content).toHaveLength(1); - expect(result.content[0].content[0].text).toBe('keep'); - }); - - it('strips the embed for unknown/empty roles', () => { - expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, null))).toBe(false); - expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, undefined))).toBe( - false, - ); - expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'viewer'))).toBe( - false, - ); - }); - - it('keeps the embed for an admin author', () => { - const result = applyAdminGate(docWithEmbed, 'admin'); - expect(hasHtmlEmbedNode(result)).toBe(true); - expect(result).toBe(docWithEmbed); - }); - - it('keeps the embed for an owner author', () => { - const result = applyAdminGate(docWithEmbed, 'owner'); - expect(hasHtmlEmbedNode(result)).toBe(true); - }); - - it('strips nested embeds (subtree/column duplication) for a non-admin', () => { - const nested = { - type: 'doc', - content: [ - { - type: 'columns', - content: [ - { - type: 'column', - content: [ - { type: 'htmlEmbed', attrs: { source: '' } }, - { type: 'paragraph', content: [{ type: 'text', text: 'ok' }] }, - ], - }, - ], - }, - ], - }; - const result = applyAdminGate(nested, 'member'); - expect(hasHtmlEmbedNode(result)).toBe(false); - const col = findFirstChild(result, 'column'); - expect(col.content).toHaveLength(1); - expect(col.content[0].type).toBe('paragraph'); - }); - - it('leaves a non-admin doc without embeds untouched (no needless rewrite)', () => { - const clean = { - type: 'doc', - content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }], - }; - const result = applyAdminGate(clean, 'member'); - expect(result).toBe(clean); - }); -}); - -// PageService.create() now applies exactly `applyAdminGate` (above) to the -// parsed ProseMirror JSON BEFORE deriving content/textContent/ydoc and calling -// insertPage. The create controller only requires space Edit, so without this a -// regular member could POST a doc (json, or the markdown/html -// forms that parse to the same node) containing an -// htmlEmbed and store XSS for every reader. These cases pin the create-path -// decision: non-admin -> stripped, admin/owner -> kept. -describe('admin-gate write-path decision (page create)', () => { - const docWithEmbed = { - type: 'doc', - content: [ - { type: 'paragraph', content: [{ type: 'text', text: 'body' }] }, - { type: 'htmlEmbed', attrs: { source: '' } }, - ], - }; - - it('strips the embed when a non-admin (member) creates the page', () => { - const result = applyAdminGate(docWithEmbed, 'member'); - expect(hasHtmlEmbedNode(result)).toBe(false); - expect(result.content).toHaveLength(1); - expect(result.content[0].content[0].text).toBe('body'); - }); - - it('strips the embed when the creator role is unknown/empty', () => { - expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, null))).toBe(false); - expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, undefined))).toBe( - false, - ); - }); - - it('keeps the embed when an admin or owner creates the page', () => { - expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'admin'))).toBe(true); - expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'owner'))).toBe(true); - }); - - it('strips embeds smuggled via the markdown/html form', () => { - // The markdown/html create formats decode to the same htmlEmbed node, so - // the gate (run on the parsed JSON) covers them identically. +// NOTE: a previous revision of this file re-implemented the write-path admin +// gate as a local `applyAdminGate` stand-in and asserted against THAT. A +// deleted/misplaced real guard would have kept those green. The stand-in is +// removed. The collab store, REST/MCP update, and transclusion-unsync paths are +// now tested against their REAL code in: +// - collaboration/extensions/persistence.extension.html-embed.spec.ts +// - collaboration/collaboration.handler.html-embed.spec.ts +// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts +// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup) +// - integrations/import/services/import-html-embed-identity.spec.ts (import) +// +// The case below stays here because it asserts a REAL parse path +// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a +// re-implemented gate. +describe('htmlEmbed smuggled via the markdown/html form (real parse + real helpers)', () => { + it('the parsed node is detected and stripped by the real helpers', () => { + // The markdown/html create formats decode to the same htmlEmbed node, so the + // gate (run on the parsed JSON) covers them identically. const source = ''; const encoded = encodeHtmlEmbedSource(source); const html = `
`; const parsed = htmlToJson(html); expect(hasHtmlEmbedNode(parsed)).toBe(true); - const result = applyAdminGate(parsed, 'member'); - expect(hasHtmlEmbedNode(result)).toBe(false); + // A non-admin role gates to strip via the real helpers. + expect(canAuthorHtmlEmbed('member')).toBe(false); + const stripped = stripHtmlEmbedNodes(parsed); + expect(hasHtmlEmbedNode(stripped)).toBe(false); }); }); diff --git a/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts new file mode 100644 index 00000000..43361325 --- /dev/null +++ b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts @@ -0,0 +1,77 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; + +// PageService.create() and duplicatePage() guards. +// +// page.service.ts cannot be unit-LOADED under the server's jest config (a +// transitive ESM dep, @sindresorhus/slugify, is not in transformIgnorePatterns), +// so we cover the two load-bearing properties at the strongest feasible layer: +// +// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact predicate +// each path applies: non-admin/unknown role -> strip, admin/owner -> keep. +// +// (2) IDENTITY — source-pin which role each path reads (create: the `callerRole` +// param threaded from the request; duplicate: `authUser.role`), so a +// refactor that drops the guard or reads the wrong role trips the test. +// This is what replaces the removed `applyAdminGate` stand-in for these +// two entrypoints. + +const docWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'body' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +// The real predicate both paths apply (see SECURITY blocks in page.service.ts). +function applyGate(json: any, role: string | null | undefined) { + if (!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)) { + return stripHtmlEmbedNodes(json); + } + return json; +} + +describe('page create/duplicate gate decision (real helpers)', () => { + it('non-admin (member) strips', () => { + const result = applyGate(docWithEmbed(), 'member'); + expect(hasHtmlEmbedNode(result)).toBe(false); + expect(result.content).toHaveLength(1); + expect(result.content[0].content[0].text).toBe('body'); + }); + + it('unknown/empty role fails closed (strips)', () => { + for (const role of [null, undefined, 'viewer'] as const) { + expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), role))).toBe(false); + } + }); + + it('admin/owner keep', () => { + expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), 'admin'))).toBe(true); + expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), 'owner'))).toBe(true); + }); +}); + +const SRC = readFileSync(join(__dirname, 'page.service.ts'), 'utf-8'); + +describe('page create/duplicate gate identity is pinned (source contract)', () => { + it('create() gates on the caller role param before deriving content/ydoc', () => { + // create() receives the caller's workspace role as `callerRole` and gates on + // it; the embed must be stripped BEFORE insertPage. + expect(SRC).toMatch( + /!canAuthorHtmlEmbed\(\s*callerRole\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/, + ); + expect(SRC).toContain('prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson)'); + }); + + it('duplicatePage() gates on the duplicating user role (authUser.role)', () => { + expect(SRC).toMatch( + /!canAuthorHtmlEmbed\(\s*authUser\.role\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/, + ); + }); +}); diff --git a/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts b/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts new file mode 100644 index 00000000..dc179d33 --- /dev/null +++ b/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts @@ -0,0 +1,114 @@ +import { TransclusionService } from '../transclusion.service'; +import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util'; + +// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate. +// unsync returns a source snapshot the client materializes into the reference +// page; a non-admin must never receive an embed payload to re-persist. The gate +// reads `user.role` and strips before returning. All repos / access checks are +// mocked so the REAL gate logic runs end-to-end. Complements the existing +// transclusion specs (rewriteAttachmentsForUnsync, controller). + +const WS = 'ws-1'; +const REF_PAGE = 'ref-1'; +const SRC_PAGE = 'src-1'; +const TX_ID = 'tx-1'; + +const sourceContentWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +function buildService() { + const pageRepo = { + findById: jest.fn(async (id: string) => ({ + id, + workspaceId: WS, + spaceId: 'space-1', + deletedAt: null, + })), + }; + const pageTransclusionsRepo = { + findByPageAndTransclusion: jest.fn(async () => ({ + content: sourceContentWithEmbed(), + })), + }; + const pageTransclusionReferencesRepo = { + deleteOne: jest.fn(async () => undefined), + }; + const attachmentRepo = { findByIds: jest.fn(async () => []) }; + const storageService = { copy: jest.fn(async () => undefined) }; + const pageAccessService = { + validateCanEdit: jest.fn(async () => undefined), + validateCanView: jest.fn(async () => undefined), + }; + + const service = new TransclusionService( + {} as any, // db (unused on this path) + pageTransclusionsRepo as any, + pageTransclusionReferencesRepo as any, + pageRepo as any, + {} as any, // pagePermissionRepo (unused) + {} as any, // spaceMemberRepo (unused) + attachmentRepo as any, + storageService as any, + pageAccessService as any, + ); + return service; +} + +function userWithRole(role: string | null | undefined) { + return { id: 'u1', workspaceId: WS, role } as any; +} + +describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => { + it('non-admin (member): returned content has htmlEmbed stripped', async () => { + const service = buildService(); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole('member'), + ); + expect(hasHtmlEmbedNode(content)).toBe(false); + // Non-embed content is preserved. + expect(JSON.stringify(content)).toContain('snapshot body'); + }); + + it('unknown/empty role: fails closed (stripped)', async () => { + for (const role of [undefined, null, 'viewer'] as const) { + const service = buildService(); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole(role), + ); + expect(hasHtmlEmbedNode(content)).toBe(false); + } + }); + + it('admin: returned content keeps the htmlEmbed', async () => { + const service = buildService(); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole('admin'), + ); + expect(hasHtmlEmbedNode(content)).toBe(true); + }); + + it('owner: returned content keeps the htmlEmbed', async () => { + const service = buildService(); + const { content } = await service.unsyncReference( + REF_PAGE, + SRC_PAGE, + TX_ID, + userWithRole('owner'), + ); + expect(hasHtmlEmbedNode(content)).toBe(true); + }); +}); diff --git a/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts new file mode 100644 index 00000000..003208d8 --- /dev/null +++ b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts @@ -0,0 +1,104 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { + canAuthorHtmlEmbed, + hasHtmlEmbedNode, + stripHtmlEmbedNodes, +} from '../../../common/helpers/prosemirror/html-embed.util'; + +// FAIL-CLOSED IDENTITY for the import write paths. +// +// import.service / file-import-task.service cannot be unit-LOADED under the +// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in +// transformIgnorePatterns). So we cover the two load-bearing properties at the +// strongest feasible layer: +// +// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate +// predicate each entrypoint runs against the role resolved from +// userRepo.findById(...): a MISSING user (findById -> undefined) must fail +// closed (strip), and only 'admin'/'owner' keep the embed. +// +// (2) IDENTITY — source-pin which identity governs the gate so a refactor that +// swaps the lookup to the wrong user (e.g. the queue worker's caller) is +// caught: zip import resolves the role from `fileTask.creatorId`; single +// import from the request `userId`. NOT some ambient caller. +// +// If a guard is deleted/misplaced or the identity field changes, these break. + +const docWithEmbed = () => ({ + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] }, + { type: 'htmlEmbed', attrs: { source: '' } }, + ], +}); + +// The real predicate both import entrypoints apply (see the SECURITY blocks in +// import.service.ts and file-import-task.service.ts): resolve the importer via +// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`. +function applyImportGate(json: any, importingUser: { role?: string } | undefined) { + if (!canAuthorHtmlEmbed(importingUser?.role) && hasHtmlEmbedNode(json)) { + return stripHtmlEmbedNodes(json); + } + return json; +} + +describe('import gate fail-closed by resolved-user role (real helpers)', () => { + it('missing user (userRepo.findById -> undefined) strips the embed', () => { + // findById returns undefined when the user/workspace pair does not resolve; + // undefined?.role is undefined -> canAuthorHtmlEmbed(undefined) === false. + const importingUser = undefined; + const result = applyImportGate(docWithEmbed(), importingUser); + expect(hasHtmlEmbedNode(result)).toBe(false); + }); + + it("resolved role 'member' strips", () => { + expect( + hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'member' })), + ).toBe(false); + }); + + it("resolved role 'admin' keeps the embed", () => { + expect( + hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'admin' })), + ).toBe(true); + }); + + it("resolved role 'owner' keeps the embed", () => { + expect( + hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'owner' })), + ).toBe(true); + }); +}); + +// Source-pin the identity each entrypoint feeds to userRepo.findById. These are +// the lines that decide WHOSE role governs the gate; pinning them means a +// refactor that points the lookup at the wrong user trips the test. +const SRC_DIR = join(__dirname); + +describe('import gate identity is pinned to the importer (source contract)', () => { + it('single import resolves the role from the request userId', () => { + const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8'); + // The role lookup must key on the request `userId`, then gate on the role. + expect(src).toMatch( + /this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/, + ); + expect(src).toMatch(/canAuthorHtmlEmbed\(\s*importingUser\?\.role\s*\)/); + // And the gate uses the real strip helper. + expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)'); + }); + + it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => { + const src = readFileSync( + join(SRC_DIR, 'file-import-task.service.ts'), + 'utf-8', + ); + expect(src).toMatch( + /this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/, + ); + expect(src).toMatch( + /importerCanAuthorHtmlEmbed\s*=\s*canAuthorHtmlEmbed\(\s*importingUser\?\.role\s*\)/, + ); + expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)'); + }); +}); diff --git a/packages/editor-ext/package.json b/packages/editor-ext/package.json index 23ddcaff..0e9b8305 100644 --- a/packages/editor-ext/package.json +++ b/packages/editor-ext/package.json @@ -4,7 +4,9 @@ "private": true, "scripts": { "build": "tsc --build", - "dev": "tsc --watch" + "dev": "tsc --watch", + "test": "vitest run", + "test:watch": "vitest" }, "main": "dist/index.js", "module": "./src/index.ts", diff --git a/packages/editor-ext/src/lib/markdown/markdown-html-embed.spec.ts b/packages/editor-ext/src/lib/markdown/markdown-html-embed.spec.ts new file mode 100644 index 00000000..d9c5d249 --- /dev/null +++ b/packages/editor-ext/src/lib/markdown/markdown-html-embed.spec.ts @@ -0,0 +1,112 @@ +import { describe, it, expect } from "vitest"; +import { markdownToHtml, htmlToMarkdown } from "./index"; +import { + encodeHtmlEmbedSource, + decodeHtmlEmbedSource, +} from "../html-embed/html-embed"; + +// SECURITY (Variant C admin gate, import attack surface). +// +// The markdown import path is the only write path where an htmlEmbed reaches +// the server purely from file bytes (no editor / collab socket). The marked +// tokenizer in `html-embed.marked.ts` and the turndown rule in +// `turndown.utils.ts` are what materialize the `` +// marker into the `
` element +// that the server then parses into an htmlEmbed node and the admin gate strips. +// +// If either the tokenizer regex or the turndown rule shape drifts, the marker +// would either (a) stop becoming an htmlEmbed node (silently dropping admin +// content) or (b) become some OTHER tag the server's `hasHtmlEmbedNode` no +// longer recognizes (a strip bypass). These tests pin the marker <-> embed-div +// contract that the server-side strip relies on. editor-ext had ZERO tests +// before this file; this adds the runner + the round-trip coverage. + +// The server parses the embed div by matching `data-type="htmlEmbed"` and +// decoding `data-source`; mirror that here so the assertion is exactly what the +// real `htmlToJson` -> htmlEmbed node parse depends on (the node's parseHTML in +// html-embed.ts uses the same selector + decodeHtmlEmbedSource). +const EMBED_DIV_RE = /]*\bdata-type="htmlEmbed"[^>]*>/; +function extractEmbedSource(html: string): string | undefined { + const div = EMBED_DIV_RE.exec(html); + if (!div) return undefined; + const enc = /data-source="([^"]*)"/.exec(div[0]); + if (!enc) return undefined; + return decodeHtmlEmbedSource(enc[1]); +} + +// Replicates the server's `hasHtmlEmbedNode` decision against the embed *div* +// (the HTML form the server immediately converts to JSON). If this matches, the +// server's JSON-level `hasHtmlEmbedNode` will too, because htmlToJson maps this +// exact div to an htmlEmbed node. +function htmlHasHtmlEmbed(html: string): boolean { + return EMBED_DIV_RE.test(html); +} + +describe("markdown import round-trip", () => { + const source = ""; + + it("markdownToHtml turns the marker into an htmlEmbed div carrying the source", async () => { + const md = ""; + const html = await markdownToHtml(md); + + // The marker became the embed div the server recognizes as an htmlEmbed + // node (so the server's hasHtmlEmbedNode would match it after htmlToJson). + expect(htmlHasHtmlEmbed(html)).toBe(true); + // The decoded source is the original script, intact. + expect(extractEmbedSource(html)).toBe(source); + // The raw script is NOT inlined into the HTML — it stays base64 in the + // attribute (the marker itself must not be a direct injection vector). + expect(html).not.toContain(""); + }); + + it("preserves UTF-8 / special chars in the embedded source", async () => { + const utf8 = ''; + const md = ""; + const html = await markdownToHtml(md); + expect(htmlHasHtmlEmbed(html)).toBe(true); + expect(extractEmbedSource(html)).toBe(utf8); + }); + + it("an empty marker still produces an htmlEmbed div (empty source)", async () => { + const html = await markdownToHtml(""); + expect(htmlHasHtmlEmbed(html)).toBe(true); + expect(extractEmbedSource(html)).toBe(""); + }); + + it("round-trips htmlToMarkdown -> markdownToHtml preserving the embed marker", async () => { + const encoded = encodeHtmlEmbedSource(source); + // NOTE: turndown drops a *blank* (childless) element before any custom rule + // runs, and the htmlEmbed div is normally childless. The export pipeline + // therefore must give the rule a non-blank div to fire on; we add an inert + // text child here to exercise the real turndown htmlEmbed rule. (A blank + // embed div serializing to "" is asserted separately below as a documented + // edge so this contract drift is visible.) + const startHtml = `
x
`; + + // Export to markdown: the turndown rule emits the + // marker (lossless, inert in plain markdown viewers). + const md = htmlToMarkdown(startHtml); + expect(md).toContain(""); + + // Re-import: the marker round-trips back into an embed div with the same + // decoded source — this is the marker <-> embed-div contract the server's + // import strip depends on. + const html = await markdownToHtml(md); + expect(htmlHasHtmlEmbed(html)).toBe(true); + expect(extractEmbedSource(html)).toBe(source); + }); + + it("documents that a BLANK embed div serializes to empty markdown (turndown drops childless blocks)", () => { + const encoded = encodeHtmlEmbedSource(source); + const blank = `
`; + // This pins current behavior so a future change to the turndown rule (e.g. + // making it fire on blank nodes) is caught rather than silently shipping. + expect(htmlToMarkdown(blank)).toBe(""); + }); + + it("the base64 codec itself round-trips (no '<' leaks into the attribute)", () => { + const encoded = encodeHtmlEmbedSource(source); + expect(encoded).not.toContain("<"); + expect(decodeHtmlEmbedSource(encoded)).toBe(source); + }); +}); diff --git a/packages/editor-ext/tsconfig.json b/packages/editor-ext/tsconfig.json index 974fea06..a4ad0d72 100644 --- a/packages/editor-ext/tsconfig.json +++ b/packages/editor-ext/tsconfig.json @@ -11,6 +11,7 @@ "jsx": "react-jsx", "sourceMap": true, "outDir": "./dist", + "rootDir": "./src", "baseUrl": "./", "incremental": true, "skipLibCheck": true, @@ -19,5 +20,7 @@ "strictBindCallApply": false, "forceConsistentCasingInFileNames": false, "noFallthroughCasesInSwitch": false - } + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist", "src/**/*.spec.ts", "src/**/*.test.ts"] } diff --git a/packages/editor-ext/vitest.config.ts b/packages/editor-ext/vitest.config.ts new file mode 100644 index 00000000..783f61d8 --- /dev/null +++ b/packages/editor-ext/vitest.config.ts @@ -0,0 +1,13 @@ +import { defineConfig } from "vitest/config"; + +// Minimal vitest setup for @docmost/editor-ext (mirrors apps/client's config, +// trimmed to what the markdown/html-embed round-trip tests need). The markdown +// utils run in plain Node (marked + turndown), so no jsdom/react plugin is +// required here. +export default defineConfig({ + test: { + environment: "node", + globals: true, + include: ["src/**/*.{test,spec}.ts"], + }, +});