import {
  canAuthorHtmlEmbed,
  collectHtmlEmbedSources,
  hasHtmlEmbedNode,
  htmlEmbedAllowed,
  isHtmlEmbedFeatureEnabled,
  stripDisallowedHtmlEmbedNodes,
  stripHtmlEmbedIfNotAllowed,
  stripHtmlEmbedNodes,
} from './html-embed.util';
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
import {
  decodeHtmlEmbedSource,
  encodeHtmlEmbedSource,
} from '@docmost/editor-ext';

/**
 * Depth-first search for the first node whose `type` equals `type`.
 *
 * The root itself is a candidate; children are read from `content` when it is
 * an array and visited in document order.
 *
 * @param json - Node (or arbitrary value) to search from.
 * @param type - Node type name to look for.
 * @returns The first matching node, or `undefined` when `json` is not an
 *   object or nothing matches.
 */
const findFirstChild = (json: any, type: string): any | undefined => {
  if (!json || typeof json !== 'object') return undefined;
  if (json.type === type) return json;
  if (Array.isArray(json.content)) {
    for (const child of json.content) {
      const found = findFirstChild(child, type);
      if (found) return found;
    }
  }
  return undefined;
};

// Unconditional removal of every htmlEmbed node from a document tree.
describe('stripHtmlEmbedNodes', () => {
  it('removes a top-level htmlEmbed node', () => {
    const doc = {
      type: 'doc',
      content: [
        { type: 'paragraph', content: [{ type: 'text', text: 'before' }] },
        { type: 'htmlEmbed', attrs: { source: '' } },
        { type: 'paragraph', content: [{ type: 'text', text: 'after' }] },
      ],
    };

    const result = stripHtmlEmbedNodes(doc);

    expect(hasHtmlEmbedNode(result)).toBe(false);
    // Other nodes are preserved.
    expect(result.content).toHaveLength(2);
    expect(result.content[0].content[0].text).toBe('before');
    expect(result.content[1].content[0].text).toBe('after');
  });

  it('removes nested htmlEmbed nodes (e.g. inside columns)', () => {
    const doc = {
      type: 'doc',
      content: [
        {
          type: 'columns',
          content: [
            {
              type: 'column',
              content: [
                { type: 'htmlEmbed', attrs: { source: 'x' } },
                {
                  type: 'paragraph',
                  content: [{ type: 'text', text: 'keep' }],
                },
              ],
            },
          ],
        },
      ],
    };

    const result = stripHtmlEmbedNodes(doc);

    expect(hasHtmlEmbedNode(result)).toBe(false);
    const col = findFirstChild(result, 'column');
    expect(col.content).toHaveLength(1);
    expect(col.content[0].type).toBe('paragraph');
  });

  it('does not mutate the input document', () => {
    const doc = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source: 'x' } }],
    };

    stripHtmlEmbedNodes(doc);

    expect(doc.content).toHaveLength(1);
    expect(doc.content[0].type).toBe('htmlEmbed');
  });

  it('leaves documents without htmlEmbed untouched', () => {
    const doc = {
      type: 'doc',
      content: [
        { type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
      ],
    };

    expect(hasHtmlEmbedNode(doc)).toBe(false);
    const result = stripHtmlEmbedNodes(doc);
    expect(result).toEqual(doc);
  });

  it('neutralizes a root node that is itself an htmlEmbed', () => {
    // Defensive: the PM root is always a `doc`, so this is unreachable in normal
    // use, but the helper must still never return a bare htmlEmbed.
    const root = {
      type: 'htmlEmbed',
      attrs: { source: '' },
    };

    const result = stripHtmlEmbedNodes(root);

    expect(hasHtmlEmbedNode(result)).toBe(false);
  });

  it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
    // htmlEmbed sits as a sibling of a paragraph, nested four containers deep.
    const doc = {
      type: 'doc',
      content: [
        {
          type: 'callout',
          content: [
            {
              type: 'columns',
              content: [
                {
                  type: 'column',
                  content: [
                    {
                      type: 'paragraph',
                      content: [{ type: 'text', text: 'deep keep' }],
                    },
                    { type: 'htmlEmbed', attrs: { source: '' } },
                  ],
                },
              ],
            },
          ],
        },
      ],
    };

    const result = stripHtmlEmbedNodes(doc);

    expect(hasHtmlEmbedNode(result)).toBe(false);
    const col = findFirstChild(result, 'column');
    // Sibling paragraph survives; only the embed is removed.
    expect(col.content).toHaveLength(1);
    expect(col.content[0].type).toBe('paragraph');
    expect(col.content[0].content[0].text).toBe('deep keep');
  });

  it('returns non-object / null / array-without-content nodes unchanged', () => {
    // Non-object inputs are returned as-is (callers persist what they got).
    expect(stripHtmlEmbedNodes(null as any)).toBeNull();
    expect(stripHtmlEmbedNodes(undefined as any)).toBeUndefined();
    expect(stripHtmlEmbedNodes('not-a-node' as any)).toBe('not-a-node');
    expect(stripHtmlEmbedNodes(42 as any)).toBe(42);

    // An object node with no `content` array is returned shallow-cloned, equal.
    const leaf = { type: 'paragraph', attrs: { id: 'x' } };
    const out = stripHtmlEmbedNodes(leaf);
    expect(out).toEqual(leaf);
    expect(out).not.toBe(leaf); // new object, input not mutated
  });

  it('yields empty content (not null/undefined) for a doc whose only child is an htmlEmbed', () => {
    const doc = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source: 'only' } }],
    };

    const result = stripHtmlEmbedNodes(doc) as any;

    expect(Array.isArray(result.content)).toBe(true);
    expect(result.content).toHaveLength(0);
    expect(result.content).not.toBeNull();
    expect(result.content).not.toBeUndefined();
    expect(hasHtmlEmbedNode(result)).toBe(false);
  });
});

// Gathering the raw `attrs.source` of every htmlEmbed node into a Set.
describe('collectHtmlEmbedSources', () => {
  it('collects the source of every htmlEmbed node, including nested ones', () => {
    const doc = {
      type: 'doc',
      content: [
        { type: 'htmlEmbed', attrs: { source: 'top' } },
        {
          type: 'columns',
          content: [
            {
              type: 'column',
              content: [
                { type: 'htmlEmbed', attrs: { source: 'nested' } },
                { type: 'paragraph', content: [{ type: 'text', text: 'x' }] },
              ],
            },
          ],
        },
      ],
    };

    const sources = collectHtmlEmbedSources(doc);

    expect(sources).toEqual(new Set(['top', 'nested']));
  });

  it('returns an empty set for a doc with no embeds', () => {
    const doc = {
      type: 'doc',
      content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
    };

    expect(collectHtmlEmbedSources(doc).size).toBe(0);
  });

  it('gracefully skips embeds with absent attrs or non-string source', () => {
    const doc = {
      type: 'doc',
      content: [
        { type: 'htmlEmbed' }, // no attrs
        { type: 'htmlEmbed', attrs: {} }, // no source
        { type: 'htmlEmbed', attrs: { source: 42 } }, // non-string
        { type: 'htmlEmbed', attrs: { source: '' } },
      ],
    };

    // Only the last embed carries a string source, so it is the sole entry.
    expect(collectHtmlEmbedSources(doc)).toEqual(new Set(['']));
  });

  it('returns an empty set for non-object input', () => {
    expect(collectHtmlEmbedSources(null).size).toBe(0);
    expect(collectHtmlEmbedSources(undefined).size).toBe(0);
    expect(collectHtmlEmbedSources('x' as any).size).toBe(0);
  });
});
// Allow-list filtering: an embed survives only when its raw `attrs.source` is
// a member of the supplied Set.
describe('stripDisallowedHtmlEmbedNodes', () => {
  it('keeps an embed whose source is allowed and removes the rest', () => {
    // The two embeds must carry DIFFERENT sources, otherwise the allow-list
    // cannot tell them apart and nothing would be removed.
    const vetted = '<b>vetted</b>';
    const injected = '<script>alert(1)</script>';
    const doc = {
      type: 'doc',
      content: [
        { type: 'htmlEmbed', attrs: { source: vetted } },
        { type: 'htmlEmbed', attrs: { source: injected } },
        { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
      ],
    };

    const result = stripDisallowedHtmlEmbedNodes(doc, new Set([vetted]));

    expect(collectHtmlEmbedSources(result)).toEqual(new Set([vetted]));
    // The allowed embed and the paragraph survive; the new embed is gone.
    expect(result.content).toHaveLength(2);
    expect(result.content[0].attrs.source).toBe(vetted);
    expect(result.content[1].type).toBe('paragraph');
  });

  it('keeps BOTH embeds when two nodes share the same allowed source', () => {
    // Source-identity semantics: identity is the raw `attrs.source`, so a
    // non-admin who duplicates an existing admin-vetted source keeps both copies.
    // This is intended — the raw HTML is already vetted, so a duplicate is safe.
    const doc = {
      type: 'doc',
      content: [
        { type: 'htmlEmbed', attrs: { source: '' } },
        { type: 'paragraph', content: [{ type: 'text', text: 'mid' }] },
        { type: 'htmlEmbed', attrs: { source: '' } },
      ],
    };

    const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['']));

    expect(hasHtmlEmbedNode(result)).toBe(true);
    const embeds = result.content.filter(
      (n: any) => n.type === 'htmlEmbed',
    );
    expect(embeds).toHaveLength(2);
    expect(embeds.every((n: any) => n.attrs.source === '')).toBe(true);
  });

  it('removes a newly-introduced embed when nothing is allowed', () => {
    const doc = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source: '' } }],
    };

    const result = stripDisallowedHtmlEmbedNodes(doc, new Set());

    expect(hasHtmlEmbedNode(result)).toBe(false);
  });

  it('filters nested embeds by the allow-list (e.g. inside columns)', () => {
    // Distinct sources again: one on the allow-list, one not.
    const vetted = '<b>vetted</b>';
    const injected = '<script>alert(1)</script>';
    const doc = {
      type: 'doc',
      content: [
        {
          type: 'columns',
          content: [
            {
              type: 'column',
              content: [
                { type: 'htmlEmbed', attrs: { source: vetted } },
                { type: 'htmlEmbed', attrs: { source: injected } },
              ],
            },
          ],
        },
      ],
    };

    const result = stripDisallowedHtmlEmbedNodes(doc, new Set([vetted]));

    const col = findFirstChild(result, 'column');
    expect(col.content).toHaveLength(1);
    expect(col.content[0].attrs.source).toBe(vetted);
  });

  it('treats an embed with absent/non-string source as not allowed (stripped)', () => {
    const doc = {
      type: 'doc',
      content: [
        { type: 'htmlEmbed' },
        { type: 'htmlEmbed', attrs: {} },
      ],
    };

    const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['']));

    expect(hasHtmlEmbedNode(result)).toBe(false);
  });

  it('does not mutate the input document', () => {
    const doc = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source: '' } }],
    };

    stripDisallowedHtmlEmbedNodes(doc, new Set());

    expect(doc.content).toHaveLength(1);
    expect(doc.content[0].type).toBe('htmlEmbed');
  });

  it('neutralizes a root node that is itself a disallowed htmlEmbed', () => {
    const root = { type: 'htmlEmbed', attrs: { source: '' } };

    const result = stripDisallowedHtmlEmbedNodes(root, new Set());

    expect(hasHtmlEmbedNode(result)).toBe(false);
  });

  it('keeps a root node that is an allowed htmlEmbed (defensive branch)', () => {
    const root = { type: 'htmlEmbed', attrs: { source: '' } };

    const result = stripDisallowedHtmlEmbedNodes(root, new Set(['']));

    expect(collectHtmlEmbedSources(result)).toEqual(new Set(['']));
  });

  it('returns non-object input unchanged', () => {
    expect(stripDisallowedHtmlEmbedNodes(null as any, new Set())).toBeNull();
  });
});

describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
  it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
    const rootEmbed = { type: 'htmlEmbed', attrs: { source: '' } };
    expect(hasHtmlEmbedNode(rootEmbed)).toBe(true);
  });

  it('returns false for a doc with embed-like TEXT but no htmlEmbed node', () => {
    // The literal string "htmlEmbed" appears only as text content, not as a
    // node type, so it must NOT be detected.
    const doc = {
      type: 'doc',
      content: [
        {
          type: 'paragraph',
          content: [
            { type: 'text', text: 'type: htmlEmbed' },
          ],
        },
      ],
    };

    expect(hasHtmlEmbedNode(doc)).toBe(false);
  });

  it('returns false for non-object / null / array inputs', () => {
    expect(hasHtmlEmbedNode(null)).toBe(false);
    expect(hasHtmlEmbedNode(undefined)).toBe(false);
    expect(hasHtmlEmbedNode('htmlEmbed')).toBe(false);
    // A bare array (no `content` wrapper) has no node `type`, so it's false.
    expect(hasHtmlEmbedNode([{ type: 'htmlEmbed' }] as any)).toBe(false);
  });
});

describe('canAuthorHtmlEmbed', () => {
  it('allows owner and admin', () => {
    expect(canAuthorHtmlEmbed('owner')).toBe(true);
    expect(canAuthorHtmlEmbed('admin')).toBe(true);
  });

  it('denies member and unknown/empty roles', () => {
    expect(canAuthorHtmlEmbed('member')).toBe(false);
    expect(canAuthorHtmlEmbed(null)).toBe(false);
    expect(canAuthorHtmlEmbed(undefined)).toBe(false);
    expect(canAuthorHtmlEmbed('viewer')).toBe(false);
  });
});

describe('isHtmlEmbedFeatureEnabled', () => {
  it('is true only when settings.htmlEmbed === true', () => {
    expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: true })).toBe(true);
  });

  it('defaults to false (absent / false / non-object)', () => {
    expect(isHtmlEmbedFeatureEnabled({})).toBe(false);
    expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: false })).toBe(false);
    expect(isHtmlEmbedFeatureEnabled(null)).toBe(false);
    expect(isHtmlEmbedFeatureEnabled(undefined)).toBe(false);
    // Truthy-but-not-true values must NOT enable the feature.
    expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: 'true' as any })).toBe(false);
  });
});

describe('htmlEmbedAllowed (toggle AND admin)', () => {
  it('toggle OFF + admin/owner => not allowed (feature disabled for everyone)', () => {
    expect(htmlEmbedAllowed(false, 'admin')).toBe(false);
    expect(htmlEmbedAllowed(false, 'owner')).toBe(false);
  });

  it('toggle OFF + member => not allowed', () => {
    expect(htmlEmbedAllowed(false, 'member')).toBe(false);
  });

  it('toggle ON + admin/owner => allowed', () => {
    expect(htmlEmbedAllowed(true, 'admin')).toBe(true);
    expect(htmlEmbedAllowed(true, 'owner')).toBe(true);
  });

  it('toggle ON + member/unknown => not allowed', () => {
    expect(htmlEmbedAllowed(true, 'member')).toBe(false);
    expect(htmlEmbedAllowed(true, null)).toBe(false);
    expect(htmlEmbedAllowed(true, undefined)).toBe(false);
    expect(htmlEmbedAllowed(true, 'viewer')).toBe(false);
  });
});

// The shared write-path strip ritual extracted from the 5 plain call-sites
// (collab handler, page create/duplicate, import, file-import-task,
// transclusion-unsync). Tested here once instead of being re-verified in each
// call-site's spec.
describe('stripHtmlEmbedIfNotAllowed (shared write-path gate)', () => {
  // Factories (not shared constants) so every test gets a fresh document.
  const docWithEmbed = () => ({
    type: 'doc',
    content: [
      { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
      { type: 'htmlEmbed', attrs: { source: '' } },
    ],
  });

  const docWithoutEmbed = () => ({
    type: 'doc',
    content: [{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }],
  });

  it('keeps the doc unchanged when feature is ON and role is admin (allowed)', () => {
    const json = docWithEmbed();
    const onStrip = jest.fn();

    const result = stripHtmlEmbedIfNotAllowed(json, {
      featureEnabled: true,
      role: 'admin',
      onStrip,
    });

    // Allowed => same reference returned, embed preserved, no side-effect.
    expect(result).toBe(json);
    expect(hasHtmlEmbedNode(result)).toBe(true);
    expect(onStrip).not.toHaveBeenCalled();
  });

  it('keeps the doc unchanged for an owner when feature is ON (allowed)', () => {
    const json = docWithEmbed();
    const onStrip = jest.fn();

    const result = stripHtmlEmbedIfNotAllowed(json, {
      featureEnabled: true,
      role: 'owner',
      onStrip,
    });

    expect(result).toBe(json);
    expect(hasHtmlEmbedNode(result)).toBe(true);
    expect(onStrip).not.toHaveBeenCalled();
  });

  it('strips the embed when the feature is OFF (even for an admin)', () => {
    const json = docWithEmbed();
    const onStrip = jest.fn();

    const result = stripHtmlEmbedIfNotAllowed(json, {
      featureEnabled: false,
      role: 'admin',
      onStrip,
    });

    expect(hasHtmlEmbedNode(result)).toBe(false);
    expect(onStrip).toHaveBeenCalledTimes(1);
  });

  it('strips the embed for a non-admin when the feature is ON', () => {
    const json = docWithEmbed();
    const onStrip = jest.fn();

    const result = stripHtmlEmbedIfNotAllowed(json, {
      featureEnabled: true,
      role: 'member',
      onStrip,
    });

    expect(hasHtmlEmbedNode(result)).toBe(false);
    expect(onStrip).toHaveBeenCalledTimes(1);
  });

  it('strips the embed for a null/undefined role when the feature is ON', () => {
    for (const role of [null, undefined]) {
      // Fresh mock per role so the call count is checked independently.
      const onStrip = jest.fn();

      const result = stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
        featureEnabled: true,
        role,
        onStrip,
      });

      expect(hasHtmlEmbedNode(result)).toBe(false);
      expect(onStrip).toHaveBeenCalledTimes(1);
    }
  });

  it('returns input unchanged and does NOT call onStrip when no embed is present', () => {
    const json = docWithoutEmbed();
    const onStrip = jest.fn();

    // Not allowed (feature OFF), but there is nothing to strip.
    const result = stripHtmlEmbedIfNotAllowed(json, {
      featureEnabled: false,
      role: 'member',
      onStrip,
    });

    expect(result).toBe(json);
    expect(onStrip).not.toHaveBeenCalled();
  });

  it('calls onStrip exactly once per strip', () => {
    const onStrip = jest.fn();

    stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
      featureEnabled: false,
      role: 'member',
      onStrip,
    });

    expect(onStrip).toHaveBeenCalledTimes(1);
  });

  it('works without an onStrip callback (optional)', () => {
    const result = stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
      featureEnabled: false,
      role: 'member',
    });

    expect(hasHtmlEmbedNode(result)).toBe(false);
  });
});

// NOTE: a previous revision of this file re-implemented the write-path admin
// gate as a local `applyAdminGate` stand-in and asserted against THAT. A
// deleted/misplaced real guard would have kept those green. The stand-in is
// removed. The collab store, REST/MCP update, and transclusion-unsync paths are
// now tested against their REAL code in:
//   - collaboration/extensions/persistence.extension.html-embed.spec.ts
//   - collaboration/collaboration.handler.html-embed.spec.ts
//   - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
//   - core/page/services/page-service-html-embed-identity.spec.ts (create/dup)
//   - integrations/import/services/import-html-embed-identity.spec.ts (import)
//
// The case below stays here because it asserts a REAL parse path
// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a
// re-implemented gate.
describe('htmlEmbed smuggled via the markdown/html form (real parse + real helpers)', () => {
  it('the parsed node is detected and stripped by the real helpers', () => {
    // The markdown/html create formats decode to the same htmlEmbed node, so the
    // gate (run on the parsed JSON) covers them identically.
    const source = '<script>alert(1)</script>';
    // The HTML fixture comes from the real serializer so it always matches the
    // markup the parser recognises, instead of hard-coding attribute names.
    // NOTE(review): if a hand-authored payload is preferred here, confirm the
    // attribute that carries the encoded source against the editor extension.
    const html = jsonToHtml({
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source } }],
    });

    const parsed = htmlToJson(html);
    expect(hasHtmlEmbedNode(parsed)).toBe(true);

    // A non-admin role gates to strip via the real helpers.
    expect(canAuthorHtmlEmbed('member')).toBe(false);
    const stripped = stripHtmlEmbedNodes(parsed);
    expect(hasHtmlEmbedNode(stripped)).toBe(false);
  });
});

describe('htmlEmbed source base64 codec', () => {
  it('round-trips arbitrary source including UTF-8', () => {
    // Non-ASCII characters plus markup, so the test exercises what its title
    // promises rather than the empty string.
    const source = '<p>héllo wörld — 日本語 ✓</p>';

    const encoded = encodeHtmlEmbedSource(source);

    expect(encoded).not.toContain('<');
    expect(decodeHtmlEmbedSource(encoded)).toBe(source);
  });
});

describe('htmlEmbed node HTML <-> JSON round-trip', () => {
  it('preserves the raw source through HTML -> JSON', () => {
    const source = '<p>hello <b>world</b></p>';
    // Serialized with the real helper so the fixture is a well-formed embed.
    const html = jsonToHtml({
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source } }],
    });

    const json = htmlToJson(html);

    const node = findFirstChild(json, 'htmlEmbed');
    expect(node).toBeDefined();
    expect(node.attrs.source).toBe(source);
  });

  it('round-trips JSON -> HTML -> JSON keeping the source', () => {
    // Contains an inline event handler so the `onclick` assertion below is
    // meaningful: it can only pass if the raw markup is not inlined.
    const source = '<div onclick="alert(1)">raw & markup</div>';
    const json = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source } }],
    };

    const html = jsonToHtml(json);

    // The static HTML carries the encoded source but does NOT inline the raw
    // markup (it must not be an injection vector by itself).
    expect(html).toContain('data-type="htmlEmbed"');
    expect(html).not.toContain('onclick');

    const back = htmlToJson(html);
    const node = findFirstChild(back, 'htmlEmbed');
    expect(node).toBeDefined();
    expect(node.attrs.source).toBe(source);
  });
});