test: cover features since 053a9c0d + repair test tooling
Add ~330 tests across server (Jest), client (Vitest), editor-ext (Vitest)
and packages/mcp (node:test) for the gitmost features added since
053a9c0d: AI chat, AI agent roles, public-share assistant, MCP per-user
auth, HTML embed, page templates/embed, realtime tree, tree
expand/collapse, and the AI-settings UI.
Test-tooling fixes (prerequisite; the breakage was silently hiding coverage):
- Repair 3 page-template specs broken by the 11-arg TransclusionService
constructor; they never compiled, so template access-control /
content-leak / unsync-strip coverage was fictitious.
- Build @docmost/editor-ext before server tests via a `pretest` hook;
the stale dist omitted the new HtmlEmbed/PageEmbed exports (TS2305).
- Let jest resolve the .tsx email templates: add `tsx` to
moduleFileExtensions and widen the ts-jest transform to (t|j)sx?.
Behaviour-preserving "extract pure core" refactors that the tests drive:
- server: resolveShareAssistantRequest + uiMessageTextLength
(public-share controller), decideBasicGate + mapAuthResultToResponse
(mcp), buildErrorAssistantRecord (ai-chat), jsonbObject export (roles).
- client: render-raw-html + shouldExecute/canEdit, decide-embed-state,
page-embed picker utils, tree-socket reducers, open/close branch maps,
isEndpointConfigured/resolveKeyField; buildTreeWithChildren now treats
a permission-trimmed orphan as a root instead of crashing.
Deferred (need a test DB or HTTP harness, documented in the specs):
repo-level Postgres integration tests and the public-share XFF E2E.
Pre-existing DI/lib0-ESM suite failures are untouched and out of scope.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
import { markdownToHtml, encodeHtmlEmbedSource } from '@docmost/editor-ext';
|
||||
import { htmlToJson } from '../../../collaboration/collaboration.util';
|
||||
import { hasHtmlEmbedNode, stripHtmlEmbedNodes } from './html-embed.util';
|
||||
|
||||
/**
|
||||
* CONTRACT (security): an attacker who controls imported markdown/HTML could try
|
||||
* to smuggle an htmlEmbed in the *serialized* DOM form —
|
||||
* <div data-type="htmlEmbed" data-source="...">
|
||||
* — directly, bypassing the editor's `<!--html-embed:-->` comment marker.
|
||||
*
|
||||
* This exercises the REAL server import conversion path that ImportService uses
|
||||
* (`markdownToHtml` then `htmlToJson`; `processHTML` adds only a cheerio
|
||||
* link/iframe normalize pass which does not touch htmlEmbed divs) and asserts
|
||||
* the ACTUAL behaviour so we know whether the strip gate can be bypassed.
|
||||
*
|
||||
* FINDING (documented): the raw embed div DOES round-trip through marked +
|
||||
* htmlToJson into a real `htmlEmbed` node, so `hasHtmlEmbedNode` returns true and
|
||||
* `stripHtmlEmbedNodes` removes it. The serialized-form bypass is therefore
|
||||
* detectable and STRIPPABLE — the write-path gate covers it.
|
||||
*/
|
||||
// Suite: feeds an htmlEmbed in its serialized <div data-type="htmlEmbed"> form
// through the conversion helpers and checks that hasHtmlEmbedNode detects it and
// stripHtmlEmbedNodes removes it.
describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTML', () => {
|
||||
// Case 1 (markdown path): the embed div is placed as a raw HTML block, separated
// by blank lines from a leading "Hello" and a trailing "World" paragraph.
it('round-trips through markdownToHtml -> htmlToJson and is DETECTED (base64 data-source)', async () => {
|
||||
const source = '<script>steal()</script>';
|
||||
const encoded = encodeHtmlEmbedSource(source);
|
||||
const md = [
|
||||
'Hello',
|
||||
'',
|
||||
`<div data-type="htmlEmbed" data-source="${encoded}"></div>`,
|
||||
'',
|
||||
'World',
|
||||
].join('\n');
|
||||
|
||||
const html = await markdownToHtml(md);
|
||||
// The raw block-level div must survive markdown rendering. Only the presence of
// its data-type attribute is asserted, not that the div is byte-for-byte verbatim.
|
||||
expect(html).toContain('data-type="htmlEmbed"');
|
||||
|
||||
const json = htmlToJson(html);
|
||||
// The div parses into a real htmlEmbed node.
// NOTE(review): the decoded source carried by the node is not asserted here,
// only the node's presence.
|
||||
expect(hasHtmlEmbedNode(json)).toBe(true);
|
||||
|
||||
// Because it is detected, the write-path gate can strip it for non-admins.
|
||||
const stripped = stripHtmlEmbedNodes(json);
|
||||
expect(hasHtmlEmbedNode(stripped)).toBe(false);
|
||||
// Surrounding non-embed content is retained (checked via the serialized JSON).
|
||||
expect(JSON.stringify(stripped)).toContain('Hello');
|
||||
expect(JSON.stringify(stripped)).toContain('World');
|
||||
});
|
||||
|
||||
// Case 2 (direct HTML path): same embed div, but handed straight to htmlToJson
// without going through markdownToHtml first.
it('round-trips through direct HTML conversion (htmlToJson) and is DETECTED', () => {
|
||||
const source = '<script>steal()</script>';
|
||||
const encoded = encodeHtmlEmbedSource(source);
|
||||
const html = `<p>Hello</p><div data-type="htmlEmbed" data-source="${encoded}"></div><p>World</p>`;
|
||||
|
||||
const json = htmlToJson(html);
|
||||
expect(hasHtmlEmbedNode(json)).toBe(true);
|
||||
expect(hasHtmlEmbedNode(stripHtmlEmbedNodes(json))).toBe(false);
|
||||
});
|
||||
|
||||
// Case 3 (malformed data-source): detection must not depend on the attribute
// holding a well-formed encoded payload.
it('is still DETECTED even when the data-source is NOT valid base64', async () => {
|
||||
// The data-source below is literal markup: neither base64 nor entity-escaped.
// It is still expected to parse as an htmlEmbed NODE.
|
||||
// NOTE(review): presumably the decoder yields an empty source for such input;
// this test does not assert the decoded value. Detection (and
|
||||
// therefore stripping) does not depend on the source being well-formed, so
|
||||
// the bypass cannot be hidden by sending a malformed data-source.
|
||||
const md = `<div data-type="htmlEmbed" data-source="<script>x</script>"></div>`;
|
||||
const html = await markdownToHtml(md);
|
||||
const json = htmlToJson(html);
|
||||
expect(hasHtmlEmbedNode(json)).toBe(true);
|
||||
expect(hasHtmlEmbedNode(stripHtmlEmbedNodes(json))).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -92,6 +92,102 @@ describe('stripHtmlEmbedNodes', () => {
|
||||
const result = stripHtmlEmbedNodes(doc);
|
||||
expect(result).toEqual(doc);
|
||||
});
|
||||
|
||||
// Verifies that stripping recurses through nested containers and removes only the
// embed, leaving its sibling paragraph in place.
it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
|
||||
// Fixture path: doc > callout > columns > column. The htmlEmbed is the second
// child of the column, directly after a paragraph. (The test title abbreviates
// the path and omits the `columns` level.)
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'callout',
|
||||
content: [
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'text', text: 'deep keep' }],
|
||||
},
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
const result = stripHtmlEmbedNodes(doc);
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
// findFirstChild is defined outside this excerpt; presumably it returns the first
// descendant node of the given type. TODO confirm against the helper.
const col = findFirstChild(result, 'column');
|
||||
// Sibling paragraph survives; only the embed is removed.
|
||||
expect(col.content).toHaveLength(1);
|
||||
expect(col.content[0].type).toBe('paragraph');
|
||||
expect(col.content[0].content[0].text).toBe('deep keep');
|
||||
});
|
||||
|
||||
// Covers degenerate inputs: primitives/null/undefined, and an object node that
// has no `content` array.
// NOTE(review): the title says "array-without-content ... unchanged", but the
// second half tests an OBJECT without `content` and expects a new, deep-equal
// object rather than the same reference; consider rewording the title.
it('returns non-object / null / array-without-content nodes unchanged', () => {
|
||||
// Non-object inputs are returned as-is (callers persist what they got).
|
||||
expect(stripHtmlEmbedNodes(null as any)).toBeNull();
|
||||
expect(stripHtmlEmbedNodes(undefined as any)).toBeUndefined();
|
||||
expect(stripHtmlEmbedNodes('not-a-node' as any)).toBe('not-a-node');
|
||||
expect(stripHtmlEmbedNodes(42 as any)).toBe(42);
|
||||
|
||||
// An object node with no `content` array comes back as a new object that is
// deep-equal to the input. Whether the copy is shallow or deep is not
// distinguished by these assertions.
|
||||
const leaf = { type: 'paragraph', attrs: { id: 'x' } };
|
||||
const out = stripHtmlEmbedNodes(leaf);
|
||||
expect(out).toEqual(leaf);
|
||||
expect(out).not.toBe(leaf); // a new object is returned, not the input reference
|
||||
});
|
||||
|
||||
// Edge case: when the only child is stripped, `content` must remain an empty
// array instead of being dropped or nulled.
it('yields empty content (not null/undefined) for a doc whose only child is an htmlEmbed', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'htmlEmbed', attrs: { source: '<b>only</b>' } }],
|
||||
};
|
||||
// Cast to any so `content` can be inspected without a typed node shape.
|
||||
const result = stripHtmlEmbedNodes(doc) as any;
|
||||
expect(Array.isArray(result.content)).toBe(true);
|
||||
expect(result.content).toHaveLength(0);
|
||||
// The two checks below are implied by the Array.isArray assertion above; they
// spell out the "not null/undefined" wording of the test title.
|
||||
expect(result.content).not.toBeNull();
|
||||
expect(result.content).not.toBeUndefined();
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// Suite: hasHtmlEmbedNode on inputs other than an ordinary doc with an embed
// child: an embed at the root, embed-looking text, and non-node values.
describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
|
||||
// The node passed in is itself the embed; there is no wrapping doc/content.
it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
|
||||
const rootEmbed = { type: 'htmlEmbed', attrs: { source: '<script>r</script>' } };
|
||||
expect(hasHtmlEmbedNode(rootEmbed)).toBe(true);
|
||||
});
|
||||
|
||||
// Guards against false positives from matching on text instead of node type.
it('returns false for a doc with embed-like TEXT but no htmlEmbed node', () => {
|
||||
// The literal string "htmlEmbed" appears only as text content, not as a
|
||||
// node type, so it must NOT be detected.
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [
|
||||
{ type: 'text', text: 'type: htmlEmbed <div data-type="htmlEmbed">' },
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
expect(hasHtmlEmbedNode(doc)).toBe(false);
|
||||
});
|
||||
|
||||
// Values that are not node objects must yield false.
it('returns false for non-object / null / array inputs', () => {
|
||||
expect(hasHtmlEmbedNode(null)).toBe(false);
|
||||
expect(hasHtmlEmbedNode(undefined)).toBe(false);
|
||||
expect(hasHtmlEmbedNode('htmlEmbed')).toBe(false);
|
||||
// A bare array (no `content` wrapper) is expected to yield false even though
// its element is an htmlEmbed. Presumably this is because the array itself has
// no node `type`; confirm against the implementation.
|
||||
expect(hasHtmlEmbedNode([{ type: 'htmlEmbed' }] as any)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('canAuthorHtmlEmbed', () => {
|
||||
|
||||
Reference in New Issue
Block a user