test: cover features since 053a9c0d + repair test tooling

Add ~330 tests across server (Jest), client (Vitest), editor-ext (Vitest)
and packages/mcp (node:test) for the gitmost features added since
053a9c0d: AI chat, AI agent roles, public-share assistant, MCP per-user
auth, HTML embed, page templates/embed, realtime tree, tree
expand/collapse, and the AI-settings UI.

Test-tooling fixes (prerequisite, were silently hiding coverage):
- Repair 3 page-template specs broken by the 11-arg TransclusionService
  constructor; they never compiled, so template access-control / content
  -leak / unsync-strip coverage was fictitious.
- Build @docmost/editor-ext before server tests via a `pretest` hook;
  the stale dist omitted the new HtmlEmbed/PageEmbed exports (TS2305).
- Let jest resolve the .tsx email templates: add `tsx` to
  moduleFileExtensions and widen the ts-jest transform to (t|j)sx?.

Behaviour-preserving "extract pure core" refactors that the tests drive:
- server: resolveShareAssistantRequest + uiMessageTextLength
  (public-share controller), decideBasicGate + mapAuthResultToResponse
  (mcp), buildErrorAssistantRecord (ai-chat), jsonbObject export (roles).
- client: render-raw-html + shouldExecute/canEdit, decide-embed-state,
  page-embed picker utils, tree-socket reducers, open/close branch maps,
  isEndpointConfigured/resolveKeyField; buildTreeWithChildren now treats
  a permission-trimmed orphan as a root instead of crashing.

Deferred (need a test DB or HTTP harness, documented in the specs):
repo-level Postgres integration tests and the public-share XFF E2E.
Pre-existing DI/lib0-ESM suite failures are untouched and out of scope.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-20 23:40:40 +03:00
parent 692c0abe13
commit 90d3fab483
56 changed files with 5668 additions and 447 deletions

View File

@@ -0,0 +1,70 @@
import { markdownToHtml, encodeHtmlEmbedSource } from '@docmost/editor-ext';
import { htmlToJson } from '../../../collaboration/collaboration.util';
import { hasHtmlEmbedNode, stripHtmlEmbedNodes } from './html-embed.util';
/**
* CONTRACT (security): an attacker who controls imported markdown/HTML could try
* to smuggle an htmlEmbed in the *serialized* DOM form —
* <div data-type="htmlEmbed" data-source="...">
* — directly, bypassing the editor's `<!--html-embed:-->` comment marker.
*
* This exercises the REAL server import conversion path that ImportService uses
* (`markdownToHtml` then `htmlToJson`; `processHTML` adds only a cheerio
* link/iframe normalize pass which does not touch htmlEmbed divs) and asserts
* the ACTUAL behaviour so we know whether the strip gate can be bypassed.
*
* FINDING (documented): the raw embed div DOES round-trip through marked +
* htmlToJson into a real `htmlEmbed` node, so `hasHtmlEmbedNode` returns true and
* `stripHtmlEmbedNodes` removes it. The serialized-form bypass is therefore
* detectable and STRIPPABLE — the write-path gate covers it.
*/
describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTML', () => {
it('round-trips through markdownToHtml -> htmlToJson and is DETECTED (base64 data-source)', async () => {
const source = '<script>steal()</script>';
const encoded = encodeHtmlEmbedSource(source);
const md = [
'Hello',
'',
`<div data-type="htmlEmbed" data-source="${encoded}"></div>`,
'',
'World',
].join('\n');
const html = await markdownToHtml(md);
// marked preserves the raw block-level div verbatim.
expect(html).toContain('data-type="htmlEmbed"');
const json = htmlToJson(html);
// The div parses into a real htmlEmbed node carrying the decoded source.
expect(hasHtmlEmbedNode(json)).toBe(true);
// Because it is detected, the write-path gate can strip it for non-admins.
const stripped = stripHtmlEmbedNodes(json);
expect(hasHtmlEmbedNode(stripped)).toBe(false);
// Surrounding non-embed content is retained.
expect(JSON.stringify(stripped)).toContain('Hello');
expect(JSON.stringify(stripped)).toContain('World');
});
it('round-trips through direct HTML conversion (htmlToJson) and is DETECTED', () => {
const source = '<script>steal()</script>';
const encoded = encodeHtmlEmbedSource(source);
const html = `<p>Hello</p><div data-type="htmlEmbed" data-source="${encoded}"></div><p>World</p>`;
const json = htmlToJson(html);
expect(hasHtmlEmbedNode(json)).toBe(true);
expect(hasHtmlEmbedNode(stripHtmlEmbedNodes(json))).toBe(false);
});
it('is still DETECTED even when the data-source is NOT valid base64', async () => {
// A naive raw inline source (HTML-escaped, not base64) still parses as an
// htmlEmbed NODE — the decoder just yields an empty source. Detection (and
// therefore stripping) does not depend on the source being well-formed, so
// the bypass cannot be hidden by sending a malformed data-source.
const md = `<div data-type="htmlEmbed" data-source="&lt;script&gt;x&lt;/script&gt;"></div>`;
const html = await markdownToHtml(md);
const json = htmlToJson(html);
expect(hasHtmlEmbedNode(json)).toBe(true);
expect(hasHtmlEmbedNode(stripHtmlEmbedNodes(json))).toBe(false);
});
});

View File

@@ -92,6 +92,102 @@ describe('stripHtmlEmbedNodes', () => {
const result = stripHtmlEmbedNodes(doc);
expect(result).toEqual(doc);
});
it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
// htmlEmbed sits as a sibling of a paragraph, nested four containers deep.
const doc = {
type: 'doc',
content: [
{
type: 'callout',
content: [
{
type: 'columns',
content: [
{
type: 'column',
content: [
{
type: 'paragraph',
content: [{ type: 'text', text: 'deep keep' }],
},
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
],
},
],
},
],
},
],
};
const result = stripHtmlEmbedNodes(doc);
expect(hasHtmlEmbedNode(result)).toBe(false);
const col = findFirstChild(result, 'column');
// Sibling paragraph survives; only the embed is removed.
expect(col.content).toHaveLength(1);
expect(col.content[0].type).toBe('paragraph');
expect(col.content[0].content[0].text).toBe('deep keep');
});
it('returns non-object / null / array-without-content nodes unchanged', () => {
// Non-object inputs are returned as-is (callers persist what they got).
expect(stripHtmlEmbedNodes(null as any)).toBeNull();
expect(stripHtmlEmbedNodes(undefined as any)).toBeUndefined();
expect(stripHtmlEmbedNodes('not-a-node' as any)).toBe('not-a-node');
expect(stripHtmlEmbedNodes(42 as any)).toBe(42);
// An object node with no `content` array is returned shallow-cloned, equal.
const leaf = { type: 'paragraph', attrs: { id: 'x' } };
const out = stripHtmlEmbedNodes(leaf);
expect(out).toEqual(leaf);
expect(out).not.toBe(leaf); // new object, input not mutated
});
it('yields empty content (not null/undefined) for a doc whose only child is an htmlEmbed', () => {
const doc = {
type: 'doc',
content: [{ type: 'htmlEmbed', attrs: { source: '<b>only</b>' } }],
};
const result = stripHtmlEmbedNodes(doc) as any;
expect(Array.isArray(result.content)).toBe(true);
expect(result.content).toHaveLength(0);
expect(result.content).not.toBeNull();
expect(result.content).not.toBeUndefined();
expect(hasHtmlEmbedNode(result)).toBe(false);
});
});
describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
const rootEmbed = { type: 'htmlEmbed', attrs: { source: '<script>r</script>' } };
expect(hasHtmlEmbedNode(rootEmbed)).toBe(true);
});
it('returns false for a doc with embed-like TEXT but no htmlEmbed node', () => {
// The literal string "htmlEmbed" appears only as text content, not as a
// node type, so it must NOT be detected.
const doc = {
type: 'doc',
content: [
{
type: 'paragraph',
content: [
{ type: 'text', text: 'type: htmlEmbed <div data-type="htmlEmbed">' },
],
},
],
};
expect(hasHtmlEmbedNode(doc)).toBe(false);
});
it('returns false for non-object / null / array inputs', () => {
expect(hasHtmlEmbedNode(null)).toBe(false);
expect(hasHtmlEmbedNode(undefined)).toBe(false);
expect(hasHtmlEmbedNode('htmlEmbed')).toBe(false);
// A bare array (no `content` wrapper) has no node `type`, so it's false.
expect(hasHtmlEmbedNode([{ type: 'htmlEmbed' }] as any)).toBe(false);
});
});
describe('canAuthorHtmlEmbed', () => {