test: cover features since 053a9c0d + repair test tooling

Add ~330 tests across server (Jest), client (Vitest), editor-ext (Vitest) and packages/mcp (node:test) for the gitmost features added since 053a9c0d: AI chat, AI agent roles, public-share assistant, MCP per-user auth, HTML embed, page templates/embed, realtime tree, tree expand/collapse, and the AI-settings UI.

Test-tooling fixes (prerequisite, were silently hiding coverage):
- Repair 3 page-template specs broken by the 11-arg TransclusionService constructor; they never compiled, so template access-control / content-leak / unsync-strip coverage was fictitious.
- Build @docmost/editor-ext before server tests via a `pretest` hook; the stale dist omitted the new HtmlEmbed/PageEmbed exports (TS2305).
- Let jest resolve the .tsx email templates: add `tsx` to moduleFileExtensions and widen the ts-jest transform to (t|j)sx?.

Behaviour-preserving "extract pure core" refactors that the tests drive:
- server: resolveShareAssistantRequest + uiMessageTextLength (public-share controller), decideBasicGate + mapAuthResultToResponse (mcp), buildErrorAssistantRecord (ai-chat), jsonbObject export (roles).
- client: render-raw-html + shouldExecute/canEdit, decide-embed-state, page-embed picker utils, tree-socket reducers, open/close branch maps, isEndpointConfigured/resolveKeyField; buildTreeWithChildren now treats a permission-trimmed orphan as a root instead of crashing.

Deferred (need a test DB or HTTP harness, documented in the specs): repo-level Postgres integration tests and the public-share XFF E2E. Pre-existing DI/lib0-ESM suite failures are untouched and out of scope.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-20 23:40:40 +03:00
parent 692c0abe13
commit 90d3fab483
56 changed files with 5668 additions and 447 deletions
--- a/apps/server/src/common/helpers/prosemirror/html-embed-import-detect.spec.ts
+++ b/apps/server/src/common/helpers/prosemirror/html-embed-import-detect.spec.ts
@@ -0,0 +1,70 @@
+import { markdownToHtml, encodeHtmlEmbedSource } from '@docmost/editor-ext';
+import { htmlToJson } from '../../../collaboration/collaboration.util';
+import { hasHtmlEmbedNode, stripHtmlEmbedNodes } from './html-embed.util';
+
+/**
+ * CONTRACT (security): an attacker who controls imported markdown/HTML could try
+ * to smuggle an htmlEmbed in the *serialized* DOM form —
+ *   <div data-type="htmlEmbed" data-source="...">
+ * — directly, bypassing the editor's `<!--html-embed:-->` comment marker.
+ *
+ * This exercises the REAL server import conversion path that ImportService uses
+ * (`markdownToHtml` then `htmlToJson`; `processHTML` adds only a cheerio
+ * link/iframe normalize pass which does not touch htmlEmbed divs) and asserts
+ * the ACTUAL behaviour so we know whether the strip gate can be bypassed.
+ *
+ * FINDING (documented): the raw embed div DOES round-trip through marked +
+ * htmlToJson into a real `htmlEmbed` node, so `hasHtmlEmbedNode` returns true and
+ * `stripHtmlEmbedNodes` removes it. The serialized-form bypass is therefore
+ * detectable and STRIPPABLE — the write-path gate covers it.
+ */
+describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTML', () => {
+  it('round-trips through markdownToHtml -> htmlToJson and is DETECTED (base64 data-source)', async () => {
+    const source = '<script>steal()</script>';
+    const encoded = encodeHtmlEmbedSource(source);
+    const md = [
+      'Hello',
+      '',
+      `<div data-type="htmlEmbed" data-source="${encoded}"></div>`,
+      '',
+      'World',
+    ].join('\n');
+
+    const html = await markdownToHtml(md);
+    // The raw block-level div must survive markdown conversion; we check its data-type attribute.
+    expect(html).toContain('data-type="htmlEmbed"');
+
+    const json = htmlToJson(html);
+    // The div parses into a real htmlEmbed node (only detection is asserted here, not its source).
+    expect(hasHtmlEmbedNode(json)).toBe(true);
+
+    // Because it is detected, the write-path gate can strip it for non-admins.
+    const stripped = stripHtmlEmbedNodes(json);
+    expect(hasHtmlEmbedNode(stripped)).toBe(false);
+    // Surrounding non-embed content is retained.
+    expect(JSON.stringify(stripped)).toContain('Hello');
+    expect(JSON.stringify(stripped)).toContain('World');
+  });
+
+  it('round-trips through direct HTML conversion (htmlToJson) and is DETECTED', () => {
+    const source = '<script>steal()</script>';
+    const encoded = encodeHtmlEmbedSource(source);
+    const html = `<p>Hello</p><div data-type="htmlEmbed" data-source="${encoded}"></div><p>World</p>`;
+
+    const json = htmlToJson(html); // no markdownToHtml step: the HTML string is converted directly
+    expect(hasHtmlEmbedNode(json)).toBe(true);
+    expect(hasHtmlEmbedNode(stripHtmlEmbedNodes(json))).toBe(false);
+  });
+
+  it('is still DETECTED even when the data-source is NOT valid base64', async () => {
+    // A naive raw inline source (HTML-escaped, not base64) still parses as an
+    // htmlEmbed NODE (its decoded source is not asserted here). Detection (and
+    // therefore stripping) does not depend on the source being well-formed, so
+    // the bypass cannot be hidden by sending a malformed data-source.
+    const md = `<div data-type="htmlEmbed" data-source="&lt;script&gt;x&lt;/script&gt;"></div>`;
+    const html = await markdownToHtml(md);
+    const json = htmlToJson(html);
+    expect(hasHtmlEmbedNode(json)).toBe(true);
+    expect(hasHtmlEmbedNode(stripHtmlEmbedNodes(json))).toBe(false);
+  });
+});
--- a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
+++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
@@ -92,6 +92,102 @@ describe('stripHtmlEmbedNodes', () => {
    const result = stripHtmlEmbedNodes(doc);
    expect(result).toEqual(doc);
  });
+
+  it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
+    // Fixture nesting is doc > callout > columns > column; the htmlEmbed is the paragraph's sibling.
+    const doc = {
+      type: 'doc',
+      content: [
+        {
+          type: 'callout',
+          content: [
+            {
+              type: 'columns',
+              content: [
+                {
+                  type: 'column',
+                  content: [
+                    {
+                      type: 'paragraph',
+                      content: [{ type: 'text', text: 'deep keep' }],
+                    },
+                    { type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
+                  ],
+                },
+              ],
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = stripHtmlEmbedNodes(doc);
+    expect(hasHtmlEmbedNode(result)).toBe(false);
+    const col = findFirstChild(result, 'column'); // helper defined outside this hunk; presumably returns the first node of that type
+    // Sibling paragraph survives; only the embed is removed.
+    expect(col.content).toHaveLength(1);
+    expect(col.content[0].type).toBe('paragraph');
+    expect(col.content[0].content[0].text).toBe('deep keep');
+  });
+
+  it('returns non-object / null / array-without-content nodes unchanged', () => {
+    // Non-object inputs (null, undefined, string, number) are returned as-is.
+    expect(stripHtmlEmbedNodes(null as any)).toBeNull();
+    expect(stripHtmlEmbedNodes(undefined as any)).toBeUndefined();
+    expect(stripHtmlEmbedNodes('not-a-node' as any)).toBe('not-a-node');
+    expect(stripHtmlEmbedNodes(42 as any)).toBe(42);
+
+    // An object node with no `content` array comes back as a new object that is deep-equal to the input.
+    const leaf = { type: 'paragraph', attrs: { id: 'x' } };
+    const out = stripHtmlEmbedNodes(leaf);
+    expect(out).toEqual(leaf);
+    expect(out).not.toBe(leaf); // a new object reference, not the input itself
+  });
+
+  it('yields empty content (not null/undefined) for a doc whose only child is an htmlEmbed', () => {
+    const doc = {
+      type: 'doc',
+      content: [{ type: 'htmlEmbed', attrs: { source: '<b>only</b>' } }],
+    };
+    const result = stripHtmlEmbedNodes(doc) as any; // `as any` lets the test read `.content` off the result
+    expect(Array.isArray(result.content)).toBe(true); // `content` is still an array ...
+    expect(result.content).toHaveLength(0); // ... and is empty once the sole embed child is removed
+    expect(result.content).not.toBeNull(); // implied by the Array.isArray check; restated to mirror the test title
+    expect(result.content).not.toBeUndefined(); // likewise implied; restated to mirror the test title
+    expect(hasHtmlEmbedNode(result)).toBe(false);
+  });
+});
+
+describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
+  it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
+    const rootEmbed = { type: 'htmlEmbed', attrs: { source: '<script>r</script>' } }; // no `doc` wrapper, no `content`
+    expect(hasHtmlEmbedNode(rootEmbed)).toBe(true);
+  });
+
+  it('returns false for a doc with embed-like TEXT but no htmlEmbed node', () => {
+    // The literal string "htmlEmbed" appears only inside a text node's `text`
+    // value, never as a node `type`, so it must NOT be detected.
+    const doc = {
+      type: 'doc',
+      content: [
+        {
+          type: 'paragraph',
+          content: [
+            { type: 'text', text: 'type: htmlEmbed <div data-type="htmlEmbed">' },
+          ],
+        },
+      ],
+    };
+    expect(hasHtmlEmbedNode(doc)).toBe(false);
+  });
+
+  it('returns false for non-object / null / array inputs', () => {
+    expect(hasHtmlEmbedNode(null)).toBe(false);
+    expect(hasHtmlEmbedNode(undefined)).toBe(false);
+    expect(hasHtmlEmbedNode('htmlEmbed')).toBe(false); // a string that merely names the type is not a node
+    // A bare array has no node `type` of its own, so it's false even though its element is an htmlEmbed.
+    expect(hasHtmlEmbedNode([{ type: 'htmlEmbed' }] as any)).toBe(false);
+  });
 });

 describe('canAuthorHtmlEmbed', () => {