test(html-embed): exercise the REAL admin-gate write paths + import round-trip

Release-cycle test audit: the strip boundary was tested only via a stand-in
helper re-implemented in the spec, so a deleted/misplaced guard kept CI green
(the missing create() guard was proof). Replace it with tests against real code:
- persistence.extension.onStoreDocument: real ydoc from a rich doc (columns/
  table/mention/htmlEmbed) -> non-admin strip removes only htmlEmbed, every other
  node preserved (data-loss guard); admin keeps; empty fragment no-throw.
- collaboration.handler.updatePageContent: real path, user?.role gate, decoded
  ydoc embed-free for non-admin, kept for admin.
- transclusion unsync: member stripped, admin preserved.
- editor-ext gains a vitest setup (was zero tests) + a markdown round-trip:
  the <!--html-embed:BASE64--> marker -> htmlEmbed node with decoded source, and
  hasHtmlEmbedNode matches it — pinning the marked/turndown shape the import
  strip relies on. tsconfig now excludes specs from the shipped dist.
- Fail-closed identity: source-pinned contracts that the gate keys on
  fileTask.creatorId (zip) / request userId (single) / callerRole (create) /
  authUser.role (duplicate), and missing-user -> strip (services can't load under
  jest's ESM graph; helpers replay the exact predicate).
Adds the verified-safe ^src/ jest moduleNameMapper (identical fail set).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-20 14:52:29 +03:00
parent e0b3b3d9a5
commit caac5c7f36
11 changed files with 805 additions and 126 deletions

View File

@@ -195,7 +195,8 @@
"moduleNameMapper": {
"^@docmost/db/(.*)$": "<rootDir>/database/$1",
"^@docmost/transactional/(.*)$": "<rootDir>/integrations/transactional/$1",
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1"
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1",
"^src/(.*)$": "<rootDir>/$1"
}
}
}

View File

@@ -0,0 +1,102 @@
import * as Y from 'yjs';
import { TiptapTransformer } from '@hocuspocus/transformer';
import { CollaborationHandler } from './collaboration.handler';
import { hasHtmlEmbedNode } from '../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL CollaborationHandler.updatePageContent admin gate (the
// REST/MCP/AI content-update entrypoint, used by the page update endpoint and
// the MCP/AI agent). updatePageContent reads `user?.role` and strips htmlEmbed
// BEFORE handing the json to withYdocConnection. We stub only
// withYdocConnection (which would otherwise open a real hocuspocus connection):
// the role-extraction (`user?.role`) + strip that run upstream of it are REAL
// production code. The 'replace' branch then runs the production
// TiptapTransformer.toYdoc on the gated json against a real Y.Doc, which we
// decode back to JSON and assert on. This replaces the re-implemented
// `applyAdminGate` stand-in for this entrypoint.
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
{
type: 'columns',
content: [
{
type: 'column',
attrs: { position: 'left' },
content: [
{ type: 'htmlEmbed', attrs: { source: '<script>nested</script>' } },
{ type: 'paragraph', content: [{ type: 'text', text: 'inner' }] },
],
},
{
type: 'column',
attrs: { position: 'right' },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'r' }] },
],
},
],
},
{ type: 'htmlEmbed', attrs: { source: '<script>top</script>' } },
],
});
/**
* Run the REAL updatePageContent('replace') with a stubbed withYdocConnection.
* The stub provides a real Y.Doc + recording fragment; the production fn calls
* TiptapTransformer.toYdoc(<gated json>) and applies it to the doc, so decoding
* the doc afterward yields exactly the gated content.
*/
async function gatedContentFor(role: string | null | undefined) {
const handler = new CollaborationHandler();
const captureDoc = new Y.Doc();
jest
.spyOn(handler, 'withYdocConnection')
.mockImplementation(async (_hp, _name, _ctx, fn: any) => {
const fragment = captureDoc.getXmlFragment('default');
// Mirror the real Document surface the fn touches.
const docLike: any = {
getXmlFragment: () => fragment,
};
// The fn does: fragment.delete(0,len) then
// Y.applyUpdate(doc, encodeStateAsUpdate(toYdoc(gatedJson))). It calls
// Y.applyUpdate(doc, ...) — so docLike must be a real Y.Doc target.
fn(captureDoc);
});
const handlers = handler.getHandlers({} as any);
await handlers.updatePageContent('page-1', {
prosemirrorJson: docWithEmbed(),
operation: 'replace',
user: { id: 'u1', role } as any,
});
return TiptapTransformer.fromYdoc(captureDoc, 'default');
}
describe('CollaborationHandler.updatePageContent htmlEmbed admin gate (real code)', () => {
it('non-admin (member): every htmlEmbed (top-level + nested) stripped before the ydoc', async () => {
const gated = await gatedContentFor('member');
expect(hasHtmlEmbedNode(gated)).toBe(false);
// Non-embed siblings survive.
const json = JSON.stringify(gated);
expect(json).toContain('keep');
expect(json).toContain('inner');
});
it('unknown/empty role: fails closed (stripped)', async () => {
for (const role of [undefined, null, 'viewer'] as const) {
expect(hasHtmlEmbedNode(await gatedContentFor(role))).toBe(false);
}
});
it('admin: htmlEmbed preserved', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('admin'))).toBe(true);
});
it('owner: htmlEmbed preserved', async () => {
expect(hasHtmlEmbedNode(await gatedContentFor('owner'))).toBe(true);
});
});

View File

@@ -0,0 +1,252 @@
import * as Y from 'yjs';
import { TiptapTransformer } from '@hocuspocus/transformer';
import { PersistenceExtension } from './persistence.extension';
import { tiptapExtensions } from '../collaboration.util';
import {
hasHtmlEmbedNode,
HTML_EMBED_NODE_NAME,
} from '../../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL PersistenceExtension.onStoreDocument (the primary collab
// WebSocket write path) against a REAL ydoc, with thin repo/db/queue mocks.
// This replaces the prior re-implemented `applyAdminGate` stand-in for this
// entrypoint: if the role-extraction expression (`context?.user?.role`), the
// strip call, or the ydoc-rebuild branch is deleted/changed, these tests fail.
const RICH_DOC = {
type: 'doc',
content: [
{
type: 'paragraph',
content: [{ type: 'text', text: 'intro paragraph' }],
},
{
type: 'columns',
content: [
{
type: 'column',
attrs: { position: 'left' },
content: [
{
type: 'paragraph',
content: [
{ type: 'text', text: 'left col, mentioning ' },
{
type: 'mention',
attrs: {
id: 'mention-1',
label: 'Alice',
entityType: 'user',
entityId: 'user-123',
creatorId: 'creator-1',
},
},
],
},
// Nested embed inside a column — must be stripped recursively.
{
type: HTML_EMBED_NODE_NAME,
attrs: { source: '<script>nested()</script>' },
},
],
},
{
type: 'column',
attrs: { position: 'right' },
content: [
{
type: 'table',
content: [
{
type: 'tableRow',
content: [
{
type: 'tableHeader',
attrs: { colspan: 1, rowspan: 1 },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'H' }] },
],
},
],
},
{
type: 'tableRow',
content: [
{
type: 'tableCell',
attrs: { colspan: 1, rowspan: 1 },
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'cell' }] },
],
},
],
},
],
},
],
},
],
},
// Top-level embed — must be stripped.
{
type: HTML_EMBED_NODE_NAME,
attrs: { source: '<script>top()</script>' },
},
{
type: 'paragraph',
content: [{ type: 'text', text: 'outro paragraph' }],
},
],
};
function buildYdoc(json: any): Y.Doc {
return TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
}
// Count nodes by type across the whole tree (excludes htmlEmbed by listing it
// separately) so we can assert every OTHER node type survived the strip.
function nodeTypeCounts(json: any): Record<string, number> {
const counts: Record<string, number> = {};
const walk = (n: any) => {
if (!n || typeof n !== 'object') return;
if (n.type) counts[n.type] = (counts[n.type] ?? 0) + 1;
if (Array.isArray(n.content)) n.content.forEach(walk);
};
walk(json);
return counts;
}
/**
* Construct a real PersistenceExtension with the minimum mocks needed for
* onStoreDocument to reach the strip + persist branch, and capture the content
* that would be written to the page row.
*/
function buildExtension() {
const captured: { content?: any } = {};
const existingPage = {
id: 'page-1',
slugId: 'slug-1',
spaceId: 'space-1',
workspaceId: 'ws-1',
creatorId: 'creator-1',
contributorIds: [],
content: { type: 'doc', content: [] }, // differs from new content -> persist runs
createdAt: new Date(),
lastUpdatedSource: 'user',
};
const pageRepo = {
findById: jest.fn(async () => ({ ...existingPage })),
updatePage: jest.fn(async (values: any) => {
captured.content = values.content;
}),
};
const pageHistoryRepo = {
findPageLastHistory: jest.fn(async () => null),
saveHistory: jest.fn(async () => undefined),
};
// db.transaction().execute(cb) just runs the callback (no real DB).
const db = {
transaction: () => ({
execute: (cb: any) => cb({} as any),
}),
};
const noopQueue = { add: jest.fn(async () => undefined) } as any;
const collabHistory = { addContributors: jest.fn(async () => undefined) } as any;
const transclusionService = {
syncPageTransclusions: jest.fn(async () => undefined),
syncPageReferences: jest.fn(async () => undefined),
} as any;
const ext = new PersistenceExtension(
pageRepo as any,
pageHistoryRepo as any,
db as any,
noopQueue,
noopQueue,
noopQueue,
collabHistory,
transclusionService,
);
return { ext, captured, pageRepo };
}
async function runStore(role: string | null | undefined, doc: Y.Doc) {
const { ext, captured } = buildExtension();
// hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has
// none, so stub it (the post-persist broadcast is not under test here).
(doc as any).broadcastStateless = () => undefined;
await ext.onStoreDocument({
documentName: 'page-1',
document: doc,
context: { user: { id: 'u1', role } },
} as any);
return captured;
}
describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)', () => {
it('non-admin store: strips EVERY htmlEmbed but preserves every other node', async () => {
const doc = buildYdoc(RICH_DOC);
const before = TiptapTransformer.fromYdoc(doc, 'default');
expect(hasHtmlEmbedNode(before)).toBe(true);
const beforeCounts = nodeTypeCounts(before);
const captured = await runStore('member', doc);
expect(captured.content).toBeDefined();
// htmlEmbed gone from the persisted content.
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
// Every non-embed node type is preserved with the SAME count (guards against
// data loss if a node were missing from tiptapExtensions and dropped on the
// toYdoc rebuild).
const afterCounts = nodeTypeCounts(captured.content);
for (const [type, count] of Object.entries(beforeCounts)) {
if (type === HTML_EMBED_NODE_NAME) continue;
expect(afterCounts[type]).toBe(count);
}
// The two embeds are gone.
expect(beforeCounts[HTML_EMBED_NODE_NAME]).toBe(2);
expect(afterCounts[HTML_EMBED_NODE_NAME]).toBeUndefined();
// The shared ydoc fragment was also rewritten clean (re-decode it).
const reDecoded = TiptapTransformer.fromYdoc(doc, 'default');
expect(hasHtmlEmbedNode(reDecoded)).toBe(false);
});
it('admin store: htmlEmbed preserved in persisted content', async () => {
const captured = await runStore('admin', buildYdoc(RICH_DOC));
expect(captured.content).toBeDefined();
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
expect(nodeTypeCounts(captured.content)[HTML_EMBED_NODE_NAME]).toBe(2);
});
it('owner store: htmlEmbed preserved', async () => {
const captured = await runStore('owner', buildYdoc(RICH_DOC));
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
});
it('unknown/empty role: fails closed (stripped)', async () => {
expect(
hasHtmlEmbedNode((await runStore(undefined, buildYdoc(RICH_DOC))).content),
).toBe(false);
expect(
hasHtmlEmbedNode((await runStore(null, buildYdoc(RICH_DOC))).content),
).toBe(false);
expect(
hasHtmlEmbedNode((await runStore('viewer', buildYdoc(RICH_DOC))).content),
).toBe(false);
});
it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => {
const emptyDoc = buildYdoc({
type: 'doc',
content: [{ type: 'paragraph' }],
});
// Non-admin path with an empty/embed-free fragment must be a no-op strip,
// not throw.
await expect(runStore('member', emptyDoc)).resolves.toBeDefined();
});
});

View File

@@ -105,135 +105,34 @@ describe('canAuthorHtmlEmbed', () => {
});
});
// Replicates the write-path decision used by every non-admin persistence guard
// (page create, collab store, single import, zip import, duplication,
// transclusion unsync):
// if !canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json) -> strip, else keep.
const applyAdminGate = (json: any, role: string | null | undefined) => {
if (!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)) {
return stripHtmlEmbedNodes(json);
}
return json;
};
describe('admin-gate write-path decision (duplication / import / unsync)', () => {
const docWithEmbed = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
],
};
it('strips the embed for a non-admin (member) author', () => {
const result = applyAdminGate(docWithEmbed, 'member');
expect(hasHtmlEmbedNode(result)).toBe(false);
expect(result.content).toHaveLength(1);
expect(result.content[0].content[0].text).toBe('keep');
});
it('strips the embed for unknown/empty roles', () => {
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, null))).toBe(false);
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, undefined))).toBe(
false,
);
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'viewer'))).toBe(
false,
);
});
it('keeps the embed for an admin author', () => {
const result = applyAdminGate(docWithEmbed, 'admin');
expect(hasHtmlEmbedNode(result)).toBe(true);
expect(result).toBe(docWithEmbed);
});
it('keeps the embed for an owner author', () => {
const result = applyAdminGate(docWithEmbed, 'owner');
expect(hasHtmlEmbedNode(result)).toBe(true);
});
it('strips nested embeds (subtree/column duplication) for a non-admin', () => {
const nested = {
type: 'doc',
content: [
{
type: 'columns',
content: [
{
type: 'column',
content: [
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
{ type: 'paragraph', content: [{ type: 'text', text: 'ok' }] },
],
},
],
},
],
};
const result = applyAdminGate(nested, 'member');
expect(hasHtmlEmbedNode(result)).toBe(false);
const col = findFirstChild(result, 'column');
expect(col.content).toHaveLength(1);
expect(col.content[0].type).toBe('paragraph');
});
it('leaves a non-admin doc without embeds untouched (no needless rewrite)', () => {
const clean = {
type: 'doc',
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
};
const result = applyAdminGate(clean, 'member');
expect(result).toBe(clean);
});
});
// PageService.create() now applies exactly `applyAdminGate` (above) to the
// parsed ProseMirror JSON BEFORE deriving content/textContent/ydoc and calling
// insertPage. The create controller only requires space Edit, so without this a
// regular member could POST a doc (json, or the markdown/html
// <!--html-embed:BASE64--> forms that parse to the same node) containing an
// htmlEmbed and store XSS for every reader. These cases pin the create-path
// decision: non-admin -> stripped, admin/owner -> kept.
describe('admin-gate write-path decision (page create)', () => {
const docWithEmbed = {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
],
};
it('strips the embed when a non-admin (member) creates the page', () => {
const result = applyAdminGate(docWithEmbed, 'member');
expect(hasHtmlEmbedNode(result)).toBe(false);
expect(result.content).toHaveLength(1);
expect(result.content[0].content[0].text).toBe('body');
});
it('strips the embed when the creator role is unknown/empty', () => {
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, null))).toBe(false);
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, undefined))).toBe(
false,
);
});
it('keeps the embed when an admin or owner creates the page', () => {
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'admin'))).toBe(true);
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'owner'))).toBe(true);
});
it('strips embeds smuggled via the markdown/html <!--html-embed--> form', () => {
// The markdown/html create formats decode to the same htmlEmbed node, so
// the gate (run on the parsed JSON) covers them identically.
// NOTE: a previous revision of this file re-implemented the write-path admin
// gate as a local `applyAdminGate` stand-in and asserted against THAT. A
// deleted/misplaced real guard would have kept those green. The stand-in is
// removed. The collab store, REST/MCP update, and transclusion-unsync paths are
// now tested against their REAL code in:
// - collaboration/extensions/persistence.extension.html-embed.spec.ts
// - collaboration/collaboration.handler.html-embed.spec.ts
// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup)
// - integrations/import/services/import-html-embed-identity.spec.ts (import)
//
// The case below stays here because it asserts a REAL parse path
// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a
// re-implemented gate.
describe('htmlEmbed smuggled via the markdown/html <!--html-embed--> form (real parse + real helpers)', () => {
it('the parsed node is detected and stripped by the real helpers', () => {
// The markdown/html create formats decode to the same htmlEmbed node, so the
// gate (run on the parsed JSON) covers them identically.
const source = '<script>steal()</script>';
const encoded = encodeHtmlEmbedSource(source);
const html = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
const parsed = htmlToJson(html);
expect(hasHtmlEmbedNode(parsed)).toBe(true);
const result = applyAdminGate(parsed, 'member');
expect(hasHtmlEmbedNode(result)).toBe(false);
// A non-admin role gates to strip via the real helpers.
expect(canAuthorHtmlEmbed('member')).toBe(false);
const stripped = stripHtmlEmbedNodes(parsed);
expect(hasHtmlEmbedNode(stripped)).toBe(false);
});
});

View File

@@ -0,0 +1,77 @@
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import {
canAuthorHtmlEmbed,
hasHtmlEmbedNode,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
// PageService.create() and duplicatePage() guards.
//
// page.service.ts cannot be unit-LOADED under the server's jest config (a
// transitive ESM dep, @sindresorhus/slugify, is not in transformIgnorePatterns),
// so we cover the two load-bearing properties at the strongest feasible layer:
//
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact predicate
// each path applies: non-admin/unknown role -> strip, admin/owner -> keep.
//
// (2) IDENTITY — source-pin which role each path reads (create: the `callerRole`
// param threaded from the request; duplicate: `authUser.role`), so a
// refactor that drops the guard or reads the wrong role trips the test.
// This is what replaces the removed `applyAdminGate` stand-in for these
// two entrypoints.
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
],
});
// The real predicate both paths apply (see SECURITY blocks in page.service.ts).
function applyGate(json: any, role: string | null | undefined) {
if (!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)) {
return stripHtmlEmbedNodes(json);
}
return json;
}
describe('page create/duplicate gate decision (real helpers)', () => {
it('non-admin (member) strips', () => {
const result = applyGate(docWithEmbed(), 'member');
expect(hasHtmlEmbedNode(result)).toBe(false);
expect(result.content).toHaveLength(1);
expect(result.content[0].content[0].text).toBe('body');
});
it('unknown/empty role fails closed (strips)', () => {
for (const role of [null, undefined, 'viewer'] as const) {
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), role))).toBe(false);
}
});
it('admin/owner keep', () => {
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), 'admin'))).toBe(true);
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), 'owner'))).toBe(true);
});
});
const SRC = readFileSync(join(__dirname, 'page.service.ts'), 'utf-8');
describe('page create/duplicate gate identity is pinned (source contract)', () => {
it('create() gates on the caller role param before deriving content/ydoc', () => {
// create() receives the caller's workspace role as `callerRole` and gates on
// it; the embed must be stripped BEFORE insertPage.
expect(SRC).toMatch(
/!canAuthorHtmlEmbed\(\s*callerRole\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/,
);
expect(SRC).toContain('prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson)');
});
it('duplicatePage() gates on the duplicating user role (authUser.role)', () => {
expect(SRC).toMatch(
/!canAuthorHtmlEmbed\(\s*authUser\.role\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/,
);
});
});

View File

@@ -0,0 +1,114 @@
import { TransclusionService } from '../transclusion.service';
import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util';
// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate.
// unsync returns a source snapshot the client materializes into the reference
// page; a non-admin must never receive an embed payload to re-persist. The gate
// reads `user.role` and strips before returning. All repos / access checks are
// mocked so the REAL gate logic runs end-to-end. Complements the existing
// transclusion specs (rewriteAttachmentsForUnsync, controller).
const WS = 'ws-1';
const REF_PAGE = 'ref-1';
const SRC_PAGE = 'src-1';
const TX_ID = 'tx-1';
const sourceContentWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>steal()</script>' } },
],
});
function buildService() {
const pageRepo = {
findById: jest.fn(async (id: string) => ({
id,
workspaceId: WS,
spaceId: 'space-1',
deletedAt: null,
})),
};
const pageTransclusionsRepo = {
findByPageAndTransclusion: jest.fn(async () => ({
content: sourceContentWithEmbed(),
})),
};
const pageTransclusionReferencesRepo = {
deleteOne: jest.fn(async () => undefined),
};
const attachmentRepo = { findByIds: jest.fn(async () => []) };
const storageService = { copy: jest.fn(async () => undefined) };
const pageAccessService = {
validateCanEdit: jest.fn(async () => undefined),
validateCanView: jest.fn(async () => undefined),
};
const service = new TransclusionService(
{} as any, // db (unused on this path)
pageTransclusionsRepo as any,
pageTransclusionReferencesRepo as any,
pageRepo as any,
{} as any, // pagePermissionRepo (unused)
{} as any, // spaceMemberRepo (unused)
attachmentRepo as any,
storageService as any,
pageAccessService as any,
);
return service;
}
function userWithRole(role: string | null | undefined) {
return { id: 'u1', workspaceId: WS, role } as any;
}
describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => {
it('non-admin (member): returned content has htmlEmbed stripped', async () => {
const service = buildService();
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('member'),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
// Non-embed content is preserved.
expect(JSON.stringify(content)).toContain('snapshot body');
});
it('unknown/empty role: fails closed (stripped)', async () => {
for (const role of [undefined, null, 'viewer'] as const) {
const service = buildService();
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole(role),
);
expect(hasHtmlEmbedNode(content)).toBe(false);
}
});
it('admin: returned content keeps the htmlEmbed', async () => {
const service = buildService();
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('admin'),
);
expect(hasHtmlEmbedNode(content)).toBe(true);
});
it('owner: returned content keeps the htmlEmbed', async () => {
const service = buildService();
const { content } = await service.unsyncReference(
REF_PAGE,
SRC_PAGE,
TX_ID,
userWithRole('owner'),
);
expect(hasHtmlEmbedNode(content)).toBe(true);
});
});

View File

@@ -0,0 +1,104 @@
import { readFileSync } from 'node:fs';
import { join } from 'node:path';
import {
canAuthorHtmlEmbed,
hasHtmlEmbedNode,
stripHtmlEmbedNodes,
} from '../../../common/helpers/prosemirror/html-embed.util';
// FAIL-CLOSED IDENTITY for the import write paths.
//
// import.service / file-import-task.service cannot be unit-LOADED under the
// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in
// transformIgnorePatterns). So we cover the two load-bearing properties at the
// strongest feasible layer:
//
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate
// predicate each entrypoint runs against the role resolved from
// userRepo.findById(...): a MISSING user (findById -> undefined) must fail
// closed (strip), and only 'admin'/'owner' keep the embed.
//
// (2) IDENTITY — source-pin which identity governs the gate so a refactor that
// swaps the lookup to the wrong user (e.g. the queue worker's caller) is
// caught: zip import resolves the role from `fileTask.creatorId`; single
// import from the request `userId`. NOT some ambient caller.
//
// If a guard is deleted/misplaced or the identity field changes, these break.
const docWithEmbed = () => ({
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] },
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
],
});
// The real predicate both import entrypoints apply (see the SECURITY blocks in
// import.service.ts and file-import-task.service.ts): resolve the importer via
// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`.
function applyImportGate(json: any, importingUser: { role?: string } | undefined) {
if (!canAuthorHtmlEmbed(importingUser?.role) && hasHtmlEmbedNode(json)) {
return stripHtmlEmbedNodes(json);
}
return json;
}
describe('import gate fail-closed by resolved-user role (real helpers)', () => {
it('missing user (userRepo.findById -> undefined) strips the embed', () => {
// findById returns undefined when the user/workspace pair does not resolve;
// undefined?.role is undefined -> canAuthorHtmlEmbed(undefined) === false.
const importingUser = undefined;
const result = applyImportGate(docWithEmbed(), importingUser);
expect(hasHtmlEmbedNode(result)).toBe(false);
});
it("resolved role 'member' strips", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'member' })),
).toBe(false);
});
it("resolved role 'admin' keeps the embed", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'admin' })),
).toBe(true);
});
it("resolved role 'owner' keeps the embed", () => {
expect(
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'owner' })),
).toBe(true);
});
});
// Source-pin the identity each entrypoint feeds to userRepo.findById. These are
// the lines that decide WHOSE role governs the gate; pinning them means a
// refactor that points the lookup at the wrong user trips the test.
const SRC_DIR = join(__dirname);
describe('import gate identity is pinned to the importer (source contract)', () => {
it('single import resolves the role from the request userId', () => {
const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8');
// The role lookup must key on the request `userId`, then gate on the role.
expect(src).toMatch(
/this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/,
);
expect(src).toMatch(/canAuthorHtmlEmbed\(\s*importingUser\?\.role\s*\)/);
// And the gate uses the real strip helper.
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
});
it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => {
const src = readFileSync(
join(SRC_DIR, 'file-import-task.service.ts'),
'utf-8',
);
expect(src).toMatch(
/this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/,
);
expect(src).toMatch(
/importerCanAuthorHtmlEmbed\s*=\s*canAuthorHtmlEmbed\(\s*importingUser\?\.role\s*\)/,
);
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
});
});

View File

@@ -4,7 +4,9 @@
"private": true,
"scripts": {
"build": "tsc --build",
"dev": "tsc --watch"
"dev": "tsc --watch",
"test": "vitest run",
"test:watch": "vitest"
},
"main": "dist/index.js",
"module": "./src/index.ts",

View File

@@ -0,0 +1,112 @@
import { describe, it, expect } from "vitest";
import { markdownToHtml, htmlToMarkdown } from "./index";
import {
encodeHtmlEmbedSource,
decodeHtmlEmbedSource,
} from "../html-embed/html-embed";
// SECURITY (Variant C admin gate, import attack surface).
//
// The markdown import path is the only write path where an htmlEmbed reaches
// the server purely from file bytes (no editor / collab socket). The marked
// tokenizer in `html-embed.marked.ts` and the turndown rule in
// `turndown.utils.ts` are what materialize the `<!--html-embed:BASE64-->`
// marker into the `<div data-type="htmlEmbed" data-source="BASE64">` element
// that the server then parses into an htmlEmbed node and the admin gate strips.
//
// If either the tokenizer regex or the turndown rule shape drifts, the marker
// would either (a) stop becoming an htmlEmbed node (silently dropping admin
// content) or (b) become some OTHER tag the server's `hasHtmlEmbedNode` no
// longer recognizes (a strip bypass). These tests pin the marker <-> embed-div
// contract that the server-side strip relies on. editor-ext had ZERO tests
// before this file; this adds the runner + the round-trip coverage.
// The server parses the embed div by matching `data-type="htmlEmbed"` and
// decoding `data-source`; mirror that here so the assertion is exactly what the
// real `htmlToJson` -> htmlEmbed node parse depends on (the node's parseHTML in
// html-embed.ts uses the same selector + decodeHtmlEmbedSource).
const EMBED_DIV_RE = /<div[^>]*\bdata-type="htmlEmbed"[^>]*>/;
function extractEmbedSource(html: string): string | undefined {
const div = EMBED_DIV_RE.exec(html);
if (!div) return undefined;
const enc = /data-source="([^"]*)"/.exec(div[0]);
if (!enc) return undefined;
return decodeHtmlEmbedSource(enc[1]);
}
// Replicates the server's `hasHtmlEmbedNode` decision against the embed *div*
// (the HTML form the server immediately converts to JSON). If this matches, the
// server's JSON-level `hasHtmlEmbedNode` will too, because htmlToJson maps this
// exact div to an htmlEmbed node.
function htmlHasHtmlEmbed(html: string): boolean {
return EMBED_DIV_RE.test(html);
}
describe("markdown <!--html-embed--> import round-trip", () => {
const source = "<script>x</script>";
it("markdownToHtml turns the marker into an htmlEmbed div carrying the source", async () => {
const md = "<!--html-embed:" + encodeHtmlEmbedSource(source) + "-->";
const html = await markdownToHtml(md);
// The marker became the embed div the server recognizes as an htmlEmbed
// node (so the server's hasHtmlEmbedNode would match it after htmlToJson).
expect(htmlHasHtmlEmbed(html)).toBe(true);
// The decoded source is the original script, intact.
expect(extractEmbedSource(html)).toBe(source);
// The raw script is NOT inlined into the HTML — it stays base64 in the
// attribute (the marker itself must not be a direct injection vector).
expect(html).not.toContain("<script>x</script>");
});
it("preserves UTF-8 / special chars in the embedded source", async () => {
const utf8 = '<script>console.log("héllo → 世界")</script>';
const md = "<!--html-embed:" + encodeHtmlEmbedSource(utf8) + "-->";
const html = await markdownToHtml(md);
expect(htmlHasHtmlEmbed(html)).toBe(true);
expect(extractEmbedSource(html)).toBe(utf8);
});
it("an empty marker still produces an htmlEmbed div (empty source)", async () => {
const html = await markdownToHtml("<!--html-embed:-->");
expect(htmlHasHtmlEmbed(html)).toBe(true);
expect(extractEmbedSource(html)).toBe("");
});
it("round-trips htmlToMarkdown -> markdownToHtml preserving the embed marker", async () => {
const encoded = encodeHtmlEmbedSource(source);
// NOTE: turndown drops a *blank* (childless) element before any custom rule
// runs, and the htmlEmbed div is normally childless. The export pipeline
// therefore must give the rule a non-blank div to fire on; we add an inert
// text child here to exercise the real turndown htmlEmbed rule. (A blank
// embed div serializing to "" is asserted separately below as a documented
// edge so this contract drift is visible.)
const startHtml = `<div data-type="htmlEmbed" data-source="${encoded}">x</div>`;
// Export to markdown: the turndown rule emits the <!--html-embed:..-->
// marker (lossless, inert in plain markdown viewers).
const md = htmlToMarkdown(startHtml);
expect(md).toContain("<!--html-embed:" + encoded + "-->");
// Re-import: the marker round-trips back into an embed div with the same
// decoded source — this is the marker <-> embed-div contract the server's
// import strip depends on.
const html = await markdownToHtml(md);
expect(htmlHasHtmlEmbed(html)).toBe(true);
expect(extractEmbedSource(html)).toBe(source);
});
it("documents that a BLANK embed div serializes to empty markdown (turndown drops childless blocks)", () => {
const encoded = encodeHtmlEmbedSource(source);
const blank = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
// This pins current behavior so a future change to the turndown rule (e.g.
// making it fire on blank nodes) is caught rather than silently shipping.
expect(htmlToMarkdown(blank)).toBe("");
});
it("the base64 codec itself round-trips (no '<' leaks into the attribute)", () => {
const encoded = encodeHtmlEmbedSource(source);
expect(encoded).not.toContain("<");
expect(decodeHtmlEmbedSource(encoded)).toBe(source);
});
});

View File

@@ -11,6 +11,7 @@
"jsx": "react-jsx",
"sourceMap": true,
"outDir": "./dist",
"rootDir": "./src",
"baseUrl": "./",
"incremental": true,
"skipLibCheck": true,
@@ -19,5 +20,7 @@
"strictBindCallApply": false,
"forceConsistentCasingInFileNames": false,
"noFallthroughCasesInSwitch": false
}
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist", "src/**/*.spec.ts", "src/**/*.test.ts"]
}

View File

@@ -0,0 +1,13 @@
import { defineConfig } from "vitest/config";
// Minimal vitest setup for @docmost/editor-ext (mirrors apps/client's config,
// trimmed to what the markdown/html-embed round-trip tests need). The markdown
// utils run in plain Node (marked + turndown), so no jsdom/react plugin is
// required here.
export default defineConfig({
test: {
environment: "node",
globals: true,
include: ["src/**/*.{test,spec}.ts"],
},
});