test(html-embed): exercise the REAL admin-gate write paths + import round-trip
Release-cycle test audit: the strip boundary was tested only via a stand-in helper re-implemented in the spec, so a deleted/misplaced guard kept CI green (the missing create() guard was proof). Replace it with tests against real code: - persistence.extension.onStoreDocument: real ydoc from a rich doc (columns/ table/mention/htmlEmbed) -> non-admin strip removes only htmlEmbed, every other node preserved (data-loss guard); admin keeps; empty fragment no-throw. - collaboration.handler.updatePageContent: real path, user?.role gate, decoded ydoc embed-free for non-admin, kept for admin. - transclusion unsync: member stripped, admin preserved. - editor-ext gains a vitest setup (was zero tests) + a markdown round-trip: the <!--html-embed:BASE64--> marker -> htmlEmbed node with decoded source, and hasHtmlEmbedNode matches it — pinning the marked/turndown shape the import strip relies on. tsconfig now excludes specs from the shipped dist. - Fail-closed identity: source-pinned contracts that the gate keys on fileTask.creatorId (zip) / request userId (single) / callerRole (create) / authUser.role (duplicate), and missing-user -> strip (services can't load under jest's ESM graph; helpers replay the exact predicate). Adds the verified-safe ^src/ jest moduleNameMapper (identical fail set). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -195,7 +195,8 @@
|
||||
"moduleNameMapper": {
|
||||
"^@docmost/db/(.*)$": "<rootDir>/database/$1",
|
||||
"^@docmost/transactional/(.*)$": "<rootDir>/integrations/transactional/$1",
|
||||
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1"
|
||||
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1",
|
||||
"^src/(.*)$": "<rootDir>/$1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,102 @@
|
||||
import * as Y from 'yjs';
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import { CollaborationHandler } from './collaboration.handler';
|
||||
import { hasHtmlEmbedNode } from '../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL CollaborationHandler.updatePageContent admin gate (the
|
||||
// REST/MCP/AI content-update entrypoint, used by the page update endpoint and
|
||||
// the MCP/AI agent). updatePageContent reads `user?.role` and strips htmlEmbed
|
||||
// BEFORE handing the json to withYdocConnection. We stub only
|
||||
// withYdocConnection (which would otherwise open a real hocuspocus connection):
|
||||
// the role-extraction (`user?.role`) + strip that run upstream of it are REAL
|
||||
// production code. The 'replace' branch then runs the production
|
||||
// TiptapTransformer.toYdoc on the gated json against a real Y.Doc, which we
|
||||
// decode back to JSON and assert on. This replaces the re-implemented
|
||||
// `applyAdminGate` stand-in for this entrypoint.
|
||||
|
||||
const docWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
attrs: { position: 'left' },
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>nested</script>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'inner' }] },
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'column',
|
||||
attrs: { position: 'right' },
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'r' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>top</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
/**
|
||||
* Run the REAL updatePageContent('replace') with a stubbed withYdocConnection.
|
||||
* The stub provides a real Y.Doc + recording fragment; the production fn calls
|
||||
* TiptapTransformer.toYdoc(<gated json>) and applies it to the doc, so decoding
|
||||
* the doc afterward yields exactly the gated content.
|
||||
*/
|
||||
async function gatedContentFor(role: string | null | undefined) {
|
||||
const handler = new CollaborationHandler();
|
||||
const captureDoc = new Y.Doc();
|
||||
|
||||
jest
|
||||
.spyOn(handler, 'withYdocConnection')
|
||||
.mockImplementation(async (_hp, _name, _ctx, fn: any) => {
|
||||
const fragment = captureDoc.getXmlFragment('default');
|
||||
// Mirror the real Document surface the fn touches.
|
||||
const docLike: any = {
|
||||
getXmlFragment: () => fragment,
|
||||
};
|
||||
// The fn does: fragment.delete(0,len) then
|
||||
// Y.applyUpdate(doc, encodeStateAsUpdate(toYdoc(gatedJson))). It calls
|
||||
// Y.applyUpdate(doc, ...) — so docLike must be a real Y.Doc target.
|
||||
fn(captureDoc);
|
||||
});
|
||||
|
||||
const handlers = handler.getHandlers({} as any);
|
||||
await handlers.updatePageContent('page-1', {
|
||||
prosemirrorJson: docWithEmbed(),
|
||||
operation: 'replace',
|
||||
user: { id: 'u1', role } as any,
|
||||
});
|
||||
|
||||
return TiptapTransformer.fromYdoc(captureDoc, 'default');
|
||||
}
|
||||
|
||||
describe('CollaborationHandler.updatePageContent htmlEmbed admin gate (real code)', () => {
|
||||
it('non-admin (member): every htmlEmbed (top-level + nested) stripped before the ydoc', async () => {
|
||||
const gated = await gatedContentFor('member');
|
||||
expect(hasHtmlEmbedNode(gated)).toBe(false);
|
||||
// Non-embed siblings survive.
|
||||
const json = JSON.stringify(gated);
|
||||
expect(json).toContain('keep');
|
||||
expect(json).toContain('inner');
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
expect(hasHtmlEmbedNode(await gatedContentFor(role))).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it('admin: htmlEmbed preserved', async () => {
|
||||
expect(hasHtmlEmbedNode(await gatedContentFor('admin'))).toBe(true);
|
||||
});
|
||||
|
||||
it('owner: htmlEmbed preserved', async () => {
|
||||
expect(hasHtmlEmbedNode(await gatedContentFor('owner'))).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,252 @@
|
||||
import * as Y from 'yjs';
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import { PersistenceExtension } from './persistence.extension';
|
||||
import { tiptapExtensions } from '../collaboration.util';
|
||||
import {
|
||||
hasHtmlEmbedNode,
|
||||
HTML_EMBED_NODE_NAME,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL PersistenceExtension.onStoreDocument (the primary collab
|
||||
// WebSocket write path) against a REAL ydoc, with thin repo/db/queue mocks.
|
||||
// This replaces the prior re-implemented `applyAdminGate` stand-in for this
|
||||
// entrypoint: if the role-extraction expression (`context?.user?.role`), the
|
||||
// strip call, or the ydoc-rebuild branch is deleted/changed, these tests fail.
|
||||
|
||||
const RICH_DOC = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'text', text: 'intro paragraph' }],
|
||||
},
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
attrs: { position: 'left' },
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [
|
||||
{ type: 'text', text: 'left col, mentioning ' },
|
||||
{
|
||||
type: 'mention',
|
||||
attrs: {
|
||||
id: 'mention-1',
|
||||
label: 'Alice',
|
||||
entityType: 'user',
|
||||
entityId: 'user-123',
|
||||
creatorId: 'creator-1',
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
// Nested embed inside a column — must be stripped recursively.
|
||||
{
|
||||
type: HTML_EMBED_NODE_NAME,
|
||||
attrs: { source: '<script>nested()</script>' },
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'column',
|
||||
attrs: { position: 'right' },
|
||||
content: [
|
||||
{
|
||||
type: 'table',
|
||||
content: [
|
||||
{
|
||||
type: 'tableRow',
|
||||
content: [
|
||||
{
|
||||
type: 'tableHeader',
|
||||
attrs: { colspan: 1, rowspan: 1 },
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'H' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'tableRow',
|
||||
content: [
|
||||
{
|
||||
type: 'tableCell',
|
||||
attrs: { colspan: 1, rowspan: 1 },
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'cell' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
// Top-level embed — must be stripped.
|
||||
{
|
||||
type: HTML_EMBED_NODE_NAME,
|
||||
attrs: { source: '<script>top()</script>' },
|
||||
},
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'text', text: 'outro paragraph' }],
|
||||
},
|
||||
],
|
||||
};
|
||||
|
||||
function buildYdoc(json: any): Y.Doc {
|
||||
return TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
|
||||
}
|
||||
|
||||
// Count nodes by type across the whole tree (excludes htmlEmbed by listing it
|
||||
// separately) so we can assert every OTHER node type survived the strip.
|
||||
function nodeTypeCounts(json: any): Record<string, number> {
|
||||
const counts: Record<string, number> = {};
|
||||
const walk = (n: any) => {
|
||||
if (!n || typeof n !== 'object') return;
|
||||
if (n.type) counts[n.type] = (counts[n.type] ?? 0) + 1;
|
||||
if (Array.isArray(n.content)) n.content.forEach(walk);
|
||||
};
|
||||
walk(json);
|
||||
return counts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a real PersistenceExtension with the minimum mocks needed for
|
||||
* onStoreDocument to reach the strip + persist branch, and capture the content
|
||||
* that would be written to the page row.
|
||||
*/
|
||||
function buildExtension() {
|
||||
const captured: { content?: any } = {};
|
||||
|
||||
const existingPage = {
|
||||
id: 'page-1',
|
||||
slugId: 'slug-1',
|
||||
spaceId: 'space-1',
|
||||
workspaceId: 'ws-1',
|
||||
creatorId: 'creator-1',
|
||||
contributorIds: [],
|
||||
content: { type: 'doc', content: [] }, // differs from new content -> persist runs
|
||||
createdAt: new Date(),
|
||||
lastUpdatedSource: 'user',
|
||||
};
|
||||
|
||||
const pageRepo = {
|
||||
findById: jest.fn(async () => ({ ...existingPage })),
|
||||
updatePage: jest.fn(async (values: any) => {
|
||||
captured.content = values.content;
|
||||
}),
|
||||
};
|
||||
const pageHistoryRepo = {
|
||||
findPageLastHistory: jest.fn(async () => null),
|
||||
saveHistory: jest.fn(async () => undefined),
|
||||
};
|
||||
// db.transaction().execute(cb) just runs the callback (no real DB).
|
||||
const db = {
|
||||
transaction: () => ({
|
||||
execute: (cb: any) => cb({} as any),
|
||||
}),
|
||||
};
|
||||
const noopQueue = { add: jest.fn(async () => undefined) } as any;
|
||||
const collabHistory = { addContributors: jest.fn(async () => undefined) } as any;
|
||||
const transclusionService = {
|
||||
syncPageTransclusions: jest.fn(async () => undefined),
|
||||
syncPageReferences: jest.fn(async () => undefined),
|
||||
} as any;
|
||||
|
||||
const ext = new PersistenceExtension(
|
||||
pageRepo as any,
|
||||
pageHistoryRepo as any,
|
||||
db as any,
|
||||
noopQueue,
|
||||
noopQueue,
|
||||
noopQueue,
|
||||
collabHistory,
|
||||
transclusionService,
|
||||
);
|
||||
|
||||
return { ext, captured, pageRepo };
|
||||
}
|
||||
|
||||
async function runStore(role: string | null | undefined, doc: Y.Doc) {
|
||||
const { ext, captured } = buildExtension();
|
||||
// hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has
|
||||
// none, so stub it (the post-persist broadcast is not under test here).
|
||||
(doc as any).broadcastStateless = () => undefined;
|
||||
await ext.onStoreDocument({
|
||||
documentName: 'page-1',
|
||||
document: doc,
|
||||
context: { user: { id: 'u1', role } },
|
||||
} as any);
|
||||
return captured;
|
||||
}
|
||||
|
||||
describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)', () => {
|
||||
it('non-admin store: strips EVERY htmlEmbed but preserves every other node', async () => {
|
||||
const doc = buildYdoc(RICH_DOC);
|
||||
const before = TiptapTransformer.fromYdoc(doc, 'default');
|
||||
expect(hasHtmlEmbedNode(before)).toBe(true);
|
||||
const beforeCounts = nodeTypeCounts(before);
|
||||
|
||||
const captured = await runStore('member', doc);
|
||||
|
||||
expect(captured.content).toBeDefined();
|
||||
// htmlEmbed gone from the persisted content.
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(false);
|
||||
|
||||
// Every non-embed node type is preserved with the SAME count (guards against
|
||||
// data loss if a node were missing from tiptapExtensions and dropped on the
|
||||
// toYdoc rebuild).
|
||||
const afterCounts = nodeTypeCounts(captured.content);
|
||||
for (const [type, count] of Object.entries(beforeCounts)) {
|
||||
if (type === HTML_EMBED_NODE_NAME) continue;
|
||||
expect(afterCounts[type]).toBe(count);
|
||||
}
|
||||
// The two embeds are gone.
|
||||
expect(beforeCounts[HTML_EMBED_NODE_NAME]).toBe(2);
|
||||
expect(afterCounts[HTML_EMBED_NODE_NAME]).toBeUndefined();
|
||||
|
||||
// The shared ydoc fragment was also rewritten clean (re-decode it).
|
||||
const reDecoded = TiptapTransformer.fromYdoc(doc, 'default');
|
||||
expect(hasHtmlEmbedNode(reDecoded)).toBe(false);
|
||||
});
|
||||
|
||||
it('admin store: htmlEmbed preserved in persisted content', async () => {
|
||||
const captured = await runStore('admin', buildYdoc(RICH_DOC));
|
||||
expect(captured.content).toBeDefined();
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
|
||||
expect(nodeTypeCounts(captured.content)[HTML_EMBED_NODE_NAME]).toBe(2);
|
||||
});
|
||||
|
||||
it('owner store: htmlEmbed preserved', async () => {
|
||||
const captured = await runStore('owner', buildYdoc(RICH_DOC));
|
||||
expect(hasHtmlEmbedNode(captured.content)).toBe(true);
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode((await runStore(undefined, buildYdoc(RICH_DOC))).content),
|
||||
).toBe(false);
|
||||
expect(
|
||||
hasHtmlEmbedNode((await runStore(null, buildYdoc(RICH_DOC))).content),
|
||||
).toBe(false);
|
||||
expect(
|
||||
hasHtmlEmbedNode((await runStore('viewer', buildYdoc(RICH_DOC))).content),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => {
|
||||
const emptyDoc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph' }],
|
||||
});
|
||||
// Non-admin path with an empty/embed-free fragment must be a no-op strip,
|
||||
// not throw.
|
||||
await expect(runStore('member', emptyDoc)).resolves.toBeDefined();
|
||||
});
|
||||
});
|
||||
@@ -105,135 +105,34 @@ describe('canAuthorHtmlEmbed', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Replicates the write-path decision used by every non-admin persistence guard
|
||||
// (page create, collab store, single import, zip import, duplication,
|
||||
// transclusion unsync):
|
||||
// if !canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json) -> strip, else keep.
|
||||
const applyAdminGate = (json: any, role: string | null | undefined) => {
|
||||
if (!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)) {
|
||||
return stripHtmlEmbedNodes(json);
|
||||
}
|
||||
return json;
|
||||
};
|
||||
|
||||
describe('admin-gate write-path decision (duplication / import / unsync)', () => {
|
||||
const docWithEmbed = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
|
||||
],
|
||||
};
|
||||
|
||||
it('strips the embed for a non-admin (member) author', () => {
|
||||
const result = applyAdminGate(docWithEmbed, 'member');
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
expect(result.content).toHaveLength(1);
|
||||
expect(result.content[0].content[0].text).toBe('keep');
|
||||
});
|
||||
|
||||
it('strips the embed for unknown/empty roles', () => {
|
||||
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, null))).toBe(false);
|
||||
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, undefined))).toBe(
|
||||
false,
|
||||
);
|
||||
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'viewer'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('keeps the embed for an admin author', () => {
|
||||
const result = applyAdminGate(docWithEmbed, 'admin');
|
||||
expect(hasHtmlEmbedNode(result)).toBe(true);
|
||||
expect(result).toBe(docWithEmbed);
|
||||
});
|
||||
|
||||
it('keeps the embed for an owner author', () => {
|
||||
const result = applyAdminGate(docWithEmbed, 'owner');
|
||||
expect(hasHtmlEmbedNode(result)).toBe(true);
|
||||
});
|
||||
|
||||
it('strips nested embeds (subtree/column duplication) for a non-admin', () => {
|
||||
const nested = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'ok' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const result = applyAdminGate(nested, 'member');
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
const col = findFirstChild(result, 'column');
|
||||
expect(col.content).toHaveLength(1);
|
||||
expect(col.content[0].type).toBe('paragraph');
|
||||
});
|
||||
|
||||
it('leaves a non-admin doc without embeds untouched (no needless rewrite)', () => {
|
||||
const clean = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
|
||||
};
|
||||
const result = applyAdminGate(clean, 'member');
|
||||
expect(result).toBe(clean);
|
||||
});
|
||||
});
|
||||
|
||||
// PageService.create() now applies exactly `applyAdminGate` (above) to the
|
||||
// parsed ProseMirror JSON BEFORE deriving content/textContent/ydoc and calling
|
||||
// insertPage. The create controller only requires space Edit, so without this a
|
||||
// regular member could POST a doc (json, or the markdown/html
|
||||
// <!--html-embed:BASE64--> forms that parse to the same node) containing an
|
||||
// htmlEmbed and store XSS for every reader. These cases pin the create-path
|
||||
// decision: non-admin -> stripped, admin/owner -> kept.
|
||||
describe('admin-gate write-path decision (page create)', () => {
|
||||
const docWithEmbed = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'body' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
|
||||
],
|
||||
};
|
||||
|
||||
it('strips the embed when a non-admin (member) creates the page', () => {
|
||||
const result = applyAdminGate(docWithEmbed, 'member');
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
expect(result.content).toHaveLength(1);
|
||||
expect(result.content[0].content[0].text).toBe('body');
|
||||
});
|
||||
|
||||
it('strips the embed when the creator role is unknown/empty', () => {
|
||||
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, null))).toBe(false);
|
||||
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, undefined))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('keeps the embed when an admin or owner creates the page', () => {
|
||||
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'admin'))).toBe(true);
|
||||
expect(hasHtmlEmbedNode(applyAdminGate(docWithEmbed, 'owner'))).toBe(true);
|
||||
});
|
||||
|
||||
it('strips embeds smuggled via the markdown/html <!--html-embed--> form', () => {
|
||||
// The markdown/html create formats decode to the same htmlEmbed node, so
|
||||
// the gate (run on the parsed JSON) covers them identically.
|
||||
// NOTE: a previous revision of this file re-implemented the write-path admin
|
||||
// gate as a local `applyAdminGate` stand-in and asserted against THAT. A
|
||||
// deleted/misplaced real guard would have kept those green. The stand-in is
|
||||
// removed. The collab store, REST/MCP update, and transclusion-unsync paths are
|
||||
// now tested against their REAL code in:
|
||||
// - collaboration/extensions/persistence.extension.html-embed.spec.ts
|
||||
// - collaboration/collaboration.handler.html-embed.spec.ts
|
||||
// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
|
||||
// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup)
|
||||
// - integrations/import/services/import-html-embed-identity.spec.ts (import)
|
||||
//
|
||||
// The case below stays here because it asserts a REAL parse path
|
||||
// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a
|
||||
// re-implemented gate.
|
||||
describe('htmlEmbed smuggled via the markdown/html <!--html-embed--> form (real parse + real helpers)', () => {
|
||||
it('the parsed node is detected and stripped by the real helpers', () => {
|
||||
// The markdown/html create formats decode to the same htmlEmbed node, so the
|
||||
// gate (run on the parsed JSON) covers them identically.
|
||||
const source = '<script>steal()</script>';
|
||||
const encoded = encodeHtmlEmbedSource(source);
|
||||
const html = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
|
||||
const parsed = htmlToJson(html);
|
||||
expect(hasHtmlEmbedNode(parsed)).toBe(true);
|
||||
|
||||
const result = applyAdminGate(parsed, 'member');
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
// A non-admin role gates to strip via the real helpers.
|
||||
expect(canAuthorHtmlEmbed('member')).toBe(false);
|
||||
const stripped = stripHtmlEmbedNodes(parsed);
|
||||
expect(hasHtmlEmbedNode(stripped)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
hasHtmlEmbedNode,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// PageService.create() and duplicatePage() guards.
|
||||
//
|
||||
// page.service.ts cannot be unit-LOADED under the server's jest config (a
|
||||
// transitive ESM dep, @sindresorhus/slugify, is not in transformIgnorePatterns),
|
||||
// so we cover the two load-bearing properties at the strongest feasible layer:
|
||||
//
|
||||
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact predicate
|
||||
// each path applies: non-admin/unknown role -> strip, admin/owner -> keep.
|
||||
//
|
||||
// (2) IDENTITY — source-pin which role each path reads (create: the `callerRole`
|
||||
// param threaded from the request; duplicate: `authUser.role`), so a
|
||||
// refactor that drops the guard or reads the wrong role trips the test.
|
||||
// This is what replaces the removed `applyAdminGate` stand-in for these
|
||||
// two entrypoints.
|
||||
|
||||
const docWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'body' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>alert(1)</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
// The real predicate both paths apply (see SECURITY blocks in page.service.ts).
|
||||
function applyGate(json: any, role: string | null | undefined) {
|
||||
if (!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)) {
|
||||
return stripHtmlEmbedNodes(json);
|
||||
}
|
||||
return json;
|
||||
}
|
||||
|
||||
describe('page create/duplicate gate decision (real helpers)', () => {
|
||||
it('non-admin (member) strips', () => {
|
||||
const result = applyGate(docWithEmbed(), 'member');
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
expect(result.content).toHaveLength(1);
|
||||
expect(result.content[0].content[0].text).toBe('body');
|
||||
});
|
||||
|
||||
it('unknown/empty role fails closed (strips)', () => {
|
||||
for (const role of [null, undefined, 'viewer'] as const) {
|
||||
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), role))).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it('admin/owner keep', () => {
|
||||
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), 'admin'))).toBe(true);
|
||||
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), 'owner'))).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
const SRC = readFileSync(join(__dirname, 'page.service.ts'), 'utf-8');
|
||||
|
||||
describe('page create/duplicate gate identity is pinned (source contract)', () => {
|
||||
it('create() gates on the caller role param before deriving content/ydoc', () => {
|
||||
// create() receives the caller's workspace role as `callerRole` and gates on
|
||||
// it; the embed must be stripped BEFORE insertPage.
|
||||
expect(SRC).toMatch(
|
||||
/!canAuthorHtmlEmbed\(\s*callerRole\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/,
|
||||
);
|
||||
expect(SRC).toContain('prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson)');
|
||||
});
|
||||
|
||||
it('duplicatePage() gates on the duplicating user role (authUser.role)', () => {
|
||||
expect(SRC).toMatch(
|
||||
/!canAuthorHtmlEmbed\(\s*authUser\.role\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/,
|
||||
);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,114 @@
|
||||
import { TransclusionService } from '../transclusion.service';
|
||||
import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate.
|
||||
// unsync returns a source snapshot the client materializes into the reference
|
||||
// page; a non-admin must never receive an embed payload to re-persist. The gate
|
||||
// reads `user.role` and strips before returning. All repos / access checks are
|
||||
// mocked so the REAL gate logic runs end-to-end. Complements the existing
|
||||
// transclusion specs (rewriteAttachmentsForUnsync, controller).
|
||||
|
||||
const WS = 'ws-1';
|
||||
const REF_PAGE = 'ref-1';
|
||||
const SRC_PAGE = 'src-1';
|
||||
const TX_ID = 'tx-1';
|
||||
|
||||
const sourceContentWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>steal()</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
function buildService() {
|
||||
const pageRepo = {
|
||||
findById: jest.fn(async (id: string) => ({
|
||||
id,
|
||||
workspaceId: WS,
|
||||
spaceId: 'space-1',
|
||||
deletedAt: null,
|
||||
})),
|
||||
};
|
||||
const pageTransclusionsRepo = {
|
||||
findByPageAndTransclusion: jest.fn(async () => ({
|
||||
content: sourceContentWithEmbed(),
|
||||
})),
|
||||
};
|
||||
const pageTransclusionReferencesRepo = {
|
||||
deleteOne: jest.fn(async () => undefined),
|
||||
};
|
||||
const attachmentRepo = { findByIds: jest.fn(async () => []) };
|
||||
const storageService = { copy: jest.fn(async () => undefined) };
|
||||
const pageAccessService = {
|
||||
validateCanEdit: jest.fn(async () => undefined),
|
||||
validateCanView: jest.fn(async () => undefined),
|
||||
};
|
||||
|
||||
const service = new TransclusionService(
|
||||
{} as any, // db (unused on this path)
|
||||
pageTransclusionsRepo as any,
|
||||
pageTransclusionReferencesRepo as any,
|
||||
pageRepo as any,
|
||||
{} as any, // pagePermissionRepo (unused)
|
||||
{} as any, // spaceMemberRepo (unused)
|
||||
attachmentRepo as any,
|
||||
storageService as any,
|
||||
pageAccessService as any,
|
||||
);
|
||||
return service;
|
||||
}
|
||||
|
||||
function userWithRole(role: string | null | undefined) {
|
||||
return { id: 'u1', workspaceId: WS, role } as any;
|
||||
}
|
||||
|
||||
describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => {
|
||||
it('non-admin (member): returned content has htmlEmbed stripped', async () => {
|
||||
const service = buildService();
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('member'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
// Non-embed content is preserved.
|
||||
expect(JSON.stringify(content)).toContain('snapshot body');
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
const service = buildService();
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole(role),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it('admin: returned content keeps the htmlEmbed', async () => {
|
||||
const service = buildService();
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('admin'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(true);
|
||||
});
|
||||
|
||||
it('owner: returned content keeps the htmlEmbed', async () => {
|
||||
const service = buildService();
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('owner'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,104 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
hasHtmlEmbedNode,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// FAIL-CLOSED IDENTITY for the import write paths.
|
||||
//
|
||||
// import.service / file-import-task.service cannot be unit-LOADED under the
|
||||
// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in
|
||||
// transformIgnorePatterns). So we cover the two load-bearing properties at the
|
||||
// strongest feasible layer:
|
||||
//
|
||||
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate
|
||||
// predicate each entrypoint runs against the role resolved from
|
||||
// userRepo.findById(...): a MISSING user (findById -> undefined) must fail
|
||||
// closed (strip), and only 'admin'/'owner' keep the embed.
|
||||
//
|
||||
// (2) IDENTITY — source-pin which identity governs the gate so a refactor that
|
||||
// swaps the lookup to the wrong user (e.g. the queue worker's caller) is
|
||||
// caught: zip import resolves the role from `fileTask.creatorId`; single
|
||||
// import from the request `userId`. NOT some ambient caller.
|
||||
//
|
||||
// If a guard is deleted/misplaced or the identity field changes, these break.
|
||||
|
||||
const docWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
// The real predicate both import entrypoints apply (see the SECURITY blocks in
|
||||
// import.service.ts and file-import-task.service.ts): resolve the importer via
|
||||
// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`.
|
||||
function applyImportGate(json: any, importingUser: { role?: string } | undefined) {
|
||||
if (!canAuthorHtmlEmbed(importingUser?.role) && hasHtmlEmbedNode(json)) {
|
||||
return stripHtmlEmbedNodes(json);
|
||||
}
|
||||
return json;
|
||||
}
|
||||
|
||||
describe('import gate fail-closed by resolved-user role (real helpers)', () => {
|
||||
it('missing user (userRepo.findById -> undefined) strips the embed', () => {
|
||||
// findById returns undefined when the user/workspace pair does not resolve;
|
||||
// undefined?.role is undefined -> canAuthorHtmlEmbed(undefined) === false.
|
||||
const importingUser = undefined;
|
||||
const result = applyImportGate(docWithEmbed(), importingUser);
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it("resolved role 'member' strips", () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'member' })),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("resolved role 'admin' keeps the embed", () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'admin' })),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("resolved role 'owner' keeps the embed", () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), { role: 'owner' })),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// Source-pin the identity each entrypoint feeds to userRepo.findById. These are
|
||||
// the lines that decide WHOSE role governs the gate; pinning them means a
|
||||
// refactor that points the lookup at the wrong user trips the test.
|
||||
const SRC_DIR = join(__dirname);
|
||||
|
||||
describe('import gate identity is pinned to the importer (source contract)', () => {
|
||||
it('single import resolves the role from the request userId', () => {
|
||||
const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8');
|
||||
// The role lookup must key on the request `userId`, then gate on the role.
|
||||
expect(src).toMatch(
|
||||
/this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/,
|
||||
);
|
||||
expect(src).toMatch(/canAuthorHtmlEmbed\(\s*importingUser\?\.role\s*\)/);
|
||||
// And the gate uses the real strip helper.
|
||||
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
|
||||
});
|
||||
|
||||
it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => {
|
||||
const src = readFileSync(
|
||||
join(SRC_DIR, 'file-import-task.service.ts'),
|
||||
'utf-8',
|
||||
);
|
||||
expect(src).toMatch(
|
||||
/this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/,
|
||||
);
|
||||
expect(src).toMatch(
|
||||
/importerCanAuthorHtmlEmbed\s*=\s*canAuthorHtmlEmbed\(\s*importingUser\?\.role\s*\)/,
|
||||
);
|
||||
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
|
||||
});
|
||||
});
|
||||
@@ -4,7 +4,9 @@
|
||||
"private": true,
|
||||
"scripts": {
|
||||
"build": "tsc --build",
|
||||
"dev": "tsc --watch"
|
||||
"dev": "tsc --watch",
|
||||
"test": "vitest run",
|
||||
"test:watch": "vitest"
|
||||
},
|
||||
"main": "dist/index.js",
|
||||
"module": "./src/index.ts",
|
||||
|
||||
112
packages/editor-ext/src/lib/markdown/markdown-html-embed.spec.ts
Normal file
112
packages/editor-ext/src/lib/markdown/markdown-html-embed.spec.ts
Normal file
@@ -0,0 +1,112 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { markdownToHtml, htmlToMarkdown } from "./index";
|
||||
import {
|
||||
encodeHtmlEmbedSource,
|
||||
decodeHtmlEmbedSource,
|
||||
} from "../html-embed/html-embed";
|
||||
|
||||
// SECURITY (Variant C admin gate, import attack surface).
|
||||
//
|
||||
// The markdown import path is the only write path where an htmlEmbed reaches
|
||||
// the server purely from file bytes (no editor / collab socket). The marked
|
||||
// tokenizer in `html-embed.marked.ts` and the turndown rule in
|
||||
// `turndown.utils.ts` are what materialize the `<!--html-embed:BASE64-->`
|
||||
// marker into the `<div data-type="htmlEmbed" data-source="BASE64">` element
|
||||
// that the server then parses into an htmlEmbed node and the admin gate strips.
|
||||
//
|
||||
// If either the tokenizer regex or the turndown rule shape drifts, the marker
|
||||
// would either (a) stop becoming an htmlEmbed node (silently dropping admin
|
||||
// content) or (b) become some OTHER tag the server's `hasHtmlEmbedNode` no
|
||||
// longer recognizes (a strip bypass). These tests pin the marker <-> embed-div
|
||||
// contract that the server-side strip relies on. editor-ext had ZERO tests
|
||||
// before this file; this adds the runner + the round-trip coverage.
|
||||
|
||||
// The server parses the embed div by matching `data-type="htmlEmbed"` and
|
||||
// decoding `data-source`; mirror that here so the assertion is exactly what the
|
||||
// real `htmlToJson` -> htmlEmbed node parse depends on (the node's parseHTML in
|
||||
// html-embed.ts uses the same selector + decodeHtmlEmbedSource).
|
||||
const EMBED_DIV_RE = /<div[^>]*\bdata-type="htmlEmbed"[^>]*>/;
|
||||
function extractEmbedSource(html: string): string | undefined {
|
||||
const div = EMBED_DIV_RE.exec(html);
|
||||
if (!div) return undefined;
|
||||
const enc = /data-source="([^"]*)"/.exec(div[0]);
|
||||
if (!enc) return undefined;
|
||||
return decodeHtmlEmbedSource(enc[1]);
|
||||
}
|
||||
|
||||
// Replicates the server's `hasHtmlEmbedNode` decision against the embed *div*
|
||||
// (the HTML form the server immediately converts to JSON). If this matches, the
|
||||
// server's JSON-level `hasHtmlEmbedNode` will too, because htmlToJson maps this
|
||||
// exact div to an htmlEmbed node.
|
||||
function htmlHasHtmlEmbed(html: string): boolean {
|
||||
return EMBED_DIV_RE.test(html);
|
||||
}
|
||||
|
||||
describe("markdown <!--html-embed--> import round-trip", () => {
|
||||
const source = "<script>x</script>";
|
||||
|
||||
it("markdownToHtml turns the marker into an htmlEmbed div carrying the source", async () => {
|
||||
const md = "<!--html-embed:" + encodeHtmlEmbedSource(source) + "-->";
|
||||
const html = await markdownToHtml(md);
|
||||
|
||||
// The marker became the embed div the server recognizes as an htmlEmbed
|
||||
// node (so the server's hasHtmlEmbedNode would match it after htmlToJson).
|
||||
expect(htmlHasHtmlEmbed(html)).toBe(true);
|
||||
// The decoded source is the original script, intact.
|
||||
expect(extractEmbedSource(html)).toBe(source);
|
||||
// The raw script is NOT inlined into the HTML — it stays base64 in the
|
||||
// attribute (the marker itself must not be a direct injection vector).
|
||||
expect(html).not.toContain("<script>x</script>");
|
||||
});
|
||||
|
||||
it("preserves UTF-8 / special chars in the embedded source", async () => {
|
||||
const utf8 = '<script>console.log("héllo → 世界")</script>';
|
||||
const md = "<!--html-embed:" + encodeHtmlEmbedSource(utf8) + "-->";
|
||||
const html = await markdownToHtml(md);
|
||||
expect(htmlHasHtmlEmbed(html)).toBe(true);
|
||||
expect(extractEmbedSource(html)).toBe(utf8);
|
||||
});
|
||||
|
||||
it("an empty marker still produces an htmlEmbed div (empty source)", async () => {
|
||||
const html = await markdownToHtml("<!--html-embed:-->");
|
||||
expect(htmlHasHtmlEmbed(html)).toBe(true);
|
||||
expect(extractEmbedSource(html)).toBe("");
|
||||
});
|
||||
|
||||
it("round-trips htmlToMarkdown -> markdownToHtml preserving the embed marker", async () => {
|
||||
const encoded = encodeHtmlEmbedSource(source);
|
||||
// NOTE: turndown drops a *blank* (childless) element before any custom rule
|
||||
// runs, and the htmlEmbed div is normally childless. The export pipeline
|
||||
// therefore must give the rule a non-blank div to fire on; we add an inert
|
||||
// text child here to exercise the real turndown htmlEmbed rule. (A blank
|
||||
// embed div serializing to "" is asserted separately below as a documented
|
||||
// edge so this contract drift is visible.)
|
||||
const startHtml = `<div data-type="htmlEmbed" data-source="${encoded}">x</div>`;
|
||||
|
||||
// Export to markdown: the turndown rule emits the <!--html-embed:..-->
|
||||
// marker (lossless, inert in plain markdown viewers).
|
||||
const md = htmlToMarkdown(startHtml);
|
||||
expect(md).toContain("<!--html-embed:" + encoded + "-->");
|
||||
|
||||
// Re-import: the marker round-trips back into an embed div with the same
|
||||
// decoded source — this is the marker <-> embed-div contract the server's
|
||||
// import strip depends on.
|
||||
const html = await markdownToHtml(md);
|
||||
expect(htmlHasHtmlEmbed(html)).toBe(true);
|
||||
expect(extractEmbedSource(html)).toBe(source);
|
||||
});
|
||||
|
||||
it("documents that a BLANK embed div serializes to empty markdown (turndown drops childless blocks)", () => {
|
||||
const encoded = encodeHtmlEmbedSource(source);
|
||||
const blank = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
|
||||
// This pins current behavior so a future change to the turndown rule (e.g.
|
||||
// making it fire on blank nodes) is caught rather than silently shipping.
|
||||
expect(htmlToMarkdown(blank)).toBe("");
|
||||
});
|
||||
|
||||
it("the base64 codec itself round-trips (no '<' leaks into the attribute)", () => {
|
||||
const encoded = encodeHtmlEmbedSource(source);
|
||||
expect(encoded).not.toContain("<");
|
||||
expect(decodeHtmlEmbedSource(encoded)).toBe(source);
|
||||
});
|
||||
});
|
||||
@@ -11,6 +11,7 @@
|
||||
"jsx": "react-jsx",
|
||||
"sourceMap": true,
|
||||
"outDir": "./dist",
|
||||
"rootDir": "./src",
|
||||
"baseUrl": "./",
|
||||
"incremental": true,
|
||||
"skipLibCheck": true,
|
||||
@@ -19,5 +20,7 @@
|
||||
"strictBindCallApply": false,
|
||||
"forceConsistentCasingInFileNames": false,
|
||||
"noFallthroughCasesInSwitch": false
|
||||
}
|
||||
},
|
||||
"include": ["src/**/*"],
|
||||
"exclude": ["node_modules", "dist", "src/**/*.spec.ts", "src/**/*.test.ts"]
|
||||
}
|
||||
|
||||
13
packages/editor-ext/vitest.config.ts
Normal file
13
packages/editor-ext/vitest.config.ts
Normal file
@@ -0,0 +1,13 @@
|
||||
import { defineConfig } from "vitest/config";
|
||||
|
||||
// Minimal vitest setup for @docmost/editor-ext (mirrors apps/client's config,
|
||||
// trimmed to what the markdown/html-embed round-trip tests need). The markdown
|
||||
// utils run in plain Node (marked + turndown), so no jsdom/react plugin is
|
||||
// required here.
|
||||
export default defineConfig({
|
||||
test: {
|
||||
environment: "node",
|
||||
globals: true,
|
||||
include: ["src/**/*.{test,spec}.ts"],
|
||||
},
|
||||
});
|
||||
Reference in New Issue
Block a user