@@ -3,6 +3,7 @@ import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import { PersistenceExtension } from './persistence.extension';
|
||||
import { tiptapExtensions } from '../collaboration.util';
|
||||
import {
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
HTML_EMBED_NODE_NAME,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
@@ -121,7 +122,7 @@ function nodeTypeCounts(json: any): Record<string, number> {
|
||||
* onStoreDocument to reach the strip + persist branch, and capture the content
|
||||
* that would be written to the page row.
|
||||
*/
|
||||
function buildExtension(featureEnabled = true) {
|
||||
function buildExtension(featureEnabled = true, priorContent?: any) {
|
||||
const captured: { content?: any } = {};
|
||||
|
||||
const existingPage = {
|
||||
@@ -131,7 +132,10 @@ function buildExtension(featureEnabled = true) {
|
||||
workspaceId: 'ws-1',
|
||||
creatorId: 'creator-1',
|
||||
contributorIds: [],
|
||||
content: { type: 'doc', content: [] }, // differs from new content -> persist runs
|
||||
// The currently-persisted content. Defaults to an empty doc (differs from
|
||||
// new content -> persist runs); a test may pass a prior admin embed here to
|
||||
// exercise the preserve-admin-embed branch.
|
||||
content: priorContent ?? { type: 'doc', content: [] },
|
||||
createdAt: new Date(),
|
||||
lastUpdatedSource: 'user',
|
||||
};
|
||||
@@ -186,8 +190,9 @@ async function runStore(
|
||||
role: string | null | undefined,
|
||||
doc: Y.Doc,
|
||||
featureEnabled = true,
|
||||
priorContent?: any,
|
||||
) {
|
||||
const { ext, captured } = buildExtension(featureEnabled);
|
||||
const { ext, captured } = buildExtension(featureEnabled, priorContent);
|
||||
// hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has
|
||||
// none, so stub it (the post-persist broadcast is not under test here).
|
||||
(doc as any).broadcastStateless = () => undefined;
|
||||
@@ -268,6 +273,62 @@ describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)'
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it('toggle ON + non-admin store: PRESERVES an admin embed already in the persisted content through an unrelated edit', async () => {
|
||||
// Prior persisted content already holds an admin-authored embed.
|
||||
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
|
||||
const prior = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
// A non-admin makes an UNRELATED edit (tweaks the paragraph) but the embed
|
||||
// is still present in the merged doc.
|
||||
const edited = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
|
||||
const captured = await runStore('member', buildYdoc(edited), true, prior);
|
||||
expect(captured.content).toBeDefined();
|
||||
// The admin's pre-existing embed survives the non-admin store.
|
||||
expect(collectHtmlEmbedSources(captured.content)).toEqual(
|
||||
new Set([ADMIN_SOURCE]),
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + non-admin store: strips a NEWLY-added embed while keeping the prior admin one', async () => {
|
||||
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
|
||||
const prior = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
// Non-admin keeps the admin embed, makes an unrelated paragraph edit (so the
|
||||
// store is not a no-op and is persisted), and ALSO adds a brand-new embed.
|
||||
const edited = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro edited' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
|
||||
],
|
||||
};
|
||||
|
||||
const captured = await runStore('member', buildYdoc(edited), true, prior);
|
||||
expect(captured.content).toBeDefined();
|
||||
// Only the admin-vetted source remains; the newly-introduced one is stripped.
|
||||
expect(collectHtmlEmbedSources(captured.content)).toEqual(
|
||||
new Set([ADMIN_SOURCE]),
|
||||
);
|
||||
});
|
||||
|
||||
it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => {
|
||||
const emptyDoc = buildYdoc({
|
||||
type: 'doc',
|
||||
@@ -278,3 +339,118 @@ describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)'
|
||||
await expect(runStore('member', emptyDoc)).resolves.toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises the REAL early onChange guard (Gitea #26): guardHtmlEmbed converges
|
||||
// the shared ydoc sub-second, before the 10s store debounce. We call it directly
|
||||
// (it is the debounced timer body) and assert the ydoc fragment no longer yields
|
||||
// an htmlEmbed for the non-admin's transient embed, while admin-vetted embeds
|
||||
// already in the persisted content survive.
|
||||
describe('PersistenceExtension.guardHtmlEmbed early onChange guard (real code)', () => {
|
||||
async function runGuard(
|
||||
role: string | null | undefined,
|
||||
doc: Y.Doc,
|
||||
featureEnabled = true,
|
||||
priorContent?: any,
|
||||
) {
|
||||
const { ext } = buildExtension(featureEnabled, priorContent);
|
||||
await (ext as any).guardHtmlEmbed(
|
||||
'page-1',
|
||||
doc,
|
||||
{ user: { id: 'u1', role, workspaceId: 'ws-1' } },
|
||||
);
|
||||
}
|
||||
|
||||
it('toggle ON + non-admin: strips a newly-added embed from the shared ydoc', async () => {
|
||||
// Prior persisted content has NO embed; the live doc has one a non-admin
|
||||
// just added.
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
|
||||
],
|
||||
});
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
true,
|
||||
);
|
||||
|
||||
await runGuard('member', doc, true, { type: 'doc', content: [] });
|
||||
|
||||
// The shared ydoc fragment no longer yields any htmlEmbed.
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + non-admin: preserves a prior admin embed, strips the new one', async () => {
|
||||
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
|
||||
const prior = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
// Live doc keeps the admin embed AND adds a brand-new one.
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
await runGuard('member', doc, true, prior);
|
||||
|
||||
// Only the admin-vetted source survives in the shared ydoc.
|
||||
expect(
|
||||
collectHtmlEmbedSources(TiptapTransformer.fromYdoc(doc, 'default')),
|
||||
).toEqual(new Set([ADMIN_SOURCE]));
|
||||
});
|
||||
|
||||
it('toggle OFF + non-admin: strips ALL embeds (allow-list is null)', async () => {
|
||||
// Even an embed that matches the prior content is stripped when the toggle
|
||||
// is OFF, because the OFF path passes allowed=null (strip everything) and
|
||||
// never reads the prior content for an allow-list.
|
||||
const SOURCE = '<script>any()</script>';
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } },
|
||||
],
|
||||
});
|
||||
await runGuard('member', doc, false, {
|
||||
type: 'doc',
|
||||
content: [{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } }],
|
||||
});
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('admin role: guard is a defensive no-op (embed preserved)', async () => {
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>ok()</script>' } },
|
||||
],
|
||||
});
|
||||
await runGuard('admin', doc, true, { type: 'doc', content: [] });
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('no embed present: guard is a cheap no-op (loop-safe re-fire)', async () => {
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'plain' }] }],
|
||||
});
|
||||
await runGuard('member', doc, true, { type: 'doc', content: [] });
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -40,9 +40,12 @@ import {
|
||||
} from '../constants';
|
||||
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripDisallowedHtmlEmbedNodes,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
@@ -56,6 +59,21 @@ export class PersistenceExtension implements Extension {
|
||||
// coalescing window" per document and OR it across all edits in the window,
|
||||
// so the snapshot is marked 'agent' regardless of who wrote last.
|
||||
private agentTouched: Map<string, boolean> = new Map();
|
||||
// Per-document debounce timers for the early htmlEmbed guard (Gitea #26).
|
||||
// onChange schedules a short (~300ms) debounced strip that converges the
|
||||
// shared ydoc for all connected clients well before the 10s store debounce,
|
||||
// shrinking the pre-persist broadcast window of a non-admin's transient embed.
|
||||
private htmlEmbedGuardTimers: Map<string, NodeJS.Timeout> = new Map();
|
||||
// Per-document cache of the workspace htmlEmbed toggle (Gitea #26). Populated
|
||||
// in onLoadDocument (which already loads the page + has workspace context) and
|
||||
// read in onChange to gate early-guard scheduling: when the toggle is OFF (the
|
||||
// common default) we schedule NOTHING — no timer, no fromYdoc, no DB read — and
|
||||
// rely on the onStoreDocument strip as the backstop (when OFF the embed does
|
||||
// not execute in editable mode anyway). Cleared in afterUnloadDocument.
|
||||
// STALENESS: if an admin flips the toggle ON mid-session this cache stays OFF
|
||||
// until the document is reloaded, so the early guard won't schedule — accepted,
|
||||
// the onStoreDocument backstop still strips on persist.
|
||||
private htmlEmbedToggleByDoc: Map<string, boolean> = new Map();
|
||||
|
||||
constructor(
|
||||
private readonly pageRepo: PageRepo,
|
||||
@@ -87,6 +105,23 @@ export class PersistenceExtension implements Extension {
|
||||
return;
|
||||
}
|
||||
|
||||
// Cache the workspace htmlEmbed toggle for this document (Gitea #26). We
|
||||
// already have the page (hence its workspaceId) here, so resolve the toggle
|
||||
// once and cache it keyed by documentName. onChange reads this to decide
|
||||
// whether to schedule the early guard at all — when OFF we skip the guard
|
||||
// entirely (no timer, no fromYdoc, no DB read). Cleared in
|
||||
// afterUnloadDocument. See htmlEmbedToggleByDoc for the staleness note.
|
||||
try {
|
||||
const enabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(page.workspaceId))?.settings,
|
||||
);
|
||||
this.htmlEmbedToggleByDoc.set(documentName, enabled);
|
||||
} catch (err) {
|
||||
// Fail OFF: if the toggle can't be resolved, never schedule the early
|
||||
// guard; the onStoreDocument backstop still strips on persist.
|
||||
this.htmlEmbedToggleByDoc.set(documentName, false);
|
||||
}
|
||||
|
||||
if (page.ydoc) {
|
||||
this.logger.debug(`ydoc loaded from db: ${pageId}`);
|
||||
|
||||
@@ -130,25 +165,51 @@ export class PersistenceExtension implements Extension {
|
||||
// every htmlEmbed node before it is written to the page row AND before the
|
||||
// ydoc state is re-encoded, so the node cannot be reintroduced by a
|
||||
// non-admin via the collab socket.
|
||||
// NOTE (residual risk): the gate is keyed to the storing connection's user.
|
||||
// If an admin already authored an htmlEmbed and a non-admin's later store
|
||||
// does not touch it, this strip would remove the admin's embed on that
|
||||
// non-admin store. This is intentionally conservative (fail closed): the
|
||||
// admin re-adds/keeps the node on their own next edit. A future refinement
|
||||
// could diff against the previously persisted admin-authored embeds.
|
||||
// NOTE (defense-in-depth refinement, Gitea #29): the gate is keyed to the
|
||||
// storing connection's user, but it no longer blindly strips EVERY embed on
|
||||
// a non-admin store. We distinguish two cases inside the !allowed branch:
|
||||
// - Feature toggle OFF => strip ALL embeds (the feature is disabled for
|
||||
// everyone; existing embeds get cleaned up on the next save).
|
||||
// - Toggle ON but the storer is a NON-admin => strip only NEWLY-introduced
|
||||
// embeds and PRESERVE embeds already present in the currently-persisted
|
||||
// page content (admin-authored, already vetted). So a non-admin still
|
||||
// cannot ADD an embed, but an unrelated edit (e.g. a paragraph tweak) no
|
||||
// longer destroys an admin's existing embed (the prior data-loss bug).
|
||||
// The pre-existing-embed identity is the raw `attrs.source` (see
|
||||
// collectHtmlEmbedSources). A non-admin who copies an existing admin embed's
|
||||
// exact source elsewhere passes — acceptable, that HTML is already vetted.
|
||||
//
|
||||
// ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in
|
||||
// the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs
|
||||
// update to connected clients immediately, so a non-admin's transient
|
||||
// htmlEmbed can execute in OTHER open editors' browsers in the brief window
|
||||
// before this persist strips it. The exposure is limited to concurrent
|
||||
// AUTHENTICATED space members who have the doc open with Edit rights
|
||||
// (semi-trusted) — anonymous public-share/readonly viewers do NOT open a
|
||||
// collab socket (ReadonlyPageEditor renders fetched, already-stripped
|
||||
// content; HocuspocusProvider is only used by the authenticated editable
|
||||
// page-editor), and the PERSISTED page row plus every share/readonly read
|
||||
// path are protected by this strip. The window is therefore accepted rather
|
||||
// than mitigated with an inbound beforeBroadcast strip.
|
||||
// ACCEPTED RESIDUAL RISK (toggle-ON allow-list TOCTOU): the allow-list is a
|
||||
// best-effort snapshot read OUTSIDE the locked transaction (the prior content
|
||||
// is pre-read above, but inside executeTx the row is re-read withLock without
|
||||
// recomputing the allow-list). A concurrent admin store that changes the
|
||||
// persisted embeds between the pre-read and this write can make the preserve
|
||||
// decision use a slightly stale snapshot — worst case one embed transiently
|
||||
// kept or dropped; it converges on the next store, with no auth bypass or
|
||||
// broader data loss. The race is accepted because it only affects concurrent
|
||||
// authenticated editors on the (rare) toggle-ON non-admin path, converges on
|
||||
// the next store, and the persisted row plus every share/readonly read path
|
||||
// remain protected by the strip.
|
||||
//
|
||||
// RESIDUAL RISK (pre-persist broadcast window) — NOW MITIGATED (Gitea #26):
|
||||
// this strip runs in the debounced onStoreDocument (up to 10s), but
|
||||
// hocuspocus broadcasts each inbound Yjs update to connected clients
|
||||
// immediately, so a non-admin's transient htmlEmbed can execute in OTHER open
|
||||
// editors' browsers in the window before this persist strips it. The exposure
|
||||
// is limited to concurrent AUTHENTICATED space members who have the doc open
|
||||
// with Edit rights (semi-trusted) — anonymous public-share/readonly viewers do
|
||||
// NOT open a collab socket (ReadonlyPageEditor renders fetched,
|
||||
// already-stripped content; HocuspocusProvider is only used by the
|
||||
// authenticated editable page-editor), and the PERSISTED page row plus every
|
||||
// share/readonly read path are protected by this strip.
|
||||
// The window is now SHRUNK to sub-second by an onChange-debounced early guard
|
||||
// (~300ms) — see guardHtmlEmbed() — which runs the SAME preserve/strip gate as
|
||||
// this block and re-encodes the cleaned ydoc, converging the doc for all
|
||||
// clients long before this 10s store debounce fires. This onStoreDocument
|
||||
// strip remains the authoritative backstop for persistence. The irreducible
|
||||
// residual is only the VERY FIRST inbound broadcast before the ~300ms debounce
|
||||
// fires: hocuspocus exposes no synchronous beforeBroadcast filter to drop the
|
||||
// node before that first relay, so it cannot be eliminated entirely.
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature
|
||||
// toggle is ON and the storing user is an admin/owner. OFF (default) =>
|
||||
// stripped for everyone (existing embeds get cleaned up on next save).
|
||||
@@ -157,22 +218,44 @@ export class PersistenceExtension implements Extension {
|
||||
);
|
||||
if (!htmlEmbedAllowed(htmlEmbedEnabled, context?.user?.role)) {
|
||||
if (hasHtmlEmbedNode(tiptapJson)) {
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`,
|
||||
);
|
||||
tiptapJson = stripHtmlEmbedNodes(tiptapJson);
|
||||
// Reflect the stripped content back into the shared ydoc so the node is
|
||||
// removed for all connected clients, not just the persisted row.
|
||||
const fragment = document.getXmlFragment('default');
|
||||
if (fragment.length > 0) {
|
||||
fragment.delete(0, fragment.length);
|
||||
let strippedJson: typeof tiptapJson;
|
||||
if (htmlEmbedEnabled === false) {
|
||||
// Toggle OFF: feature disabled for everyone -> strip ALL embeds.
|
||||
strippedJson = stripHtmlEmbedNodes(tiptapJson);
|
||||
} else {
|
||||
// Toggle ON, non-admin storer: preserve embeds already present in the
|
||||
// currently-persisted (admin-vetted) page content; strip only the
|
||||
// newly-introduced ones. Pre-read the prior content — a small extra
|
||||
// query only on this rare non-admin + toggle-ON path.
|
||||
const prior = await this.pageRepo.findById(pageId, {
|
||||
includeContent: true,
|
||||
});
|
||||
const allowed = collectHtmlEmbedSources(prior?.content);
|
||||
strippedJson = stripDisallowedHtmlEmbedNodes(tiptapJson, allowed);
|
||||
}
|
||||
|
||||
// Only mutate the ydoc + log when the strip actually removed something;
|
||||
// an unnecessary ydoc rewrite would churn the doc for all clients. With
|
||||
// the toggle-ON branch a non-admin store that only touches admin-vetted
|
||||
// embeds leaves the content unchanged here.
|
||||
if (!isDeepStrictEqual(strippedJson, tiptapJson)) {
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`,
|
||||
);
|
||||
tiptapJson = strippedJson;
|
||||
// Reflect the stripped content back into the shared ydoc so the node
|
||||
// is removed for all connected clients, not just the persisted row.
|
||||
const fragment = document.getXmlFragment('default');
|
||||
if (fragment.length > 0) {
|
||||
fragment.delete(0, fragment.length);
|
||||
}
|
||||
const cleanDoc = TiptapTransformer.toYdoc(
|
||||
tiptapJson,
|
||||
'default',
|
||||
tiptapExtensions,
|
||||
);
|
||||
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
|
||||
}
|
||||
const cleanDoc = TiptapTransformer.toYdoc(
|
||||
tiptapJson,
|
||||
'default',
|
||||
tiptapExtensions,
|
||||
);
|
||||
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -346,12 +429,168 @@ export class PersistenceExtension implements Extension {
|
||||
if (data.context?.actor === 'agent') {
|
||||
this.agentTouched.set(documentName, true);
|
||||
}
|
||||
|
||||
// Early htmlEmbed guard scheduling (Gitea #26). Schedule the short debounced
|
||||
// guard ONLY when (a) this document's workspace toggle is cached ON and
|
||||
// (b) the changing connection's user is a NON-admin (cannot author
|
||||
// htmlEmbed). When the toggle is OFF/unknown we schedule NOTHING — no timer,
|
||||
// no fromYdoc, no DB read — killing the OFF-case overhead (the common
|
||||
// default); the onStoreDocument strip is the backstop and an OFF embed does
|
||||
// not execute in editable mode anyway. We do NO expensive work here — we only
|
||||
// (re)schedule the timer; the debounce coalesces rapid edits into a single
|
||||
// guard check.
|
||||
if (
|
||||
userId &&
|
||||
this.htmlEmbedToggleByDoc.get(documentName) === true &&
|
||||
!canAuthorHtmlEmbed(data.context?.user?.role)
|
||||
) {
|
||||
const existing = this.htmlEmbedGuardTimers.get(documentName);
|
||||
if (existing) {
|
||||
clearTimeout(existing);
|
||||
}
|
||||
const timer = setTimeout(() => {
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
void this.guardHtmlEmbed(documentName, data.document, data.context);
|
||||
}, 300);
|
||||
this.htmlEmbedGuardTimers.set(documentName, timer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Early, onChange-debounced htmlEmbed strip (Gitea #26). Mirrors the
|
||||
* onStoreDocument admin gate but runs ~300ms after a non-admin edit instead of
|
||||
* waiting for the 10s store debounce, so a non-admin's transient embed is
|
||||
* removed from the shared ydoc — and re-broadcast as cleaned state — for all
|
||||
* connected clients in sub-second time. onStoreDocument remains the
|
||||
* authoritative persistence backstop; this is an ADDITIONAL early pass.
|
||||
*
|
||||
* CONCURRENCY (the critical invariant): the Y.Doc mutation is a single
|
||||
* SYNCHRONOUS block with NO `await` between the fromYdoc snapshot and the
|
||||
* applyUpdate write. ALL async work (the workspace toggle lookup and the
|
||||
* persisted-content read for the allow-list) happens FIRST, before that block.
|
||||
* Because JS is single-threaded, a synchronous block cannot interleave with
|
||||
* inbound Yjs update handlers, so a concurrent edit that lands while we await
|
||||
* cannot be CLOBBERED: we re-snapshot the live doc only after all awaits, then
|
||||
* delete + rebuild + applyUpdate without yielding. (An earlier version awaited
|
||||
* DB reads BETWEEN the snapshot and the write, so a concurrent edit in that gap
|
||||
* was lost — this restructure fixes that.)
|
||||
*
|
||||
* The allow-list is a best-effort snapshot read outside any lock (TOCTOU
|
||||
* accepted, same as onStoreDocument): worst case one embed is transiently kept
|
||||
* or dropped; it converges on the next guard/store, with no auth bypass.
|
||||
*
|
||||
* Loop-safety: the corrective applyUpdate has a null origin, so the re-fired
|
||||
* onChange carries no userId and is not rescheduled; and after a strip no
|
||||
* htmlEmbed remains, so a subsequent guard fire is a cheap no-op (the
|
||||
* hasHtmlEmbedNode early-exit). NEVER throws — an unhandled rejection in a timer
|
||||
* would crash the process — so the whole body is wrapped in try/catch.
|
||||
*/
|
||||
private async guardHtmlEmbed(
|
||||
documentName: string,
|
||||
document: Y.Doc,
|
||||
context: any,
|
||||
): Promise<void> {
|
||||
// Defensive: ensure no stale timer entry survives for this document.
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
try {
|
||||
// Re-check defensively: onChange only schedules for non-admins, but if an
|
||||
// admin/owner somehow reaches here, the embed is authored content — do
|
||||
// nothing (onStoreDocument's toggle-AND-admin gate handles persistence).
|
||||
if (canAuthorHtmlEmbed(context?.user?.role)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// ---- ASYNC PHASE: do ALL awaits up front, before touching the ydoc. ----
|
||||
// Resolve the workspace toggle exactly as onStoreDocument does. When OFF we
|
||||
// strip everything; when ON we use the preserve logic (keep admin-vetted
|
||||
// embeds, strip only the non-admin's newly-introduced ones).
|
||||
const enabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(context?.user?.workspaceId))
|
||||
?.settings,
|
||||
);
|
||||
|
||||
// The allow-list (admin-vetted sources already in the persisted content).
|
||||
// null => strip ALL (toggle OFF). Read here, BEFORE the synchronous block,
|
||||
// so no await sits between the doc snapshot and the doc write.
|
||||
let allowed: Set<string> | null = null;
|
||||
if (enabled !== false) {
|
||||
const prior = await this.pageRepo.findById(getPageId(documentName), {
|
||||
includeContent: true,
|
||||
});
|
||||
allowed = collectHtmlEmbedSources(prior?.content);
|
||||
}
|
||||
|
||||
// The awaits above may have let the document be unloaded/destroyed. If so,
|
||||
// bail — mutating a destroyed doc is pointless and could throw (the
|
||||
// try/catch is the ultimate safety net regardless).
|
||||
if ((document as { isDestroyed?: boolean }).isDestroyed) {
|
||||
return;
|
||||
}
|
||||
|
||||
// ---- SYNCHRONOUS PHASE: snapshot -> strip -> reflect, NO await here. ----
|
||||
// Because there is no await between fromYdoc and applyUpdate, no inbound
|
||||
// Yjs update can interleave, so a concurrent edit cannot be lost.
|
||||
const json = TiptapTransformer.fromYdoc(document, 'default');
|
||||
|
||||
// Cheap exit: nothing to guard if the doc has no embed at all. This is also
|
||||
// why a post-strip re-fire is a no-op (loop-safe).
|
||||
if (!hasHtmlEmbedNode(json)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const strippedJson =
|
||||
allowed === null
|
||||
? stripHtmlEmbedNodes(json)
|
||||
: stripDisallowedHtmlEmbedNodes(json, allowed);
|
||||
|
||||
// Nothing was stripped (e.g. the only embed is an admin-vetted one) — do
|
||||
// not churn the shared ydoc for all clients.
|
||||
if (isDeepStrictEqual(strippedJson, json)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Reflect the stripped content back into the shared ydoc EXACTLY as
|
||||
// onStoreDocument does, so the node is removed for all connected clients,
|
||||
// not just on the eventual persist. This re-encode broadcasts the cleaned
|
||||
// state; after it hasHtmlEmbedNode is false, so any later guard fire is a
|
||||
// cheap no-op (loop-safe).
|
||||
const fragment = document.getXmlFragment('default');
|
||||
if (fragment.length > 0) {
|
||||
fragment.delete(0, fragment.length);
|
||||
}
|
||||
const cleanDoc = TiptapTransformer.toYdoc(
|
||||
strippedJson,
|
||||
'default',
|
||||
tiptapExtensions,
|
||||
);
|
||||
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
|
||||
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) via early onChange guard by user ${context?.user?.id} on ${documentName}`,
|
||||
);
|
||||
} catch (err) {
|
||||
// NEVER rethrow out of a timer-scheduled call.
|
||||
this.logger.error(
|
||||
`Early htmlEmbed guard failed on ${documentName}`,
|
||||
err,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async afterUnloadDocument(data: afterUnloadDocumentPayload) {
|
||||
const documentName = data.documentName;
|
||||
this.contributors.delete(documentName);
|
||||
this.agentTouched.delete(documentName);
|
||||
// Drop the cached toggle for this document so a reload re-resolves it (and
|
||||
// picks up a mid-session admin toggle flip).
|
||||
this.htmlEmbedToggleByDoc.delete(documentName);
|
||||
// Clear any pending early-guard timer so it cannot fire after the document
|
||||
// is unloaded (leak / use-after-unload prevention).
|
||||
const timer = this.htmlEmbedGuardTimers.get(documentName);
|
||||
if (timer) {
|
||||
clearTimeout(timer);
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
}
|
||||
}
|
||||
|
||||
private consumeContributors(documentName: string): string[] {
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripDisallowedHtmlEmbedNodes,
|
||||
stripHtmlEmbedNodes,
|
||||
} from './html-embed.util';
|
||||
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
|
||||
@@ -93,6 +95,17 @@ describe('stripHtmlEmbedNodes', () => {
|
||||
expect(result).toEqual(doc);
|
||||
});
|
||||
|
||||
it('neutralizes a root node that is itself an htmlEmbed', () => {
|
||||
// Defensive: the PM root is always a `doc`, so this is unreachable in normal
|
||||
// use, but the helper must still never return a bare htmlEmbed.
|
||||
const root = {
|
||||
type: 'htmlEmbed',
|
||||
attrs: { source: '<script>alert(1)</script>' },
|
||||
};
|
||||
const result = stripHtmlEmbedNodes(root);
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
|
||||
// htmlEmbed sits as a sibling of a paragraph, nested four containers deep.
|
||||
const doc = {
|
||||
@@ -158,6 +171,169 @@ describe('stripHtmlEmbedNodes', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('collectHtmlEmbedSources', () => {
|
||||
it('collects the source of every htmlEmbed node, including nested ones', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<b>top</b>' } },
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<i>nested</i>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'x' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const sources = collectHtmlEmbedSources(doc);
|
||||
expect(sources).toEqual(new Set(['<b>top</b>', '<i>nested</i>']));
|
||||
});
|
||||
|
||||
it('returns an empty set for a doc with no embeds', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
|
||||
};
|
||||
expect(collectHtmlEmbedSources(doc).size).toBe(0);
|
||||
});
|
||||
|
||||
it('gracefully skips embeds with absent attrs or non-string source', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed' }, // no attrs
|
||||
{ type: 'htmlEmbed', attrs: {} }, // no source
|
||||
{ type: 'htmlEmbed', attrs: { source: 42 } }, // non-string
|
||||
{ type: 'htmlEmbed', attrs: { source: '<ok/>' } },
|
||||
],
|
||||
};
|
||||
expect(collectHtmlEmbedSources(doc)).toEqual(new Set(['<ok/>']));
|
||||
});
|
||||
|
||||
it('returns an empty set for non-object input', () => {
|
||||
expect(collectHtmlEmbedSources(null).size).toBe(0);
|
||||
expect(collectHtmlEmbedSources(undefined).size).toBe(0);
|
||||
expect(collectHtmlEmbedSources('x' as any).size).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('stripDisallowedHtmlEmbedNodes', () => {
|
||||
it('keeps an embed whose source is allowed and removes the rest', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<new-evil/>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
|
||||
],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
|
||||
expect(collectHtmlEmbedSources(result)).toEqual(new Set(['<vetted/>']));
|
||||
// The allowed embed and the paragraph survive; the new embed is gone.
|
||||
expect(result.content).toHaveLength(2);
|
||||
expect(result.content[0].attrs.source).toBe('<vetted/>');
|
||||
expect(result.content[1].type).toBe('paragraph');
|
||||
});
|
||||
|
||||
it('keeps BOTH embeds when two nodes share the same allowed source', () => {
|
||||
// Source-identity semantics: identity is the raw `attrs.source`, so a
|
||||
// non-admin who duplicates an existing admin-vetted source keeps both copies.
|
||||
// This is intended — the raw HTML is already vetted, so a duplicate is safe.
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'mid' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
|
||||
],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
|
||||
expect(hasHtmlEmbedNode(result)).toBe(true);
|
||||
const embeds = result.content.filter(
|
||||
(n: any) => n.type === 'htmlEmbed',
|
||||
);
|
||||
expect(embeds).toHaveLength(2);
|
||||
expect(embeds.every((n: any) => n.attrs.source === '<vetted/>')).toBe(true);
|
||||
});
|
||||
|
||||
it('removes a newly-introduced embed when nothing is allowed', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'htmlEmbed', attrs: { source: '<new/>' } }],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set());
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('filters nested embeds by the allow-list (e.g. inside columns)', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'columns',
|
||||
content: [
|
||||
{
|
||||
type: 'column',
|
||||
content: [
|
||||
{ type: 'htmlEmbed', attrs: { source: '<vetted/>' } },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<new/>' } },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
|
||||
const col = findFirstChild(result, 'column');
|
||||
expect(col.content).toHaveLength(1);
|
||||
expect(col.content[0].attrs.source).toBe('<vetted/>');
|
||||
});
|
||||
|
||||
it('treats an embed with absent/non-string source as not allowed (stripped)', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'htmlEmbed' },
|
||||
{ type: 'htmlEmbed', attrs: {} },
|
||||
],
|
||||
};
|
||||
const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['<vetted/>']));
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('does not mutate the input document', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [{ type: 'htmlEmbed', attrs: { source: '<new/>' } }],
|
||||
};
|
||||
stripDisallowedHtmlEmbedNodes(doc, new Set());
|
||||
expect(doc.content).toHaveLength(1);
|
||||
expect(doc.content[0].type).toBe('htmlEmbed');
|
||||
});
|
||||
|
||||
it('neutralizes a root node that is itself a disallowed htmlEmbed', () => {
|
||||
const root = { type: 'htmlEmbed', attrs: { source: '<new/>' } };
|
||||
const result = stripDisallowedHtmlEmbedNodes(root, new Set());
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
});
|
||||
|
||||
it('keeps a root node that is an allowed htmlEmbed (defensive branch)', () => {
|
||||
const root = { type: 'htmlEmbed', attrs: { source: '<vetted/>' } };
|
||||
const result = stripDisallowedHtmlEmbedNodes(root, new Set(['<vetted/>']));
|
||||
expect(collectHtmlEmbedSources(result)).toEqual(new Set(['<vetted/>']));
|
||||
});
|
||||
|
||||
it('returns non-object input unchanged', () => {
|
||||
expect(stripDisallowedHtmlEmbedNodes(null as any, new Set())).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
|
||||
it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
|
||||
const rootEmbed = { type: 'htmlEmbed', attrs: { source: '<script>r</script>' } };
|
||||
|
||||
@@ -22,6 +22,15 @@ export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
|
||||
|
||||
const node = pmJson as unknown as JSONContent;
|
||||
|
||||
// Defensive root-type check: if the ROOT node is itself an htmlEmbed, the
|
||||
// children-filtering below could never drop it, so a bare htmlEmbed would be
|
||||
// returned as-is. This branch is unreachable in normal use (the PM document
|
||||
// root is always a `doc`) and exists only to make the helper total — a bare
|
||||
// htmlEmbed can never be returned by this function.
|
||||
if (node.type === HTML_EMBED_NODE_NAME) {
|
||||
return { type: 'doc', content: [] } as unknown as T;
|
||||
}
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
const filtered: JSONContent[] = [];
|
||||
for (const child of node.content) {
|
||||
@@ -39,6 +48,107 @@ export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
|
||||
return { ...node } as unknown as T;
|
||||
}
|
||||
|
||||
/**
|
||||
* Walk the document and collect a stable identity for every `htmlEmbed` node.
|
||||
*
|
||||
* The identity is the node's `attrs.source` string — the raw HTML the embed
|
||||
* renders. Two embeds that render the exact same HTML are treated as the same
|
||||
* identity. Used by the collab persist path to know which embeds are ALREADY
|
||||
* present in the currently-persisted (admin-vetted) page content, so a later
|
||||
* non-admin store can strip only NEWLY-introduced embeds while preserving the
|
||||
* pre-existing admin-authored ones.
|
||||
*
|
||||
* Absent attrs or a non-string/absent `source` are skipped gracefully (such a
|
||||
* node contributes no identity to the set).
|
||||
*/
|
||||
export function collectHtmlEmbedSources(pmJson: unknown): Set<string> {
|
||||
const sources = new Set<string>();
|
||||
|
||||
const walk = (node: unknown): void => {
|
||||
if (!node || typeof node !== 'object') {
|
||||
return;
|
||||
}
|
||||
const n = node as JSONContent;
|
||||
if (n.type === HTML_EMBED_NODE_NAME) {
|
||||
const source = (n.attrs as Record<string, unknown> | undefined)?.source;
|
||||
if (typeof source === 'string') {
|
||||
sources.add(source);
|
||||
}
|
||||
}
|
||||
if (Array.isArray(n.content)) {
|
||||
for (const child of n.content) {
|
||||
walk(child);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
walk(pmJson);
|
||||
return sources;
|
||||
}
|
||||
|
||||
/**
|
||||
* Like {@link stripHtmlEmbedNodes}, but KEEP any `htmlEmbed` node whose
|
||||
* `attrs.source` is in `allowedSources`; remove the rest.
|
||||
*
|
||||
* Used on the collab persist path when the feature toggle is ON but the storing
|
||||
* user is a NON-admin: `allowedSources` is the set of embed sources already
|
||||
* present in the currently-persisted page content (admin-authored, already
|
||||
* vetted). A non-admin therefore cannot ADD a new embed, but their unrelated
|
||||
* edit also cannot destroy an admin's existing one.
|
||||
*
|
||||
* NOTE: identity is the raw source string, so a non-admin who COPIES an existing
|
||||
* admin embed's exact source into a NEW location passes this check. That is
|
||||
* acceptable — the source is already admin-vetted content present in the doc; no
|
||||
* new untrusted HTML is introduced.
|
||||
*
|
||||
* Returns a NEW document; the input is not mutated. Same defensive root-type
|
||||
* check pattern as {@link stripHtmlEmbedNodes}.
|
||||
*/
|
||||
export function stripDisallowedHtmlEmbedNodes<T = JSONContent>(
|
||||
pmJson: T,
|
||||
allowedSources: Set<string>,
|
||||
): T {
|
||||
if (!pmJson || typeof pmJson !== 'object') {
|
||||
return pmJson;
|
||||
}
|
||||
|
||||
const node = pmJson as unknown as JSONContent;
|
||||
|
||||
// Defensive root-type check (mirrors stripHtmlEmbedNodes): if the ROOT node is
|
||||
// itself an htmlEmbed and its source is NOT allowed, the children-filtering
|
||||
// below could never drop it, so neutralize it here. Unreachable in normal use
|
||||
// (the PM document root is always a `doc`).
|
||||
if (node.type === HTML_EMBED_NODE_NAME) {
|
||||
const source = (node.attrs as Record<string, unknown> | undefined)?.source;
|
||||
if (typeof source === 'string' && allowedSources.has(source)) {
|
||||
return { ...node } as unknown as T;
|
||||
}
|
||||
return { type: 'doc', content: [] } as unknown as T;
|
||||
}
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
const filtered: JSONContent[] = [];
|
||||
for (const child of node.content) {
|
||||
// Drop a disallowed htmlEmbed child (newly introduced); keep an allowed
|
||||
// one (already present in the persisted, admin-vetted content).
|
||||
if (child && child.type === HTML_EMBED_NODE_NAME) {
|
||||
const source = (child.attrs as Record<string, unknown> | undefined)
|
||||
?.source;
|
||||
if (typeof source === 'string' && allowedSources.has(source)) {
|
||||
filtered.push({ ...child });
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are
|
||||
// also filtered by the same allow-list.
|
||||
filtered.push(stripDisallowedHtmlEmbedNodes(child, allowedSources));
|
||||
}
|
||||
return { ...node, content: filtered } as unknown as T;
|
||||
}
|
||||
|
||||
return { ...node } as unknown as T;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the document contains at least one `htmlEmbed` node anywhere
|
||||
* in its tree. Useful to decide whether a strip pass actually changed anything
|
||||
|
||||
@@ -39,6 +39,11 @@ import {
|
||||
} from '../casl/interfaces/space-ability.type';
|
||||
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
import { RecentPageDto } from './dto/recent-page.dto';
|
||||
import { CreatedByUserDto } from './dto/created-by-user.dto';
|
||||
import { DuplicatePageDto } from './dto/duplicate-page.dto';
|
||||
@@ -63,6 +68,7 @@ export class PageController {
|
||||
constructor(
|
||||
private readonly pageService: PageService,
|
||||
private readonly pageRepo: PageRepo,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private readonly pageHistoryService: PageHistoryService,
|
||||
private readonly spaceAbility: SpaceAbilityFactory,
|
||||
private readonly pageAccessService: PageAccessService,
|
||||
@@ -92,6 +98,18 @@ export class PageController {
|
||||
|
||||
const permissions = { canEdit, hasRestriction };
|
||||
|
||||
if (page.content) {
|
||||
const workspace = await this.workspaceRepo.findById(page.workspaceId);
|
||||
if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
|
||||
// Kill-switch: when the workspace feature is OFF, never serve raw
|
||||
// htmlEmbed nodes on the read path (mirrors the public-share strip),
|
||||
// so disabling the feature is an immediate, total kill-switch and not
|
||||
// dependent on the page being re-saved. Admin-authored content only.
|
||||
// Fail-closed: a missing workspace resolves to OFF and is stripped.
|
||||
page.content = stripHtmlEmbedNodes(page.content) as any;
|
||||
}
|
||||
}
|
||||
|
||||
if (dto.format && dto.format !== 'json' && page.content) {
|
||||
const contentOutput =
|
||||
dto.format === 'markdown'
|
||||
@@ -536,6 +554,16 @@ export class PageController {
|
||||
|
||||
await this.pageAccessService.validateCanView(page, user);
|
||||
|
||||
if (history.content) {
|
||||
const workspace = await this.workspaceRepo.findById(page.workspaceId);
|
||||
if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
|
||||
// Kill-switch: history snapshots are an authenticated read path too, so
|
||||
// strip htmlEmbed when the workspace feature is OFF (same as /info and
|
||||
// the public-share path). Fail-closed on a missing workspace.
|
||||
history.content = stripHtmlEmbedNodes(history.content) as any;
|
||||
}
|
||||
}
|
||||
|
||||
return history;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,25 +1,31 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
// Exercises the REAL PageService htmlEmbed admin gate on its two non-collab
|
||||
// write paths: PageService.create() and PageService.duplicatePage(). Both build
|
||||
// content/textContent/ydoc directly and persist, bypassing the collab
|
||||
// onStoreDocument strip, so each must run the incoming document through the
|
||||
// toggle-AND-admin gate (`htmlEmbedAllowed(featureEnabled, role)` -> if not
|
||||
// allowed, `stripHtmlEmbedNodes`) BEFORE persisting.
|
||||
//
|
||||
// This spec constructs the REAL PageService with every constructor dep mocked,
|
||||
// feeds content containing an `htmlEmbed`, calls the real method, and asserts on
|
||||
// the PERSISTED content (captured at the repo insert / db insert boundary) that
|
||||
// the embed was actually stripped (member/unknown role) or preserved
|
||||
// (admin/owner + toggle ON). Mirrors the GOOD pattern in
|
||||
// transclusion/spec/transclusion-unsync-html-embed.spec.ts.
|
||||
//
|
||||
// page.service.ts pulls in the collaboration gateway (a transitive ESM chain
|
||||
// `lib0/decoding.js` that jest's transformIgnorePatterns does not transpile), so
|
||||
// that single module is mocked away — it is never used on the create/duplicate
|
||||
// gate paths.
|
||||
jest.mock('../../../collaboration/collaboration.gateway', () => ({
|
||||
CollaborationGateway: class {},
|
||||
}));
|
||||
|
||||
// PageService.create() and duplicatePage() guards.
|
||||
//
|
||||
// page.service.ts cannot be unit-LOADED under the server's jest config (a
|
||||
// transitive ESM dep, @sindresorhus/slugify, is not in transformIgnorePatterns),
|
||||
// so we cover the two load-bearing properties at the strongest feasible layer:
|
||||
//
|
||||
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact predicate
|
||||
// each path applies: non-admin/unknown role -> strip, admin/owner -> keep.
|
||||
//
|
||||
// (2) IDENTITY — source-pin which role each path reads (create: the `callerRole`
|
||||
// param threaded from the request; duplicate: `authUser.role`), so a
|
||||
// refactor that drops the guard or reads the wrong role trips the test.
|
||||
// This is what replaces the removed `applyAdminGate` stand-in for these
|
||||
// two entrypoints.
|
||||
import { PageService } from './page.service';
|
||||
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
const WS = 'ws-1';
|
||||
const SPACE = 'space-1';
|
||||
const USER = 'u1';
|
||||
|
||||
const docWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
@@ -29,74 +35,206 @@ const docWithEmbed = () => ({
|
||||
],
|
||||
});
|
||||
|
||||
// The real predicate both paths apply (see SECURITY blocks in page.service.ts):
|
||||
// toggle AND admin.
|
||||
function applyGate(
|
||||
json: any,
|
||||
featureEnabled: boolean,
|
||||
role: string | null | undefined,
|
||||
) {
|
||||
if (!htmlEmbedAllowed(featureEnabled, role) && hasHtmlEmbedNode(json)) {
|
||||
return stripHtmlEmbedNodes(json);
|
||||
}
|
||||
return json;
|
||||
// Minimal chainable kysely stub. `nextPagePosition` (used by create) and
|
||||
// duplicatePage's bulk insert go through `this.db`; only the calls those paths
|
||||
// make need to resolve. `capturedInserts` collects every page row handed to
|
||||
// `insertInto('pages').values(...)` so we can assert on the persisted content.
|
||||
function buildDb(capturedInserts: any[]) {
|
||||
const selectChain: any = {
|
||||
select: () => selectChain,
|
||||
selectAll: () => selectChain,
|
||||
where: () => selectChain,
|
||||
orderBy: () => selectChain,
|
||||
limit: () => selectChain,
|
||||
execute: async () => [],
|
||||
executeTakeFirst: async () => undefined,
|
||||
};
|
||||
const db: any = {
|
||||
selectFrom: () => selectChain,
|
||||
insertInto: (table: string) => ({
|
||||
values: (rows: any) => {
|
||||
if (table === 'pages') {
|
||||
for (const row of Array.isArray(rows) ? rows : [rows]) {
|
||||
capturedInserts.push(row);
|
||||
}
|
||||
}
|
||||
return { execute: async () => undefined };
|
||||
},
|
||||
}),
|
||||
// executeTx -> db.transaction().execute(cb): run the callback with `db`
|
||||
// itself acting as the transaction so any in-tx inserts are captured too.
|
||||
transaction: () => ({ execute: async (cb: any) => cb(db) }),
|
||||
};
|
||||
return db;
|
||||
}
|
||||
|
||||
describe('page create/duplicate gate decision (real helpers)', () => {
|
||||
it('toggle ON + non-admin (member) strips', () => {
|
||||
const result = applyGate(docWithEmbed(), true, 'member');
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
expect(result.content).toHaveLength(1);
|
||||
expect(result.content[0].content[0].text).toBe('body');
|
||||
});
|
||||
// Build the REAL PageService with all 13 constructor deps mocked. `featureEnabled`
|
||||
// drives the workspace toggle the gate reads via workspaceRepo.findById.
|
||||
function buildService(opts: {
|
||||
featureEnabled: boolean;
|
||||
capturedInserts: any[];
|
||||
rootPage?: any; // for duplicatePage
|
||||
}) {
|
||||
const { featureEnabled, capturedInserts } = opts;
|
||||
|
||||
it('toggle ON + unknown/empty role fails closed (strips)', () => {
|
||||
for (const role of [null, undefined, 'viewer'] as const) {
|
||||
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, role))).toBe(
|
||||
false,
|
||||
);
|
||||
}
|
||||
});
|
||||
const pageRepo: any = {
|
||||
findById: jest.fn(async () => null), // no parent page in create tests
|
||||
// create() persists here; capture the row so we can inspect content.
|
||||
insertPage: jest.fn(async (row: any) => {
|
||||
capturedInserts.push(row);
|
||||
return { id: 'new-page', slugId: 'slug-1', ...row };
|
||||
}),
|
||||
getPageAndDescendants: jest.fn(async () => [opts.rootPage].filter(Boolean)),
|
||||
};
|
||||
|
||||
it('toggle ON + admin/owner keep', () => {
|
||||
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'admin'))).toBe(
|
||||
true,
|
||||
);
|
||||
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), true, 'owner'))).toBe(
|
||||
true,
|
||||
const pagePermissionRepo: any = {
|
||||
// duplicatePage filters accessible pages; grant the root so it is copied.
|
||||
filterAccessiblePageIds: jest.fn(async () =>
|
||||
opts.rootPage ? [opts.rootPage.id] : [],
|
||||
),
|
||||
};
|
||||
|
||||
const workspaceRepo: any = {
|
||||
findById: jest.fn(async () => ({
|
||||
id: WS,
|
||||
settings: { htmlEmbed: featureEnabled },
|
||||
})),
|
||||
};
|
||||
|
||||
const attachmentRepo: any = { findByIds: jest.fn(async () => []) };
|
||||
const storageService: any = { copy: jest.fn(async () => undefined) };
|
||||
const noopQueue: any = { add: jest.fn(async () => undefined) };
|
||||
const eventEmitter: any = { emit: jest.fn() };
|
||||
const collaborationGateway: any = {};
|
||||
const watcherService: any = {};
|
||||
// duplicatePage fires transclusion bulk inserts after persisting; they are
|
||||
// best-effort (wrapped in try/catch) and irrelevant to the gate.
|
||||
const transclusionService: any = {
|
||||
insertTransclusionsForPages: jest.fn(async () => undefined),
|
||||
insertReferencesForPages: jest.fn(async () => undefined),
|
||||
insertTemplateReferencesForPages: jest.fn(async () => undefined),
|
||||
};
|
||||
|
||||
const db = buildDb(capturedInserts);
|
||||
|
||||
const service = new PageService(
|
||||
pageRepo,
|
||||
pagePermissionRepo,
|
||||
attachmentRepo,
|
||||
db,
|
||||
storageService,
|
||||
noopQueue, // attachmentQueue
|
||||
noopQueue, // aiQueue
|
||||
noopQueue, // generalQueue
|
||||
eventEmitter,
|
||||
collaborationGateway,
|
||||
watcherService,
|
||||
transclusionService,
|
||||
workspaceRepo,
|
||||
);
|
||||
return service;
|
||||
}
|
||||
|
||||
describe('PageService.create htmlEmbed admin gate (real code)', () => {
|
||||
// Run create() and return the content actually persisted via insertPage.
|
||||
async function persistedContent(
|
||||
featureEnabled: boolean,
|
||||
callerRole: string | null | undefined,
|
||||
) {
|
||||
const capturedInserts: any[] = [];
|
||||
const service = buildService({ featureEnabled, capturedInserts });
|
||||
await service.create(
|
||||
USER,
|
||||
WS,
|
||||
{
|
||||
spaceId: SPACE,
|
||||
title: 'p',
|
||||
// 'json' format is used as-is by parseProsemirrorContent (passed to the
|
||||
// real jsonToNode schema validation), so hand it the PM-JSON object.
|
||||
content: docWithEmbed(),
|
||||
format: 'json' as any,
|
||||
} as any,
|
||||
callerRole,
|
||||
);
|
||||
expect(capturedInserts).toHaveLength(1);
|
||||
return capturedInserts[0].content;
|
||||
}
|
||||
|
||||
it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
|
||||
const content = await persistedContent(true, 'member');
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
// Non-embed content survives.
|
||||
expect(JSON.stringify(content)).toContain('body');
|
||||
});
|
||||
|
||||
it('toggle OFF strips for everyone (admin/owner/member)', () => {
|
||||
for (const role of ['admin', 'owner', 'member'] as const) {
|
||||
expect(hasHtmlEmbedNode(applyGate(docWithEmbed(), false, role))).toBe(
|
||||
false,
|
||||
);
|
||||
it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false);
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
const SRC = readFileSync(join(__dirname, 'page.service.ts'), 'utf-8');
|
||||
describe('PageService.duplicatePage htmlEmbed admin gate (real code)', () => {
|
||||
// Duplicate a single source page that contains an embed and return the content
|
||||
// persisted for the copy (captured at db.insertInto('pages').values(...)).
|
||||
async function persistedContent(
|
||||
featureEnabled: boolean,
|
||||
role: string | null | undefined,
|
||||
) {
|
||||
const rootPage: any = {
|
||||
id: 'src-page',
|
||||
slugId: 'src-slug',
|
||||
title: 'Source',
|
||||
icon: null,
|
||||
position: 'a0',
|
||||
spaceId: SPACE,
|
||||
workspaceId: WS,
|
||||
parentPageId: null,
|
||||
content: docWithEmbed(),
|
||||
};
|
||||
const capturedInserts: any[] = [];
|
||||
const service = buildService({ featureEnabled, capturedInserts, rootPage });
|
||||
const authUser: any = { id: USER, workspaceId: WS, role };
|
||||
await service.duplicatePage(rootPage, undefined, authUser);
|
||||
// The bulk insert is the page persist boundary; one source page -> one copy.
|
||||
const pageRows = capturedInserts.filter((r) => r.content);
|
||||
expect(pageRows.length).toBeGreaterThanOrEqual(1);
|
||||
return pageRows[0].content;
|
||||
}
|
||||
|
||||
describe('page create/duplicate gate identity is pinned (source contract)', () => {
|
||||
it('create() gates on toggle AND the caller role param before deriving content/ydoc', () => {
|
||||
// create() receives the caller's workspace role as `callerRole` and gates on
|
||||
// the combined toggle-AND-admin predicate; the embed must be stripped BEFORE
|
||||
// insertPage.
|
||||
expect(SRC).toMatch(
|
||||
/!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*callerRole\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/,
|
||||
);
|
||||
expect(SRC).toContain('prosemirrorJson = stripHtmlEmbedNodes(prosemirrorJson)');
|
||||
it('toggle ON + member: persisted copy has htmlEmbed stripped', async () => {
|
||||
const content = await persistedContent(true, 'member');
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
expect(JSON.stringify(content)).toContain('body');
|
||||
});
|
||||
|
||||
it('duplicatePage() gates on toggle AND the duplicating user role (authUser.role)', () => {
|
||||
expect(SRC).toMatch(
|
||||
/!htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*authUser\.role\s*\)\s*&&\s*hasHtmlEmbedNode\(\s*prosemirrorJson\s*\)/,
|
||||
);
|
||||
it('toggle ON + admin: persisted copy keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true);
|
||||
});
|
||||
|
||||
it('both paths resolve the toggle from the workspace settings', () => {
|
||||
expect(SRC).toContain('isHtmlEmbedFeatureEnabled(');
|
||||
expect(SRC).toContain('this.workspaceRepo.findById(');
|
||||
it('toggle ON + owner: persisted copy keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false);
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,123 +1,266 @@
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { join } from 'node:path';
|
||||
import {
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
// Exercises the REAL htmlEmbed admin gate on the two import write paths:
|
||||
//
|
||||
// (1) ImportService.importPage() — single .html/.md upload
|
||||
// (2) FileImportTaskService.processGenericImport() — zip / multi-file import
|
||||
//
|
||||
// Both build content/textContent/ydoc directly and persist (bypassing the
|
||||
// collab onStoreDocument strip), so each must run the imported document through
|
||||
// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read
|
||||
// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed,
|
||||
// `stripHtmlEmbedNodes` BEFORE persisting.
|
||||
//
|
||||
// This spec constructs the REAL services with deps mocked, feeds an imported
|
||||
// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed
|
||||
// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED
|
||||
// content (captured at the insert boundary) is stripped for a non-admin /
|
||||
// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD
|
||||
// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts.
|
||||
//
|
||||
// Three modules are mocked away because they pull transitive ESM deps that
|
||||
// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the
|
||||
// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via
|
||||
// import-attachment). None of them participate in the gate decision:
|
||||
// - import-formatter: contextless HTML cleanup + link rewriting; replaced with
|
||||
// faithful passthroughs (the embed div has no href/iframe, so the real
|
||||
// normalizer would leave it untouched anyway).
|
||||
// - import-attachment: attachment rewriting; passthrough returns html as-is.
|
||||
jest.mock('../../../collaboration/collaboration.gateway', () => ({
|
||||
CollaborationGateway: class {},
|
||||
}));
|
||||
jest.mock('../utils/import-formatter', () => ({
|
||||
normalizeImportHtml: () => {},
|
||||
formatImportHtml: async (opts: any) => ({
|
||||
html: opts.html,
|
||||
backlinks: [],
|
||||
pageIcon: undefined,
|
||||
}),
|
||||
}));
|
||||
jest.mock('./import-attachment.service', () => ({
|
||||
ImportAttachmentService: class {},
|
||||
}));
|
||||
|
||||
// FAIL-CLOSED IDENTITY for the import write paths.
|
||||
//
|
||||
// import.service / file-import-task.service cannot be unit-LOADED under the
|
||||
// server's jest config (a transitive ESM dep, @sindresorhus/slugify, is not in
|
||||
// transformIgnorePatterns). So we cover the two load-bearing properties at the
|
||||
// strongest feasible layer:
|
||||
//
|
||||
// (1) BEHAVIOR — using the REAL html-embed helpers, replay the exact gate
|
||||
// predicate each entrypoint runs against the role resolved from
|
||||
// userRepo.findById(...): a MISSING user (findById -> undefined) must fail
|
||||
// closed (strip), and only 'admin'/'owner' keep the embed.
|
||||
//
|
||||
// (2) IDENTITY — source-pin which identity governs the gate so a refactor that
|
||||
// swaps the lookup to the wrong user (e.g. the queue worker's caller) is
|
||||
// caught: zip import resolves the role from `fileTask.creatorId`; single
|
||||
// import from the request `userId`. NOT some ambient caller.
|
||||
//
|
||||
// If a guard is deleted/misplaced or the identity field changes, these break.
|
||||
import { promises as fs } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { ImportService } from './import.service';
|
||||
import { FileImportTaskService } from './file-import-task.service';
|
||||
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
const docWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'imported body' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>x</script>' } },
|
||||
],
|
||||
});
|
||||
const WS = 'ws-1';
|
||||
const SPACE = 'space-1';
|
||||
const USER = 'importer-1';
|
||||
|
||||
// The real predicate both import entrypoints apply (see the SECURITY blocks in
|
||||
// import.service.ts and file-import-task.service.ts): resolve the importer via
|
||||
// userRepo.findById, then `!canAuthorHtmlEmbed(role) && hasHtmlEmbedNode(json)`.
|
||||
function applyImportGate(
|
||||
json: any,
|
||||
featureEnabled: boolean,
|
||||
importingUser: { role?: string } | undefined,
|
||||
) {
|
||||
if (
|
||||
!htmlEmbedAllowed(featureEnabled, importingUser?.role) &&
|
||||
hasHtmlEmbedNode(json)
|
||||
) {
|
||||
return stripHtmlEmbedNodes(json);
|
||||
}
|
||||
return json;
|
||||
// HTML carrying the serialized htmlEmbed node. The REAL htmlToJson parses
|
||||
// `<div data-type="htmlEmbed" data-source="BASE64">` into an htmlEmbed PM node
|
||||
// (base64 below decodes to `<script>x</script>`).
|
||||
const HTML_WITH_EMBED =
|
||||
'<p>imported body</p>' +
|
||||
'<div data-type="htmlEmbed" data-source="PHNjcmlwdD54PC9zY3JpcHQ+"></div>';
|
||||
|
||||
function workspaceRepoFor(featureEnabled: boolean) {
|
||||
return {
|
||||
findById: jest.fn(async () => ({
|
||||
id: WS,
|
||||
settings: { htmlEmbed: featureEnabled },
|
||||
})),
|
||||
};
|
||||
}
|
||||
|
||||
describe('import gate fail-closed by toggle AND resolved-user role (real helpers)', () => {
|
||||
it('toggle ON + missing user (userRepo.findById -> undefined) strips the embed', () => {
|
||||
// findById returns undefined when the user/workspace pair does not resolve;
|
||||
// undefined?.role is undefined -> htmlEmbedAllowed(true, undefined) === false.
|
||||
const result = applyImportGate(docWithEmbed(), true, undefined);
|
||||
expect(hasHtmlEmbedNode(result)).toBe(false);
|
||||
// userRepo.findById resolves the importer's role (or undefined for a missing
|
||||
// user -> fail closed).
|
||||
function userRepoFor(user: { role?: string } | undefined) {
|
||||
return { findById: jest.fn(async () => user) };
|
||||
}
|
||||
|
||||
describe('ImportService.importPage htmlEmbed admin gate (real code)', () => {
|
||||
// Run importPage with a single uploaded .html and return the persisted content
|
||||
// captured at pageRepo.insertPage.
|
||||
async function persistedContent(
|
||||
featureEnabled: boolean,
|
||||
user: { role?: string } | undefined,
|
||||
) {
|
||||
const captured: any[] = [];
|
||||
const pageRepo: any = {
|
||||
insertPage: jest.fn(async (row: any) => {
|
||||
captured.push(row);
|
||||
return { id: 'p1', slugId: 's1', ...row };
|
||||
}),
|
||||
};
|
||||
// db is only used for getNewPagePosition (a select chain).
|
||||
const selectChain: any = {
|
||||
select: () => selectChain,
|
||||
where: () => selectChain,
|
||||
orderBy: () => selectChain,
|
||||
limit: () => selectChain,
|
||||
executeTakeFirst: async () => undefined,
|
||||
};
|
||||
const db: any = { selectFrom: () => selectChain };
|
||||
|
||||
const service = new ImportService(
|
||||
pageRepo,
|
||||
userRepoFor(user) as any,
|
||||
{ putBuffer: jest.fn() } as any, // storageService (unused on this path)
|
||||
db,
|
||||
{ add: jest.fn() } as any, // fileTaskQueue (unused)
|
||||
workspaceRepoFor(featureEnabled) as any,
|
||||
);
|
||||
|
||||
const file: any = {
|
||||
filename: 'doc.html',
|
||||
toBuffer: async () => Buffer.from(HTML_WITH_EMBED, 'utf-8'),
|
||||
};
|
||||
await service.importPage(Promise.resolve(file), USER, SPACE, WS);
|
||||
expect(captured).toHaveLength(1);
|
||||
return captured[0].content;
|
||||
}
|
||||
|
||||
it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
|
||||
const content = await persistedContent(true, { role: 'member' });
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
expect(JSON.stringify(content)).toContain('imported body');
|
||||
});
|
||||
|
||||
it("toggle ON + resolved role 'member' strips", () => {
|
||||
it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'member' })),
|
||||
hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("toggle ON + resolved role 'admin' keeps the embed", () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'admin' })),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("toggle ON + resolved role 'owner' keeps the embed", () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), true, { role: 'owner' })),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF strips for every role (admin/owner/member)', () => {
|
||||
for (const role of ['admin', 'owner', 'member'] as const) {
|
||||
expect(
|
||||
hasHtmlEmbedNode(applyImportGate(docWithEmbed(), false, { role })),
|
||||
).toBe(false);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
// Source-pin the identity each entrypoint feeds to userRepo.findById. These are
|
||||
// the lines that decide WHOSE role governs the gate; pinning them means a
|
||||
// refactor that points the lookup at the wrong user trips the test.
|
||||
const SRC_DIR = join(__dirname);
|
||||
describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => {
|
||||
let extractDir: string;
|
||||
|
||||
describe('import gate identity is pinned to the importer (source contract)', () => {
|
||||
it('single import resolves the role from the request userId', () => {
|
||||
const src = readFileSync(join(SRC_DIR, 'import.service.ts'), 'utf-8');
|
||||
// The role lookup must key on the request `userId`, then gate on the role.
|
||||
expect(src).toMatch(
|
||||
/this\.userRepo\.findById\(\s*userId\s*,\s*workspaceId\s*\)/,
|
||||
);
|
||||
expect(src).toMatch(
|
||||
/htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*\)/,
|
||||
);
|
||||
// And the toggle is resolved from the workspace settings.
|
||||
expect(src).toContain('isHtmlEmbedFeatureEnabled(');
|
||||
// And the gate uses the real strip helper.
|
||||
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
|
||||
beforeEach(async () => {
|
||||
// Real temp dir holding a single .html page that carries the embed; the
|
||||
// method reads it from disk via fs.readFile.
|
||||
extractDir = await fs.mkdtemp(path.join(os.tmpdir(), 'html-embed-import-'));
|
||||
await fs.writeFile(path.join(extractDir, 'page.html'), HTML_WITH_EMBED);
|
||||
});
|
||||
|
||||
it('zip import resolves the role from fileTask.creatorId (NOT the queue caller)', () => {
|
||||
const src = readFileSync(
|
||||
join(SRC_DIR, 'file-import-task.service.ts'),
|
||||
'utf-8',
|
||||
afterEach(async () => {
|
||||
await fs.rm(extractDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
// Run processGenericImport over the temp dir and return the content persisted
|
||||
// for the imported page (captured at trx.insertInto('pages').values(...)).
|
||||
async function persistedContent(
|
||||
featureEnabled: boolean,
|
||||
user: { role?: string } | undefined,
|
||||
) {
|
||||
const captured: any[] = [];
|
||||
const trxInsertChain = (table: string) => ({
|
||||
values: (row: any) => {
|
||||
if (table === 'pages') captured.push(row);
|
||||
return { execute: async () => undefined };
|
||||
},
|
||||
});
|
||||
const trx: any = { insertInto: trxInsertChain };
|
||||
const db: any = {
|
||||
// spaces lookup at the top of processGenericImport
|
||||
selectFrom: () => ({
|
||||
select: () => ({
|
||||
where: () => ({ executeTakeFirst: async () => ({ slug: 'sp' }) }),
|
||||
}),
|
||||
}),
|
||||
// executeTx -> db.transaction().execute(cb)
|
||||
transaction: () => ({ execute: async (cb: any) => cb(trx) }),
|
||||
};
|
||||
|
||||
// importService stub: only the real, gate-relevant helpers are used. We give
|
||||
// it the REAL implementations by delegating to a real ImportService for
|
||||
// processHTML/extractTitleAndRemoveHeading/createYdoc so the embed parse and
|
||||
// strip path runs for real.
|
||||
const realImport = new ImportService(
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
);
|
||||
expect(src).toMatch(
|
||||
/this\.userRepo\.findById\(\s*fileTask\.creatorId\s*,\s*fileTask\.workspaceId\s*,?\s*\)/,
|
||||
const importService: any = {
|
||||
processHTML: (html: string) => realImport.processHTML(html),
|
||||
extractTitleAndRemoveHeading: (s: any) =>
|
||||
realImport.extractTitleAndRemoveHeading(s),
|
||||
createYdoc: (j: any) => realImport.createYdoc(j),
|
||||
};
|
||||
|
||||
const importAttachmentService: any = {
|
||||
// passthrough: no attachment rewriting, return html unchanged
|
||||
processAttachments: jest.fn(async (opts: any) => opts.html),
|
||||
};
|
||||
|
||||
const service = new FileImportTaskService(
|
||||
{ putBuffer: jest.fn() } as any, // storageService
|
||||
importService,
|
||||
{ nextPagePosition: jest.fn(async () => 'a0') } as any, // pageService (position only)
|
||||
{ insertBacklink: jest.fn() } as any, // backlinkRepo
|
||||
db,
|
||||
importAttachmentService,
|
||||
userRepoFor(user) as any,
|
||||
workspaceRepoFor(featureEnabled) as any,
|
||||
{ emit: jest.fn() } as any, // eventEmitter
|
||||
{ logBatchWithContext: jest.fn() } as any, // auditService
|
||||
);
|
||||
expect(src).toMatch(
|
||||
/importerCanAuthorHtmlEmbed\s*=\s*htmlEmbedAllowed\(\s*htmlEmbedEnabled\s*,\s*importingUser\?\.role\s*,?\s*\)/,
|
||||
|
||||
const fileTask: any = {
|
||||
id: 'task-1',
|
||||
creatorId: USER,
|
||||
workspaceId: WS,
|
||||
spaceId: SPACE,
|
||||
source: 'generic',
|
||||
};
|
||||
|
||||
await service.processGenericImport({ extractDir, fileTask });
|
||||
expect(captured.length).toBeGreaterThanOrEqual(1);
|
||||
return captured[0].content;
|
||||
}
|
||||
|
||||
it('toggle ON + member: persisted page has htmlEmbed stripped', async () => {
|
||||
const content = await persistedContent(true, { role: 'member' });
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
expect(JSON.stringify(content)).toContain('imported body');
|
||||
});
|
||||
|
||||
it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe(
|
||||
false,
|
||||
);
|
||||
expect(src).toContain('isHtmlEmbedFeatureEnabled(');
|
||||
expect(src).toContain('stripHtmlEmbedNodes(prosemirrorJson)');
|
||||
});
|
||||
|
||||
it('toggle ON + admin: persisted page keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: persisted page keeps the htmlEmbed', async () => {
|
||||
expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
expect(
|
||||
hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })),
|
||||
).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user