fix(html-embed): shrink the collab broadcast window with an early onChange guard (#26)
A non-admin's transient htmlEmbed could execute in other open editors until the debounced (10s) onStoreDocument strip. Add a ~300ms onChange-debounced early strip (guardHtmlEmbed) that converges the shared ydoc for everyone far sooner. Safety-critical details: - Scheduled from onChange ONLY for non-admins AND only when the workspace toggle is ON (cached per-document in onLoadDocument), so the common toggle-OFF case does zero extra work. - guardHtmlEmbed does ALL async work (toggle + persisted allow-list read) FIRST, then performs fromYdoc -> strip -> fragment.delete -> applyUpdate in a single SYNCHRONOUS, await-free block, so no inbound Yjs update can interleave and a concurrent edit can never be clobbered. Bails if document.isDestroyed. - Reuses the #29 preserve logic (admin-vetted embeds survive; only the non-admin's new ones are stripped). Loop-safe (corrective update has null origin -> no reschedule; post-strip no embed -> cheap no-op). Per-document timer cleared on unload. onStoreDocument stays the authoritative backstop. The irreducible residual is only the very first inbound broadcast before the debounce fires — Hocuspocus exposes no synchronous beforeBroadcast filter. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -339,3 +339,118 @@ describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)'
|
||||
await expect(runStore('member', emptyDoc)).resolves.toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
// Exercises the REAL early onChange guard (Gitea #26): guardHtmlEmbed converges
|
||||
// the shared ydoc sub-second, before the 10s store debounce. We call it directly
|
||||
// (it is the debounced timer body) and assert the ydoc fragment no longer yields
|
||||
// an htmlEmbed for the non-admin's transient embed, while admin-vetted embeds
|
||||
// already in the persisted content survive.
|
||||
describe('PersistenceExtension.guardHtmlEmbed early onChange guard (real code)', () => {
|
||||
async function runGuard(
|
||||
role: string | null | undefined,
|
||||
doc: Y.Doc,
|
||||
featureEnabled = true,
|
||||
priorContent?: any,
|
||||
) {
|
||||
const { ext } = buildExtension(featureEnabled, priorContent);
|
||||
await (ext as any).guardHtmlEmbed(
|
||||
'page-1',
|
||||
doc,
|
||||
{ user: { id: 'u1', role, workspaceId: 'ws-1' } },
|
||||
);
|
||||
}
|
||||
|
||||
it('toggle ON + non-admin: strips a newly-added embed from the shared ydoc', async () => {
|
||||
// Prior persisted content has NO embed; the live doc has one a non-admin
|
||||
// just added.
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
|
||||
],
|
||||
});
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
true,
|
||||
);
|
||||
|
||||
await runGuard('member', doc, true, { type: 'doc', content: [] });
|
||||
|
||||
// The shared ydoc fragment no longer yields any htmlEmbed.
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('toggle ON + non-admin: preserves a prior admin embed, strips the new one', async () => {
|
||||
const ADMIN_SOURCE = '<script>adminAuthored()</script>';
|
||||
const prior = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
],
|
||||
};
|
||||
// Live doc keeps the admin embed AND adds a brand-new one.
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'intro' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: ADMIN_SOURCE } },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>evil()</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
await runGuard('member', doc, true, prior);
|
||||
|
||||
// Only the admin-vetted source survives in the shared ydoc.
|
||||
expect(
|
||||
collectHtmlEmbedSources(TiptapTransformer.fromYdoc(doc, 'default')),
|
||||
).toEqual(new Set([ADMIN_SOURCE]));
|
||||
});
|
||||
|
||||
it('toggle OFF + non-admin: strips ALL embeds (allow-list is null)', async () => {
|
||||
// Even an embed that matches the prior content is stripped when the toggle
|
||||
// is OFF, because the OFF path passes allowed=null (strip everything) and
|
||||
// never reads the prior content for an allow-list.
|
||||
const SOURCE = '<script>any()</script>';
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] },
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } },
|
||||
],
|
||||
});
|
||||
await runGuard('member', doc, false, {
|
||||
type: 'doc',
|
||||
content: [{ type: HTML_EMBED_NODE_NAME, attrs: { source: SOURCE } }],
|
||||
});
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
|
||||
it('admin role: guard is a defensive no-op (embed preserved)', async () => {
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: HTML_EMBED_NODE_NAME, attrs: { source: '<script>ok()</script>' } },
|
||||
],
|
||||
});
|
||||
await runGuard('admin', doc, true, { type: 'doc', content: [] });
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
true,
|
||||
);
|
||||
});
|
||||
|
||||
it('no embed present: guard is a cheap no-op (loop-safe re-fire)', async () => {
|
||||
const doc = buildYdoc({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph', content: [{ type: 'text', text: 'plain' }] }],
|
||||
});
|
||||
await runGuard('member', doc, true, { type: 'doc', content: [] });
|
||||
expect(hasHtmlEmbedNode(TiptapTransformer.fromYdoc(doc, 'default'))).toBe(
|
||||
false,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -40,6 +40,7 @@ import {
|
||||
} from '../constants';
|
||||
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
@@ -58,6 +59,21 @@ export class PersistenceExtension implements Extension {
|
||||
// coalescing window" per document and OR it across all edits in the window,
|
||||
// so the snapshot is marked 'agent' regardless of who wrote last.
|
||||
private agentTouched: Map<string, boolean> = new Map();
|
||||
// Per-document debounce timers for the early htmlEmbed guard (Gitea #26).
|
||||
// onChange schedules a short (~300ms) debounced strip that converges the
|
||||
// shared ydoc for all connected clients well before the 10s store debounce,
|
||||
// shrinking the pre-persist broadcast window of a non-admin's transient embed.
|
||||
private htmlEmbedGuardTimers: Map<string, NodeJS.Timeout> = new Map();
|
||||
// Per-document cache of the workspace htmlEmbed toggle (Gitea #26). Populated
|
||||
// in onLoadDocument (which already loads the page + has workspace context) and
|
||||
// read in onChange to gate early-guard scheduling: when the toggle is OFF (the
|
||||
// common default) we schedule NOTHING — no timer, no fromYdoc, no DB read — and
|
||||
// rely on the onStoreDocument strip as the backstop (when OFF the embed does
|
||||
// not execute in editable mode anyway). Cleared in afterUnloadDocument.
|
||||
// STALENESS: if an admin flips the toggle ON mid-session this cache stays OFF
|
||||
// until the document is reloaded, so the early guard won't schedule — accepted,
|
||||
// the onStoreDocument backstop still strips on persist.
|
||||
private htmlEmbedToggleByDoc: Map<string, boolean> = new Map();
|
||||
|
||||
constructor(
|
||||
private readonly pageRepo: PageRepo,
|
||||
@@ -89,6 +105,23 @@ export class PersistenceExtension implements Extension {
|
||||
return;
|
||||
}
|
||||
|
||||
// Cache the workspace htmlEmbed toggle for this document (Gitea #26). We
|
||||
// already have the page (hence its workspaceId) here, so resolve the toggle
|
||||
// once and cache it keyed by documentName. onChange reads this to decide
|
||||
// whether to schedule the early guard at all — when OFF we skip the guard
|
||||
// entirely (no timer, no fromYdoc, no DB read). Cleared in
|
||||
// afterUnloadDocument. See htmlEmbedToggleByDoc for the staleness note.
|
||||
try {
|
||||
const enabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(page.workspaceId))?.settings,
|
||||
);
|
||||
this.htmlEmbedToggleByDoc.set(documentName, enabled);
|
||||
} catch (err) {
|
||||
// Fail OFF: if the toggle can't be resolved, never schedule the early
|
||||
// guard; the onStoreDocument backstop still strips on persist.
|
||||
this.htmlEmbedToggleByDoc.set(documentName, false);
|
||||
}
|
||||
|
||||
if (page.ydoc) {
|
||||
this.logger.debug(`ydoc loaded from db: ${pageId}`);
|
||||
|
||||
@@ -158,18 +191,25 @@ export class PersistenceExtension implements Extension {
|
||||
// the next store, and the persisted row plus every share/readonly read path
|
||||
// remain protected by the strip.
|
||||
//
|
||||
// ACCEPTED RESIDUAL RISK (pre-persist broadcast window): this strip runs in
|
||||
// the debounced onStoreDocument, but hocuspocus broadcasts each inbound Yjs
|
||||
// update to connected clients immediately, so a non-admin's transient
|
||||
// htmlEmbed can execute in OTHER open editors' browsers in the brief window
|
||||
// before this persist strips it. The exposure is limited to concurrent
|
||||
// AUTHENTICATED space members who have the doc open with Edit rights
|
||||
// (semi-trusted) — anonymous public-share/readonly viewers do NOT open a
|
||||
// collab socket (ReadonlyPageEditor renders fetched, already-stripped
|
||||
// content; HocuspocusProvider is only used by the authenticated editable
|
||||
// page-editor), and the PERSISTED page row plus every share/readonly read
|
||||
// path are protected by this strip. The window is therefore accepted rather
|
||||
// than mitigated with an inbound beforeBroadcast strip.
|
||||
// RESIDUAL RISK (pre-persist broadcast window) — NOW MITIGATED (Gitea #26):
|
||||
// this strip runs in the debounced onStoreDocument (up to 10s), but
|
||||
// hocuspocus broadcasts each inbound Yjs update to connected clients
|
||||
// immediately, so a non-admin's transient htmlEmbed can execute in OTHER open
|
||||
// editors' browsers in the window before this persist strips it. The exposure
|
||||
// is limited to concurrent AUTHENTICATED space members who have the doc open
|
||||
// with Edit rights (semi-trusted) — anonymous public-share/readonly viewers do
|
||||
// NOT open a collab socket (ReadonlyPageEditor renders fetched,
|
||||
// already-stripped content; HocuspocusProvider is only used by the
|
||||
// authenticated editable page-editor), and the PERSISTED page row plus every
|
||||
// share/readonly read path are protected by this strip.
|
||||
// The window is now SHRUNK to sub-second by an onChange-debounced early guard
|
||||
// (~300ms) — see guardHtmlEmbed() — which runs the SAME preserve/strip gate as
|
||||
// this block and re-encodes the cleaned ydoc, converging the doc for all
|
||||
// clients long before this 10s store debounce fires. This onStoreDocument
|
||||
// strip remains the authoritative backstop for persistence. The irreducible
|
||||
// residual is only the VERY FIRST inbound broadcast before the ~300ms debounce
|
||||
// fires: hocuspocus exposes no synchronous beforeBroadcast filter to drop the
|
||||
// node before that first relay, so it cannot be eliminated entirely.
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature
|
||||
// toggle is ON and the storing user is an admin/owner. OFF (default) =>
|
||||
// stripped for everyone (existing embeds get cleaned up on next save).
|
||||
@@ -389,12 +429,168 @@ export class PersistenceExtension implements Extension {
|
||||
if (data.context?.actor === 'agent') {
|
||||
this.agentTouched.set(documentName, true);
|
||||
}
|
||||
|
||||
// Early htmlEmbed guard scheduling (Gitea #26). Schedule the short debounced
|
||||
// guard ONLY when (a) this document's workspace toggle is cached ON and
|
||||
// (b) the changing connection's user is a NON-admin (cannot author
|
||||
// htmlEmbed). When the toggle is OFF/unknown we schedule NOTHING — no timer,
|
||||
// no fromYdoc, no DB read — killing the OFF-case overhead (the common
|
||||
// default); the onStoreDocument strip is the backstop and an OFF embed does
|
||||
// not execute in editable mode anyway. We do NO expensive work here — we only
|
||||
// (re)schedule the timer; the debounce coalesces rapid edits into a single
|
||||
// guard check.
|
||||
if (
|
||||
userId &&
|
||||
this.htmlEmbedToggleByDoc.get(documentName) === true &&
|
||||
!canAuthorHtmlEmbed(data.context?.user?.role)
|
||||
) {
|
||||
const existing = this.htmlEmbedGuardTimers.get(documentName);
|
||||
if (existing) {
|
||||
clearTimeout(existing);
|
||||
}
|
||||
const timer = setTimeout(() => {
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
void this.guardHtmlEmbed(documentName, data.document, data.context);
|
||||
}, 300);
|
||||
this.htmlEmbedGuardTimers.set(documentName, timer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Early, onChange-debounced htmlEmbed strip (Gitea #26). Mirrors the
|
||||
* onStoreDocument admin gate but runs ~300ms after a non-admin edit instead of
|
||||
* waiting for the 10s store debounce, so a non-admin's transient embed is
|
||||
* removed from the shared ydoc — and re-broadcast as cleaned state — for all
|
||||
* connected clients in sub-second time. onStoreDocument remains the
|
||||
* authoritative persistence backstop; this is an ADDITIONAL early pass.
|
||||
*
|
||||
* CONCURRENCY (the critical invariant): the Y.Doc mutation is a single
|
||||
* SYNCHRONOUS block with NO `await` between the fromYdoc snapshot and the
|
||||
* applyUpdate write. ALL async work (the workspace toggle lookup and the
|
||||
* persisted-content read for the allow-list) happens FIRST, before that block.
|
||||
* Because JS is single-threaded, a synchronous block cannot interleave with
|
||||
* inbound Yjs update handlers, so a concurrent edit that lands while we await
|
||||
* cannot be CLOBBERED: we re-snapshot the live doc only after all awaits, then
|
||||
* delete + rebuild + applyUpdate without yielding. (An earlier version awaited
|
||||
* DB reads BETWEEN the snapshot and the write, so a concurrent edit in that gap
|
||||
* was lost — this restructure fixes that.)
|
||||
*
|
||||
* The allow-list is a best-effort snapshot read outside any lock (TOCTOU
|
||||
* accepted, same as onStoreDocument): worst case one embed is transiently kept
|
||||
* or dropped; it converges on the next guard/store, with no auth bypass.
|
||||
*
|
||||
* Loop-safety: the corrective applyUpdate has a null origin, so the re-fired
|
||||
* onChange carries no userId and is not rescheduled; and after a strip no
|
||||
* htmlEmbed remains, so a subsequent guard fire is a cheap no-op (the
|
||||
* hasHtmlEmbedNode early-exit). NEVER throws — an unhandled rejection in a timer
|
||||
* would crash the process — so the whole body is wrapped in try/catch.
|
||||
*/
|
||||
private async guardHtmlEmbed(
|
||||
documentName: string,
|
||||
document: Y.Doc,
|
||||
context: any,
|
||||
): Promise<void> {
|
||||
// Defensive: ensure no stale timer entry survives for this document.
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
try {
|
||||
// Re-check defensively: onChange only schedules for non-admins, but if an
|
||||
// admin/owner somehow reaches here, the embed is authored content — do
|
||||
// nothing (onStoreDocument's toggle-AND-admin gate handles persistence).
|
||||
if (canAuthorHtmlEmbed(context?.user?.role)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// ---- ASYNC PHASE: do ALL awaits up front, before touching the ydoc. ----
|
||||
// Resolve the workspace toggle exactly as onStoreDocument does. When OFF we
|
||||
// strip everything; when ON we use the preserve logic (keep admin-vetted
|
||||
// embeds, strip only the non-admin's newly-introduced ones).
|
||||
const enabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(context?.user?.workspaceId))
|
||||
?.settings,
|
||||
);
|
||||
|
||||
// The allow-list (admin-vetted sources already in the persisted content).
|
||||
// null => strip ALL (toggle OFF). Read here, BEFORE the synchronous block,
|
||||
// so no await sits between the doc snapshot and the doc write.
|
||||
let allowed: Set<string> | null = null;
|
||||
if (enabled !== false) {
|
||||
const prior = await this.pageRepo.findById(getPageId(documentName), {
|
||||
includeContent: true,
|
||||
});
|
||||
allowed = collectHtmlEmbedSources(prior?.content);
|
||||
}
|
||||
|
||||
// The awaits above may have let the document be unloaded/destroyed. If so,
|
||||
// bail — mutating a destroyed doc is pointless and could throw (the
|
||||
// try/catch is the ultimate safety net regardless).
|
||||
if ((document as { isDestroyed?: boolean }).isDestroyed) {
|
||||
return;
|
||||
}
|
||||
|
||||
// ---- SYNCHRONOUS PHASE: snapshot -> strip -> reflect, NO await here. ----
|
||||
// Because there is no await between fromYdoc and applyUpdate, no inbound
|
||||
// Yjs update can interleave, so a concurrent edit cannot be lost.
|
||||
const json = TiptapTransformer.fromYdoc(document, 'default');
|
||||
|
||||
// Cheap exit: nothing to guard if the doc has no embed at all. This is also
|
||||
// why a post-strip re-fire is a no-op (loop-safe).
|
||||
if (!hasHtmlEmbedNode(json)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const strippedJson =
|
||||
allowed === null
|
||||
? stripHtmlEmbedNodes(json)
|
||||
: stripDisallowedHtmlEmbedNodes(json, allowed);
|
||||
|
||||
// Nothing was stripped (e.g. the only embed is an admin-vetted one) — do
|
||||
// not churn the shared ydoc for all clients.
|
||||
if (isDeepStrictEqual(strippedJson, json)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Reflect the stripped content back into the shared ydoc EXACTLY as
|
||||
// onStoreDocument does, so the node is removed for all connected clients,
|
||||
// not just on the eventual persist. This re-encode broadcasts the cleaned
|
||||
// state; after it hasHtmlEmbedNode is false, so any later guard fire is a
|
||||
// cheap no-op (loop-safe).
|
||||
const fragment = document.getXmlFragment('default');
|
||||
if (fragment.length > 0) {
|
||||
fragment.delete(0, fragment.length);
|
||||
}
|
||||
const cleanDoc = TiptapTransformer.toYdoc(
|
||||
strippedJson,
|
||||
'default',
|
||||
tiptapExtensions,
|
||||
);
|
||||
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
|
||||
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) via early onChange guard by user ${context?.user?.id} on ${documentName}`,
|
||||
);
|
||||
} catch (err) {
|
||||
// NEVER rethrow out of a timer-scheduled call.
|
||||
this.logger.error(
|
||||
`Early htmlEmbed guard failed on ${documentName}`,
|
||||
err,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
async afterUnloadDocument(data: afterUnloadDocumentPayload) {
|
||||
const documentName = data.documentName;
|
||||
this.contributors.delete(documentName);
|
||||
this.agentTouched.delete(documentName);
|
||||
// Drop the cached toggle for this document so a reload re-resolves it (and
|
||||
// picks up a mid-session admin toggle flip).
|
||||
this.htmlEmbedToggleByDoc.delete(documentName);
|
||||
// Clear any pending early-guard timer so it cannot fire after the document
|
||||
// is unloaded (leak / use-after-unload prevention).
|
||||
const timer = this.htmlEmbedGuardTimers.get(documentName);
|
||||
if (timer) {
|
||||
clearTimeout(timer);
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
}
|
||||
}
|
||||
|
||||
private consumeContributors(documentName: string): string[] {
|
||||
|
||||
Reference in New Issue
Block a user