diff --git a/apps/client/src/features/editor/components/page-embed/page-embed-lookup-context.tsx b/apps/client/src/features/editor/components/page-embed/page-embed-lookup-context.tsx index d29c19dc..237c8ae4 100644 --- a/apps/client/src/features/editor/components/page-embed/page-embed-lookup-context.tsx +++ b/apps/client/src/features/editor/components/page-embed/page-embed-lookup-context.tsx @@ -25,6 +25,15 @@ const PageEmbedLookupContext = createContext(null); * transclusion lookup context but keys purely on `sourcePageId`. On public * shares there is no lookup in MVP, so the context simply isn't mounted (the * node view renders a placeholder when the context is absent). + * + * NOTE (intentional near-duplicate of `transclusion-lookup-context.tsx`): this + * provider duplicates that file's batching / de-dup / cache machinery; only the + * lookup key (sourcePageId here vs sourcePageId+transclusionId there) and the + * API call differ. Unifying them now would mean a generic, parameterised lookup + * provider — a larger client refactor that isn't worth it for just two + * consumers. Per Gitea #94, extract a shared generic provider when a THIRD + * lookup consumer appears; until then keep the two in sync by hand. (Tracked, + * deliberately deferred — not forgotten.) */ export function PageEmbedLookupProvider({ children, diff --git a/apps/server/src/core/page/transclusion/transclusion.service.ts b/apps/server/src/core/page/transclusion/transclusion.service.ts index c3397031..ebee79f9 100644 --- a/apps/server/src/core/page/transclusion/transclusion.service.ts +++ b/apps/server/src/core/page/transclusion/transclusion.service.ts @@ -273,6 +273,18 @@ export class TransclusionService { * accumulate cross-workspace edges, but rows are still NOT per-viewer * permission-filtered. EVERY consumer of these rows MUST permission-filter at * read time (as `lookupTemplate` does via `filterViewerAccessiblePageIds`). + * + * NOTE (write-only graph — intentional, not dead): as of now the + * `page_template_references` table is WRITE-ONLY in production. It is populated + * by three paths (this diff-sync, `insertTemplateReferencesForPages` for new + * pages, and the collab persistence flush) but has NO production reader: the + * only read of the table is `findByReferencePageId` below, used purely to + * compute this sync's insert/delete diff — there is no reverse-navigation + * consumer yet (issue #34's dead `findReferencePageIdsBySource` reader was + * already removed). The graph is retained deliberately for an upcoming + * "used in N pages" reverse-navigation consumer; keep writing it so that + * feature has correct history when it lands. Do not remove the write graph or + * its migration just because nothing reads it today. (See Gitea #94.) */ async syncPageTemplateReferences( referencePageId: string, diff --git a/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts b/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts index 6431b71f..c49b1bf5 100644 --- a/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts +++ b/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts @@ -20,6 +20,79 @@ export type PageEmbedSnapshot = { sourcePageId: string; }; +/** + * Generic, internal "collect every node of one PM type from a doc" walker that + * the three public `collect*FromPmJson` collectors are built on. They all share + * the exact same recursion (block-container descent + the #55 depth cap), and + * differed only in (target type, how a matched node maps to an output snapshot, + * how matches are deduped, and whether the walk descends into a + * `transclusionSource`). Centralising the recursion here keeps that shared logic + * — especially the depth guard — in one place so the collectors can't drift. + * + * Behaviour knobs (each collector wires these to reproduce its EXACT prior output): + * - `type`: only nodes whose `node.type` equals this are passed to `map`. + * - `map`: turns a matched node into a snapshot, or returns `undefined` to skip + * it (e.g. a transclusion with no id, or a reference missing attrs). + * - `key`: dedup key for a produced snapshot. Snapshots sharing a key collapse + * to a single entry; `lastWins` decides which one survives. + * - `lastWins`: when true (transclusions), a later duplicate overwrites the + * earlier one (Map semantics); when false (references, page embeds), the + * first occurrence wins and later duplicates are ignored. Either way the + * surviving entries keep first-seen insertion order. + * - `skipChildrenOfType`: a node type whose subtree the walk must NOT enter. + * References/embeds pass `transclusionSource` here (the schema forbids them + * inside a source, so a malformed inbound doc can't smuggle one in). The + * transclusion collector leaves this undefined because the matched type IS + * `transclusionSource` and matched nodes already short-circuit recursion. + * + * A matched node never recurses into its own children: every target type here is + * either an atom (reference/pageEmbed) or a boundary we deliberately don't nest + * into (transclusionSource), exactly as the original collectors behaved. + */ +function collectNodes( + doc: unknown, + opts: { + type: string; + map: (node: any) => T | undefined; + key: (snapshot: T) => string; + lastWins?: boolean; + skipChildrenOfType?: string; + }, +): T[] { + if (!doc || typeof doc !== 'object') return []; + + const { type, map, key, lastWins = false, skipChildrenOfType } = opts; + const byKey = new Map(); + + const visit = (node: any, depth: number): void => { + if (!node || typeof node !== 'object') return; + // Depth guard against a pathological/cyclic non-JSON input (see + // MAX_PM_WALK_DEPTH); unreachable on real docs. + if (depth > MAX_PM_WALK_DEPTH) return; + + if (node.type === type) { + const snapshot = map(node); + if (snapshot !== undefined) { + const k = key(snapshot); + if (lastWins || !byKey.has(k)) byKey.set(k, snapshot); + } + return; // matched node: atom or boundary — do not recurse into children + } + + // Don't descend into an isolated subtree (schema-enforced boundary). + if (skipChildrenOfType !== undefined && node.type === skipChildrenOfType) { + return; + } + + if (Array.isArray(node.content)) { + for (const child of node.content) visit(child, depth + 1); + } + }; + + visit(doc, 0); + return Array.from(byKey.values()); +} + /** * Walks a ProseMirror JSON document and returns one snapshot per top-level * `transclusion` node. Does not recurse into transclusions (schema disallows @@ -30,34 +103,23 @@ export type PageEmbedSnapshot = { export function collectTransclusionsFromPmJson( doc: unknown, ): TransclusionNodeSnapshot[] { - if (!doc || typeof doc !== 'object') return []; - - const byId = new Map(); - - const visit = (node: any, depth: number): void => { - if (!node || typeof node !== 'object') return; - // Depth guard against a pathological/cyclic non-JSON input (see - // MAX_PM_WALK_DEPTH); unreachable on real docs. - if (depth > MAX_PM_WALK_DEPTH) return; - - if (node.type === TRANSCLUSION_TYPE) { + // last-wins on duplicate ids (Map.set overwrites) — matches prior behaviour. + return collectNodes(doc, { + type: TRANSCLUSION_TYPE, + map: (node) => { const id = node.attrs?.id; - if (typeof id === 'string' && id.length > 0) { - byId.set(id, { - transclusionId: id, - content: { type: 'doc', content: node.content ?? [] }, - }); - } - return; // do not recurse into transclusion children - } - - if (Array.isArray(node.content)) { - for (const child of node.content) visit(child, depth + 1); - } - }; - - visit(doc, 0); - return Array.from(byId.values()); + if (typeof id !== 'string' || id.length === 0) return undefined; + return { + transclusionId: id, + content: { type: 'doc', content: node.content ?? [] }, + }; + }, + key: (snapshot) => snapshot.transclusionId, + lastWins: true, + // No skipChildrenOfType: TRANSCLUSION_TYPE is itself the matched type, and a + // matched node already short-circuits recursion (the schema also forbids a + // transclusion nested inside another). + }); } /** @@ -70,46 +132,26 @@ export function collectTransclusionsFromPmJson( export function collectReferencesFromPmJson( doc: unknown, ): TransclusionReferenceSnapshot[] { - if (!doc || typeof doc !== 'object') return []; - - const seen = new Set(); - const out: TransclusionReferenceSnapshot[] = []; - - const visit = (node: any, depth: number): void => { - if (!node || typeof node !== 'object') return; - // Depth guard against a pathological/cyclic non-JSON input (see - // MAX_PM_WALK_DEPTH); unreachable on real docs. - if (depth > MAX_PM_WALK_DEPTH) return; - - if (node.type === REFERENCE_TYPE) { + // first-wins dedup on (sourcePageId, transclusionId); skip recursing into a + // transclusionSource (schema forbids references inside one). + return collectNodes(doc, { + type: REFERENCE_TYPE, + map: (node) => { const sourcePageId = node.attrs?.sourcePageId; const transclusionId = node.attrs?.transclusionId; if ( - typeof sourcePageId === 'string' && - sourcePageId.length > 0 && - typeof transclusionId === 'string' && - transclusionId.length > 0 + typeof sourcePageId !== 'string' || + sourcePageId.length === 0 || + typeof transclusionId !== 'string' || + transclusionId.length === 0 ) { - const key = `${sourcePageId}::${transclusionId}`; - if (!seen.has(key)) { - seen.add(key); - out.push({ sourcePageId, transclusionId }); - } + return undefined; } - return; // atom node - no children - } - - // References cannot live inside a source (schema-enforced); skip recursing - // so a malformed inbound doc can't sneak in a nested reference here. - if (node.type === TRANSCLUSION_TYPE) return; - - if (Array.isArray(node.content)) { - for (const child of node.content) visit(child, depth + 1); - } - }; - - visit(doc, 0); - return out; + return { sourcePageId, transclusionId }; + }, + key: (snapshot) => `${snapshot.sourcePageId}::${snapshot.transclusionId}`, + skipChildrenOfType: TRANSCLUSION_TYPE, + }); } /** @@ -182,35 +224,17 @@ export function remapPageEmbedSourceIds( export function collectPageEmbedsFromPmJson( doc: unknown, ): PageEmbedSnapshot[] { - if (!doc || typeof doc !== 'object') return []; - - const seen = new Set(); - const out: PageEmbedSnapshot[] = []; - - const visit = (node: any, depth: number): void => { - if (!node || typeof node !== 'object') return; - // Stop before the stack can overflow on a pathological/cyclic non-JSON - // input; far above any real doc nesting so genuine docs are unaffected. - if (depth > MAX_PM_WALK_DEPTH) return; - - if (node.type === PAGE_EMBED_TYPE) { + // first-wins dedup on sourcePageId; skip recursing into a transclusionSource. + return collectNodes(doc, { + type: PAGE_EMBED_TYPE, + map: (node) => { const sourcePageId = node.attrs?.sourcePageId; - if (typeof sourcePageId === 'string' && sourcePageId.length > 0) { - if (!seen.has(sourcePageId)) { - seen.add(sourcePageId); - out.push({ sourcePageId }); - } + if (typeof sourcePageId !== 'string' || sourcePageId.length === 0) { + return undefined; } - return; // atom node - no children - } - - if (node.type === TRANSCLUSION_TYPE) return; - - if (Array.isArray(node.content)) { - for (const child of node.content) visit(child, depth + 1); - } - }; - - visit(doc, 0); - return out; + return { sourcePageId }; + }, + key: (snapshot) => snapshot.sourcePageId, + skipChildrenOfType: TRANSCLUSION_TYPE, + }); }