refactor(transclusion): unify the ProseMirror collectors into collectNodes (#94)
The three collect*FromPmJson collectors shared the same recursion (and the #55 depth cap) but were copy-pasted, so a future edit could diverge them. Extract a generic collectNodes(doc, {type, map, key, lastWins, skipChildrenOfType}) and reimplement all three on it, byte-output-identical (transclusions last-wins; references/embeds first-wins + transclusionSource skip). Documents (not removes) the write-only page_template_references graph and the near-duplicate client lookup-context as tracked follow-ups, per the issue's guidance. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -273,6 +273,18 @@ export class TransclusionService {
|
||||
* accumulate cross-workspace edges, but rows are still NOT per-viewer
|
||||
* permission-filtered. EVERY consumer of these rows MUST permission-filter at
|
||||
* read time (as `lookupTemplate` does via `filterViewerAccessiblePageIds`).
|
||||
*
|
||||
* NOTE (write-only graph — intentional, not dead): as of now the
|
||||
* `page_template_references` table is WRITE-ONLY in production. It is populated
|
||||
* by three paths (this diff-sync, `insertTemplateReferencesForPages` for new
|
||||
* pages, and the collab persistence flush) but has NO production reader: the
|
||||
* only read of the table is `findByReferencePageId` below, used purely to
|
||||
* compute this sync's insert/delete diff — there is no reverse-navigation
|
||||
* consumer yet (issue #34's dead `findReferencePageIdsBySource` reader was
|
||||
* already removed). The graph is retained deliberately for an upcoming
|
||||
* "used in N pages" reverse-navigation consumer; keep writing it so that
|
||||
* feature has correct history when it lands. Do not remove the write graph or
|
||||
* its migration just because nothing reads it today. (See Gitea #94.)
|
||||
*/
|
||||
async syncPageTemplateReferences(
|
||||
referencePageId: string,
|
||||
|
||||
@@ -20,6 +20,79 @@ export type PageEmbedSnapshot = {
|
||||
sourcePageId: string;
|
||||
};
|
||||
|
||||
/**
|
||||
* Generic, internal "collect every node of one PM type from a doc" walker that
|
||||
* the three public `collect*FromPmJson` collectors are built on. They all share
|
||||
* the exact same recursion (block-container descent + the #55 depth cap), and
|
||||
* differed only in (target type, how a matched node maps to an output snapshot,
|
||||
* how matches are deduped, and whether the walk descends into a
|
||||
* `transclusionSource`). Centralising the recursion here keeps that shared logic
|
||||
* — especially the depth guard — in one place so the collectors can't drift.
|
||||
*
|
||||
* Behaviour knobs (each collector wires these to reproduce its EXACT prior output):
|
||||
* - `type`: only nodes whose `node.type` equals this are passed to `map`.
|
||||
* - `map`: turns a matched node into a snapshot, or returns `undefined` to skip
|
||||
* it (e.g. a transclusion with no id, or a reference missing attrs).
|
||||
* - `key`: dedup key for a produced snapshot. Snapshots sharing a key collapse
|
||||
* to a single entry; `lastWins` decides which one survives.
|
||||
* - `lastWins`: when true (transclusions), a later duplicate overwrites the
|
||||
* earlier one (Map semantics); when false (references, page embeds), the
|
||||
* first occurrence wins and later duplicates are ignored. Either way the
|
||||
* surviving entries keep first-seen insertion order.
|
||||
* - `skipChildrenOfType`: a node type whose subtree the walk must NOT enter.
|
||||
* References/embeds pass `transclusionSource` here (the schema forbids them
|
||||
* inside a source, so a malformed inbound doc can't smuggle one in). The
|
||||
* transclusion collector leaves this undefined because the matched type IS
|
||||
* `transclusionSource` and matched nodes already short-circuit recursion.
|
||||
*
|
||||
* A matched node never recurses into its own children: every target type here is
|
||||
* either an atom (reference/pageEmbed) or a boundary we deliberately don't nest
|
||||
* into (transclusionSource), exactly as the original collectors behaved.
|
||||
*/
|
||||
function collectNodes<T>(
|
||||
doc: unknown,
|
||||
opts: {
|
||||
type: string;
|
||||
map: (node: any) => T | undefined;
|
||||
key: (snapshot: T) => string;
|
||||
lastWins?: boolean;
|
||||
skipChildrenOfType?: string;
|
||||
},
|
||||
): T[] {
|
||||
if (!doc || typeof doc !== 'object') return [];
|
||||
|
||||
const { type, map, key, lastWins = false, skipChildrenOfType } = opts;
|
||||
const byKey = new Map<string, T>();
|
||||
|
||||
const visit = (node: any, depth: number): void => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
// Depth guard against a pathological/cyclic non-JSON input (see
|
||||
// MAX_PM_WALK_DEPTH); unreachable on real docs.
|
||||
if (depth > MAX_PM_WALK_DEPTH) return;
|
||||
|
||||
if (node.type === type) {
|
||||
const snapshot = map(node);
|
||||
if (snapshot !== undefined) {
|
||||
const k = key(snapshot);
|
||||
if (lastWins || !byKey.has(k)) byKey.set(k, snapshot);
|
||||
}
|
||||
return; // matched node: atom or boundary — do not recurse into children
|
||||
}
|
||||
|
||||
// Don't descend into an isolated subtree (schema-enforced boundary).
|
||||
if (skipChildrenOfType !== undefined && node.type === skipChildrenOfType) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) visit(child, depth + 1);
|
||||
}
|
||||
};
|
||||
|
||||
visit(doc, 0);
|
||||
return Array.from(byKey.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* Walks a ProseMirror JSON document and returns one snapshot per top-level
|
||||
* `transclusion` node. Does not recurse into transclusions (schema disallows
|
||||
@@ -30,34 +103,23 @@ export type PageEmbedSnapshot = {
|
||||
export function collectTransclusionsFromPmJson(
|
||||
doc: unknown,
|
||||
): TransclusionNodeSnapshot[] {
|
||||
if (!doc || typeof doc !== 'object') return [];
|
||||
|
||||
const byId = new Map<string, TransclusionNodeSnapshot>();
|
||||
|
||||
const visit = (node: any, depth: number): void => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
// Depth guard against a pathological/cyclic non-JSON input (see
|
||||
// MAX_PM_WALK_DEPTH); unreachable on real docs.
|
||||
if (depth > MAX_PM_WALK_DEPTH) return;
|
||||
|
||||
if (node.type === TRANSCLUSION_TYPE) {
|
||||
// last-wins on duplicate ids (Map.set overwrites) — matches prior behaviour.
|
||||
return collectNodes<TransclusionNodeSnapshot>(doc, {
|
||||
type: TRANSCLUSION_TYPE,
|
||||
map: (node) => {
|
||||
const id = node.attrs?.id;
|
||||
if (typeof id === 'string' && id.length > 0) {
|
||||
byId.set(id, {
|
||||
transclusionId: id,
|
||||
content: { type: 'doc', content: node.content ?? [] },
|
||||
});
|
||||
}
|
||||
return; // do not recurse into transclusion children
|
||||
}
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) visit(child, depth + 1);
|
||||
}
|
||||
};
|
||||
|
||||
visit(doc, 0);
|
||||
return Array.from(byId.values());
|
||||
if (typeof id !== 'string' || id.length === 0) return undefined;
|
||||
return {
|
||||
transclusionId: id,
|
||||
content: { type: 'doc', content: node.content ?? [] },
|
||||
};
|
||||
},
|
||||
key: (snapshot) => snapshot.transclusionId,
|
||||
lastWins: true,
|
||||
// No skipChildrenOfType: TRANSCLUSION_TYPE is itself the matched type, and a
|
||||
// matched node already short-circuits recursion (the schema also forbids a
|
||||
// transclusion nested inside another).
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -70,46 +132,26 @@ export function collectTransclusionsFromPmJson(
|
||||
export function collectReferencesFromPmJson(
|
||||
doc: unknown,
|
||||
): TransclusionReferenceSnapshot[] {
|
||||
if (!doc || typeof doc !== 'object') return [];
|
||||
|
||||
const seen = new Set<string>();
|
||||
const out: TransclusionReferenceSnapshot[] = [];
|
||||
|
||||
const visit = (node: any, depth: number): void => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
// Depth guard against a pathological/cyclic non-JSON input (see
|
||||
// MAX_PM_WALK_DEPTH); unreachable on real docs.
|
||||
if (depth > MAX_PM_WALK_DEPTH) return;
|
||||
|
||||
if (node.type === REFERENCE_TYPE) {
|
||||
// first-wins dedup on (sourcePageId, transclusionId); skip recursing into a
|
||||
// transclusionSource (schema forbids references inside one).
|
||||
return collectNodes<TransclusionReferenceSnapshot>(doc, {
|
||||
type: REFERENCE_TYPE,
|
||||
map: (node) => {
|
||||
const sourcePageId = node.attrs?.sourcePageId;
|
||||
const transclusionId = node.attrs?.transclusionId;
|
||||
if (
|
||||
typeof sourcePageId === 'string' &&
|
||||
sourcePageId.length > 0 &&
|
||||
typeof transclusionId === 'string' &&
|
||||
transclusionId.length > 0
|
||||
typeof sourcePageId !== 'string' ||
|
||||
sourcePageId.length === 0 ||
|
||||
typeof transclusionId !== 'string' ||
|
||||
transclusionId.length === 0
|
||||
) {
|
||||
const key = `${sourcePageId}::${transclusionId}`;
|
||||
if (!seen.has(key)) {
|
||||
seen.add(key);
|
||||
out.push({ sourcePageId, transclusionId });
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
return; // atom node - no children
|
||||
}
|
||||
|
||||
// References cannot live inside a source (schema-enforced); skip recursing
|
||||
// so a malformed inbound doc can't sneak in a nested reference here.
|
||||
if (node.type === TRANSCLUSION_TYPE) return;
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) visit(child, depth + 1);
|
||||
}
|
||||
};
|
||||
|
||||
visit(doc, 0);
|
||||
return out;
|
||||
return { sourcePageId, transclusionId };
|
||||
},
|
||||
key: (snapshot) => `${snapshot.sourcePageId}::${snapshot.transclusionId}`,
|
||||
skipChildrenOfType: TRANSCLUSION_TYPE,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -182,35 +224,17 @@ export function remapPageEmbedSourceIds<T>(
|
||||
export function collectPageEmbedsFromPmJson(
|
||||
doc: unknown,
|
||||
): PageEmbedSnapshot[] {
|
||||
if (!doc || typeof doc !== 'object') return [];
|
||||
|
||||
const seen = new Set<string>();
|
||||
const out: PageEmbedSnapshot[] = [];
|
||||
|
||||
const visit = (node: any, depth: number): void => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
// Stop before the stack can overflow on a pathological/cyclic non-JSON
|
||||
// input; far above any real doc nesting so genuine docs are unaffected.
|
||||
if (depth > MAX_PM_WALK_DEPTH) return;
|
||||
|
||||
if (node.type === PAGE_EMBED_TYPE) {
|
||||
// first-wins dedup on sourcePageId; skip recursing into a transclusionSource.
|
||||
return collectNodes<PageEmbedSnapshot>(doc, {
|
||||
type: PAGE_EMBED_TYPE,
|
||||
map: (node) => {
|
||||
const sourcePageId = node.attrs?.sourcePageId;
|
||||
if (typeof sourcePageId === 'string' && sourcePageId.length > 0) {
|
||||
if (!seen.has(sourcePageId)) {
|
||||
seen.add(sourcePageId);
|
||||
out.push({ sourcePageId });
|
||||
}
|
||||
if (typeof sourcePageId !== 'string' || sourcePageId.length === 0) {
|
||||
return undefined;
|
||||
}
|
||||
return; // atom node - no children
|
||||
}
|
||||
|
||||
if (node.type === TRANSCLUSION_TYPE) return;
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) visit(child, depth + 1);
|
||||
}
|
||||
};
|
||||
|
||||
visit(doc, 0);
|
||||
return out;
|
||||
return { sourcePageId };
|
||||
},
|
||||
key: (snapshot) => snapshot.sourcePageId,
|
||||
skipChildrenOfType: TRANSCLUSION_TYPE,
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user