From c0d312d8f5af92c5d60ed67aad2be2b2d81a8bb7 Mon Sep 17 00:00:00 2001 From: claude code agent 227 Date: Sun, 21 Jun 2026 03:17:37 +0300 Subject: [PATCH] harden(transclusion): depth-cap the ProseMirror collectors (#55) collectPageEmbedsFromPmJson (and the sibling collectors/remap) recursed with no guard, so a pathological/cyclic non-JSON input could stack-overflow (RangeError). Add a depth cap (1000, far above any real doc nesting) so such input is handled gracefully. Normal documents are unaffected. Updates a stale test that asserted the old throwing behavior. Co-Authored-By: Claude Opus 4.8 --- .../transclusion/spec/page-embed.util.spec.ts | 17 ++++--- .../transclusion-prosemirror.util.spec.ts | 46 +++++++++++++++++++ .../utils/transclusion-prosemirror.util.ts | 43 ++++++++++++----- 3 files changed, 87 insertions(+), 19 deletions(-) diff --git a/apps/server/src/core/page/transclusion/spec/page-embed.util.spec.ts b/apps/server/src/core/page/transclusion/spec/page-embed.util.spec.ts index 9219154c..fd3d9986 100644 --- a/apps/server/src/core/page/transclusion/spec/page-embed.util.spec.ts +++ b/apps/server/src/core/page/transclusion/spec/page-embed.util.spec.ts @@ -131,15 +131,18 @@ describe('collectPageEmbedsFromPmJson', () => { expect(collectPageEmbedsFromPmJson(doc)).toEqual([{ sourcePageId: 'deep' }]); }); - it('terminates (does not silently hang) on a self-referencing/cyclic object', () => { - // FINDING: there is NO explicit cycle guard. A hand-built cyclic JS object - // (which cannot arise from JSON parsing — the real input path) makes the - // recursive walk overflow the stack and throw a RangeError. It TERMINATES - // with a controlled error rather than recursing unboundedly forever, and a - // non-cyclic (JSON-shaped) document is never affected. + it('returns gracefully (does not throw) on a self-referencing/cyclic object', () => { + // A depth guard (see MAX_PM_WALK_DEPTH) defends against a hand-built cyclic + // JS object — which cannot arise from JSON parsing, the real input path — + // so the recursive walk stops at the cap instead of overflowing the stack. + // A non-cyclic (JSON-shaped) document is never affected. const node: any = { type: 'doc', content: [] }; node.content.push(node); // content array references its own parent node - expect(() => collectPageEmbedsFromPmJson(node)).toThrow(RangeError); + let got: ReturnType; + expect(() => { + got = collectPageEmbedsFromPmJson(node); + }).not.toThrow(); + expect(got!).toEqual([]); }); }); diff --git a/apps/server/src/core/page/transclusion/spec/transclusion-prosemirror.util.spec.ts b/apps/server/src/core/page/transclusion/spec/transclusion-prosemirror.util.spec.ts index 1661a090..705a4a83 100644 --- a/apps/server/src/core/page/transclusion/spec/transclusion-prosemirror.util.spec.ts +++ b/apps/server/src/core/page/transclusion/spec/transclusion-prosemirror.util.spec.ts @@ -1,4 +1,5 @@ import { + collectPageEmbedsFromPmJson, collectReferencesFromPmJson, collectTransclusionsFromPmJson, } from '../utils/transclusion-prosemirror.util'; @@ -238,3 +239,48 @@ describe('collectReferencesFromPmJson', () => { ]); }); }); + +describe('collectPageEmbedsFromPmJson', () => { + it('returns [] for null/undefined doc', () => { + expect(collectPageEmbedsFromPmJson(null)).toEqual([]); + expect(collectPageEmbedsFromPmJson(undefined)).toEqual([]); + }); + + it('collects unique sourcePageIds from pageEmbed nodes', () => { + const doc = { + type: 'doc', + content: [ + { type: 'pageEmbed', attrs: { sourcePageId: 'p1' } }, + { type: 'pageEmbed', attrs: { sourcePageId: 'p1' } }, + { type: 'pageEmbed', attrs: { sourcePageId: 'p2' } }, + ], + }; + expect(collectPageEmbedsFromPmJson(doc)).toEqual([ + { sourcePageId: 'p1' }, + { sourcePageId: 'p2' }, + ]); + }); + + it('does not throw (returns gracefully) on a self-referential / cyclic doc', () => { + // A cycle is unreachable via JSON.parse, but a hand-built non-JSON input + // could carry one; the depth guard must stop the recursion instead of + // overflowing the stack. + const node: any = { type: 'doc', content: [] }; + node.content.push(node); // self-reference + + let got: ReturnType; + expect(() => { + got = collectPageEmbedsFromPmJson(node); + }).not.toThrow(); + expect(got!).toEqual([]); + }); + + it('does not throw on nesting far beyond the depth cap', () => { + // Build a chain deeper than MAX_PM_WALK_DEPTH (1000) ending in a pageEmbed. + let inner: any = { type: 'pageEmbed', attrs: { sourcePageId: 'deep' } }; + for (let i = 0; i < 5000; i++) { + inner = { type: 'doc', content: [inner] }; + } + expect(() => collectPageEmbedsFromPmJson(inner)).not.toThrow(); + }); +}); diff --git a/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts b/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts index d8cdf3bf..6431b71f 100644 --- a/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts +++ b/apps/server/src/core/page/transclusion/utils/transclusion-prosemirror.util.ts @@ -4,6 +4,13 @@ const TRANSCLUSION_TYPE = 'transclusionSource'; const REFERENCE_TYPE = 'transclusionReference'; const PAGE_EMBED_TYPE = 'pageEmbed'; +// Hard cap on recursion depth while walking a ProseMirror doc. Real documents +// nest only a handful of levels deep, so this ceiling is unreachable on any +// genuine input. It exists purely to defend against a pathological or cyclic +// non-JSON input (JSON.parse can't produce cycles, but other callers might +// hand us a hand-built/cyclic object) so the recursion can't overflow the stack. +const MAX_PM_WALK_DEPTH = 1000; + export type TransclusionReferenceSnapshot = { sourcePageId: string; transclusionId: string; @@ -27,8 +34,11 @@ export function collectTransclusionsFromPmJson( const byId = new Map(); - const visit = (node: any): void => { + const visit = (node: any, depth: number): void => { if (!node || typeof node !== 'object') return; + // Depth guard against a pathological/cyclic non-JSON input (see + // MAX_PM_WALK_DEPTH); unreachable on real docs. + if (depth > MAX_PM_WALK_DEPTH) return; if (node.type === TRANSCLUSION_TYPE) { const id = node.attrs?.id; @@ -42,11 +52,11 @@ export function collectTransclusionsFromPmJson( } if (Array.isArray(node.content)) { - for (const child of node.content) visit(child); + for (const child of node.content) visit(child, depth + 1); } }; - visit(doc); + visit(doc, 0); return Array.from(byId.values()); } @@ -65,8 +75,11 @@ export function collectReferencesFromPmJson( const seen = new Set(); const out: TransclusionReferenceSnapshot[] = []; - const visit = (node: any): void => { + const visit = (node: any, depth: number): void => { if (!node || typeof node !== 'object') return; + // Depth guard against a pathological/cyclic non-JSON input (see + // MAX_PM_WALK_DEPTH); unreachable on real docs. + if (depth > MAX_PM_WALK_DEPTH) return; if (node.type === REFERENCE_TYPE) { const sourcePageId = node.attrs?.sourcePageId; @@ -91,11 +104,11 @@ export function collectReferencesFromPmJson( if (node.type === TRANSCLUSION_TYPE) return; if (Array.isArray(node.content)) { - for (const child of node.content) visit(child); + for (const child of node.content) visit(child, depth + 1); } }; - visit(doc); + visit(doc, 0); return out; } @@ -133,8 +146,11 @@ export function remapPageEmbedSourceIds( doc: T, idMap: Map, ): T { - const visit = (node: any): void => { + const visit = (node: any, depth: number): void => { if (!node || typeof node !== 'object') return; + // Depth guard against a pathological/cyclic non-JSON input (see + // MAX_PM_WALK_DEPTH); unreachable on real docs. + if (depth > MAX_PM_WALK_DEPTH) return; if (node.type === PAGE_EMBED_TYPE) { if (node.attrs) { @@ -149,11 +165,11 @@ export function remapPageEmbedSourceIds( if (node.type === TRANSCLUSION_TYPE) return; if (Array.isArray(node.content)) { - for (const child of node.content) visit(child); + for (const child of node.content) visit(child, depth + 1); } }; - visit(doc); + visit(doc, 0); return doc; } @@ -171,8 +187,11 @@ export function collectPageEmbedsFromPmJson( const seen = new Set(); const out: PageEmbedSnapshot[] = []; - const visit = (node: any): void => { + const visit = (node: any, depth: number): void => { if (!node || typeof node !== 'object') return; + // Stop before the stack can overflow on a pathological/cyclic non-JSON + // input; far above any real doc nesting so genuine docs are unaffected. + if (depth > MAX_PM_WALK_DEPTH) return; if (node.type === PAGE_EMBED_TYPE) { const sourcePageId = node.attrs?.sourcePageId; @@ -188,10 +207,10 @@ export function collectPageEmbedsFromPmJson( if (node.type === TRANSCLUSION_TYPE) return; if (Array.isArray(node.content)) { - for (const child of node.content) visit(child); + for (const child of node.content) visit(child, depth + 1); } }; - visit(doc); + visit(doc, 0); return out; }