harden(transclusion): depth-cap the ProseMirror collectors (#55)

collectPageEmbedsFromPmJson (and the sibling collectors/remap) recursed with no
guard, so a pathological/cyclic non-JSON input could stack-overflow (RangeError).
Add a depth cap (1000, far above any real doc nesting) so such input is handled
gracefully. Normal documents are unaffected. Updates a stale test that asserted
the old throwing behavior.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-21 03:17:37 +03:00
parent 5215913533
commit c0d312d8f5
3 changed files with 87 additions and 19 deletions

View File

@@ -131,15 +131,18 @@ describe('collectPageEmbedsFromPmJson', () => {
expect(collectPageEmbedsFromPmJson(doc)).toEqual([{ sourcePageId: 'deep' }]);
});
it('terminates (does not silently hang) on a self-referencing/cyclic object', () => {
// FINDING: there is NO explicit cycle guard. A hand-built cyclic JS object
// (which cannot arise from JSON parsing the real input path) makes the
// recursive walk overflow the stack and throw a RangeError. It TERMINATES
// with a controlled error rather than recursing unboundedly forever, and a
// non-cyclic (JSON-shaped) document is never affected.
it('returns gracefully (does not throw) on a self-referencing/cyclic object', () => {
// A depth guard (see MAX_PM_WALK_DEPTH) defends against a hand-built cyclic
// JS object — which cannot arise from JSON parsing, the real input path
// so the recursive walk stops at the cap instead of overflowing the stack.
// A non-cyclic (JSON-shaped) document is never affected.
const node: any = { type: 'doc', content: [] };
node.content.push(node); // content array references its own parent node
expect(() => collectPageEmbedsFromPmJson(node)).toThrow(RangeError);
let got: ReturnType<typeof collectPageEmbedsFromPmJson>;
expect(() => {
got = collectPageEmbedsFromPmJson(node);
}).not.toThrow();
expect(got!).toEqual([]);
});
});

View File

@@ -1,4 +1,5 @@
import {
collectPageEmbedsFromPmJson,
collectReferencesFromPmJson,
collectTransclusionsFromPmJson,
} from '../utils/transclusion-prosemirror.util';
@@ -238,3 +239,48 @@ describe('collectReferencesFromPmJson', () => {
]);
});
});
describe('collectPageEmbedsFromPmJson', () => {
it('returns [] for null/undefined doc', () => {
expect(collectPageEmbedsFromPmJson(null)).toEqual([]);
expect(collectPageEmbedsFromPmJson(undefined)).toEqual([]);
});
it('collects unique sourcePageIds from pageEmbed nodes', () => {
const doc = {
type: 'doc',
content: [
{ type: 'pageEmbed', attrs: { sourcePageId: 'p1' } },
{ type: 'pageEmbed', attrs: { sourcePageId: 'p1' } },
{ type: 'pageEmbed', attrs: { sourcePageId: 'p2' } },
],
};
expect(collectPageEmbedsFromPmJson(doc)).toEqual([
{ sourcePageId: 'p1' },
{ sourcePageId: 'p2' },
]);
});
it('does not throw (returns gracefully) on a self-referential / cyclic doc', () => {
// A cycle is unreachable via JSON.parse, but a hand-built non-JSON input
// could carry one; the depth guard must stop the recursion instead of
// overflowing the stack.
const node: any = { type: 'doc', content: [] };
node.content.push(node); // self-reference
let got: ReturnType<typeof collectPageEmbedsFromPmJson>;
expect(() => {
got = collectPageEmbedsFromPmJson(node);
}).not.toThrow();
expect(got!).toEqual([]);
});
it('does not throw on nesting far beyond the depth cap', () => {
// Build a chain deeper than MAX_PM_WALK_DEPTH (1000) ending in a pageEmbed.
let inner: any = { type: 'pageEmbed', attrs: { sourcePageId: 'deep' } };
for (let i = 0; i < 5000; i++) {
inner = { type: 'doc', content: [inner] };
}
expect(() => collectPageEmbedsFromPmJson(inner)).not.toThrow();
});
});

View File

@@ -4,6 +4,13 @@ const TRANSCLUSION_TYPE = 'transclusionSource';
const REFERENCE_TYPE = 'transclusionReference';
const PAGE_EMBED_TYPE = 'pageEmbed';
// Hard cap on recursion depth while walking a ProseMirror doc. Real documents
// nest only a handful of levels deep, so this ceiling is unreachable on any
// genuine input. It exists purely to defend against a pathological or cyclic
// non-JSON input (JSON.parse can't produce cycles, but other callers might
// hand us a hand-built/cyclic object) so the recursion can't overflow the stack.
const MAX_PM_WALK_DEPTH = 1000;
export type TransclusionReferenceSnapshot = {
sourcePageId: string;
transclusionId: string;
@@ -27,8 +34,11 @@ export function collectTransclusionsFromPmJson(
const byId = new Map<string, TransclusionNodeSnapshot>();
const visit = (node: any): void => {
const visit = (node: any, depth: number): void => {
if (!node || typeof node !== 'object') return;
// Depth guard against a pathological/cyclic non-JSON input (see
// MAX_PM_WALK_DEPTH); unreachable on real docs.
if (depth > MAX_PM_WALK_DEPTH) return;
if (node.type === TRANSCLUSION_TYPE) {
const id = node.attrs?.id;
@@ -42,11 +52,11 @@ export function collectTransclusionsFromPmJson(
}
if (Array.isArray(node.content)) {
for (const child of node.content) visit(child);
for (const child of node.content) visit(child, depth + 1);
}
};
visit(doc);
visit(doc, 0);
return Array.from(byId.values());
}
@@ -65,8 +75,11 @@ export function collectReferencesFromPmJson(
const seen = new Set<string>();
const out: TransclusionReferenceSnapshot[] = [];
const visit = (node: any): void => {
const visit = (node: any, depth: number): void => {
if (!node || typeof node !== 'object') return;
// Depth guard against a pathological/cyclic non-JSON input (see
// MAX_PM_WALK_DEPTH); unreachable on real docs.
if (depth > MAX_PM_WALK_DEPTH) return;
if (node.type === REFERENCE_TYPE) {
const sourcePageId = node.attrs?.sourcePageId;
@@ -91,11 +104,11 @@ export function collectReferencesFromPmJson(
if (node.type === TRANSCLUSION_TYPE) return;
if (Array.isArray(node.content)) {
for (const child of node.content) visit(child);
for (const child of node.content) visit(child, depth + 1);
}
};
visit(doc);
visit(doc, 0);
return out;
}
@@ -133,8 +146,11 @@ export function remapPageEmbedSourceIds<T>(
doc: T,
idMap: Map<string, string>,
): T {
const visit = (node: any): void => {
const visit = (node: any, depth: number): void => {
if (!node || typeof node !== 'object') return;
// Depth guard against a pathological/cyclic non-JSON input (see
// MAX_PM_WALK_DEPTH); unreachable on real docs.
if (depth > MAX_PM_WALK_DEPTH) return;
if (node.type === PAGE_EMBED_TYPE) {
if (node.attrs) {
@@ -149,11 +165,11 @@ export function remapPageEmbedSourceIds<T>(
if (node.type === TRANSCLUSION_TYPE) return;
if (Array.isArray(node.content)) {
for (const child of node.content) visit(child);
for (const child of node.content) visit(child, depth + 1);
}
};
visit(doc);
visit(doc, 0);
return doc;
}
@@ -171,8 +187,11 @@ export function collectPageEmbedsFromPmJson(
const seen = new Set<string>();
const out: PageEmbedSnapshot[] = [];
const visit = (node: any): void => {
const visit = (node: any, depth: number): void => {
if (!node || typeof node !== 'object') return;
// Stop before the stack can overflow on a pathological/cyclic non-JSON
// input; far above any real doc nesting so genuine docs are unaffected.
if (depth > MAX_PM_WALK_DEPTH) return;
if (node.type === PAGE_EMBED_TYPE) {
const sourcePageId = node.attrs?.sourcePageId;
@@ -188,10 +207,10 @@ export function collectPageEmbedsFromPmJson(
if (node.type === TRANSCLUSION_TYPE) return;
if (Array.isArray(node.content)) {
for (const child of node.content) visit(child);
for (const child of node.content) visit(child, depth + 1);
}
};
visit(doc);
visit(doc, 0);
return out;
}