harden(transclusion): depth-cap the ProseMirror collectors (#55)
collectPageEmbedsFromPmJson (and the sibling collectors/remap) recursed with no guard, so a pathological/cyclic non-JSON input could stack-overflow (RangeError). Add a depth cap (1000, far above any real doc nesting) so such input is handled gracefully. Normal documents are unaffected. Updates a stale test that asserted the old throwing behavior. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -131,15 +131,18 @@ describe('collectPageEmbedsFromPmJson', () => {
|
||||
expect(collectPageEmbedsFromPmJson(doc)).toEqual([{ sourcePageId: 'deep' }]);
|
||||
});
|
||||
|
||||
it('terminates (does not silently hang) on a self-referencing/cyclic object', () => {
|
||||
// FINDING: there is NO explicit cycle guard. A hand-built cyclic JS object
|
||||
// (which cannot arise from JSON parsing — the real input path) makes the
|
||||
// recursive walk overflow the stack and throw a RangeError. It TERMINATES
|
||||
// with a controlled error rather than recursing unboundedly forever, and a
|
||||
// non-cyclic (JSON-shaped) document is never affected.
|
||||
it('returns gracefully (does not throw) on a self-referencing/cyclic object', () => {
|
||||
// A depth guard (see MAX_PM_WALK_DEPTH) defends against a hand-built cyclic
|
||||
// JS object — which cannot arise from JSON parsing, the real input path —
|
||||
// so the recursive walk stops at the cap instead of overflowing the stack.
|
||||
// A non-cyclic (JSON-shaped) document is never affected.
|
||||
const node: any = { type: 'doc', content: [] };
|
||||
node.content.push(node); // content array references its own parent node
|
||||
expect(() => collectPageEmbedsFromPmJson(node)).toThrow(RangeError);
|
||||
let got: ReturnType<typeof collectPageEmbedsFromPmJson>;
|
||||
expect(() => {
|
||||
got = collectPageEmbedsFromPmJson(node);
|
||||
}).not.toThrow();
|
||||
expect(got!).toEqual([]);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import {
|
||||
collectPageEmbedsFromPmJson,
|
||||
collectReferencesFromPmJson,
|
||||
collectTransclusionsFromPmJson,
|
||||
} from '../utils/transclusion-prosemirror.util';
|
||||
@@ -238,3 +239,48 @@ describe('collectReferencesFromPmJson', () => {
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('collectPageEmbedsFromPmJson', () => {
|
||||
it('returns [] for null/undefined doc', () => {
|
||||
expect(collectPageEmbedsFromPmJson(null)).toEqual([]);
|
||||
expect(collectPageEmbedsFromPmJson(undefined)).toEqual([]);
|
||||
});
|
||||
|
||||
it('collects unique sourcePageIds from pageEmbed nodes', () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'pageEmbed', attrs: { sourcePageId: 'p1' } },
|
||||
{ type: 'pageEmbed', attrs: { sourcePageId: 'p1' } },
|
||||
{ type: 'pageEmbed', attrs: { sourcePageId: 'p2' } },
|
||||
],
|
||||
};
|
||||
expect(collectPageEmbedsFromPmJson(doc)).toEqual([
|
||||
{ sourcePageId: 'p1' },
|
||||
{ sourcePageId: 'p2' },
|
||||
]);
|
||||
});
|
||||
|
||||
it('does not throw (returns gracefully) on a self-referential / cyclic doc', () => {
|
||||
// A cycle is unreachable via JSON.parse, but a hand-built non-JSON input
|
||||
// could carry one; the depth guard must stop the recursion instead of
|
||||
// overflowing the stack.
|
||||
const node: any = { type: 'doc', content: [] };
|
||||
node.content.push(node); // self-reference
|
||||
|
||||
let got: ReturnType<typeof collectPageEmbedsFromPmJson>;
|
||||
expect(() => {
|
||||
got = collectPageEmbedsFromPmJson(node);
|
||||
}).not.toThrow();
|
||||
expect(got!).toEqual([]);
|
||||
});
|
||||
|
||||
it('does not throw on nesting far beyond the depth cap', () => {
|
||||
// Build a chain deeper than MAX_PM_WALK_DEPTH (1000) ending in a pageEmbed.
|
||||
let inner: any = { type: 'pageEmbed', attrs: { sourcePageId: 'deep' } };
|
||||
for (let i = 0; i < 5000; i++) {
|
||||
inner = { type: 'doc', content: [inner] };
|
||||
}
|
||||
expect(() => collectPageEmbedsFromPmJson(inner)).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -4,6 +4,13 @@ const TRANSCLUSION_TYPE = 'transclusionSource';
|
||||
const REFERENCE_TYPE = 'transclusionReference';
|
||||
const PAGE_EMBED_TYPE = 'pageEmbed';
|
||||
|
||||
// Hard cap on recursion depth while walking a ProseMirror doc. Real documents
|
||||
// nest only a handful of levels deep, so this ceiling is unreachable on any
|
||||
// genuine input. It exists purely to defend against a pathological or cyclic
|
||||
// non-JSON input (JSON.parse can't produce cycles, but other callers might
|
||||
// hand us a hand-built/cyclic object) so the recursion can't overflow the stack.
|
||||
const MAX_PM_WALK_DEPTH = 1000;
|
||||
|
||||
export type TransclusionReferenceSnapshot = {
|
||||
sourcePageId: string;
|
||||
transclusionId: string;
|
||||
@@ -27,8 +34,11 @@ export function collectTransclusionsFromPmJson(
|
||||
|
||||
const byId = new Map<string, TransclusionNodeSnapshot>();
|
||||
|
||||
const visit = (node: any): void => {
|
||||
const visit = (node: any, depth: number): void => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
// Depth guard against a pathological/cyclic non-JSON input (see
|
||||
// MAX_PM_WALK_DEPTH); unreachable on real docs.
|
||||
if (depth > MAX_PM_WALK_DEPTH) return;
|
||||
|
||||
if (node.type === TRANSCLUSION_TYPE) {
|
||||
const id = node.attrs?.id;
|
||||
@@ -42,11 +52,11 @@ export function collectTransclusionsFromPmJson(
|
||||
}
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) visit(child);
|
||||
for (const child of node.content) visit(child, depth + 1);
|
||||
}
|
||||
};
|
||||
|
||||
visit(doc);
|
||||
visit(doc, 0);
|
||||
return Array.from(byId.values());
|
||||
}
|
||||
|
||||
@@ -65,8 +75,11 @@ export function collectReferencesFromPmJson(
|
||||
const seen = new Set<string>();
|
||||
const out: TransclusionReferenceSnapshot[] = [];
|
||||
|
||||
const visit = (node: any): void => {
|
||||
const visit = (node: any, depth: number): void => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
// Depth guard against a pathological/cyclic non-JSON input (see
|
||||
// MAX_PM_WALK_DEPTH); unreachable on real docs.
|
||||
if (depth > MAX_PM_WALK_DEPTH) return;
|
||||
|
||||
if (node.type === REFERENCE_TYPE) {
|
||||
const sourcePageId = node.attrs?.sourcePageId;
|
||||
@@ -91,11 +104,11 @@ export function collectReferencesFromPmJson(
|
||||
if (node.type === TRANSCLUSION_TYPE) return;
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) visit(child);
|
||||
for (const child of node.content) visit(child, depth + 1);
|
||||
}
|
||||
};
|
||||
|
||||
visit(doc);
|
||||
visit(doc, 0);
|
||||
return out;
|
||||
}
|
||||
|
||||
@@ -133,8 +146,11 @@ export function remapPageEmbedSourceIds<T>(
|
||||
doc: T,
|
||||
idMap: Map<string, string>,
|
||||
): T {
|
||||
const visit = (node: any): void => {
|
||||
const visit = (node: any, depth: number): void => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
// Depth guard against a pathological/cyclic non-JSON input (see
|
||||
// MAX_PM_WALK_DEPTH); unreachable on real docs.
|
||||
if (depth > MAX_PM_WALK_DEPTH) return;
|
||||
|
||||
if (node.type === PAGE_EMBED_TYPE) {
|
||||
if (node.attrs) {
|
||||
@@ -149,11 +165,11 @@ export function remapPageEmbedSourceIds<T>(
|
||||
if (node.type === TRANSCLUSION_TYPE) return;
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) visit(child);
|
||||
for (const child of node.content) visit(child, depth + 1);
|
||||
}
|
||||
};
|
||||
|
||||
visit(doc);
|
||||
visit(doc, 0);
|
||||
return doc;
|
||||
}
|
||||
|
||||
@@ -171,8 +187,11 @@ export function collectPageEmbedsFromPmJson(
|
||||
const seen = new Set<string>();
|
||||
const out: PageEmbedSnapshot[] = [];
|
||||
|
||||
const visit = (node: any): void => {
|
||||
const visit = (node: any, depth: number): void => {
|
||||
if (!node || typeof node !== 'object') return;
|
||||
// Stop before the stack can overflow on a pathological/cyclic non-JSON
|
||||
// input; far above any real doc nesting so genuine docs are unaffected.
|
||||
if (depth > MAX_PM_WALK_DEPTH) return;
|
||||
|
||||
if (node.type === PAGE_EMBED_TYPE) {
|
||||
const sourcePageId = node.attrs?.sourcePageId;
|
||||
@@ -188,10 +207,10 @@ export function collectPageEmbedsFromPmJson(
|
||||
if (node.type === TRANSCLUSION_TYPE) return;
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) visit(child);
|
||||
for (const child of node.content) visit(child, depth + 1);
|
||||
}
|
||||
};
|
||||
|
||||
visit(doc);
|
||||
visit(doc, 0);
|
||||
return out;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user