test(integrations/client/packages): batch 2-4 unit coverage + zip-slip guard extraction

Batch 2-4 of the test-strategy rollout. Test-only except one minimal,
behaviour-preserving extraction in file.utils.ts. All suites green:
server 82 suites/836+1todo, editor-ext 86, mcp 270, client (new files) 86.

integrations (server):
- file.utils.ts: extract pure `isEntryPathSafe(entryName, targetDir)` from
  extractZipInternal so the zip-slip/path-traversal guard is unit-testable;
  call site rerouted, behaviour identical (only a warn-message string merged).
- file.utils.zip-safety.spec.ts: traversal/strip/__MACOSX/prefix-confusion
  cases (mutation-resistant: fails if containment loses the path.sep).
- import-formatter / import.utils / table-utils / export utils / import.service
  extractTitleAndRemoveHeading: pure import/export transforms, Notion/XWiki
  formatting, table colspan widths (idempotent), slug/link rewriting.

client:
- safeRedirectPath: open-redirect guard, every reject branch independently.
- buildChatMarkdown (fence anti-breakout), label-colors, normalize-label,
  share tree build, page URL builders, notification time-grouping (fake clock).

packages:
- editor-ext: deriveFootnoteId golden table, parseHtmlEmbedHeight crafted
  values, orphan footnote extraction.
- mcp: deriveFootnoteId parity (drift guard vs editor-ext), applyTextEdits
  idempotency + cross-block replaceAll, diffDocs/summarizeChange on reorder.

Reviewed (APPROVE): extraction behaviour-preserving, assertions mutation-resistant.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude_code
2026-06-21 18:22:15 +03:00
parent f8e8ada581
commit 0b2af34029
20 changed files with 2495 additions and 17 deletions
@@ -30,6 +30,52 @@ export function getFileTaskFolderPath(
}
}
/**
 * Pure path-safety decision for a single ZIP entry (zip-slip / path-traversal guard).
 *
 * The decision is made in this order:
 *   1. Strip any leading slashes from the entry name, so an absolute-looking
 *      entry is re-anchored under the target instead of the filesystem root.
 *   2. Reject names for which `yauzl.validateFileName` reports an error
 *      (e.g. backslashes, relative `..` segments, drive letters).
 *   3. Reject `__MACOSX/` metadata entries.
 *   4. Resolve the entry against the target directory and require it to stay
 *      strictly inside `targetDir`. The comparison uses the resolved target
 *      followed by exactly one `path.sep`; the separator prevents
 *      sibling-directory prefix confusion (`/tmp/x` must not match
 *      `/tmp/x-evil`). An entry that resolves to the target directory itself
 *      is rejected as well.
 *
 * @param entryName The decoded (UTF-8) entry file name from the archive.
 * @param targetDir Directory the archive is being extracted into.
 * @returns `{ safe: false }` when the entry must be skipped; otherwise
 *   `{ safe: true, resolved }` where `resolved` is the absolute path of the
 *   entry under `targetDir`.
 */
export function isEntryPathSafe(
  entryName: string,
  targetDir: string,
): { safe: boolean; resolved?: string } {
  // Strip leading slashes so absolute-looking entries cannot escape the target.
  const relativeName = entryName.replace(/^\/+/, '');

  // A truthy return value from validateFileName is an error description.
  if (yauzl.validateFileName(relativeName)) {
    return { safe: false };
  }

  // Skip macOS resource-fork metadata entries.
  if (relativeName.startsWith('__MACOSX/')) {
    return { safe: false };
  }

  const targetResolved = path.resolve(targetDir);
  const resolved = path.resolve(path.join(targetDir, relativeName));

  // path.resolve() only leaves a trailing separator on a filesystem root
  // ('/' or 'C:\'). Appending a second one there would build a prefix such as
  // '//' that no resolved path starts with, rejecting every entry.
  const containmentPrefix = targetResolved.endsWith(path.sep)
    ? targetResolved
    : targetResolved + path.sep;

  // Containment check: the entry must live strictly inside the target dir,
  // so the target itself is never an acceptable destination.
  if (resolved === targetResolved || !resolved.startsWith(containmentPrefix)) {
    return { safe: false };
  }

  return { safe: true, resolved };
}
/**
* Extracts a ZIP archive.
*/
@@ -103,29 +149,15 @@ function extractZipInternal(
const name = entry.fileName.toString('utf8');
const safe = name.replace(/^\/+/, '');
const validationError = yauzl.validateFileName(safe);
if (validationError) {
console.warn(`Skipping invalid entry (${validationError})`);
zipfile.readEntry();
return;
}
if (safe.startsWith('__MACOSX/')) {
// Zip-slip / path-traversal guard (see isEntryPathSafe).
if (!isEntryPathSafe(name, target).safe) {
console.warn(`Skipping unsafe entry: ${safe}`);
zipfile.readEntry();
return;
}
const fullPath = path.join(target, safe);
const resolved = path.resolve(fullPath);
const targetResolved = path.resolve(target);
if (!resolved.startsWith(targetResolved + path.sep)) {
console.warn(`Skipping entry (path outside target): ${safe}`);
zipfile.readEntry();
return;
}
// Handle directories
if (/\/$/.test(name)) {
try {
@@ -0,0 +1,105 @@
import * as path from 'path';
import { isEntryPathSafe } from './file.utils';
/**
 * Specification for isEntryPathSafe, the pure zip-slip / path-traversal guard
 * used by extractZipInternal. The guard is expected to, in order:
 *   1. drop leading slashes from the entry name;
 *   2. refuse names that yauzl.validateFileName flags (relative `..` segments,
 *      backslashes, drive letters, etc.);
 *   3. refuse `__MACOSX/` metadata entries;
 *   4. resolve the stripped name under the target dir and accept it only when
 *      the result passes a `targetResolved + path.sep` prefix check.
 *
 * The separator in step 4 is what stops sibling-directory prefix confusion
 * (target `/tmp/x` vs `/tmp/x-evil`). The cases are arranged so that relaxing
 * the check to a bare `startsWith(targetResolved)` breaks at least one of them.
 */
describe('isEntryPathSafe', () => {
  // Absolute extraction directory shared by every case.
  const target = path.resolve('/tmp/x');

  /** Asserts the entry is refused and that no resolved path is reported. */
  const expectRejected = (entryName: string): void => {
    const { safe, resolved } = isEntryPathSafe(entryName, target);
    expect(safe).toBe(false);
    expect(resolved).toBeUndefined();
  };

  /**
   * Asserts the entry is accepted and resolves to `relativePath` under the
   * target; hands back the resolved path for any follow-up assertion.
   */
  const expectAcceptedAt = (
    entryName: string,
    relativePath: string,
  ): string | undefined => {
    const { safe, resolved } = isEntryPathSafe(entryName, target);
    expect(safe).toBe(true);
    expect(resolved).toBe(path.join(target, relativePath));
    return resolved;
  };

  it('accepts a normal nested entry and resolves it inside the target', () => {
    const resolved = expectAcceptedAt('a/b/c.png', 'a/b/c.png');
    // The resolved path has to sit strictly below the target directory.
    expect(resolved!.startsWith(target + path.sep)).toBe(true);
  });

  it('strips a single leading slash and then treats the entry as safe', () => {
    expectAcceptedAt('/a/b/c.png', 'a/b/c.png');
  });

  it('strips multiple leading slashes and then treats the entry as safe', () => {
    expectAcceptedAt('///a/b.png', 'a/b.png');
  });

  it('skips (marks unsafe) __MACOSX metadata entries', () => {
    expectRejected('__MACOSX/foo');
  });

  it('rejects a relative ../../ traversal entry', () => {
    // yauzl.validateFileName reports this as an invalid relative path, so the
    // containment check is never reached. The verdict is "unsafe" either way.
    expectRejected('../../etc/passwd');
  });

  it('rejects an entry whose resolved path would land in a sibling directory (prefix confusion)', () => {
    // Target `/tmp/x` must never be taken to contain `/tmp/x-evil`. Reaching
    // the sibling needs a `..` segment, which the guard refuses; this case
    // pins that the sibling-escape attempt is indeed turned away.
    expectRejected('../x-evil/p');
  });

  it('rejects an entry that resolves to exactly the target dir (no trailing separator)', () => {
    // `.` resolves to the target itself. The strict prefix check (with the
    // separator) refuses it, whereas a bare `startsWith(targetResolved)` would
    // let it through - so this case fails if the separator is ever dropped.
    expectRejected('.');
  });

  it('keeps the target/sibling boundary: a bare-prefix sibling is not inside the target', () => {
    // States the invariant the separator protects directly on path strings:
    // the sibling shares the target's basename as a prefix yet is a different
    // directory, and only the `+ path.sep` comparison classifies it as outside.
    const base = path.resolve('/tmp/x');
    const sibling = path.resolve(path.join(base, '..', 'x-evil', 'p'));
    expect(sibling.startsWith(base)).toBe(true); // weak (buggy) check matches
    expect(sibling.startsWith(base + path.sep)).toBe(false); // strict check rejects
  });

  it('rejects an entry containing a backslash via yauzl.validateFileName', () => {
    // Backslashes are reported by yauzl.validateFileName, so the entry is
    // unsafe no matter where it would resolve.
    expectRejected('a\\b.png');
  });

  it('accepts a stripped absolute path that lands inside the target', () => {
    // Documents the actual behaviour: `/etc/passwd` loses its leading slash,
    // becomes `etc/passwd`, and resolves to <target>/etc/passwd - inside the
    // target and therefore safe. The strip re-anchors absolute-looking entries
    // under the target instead of letting them reach the filesystem root.
    expectAcceptedAt('/etc/passwd', 'etc/passwd');
  });
});
@@ -0,0 +1,403 @@
// @sindresorhus/slugify is published as ESM and is not covered by jest's
// transform allowlist, so ts-jest cannot import it here. It is replaced by a
// deterministic lowercase/dash slugifier that agrees with the real package for
// the plain ASCII titles used below (e.g. "Real Title" -> "real-title"), which
// keeps the focus on the formatter's own slug-composition logic.
// Everything the factory needs is declared inside it, because jest hoists
// jest.mock() calls above the imports.
jest.mock('@sindresorhus/slugify', () => {
  const fakeSlugify = (input: string): string => {
    const lowered = String(input).trim().toLowerCase();
    // Collapse every run of non-alphanumerics into a single dash...
    const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
    // ...then drop dashes left dangling at either end.
    return dashed.replace(/^-+|-+$/g, '');
  };

  return { __esModule: true, default: fakeSlugify };
});
import { load, CheerioAPI, Cheerio } from 'cheerio';
import {
rewriteInternalLinksToMentionHtml,
notionFormatter,
xwikiFormatter,
defaultHtmlFormatter,
unwrapFromParagraph,
} from './import-formatter';
/**
* Unit tests for import-formatter.ts. These are pure DOM transforms driven by
* cheerio. Each test loads a snippet, runs the target function against the
* cheerio root, and asserts the mutated markup / return value. Assertions are
* written to fail if the corresponding branch were silently removed.
*/
/** Minimal page record stored in the path -> page lookup maps used below. */
type PageMeta = {
  id: string;
  title: string;
  slugId: string;
};

/**
 * Parses an HTML snippet with cheerio and returns both the cheerio API handle
 * (`$`) and the document root (`$root`) the formatters operate on.
 */
function makeRoot(html: string): { $: CheerioAPI; $root: Cheerio<any> } {
  const api = load(html);
  const documentRoot = api.root();
  return { $: api, $root: documentRoot };
}
/**
 * rewriteInternalLinksToMentionHtml: anchors that resolve to a known page are
 * turned into mention spans (link text equals the page title) or rewritten to
 * an internal page URL (text differs); every resolved link yields a backlink.
 * External and /api/ links are expected to be left alone.
 */
describe('rewriteInternalLinksToMentionHtml', () => {
  // Identifiers passed to every call; the expected backlinks echo them back.
  const creatorId = 'creator-1';
  const sourcePageId = 'source-page-1';
  const workspaceId = 'workspace-1';

  it('replaces an internal link whose text equals the page title with a mention span', async () => {
    const meta: PageMeta = {
      id: 'target-id-1',
      title: 'Design Doc',
      slugId: 'slugABC',
    };
    // currentFilePath dir is "docs"; href "./target.md" resolves to "docs/target.md"
    const map = new Map<string, PageMeta>([['docs/target.md', meta]]);
    const { $, $root } = makeRoot('<a href="./target.md">Design Doc</a>');

    const backlinks = await rewriteInternalLinksToMentionHtml(
      $,
      $root,
      'docs/index.md',
      map,
      creatorId,
      sourcePageId,
      workspaceId,
    );

    const $mention = $root.find('span[data-type="mention"]');
    expect($mention.length).toBe(1);
    expect($mention.attr('data-entity-type')).toBe('page');
    expect($mention.attr('data-entity-id')).toBe('target-id-1');
    expect($mention.attr('data-label')).toBe('Design Doc');
    expect($mention.attr('data-slug-id')).toBe('slugABC');
    expect($mention.attr('data-creator-id')).toBe(creatorId);
    expect($mention.attr('data-id')).toBeTruthy();
    expect($mention.text()).toBe('Design Doc');
    // original anchor must be gone
    expect($root.find('a').length).toBe(0);
    expect(backlinks).toEqual([
      { sourcePageId, targetPageId: 'target-id-1', workspaceId },
    ]);
  });

  it('rewrites href to /s/{space}/p/{slug} when text differs from the title', async () => {
    const meta: PageMeta = {
      id: 'target-id-2',
      title: 'Real Title',
      slugId: 'slug999',
    };
    const map = new Map<string, PageMeta>([['docs/target.md', meta]]);
    const { $, $root } = makeRoot('<a href="./target.md">click here</a>');

    const backlinks = await rewriteInternalLinksToMentionHtml(
      $,
      $root,
      'docs/index.md',
      map,
      creatorId,
      sourcePageId,
      workspaceId,
      'myspace',
    );

    // still an anchor, no mention span
    expect($root.find('span[data-type="mention"]').length).toBe(0);
    const $a = $root.find('a');
    expect($a.length).toBe(1);
    // slugify('Real Title') => 'real-title'
    expect($a.attr('href')).toBe('/s/myspace/p/real-title-slug999');
    expect($a.attr('data-internal')).toBe('true');
    expect($a.text()).toBe('click here');
    expect(backlinks).toEqual([
      { sourcePageId, targetPageId: 'target-id-2', workspaceId },
    ]);
  });

  it('uses /p/{slug} when no spaceSlug is provided', async () => {
    const meta: PageMeta = {
      id: 'target-id-3',
      title: 'Other Page',
      slugId: 'slug777',
    };
    const map = new Map<string, PageMeta>([['docs/target.md', meta]]);
    const { $, $root } = makeRoot('<a href="./target.md">label</a>');

    await rewriteInternalLinksToMentionHtml(
      $,
      $root,
      'docs/index.md',
      map,
      creatorId,
      sourcePageId,
      workspaceId,
    );

    expect($root.find('a').attr('href')).toBe('/p/other-page-slug777');
  });

  it('leaves external http and /api/ hrefs untouched and records no backlink', async () => {
    const map = new Map<string, PageMeta>();
    const { $, $root } = makeRoot(
      '<a href="https://example.com/page">ext</a><a href="/api/files/x">api</a>',
    );

    const backlinks = await rewriteInternalLinksToMentionHtml(
      $,
      $root,
      'docs/index.md',
      map,
      creatorId,
      sourcePageId,
      workspaceId,
    );

    const hrefs = $root
      .find('a')
      .map((_, el) => $(el).attr('href'))
      .get();
    expect(hrefs).toEqual(['https://example.com/page', '/api/files/x']);
    expect($root.find('a').first().attr('data-internal')).toBeUndefined();
    expect(backlinks).toEqual([]);
  });

  it('falls back without throwing on a malformed decodeURIComponent href', async () => {
    const meta: PageMeta = {
      id: 'target-id-4',
      title: 'Broken',
      slugId: 'slug000',
    };
    // The raw (un-decodable) href is what gets joined: "docs/%E0%A4%A.md".
    const map = new Map<string, PageMeta>([['docs/%E0%A4%A.md', meta]]);
    const { $, $root } = makeRoot('<a href="%E0%A4%A.md">Broken</a>');

    // Awaiting directly is the "does not throw" assertion: a rejection here
    // fails the test, and the result keeps its inferred type.
    const backlinks = await rewriteInternalLinksToMentionHtml(
      $,
      $root,
      'docs/index.md',
      map,
      creatorId,
      sourcePageId,
      workspaceId,
    );

    // Because the raw path matched the map, it still produced a mention.
    expect($root.find('span[data-type="mention"]').length).toBe(1);
    expect(backlinks).toEqual([
      { sourcePageId, targetPageId: 'target-id-4', workspaceId },
    ]);
  });

  it('accumulates one backlink per resolved link', async () => {
    const a: PageMeta = { id: 'id-a', title: 'A', slugId: 's-a' };
    const b: PageMeta = { id: 'id-b', title: 'B', slugId: 's-b' };
    const map = new Map<string, PageMeta>([
      ['docs/a.md', a],
      ['docs/b.md', b],
    ]);
    const { $, $root } = makeRoot(
      '<a href="./a.md">A</a><a href="./b.md">B</a>',
    );

    const backlinks = await rewriteInternalLinksToMentionHtml(
      $,
      $root,
      'docs/index.md',
      map,
      creatorId,
      sourcePageId,
      workspaceId,
    );

    expect(backlinks).toEqual([
      { sourcePageId, targetPageId: 'id-a', workspaceId },
      { sourcePageId, targetPageId: 'id-b', workspaceId },
    ]);
  });
});
/**
 * notionFormatter: Notion export markup (column lists, equations, to-do lists)
 * is expected to be rewritten into the editor's data-type based markup.
 */
describe('notionFormatter', () => {
  /** Builds a Notion `.column-list` holding one `.column` per given text. */
  const columnList = (...texts: string[]): string => {
    const columns = texts
      .map((text) => `<div class="column"><p>${text}</p></div>`)
      .join('');
    return `<div class="column-list">${columns}</div>`;
  };

  it('converts a multi-column column-list to data-type="columns" with the right layout', () => {
    const { $, $root } = makeRoot(columnList('one', 'two', 'three'));

    notionFormatter($, $root);

    const $columns = $root.find('div[data-type="columns"]');
    expect($columns.length).toBe(1);
    // 3 columns => COLUMN_LAYOUTS[3] === 'three_equal'
    expect($columns.attr('data-layout')).toBe('three_equal');
    expect($root.find('div[data-type="column"]').length).toBe(3);
    // the source column-list wrapper must not survive
    expect($root.find('div.column-list').length).toBe(0);
  });

  it('uses two_equal layout for exactly two columns', () => {
    const { $, $root } = makeRoot(columnList('one', 'two'));

    notionFormatter($, $root);

    const layout = $root.find('div[data-type="columns"]').attr('data-layout');
    expect(layout).toBe('two_equal');
  });

  it('converts figure.equation into a mathBlock with the tex text', () => {
    const equationHtml = [
      '<figure class="equation">',
      '<annotation encoding="application/x-tex">E = mc^2</annotation>',
      '</figure>',
    ].join('');
    const { $, $root } = makeRoot(equationHtml);

    notionFormatter($, $root);

    const $mathBlock = $root.find('div[data-type="mathBlock"]');
    expect($mathBlock.length).toBe(1);
    expect($mathBlock.attr('data-katex')).toBe('true');
    expect($mathBlock.text()).toBe('E = mc^2');
    expect($root.find('figure.equation').length).toBe(0);
  });

  it('converts ul.to-do-list items to a taskList with data-checked reflecting checkbox-on', () => {
    const todoHtml = [
      '<ul class="to-do-list">',
      '<li><div class="checkbox checkbox-on"></div>',
      '<span class="to-do-children-checked">done item</span></li>',
      '<li><div class="checkbox checkbox-off"></div>',
      '<span class="to-do-children-unchecked">open item</span></li>',
      '</ul>',
    ].join('');
    const { $, $root } = makeRoot(todoHtml);

    notionFormatter($, $root);

    const $taskList = $root.find('ul[data-type="taskList"]');
    expect($taskList.length).toBe(1);
    const $taskItems = $taskList.find('li[data-type="taskItem"]');
    expect($taskItems.length).toBe(2);

    const $done = $taskItems.eq(0);
    const $open = $taskItems.eq(1);
    expect($done.attr('data-checked')).toBe('true');
    expect($open.attr('data-checked')).toBe('false');
    // only the checked item carries a checked input
    expect($done.find('input[checked]').length).toBe(1);
    expect($open.find('input[checked]').length).toBe(0);
    // the item text is carried over into a paragraph
    expect($done.find('p').text()).toBe('done item');
    expect($open.find('p').text()).toBe('open item');
  });
});
/**
 * xwikiFormatter: when an `#xwikicontent` container exists, only its contents
 * are expected to remain; otherwise the markup is expected to be untouched.
 */
describe('xwikiFormatter', () => {
  it('replaces the root with the contents of #xwikicontent when present', () => {
    const pageChrome = '<div id="header">junk</div>';
    const pageBody =
      '<div id="xwikicontent"><p>real body</p><h2>heading</h2></div>';
    const { $, $root } = makeRoot(pageChrome + pageBody);

    xwikiFormatter($, $root);

    expect($root.find('#header').length).toBe(0);
    expect($root.find('#xwikicontent').length).toBe(0);
    expect($root.find('p').text()).toBe('real body');
    expect($root.find('h2').text()).toBe('heading');
  });

  it('leaves HTML without #xwikicontent unchanged', () => {
    const { $, $root } = makeRoot('<div id="header">junk</div><p>body</p>');
    const snapshot = $root.html();

    xwikiFormatter($, $root);

    expect($root.html()).toBe(snapshot);
  });
});
/**
 * defaultHtmlFormatter: anchors pointing at a recognized embed provider are
 * expected to become embed blocks; any other anchor stays a plain link.
 */
describe('defaultHtmlFormatter', () => {
  it('replaces a recognized provider anchor with a data-type="embed" div', () => {
    const videoUrl = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ';
    const { $, $root } = makeRoot(`<a href="${videoUrl}">video</a>`);

    defaultHtmlFormatter($, $root);

    const $embedBlock = $root.find('div[data-type="embed"]');
    expect($embedBlock.length).toBe(1);
    expect($embedBlock.attr('data-provider')).toBe('youtube');
    expect($embedBlock.attr('data-src')).toBe(videoUrl);
    // no anchor may be left behind
    expect($root.find('a').length).toBe(0);
  });

  it('leaves an anchor as a link when provider resolves to iframe', () => {
    // A URL matching no specific provider falls through to the default iframe
    // provider, which the formatter explicitly skips.
    const plainUrl = 'https://example.com/some/page';
    const { $, $root } = makeRoot(`<a href="${plainUrl}">site</a>`);

    defaultHtmlFormatter($, $root);

    expect($root.find('div[data-type="embed"]').length).toBe(0);
    const $link = $root.find('a');
    expect($link.length).toBe(1);
    expect($link.attr('href')).toBe(plainUrl);
  });
});
/**
 * unwrapFromParagraph: a node is expected to be lifted out of its wrapping
 * <p>/<a> elements, whether it is their only child or has siblings.
 */
describe('unwrapFromParagraph', () => {
  it('replaces the wrapper entirely when the node is the only child of a <p>', () => {
    const { $, $root } = makeRoot('<p><img src="x.png"></p>');
    const $image = $root.find('img');

    unwrapFromParagraph($, $image);

    // the wrapping <p> disappears while the image itself survives
    expect($root.find('p').length).toBe(0);
    expect($root.find('img').length).toBe(1);
  });

  it('moves the node before the wrapper when there are sibling contents', () => {
    const { $, $root } = makeRoot('<p>text before <img src="x.png"></p>');
    const $image = $root.find('img');

    unwrapFromParagraph($, $image);

    // Compare positions in the serialized markup: the image has to come
    // before the paragraph, which keeps the sibling text.
    const serialized = $root.html() || '';
    const imagePosition = serialized.indexOf('<img');
    const paragraphPosition = serialized.indexOf('<p');
    expect(imagePosition).toBeGreaterThanOrEqual(0);
    expect(paragraphPosition).toBeGreaterThanOrEqual(0);
    expect(imagePosition).toBeLessThan(paragraphPosition);
    expect($root.find('p').text()).toContain('text before');
  });

  it('returns (does not infinite-loop) on adversarial nesting', () => {
    // The image sits inside an <a> that is itself inside a <p>.
    const { $, $root } = makeRoot('<p><a href="#"><img src="x.png"></a></p>');
    const $image = $root.find('img');

    // A never-terminating unwrap would hang the test right here.
    expect(() => unwrapFromParagraph($, $image)).not.toThrow();

    // Fully unwrapped: neither a <p> nor an <a> encloses the image any more.
    expect($image.closest('p, a').length).toBe(0);
    expect($root.find('img').length).toBe(1);
  });
});
@@ -0,0 +1,137 @@
import {
stripNotionID,
extractNotionPartialId,
resolveRelativeAttachmentPath,
} from './import.utils';
/**
* Unit tests for the pure helpers in import.utils.ts:
* - stripNotionID / extractNotionPartialId: filename suffix parsing.
* - resolveRelativeAttachmentPath: maps an HTML-relative attachment href onto
* a key that exists in the extracted-archive candidate map.
*/
/** stripNotionID: removes a trailing Notion identifier from a file/folder name. */
describe('stripNotionID', () => {
  // A 32-character hexadecimal identifier, as appended to names in the cases below.
  const hexId = 'a1b2c3d4e5f60718293a4b5c6d7e8f90';

  it('strips a 32-hex suffix preceded by a space separator', () => {
    const stripped = stripNotionID(`My Page ${hexId}`);
    expect(stripped).toBe('My Page');
  });

  it('strips a 32-hex suffix preceded by a dash separator', () => {
    const stripped = stripNotionID(`My-Page-${hexId}`);
    expect(stripped).toBe('My-Page');
  });

  it('strips a 32-hex suffix with no separator', () => {
    const stripped = stripNotionID(`MyPage${hexId}`);
    expect(stripped).toBe('MyPage');
  });

  it('strips a partial UUID suffix "{4}-{4}"', () => {
    const stripped = stripNotionID('Cool 324d-35ab');
    expect(stripped).toBe('Cool');
  });

  it('leaves a name without an ID unchanged', () => {
    const untouched = stripNotionID('Just A Title');
    expect(untouched).toBe('Just A Title');
  });
});
/** extractNotionPartialId: parses a trailing " xxxx-xxxx" partial UUID. */
describe('extractNotionPartialId', () => {
  it('returns prefix/suffix (lowercased) for a partial UUID folder name', () => {
    const parsed = extractNotionPartialId('Cool 324D-35AB');
    expect(parsed).toEqual({ prefix: '324d', suffix: '35ab' });
  });

  it('returns null when there is no partial UUID suffix', () => {
    const parsed = extractNotionPartialId('No Id Here');
    expect(parsed).toBeNull();
  });

  it('returns null when the suffix lacks the leading space', () => {
    // A space is required directly in front of the "{4}-{4}" group.
    const parsed = extractNotionPartialId('Name324d-35ab');
    expect(parsed).toBeNull();
  });
});
/**
 * resolveRelativeAttachmentPath: maps an href found in a page onto a key of
 * the candidate map built from the extracted archive, or null when none fits.
 */
describe('resolveRelativeAttachmentPath', () => {
  // Every case resolves hrefs relative to the same page directory.
  const pageDir = 'pages';

  /** Builds a candidate map whose values are the keys prefixed with "/abs/". */
  const candidatesFor = (...keys: string[]): Map<string, string> =>
    new Map<string, string>(
      keys.map((key): [string, string] => [key, `/abs/${key}`]),
    );

  it('returns the direct candidate when it exists', () => {
    const candidates = candidatesFor('attachments/file.png');

    const matched = resolveRelativeAttachmentPath(
      './attachments/file.png',
      pageDir,
      candidates,
    );

    expect(matched).toBe('attachments/file.png');
  });

  it('strips the Confluence "download/attachments/" prefix to match the archive layout', () => {
    const candidates = candidatesFor('attachments/123/diagram.png');

    const matched = resolveRelativeAttachmentPath(
      'download/attachments/123/diagram.png',
      pageDir,
      candidates,
    );

    expect(matched).toBe('attachments/123/diagram.png');
  });

  it('decodes a percent-encoded name before matching', () => {
    const candidates = candidatesFor('attachments/my file.png');

    const matched = resolveRelativeAttachmentPath(
      'attachments/my%20file.png',
      pageDir,
      candidates,
    );

    expect(matched).toBe('attachments/my file.png');
  });

  it('falls back to the raw (still-encoded) value on a malformed escape without throwing', () => {
    // "%E0%A4" is a truncated UTF-8 sequence, so decodeURIComponent throws; the
    // helper then keeps the raw string, which matches the candidate key as-is.
    const candidates = candidatesFor('attachments/%E0%A4.png');
    const outcome: { value: string | null } = { value: null };

    expect(() => {
      outcome.value = resolveRelativeAttachmentPath(
        'attachments/%E0%A4.png',
        pageDir,
        candidates,
      );
    }).not.toThrow();

    expect(outcome.value).toBe('attachments/%E0%A4.png');
  });

  it('returns null when nothing matches', () => {
    const candidates = candidatesFor('attachments/other.png');

    const matched = resolveRelativeAttachmentPath(
      './attachments/missing.png',
      pageDir,
      candidates,
    );

    expect(matched).toBeNull();
  });

  it('matches via the pageDir-joined fallback path', () => {
    // Neither the direct nor the Confluence key exists; the href only matches
    // once it is joined onto the page directory.
    const candidates = candidatesFor('pages/sub/img.png');

    const matched = resolveRelativeAttachmentPath(
      'sub/img.png',
      pageDir,
      candidates,
    );

    expect(matched).toBe('pages/sub/img.png');
  });
});
@@ -0,0 +1,105 @@
import { load, CheerioAPI, Cheerio } from 'cheerio';
import { normalizeTableColumnWidths } from './table-utils';
/**
* Unit tests for normalizeTableColumnWidths: it writes a `colwidth` attribute
* onto the first-row cells of every <table>, deriving widths from a <colgroup>
* or the first row, accounting for colspan, and falling back to a default
* per-column width (150px) when no pixel widths are present. Re-running the
* transform on its own output must be a no-op (idempotent).
*/
// Per-column width the tests expect when a table carries no pixel widths.
const DEFAULT = 150;

/**
 * Parses `html`, applies normalizeTableColumnWidths to the document root and
 * returns the cheerio handle plus that (now mutated) root.
 */
function run(html: string): { $: CheerioAPI; $root: Cheerio<any> } {
  const api = load(html);
  const documentRoot = api.root();
  normalizeTableColumnWidths(api, documentRoot);
  return { $: api, $root: documentRoot };
}
/**
 * Collects the raw `colwidth` attribute (or undefined when absent) of each
 * cell in the first row of the first <table> under `$root`.
 */
function firstRowColwidths($root: Cheerio<any>): (string | undefined)[] {
  const $firstTable = $root.find('table').first();
  // Rows may hang off <tbody>, <thead> or the table itself; take the first hit.
  const $firstRow = $firstTable
    .find('> tbody > tr, > thead > tr, > tr')
    .first();
  const $cells = $firstRow.children('td, th');
  return $cells.map((_, cell) => (cell as any).attribs?.colwidth).get();
}
/**
 * normalizeTableColumnWidths: first-row cells are expected to receive a
 * `colwidth` attribute taken from the colgroup or the first row, split across
 * colspans, with the default width used when no pixel width is available.
 */
describe('normalizeTableColumnWidths', () => {
  // Table whose widths come from a <colgroup>; shared by two cases.
  const colgroupTable = [
    '<table>',
    '<colgroup><col width="120"><col width="80"></colgroup>',
    '<tbody><tr><td>a</td><td>b</td></tr></tbody>',
    '</table>',
  ].join('');

  /** Wraps each given cell markup in a <tr> inside <table><tbody>. */
  const tbodyTable = (...rows: string[]): string =>
    `<table><tbody>${rows.map((cells) => `<tr>${cells}</tr>`).join('')}</tbody></table>`;

  it('applies colgroup <col width> to the first-row cells', () => {
    const { $root } = run(colgroupTable);
    expect(firstRowColwidths($root)).toEqual(['120', '80']);
  });

  it('falls back to first-row cell widths when there is no colgroup', () => {
    const { $root } = run(
      tbodyTable('<td style="width: 200px">a</td><td width="90">b</td>'),
    );
    expect(firstRowColwidths($root)).toEqual(['200', '90']);
  });

  it('splits a colspan width across the spanned columns', () => {
    // A colspan=2 cell of width 100 yields two derived columns of 50 each; the
    // spanning cell is then given the joined slice "50,50".
    const { $root } = run(tbodyTable('<td colspan="2" width="100">merged</td>'));
    expect(firstRowColwidths($root)).toEqual(['50,50']);
  });

  it('ignores em/% widths (treated as no width) and applies the default', () => {
    const { $root } = run(
      tbodyTable(
        '<td style="width: 10em">a</td><td style="width: 50%">b</td>',
      ),
    );
    const fallback = String(DEFAULT);
    expect(firstRowColwidths($root)).toEqual([fallback, fallback]);
  });

  it('applies the default per-column width to a markdown-style table with no widths', () => {
    const { $root } = run(
      tbodyTable(
        '<td>a</td><td>b</td><td>c</td>',
        '<td>1</td><td>2</td><td>3</td>',
      ),
    );
    const fallback = String(DEFAULT);
    expect(firstRowColwidths($root)).toEqual([fallback, fallback, fallback]);
  });

  it('is idempotent: re-running on its own output changes nothing', () => {
    const { $, $root } = run(colgroupTable);
    const afterFirstPass = $root.html();

    // second pass over the already-normalized document
    normalizeTableColumnWidths($, $root);

    expect($root.html()).toBe(afterFirstPass);
    expect(firstRowColwidths($root)).toEqual(['120', '80']);
  });
});