Files
gitmost/packages/git-sync/build/engine/sanitize.js
claude code agent 227 ed3b65c36b Merge remote-tracking branch 'gitea/develop' into batch/issues-2026-06-25
# Conflicts:
#	apps/server/src/core/ai-chat/ai-chat.service.spec.ts
#	apps/server/src/core/ai-chat/ai-chat.service.ts
2026-06-25 12:48:47 +03:00

98 lines
3.5 KiB
JavaScript

/**
* Deterministic filename strategy (SPEC §12).
*
* The file name is COSMETIC — the source of truth for the file<->page link is
* `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
* functions are intentionally dependency-free and pure, so they are trivially
* unit-testable.
*/
// Printable characters forbidden in file names on common filesystems (mainly
// Windows): / \ < > : " | ? *. Each match is replaced with a single "-".
// Spaces are NOT in this set; whitespace is normalized separately below.
// ASCII control characters (code points 0..31) are stripped in a separate pass
// (see stripControlChars) to keep this literal free of embedded control bytes.
const FORBIDDEN_PRINTABLE_RE = /[/\\<>:"|?*]/g;
// Runs of whitespace (including tabs/newlines) collapse to a single space.
const WHITESPACE_RUN_RE = /\s+/g;
// Reserved Windows device names (case-insensitive). A bare match (with or
// without an extension) is unusable as a file name, so it is prefixed with "_".
const RESERVED_WINDOWS_NAMES = new Set([
"con",
"prn",
"aux",
"nul",
"com1",
"com2",
"com3",
"com4",
"com5",
"com6",
"com7",
"com8",
"com9",
"lpt1",
"lpt2",
"lpt3",
"lpt4",
"lpt5",
"lpt6",
"lpt7",
"lpt8",
"lpt9",
]);
// Cap on the sanitized length to stay well within filesystem path-component
// limits (255 bytes on most FSes) while leaving room for an extension and a
// disambiguation suffix.
const MAX_LENGTH = 120;
/**
* Replace every ASCII control character (code points 0..31) with "-". Done by
* scanning code points rather than a control-range regex literal, so the source
* file carries no embedded control bytes.
*/
function stripControlChars(input) {
let out = "";
for (let i = 0; i < input.length; i++) {
out += input.charCodeAt(i) < 32 ? "-" : input[i];
}
return out;
}
/**
* Sanitize a page title into a safe file-name component (WITHOUT extension).
*
* Steps: replace forbidden / control characters with "-", collapse whitespace
* runs to a single space, trim, cap the length, then guard against an empty
* result, an all-dots result, or a reserved Windows device name by prefixing
* with "_".
*/
export function sanitizeTitle(title) {
let name = stripControlChars(title ?? "")
.replace(FORBIDDEN_PRINTABLE_RE, "-")
.replace(WHITESPACE_RUN_RE, " ")
.trim();
if (name.length > MAX_LENGTH) {
name = name.slice(0, MAX_LENGTH).trim();
}
// Compare the base name (before the first dot) against reserved names, so
// both "CON" and "con.md" are caught.
const base = name.split(".")[0]?.toLowerCase() ?? "";
// A name that is empty, consists only of dots ("." / ".." / "..."), or is a
// reserved Windows device name is unusable as a path component. The all-dots
// case is a path-traversal hazard in particular: an unprefixed ".." would
// become a parent-directory segment and let a page escape the vault, so it
// MUST be neutralized here (becomes "_..", which is a literal file name).
if (name.length === 0 ||
/^\.+$/.test(name) ||
RESERVED_WINDOWS_NAMES.has(base)) {
name = "_" + name;
}
return name;
}
/**
* Disambiguate a sanitized name when two siblings in the same folder collapse
* to the same name. Appends a stable suffix built from the page's `slugId`, so
* the result stays deterministic across runs (SPEC §12: `Title ~slugId`).
*/
export function disambiguate(name, slugId) {
return `${name} ~${slugId}`;
}