24b903aaf3
The git-sync converter + engine source lived only on the #119 branch; develop had just the dead compiled build/. Bring the whole package (src + ~700 tests) onto develop under CI, with NO consumer wired — git-sync stays fully inert in develop (nothing in apps/server imports it), so runtime behavior is unchanged. This unblocks #293 (extract the shared converter package from the landed source) and lets #119's functionality land LAST, already writing the canonical format (per the #326 landing order). - packages/git-sync: src (lib converter + engine) + test corpus + configs. - Remove develop's dead committed packages/git-sync/build/; gitignore it (built in CI/Docker via pnpm build, never committed — no src/build drift). - pnpm-lock.yaml: add the @docmost/git-sync importer (a missing workspace package in the lock is a CI blocker). `pnpm install --frozen-lockfile` passes. - NO server integration / loader / Dockerfile runtime changes (those come with #119 at step 6). Verified: tsc clean; vitest 711 passed | 1 expected-fail, 0 failures, 0 type errors; pnpm --frozen-lockfile EXIT 0; apps/server has no git-sync import. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
110 lines
3.5 KiB
TypeScript
110 lines
3.5 KiB
TypeScript
/**
|
|
* Deterministic filename strategy (SPEC §12).
|
|
*
|
|
* The file name is COSMETIC — the source of truth for the file<->page link is
|
|
* `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
|
|
* functions are intentionally dependency-free and pure, so they are trivially
|
|
* unit-testable.
|
|
*/
|
|
|
|
// Printable characters forbidden in file names on common filesystems (mainly
|
|
// Windows): / \ < > : " | ? *. Each match is replaced with a single "-".
|
|
// Spaces are NOT in this set; whitespace is normalized separately below.
|
|
// ASCII control characters (code points 0..31) are stripped in a separate pass
|
|
// (see stripControlChars) to keep this literal free of embedded control bytes.
|
|
// Global (`g`) character class: used with `.replace`, every single occurrence
// of / \ < > : " | ? * is substituted on its own (runs are NOT collapsed).
// The leading "/" needs no escape because it sits inside the [...] class.
const FORBIDDEN_PRINTABLE_RE = /[/\\<>:"|?*]/g;
|
|
|
|
// Runs of whitespace (including tabs/newlines) collapse to a single space.
|
|
// One or more consecutive whitespace characters, matched globally. In
// JavaScript `\s` also covers Unicode whitespace (e.g. U+00A0, U+2028), so
// those are folded into the same single-space replacement.
const WHITESPACE_RUN_RE = /\s+/g;
|
|
|
|
// Windows device names that cannot be used as a file name, whether bare or
// followed by an extension. Stored lower-case: callers must lower-case the
// candidate before the lookup. A match gets prefixed with "_" by the caller.
const RESERVED_WINDOWS_NAMES = new Set<string>([
  // Console, printer, auxiliary and null devices.
  "con", "prn", "aux", "nul",
  // Serial ports COM1..COM9.
  "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
  // Parallel ports LPT1..LPT9.
  "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9",
]);
|
|
|
|
// Cap on the sanitized length to stay well within filesystem path-component
|
|
// limits (255 bytes on most FSes) while leaving room for an extension and a
|
|
// disambiguation suffix.
|
|
// Compared against `String.length`, i.e. counted in UTF-16 code units, not in
// encoded bytes.
// NOTE(review): a title made of multi-byte characters can therefore encode to
// more than 120 bytes (up to ~3 bytes per code unit in UTF-8) — confirm this
// still fits the 255-byte component limit for the target filesystems.
const MAX_LENGTH = 120;
|
|
|
|
/**
 * Substitute "-" for every UTF-16 code unit below 32 (the ASCII control
 * range); all other code units pass through unchanged.
 *
 * The test is numeric rather than a control-range regex literal so that this
 * source file never has to contain raw control bytes.
 *
 * @param input - Text to clean.
 * @returns A string of the same length with control characters replaced.
 */
function stripControlChars(input: string): string {
  return input
    .split("")
    .map((unit) => (unit.charCodeAt(0) < 32 ? "-" : unit))
    .join("");
}
|
|
|
|
/**
 * Sanitize a page title into a safe file-name component (WITHOUT extension).
 *
 * Pipeline:
 *  1. Replace ASCII control characters and the forbidden printable characters
 *     with "-".
 *  2. Collapse whitespace runs to a single space and trim.
 *  3. Cap the result at MAX_LENGTH UTF-16 code units, never cutting through a
 *     surrogate pair, then trim again.
 *  4. Prefix "_" when the result is empty, consists only of dots, or its base
 *     name is a reserved Windows device name.
 *
 * @param title - Raw page title. `null` / `undefined` (possible from untyped
 *   callers) is treated as an empty title.
 * @returns The sanitized name; never empty and never "." / "..".
 */
export function sanitizeTitle(title: string): string {
  let name = stripControlChars(title ?? "")
    .replace(FORBIDDEN_PRINTABLE_RE, "-")
    .replace(WHITESPACE_RUN_RE, " ")
    .trim();

  if (name.length > MAX_LENGTH) {
    // `slice` counts UTF-16 code units, so a plain cut at MAX_LENGTH could
    // keep the high half of a surrogate pair and drop its low half. A lone
    // surrogate is ill-formed and gets mangled when the name is encoded for
    // the filesystem, so back off by one unit in that case.
    let cut = MAX_LENGTH;
    const lastKept = name.charCodeAt(cut - 1);
    const firstDropped = name.charCodeAt(cut);
    const splitsPair =
      lastKept >= 0xd800 &&
      lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 &&
      firstDropped <= 0xdfff;
    if (splitsPair) {
      cut -= 1;
    }
    // The cut may land right after a space, hence the second trim.
    name = name.slice(0, cut).trim();
  }

  // Compare the base name (before the first dot) against reserved names, so
  // both "CON" and "con.md" are caught.
  const base = name.split(".")[0]?.toLowerCase() ?? "";

  // A name that is empty, consists only of dots ("." / ".." / "..."), or is a
  // reserved Windows device name is unusable as a path component. The all-dots
  // case is a path-traversal hazard in particular: an unprefixed ".." would
  // become a parent-directory segment and let a page escape the vault, so it
  // MUST be neutralized here (becomes "_..", which is a literal file name).
  if (
    name.length === 0 ||
    /^\.+$/.test(name) ||
    RESERVED_WINDOWS_NAMES.has(base)
  ) {
    name = "_" + name;
  }

  return name;
}
|
|
|
|
/**
 * Make a sanitized name unique among siblings that collapsed to the same
 * name by appending ` ~<slugId>`. The suffix depends only on the page's
 * `slugId`, so repeated runs give the same result (SPEC §12: `Title ~slugId`).
 *
 * @param name - Already-sanitized file-name component.
 * @param slugId - The page's slug identifier.
 * @returns `name`, a space, a tilde, then `slugId`.
 */
export function disambiguate(name: string, slugId: string): string {
  const suffix = ` ~${slugId}`;
  return `${name}${suffix}`;
}
|