diff --git a/packages/git-sync/build/engine/client.types.d.ts b/packages/git-sync/build/engine/client.types.d.ts
new file mode 100644
index 00000000..dea018c7
--- /dev/null
+++ b/packages/git-sync/build/engine/client.types.d.ts
@@ -0,0 +1,108 @@
+/**
+ * The client seam (plan §3.1). Upstream `pull.ts`/`push.ts` reached into the
+ * REST `DocmostClient` from the `docmost-client` package via
+ * `Pick<DocmostClient, …>` subsets. That package is NOT vendored here (the gitmost server writes
+ * NATIVELY — through repositories + collab `openDirectConnection`, plan §3.2/§3.3),
+ * so the engine must depend on a narrow STRUCTURAL interface instead.
+ *
+ * `GitSyncClient` is that interface: the native datasource (server side, a later
+ * step) implements it, and the vendored engine only ever uses
+ * `Pick<GitSyncClient, …>` subsets of it. The signatures below MIRROR exactly the methods the
+ * vendored `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine
+ * reads off each result) — verified against the upstream `DocmostClient`
+ * (packages/docmost-client/src/client.ts) so a real REST client is still
+ * structurally assignable, and so the native adapter has a precise contract.
+ */
+/**
+ * A page node as returned by `listSpaceTree` (the sidebar/tree walk, no body).
+ * The engine layout (`buildVaultLayout`) consumes `PageNode` from `./layout`,
+ * which only requires `id` (+ optional `title`/`slugId`/`parentPageId`); this
+ * lite shape documents the fields the tree walk surfaces. Upstream nodes also
+ * carry `position`, `icon`, `hasChildren` — kept open via the index signature.
+ */
+export interface GitSyncPageNodeLite {
+    id: string;
+    slugId?: string;
+    title?: string;
+    parentPageId?: string | null;
+    hasChildren?: boolean;
+    /** Upstream `listSpaceTree` nodes carry extra fields (position, icon, …). */
+    [key: string]: unknown;
+}
+/**
+ * The structural client the engine depends on. Only `Pick<GitSyncClient, …>`
+ * subsets are ever used:
+ *  - pull reads: `getPageJson` (+ the tree walk's `listSpaceTree`),
+ *  - push writes: `importPageMarkdown` / `createPage` / `deletePage` /
+ *    `movePage` / `renamePage`,
+ *  - continuous (phase B+): `listRecentSince` / `listTrash` / `restorePage`.
+ */
+export interface GitSyncClient {
+    /**
+     * Full tree of page nodes for the space (or the subtree rooted at
+     * `rootPageId`), each WITHOUT body content. `complete` is `false` when the
+     * walk was truncated / a fetch failed — the pull side suppresses absence
+     * deletions on an incomplete tree (SPEC §8). Native impl returns
+     * `complete: true` always (reads the DB, not a paginated REST endpoint).
+     */
+    listSpaceTree(spaceId: string, rootPageId?: string): Promise<{
+        pages: GitSyncPageNodeLite[];
+        complete: boolean;
+    }>;
+    /**
+     * One page WITH its ProseMirror body content. `applyPullActions` reads
+     * `id`, `slugId`, `title`, `parentPageId`, `spaceId` (for the file meta) and
+     * `content` (to stabilize/serialize). `updatedAt` is carried for the
+     * poll-suppression loop-guard.
+     */
+    getPageJson(pageId: string): Promise<{
+        id: string;
+        slugId: string;
+        title: string;
+        parentPageId: string | null;
+        spaceId: string;
+        updatedAt: string;
+        content: unknown;
+    }>;
+    /**
+     * Replace a page's body from a self-contained markdown file (meta + body).
+     * The collab/Yjs write path (SPEC §2/§15.6) — never a raw jsonb overwrite.
+     * `applyPushActions` reads only an optional `updatedAt` off the result
+     * (via `extractUpdatedAt`, tolerant of extra fields).
+     */
+    importPageMarkdown(pageId: string, fullMarkdown: string): Promise<{
+        updatedAt?: string;
+        [key: string]: unknown;
+    }>;
+    /**
+     * Create a new page and return the assigned id at `data.id`
+     * (`applyPushActions` reads `result.data.id`, then writes it back into the
+     * file's meta). An optional top-level/`data.updatedAt` feeds the loop-guard.
+     */
+    createPage(title: string, content: string, spaceId: string, parentPageId?: string): Promise<{
+        data: {
+            id: string;
+        };
+        updatedAt?: string;
+        [key: string]: unknown;
+    }>;
+    /** Soft-delete a page to Trash (SPEC §8). Result is not inspected. */
+    deletePage(pageId: string): Promise<unknown>;
+    /**
+     * Reparent a page (and optionally set its fractional-index `position`). The
+     * engine passes `position` UNDEFINED for now; the native impl computes a
+     * default between siblings (plan §3.2). Result is not inspected.
+     */
+    movePage(pageId: string, parentPageId: string | null, position?: string): Promise<unknown>;
+    /** Change a page's title only (no body touch). Result is not inspected. */
+    renamePage(pageId: string, title: string): Promise<unknown>;
+    /**
+     * Pages updated since `sinceIso` (the poll-safety reconciliation, SPEC §8).
+     * `spaceId` may be undefined (all spaces); `hardPageCap` bounds the walk.
+     */
+    listRecentSince(spaceId: string | undefined, sinceIso: string | null, hardPageCap?: number): Promise<unknown>; // NOTE(review): result type argument was lost in extraction — confirm against the TS source
+    /** List soft-deleted (trashed) pages for the space (deletion detection). */
+    listTrash(spaceId: string): Promise<unknown>; // NOTE(review): result type argument was lost in extraction — confirm against the TS source
+    /** Restore a soft-deleted page from Trash. Result is not inspected. */
+    restorePage(pageId: string): Promise<unknown>;
+}
diff --git a/packages/git-sync/build/engine/client.types.js b/packages/git-sync/build/engine/client.types.js
new file mode 100644
index 00000000..53a06e3e
--- /dev/null
+++ b/packages/git-sync/build/engine/client.types.js
@@ -0,0 +1,17 @@
+"use strict";
+/**
+ * The client seam (plan §3.1). Upstream `pull.ts`/`push.ts` reached into the
+ * REST `DocmostClient` from the `docmost-client` package via
+ * `Pick<DocmostClient, …>` subsets. That package is NOT vendored here (the gitmost server writes
+ * NATIVELY — through repositories + collab `openDirectConnection`, plan §3.2/§3.3),
+ * so the engine must depend on a narrow STRUCTURAL interface instead.
+ *
+ * `GitSyncClient` is that interface: the native datasource (server side, a later
+ * step) implements it, and the vendored engine only ever uses
+ * `Pick<GitSyncClient, …>` subsets of it. The signatures below MIRROR exactly the methods the
+ * vendored `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine
+ * reads off each result) — verified against the upstream `DocmostClient`
+ * (packages/docmost-client/src/client.ts) so a real REST client is still
+ * structurally assignable, and so the native adapter has a precise contract.
+ */
+Object.defineProperty(exports, "__esModule", { value: true });
diff --git a/packages/git-sync/build/engine/config-errors.d.ts b/packages/git-sync/build/engine/config-errors.d.ts
new file mode 100644
index 00000000..3e710684
--- /dev/null
+++ b/packages/git-sync/build/engine/config-errors.d.ts
@@ -0,0 +1 @@
+export declare function loadSettingsOrExit<T>(factory: () => T): T;
diff --git a/packages/git-sync/build/engine/config-errors.js b/packages/git-sync/build/engine/config-errors.js
new file mode 100644
index 00000000..baa5cbee
--- /dev/null
+++ b/packages/git-sync/build/engine/config-errors.js
@@ -0,0 +1,53 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.loadSettingsOrExit = loadSettingsOrExit;
+const zod_1 = require("zod");
+// Turn a ZodError from settings validation into a clear, actionable startup
+// message that names the offending env var(s), then exit(1) — no raw stack
+// trace. Mirrors the Python new-project skeleton's load_settings_or_exit.
+// A non-ZodError is left to propagate unchanged.
+function loadSettingsOrExit(factory) {
+    try {
+        return factory();
+    }
+    catch (err) {
+        if (!(err instanceof zod_1.ZodError))
+            throw err;
+        const missing = [];
+        const invalid = [];
+        for (const issue of err.issues) {
+            const name = issue.path.length ? String(issue.path[0]) : '?';
+            // A missing required variable surfaces as an `invalid_type` issue whose
+            // received value was `undefined`. zod 3 exposed `issue.received` directly;
+            // zod 4 dropped that field and instead folds it into the message
+            // ("expected string, received undefined"). Detect both shapes so the
+            // missing-vs-invalid split holds across zod majors. NOTE: an invalid (but
+            // present) value uses a different code (invalid_format / invalid_value) or
+            // an `invalid_type` message that reports a non-undefined received (e.g.
+            // "received NaN" from a coerced number), so neither is misread as missing.
+            const i = issue;
+            const isMissing = issue.code === 'invalid_type' &&
+                (i.received === 'undefined' ||
+                    /received undefined/i.test(i.message ?? ''));
+            if (isMissing)
+                missing.push(name);
+            else
+                invalid.push(`${name}: ${issue.message}`);
+        }
+        const lines = ['Configuration error in environment / .env:'];
+        if (missing.length) {
+            lines.push(' Missing required variable(s):');
+            for (const n of [...new Set(missing)])
+                lines.push(` - ${n}`);
+        }
+        if (invalid.length) {
+            lines.push(' Invalid value(s):');
+            for (const item of invalid)
+                lines.push(` - ${item}`);
+        }
+        lines.push('');
+        lines.push('Set them in .env (see .env.example) and try again.');
+        process.stderr.write(lines.join('\n') + '\n');
+        process.exit(1);
+    }
+}
diff --git a/packages/git-sync/build/engine/git.d.ts b/packages/git-sync/build/engine/git.d.ts
new file mode 100644
index 00000000..85cba296
--- /dev/null
+++ b/packages/git-sync/build/engine/git.d.ts
@@ -0,0 +1,259 @@
+/** Bot identity used for engine-authored vault commits (SPEC §7.3). */
+export declare const BOT_AUTHOR_NAME = "Docmost Sync";
+export declare const BOT_AUTHOR_EMAIL = "docmost-sync@local";
+/** Default branch the vault repo is initialized on. */
+export declare const DEFAULT_BRANCH = "main";
+/**
+ * One row of `git diff --name-status` (SPEC §6 "ФС → Docmost").
`status` is the
+ * single-letter change code (`-M` rename detection on), `path` is the (new) file
+ * path; for a rename/copy (`R`/`C`) `oldPath` is the source and `path` is the
+ * destination, with `score` carrying git's similarity index (0–100).
+ */
+export interface DiffEntry {
+    status: "A" | "M" | "D" | "R" | "C";
+    /** New (destination) path. For A/M/D it is the only path. */
+    path: string;
+    /** Source path — present only for R/C. */
+    oldPath?: string;
+    /** Rename/copy similarity score (0–100) — present only for R/C. */
+    score?: number;
+}
+/** Result of a `merge`: whether it succeeded cleanly or left conflict markers. */
+export interface MergeResult {
+    /** True when the merge applied cleanly (fast-forward or clean 3-way). */
+    ok: boolean;
+    /** True when the merge stopped on conflicts (markers left in the worktree). */
+    conflict: boolean;
+    /** Raw combined stdout+stderr, for logging/diagnostics. */
+    output: string;
+}
+/** Options for an engine-authored commit (provenance, SPEC §7.3). */
+export interface CommitOptions {
+    authorName: string;
+    authorEmail: string;
+    /**
+     * Trailer lines appended to the commit message body (e.g.
+     * `Docmost-Sync-Source: docmost`). These are the machine-readable provenance
+     * the loop-guard keys on (SPEC §12, "commit-attribution").
+     */
+    trailers?: string[];
+}
+/**
+ * A git wrapper bound to a single vault path. Construct once per vault; every
+ * method runs git with `cwd = vaultPath`.
+ */
+export declare class VaultGit {
+    private readonly vaultPath;
+    constructor(vaultPath: string);
+    /**
+     * Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
+     * to system `git` for every vault operation, so a missing binary (e.g. a slim
+     * container image without git) must fail fast with an actionable message
+     * rather than a cryptic ENOENT deep inside the first real git call. Presence
+     * check only — we do NOT gate on a specific version. Runs `git --version`
+     * with NO `cwd` (the vault dir may not exist yet at preflight time).
+     */
+    assertGitAvailable(): Promise<void>;
+    /**
+     * Run a git command in the vault and return trimmed stdout. THIN wrapper over
+     * the single `runRaw` primitive: throws a clear, unified Error (including
+     * stderr/stdout) on a non-zero exit.
+     */
+    private run;
+    /**
+     * The ONE primitive every git invocation in this module flows through. Builds
+     * the full argv (`--no-pager -c core.quotepath=false <args>`), env, cwd, and
+     * maxBuffer, runs git, and NEVER throws — it returns the exit info so callers
+     * can treat a non-zero exit as either an error (`run`) or a meaningful state
+     * (e.g. a merge conflict, a porcelain diff that "fails" deliberately).
+     *
+     *  - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
+     *    blocks on a pager and always prints verbatim UTF-8 paths (no octal
+     *    escaping/quoting). `quotepath=false` is the baseline for ALL path-
+     *    printing commands (ls-files, diff --name-only, …).
+     *  - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
+     *    vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
+     *  - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
+     *  - On a spawn/exec error we capture the error `message` too, so a failure
+     *    before git could write to stderr (e.g. ENOENT) is NOT lost.
+     */
+    private runRaw;
+    /**
+     * Ensure the vault directory exists and is an initialized git repo on `main`
+     * with an initial (empty) commit so branches exist. Idempotent: safe to call
+     * on every run. Sets a LOCAL bot identity for the vault repo if none is set
+     * (so engine commits never fall back to a global/unset identity).
+     */
+    ensureRepo(): Promise<void>;
+    /** True if `cwd` is inside a git work-tree (the vault is initialized). */
+    private isRepo;
+    /** True if a LOCAL git config key is set in the vault repo. */
+    private hasLocalConfig;
+    /** True if the repo has at least one commit (HEAD resolves). */
+    private hasAnyCommit;
+    /** True if a branch with the given name exists. */
+    branchExists(name: string): Promise<boolean>;
+    /**
+     * Create `name` from `fromBranch` if it does not already exist. No-op (and no
+     * checkout) when the branch is already present.
+     */
+    ensureBranch(name: string, fromBranch: string): Promise<void>;
+    /** Name of the currently checked-out branch. */
+    currentBranch(): Promise<string>;
+    /** Check out an existing branch. */
+    checkout(name: string): Promise<void>;
+    /** Stage everything (adds, modifications, deletions). */
+    stageAll(): Promise<void>;
+    /**
+     * True if the vault is mid-merge (an unresolved merge from a previous run,
+     * SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged
+     * (conflicted) index entries (`git ls-files -u`). The pull cycle checks this
+     * BEFORE any checkout so a left-over merge produces a clear, actionable
+     * message instead of a raw "you need to resolve your current index first"
+     * failure deep inside `checkout`. This is what makes re-runs converge
+     * (resumability, SPEC §12).
+     */
+    isMergeInProgress(): Promise<boolean>;
+    /**
+     * Commit the currently STAGED changes with an explicit author/committer
+     * identity and the given trailers appended to the message body (SPEC §7.3
+     * provenance). Returns `true` if a commit was made, `false` if there was
+     * nothing to commit (graceful no-op). The caller is expected to have staged
+     * its changes first (e.g. via `stageAll`).
+     */
+    commit(message: string, opts: CommitOptions): Promise<boolean>;
+    /**
+     * Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
+     * Builds the full message with appended trailers and sets author + committer
+     * identity via env vars (so the committer matches the author, not the repo
+     * default).
+     */
+    private commitRaw;
+    /**
+     * Merge `fromBranch` into the current branch (`git merge --no-edit`).
+     * Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict
+     * state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict
+     * markers are left in the worktree for manual resolution by a later increment,
+     * and — critically — nothing is pushed to Docmost (we never write to Docmost
+     * anyway).
+     */
+    merge(fromBranch: string): Promise<MergeResult>;
+    /** True if the index has any unmerged (conflicted) paths. */
+    private hasUnmergedPaths;
+    /**
+     * List tracked files on the current branch (paths relative to the vault
+     * root, forward-slash separated). An optional glob (a git pathspec) narrows
+     * the listing, e.g. `"*.md"`.
+     *
+     * The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
+     * (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
+     * returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
+     * which `src/pull.ts` `readExisting` would then parse as garbage paths,
+     * breaking move/duplicate detection. We defeat that two ways at once:
+     *  - `core.quotepath=false` disables the octal-escape/quoting. It is now the
+     *    `runRaw` argv baseline (prepended to EVERY invocation), so we no longer
+     *    pass it inline here.
+     *  - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
+     *    ambiguity), which we split on `\0`.
+     * We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
+     * the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
+     * are returned verbatim — git already emits forward slashes.
+     */
+    listTrackedFiles(glob?: string): Promise<string[]>;
+    /**
+     * Diff two refs with `--name-status -M -z` and parse the NUL-delimited output
+     * (SPEC §6: the FS→Docmost push direction diffs `main` against
+     * `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed
+     * file is reported as a single `R` row with both its old and new path instead
+     * of a delete+add pair — that distinction is what lets the push planner tell a
+     * move from a delete+create (SPEC §8 "Move vs delete").
+     *
+     * `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has
+     * Cyrillic file names) with NO quoting/escaping. The record shape differs by
+     * status:
+     *  - A/M/D: `status\0path\0`
+     *  - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
+     * We read the RAW stdout (not the trimming `run()` helper, which would mangle
+     * the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the
+     * tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim.
+     */
+    diffNameStatus(fromRef: string, toRef: string): Promise<DiffEntry[]>;
+    /**
+     * Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist.
+     * `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an
+     * unknown ref, so a non-zero exit maps cleanly to `null`. Used to read
+     * `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push.
+     */
+    revParse(ref: string): Promise<string | null>;
+    /**
+     * Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`,
+     * named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5:
+     * "что из `main` уже отражено в Docmost").
+     */
+    readRef(ref: string): Promise<string | null>;
+    /**
+     * Point `ref` at `target` (`git update-ref <ref> <target>`). Used to advance
+     * `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
+     * (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts.
+     */
+    updateRef(ref: string, target: string): Promise<void>;
+    /**
+     * Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward,
+     * i.e. the current `branch` tip is an ancestor of `toCommit` (verified via
+     * `git merge-base --is-ancestor <branch> <toCommit>`). Used to advance the
+     * `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a
+     * push succeeds, Docmost already contains the pushed `main` content, so the
+     * mirror must reflect it — otherwise the NEXT pull would diff our own write
+     * back and re-pull it (loop-guard).
+     *
+     * SAFETY — never force, never clobber divergent history:
+     *  - If `branch` IS an ancestor of `toCommit`, advance it with
+     *    `git update-ref refs/heads/<branch> <toCommit>`. The `docmost` branch is
+     *    NOT checked out during a push (push works on `main`), so updating the ref
+     *    directly is safe and avoids any working-tree touch.
+     *  - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward),
+     *    do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and
+     *    let the caller log it. We must never overwrite a `docmost` history that
+     *    has commits the push base does not contain.
+     *
+     * Returns `{ ok: true }` when the branch was advanced (or already at
+     * `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise.
+     * A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason.
+     */
+    fastForwardBranch(branch: string, toCommit: string): Promise<{
+        ok: boolean;
+        reason?: string;
+    }>;
+    /**
+     * Read a file's content at a specific ref (`git show <ref>:<path>`), or `null`
+     * if the path does not exist there. Used by the push direction to read the
+     * PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its
+     * `docmost:meta` — and therefore its `pageId` — can be recovered to translate
+     * the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones
+     * that had a pageId, are deleted in Docmost). A non-zero exit (path absent at
+     * that ref) maps to `null` rather than throwing.
+     */
+    showFileAtRef(ref: string, path: string): Promise<string | null>;
+}
+/**
+ * Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
+ * Used by the single `runRaw` primitive every git command flows through, so
+ * these pins apply uniformly (including the `git --version` preflight).
+ *
+ * cwd-isolation is this module's central safety guarantee: every git command
+ * MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An
+ * inherited `GIT_DIR` / `GIT_WORK_TREE` in `process.env` would silently
+ * redirect the operation away from `cwd` (e.g. to the source repo or another
+ * checkout), defeating that guarantee. So we always strip them, regardless of
+ * whatever else the caller adds (author/committer identity, etc.).
+ *
+ * Exported for unit testing.
+ */
+export declare function vaultGitEnv(extra?: Record<string, string>): NodeJS.ProcessEnv; // NOTE(review): Record type arguments were lost in extraction — confirm against the TS source
+/**
+ * Build a commit message body with trailer lines appended (SPEC §7.3). The
+ * trailers are separated from the subject by a blank line so `git interpret-
+ * trailers` / `git log --format=%(trailers)` parse them as trailers.
+ * Exported for unit testing.
+ */
+export declare function buildCommitMessage(subject: string, trailers?: string[]): string;
diff --git a/packages/git-sync/build/engine/git.js b/packages/git-sync/build/engine/git.js
new file mode 100644
index 00000000..35745cf9
--- /dev/null
+++ b/packages/git-sync/build/engine/git.js
@@ -0,0 +1,577 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.VaultGit = exports.DEFAULT_BRANCH = exports.BOT_AUTHOR_EMAIL = exports.BOT_AUTHOR_NAME = void 0;
+exports.vaultGitEnv = vaultGitEnv;
+exports.buildCommitMessage = buildCommitMessage;
+/**
+ * Thin async wrapper over the system `git` binary (SPEC §5: state store = git).
+ *
+ * IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`,
+ * which is the vault's OWN git repository (default `data/vault`), SEPARATE from
+ * the docmost-sync source repo. This module MUST NEVER run git against the
+ * source repo. `data/` is gitignored by the source repo, so a nested repo under
+ * `data/vault` is safe. The pull cycle is READ-ONLY toward Docmost; this module
+ * only touches the local vault git, never a git remote (push is deferred, see
+ * SPEC §7).
+ * + * Implementation notes: + * - We shell out via `node:child_process` `execFile` (promisified), passing + * ARGS AS AN ARRAY — no shell, so there is no command injection surface even + * if a page title / branch name contains shell metacharacters. + * - EVERY git invocation funnels through the single `runRaw` primitive, which + * ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git + * never blocks on a pager and always prints verbatim UTF-8 paths). There is + * no exception — even the `git --version` preflight goes through `runRaw`. + * - "nothing to commit" is treated as a graceful no-op, not an error. + */ +const node_child_process_1 = require("node:child_process"); +const promises_1 = require("node:fs/promises"); +const node_util_1 = require("node:util"); +const execFileAsync = (0, node_util_1.promisify)(node_child_process_1.execFile); +/** Bot identity used for engine-authored vault commits (SPEC §7.3). */ +exports.BOT_AUTHOR_NAME = "Docmost Sync"; +exports.BOT_AUTHOR_EMAIL = "docmost-sync@local"; +/** Default branch the vault repo is initialized on. */ +exports.DEFAULT_BRANCH = "main"; +/** + * A git wrapper bound to a single vault path. Construct once per vault; every + * method runs git with `cwd = vaultPath`. + */ +class VaultGit { + vaultPath; + constructor(vaultPath) { + this.vaultPath = vaultPath; + } + /** + * Preflight: verify a runnable `git` binary is on PATH. The daemon shells out + * to system `git` for every vault operation, so a missing binary (e.g. a slim + * container image without git) must fail fast with an actionable message + * rather than a cryptic ENOENT deep inside the first real git call. Presence + * check only — we do NOT gate on a specific version. Runs `git --version` + * with NO `cwd` (the vault dir may not exist yet at preflight time). + */ + async assertGitAvailable() { + // Goes through the single `runRaw` primitive like every other invocation. 
+ // `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at + // preflight time, so we must not point git at a missing directory. + const r = await this.runRaw(["--version"], { cwd: null }); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error("git binary not found or not runnable — install git (the vault state " + + `store requires it). Underlying error: ${detail}`); + } + } + /** + * Run a git command in the vault and return trimmed stdout. THIN wrapper over + * the single `runRaw` primitive: throws a clear, unified Error (including + * stderr/stdout) on a non-zero exit. + */ + async run(args, opts) { + const r = await this.runRaw(args, opts); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git ${args.join(" ")} failed: ${detail}`); + } + return r.stdout.trim(); + } + /** + * The ONE primitive every git invocation in this module flows through. Builds + * the full argv (`--no-pager -c core.quotepath=false `), env, cwd, and + * maxBuffer, runs git, and NEVER throws — it returns the exit info so callers + * can treat a non-zero exit as either an error (`run`) or a meaningful state + * (e.g. a merge conflict, a porcelain diff that "fails" deliberately). + * + * - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never + * blocks on a pager and always prints verbatim UTF-8 paths (no octal + * escaping/quoting). `quotepath=false` is the baseline for ALL path- + * printing commands (ls-files, diff --name-only, …). + * - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the + * vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`. + * - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras). + * - On a spawn/exec error we capture the error `message` too, so a failure + * before git could write to stderr (e.g. ENOENT) is NOT lost. + */ + async runRaw(args, opts) { + const cwd = opts?.cwd === null ? 
undefined : (opts?.cwd ?? this.vaultPath); + try { + const { stdout, stderr } = await execFileAsync("git", ["--no-pager", "-c", "core.quotepath=false", ...args], { + // Generous buffer: file listings / porcelain output on a large vault + // can be sizable. + ...(cwd !== undefined ? { cwd } : {}), + maxBuffer: 64 * 1024 * 1024, + env: vaultGitEnv(opts?.env), + }); + return { code: 0, stdout, stderr }; + } + catch (err) { + const e = err; + return { + code: typeof e.code === "number" ? e.code : 1, + stdout: e.stdout ?? "", + // Preserve the error message when there is no stderr (e.g. a spawn + // failure like ENOENT, where promisified execFile sets stderr to an + // EMPTY STRING — so `||`, not `??`, to fall through to `message`). + stderr: e.stderr || e.message || "", + }; + } + } + /** + * Ensure the vault directory exists and is an initialized git repo on `main` + * with an initial (empty) commit so branches exist. Idempotent: safe to call + * on every run. Sets a LOCAL bot identity for the vault repo if none is set + * (so engine commits never fall back to a global/unset identity). + */ + async ensureRepo() { + await (0, promises_1.mkdir)(this.vaultPath, { recursive: true }); + if (!(await this.isRepo())) { + // `git init -b main` sets the initial branch on modern git; we still + // guard the branch name below for safety on older binaries. + await this.run(["init", "-b", exports.DEFAULT_BRANCH]); + } + // Set a local identity for the vault repo if unset, so engine commits have + // a deterministic committer even on a machine with no global git config. 
+ if (!(await this.hasLocalConfig("user.name"))) { + await this.run(["config", "user.name", exports.BOT_AUTHOR_NAME]); + } + if (!(await this.hasLocalConfig("user.email"))) { + await this.run(["config", "user.email", exports.BOT_AUTHOR_EMAIL]); + } + // Neutralize correctness-affecting git config in the vault's LOCAL config so + // a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just + // output) and corrupt the vault. The vault is OUR dedicated repo, so LOCAL + // values (which override global/system) are the right scope. Set + // UNCONDITIONALLY every run — idempotent and cheap; `git config ` + // writes to `--local` by default inside the repo. These MUST be in place + // before any add/commit/checkout that could be affected, hence they run + // before the initial-commit block below. + // - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true + // would rewrite LF<->CRLF on add/checkout, making our deterministic, + // byte-stable markdown churn and breaking the round-trip invariant. + // `false` guarantees git stores/checks out verbatim bytes. + // - core.safecrlf=false — avoid CRLF-related warnings/aborts on add. + // - commit.gpgsign=false — the headless daemon must never try to GPG-sign + // a commit (would fail/hang; we already set GIT_TERMINAL_PROMPT=0). + // - core.attributesFile=/dev/null — neutralize the user's GLOBAL + // gitattributes so a global clean/smudge filter (filter..clean) + // cannot rewrite the STORED blob and break §11 byte-stability (a config + // that core.autocrlf=false does not cover). POSIX-only path, which is + // fine: the daemon runs on Linux (Docker) / macOS. A system + // /etc/gitattributes remains the host admin's domain (out of scope). + // NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a + // human running git by hand in the vault must inherit the same neutralized + // behavior; a transient `-c` would not persist. 
(core.quotepath, by + // contrast, only affects OUR parsing of output and so is baked into the + // `runRaw` argv baseline instead.) + try { + await this.run(["config", "core.autocrlf", "false"]); + await this.run(["config", "core.safecrlf", "false"]); + await this.run(["config", "commit.gpgsign", "false"]); + await this.run(["config", "core.attributesFile", "/dev/null"]); + } + catch (err) { + const detail = err instanceof Error ? err.message : String(err); + throw new Error(`failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` + + "/.git/config is writable and not locked (e.g. stale config.lock): " + + detail); + } + // Create the initial empty commit on `main` if the repo has no commits yet, + // so both `main` and (later) `docmost` branches have a common base. + if (!(await this.hasAnyCommit())) { + // Make sure we are on the default branch before the first commit (covers + // the older-git case where `init -b` was not honored). + await this.run(["checkout", "-B", exports.DEFAULT_BRANCH]); + await this.commitRaw("init vault", { + authorName: exports.BOT_AUTHOR_NAME, + authorEmail: exports.BOT_AUTHOR_EMAIL, + allowEmpty: true, + }); + } + } + /** True if `cwd` is inside a git work-tree (the vault is initialized). */ + async isRepo() { + const r = await this.runRaw(["rev-parse", "--is-inside-work-tree"]); + return r.code === 0 && r.stdout.trim() === "true"; + } + /** True if a LOCAL git config key is set in the vault repo. */ + async hasLocalConfig(key) { + const r = await this.runRaw(["config", "--local", "--get", key]); + return r.code === 0 && r.stdout.trim().length > 0; + } + /** True if the repo has at least one commit (HEAD resolves). */ + async hasAnyCommit() { + const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]); + return r.code === 0; + } + /** True if a branch with the given name exists. 
*/ + async branchExists(name) { + const r = await this.runRaw([ + "rev-parse", + "--verify", + `refs/heads/${name}`, + ]); + return r.code === 0; + } + /** + * Create `name` from `fromBranch` if it does not already exist. No-op (and no + * checkout) when the branch is already present. + */ + async ensureBranch(name, fromBranch) { + if (await this.branchExists(name)) + return; + await this.run(["branch", name, fromBranch]); + } + /** Name of the currently checked-out branch. */ + async currentBranch() { + return this.run(["rev-parse", "--abbrev-ref", "HEAD"]); + } + /** Check out an existing branch. */ + async checkout(name) { + await this.run(["checkout", name]); + } + /** Stage everything (adds, modifications, deletions). */ + async stageAll() { + await this.run(["add", "-A"]); + } + /** + * True if the vault is mid-merge (an unresolved merge from a previous run, + * SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged + * (conflicted) index entries (`git ls-files -u`). The pull cycle checks this + * BEFORE any checkout so a left-over merge produces a clear, actionable + * message instead of a raw "you need to resolve your current index first" + * failure deep inside `checkout`. This is what makes re-runs converge + * (resumability, SPEC §12). + */ + async isMergeInProgress() { + // MERGE_HEAD exists exactly while a merge is in progress. + const mergeHead = await this.runRaw([ + "rev-parse", + "--verify", + "--quiet", + "MERGE_HEAD", + ]); + if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0) + return true; + // Fallback / belt-and-suspenders: any unmerged index entries also mean the + // working tree is mid-conflict and a checkout would refuse. + const unmerged = await this.runRaw(["ls-files", "-u"]); + return unmerged.code === 0 && unmerged.stdout.trim().length > 0; + } + /** + * Commit the currently STAGED changes with an explicit author/committer + * identity and the given trailers appended to the message body (SPEC §7.3 + * provenance). 
Returns `true` if a commit was made, `false` if there was + * nothing to commit (graceful no-op). The caller is expected to have staged + * its changes first (e.g. via `stageAll`). + */ + async commit(message, opts) { + // Nothing staged -> nothing to commit. Treat as a no-op (SPEC §11: a + // deterministic re-pull of unchanged pages produces identical bytes, so + // git sees no diff and we must not error). + const staged = await this.runRaw([ + "diff", + "--cached", + "--quiet", + ]); + // `diff --cached --quiet` exits 0 when the index matches HEAD (nothing + // staged), 1 when there are staged changes. + if (staged.code === 0) + return false; + await this.commitRaw(message, opts); + return true; + } + /** + * Low-level commit used by both `commit` and `ensureRepo`'s initial commit. + * Builds the full message with appended trailers and sets author + committer + * identity via env vars (so the committer matches the author, not the repo + * default). + */ + async commitRaw(message, opts) { + const fullMessage = buildCommitMessage(message, opts.trailers); + // `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath + // (or any injected hook) must never interfere with engine commits in our + // dedicated vault repo. + const args = ["commit", "--no-verify", "-m", fullMessage]; + if (opts.allowEmpty) + args.push("--allow-empty"); + // Route through the single `runRaw` primitive; set author + committer + // identity via env vars (so the committer matches the author, not the repo + // default). Throw via the same unified message on a non-zero exit. 
+ const r = await this.runRaw(args, { + env: { + GIT_AUTHOR_NAME: opts.authorName, + GIT_AUTHOR_EMAIL: opts.authorEmail, + GIT_COMMITTER_NAME: opts.authorName, + GIT_COMMITTER_EMAIL: opts.authorEmail, + }, + }); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git ${args.join(" ")} failed: ${detail}`); + } + } + /** + * Merge `fromBranch` into the current branch (`git merge --no-edit`). + * Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict + * state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict + * markers are left in the worktree for manual resolution by a later increment, + * and — critically — nothing is pushed to Docmost (we never write to Docmost + * anyway). + */ + async merge(fromBranch) { + const r = await this.runRaw(["merge", "--no-edit", fromBranch]); + const output = `${r.stdout}\n${r.stderr}`.trim(); + if (r.code === 0) { + return { ok: true, conflict: false, output }; + } + // A non-zero exit on merge most commonly means a conflict. Confirm by + // checking for unmerged paths (porcelain "U" status) so we don't mislabel + // an unrelated failure as a conflict. + const conflict = await this.hasUnmergedPaths(); + return { ok: false, conflict, output }; + } + /** True if the index has any unmerged (conflicted) paths. */ + async hasUnmergedPaths() { + const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]); + return r.code === 0 && r.stdout.trim().length > 0; + } + /** + * List tracked files on the current branch (paths relative to the vault + * root, forward-slash separated). An optional glob (a git pathspec) narrows + * the listing, e.g. `"*.md"`. + * + * The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic + * (e.g. `Колонка.md`). 
With git's DEFAULT `core.quotepath=true`, `ls-files` + * returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`), + * which `src/pull.ts` `readExisting` would then parse as garbage paths, + * breaking move/duplicate detection. We defeat that two ways at once: + * - `core.quotepath=false` disables the octal-escape/quoting. It is now the + * `runRaw` argv baseline (prepended to EVERY invocation), so we no longer + * pass it inline here. + * - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline + * ambiguity), which we split on `\0`. + * We read the RAW stdout (NOT the trimming `run()` helper, which would mangle + * the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths + * are returned verbatim — git already emits forward slashes. + */ + async listTrackedFiles(glob) { + const r = await this.runRaw(["ls-files", "-z", ...(glob ? [glob] : [])]); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git ls-files failed: ${detail}`); + } + return r.stdout.split("\0").filter((p) => p.length > 0); + } + /** + * Diff two refs with `--name-status -M -z` and parse the NUL-delimited output + * (SPEC §6: the FS→Docmost push direction diffs `main` against + * `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed + * file is reported as a single `R` row with both its old and new path instead + * of a delete+add pair — that distinction is what lets the push planner tell a + * move from a delete+create (SPEC §8 "Move vs delete"). + * + * `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has + * Cyrillic file names) with NO quoting/escaping. The record shape differs by + * status: + * - A/M/D: `status\0path\0` + * - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. 
`R100`) + * We read the RAW stdout (not the trimming `run()` helper, which would mangle + * the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the + * tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim. + */ + async diffNameStatus(fromRef, toRef) { + const r = await this.runRaw([ + "diff", + "--name-status", + "-M", + "-z", + fromRef, + toRef, + ]); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git diff --name-status failed: ${detail}`); + } + // Tokens alternate: ... With `-z`, + // each token (status code AND each path) is its own NUL-delimited field. + const tokens = r.stdout.split("\0").filter((t) => t.length > 0); + const entries = []; + let i = 0; + while (i < tokens.length) { + const raw = tokens[i++]; + // The status token is e.g. `A`, `M`, `D`, or `R100` / `C075`. The leading + // letter is the change kind; any trailing digits are the similarity score. + const letter = raw[0]; + if (letter === "R" || letter === "C") { + const score = Number.parseInt(raw.slice(1), 10); + const oldPath = tokens[i++]; + const path = tokens[i++]; + if (oldPath === undefined || path === undefined) + break; // malformed tail + entries.push({ + status: letter, + path, + oldPath, + ...(Number.isFinite(score) ? { score } : {}), + }); + } + else if (letter === "A" || letter === "M" || letter === "D") { + const path = tokens[i++]; + if (path === undefined) + break; // malformed tail + entries.push({ status: letter, path }); + } + else { + // Unknown/other status (e.g. T type-change, U unmerged) — consume one + // path token defensively so the walk stays aligned, but do not emit it + // (the push planner only handles A/M/D/R/C). + i++; + } + } + return entries; + } + /** + * Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist. + * `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an + * unknown ref, so a non-zero exit maps cleanly to `null`. 
Used to read + * `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push. + */ + async revParse(ref) { + const r = await this.runRaw(["rev-parse", "--verify", "--quiet", ref]); + if (r.code !== 0) + return null; + const sha = r.stdout.trim(); + return sha.length > 0 ? sha : null; + } + /** + * Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`, + * named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5: + * "что из `main` уже отражено в Docmost"). + */ + async readRef(ref) { + return this.revParse(ref); + } + /** + * Point `ref` at `target` (`git update-ref `). Used to advance + * `refs/docmost/last-pushed` to the just-pushed `main` commit after a push + * (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts. + */ + async updateRef(ref, target) { + await this.run(["update-ref", ref, target]); + } + /** + * Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward, + * i.e. the current `branch` tip is an ancestor of `toCommit` (verified via + * `git merge-base --is-ancestor `). Used to advance the + * `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a + * push succeeds, Docmost already contains the pushed `main` content, so the + * mirror must reflect it — otherwise the NEXT pull would diff our own write + * back and re-pull it (loop-guard). + * + * SAFETY — never force, never clobber divergent history: + * - If `branch` IS an ancestor of `toCommit`, advance it with + * `git update-ref refs/heads/ `. The `docmost` branch is + * NOT checked out during a push (push works on `main`), so updating the ref + * directly is safe and avoids any working-tree touch. + * - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward), + * do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and + * let the caller log it. We must never overwrite a `docmost` history that + * has commits the push base does not contain. 
+ * + * Returns `{ ok: true }` when the branch was advanced (or already at + * `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise. + * A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason. + */ + async fastForwardBranch(branch, toCommit) { + const branchRef = `refs/heads/${branch}`; + // Resolve both endpoints first so a missing ref is a clean refusal, not a + // confusing `merge-base` failure. + const branchSha = await this.revParse(branchRef); + if (branchSha === null) { + return { ok: false, reason: `branch ${branch} does not exist` }; + } + const targetSha = await this.revParse(toCommit); + if (targetSha === null) { + return { ok: false, reason: `target ${toCommit} does not resolve` }; + } + // Already at the target -> a no-op fast-forward (still ok). + if (branchSha === targetSha) + return { ok: true }; + // `merge-base --is-ancestor A B` exits 0 iff A is an ancestor of B. Only a + // true ancestor is a fast-forward; anything else is divergent and refused. + const ancestor = await this.runRaw([ + "merge-base", + "--is-ancestor", + branchSha, + targetSha, + ]); + if (ancestor.code !== 0) { + return { ok: false, reason: "not-fast-forward" }; + } + // Safe to advance: the branch is not checked out during push, so a direct + // ref update avoids a checkout/working-tree touch. + await this.updateRef(branchRef, targetSha); + return { ok: true }; + } + /** + * Read a file's content at a specific ref (`git show :`), or `null` + * if the path does not exist there. Used by the push direction to read the + * PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its + * `docmost:meta` — and therefore its `pageId` — can be recovered to translate + * the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones + * that had a pageId, are deleted in Docmost). A non-zero exit (path absent at + * that ref) maps to `null` rather than throwing. 
/**
 * Inherited git variables that point git at a repository, index or object
 * store other than the one found from `cwd`. Git itself exports several of
 * these to hook processes, so a daemon started from a hook inherits them.
 */
const REPO_REDIRECTING_GIT_VARS = [
    "GIT_DIR",
    "GIT_WORK_TREE",
    "GIT_INDEX_FILE",
    "GIT_OBJECT_DIRECTORY",
    "GIT_ALTERNATE_OBJECT_DIRECTORIES",
    "GIT_COMMON_DIR",
    "GIT_NAMESPACE",
];
/**
 * Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
 * Used by the single `runRaw` primitive every git command flows through, so
 * these pins apply uniformly.
 *
 * cwd-isolation is the central safety guarantee: every git command must act
 * on the vault repo at `cwd` and nothing else. Any variable listed in
 * `REPO_REDIRECTING_GIT_VARS` would silently redirect the operation (to
 * another repository, index or object store), so all of them are stripped —
 * AFTER `extra` is merged, so not even the caller can re-introduce one.
 *
 * @param extra additional variables (e.g. author/committer identity). They
 *              are merged last and may override the locale/pager pins.
 * @returns a fresh environment object; `process.env` itself is not modified.
 *
 * Exported for unit testing.
 */
function vaultGitEnv(extra) {
    const env = {
        ...process.env,
        // Locale-independent output (defense in depth): keeps any git message
        // we might key on from being translated by an inherited LC_ALL/LANG.
        LC_ALL: "C",
        LANG: "C",
        // Never page and never block on an interactive prompt — the daemon runs
        // unattended and must not hang.
        GIT_PAGER: "cat",
        GIT_TERMINAL_PROMPT: "0",
        ...extra,
    };
    for (const name of REPO_REDIRECTING_GIT_VARS) {
        delete env[name];
    }
    return env;
}
/**
 * Build a commit message with trailer lines appended (SPEC §7.3). The
 * trailers are separated from the subject by one blank line so
 * `git interpret-trailers` / `git log --format=%(trailers)` parse them.
 *
 * Blank trailer entries are dropped: an empty line INSIDE the trailer block
 * would split it, and git only treats the last paragraph as trailers — every
 * trailer before the gap would silently stop being one.
 *
 * @param subject  the commit subject (returned unchanged when no trailer
 *                 remains).
 * @param trailers optional list of `Key: value` lines.
 * @returns the full message text.
 *
 * Exported for unit testing.
 */
function buildCommitMessage(subject, trailers) {
    const lines = Array.isArray(trailers)
        ? trailers.filter((t) => String(t).trim().length > 0)
        : [];
    if (lines.length === 0)
        return subject;
    return `${subject}\n\n${lines.join("\n")}`;
}
/**
 * Input to the PURE `computePullActions` (R-Pull-2). All data, no IO: the live
 * tree nodes + completeness flag (from `listSpaceTree`) and the parsed
 * `existing` tracked files (from `readExisting`).
 */
export interface PullActionsInput {
    /** Live page nodes for the space (from `listSpaceTree`). */
    pages: PageNode[];
    /** Whether the live tree fetch was COMPLETE (SPEC §8 suppression). */
    treeComplete: boolean;
    /** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */
    existing: {
        pageId: string;
        relPath: string;
    }[];
}
/**
 * The PURE decisions object computed by `computePullActions` (no IO). It holds
 * the reconciliation plan plus the SPEC §8 absence-deletion decision, with the
 * suppression already folded in: `toDelete` is the POST-suppression set the
 * caller should actually remove (empty when `deletionDecision.apply` is false).
 */
export interface PullActions {
    /** Pages to (re)write at their relPath (add + update + move target). */
    toWrite: {
        pageId: string;
        relPath: string;
    }[];
    /** Moves: write new path, then remove old path (only on a successful write). */
    moved: MovedEntry[];
    /**
     * Absence-based paths to delete AFTER suppression. Empty when the decision
     * suppressed deletions this cycle, so the caller can apply it unconditionally.
     */
    toDelete: string[];
    /** Why absence deletions were (or were not) applied (for logging + tests). */
    deletionDecision: DeletionDecision;
    /** Tracked-file count (for the suppression log messages). */
    existingCount: number;
    /** Planned absence-delete count BEFORE suppression (for the log message). */
    plannedDeleteCount: number;
}
/**
 * PURE pull-action planner (R-Pull-2). Takes the live tree nodes +
 * completeness + existing tracked files and returns the full set of decisions
 * with NO IO:
 *
 *  - builds the vault layout (deterministic relPath per live page),
 *  - `planReconciliation` -> toWrite / moved / absence-toDelete,
 *  - `decideAbsenceDeletions` -> the SPEC §8 suppression, folded IN so
 *    `toDelete` is the POST-suppression set (empty when suppressed).
 *
 * Moves are NOT governed by the suppression: a moved page is present in the
 * live set, so its old-path removal is real (the applier still gates it on the
 * write succeeding).
 */
export declare function computePullActions(input: PullActionsInput): PullActions;
/**
 * Injectable IO for `applyPullActions` (R-Pull-2). Production code wires these
 * to the live client, the vault git wrapper and the filesystem; tests pass
 * fakes that RECORD calls so the ordering and the move-on-success data-loss
 * guard are testable without real git/fs/network.
 *
 * NOTE(review): the generic arguments below were missing from this copy of
 * the declaration (`Pick;` / `Promise;`). The `Pick` keys are restored from
 * the members `applyPullActions` actually calls; the promise payloads are
 * `unknown` because the applier never reads the resolved values — confirm
 * against the original source.
 */
export interface ApplyPullActionsDeps {
    /** Read side of the client: only `getPageJson` is called. */
    client: Pick<GitSyncClient, "getPageJson">;
    /** Git operations the applier performs, in this order. */
    git: Pick<VaultGit, "stageAll" | "commit" | "checkout" | "merge">;
    /**
     * Write a file by ABSOLUTE path. The applier creates the parent directory
     * through `mkdir` before calling this.
     */
    writeFile: (absPath: string, text: string) => Promise<unknown>;
    /** Recursive mkdir of an ABSOLUTE directory path. */
    mkdir: (absDir: string) => Promise<unknown>;
    /** Remove a file by ABSOLUTE path (force: a missing file is a no-op). */
    rm: (absPath: string) => Promise<unknown>;
}
/** Outcome counters from `applyPullActions` (for the summary + tests). */
export interface ApplyResult {
    /** Pages fetched, stabilized and written successfully. */
    written: number;
    /** Old paths of moved pages that were removed. */
    movedApplied: number;
    /** Absence-based deletions that were applied. */
    deleted: number;
    /** Pages whose fetch/stabilize/write failed. */
    failed: number;
    /** Whether a commit was created on the mirror branch. */
    committed: boolean;
    /** Result of merging the mirror branch into the default branch. */
    merge: {
        ok: boolean;
        conflict: boolean;
        output: string;
    };
}
/**
 * THIN IO applier (R-Pull-2). Performs the side effects in a fixed order:
 *
 *  1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
 *     (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
 *     never aborts the pull (bounded-concurrency pool, fault-tolerant).
 *  2. apply MOVE old-path removals — ONLY when the planner marked the old path
 *     removable AND the new-path write SUCCEEDED (data-loss guard: a failed
 *     move-write keeps the old path so the page never vanishes).
 *  3. apply (post-suppression) absence deletes.
 *  4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
 *     counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
 *
 * @param deps      injected client, git wrapper and filesystem operations.
 * @param actions   the plan produced by `computePullActions`.
 * @param vaultRoot root used to turn each relPath into an absolute path.
 * @returns the outcome counters plus the merge result. (The promise payload
 *          was missing from this copy of the declaration; `ApplyResult` is
 *          the shape the implementation returns.)
 */
export declare function applyPullActions(deps: ApplyPullActionsDeps, actions: PullActions, vaultRoot: string): Promise<ApplyResult>;
// NOTE: `node:path` is required twice (`node_path_1` for `dirname`,
// `node_path_2` for `sep`) — both bindings are referenced below, so keep both.
const node_path_1 = require("node:path");
const node_path_2 = require("node:path");
const index_1 = require("../lib/index");
const layout_1 = require("./layout");
const git_1 = require("./git");
const reconcile_1 = require("./reconcile");
const stabilize_1 = require("./stabilize");
// Engine-only mirror branch (SPEC §5): the engine writes here, humans never
// do. `applyPullActions` commits on it and then merges it into the default
// branch.
const DOCMOST_BRANCH = "docmost";
// Machine-readable provenance trailer the loop-guard keys on (SPEC §7.3 /
// §12). Appended to every commit the pull cycle makes.
const SOURCE_TRAILER = "Docmost-Sync-Source: docmost";
// Number of pages fetched/stabilized concurrently. Bounded so a large space
// does not open thousands of simultaneous requests/conversions at once.
const CONCURRENCY = 6;
// Emit one progress line per this many completed pages.
const PROGRESS_EVERY = 25;
/**
 * Turn a forward-slash, vault-relative path into a path under `vaultRoot` by
 * joining the root and every path segment with "/".
 */
function relToAbs(vaultRoot, relPath) {
    const parts = relPath.split("/");
    parts.unshift(vaultRoot);
    return parts.join("/");
}
/** Build a relPath from directory segments plus a file stem (`<stem>.md`). */
function segmentsToRelPath(segments, stem) {
    const parts = Array.from(segments);
    parts.push(`${stem}.md`);
    return parts.join("/");
}
/**
 * Collect `{ pageId, relPath }` for every tracked .md file whose
 * `docmost:meta` carries a pageId. Anything else is skipped, never thrown:
 *   - `readFile` rejects (tracked but missing on disk, a mid-operation race)
 *     -> skipped; the next pull converges;
 *   - the meta cannot be parsed (`parseDocmostMarkdown` throws) -> skipped;
 *   - the meta parses but holds no `pageId` -> skipped (not an engine-tracked
 *     page, e.g. a stray hand-written file).
 *
 * The IO is injected (R-Pull-1) so this is testable with fakes.
 */
async function readExisting(deps) {
    const found = [];
    for (const trackedPath of await deps.listTracked()) {
        // git emits forward slashes already; normalize the platform separator
        // just in case.
        const rel = trackedPath.split(node_path_2.sep).join("/");
        let text;
        try {
            text = await deps.readFile(rel);
        }
        catch {
            continue;
        }
        let pageId;
        try {
            pageId = (0, index_1.parseDocmostMarkdown)(text).meta?.pageId;
        }
        catch {
            pageId = undefined;
        }
        if (!pageId)
            continue;
        found.push({ pageId, relPath: rel });
    }
    return found;
}
/**
 * PURE pull-action planner (R-Pull-2). Takes the live tree nodes, the
 * completeness flag and the existing tracked files, and returns every
 * decision of the pull with NO IO:
 *
 *  - builds the vault layout (one relPath per live page),
 *  - `planReconciliation` -> toWrite / moved / absence-toDelete,
 *  - `decideAbsenceDeletions` -> the SPEC §8 suppression, folded IN here so
 *    `toDelete` is the POST-suppression set (empty when suppressed).
 *
 * Moves are NOT governed by the suppression: a moved page is present in
 * `live`, so its old-path removal is real (the applier still gates it on the
 * write succeeding).
 *
 * @param input `{ pages, treeComplete, existing }`.
 * @returns the plan: `toWrite`, `moved`, post-suppression `toDelete`, the
 *          `deletionDecision`, and the two counts used by the log messages.
 */
function computePullActions(input) {
    const { pages, treeComplete, existing } = input;
    const layout = (0, layout_1.buildVaultLayout)(pages);
    const live = [];
    for (const p of pages) {
        // Skip holes / nodes without an id, and pages the layout did not place.
        if (!p || !p.id)
            continue;
        const entry = layout.get(p.id);
        if (!entry)
            continue;
        live.push({
            pageId: p.id,
            relPath: segmentsToRelPath(entry.segments, entry.stem),
        });
    }
    // Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
    // `plan.moved` carries move old-path removals separately.
    const plan = (0, reconcile_1.planReconciliation)(live, existing);
    // Decide whether the ABSENCE-based deletions may be applied this cycle
    // (SPEC §8). Moves are NOT governed by this.
    const deletionDecision = (0, reconcile_1.decideAbsenceDeletions)({
        treeComplete,
        liveCount: live.length,
        existingCount: existing.length,
        deleteCount: plan.toDelete.length,
    });
    return {
        toWrite: plan.toWrite,
        moved: plan.moved,
        // Fold the suppression in: a suppressed cycle deletes nothing.
        toDelete: deletionDecision.apply ? plan.toDelete : [],
        deletionDecision,
        existingCount: existing.length,
        // The pre-suppression count is kept for the warning text only.
        plannedDeleteCount: plan.toDelete.length,
    };
}
/**
 * THIN IO applier (R-Pull-2). Performs the side effects in a fixed order:
 *
 *  1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
 *     (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
 *     never aborts the pull: its failure is counted and logged.
 *  2. apply MOVE old-path removals — ONLY when the planner marked the old path
 *     removable AND the new-path write did not fail (data-loss guard: a failed
 *     move-write keeps the old path so the page never vanishes).
 *  3. apply the (post-suppression) absence deletes.
 *  4. stageAll + commit on `docmost` (subject built from the ACTUAL
 *     written/deleted counts), then checkout the default branch and merge
 *     `docmost` into it (conflicts are surfaced, SPEC §9).
 *
 * @param deps      injected `client`, `git`, `writeFile`, `mkdir`, `rm`.
 * @param actions   the plan from `computePullActions`.
 * @param vaultRoot root used to turn each relPath into an absolute path.
 * @returns `{ written, movedApplied, deleted, failed, committed, merge }`.
 */
async function applyPullActions(deps, actions, vaultRoot) {
    const { client, git } = deps;
    // Emit the SPEC §8 suppression warning matching the decision's reason. Any
    // reason other than the two named ones is reported as the mass-delete guard.
    const decision = actions.deletionDecision;
    if (!decision.apply) {
        if (decision.reason === "incomplete-fetch") {
            console.warn("pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)");
        }
        else if (decision.reason === "empty-live") {
            console.warn(`pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` +
                `tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
                `Docmost is reachable.`);
        }
        else {
            console.warn(`pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` +
                `tracked file(s) (mass-delete guard) — deletions suppressed this ` +
                `cycle (SPEC §8). Verify the live Docmost tree, then re-run.`);
        }
    }
    // 1. Write each live page in its fixpoint form (normalize-on-write, SPEC §11).
    let written = 0;
    let failed = 0;
    let completed = 0;
    // Shared cursor into `actions.toWrite`; every runner claims the next index.
    let nextIndex = 0;
    // pageIds whose write FAILED. A moved page whose new-path write failed must
    // NOT have its old path removed (otherwise the page vanishes entirely).
    const failedPageIds = new Set();
    // Fetch, stabilize and write ONE page. Never rejects: a failure is counted,
    // remembered in `failedPageIds` and logged.
    const writeOne = async (w) => {
        try {
            const page = await client.getPageJson(w.pageId);
            // File meta is built from the fetched page, not from the plan entry.
            const meta = {
                version: 1,
                pageId: page.id,
                slugId: page.slugId,
                title: page.title,
                spaceId: page.spaceId,
                parentPageId: page.parentPageId ?? null,
            };
            const text = await (0, stabilize_1.stabilizePageFile)(page.content, meta);
            const abs = relToAbs(vaultRoot, w.relPath);
            // The parent directory is created before the file is written.
            await deps.mkdir((0, node_path_1.dirname)(abs));
            await deps.writeFile(abs, text);
            written++;
        }
        catch (err) {
            failed++;
            failedPageIds.add(w.pageId);
            console.error(`pull: failed page ${w.pageId}:`, err instanceof Error ? err.message : String(err));
        }
        finally {
            // Progress counts successes and failures alike.
            completed++;
            if (completed % PROGRESS_EVERY === 0) {
                console.log(`pulled ${completed}/${actions.toWrite.length}`);
            }
        }
    };
    // Bounded-concurrency pool (dependency-free): a fixed set of runners each
    // take the next index until the write list is exhausted.
    const runner = async () => {
        while (true) {
            const i = nextIndex++;
            if (i >= actions.toWrite.length)
                return;
            await writeOne(actions.toWrite[i]);
        }
    };
    // At most CONCURRENCY runners; `|| 1` still starts one runner for an empty
    // list, and that runner returns immediately.
    await Promise.all(Array.from({ length: Math.min(CONCURRENCY, actions.toWrite.length) || 1 }, () => runner()));
    // Helper: remove one vault-relative path. Resolves to `true` on success and
    // to `false` (after logging) when `rm` rejects — it never throws.
    const removePath = async (rel, what) => {
        try {
            await deps.rm(relToAbs(vaultRoot, rel));
            return true;
        }
        catch (err) {
            console.error(`pull: failed to ${what} ${rel}:`, err instanceof Error ? err.message : String(err));
            return false;
        }
    };
    // 2. Apply MOVE old-path removals. A moved page IS present in `live`, so its
    // old path is genuinely stale — NOT subject to the incomplete-fetch
    // suppression. BUT only remove the old path when (a) the planner marked it
    // removable AND (b) the new-path write did not fail — otherwise the only
    // copy of a page whose move-write failed would be deleted (data-loss guard).
    let movedApplied = 0;
    for (const m of actions.moved) {
        if (!m.removeOldPath)
            continue;
        if (failedPageIds.has(m.pageId)) {
            console.warn(`pull: move write for ${m.pageId} failed — keeping old path ` +
                `${m.fromRelPath} (SPEC §8)`);
            continue;
        }
        if (await removePath(m.fromRelPath, "remove moved old path"))
            movedApplied++;
    }
    // 3. Apply ABSENCE-based deletions — `actions.toDelete` is ALREADY the
    // post-suppression set (empty when the decision suppressed them, SPEC §8).
    let deleted = 0;
    for (const rel of actions.toDelete) {
        if (await removePath(rel, "delete"))
            deleted++;
    }
    // 4. Stage + commit on `docmost`. `git.commit` resolves to `false` when
    // nothing is staged, so unchanged pages cause no churn (SPEC §11). The
    // subject reflects the ACTUAL work applied (pages written + files deleted),
    // not the planned size, so a run with failures does not over-report.
    const subject = deleted > 0
        ? `docmost: sync ${written} page(s), ${deleted} deleted`
        : `docmost: sync ${written} page(s)`;
    await git.stageAll();
    const committed = await git.commit(subject, {
        authorName: git_1.BOT_AUTHOR_NAME,
        authorEmail: git_1.BOT_AUTHOR_EMAIL,
        trailers: [SOURCE_TRAILER],
    });
    // Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9);
    // nothing is pushed to Docmost. Push to a git remote is deferred (SPEC §7).
    await git.checkout(git_1.DEFAULT_BRANCH);
    const merge = await git.merge(DOCMOST_BRANCH);
    if (merge.conflict) {
        console.error("pull: merge of docmost -> main CONFLICTED. Conflict markers were left " +
            "in the vault for manual resolution (SPEC §9). Nothing is pushed to " +
            "Docmost (read-only). Resolve locally, then re-run.");
    }
    else if (!merge.ok) {
        // Failed without unmerged paths: report git's own output.
        console.error(`pull: merge of docmost -> main failed: ${merge.output}`);
    }
    console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");
    return { written, movedApplied, deleted, failed, committed, merge };
}
/** A page to CREATE in Docmost (new local file, meta has no pageId yet). */
export interface CreateAction {
    /** Vault-relative path of the new file. */
    path: string;
}
/** A page whose CONTENT changed (meta carries the existing pageId). */
export interface UpdateAction {
    /** The Docmost pageId taken from the file's meta — the update target. */
    pageId: string;
    /** Vault-relative path of the changed file. */
    path: string;
}
/** A page to soft-delete in Docmost (Trash, SPEC §8). */
export interface DeleteAction {
    /** The Docmost pageId of the page to delete. */
    pageId: string;
}
/**
 * A renamed/moved page (same pageId, new path). This shape only RECORDS the
 * two paths; deciding whether it is a move, a rename, both, or a no-op is done
 * separately by `classifyRenameMoves`, which takes these entries as input.
 */
export interface RenameMoveAction {
    /** The Docmost pageId of the renamed/moved page. */
    pageId: string;
    /** Vault-relative path of the file BEFORE the rename/move. */
    oldPath: string;
    /** Vault-relative path of the file AFTER the rename/move. */
    newPath: string;
}
/**
 * A CLASSIFIED rename/move (push #3): a `RenameMoveAction` resolved into the
 * Docmost op(s) it actually needs. The file PATH is the source of truth for tree
 * position (SPEC §5: "the source of truth for the link is the pageId, not the
 * path" — the path is COSMETIC and LOCAL, the page identity is its pageId), so
 * we compare the RESOLVED parent of the new path against the resolved parent of
 * the old path, and the title in the current meta against the title in the
 * previous meta. Each sub-op is emitted ONLY when something real changed:
 *  - `move`   — the resolved parent page changed (reparent in Docmost). A `null`
 *    `parentPageId` means the new parent is ROOT (the file sits at the space
 *    root, no enclosing folder).
 *  - `rename` — the page title changed (a pure title edit in Docmost).
 *  - `noop`   — neither changed: a purely LOCAL file-path rename (same parent,
 *    same title). The page identity is its pageId, so Docmost is NOT called.
 * `move` and `rename` are independent and may BOTH be present (reparent + retitle).
 */
export interface RenameMoveActionClassified {
    /** The Docmost pageId of the renamed/moved page. */
    pageId: string;
    /** Vault-relative path of the file BEFORE the rename/move. */
    oldPath: string;
    /** Vault-relative path of the file AFTER the rename/move. */
    newPath: string;
    /** Present iff the resolved parent changed -> `move_page` (reparent). */
    move?: {
        parentPageId: string | null;
    };
    /** Present iff the title changed -> `rename_page` (title-only). */
    rename?: {
        title: string;
    };
    /** True iff neither parent nor title changed (cosmetic local-only rename). */
    noop?: true;
}
/**
 * Injected resolvers for the PURE `classifyRenameMoves` (push #3). Both are PURE
 * given a path + side; the real `main` (a follow-up) wires them to the file tree
 * (`readFile` for `current`, `git.showFileAtRef` for `prev`), tests pass plain
 * lookups. SPEC §5 path-as-truth:
 *  - `metaAt`: the file's `docmost:meta` at that side (for the title).
 *  - `resolveParentPageId`: the pageId of the page whose FILE is the parent
 *    FOLDER's `<folder>.md` (one level up from the given path), or `null` for
 *    ROOT.
 */
export interface ClassifyRenameMovesDeps {
    metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
    resolveParentPageId: (path: string, side: MetaSide) => string | null;
}
/**
 * PURE classifier for the `renamesMoves` produced by `computePushActions`
 * (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
 * Docmost op(s) it needs, with NO IO (both resolvers are injected).
 *
 * SPEC §5 — the file PATH is the source of truth for tree position, NOT the
 * (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
 * `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing
 * folder, via `deps.resolveParentPageId`. The title comes from the meta.
 *
 * For each entry:
 *  - `newParent = resolveParentPageId(newPath, 'current')`,
 *    `oldParent = resolveParentPageId(oldPath, 'prev')`.
 *  - `newTitle = metaAt(newPath,'current')?.title`,
 *    `oldTitle = metaAt(oldPath,'prev')?.title`.
 *  - include `move` iff `newParent !== oldParent` (a real reparent),
 *  - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from
 *    `oldTitle` (a real title edit; an empty/absent new title is never a rename),
 *  - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename;
 *    the page is its pageId, so Docmost is not touched).
 *
 * @param renamesMoves The recorded rename/move entries to classify.
 * @param deps The injected `metaAt` / `resolveParentPageId` resolvers.
 * @returns One classified entry per input entry, in the same order.
 */
export declare function classifyRenameMoves(renamesMoves: RenameMoveAction[], deps: ClassifyRenameMovesDeps): RenameMoveActionClassified[];
/** The classified set of push actions (PURE output of `computePushActions`). */
export interface PushActions {
    creates: CreateAction[];
    updates: UpdateAction[];
    deletes: DeleteAction[];
    renamesMoves: RenameMoveAction[];
    /**
     * Diff rows that could NOT be classified into an action, with a reason — e.g.
     * a deleted file whose PRE-IMAGE meta carried no recoverable pageId (the
     * untracked-file guard, SPEC §8: only files that were tracked with a pageId
     * are deleted in Docmost). Carried so the caller can log them.
     */
    skipped: {
        path: string;
        status: DiffEntry["status"];
        reason: string;
    }[];
}
/**
 * Which tree a `metaAt` lookup reads the file's `docmost:meta` from:
 *  - `current`: the current `main` tree (the live file content) — used for
 *    A/M/R, where the file still exists.
 *  - `prev`: the last-pushed PRE-IMAGE (e.g. `refs/docmost/last-pushed:<path>`)
 *    — used for D, where the file is gone from `main` but its pageId must be
 *    recovered from the version Docmost last knew (SPEC §6/§8).
 */
export type MetaSide = "current" | "prev";
/** Input to the PURE planner. `metaAt` is injected (no IO inside the planner). */
export interface PushActionsInput {
    /** Diff rows of `main` vs `refs/docmost/last-pushed` (SPEC §6 step 2). */
    changes: DiffEntry[];
    /**
     * Resolve a file's `docmost:meta` at a given side, or `null` if the file is
     * absent there / has no parseable meta. PURE injection: the real `main` reads
     * the working tree (current) or `git show <ref>:<path>` (prev); tests
     * pass a plain lookup.
     */
    metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
}
/**
 * PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
 * action by `pageId` identity, with NO IO (the `metaAt` resolver is injected).
 *
 * Classification rules:
 *  - `A` (added):
 *      - current meta HAS a pageId  -> UPDATE (a restored/copied file whose
 *        page already exists; we push its content rather than create a dup).
 *      - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a
 *        brand-new local file; the page does not exist in Docmost yet).
 *      - current meta has NO pageId and NO usable spaceId -> SKIP with reason
 *        `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
 *        (§16), and a new local file may carry only partial human meta. We
 *        refuse to create rather than guess a space (SPEC §8 guard spirit).
 *  - `M` (modified): current meta has a pageId -> UPDATE content. (If a modified
 *    file somehow lost its pageId it is skipped — there is nothing to target.)
 *  - `D` (deleted): recover the pageId from the PRE-IMAGE meta (`metaAt(path,
 *    'prev')`) -> DELETE. If no pageId can be recovered, SKIP with a reason
 *    (untracked-file guard, SPEC §8: never delete an untracked page).
 *  - `R` (renamed/moved): same pageId (from current meta), path changed ->
 *    RENAME/MOVE. Resolution of move-vs-rename + the new parentPageId is NOT
 *    done by this planner (that is `classifyRenameMoves`); here we only record
 *    oldPath/newPath/pageId. If the renamed file has no recoverable pageId it
 *    is SKIPPED. (`C` copy is treated the same as `R` for recording purposes.)
 *
 * @param input The diff rows plus the injected `metaAt` resolver.
 * @returns The classified actions, including the rows that were skipped.
 */
export declare function computePushActions(input: PushActionsInput): PushActions;
/** The marker the push direction advances after a successful push (SPEC §5/§6). */
export declare const LAST_PUSHED_REF = "refs/docmost/last-pushed";
/**
 * The mirror branch fast-forwarded after a clean push (SPEC §5/§6 step 3). It
 * reflects "what Docmost currently contains"; advancing it to the pushed `main`
 * commit closes the loop so the next pull diffs empty for the pushed pages.
 */
export declare const DOCMOST_BRANCH = "docmost";
/**
 * Injectable IO for `applyPushActions`. The real `main` (NEXT increment) wires
 * these to the live client, `node:fs/promises`, and the vault git wrapper; this
 * increment drives them only through FAKES in tests (no live destructive run).
 *  - `client`: the create/update/delete/move/rename subset of `GitSyncClient`.
 *  - `readFile`/`writeFile`: read a changed file's body / write a file back
 *    (by vault-relative path; the applier does not resolve absolute paths so
 *    fakes stay trivial).
 *  - `git`: `updateRef` (advance `refs/docmost/last-pushed`) and
 *    `fastForwardBranch` (advance the `docmost` mirror after a clean push, the
 *    loop-close — SPEC §6 step 3 / §10).
 */
export interface ApplyPushDeps {
    /**
     * The push-write subset of the client: content update
     * (`importPageMarkdown`), create, delete, move and rename.
     */
    client: Pick<GitSyncClient, "importPageMarkdown" | "createPage" | "deletePage" | "movePage" | "renamePage">;
    /** Read a changed file's full text by its vault-relative path. */
    readFile: (path: string) => Promise<string>;
    /** Write a file's full text by its vault-relative path. */
    writeFile: (path: string, text: string) => Promise<void>;
    /**
     * `updateRef` advances `refs/docmost/last-pushed`; `fastForwardBranch` advances
     * the `docmost` mirror after a clean push. `showFileAtRef` reads a file's text
     * at a ref (used by the move/rename classifier to resolve the PREVIOUS parent
     * folder's `.md` at `refs/docmost/last-pushed`, SPEC §5 path-as-truth).
     */
    git: Pick<VaultGit, "updateRef" | "fastForwardBranch" | "showFileAtRef">;
}
/** A file whose meta was rewritten with a freshly-assigned pageId (post-create). */
export interface WrittenBackPage {
    /** Vault-relative path of the file whose meta was rewritten. */
    path: string;
    /** The pageId that was written into the file's meta. */
    pageId: string;
}
/**
 * The per-page push record consulted by a FUTURE poll-suppression (SPEC §10): a
 * pulled page whose body hash + `updatedAt` match a record here is OUR OWN write
 * and must not be re-pulled. PRODUCED here; CONSUMED on the pull side later.
 */
export interface PushedPageRecord {
    /** The Docmost pageId that was updated/created. */
    pageId: string;
    /**
     * The `updatedAt` from the create/update client result, when the result
     * exposed one. Absent when the (fake) client did not return it.
     */
    updatedAt?: string;
    /** Stable hash of the markdown BODY that was pushed (SPEC §10 "body hash"). */
    bodyHash: string;
}
/**
 * One page whose operation FAILED during apply (SPEC §12 resumability). The bad
 * page is isolated — recorded here — and the rest of the batch still runs; the
 * refs are NOT advanced when there is any failure, so a re-run retries cleanly.
 */
export interface PushFailure {
    /** Which operation failed. */
    kind: "update" | "create" | "delete" | "move" | "rename";
    /** The pageId for update/delete/move/rename; absent for a never-id'd create. */
    pageId?: string;
    /** The vault-relative path for create/update/move/rename; absent for delete. */
    path?: string;
    /** The error message captured from the thrown error. */
    error: string;
}
/**
 * A rename/move action that resolved to a NO-OP (push #3, SPEC §5): a purely
 * LOCAL file-path rename whose resolved parent AND title are both unchanged. The
 * page identity is its pageId and the path is COSMETIC/local-only, so Docmost is
 * NOT called — the skip is recorded here (with the reason) for logging.
 */
export interface PushNoop {
    pageId: string;
    oldPath: string;
    newPath: string;
    /** Why no Docmost op was emitted (currently always a path-only rename). */
    reason: "path-only-rename";
}
/** Structured outcome of `applyPushActions` (counts + write-backs + noops). */
export interface ApplyPushResult {
    created: number;
    updated: number;
    deleted: number;
    /** Pages reparented in Docmost via `move_page` (push #3, SPEC §5/§16). */
    moved: number;
    /** Pages retitled in Docmost via `rename_page` (push #3, SPEC §5/§6). */
    renamed: number;
    /**
     * Files whose `docmost:meta` was rewritten with the pageId Docmost assigned on
     * create — these now need a FOLLOW-UP commit (the meta on disk changed). The
     * commit itself is the caller's job (NEXT increment); recorded here so it is
     * not lost.
     */
    writtenBack: WrittenBackPage[];
    /**
     * Per-page push records (pageId + optional `updatedAt` + body hash) for every
     * page successfully updated/created — the §10 loop-guard data a future
     * poll-suppression (pull side) will consult so it does not re-pull our own
     * write. Deletes are not included (no body was pushed).
     */
    pushed: PushedPageRecord[];
    /**
     * Pages whose operation threw — isolated and recorded, the batch continued
     * (SPEC §12). Non-empty here means the refs were NOT advanced.
     */
    failures: PushFailure[];
    /**
     * Rename/move actions that resolved to a NO-OP — a purely LOCAL file-path
     * rename (same parent, same title). NO Docmost call was made for these (SPEC
     * §5: the page is its pageId, the path is local-only). Recorded for logging.
     */
    noops: PushNoop[];
    /** Diff rows the planner could not classify (carried through for logging). */
    skipped: PushActions["skipped"];
    /** Whether `refs/docmost/last-pushed` was advanced (only on a CLEAN push). */
    lastPushedAdvanced: boolean;
    /**
     * Result of fast-forwarding the `docmost` mirror branch after a CLEAN push
     * (the loop-close, SPEC §6 step 3 / §10). `null` when no advance was attempted
     * (no `pushedCommit`, or there were failures). `{ ok:false, reason }` when a
     * non-fast-forward was REFUSED (divergent `docmost` history is never clobbered).
     */
    docmostFastForward: {
        ok: boolean;
        reason?: string;
    } | null;
}
/**
 * THIN IO applier for the COMMON push cases (create/update/delete). Exercised
 * via FAKES only in this increment — there is no live wiring.
 *
 *  - UPDATE: read the file body, then `client.importPageMarkdown(pageId, body)`.
 *    This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw jsonb
 *    overwrite. The full self-contained markdown (meta + body) is sent as-is;
 *    `importPageMarkdown` parses the meta/body itself.
 *  - CREATE: derive title/spaceId/parentPageId from the file's current meta,
 *    `client.createPage(...)`, take the assigned pageId from the result, and
 *    write it BACK into the file's `docmost:meta` (re-serialized via
 *    `serializeDocmostMarkdownBody`, body preserved) so the file becomes
 *    tracked. The write-back is recorded in `writtenBack` (a follow-up commit
 *    is needed — NEXT increment).
 *  - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
 *  - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry
 *    with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for
 *    the parent pageId — path-as-truth — and the meta for the title), then:
 *      - `move`   -> `client.movePage(pageId, parentPageId, position?)` (reparent;
 *        `position` is UNDEFINED for now — the client supplies a default),
 *      - `rename` -> `client.renamePage(pageId, title)` (title-only),
 *      - BOTH     -> move (reparent) THEN rename (title), in that order,
 *      - `noop`   -> NO client call; recorded in `noops` (a cosmetic local-only
 *        file-path rename: the page is its pageId, the path is local, SPEC §5).
 *
 * FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation
 * is wrapped in its own try/catch: a single failing page is recorded in
 * `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES —
 * one bad page must never block the rest. Crucially, the refs are advanced ONLY
 * when `failures.length === 0`: a PARTIAL push must NOT advance
 * `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the
 * whole batch cleanly (the already-applied pages are idempotent re-applies).
 *
 * LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a
 * `pushedCommit` is supplied:
 *  - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND
 *  - fast-forward the `docmost` mirror branch to it via
 *    `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects
 *    what Docmost now contains and the NEXT pull diffs EMPTY for these pages
 *    (it does not re-pull our own write). The ff is REFUSED (not forced) if
 *    `docmost` is not an ancestor of the pushed commit; the result is surfaced
 *    in `docmostFastForward`. On ANY failure, NEITHER ref is advanced.
 *
 * LOOP-GUARD DATA (SPEC §10). For every page successfully updated/created the
 * result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the body
 * hash of what was pushed plus the write's `updatedAt` (when the client returned
 * one). A future pull-side poll-suppression consults this so it does not re-pull
 * our own write; producing it is in scope here, consuming it is deferred.
 *
 * @param deps The injected client / file / git IO.
 * @param actions The classified actions produced by `computePushActions`.
 * @param pushedCommit The `main` commit just reflected into Docmost (SHA or
 *   commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
 * @returns A promise of the structured outcome (`ApplyPushResult`).
 */
export declare function applyPushActions(deps: ApplyPushDeps, actions: PushActions, pushedCommit?: string): Promise<ApplyPushResult>;
/**
 * SPEC §5 path-as-truth: the parent FOLDER's `.md` file for a vault-relative
 * (forward-slash) path. `buildVaultLayout` puts a page with children at
 * `<...>/Title.md` and nests its children under `<...>/Title/`, so for
 * `newPath = <dir>/Child.md` the parent page's file is `<dir>.md` (the enclosing
 * folder, one level up). A path with NO enclosing folder (`Child.md`, at the
 * space root) has no parent folder file -> `null` (the parent is ROOT).
 *
 * @param path A vault-relative, forward-slash path.
 * @returns The parent folder's `.md` path, or `null` when the path has no
 *   enclosing folder.
 */
export declare function parentFolderFile(path: string): string | null;
/**
 * The human ("local") git identity used for engine-made commits on `main` in the
 * push direction (SPEC §7.3). The provenance is carried by the trailer (below),
 * which the loop-guard keys on; the identity is for history readability only.
 * When the vault repo already has a configured `user.name`/`user.email`, git
 * uses that for the working-tree commit; this is the fallback the daemon stamps.
 */
export declare const LOCAL_AUTHOR_NAME = "Local";
export declare const LOCAL_AUTHOR_EMAIL = "local@local";
/** The provenance trailer marking a `main`-side (human/local) commit (SPEC §7.3). */
export declare const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
/**
 * Injectable deps for `runPush` (mirrors `pull.ts`'s wiring; everything that
 * touches the outside world is here so tests pass fakes). `makeClient` is a
 * FACTORY, not a client — a dry-run must build NO client at all (it is never
 * called), and only `--apply` invokes it.
 */
export interface PushDeps {
    settings: Settings;
    // NOTE(review): the type arguments of this `Pick` (presumably
    // `Pick<VaultGit, ...>`) are missing from this declaration and the exact
    // method subset cannot be recovered from this file — restore it from the
    // `.ts` source before relying on these typings.
    git: Pick;
    /** Build a real client — called ONLY on `--apply`, never on dry-run. */
    makeClient: (settings: Settings) => ApplyPushDeps["client"];
    /** Read a file's full text by its vault-relative (forward-slash) path. */
    readFile: (path: string) => Promise<string>;
    /** Write a file's full text by its vault-relative path. */
    writeFile: (path: string, text: string) => Promise<void>;
    /** Structured logger (defaults to console in `main`; a recorder in tests). */
    log: (line: string) => void;
}
/** The structured outcome of a `runPush` cycle (returned + summarized). */
export interface PushRunResult {
    /** Which path ran: `dry-run` (plan only) or `apply` (Docmost mutated). */
    mode: "dry-run" | "apply";
    /** Why the cycle stopped before planning, if it did (e.g. a left-over merge). */
    aborted?: "merge-in-progress";
    /** The diff base the plan was computed against (`last-pushed` else `docmost`). */
    base?: {
        ref: string;
        source: "last-pushed" | "docmost";
        sha: string | null;
    };
    /** The `main` commit the plan targets (the would-be pushed commit). */
    pushedCommit?: string;
    /** Planned action counts from the PURE planner (present once a plan was built). */
    planned?: {
        creates: number;
        updates: number;
        deletes: number;
        renamesMoves: number;
        skipped: number;
    };
    /** The applier's structured result — ONLY present on the `--apply` path. */
    applied?: ApplyPushResult;
    /**
     * True when `applyPushActions` REFUSED to fast-forward a divergent `docmost`
     * mirror (SPEC §5 invariant broken). Escalated (logged prominently) and folded
     * into the CLI's non-zero exit.
     */
    divergentDocmost?: boolean;
    /** Per-page failures from the applier (empty/absent on a clean run). */
    failures?: PushFailure[];
}
/**
 * Run one FS->Docmost push cycle (SPEC §6 "FS → Docmost"), DRY-RUN BY DEFAULT.
 *
 * Steps (mirrors `pull.ts`):
 *  1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
 *     non-zero-ish result) if a merge is in progress — never push on top of an
 *     unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
 *     Docmost (SPEC §9).
 *  2. Checkout `main` (the human-facing branch the push reads from).
 *  3. Commit the human's pending working-tree changes on `main` with the
 *     `local` provenance trailer (SPEC §7.3). A no-op when nothing changed.
 *  4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
 *     `docmost` mirror branch (what Docmost currently has). Resolve `main`.
 *  5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)`
 *     resolver (current = working tree, prev = `git show <ref>:<path>`); run
 *     the PURE `computePushActions`.
 *  6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
 *     calls, NO ref advance.
 *  7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`,
 *     then (a) if any pageIds were written back (creates), commit them on `main`
 *     with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the
 *     new commit so the recorded pageIds are persisted in what Docmost mirrors;
 *     (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
 *     WARNING and a non-zero-ish flag. Then log a one-line summary.
 *
 * @param deps The injected settings / git / client factory / file IO / logger.
 * @param opts `dryRun: true` plans only; `dryRun: false` is the `--apply` path.
 * @returns A promise of the structured cycle outcome (`PushRunResult`).
 */
export declare function runPush(deps: PushDeps, opts: {
    dryRun: boolean;
}): Promise<PushRunResult>;
/** Parsed `push` CLI flags. DRY-RUN is the default; `--apply` opts into writes. */
export interface PushParsedArgs {
    /** True when `--apply` was passed (the ONLY path that writes to Docmost). */
    apply: boolean;
}
/**
 * Parse the `push` CLI flags. SAFE BY DEFAULT: without `--apply` the run is a
 * DRY-RUN (plan only). Exported so the flag handling is unit-testable.
 *
 * @param argv The CLI argument list to parse.
 * @returns The parsed flags.
 */
export declare function parseArgs(argv: string[]): PushParsedArgs;
+ * - R -> rename/move (CLASSIFIED here, APPLIED in push #3). + * + * MOVE/RENAME APPLY (push #3) — DONE here. `classifyRenameMoves` (PURE) resolves + * each `renamesMoves` entry into the Docmost op(s) it needs, comparing the PATH- + * derived parent (SPEC §5: the file path is the source of truth for tree + * position, NOT stale `meta.parentPageId`) and the meta title; `applyPushActions` + * then calls `move_page` / `rename_page` (both for a reparent+retitle), or + * records a NO-OP for a cosmetic local-only file-path rename. + * + * VENDORED into gitmost (plan §2.1/§3.1): the client seam is the native + * `GitSyncClient` (`Pick`), not the upstream REST + * `DocmostClient`; the upstream CLI `main()` entry point is dropped (the gitmost + * server drives the engine in-process). Engine LOGIC is byte-identical. + */ +const index_1 = require("../lib/index"); +const git_1 = require("./git"); +const loop_guard_1 = require("./loop-guard"); +/** + * PURE classifier for the `renamesMoves` produced by `computePushActions` + * (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the + * Docmost op(s) it needs, with NO IO (both resolvers are injected). + * + * SPEC §5 — the file PATH is the source of truth for tree position, NOT the + * (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from + * `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing + * folder, via `deps.resolveParentPageId`. The title comes from the meta. + * + * For each entry: + * - `newParent = resolveParentPageId(newPath, 'current')`, + * `oldParent = resolveParentPageId(oldPath, 'prev')`. + * - `newTitle = metaAt(newPath,'current')?.title`, + * `oldTitle = metaAt(oldPath,'prev')?.title`. 
+ * - include `move` iff `newParent !== oldParent` (a real reparent), + * - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from + * `oldTitle` (a real title edit; an empty/absent new title is never a rename), + * - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename; + * the page is its pageId, so Docmost is not touched). + */ +function classifyRenameMoves(renamesMoves, deps) { + return renamesMoves.map((rm) => { + const newParent = deps.resolveParentPageId(rm.newPath, "current"); + const oldParent = deps.resolveParentPageId(rm.oldPath, "prev"); + const newTitle = deps.metaAt(rm.newPath, "current")?.title; + const oldTitle = deps.metaAt(rm.oldPath, "prev")?.title; + const out = { + pageId: rm.pageId, + oldPath: rm.oldPath, + newPath: rm.newPath, + }; + // A reparent: the new path's resolved parent page differs from the old's. + if (newParent !== oldParent) { + out.move = { parentPageId: newParent }; + } + // A title edit: only when there is a real, non-empty new title that changed. + if (typeof newTitle === "string" && + newTitle.length > 0 && + newTitle !== oldTitle) { + out.rename = { title: newTitle }; + } + // Neither changed -> a purely LOCAL file-path rename; do NOT call Docmost. + if (!out.move && !out.rename) { + out.noop = true; + } + return out; + }); +} +/** + * PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost + * action by `pageId` identity, with NO IO (the `metaAt` resolver is injected). + * + * Classification rules: + * - `A` (added): + * - current meta HAS a pageId -> UPDATE (a restored/copied file whose + * page already exists; we push its content rather than create a dup). + * - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a + * brand-new local file; the page does not exist in Docmost yet). 
/**
 * PURE push planner. Classifies each diff row into a Docmost action by
 * `pageId` identity, with no IO: the `metaAt(path, side)` resolver is injected
 * and is the only source of page meta.
 *
 * Classification rules:
 *  - `A` (added):
 *      - current meta has a pageId            -> UPDATE (the page already exists),
 *      - no pageId but a truthy spaceId       -> CREATE,
 *      - neither                              -> SKIP, reason `create-without-spaceId`.
 *  - `M` (modified): current meta has a pageId -> UPDATE, otherwise SKIP.
 *  - `D` (deleted): the pageId is recovered from the PRE-IMAGE meta
 *    (`metaAt(path, "prev")`).
 *      - no recoverable pageId                -> SKIP (never delete an untracked page),
 *      - the same pageId is still carried by a NON-deleted row of this diff
 *        -> NOT a delete. The page is alive at another path. If the surviving
 *        row is an `A` row (a rename that the diff reported as `D` + `A`), the
 *        pair is recorded as a rename/move `{ pageId, oldPath, newPath }`;
 *        otherwise the `D` row is skipped with a reason,
 *      - otherwise                            -> DELETE.
 *  - `R` / `C` (renamed / copied): current meta has a pageId -> recorded in
 *    `renamesMoves` as `{ pageId, oldPath, newPath }`, otherwise SKIP. Whether
 *    it is a move, a rename, both, or a no-op is decided later by the applier.
 *  - any other status -> SKIP with `unhandled diff status <status>`.
 *
 * NOTE(review): an `R` row is recorded ONLY as a rename/move here; if the
 * renamed file's body also changed, no UPDATE is queued for it. Confirm that
 * this is intended.
 *
 * @param input `{ changes, metaAt }` where each change has `status`, `path`
 *   and (for renames/copies) `oldPath`.
 * @returns `{ creates, updates, deletes, renamesMoves, skipped }`.
 */
function computePushActions(input) {
    const { changes, metaAt } = input;
    const actions = {
        creates: [],
        updates: [],
        deletes: [],
        renamesMoves: [],
        skipped: [],
    };
    // Pass 1: index every pageId that is still LIVE after this diff (carried by
    // a non-deleted row). Rows can arrive in any order, so this must be known
    // before any `D` row is classified.
    const liveByPageId = new Map();
    for (const change of changes) {
        if (change.status === "D")
            continue;
        const livePageId = metaAt(change.path, "current")?.pageId;
        if (livePageId && !liveByPageId.has(livePageId)) {
            liveByPageId.set(livePageId, {
                path: change.path,
                status: change.status,
            });
        }
    }
    // pageIds already converted from a `D` + `A` pair into a rename/move, so a
    // second `D` row with the same pageId does not queue a duplicate entry.
    const relocated = new Set();
    // Pass 2: classify every row.
    for (const change of changes) {
        switch (change.status) {
            case "A": {
                const meta = metaAt(change.path, "current");
                const pageId = meta?.pageId;
                if (pageId) {
                    // Added but already carries a pageId: the page exists, so push
                    // its content as an UPDATE rather than creating a duplicate.
                    actions.updates.push({ pageId, path: change.path });
                }
                else if (meta?.spaceId) {
                    // Brand-new file with a target space. The truthiness check
                    // also rejects an empty-string spaceId.
                    actions.creates.push({ path: change.path });
                }
                else {
                    // No usable spaceId: refuse to create into a guessed space.
                    actions.skipped.push({
                        path: change.path,
                        status: "A",
                        reason: "create-without-spaceId",
                    });
                }
                break;
            }
            case "M": {
                const meta = metaAt(change.path, "current");
                const pageId = meta?.pageId;
                if (pageId) {
                    actions.updates.push({ pageId, path: change.path });
                }
                else {
                    // A modified file with no pageId has no page to update.
                    actions.skipped.push({
                        path: change.path,
                        status: "M",
                        reason: "modified file has no pageId in meta",
                    });
                }
                break;
            }
            case "D": {
                // The file is gone; recover its pageId from the pre-image.
                const prevMeta = metaAt(change.path, "prev");
                const pageId = prevMeta?.pageId;
                const survivor = pageId ? liveByPageId.get(pageId) : undefined;
                if (!pageId) {
                    // Untracked-file guard: no recoverable pageId, so this file
                    // was never a Docmost page. Do not translate it to a delete.
                    actions.skipped.push({
                        path: change.path,
                        status: "D",
                        reason: "deleted file has no recoverable pageId (pre-image meta)",
                    });
                }
                else if (survivor) {
                    // The page still exists at `survivor.path`. Deleting it would
                    // trash a page that another row of this same plan keeps.
                    if (survivor.status === "A" && !relocated.has(pageId)) {
                        // `D` + `A` with one pageId is a rename the diff did not
                        // pair up; record it like an `R` row.
                        relocated.add(pageId);
                        actions.renamesMoves.push({
                            pageId,
                            oldPath: change.path,
                            newPath: survivor.path,
                        });
                    }
                    else {
                        actions.skipped.push({
                            path: change.path,
                            status: "D",
                            reason: "deleted path's pageId is still live at another path",
                        });
                    }
                }
                else {
                    actions.deletes.push({ pageId });
                }
                break;
            }
            case "R":
            case "C": {
                // Same page, new path. Identity comes from the CURRENT meta since
                // the file still exists; only oldPath/newPath/pageId are recorded.
                const meta = metaAt(change.path, "current");
                const pageId = meta?.pageId;
                const oldPath = change.oldPath ?? change.path;
                if (pageId) {
                    actions.renamesMoves.push({
                        pageId,
                        oldPath,
                        newPath: change.path,
                    });
                }
                else {
                    actions.skipped.push({
                        path: change.path,
                        status: change.status,
                        reason: "renamed/moved file has no pageId in meta",
                    });
                }
                break;
            }
            default: {
                // Defensive: any status other than A/M/D/R/C.
                actions.skipped.push({
                    path: change.path,
                    status: change.status,
                    reason: `unhandled diff status ${change.status}`,
                });
            }
        }
    }
    return actions;
}
// --- thin apply (create/update/delete) ---------------------------------------
/** The ref the push direction advances after a fully successful push. */
exports.LAST_PUSHED_REF = "refs/docmost/last-pushed";
/**
 * The mirror branch that is fast-forwarded after a clean push, so that it
 * tracks the `main` commit that was just pushed.
 */
exports.DOCMOST_BRANCH = "docmost";
+ * - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry + * with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for + * the parent pageId — path-as-truth — and the meta for the title), then: + * - `move` -> `client.movePage(pageId, parentPageId, position?)` (reparent; + * `position` is UNDEFINED for now — the client supplies a default), + * - `rename` -> `client.renamePage(pageId, title)` (title-only), + * - BOTH -> move (reparent) THEN rename (title), in that order, + * - `noop` -> NO client call; recorded in `noops` (a cosmetic local-only + * file-path rename: the page is its pageId, the path is local, SPEC §5). + * + * FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation + * is wrapped in its own try/catch: a single failing page is recorded in + * `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES — + * one bad page must never block the rest. Crucially, the refs are advanced ONLY + * when `failures.length === 0`: a PARTIAL push must NOT advance + * `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the + * whole batch cleanly (the already-applied pages are idempotent re-applies). + * + * LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a + * `pushedCommit` is supplied: + * - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND + * - fast-forward the `docmost` mirror branch to it via + * `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects + * what Docmost now contains and the NEXT pull diffs EMPTY for these pages + * (it does not re-pull our own write). The ff is REFUSED (not forced) if + * `docmost` is not an ancestor of the pushed commit; the result is surfaced + * in `docmostFastForward`. On ANY failure, NEITHER ref is advanced. + * + * LOOP-GUARD DATA (SPEC §10). 
For every page successfully updated/created the + * result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the body + * hash of what was pushed plus the write's `updatedAt` (when the client returned + * one). A future pull-side poll-suppression consults this so it does not re-pull + * our own write; producing it is in scope here, consuming it is deferred. + * + * @param pushedCommit The `main` commit just reflected into Docmost (SHA or + * commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan). + */ +async function applyPushActions(deps, actions, pushedCommit) { + const { client, git } = deps; + let created = 0; + let updated = 0; + let deleted = 0; + let moved = 0; + let renamed = 0; + const writtenBack = []; + const pushed = []; + const failures = []; + const noops = []; + // 1. UPDATES — collab/Yjs write path (SPEC §2/§15.6), never a raw overwrite. + // Each update is isolated: a thrown page is recorded and the batch goes on. + for (const u of actions.updates) { + try { + const fullMarkdown = await deps.readFile(u.path); + const result = await client.importPageMarkdown(u.pageId, fullMarkdown); + updated++; + // §10 loop-guard data: hash the body we pushed + capture `updatedAt`. + pushed.push({ + pageId: u.pageId, + ...extractUpdatedAt(result), + bodyHash: (0, loop_guard_1.bodyHash)(fullMarkdown), + }); + } + catch (err) { + failures.push({ + kind: "update", + pageId: u.pageId, + path: u.path, + error: errMessage(err), + }); + } + } + // 2. CREATES — create the page, then write the assigned pageId back to meta so + // the file becomes tracked (SPEC §4 "записать присвоенный pageId обратно"). + // Isolated per page like updates. + for (const c of actions.creates) { + try { + const text = await deps.readFile(c.path); + const { meta, body } = (0, index_1.parseDocmostMarkdown)(text); + // Derive create args from the file's current meta. A new local file may + // have partial meta (e.g. 
title/spaceId only); spaceId is required by + // Docmost (the planner already guards a create against a missing spaceId). + const title = meta?.title ?? ""; + const spaceId = meta?.spaceId ?? ""; + const parentPageId = meta?.parentPageId ?? undefined; + const result = await client.createPage(title, body, spaceId, parentPageId); + // `createPage` returns `{ data: { id, ... }, success }`; the assigned + // pageId is at `result.data.id`. + const assignedPageId = result?.data?.id; + if (assignedPageId) { + // Re-serialize the file with the pageId in meta, body preserved. + const newMeta = { + version: meta?.version ?? 1, + ...meta, + pageId: assignedPageId, + }; + const rewritten = (0, index_1.serializeDocmostMarkdownBody)(newMeta, body); + await deps.writeFile(c.path, rewritten); + writtenBack.push({ path: c.path, pageId: assignedPageId }); + // §10 loop-guard data for the created page (hash the pushed body). + pushed.push({ + pageId: assignedPageId, + ...extractUpdatedAt(result), + bodyHash: (0, loop_guard_1.bodyHash)(text), + }); + } + created++; + } + catch (err) { + failures.push({ kind: "create", path: c.path, error: errMessage(err) }); + } + } + // 3. DELETES — soft-delete to Trash (SPEC §8), reversible. Isolated per page. + for (const d of actions.deletes) { + try { + await client.deletePage(d.pageId); + deleted++; + } + catch (err) { + failures.push({ + kind: "delete", + pageId: d.pageId, + error: errMessage(err), + }); + } + } + // 4. RENAME/MOVE (push #3, SPEC §5/§6/§16). Classify each entry against the + // tree-backed resolvers (the NEW parent comes from the new path's enclosing + // folder `.md`, the OLD parent from the old path's at last-pushed — PATH is + // the truth, not stale `meta.parentPageId`; the title from the meta), then + // apply only the real ops. Each page is isolated like the cases above: a + // thrown op is recorded in `failures` and the batch continues. ORDER for a + // page that needs both: reparent (move) FIRST, then retitle (rename). 
+ if (actions.renamesMoves.length > 0) { + // The classifier is PURE over sync resolvers; the tree reads are async, so + // prefetch every (path, side) lookup it will make into plain tables first. + const parentTable = new Map(); + const metaTable = new Map(); + // A tree read (readFile / git.showFileAtRef) throwing must isolate THAT page + // into `failures`, NOT abort the whole batch (§12 resumability). The helpers + // already swallow their own errors, but this per-entry try/catch keeps the + // batch-isolation invariant holding regardless of future changes to them. + const prefetchFailed = new Set(); + for (const rm of actions.renamesMoves) { + // newParent + newTitle from the CURRENT tree; oldParent + oldTitle from the + // last-pushed pre-image (`prev`). Keyed by `path|side` so duplicates fold. + try { + parentTable.set(`${rm.newPath}|current`, await resolveParentPageIdViaTree(deps, rm.newPath, "current")); + parentTable.set(`${rm.oldPath}|prev`, await resolveParentPageIdViaTree(deps, rm.oldPath, "prev")); + metaTable.set(`${rm.newPath}|current`, await metaAtViaTree(deps, rm.newPath, "current")); + metaTable.set(`${rm.oldPath}|prev`, await metaAtViaTree(deps, rm.oldPath, "prev")); + } + catch (err) { + prefetchFailed.add(rm.pageId); + failures.push({ + kind: "move", + pageId: rm.pageId, + path: rm.newPath, + error: errMessage(err), + }); + } + } + const classified = classifyRenameMoves(actions.renamesMoves.filter((rm) => !prefetchFailed.has(rm.pageId)), { + metaAt: (path, side) => metaTable.get(`${path}|${side}`) ?? null, + resolveParentPageId: (path, side) => parentTable.get(`${path}|${side}`) ?? null, + }); + for (const c of classified) { + if (c.noop) { + // Cosmetic local-only file-path rename — no Docmost op (SPEC §5). 
+ noops.push({ + pageId: c.pageId, + oldPath: c.oldPath, + newPath: c.newPath, + reason: "path-only-rename", + }); + continue; + } + // Track which op is in flight so a failure is attributed to the op that + // ACTUALLY threw: for a page needing both, a move that succeeds then a + // rename that throws must be recorded as `rename`, not `move`. + let failingKind = c.move ? "move" : "rename"; + try { + // Reparent FIRST so the page is in its new tree position, THEN retitle. + if (c.move) { + failingKind = "move"; + // TODO(next): compute a fractional-index position between siblings + // (SPEC §16). `position` is UNDEFINED here; the client supplies a valid + // default. Pass `parentPageId: null` for a move to the space ROOT. + await client.movePage(c.pageId, c.move.parentPageId); + moved++; + } + if (c.rename) { + failingKind = "rename"; + await client.renamePage(c.pageId, c.rename.title); + renamed++; + } + } + catch (err) { + // Isolate the failed page: the op that ACTUALLY threw is recorded so a + // re-run can retry. A move that threw before its rename leaves `rename` + // for the next run (idempotent re-apply); refs are NOT advanced (below). + failures.push({ + kind: failingKind, + pageId: c.pageId, + path: c.newPath, + error: errMessage(err), + }); + } + } + } + // 5. Advance the refs ONLY on a CLEAN push (no failures) AND when a pushed + // commit is supplied. A partial push must advance NEITHER ref, so a re-run + // retries the whole batch (SPEC §12). The loop-close (SPEC §6 step 3 / §10): + // advance `refs/docmost/last-pushed` AND fast-forward the `docmost` mirror, + // so Docmost's new content is mirrored and the next pull diffs empty. + let lastPushedAdvanced = false; + let docmostFastForward = null; + if (pushedCommit && failures.length === 0) { + await git.updateRef(exports.LAST_PUSHED_REF, pushedCommit); + lastPushedAdvanced = true; + // Fast-forward the mirror (refused, not forced, on a non-fast-forward — the + // caller logs the reason). 
Surfaced in the result. + docmostFastForward = await git.fastForwardBranch(exports.DOCMOST_BRANCH, pushedCommit); + } + return { + created, + updated, + deleted, + moved, + renamed, + writtenBack, + pushed, + failures, + noops, + skipped: actions.skipped, + lastPushedAdvanced, + docmostFastForward, + }; +} +/** Stringify a thrown value into a stable error message. */ +function errMessage(err) { + return err instanceof Error ? err.message : String(err); +} +/** + * SPEC §5 path-as-truth: the parent FOLDER's `.md` file for a vault-relative + * (forward-slash) path. `buildVaultLayout` puts a page with children at + * `<...>/Title.md` and nests its children under `<...>/Title/`, so for + * `newPath = /Child.md` the parent page's file is `.md` (the enclosing + * folder, one level up). A path with NO enclosing folder (`Child.md`, at the + * space root) has no parent folder file -> `null` (the parent is ROOT). + */ +function parentFolderFile(path) { + const slash = path.lastIndexOf("/"); + if (slash < 0) + return null; // root-level file: no enclosing folder. + return `${path.slice(0, slash)}.md`; +} +/** + * Build the `resolveParentPageId(path, side)` resolver `classifyRenameMoves` + * needs, reading the PARENT FOLDER's `.md` (SPEC §5 path-as-truth): + * - `current` -> `deps.readFile(.md)` (the live working tree), + * - `prev` -> `git.showFileAtRef('refs/docmost/last-pushed', .md)` (the + * last-pushed pre-image), + * then parse its `docmost:meta` and return that page's pageId. A root-level path + * (no enclosing folder), a missing/unreadable parent file, or a parent file with + * no parseable pageId all resolve to `null` (parent is ROOT / unknown -> + * `parentPageId: null`, SPEC §16 "parentPageId: null -> в корень"). + * + * The IO is async, so this returns an ASYNC resolver; the call sites prefetch the + * parent pageIds (the classifier itself stays pure/sync over a plain table). 
/**
 * Resolve the parent page id for `path` on one side by reading the parent
 * FOLDER's `.md` file (see `parentFolderFile`) and taking the `pageId` from
 * its parsed meta.
 *
 * Resolves to `null` when the path has no enclosing folder, when the parent
 * file cannot be read or is absent, when it cannot be parsed, or when its meta
 * carries no pageId.
 *
 * @param side `"current"` reads through `deps.readFile`; any other value reads
 *   `refs/docmost/last-pushed` through `deps.git.showFileAtRef`.
 */
async function resolveParentPageIdViaTree(deps, path, side) {
    const parentFile = parentFolderFile(path);
    if (parentFile === null) {
        // Root-level file: there is no parent folder file to consult.
        return null;
    }
    // The read / null-check / parse / swallow sequence is exactly the one
    // `metaAtViaTree` performs, applied to the parent folder's file.
    const parentMeta = await metaAtViaTree(deps, parentFile, side);
    return parentMeta?.pageId ?? null;
}
/**
 * Read and parse a file's `docmost:meta` on one side.
 *
 * `"current"` reads through `deps.readFile`; any other side reads the file at
 * `refs/docmost/last-pushed` through `deps.git.showFileAtRef`.
 *
 * NOTE(review): the `prev` side is always `LAST_PUSHED_REF` here, whereas
 * `runPush` falls back to the `docmost` branch as its diff base when that ref
 * does not resolve. Confirm the two are meant to differ.
 *
 * @returns The parsed meta, or `null` when the read throws, the file is absent
 *   (`null` text), parsing throws, or the parse result has no meta.
 */
async function metaAtViaTree(deps, path, side) {
    let text;
    try {
        if (side === "current") {
            text = await deps.readFile(path);
        }
        else {
            text = await deps.git.showFileAtRef(exports.LAST_PUSHED_REF, path);
        }
    }
    catch {
        return null;
    }
    if (text === null) {
        return null;
    }
    try {
        const parsed = (0, index_1.parseDocmostMarkdown)(text);
        return parsed.meta ?? null;
    }
    catch {
        return null;
    }
}
/**
 * Pick an `updatedAt` string out of a client result, looking at
 * `result.data.updatedAt` first and `result.updatedAt` second.
 *
 * @returns `{ updatedAt }` when a string was found, otherwise `{}` so that
 *   spreading the result adds no key at all.
 */
function extractUpdatedAt(result) {
    const candidate = result?.data?.updatedAt ?? result?.updatedAt;
    if (typeof candidate !== "string") {
        return {};
    }
    return { updatedAt: candidate };
}
// --- runnable push orchestration (`runPush`) ---------------------------------
//
// `runPush` wires the git diff/ref primitives, the pure `computePushActions`
// planner and the `applyPushActions` applier into one cycle. Every external
// effect is injected through its `deps` argument.
/** Author name stamped on engine-made commits on `main` in the push direction. */
exports.LOCAL_AUTHOR_NAME = "Local";
/** Author email stamped on engine-made commits on `main` in the push direction. */
exports.LOCAL_AUTHOR_EMAIL = "local@local";
/** Provenance trailer attached to `main`-side (local) commits. */
exports.LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the + * `docmost` mirror branch (what Docmost currently has). Resolve `main`. + * 5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)` + * resolver (current = working tree, prev = `git show :`); run + * the PURE `computePushActions`. + * 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost + * calls, NO ref advance. + * 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`, + * then (a) if any pageIds were written back (creates), commit them on `main` + * with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the + * new commit so the recorded pageIds are persisted in what Docmost mirrors; + * (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent + * WARNING and a non-zero-ish flag. Then log a one-line summary. + */ +async function runPush(deps, opts) { + const { git, settings, log } = deps; + const dryRun = opts.dryRun; + // 1. Preflight git. Fail fast (actionable message via main().catch) if the git + // binary is missing — the vault state store relies on it. + await git.assertGitAvailable(); + await git.ensureRepo(); + // 1b. Refuse to push on top of an unresolved merge (SPEC §9/§12). A previous + // conflicting pull leaves the vault mid-merge; pushing now could leak + // conflict markers into Docmost (SPEC §9, the cardinal invariant). Detect + // it BEFORE any checkout/diff and stop with a clear, actionable message so + // re-runs converge once the human resolves (or aborts) the merge. + if (await git.isMergeInProgress()) { + log(`push: vault has an unresolved merge at ${settings.vaultPath} — resolve ` + + `it (or 'git merge --abort') and re-run. Nothing was pushed to Docmost ` + + `(conflict markers must never reach Docmost, SPEC §9).`); + return { mode: dryRun ? "dry-run" : "apply", aborted: "merge-in-progress" }; + } + // 2. Work on `main` — the human-facing branch the push diffs FROM. 
+ await git.checkout(git_1.DEFAULT_BRANCH); + // 3. Commit the human's pending working-tree changes on `main` with the `local` + // provenance trailer (SPEC §7.3). A no-op commit when nothing changed is + // fine (`commit` returns false). The loop-guard keys on the trailer. + // Even on a "plan only" dry-run this commits the working tree (it is the + // only way to diff `base..main`, acceptable §6.1 behavior) — so make that + // LOCAL git mutation VISIBLE, never silent: a created commit is local-only + // and nothing is sent to Docmost. + await git.stageAll(); + const committedWorkingTree = await git.commit("local: working-tree changes", { + authorName: exports.LOCAL_AUTHOR_NAME, + authorEmail: exports.LOCAL_AUTHOR_EMAIL, + trailers: [exports.LOCAL_SOURCE_TRAILER], + }); + if (committedWorkingTree) { + const sha = await git.revParse(git_1.DEFAULT_BRANCH); + log(`push: committed local working-tree changes on main` + + (sha ? ` as ${sha.slice(0, 8)}` : "") + + ` (local git only — nothing sent to Docmost).`); + } + else { + log("push: working tree clean (no local changes to push)."); + } + // 4. Pick the diff BASE (SPEC §5/§6): `refs/docmost/last-pushed` if it resolves + // (the marker of what `main` is already in Docmost), else fall back to the + // `docmost` mirror branch (the mirror of what Docmost currently has) — which + // is what exists before the first push ever advanced last-pushed. + let base; + const lastPushedSha = await git.readRef(exports.LAST_PUSHED_REF); + if (lastPushedSha) { + base = { ref: exports.LAST_PUSHED_REF, source: "last-pushed", sha: lastPushedSha }; + } + else { + base = { + ref: exports.DOCMOST_BRANCH, + source: "docmost", + sha: await git.revParse(exports.DOCMOST_BRANCH), + }; + } + const pushedCommit = await git.revParse(git_1.DEFAULT_BRANCH); + if (!pushedCommit) { + // `main` has no commit — `ensureRepo` always makes an initial one, so this is + // defensive. Nothing to diff. 
+ log("push: `main` has no commit to push — nothing to do."); + return { mode: dryRun ? "dry-run" : "apply", base }; + } + // 5. Diff the base against `main` and build the `metaAt` resolver (PURE planner + // input). `current` reads the live working tree; `prev` reads the base ref's + // pre-image via `git show :` (so a DELETE recovers its pageId). + const changes = await git.diffNameStatus(base.ref, git_1.DEFAULT_BRANCH); + // Synchronous resolver over PREFETCHED meta tables: `computePushActions` is + // PURE/sync, but the file/ref reads are async — so we prefetch every (path, + // side) the diff will ask for into a table first, then resolve from it. + const metaTable = new Map(); + for (const change of changes) { + // `current`: A/M/R/C still have the file on `main`. `prev`: D needs the + // pre-image; R/C also benefit (old title). Prefetch both sides per path. + const currentPath = change.path; + const prevPath = change.oldPath ?? change.path; + if (!metaTable.has(`${currentPath}|current`)) { + metaTable.set(`${currentPath}|current`, await readMetaCurrent(deps, currentPath)); + } + if (!metaTable.has(`${prevPath}|prev`)) { + metaTable.set(`${prevPath}|prev`, await readMetaPrev(deps, base.ref, prevPath)); + } + } + const metaAt = (path, side) => metaTable.get(`${path}|${side}`) ?? null; + const actions = computePushActions({ changes, metaAt }); + const planned = { + creates: actions.creates.length, + updates: actions.updates.length, + deletes: actions.deletes.length, + renamesMoves: actions.renamesMoves.length, + skipped: actions.skipped.length, + }; + // 6. DRY-RUN (default): log the full plan and RETURN — build NO client, make + // ZERO Docmost calls, advance NO refs. This is the SAFE default. + logPlan(log, base, pushedCommit, actions, planned, dryRun); + if (dryRun) { + return { mode: "dry-run", base, pushedCommit, planned }; + } + // 7. --apply: build the REAL client and execute. This is the ONLY write path. 
+ const client = deps.makeClient(settings); + const applied = await applyPushActions({ + client, + // Pass the WHOLE `git` object (it satisfies the applier's + // `Pick` deps surface). Passing bare method references + // (`git.updateRef`, …) would lose their `this` binding, so on a REAL + // `VaultGit` they would throw `this.runRaw is not a function`. Hand over + // the object so the methods keep their receiver — exactly as `pull.ts` + // does for `applyPullActions`. + git, + readFile: deps.readFile, + writeFile: deps.writeFile, + }, actions, pushedCommit); + // 7a. Persist freshly-assigned pageIds (creates) back into git. `applyPushActions` + // rewrote those files on disk; commit them on `main` with the `local` trailer + // so the new pageIds are recorded, then RE-advance `refs/docmost/last-pushed` + // to the new commit so what Docmost mirrors and what last-pushed points at + // stay in lock-step (the write-back commit is part of `main` now). + // Track a divergent-`docmost` mirror across BOTH ff sites (the applier's main + // push ff in 7b, and the write-back ff here). A divergent mirror is a §5 + // invariant breach in EITHER branch and must escalate identically (exit 1). + let divergentDocmost = false; + if (applied.writtenBack.length > 0) { + await git.stageAll(); + const recorded = await git.commit("local: record created pageIds", { + authorName: exports.LOCAL_AUTHOR_NAME, + authorEmail: exports.LOCAL_AUTHOR_EMAIL, + trailers: [exports.LOCAL_SOURCE_TRAILER], + }); + if (recorded) { + const newCommit = await git.revParse(git_1.DEFAULT_BRANCH); + // Only re-advance when the original push was CLEAN (last-pushed was already + // advanced by the applier); a partial push left the refs untouched and a + // re-run retries the whole batch, so we must not move them either. 
+ if (newCommit && applied.lastPushedAdvanced) { + await git.updateRef(exports.LAST_PUSHED_REF, newCommit); + const ff = await git.fastForwardBranch(exports.DOCMOST_BRANCH, newCommit); + if (!ff.ok) { + // SYMMETRIC with the main escalation (7b): a divergent mirror in the + // write-back branch is the SAME §5 invariant breach and must escalate + // (exit 1), not just log a soft warning. + divergentDocmost = true; + log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` + + `fast-forwarded to the pageId write-back commit ` + + `(${ff.reason ?? "not-fast-forward"}). The §5 invariant ('docmost' ` + + `mirrors what Docmost contains) is broken: reconcile 'docmost' ` + + `against the live Docmost tree before the next cycle.`); + } + } + } + } + // 7b. ESCALATE a divergent-`docmost` fast-forward refusal (SPEC §5 invariant + // broken). The applier already refused to clobber a divergent mirror; make + // it LOUD (not silent) so the operator notices, and fold it into the exit. + if (applied.docmostFastForward && !applied.docmostFastForward.ok) { + divergentDocmost = true; + log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` + + `fast-forwarded (${applied.docmostFastForward.reason ?? "not-fast-forward"}). ` + + `The §5 invariant ('docmost' mirrors what Docmost contains) is broken: ` + + `reconcile 'docmost' against the live Docmost tree before the next cycle.`); + } + // 7c. One-line summary (mirrors pull.ts's summary line). + log(`push complete: ${applied.created} created, ${applied.updated} updated, ` + + `${applied.deleted} deleted, ${applied.moved} moved, ${applied.renamed} ` + + `renamed, ${applied.noops.length} no-op(s), ${applied.skipped.length} ` + + `skipped, ${applied.failures.length} failure(s)` + + (divergentDocmost ? 
" [DIVERGENT docmost mirror]" : "")); + return { + mode: "apply", + base, + pushedCommit, + planned, + applied, + divergentDocmost, + failures: applied.failures, + }; +} +/** Parse a file's `docmost:meta` from the live working tree (`current` side). */ +async function readMetaCurrent(deps, path) { + let text; + try { + text = await deps.readFile(path); + } + catch { + return null; // absent on disk (e.g. a D row's path) -> no current meta. + } + try { + return (0, index_1.parseDocmostMarkdown)(text).meta ?? null; + } + catch { + return null; // unparseable meta -> not engine-tracked. + } +} +/** Parse a file's `docmost:meta` from the base ref's pre-image (`prev` side). */ +async function readMetaPrev(deps, baseRef, path) { + let text; + try { + text = await deps.git.showFileAtRef(baseRef, path); + } + catch { + return null; + } + if (text === null) + return null; // path absent at the base ref. + try { + return (0, index_1.parseDocmostMarkdown)(text).meta ?? null; + } + catch { + return null; + } +} +/** Emit the full plan (counts + per-item) to the injected logger. */ +function logPlan(log, base, pushedCommit, actions, planned, dryRun) { + log(`push plan (${dryRun ? "DRY-RUN — no Docmost writes" : "APPLY"}): base=` + + `${base.ref} (${base.source}${base.sha ? ` ${base.sha.slice(0, 8)}` : ""}) ` + + `-> main ${pushedCommit.slice(0, 8)}`); + log(`push plan counts: ${planned.creates} create, ${planned.updates} update, ` + + `${planned.deletes} delete, ${planned.renamesMoves} rename/move, ` + + `${planned.skipped} skipped`); + for (const c of actions.creates) + log(` create: ${c.path}`); + for (const u of actions.updates) + log(` update: ${u.pageId} (${u.path})`); + for (const d of actions.deletes) + log(` delete: ${d.pageId}`); + for (const rm of actions.renamesMoves) + log(` rename/move: ${rm.oldPath} -> ${rm.newPath} (${rm.pageId})`); + for (const s of actions.skipped) + log(` skipped [${s.status}] ${s.path}: ${s.reason}`); +} +/** + * Parse the `push` CLI flags. 
/**
 * Parse the `push` CLI flags.
 *
 * The only flag recognised is the exact argument `--apply`. Without it the
 * result is `{ apply: false }`, which makes a dry run the default.
 *
 * @param argv Argument strings to scan.
 * @returns `{ apply }`: `true` when some argument equals `--apply`.
 */
function parseArgs(argv) {
    const apply = argv.some((arg) => arg === "--apply");
    return { apply };
}
+ */ +import { z } from 'zod'; +export declare const envSchema: z.ZodObject<{ + DOCMOST_API_URL: z.ZodString; + DOCMOST_EMAIL: z.ZodString; + DOCMOST_PASSWORD: z.ZodString; + DOCMOST_SPACE_ID: z.ZodString; + VAULT_PATH: z.ZodDefault; + GIT_REMOTE: z.ZodPipe, z.ZodOptional>; + POLL_INTERVAL_MS: z.ZodDefault>; + DEBOUNCE_MS: z.ZodDefault>; + LOG_LEVEL: z.ZodDefault>; +}, z.core.$strip>; +export type Settings = { + docmostApiUrl: string; + docmostEmail: string; + docmostPassword: string; + docmostSpaceId: string; + vaultPath: string; + gitRemote?: string; + pollIntervalMs: number; + debounceMs: number; + logLevel: 'debug' | 'info' | 'warn' | 'error'; +}; +export declare function parseSettings(env: NodeJS.ProcessEnv): Settings; diff --git a/packages/git-sync/build/engine/settings.js b/packages/git-sync/build/engine/settings.js new file mode 100644 index 00000000..3c69c4bf --- /dev/null +++ b/packages/git-sync/build/engine/settings.js @@ -0,0 +1,55 @@ +"use strict"; +Object.defineProperty(exports, "__esModule", { value: true }); +exports.envSchema = void 0; +exports.parseSettings = parseSettings; +/** + * Engine settings (plan §2.1 / §7.2 — ADAPTED for vendoring). + * + * Upstream this module also loaded `.env` (`dotenv`) and bound `parseSettings` + * to `process.env` via a `loadSettings()` entry point. In gitmost the engine is + * driven IN-PROCESS by the NestJS server, which builds the `Settings` object + * from `EnvironmentService` (plan §7.2) — so the engine must NOT reach into + * `process.env` here. We therefore vendor ONLY: + * - the `Settings` type the engine consumes, and + * - `parseSettings(env)` as a PURE function (validate a raw env object -> typed + * `Settings`), kept for unit tests and for the server to reuse if it wants + * to validate an env-shaped object. + * The `loadSettings()` / `loadDotenv()` side-effecting entry point is dropped. 
+ */ +const zod_1 = require("zod"); +// Schema keyed by the real ENV variable names so validation errors name the +// exact variable. Credentials and the address of our OWN Docmost instance have +// NO default — a missing value must fail at startup, never silently fall back. +exports.envSchema = zod_1.z.object({ + // Docmost connection — address of our own instance, no default. + DOCMOST_API_URL: zod_1.z.string().url(), + // Credentials for /auth/login — no default, never hardcoded. + DOCMOST_EMAIL: zod_1.z.string().min(1), + DOCMOST_PASSWORD: zod_1.z.string().min(1), + // Which Docmost space to mirror. + DOCMOST_SPACE_ID: zod_1.z.string().min(1), + // Local git vault (state store) — kept under data/ so the volume persists it. + VAULT_PATH: zod_1.z.string().min(1).default('data/vault'), + // Optional git remote the vault pushes to. Empty string is treated as unset. + GIT_REMOTE: zod_1.z.preprocess((v) => (v === '' ? undefined : v), zod_1.z.string().min(1).optional()), + // Non-secret tunables — sensible defaults are fine. + POLL_INTERVAL_MS: zod_1.z.coerce.number().int().positive().default(15000), + DEBOUNCE_MS: zod_1.z.coerce.number().int().positive().default(2000), + LOG_LEVEL: zod_1.z.enum(['debug', 'info', 'warn', 'error']).default('info'), +}); +// Pure: validate a raw environment object and map it to a typed Settings. +// Throws ZodError on bad config. No side effects — safe to import in tests. 
+function parseSettings(env) { + const e = exports.envSchema.parse(env); + return { + docmostApiUrl: e.DOCMOST_API_URL, + docmostEmail: e.DOCMOST_EMAIL, + docmostPassword: e.DOCMOST_PASSWORD, + docmostSpaceId: e.DOCMOST_SPACE_ID, + vaultPath: e.VAULT_PATH, + gitRemote: e.GIT_REMOTE, + pollIntervalMs: e.POLL_INTERVAL_MS, + debounceMs: e.DEBOUNCE_MS, + logLevel: e.LOG_LEVEL, + }; +} diff --git a/packages/git-sync/build/index.d.ts b/packages/git-sync/build/index.d.ts index b5afada0..a6268313 100644 --- a/packages/git-sync/build/index.d.ts +++ b/packages/git-sync/build/index.d.ts @@ -15,3 +15,13 @@ export { sanitizeTitle, disambiguate } from "./engine/sanitize"; export { stabilizePageFile } from "./engine/stabilize"; export type { PageMeta } from "./engine/stabilize"; export { bodyHash } from "./engine/loop-guard"; +export type { GitSyncClient, GitSyncPageNodeLite } from "./engine/client.types"; +export { VaultGit, vaultGitEnv, buildCommitMessage, BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./engine/git"; +export type { DiffEntry, MergeResult, CommitOptions } from "./engine/git"; +export { readExisting, computePullActions, applyPullActions, } from "./engine/pull"; +export type { ReadExistingDeps, PullActionsInput, PullActions, ApplyPullActionsDeps, ApplyResult, } from "./engine/pull"; +export { classifyRenameMoves, computePushActions, applyPushActions, runPush, parentFolderFile, parseArgs, LAST_PUSHED_REF, DOCMOST_BRANCH, LOCAL_AUTHOR_NAME, LOCAL_AUTHOR_EMAIL, LOCAL_SOURCE_TRAILER, } from "./engine/push"; +export type { CreateAction, UpdateAction, DeleteAction, RenameMoveAction, RenameMoveActionClassified, ClassifyRenameMovesDeps, PushActions, PushActionsInput, MetaSide, ApplyPushDeps, WrittenBackPage, PushedPageRecord, PushFailure, PushNoop, ApplyPushResult, PushDeps, PushRunResult, PushParsedArgs, } from "./engine/push"; +export { parseSettings, envSchema } from "./engine/settings"; +export type { Settings } from "./engine/settings"; +export { 
loadSettingsOrExit } from "./engine/config-errors"; diff --git a/packages/git-sync/build/index.js b/packages/git-sync/build/index.js index 2ac0563e..6856bbfd 100644 --- a/packages/git-sync/build/index.js +++ b/packages/git-sync/build/index.js @@ -7,7 +7,7 @@ * VaultGit, pull/push) is added in later steps. */ Object.defineProperty(exports, "__esModule", { value: true }); -exports.bodyHash = exports.stabilizePageFile = exports.disambiguate = exports.sanitizeTitle = exports.buildVaultLayout = exports.MASS_DELETE_FRACTION = exports.MASS_DELETE_MIN_EXISTING = exports.decideAbsenceDeletions = exports.planReconciliation = exports.docsCanonicallyEqual = exports.canonicalizeContent = exports.markdownToProseMirror = exports.convertProseMirrorToMarkdown = exports.parseDocmostMarkdown = exports.serializeDocmostMarkdownBody = exports.serializeDocmostMarkdown = void 0; +exports.loadSettingsOrExit = exports.envSchema = exports.parseSettings = exports.LOCAL_SOURCE_TRAILER = exports.LOCAL_AUTHOR_EMAIL = exports.LOCAL_AUTHOR_NAME = exports.DOCMOST_BRANCH = exports.LAST_PUSHED_REF = exports.parseArgs = exports.parentFolderFile = exports.runPush = exports.applyPushActions = exports.computePushActions = exports.classifyRenameMoves = exports.applyPullActions = exports.computePullActions = exports.readExisting = exports.DEFAULT_BRANCH = exports.BOT_AUTHOR_EMAIL = exports.BOT_AUTHOR_NAME = exports.buildCommitMessage = exports.vaultGitEnv = exports.VaultGit = exports.bodyHash = exports.stabilizePageFile = exports.disambiguate = exports.sanitizeTitle = exports.buildVaultLayout = exports.MASS_DELETE_FRACTION = exports.MASS_DELETE_MIN_EXISTING = exports.decideAbsenceDeletions = exports.planReconciliation = exports.docsCanonicallyEqual = exports.canonicalizeContent = exports.markdownToProseMirror = exports.convertProseMirrorToMarkdown = exports.parseDocmostMarkdown = exports.serializeDocmostMarkdownBody = exports.serializeDocmostMarkdown = void 0; // Pure converter (markdown <-> ProseMirror, 
file envelope, canonicalization). var index_1 = require("./lib/index"); Object.defineProperty(exports, "serializeDocmostMarkdown", { enumerable: true, get: function () { return index_1.serializeDocmostMarkdown; } }); @@ -33,3 +33,31 @@ var stabilize_1 = require("./engine/stabilize"); Object.defineProperty(exports, "stabilizePageFile", { enumerable: true, get: function () { return stabilize_1.stabilizePageFile; } }); var loop_guard_1 = require("./engine/loop-guard"); Object.defineProperty(exports, "bodyHash", { enumerable: true, get: function () { return loop_guard_1.bodyHash; } }); +var git_1 = require("./engine/git"); +Object.defineProperty(exports, "VaultGit", { enumerable: true, get: function () { return git_1.VaultGit; } }); +Object.defineProperty(exports, "vaultGitEnv", { enumerable: true, get: function () { return git_1.vaultGitEnv; } }); +Object.defineProperty(exports, "buildCommitMessage", { enumerable: true, get: function () { return git_1.buildCommitMessage; } }); +Object.defineProperty(exports, "BOT_AUTHOR_NAME", { enumerable: true, get: function () { return git_1.BOT_AUTHOR_NAME; } }); +Object.defineProperty(exports, "BOT_AUTHOR_EMAIL", { enumerable: true, get: function () { return git_1.BOT_AUTHOR_EMAIL; } }); +Object.defineProperty(exports, "DEFAULT_BRANCH", { enumerable: true, get: function () { return git_1.DEFAULT_BRANCH; } }); +var pull_1 = require("./engine/pull"); +Object.defineProperty(exports, "readExisting", { enumerable: true, get: function () { return pull_1.readExisting; } }); +Object.defineProperty(exports, "computePullActions", { enumerable: true, get: function () { return pull_1.computePullActions; } }); +Object.defineProperty(exports, "applyPullActions", { enumerable: true, get: function () { return pull_1.applyPullActions; } }); +var push_1 = require("./engine/push"); +Object.defineProperty(exports, "classifyRenameMoves", { enumerable: true, get: function () { return push_1.classifyRenameMoves; } }); +Object.defineProperty(exports, 
"computePushActions", { enumerable: true, get: function () { return push_1.computePushActions; } }); +Object.defineProperty(exports, "applyPushActions", { enumerable: true, get: function () { return push_1.applyPushActions; } }); +Object.defineProperty(exports, "runPush", { enumerable: true, get: function () { return push_1.runPush; } }); +Object.defineProperty(exports, "parentFolderFile", { enumerable: true, get: function () { return push_1.parentFolderFile; } }); +Object.defineProperty(exports, "parseArgs", { enumerable: true, get: function () { return push_1.parseArgs; } }); +Object.defineProperty(exports, "LAST_PUSHED_REF", { enumerable: true, get: function () { return push_1.LAST_PUSHED_REF; } }); +Object.defineProperty(exports, "DOCMOST_BRANCH", { enumerable: true, get: function () { return push_1.DOCMOST_BRANCH; } }); +Object.defineProperty(exports, "LOCAL_AUTHOR_NAME", { enumerable: true, get: function () { return push_1.LOCAL_AUTHOR_NAME; } }); +Object.defineProperty(exports, "LOCAL_AUTHOR_EMAIL", { enumerable: true, get: function () { return push_1.LOCAL_AUTHOR_EMAIL; } }); +Object.defineProperty(exports, "LOCAL_SOURCE_TRAILER", { enumerable: true, get: function () { return push_1.LOCAL_SOURCE_TRAILER; } }); +var settings_1 = require("./engine/settings"); +Object.defineProperty(exports, "parseSettings", { enumerable: true, get: function () { return settings_1.parseSettings; } }); +Object.defineProperty(exports, "envSchema", { enumerable: true, get: function () { return settings_1.envSchema; } }); +var config_errors_1 = require("./engine/config-errors"); +Object.defineProperty(exports, "loadSettingsOrExit", { enumerable: true, get: function () { return config_errors_1.loadSettingsOrExit; } }); diff --git a/packages/git-sync/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json b/packages/git-sync/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json index 78a22f2a..7a64bcf9 100644 --- 
a/packages/git-sync/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json +++ b/packages/git-sync/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json @@ -1 +1 @@ -{"version":"4.1.6","results":[[":test/node-ops.test.ts",{"duration":55.050974999999994,"failed":false}],[":test/markdown-converter.test.ts",{"duration":34.13685399999997,"failed":false}],[":test/diff.test.ts",{"duration":49.63132299999995,"failed":false}],[":test/node-ops-extra.test.ts",{"duration":49.444255,"failed":false}],[":test/reconcile.test.ts",{"duration":14.958778999999993,"failed":false}],[":test/canonicalize.test.ts",{"duration":15.778381999999965,"failed":false}],[":test/markdown-roundtrip.property.test.ts",{"duration":8319.439738000001,"failed":false}],[":test/stabilize.test.ts",{"duration":150.99475900000016,"failed":false}],[":test/canonicalize-extra.test.ts",{"duration":155.9570060000001,"failed":false}],[":test/loop-guard.test.ts",{"duration":8.774560999999949,"failed":false}],[":test/markdown-document.test.ts",{"duration":8.800682999999935,"failed":false}],[":test/sanitize.test.ts",{"duration":13.741441000000009,"failed":false}],[":test/markdown-converter-golden.test.ts",{"duration":16.489082999999994,"failed":false}],[":test/roundtrip-corpus.test.ts",{"duration":298.84454900000014,"failed":false}],[":test/layout.test.ts",{"duration":13.172171000000048,"failed":false}],[":test/markdown-document-envelope.test.ts",{"duration":17.021231,"failed":false}],[":test/roundtrip.test.ts",{"duration":97.44139999999993,"failed":false}]]} \ No newline at end of file 
+{"version":"4.1.6","results":[[":test/node-ops.test.ts",{"duration":56.38882699999999,"failed":false}],[":test/markdown-converter.test.ts",{"duration":40.668915000000084,"failed":false}],[":test/diff.test.ts",{"duration":49.456359999999904,"failed":false}],[":test/node-ops-extra.test.ts",{"duration":63.93989399999998,"failed":false}],[":test/reconcile.test.ts",{"duration":13.678699999999992,"failed":false}],[":test/canonicalize.test.ts",{"duration":17.350126000000046,"failed":false}],[":test/markdown-roundtrip.property.test.ts",{"duration":8429.205451,"failed":false}],[":test/stabilize.test.ts",{"duration":176.2395210000002,"failed":false}],[":test/canonicalize-extra.test.ts",{"duration":186.62702600000011,"failed":false}],[":test/loop-guard.test.ts",{"duration":10.596054999999978,"failed":false}],[":test/markdown-document.test.ts",{"duration":9.079204000000118,"failed":false}],[":test/sanitize.test.ts",{"duration":26.067102999999975,"failed":false}],[":test/markdown-converter-golden.test.ts",{"duration":16.916695000000004,"failed":false}],[":test/roundtrip-corpus.test.ts",{"duration":333.193577,"failed":false}],[":test/layout.test.ts",{"duration":12.358113000000003,"failed":false}],[":test/markdown-document-envelope.test.ts",{"duration":19.542911000000004,"failed":false}],[":test/roundtrip.test.ts",{"duration":118.45819199999983,"failed":false}],[":test/compute-push-actions.test.ts",{"duration":13.49065500000006,"failed":false}],[":test/apply-pull-actions.test.ts",{"duration":213.97865000000002,"failed":false}],[":test/git.test.ts",{"duration":2389.5195719999997,"failed":false}],[":test/run-push.test.ts",{"duration":24.829793999999993,"failed":false}],[":test/compute-pull-actions.test.ts",{"duration":13.413110999999844,"failed":false}],[":test/apply-push-actions.test.ts",{"duration":36.624465999999984,"failed":false}],[":test/classify-rename-moves.test.ts",{"duration":10.711434000000054,"failed":false}],[":test/git-merge.test.ts",{"duration":332.99638500000003,"fa
iled":false}],[":test/read-existing.test.ts",{"duration":10.29277900000011,"failed":false}],[":test/config-errors-invalid.test.ts",{"duration":24.780978000000005,"failed":false}],[":test/run-push-realgit.test.ts",{"duration":276.9070680000002,"failed":false}],[":test/settings.test.ts",{"duration":17.865966000000014,"failed":false}],[":test/config-errors.test.ts",{"duration":17.732034,"failed":false}]]} \ No newline at end of file diff --git a/packages/git-sync/node_modules/zod b/packages/git-sync/node_modules/zod new file mode 120000 index 00000000..9350ab54 --- /dev/null +++ b/packages/git-sync/node_modules/zod @@ -0,0 +1 @@ +../../../node_modules/.pnpm/zod@4.3.6/node_modules/zod \ No newline at end of file diff --git a/packages/git-sync/package.json b/packages/git-sync/package.json index 58f6ad8c..94637f0e 100644 --- a/packages/git-sync/package.json +++ b/packages/git-sync/package.json @@ -31,7 +31,8 @@ "@tiptap/pm": "3.20.4", "@tiptap/starter-kit": "3.20.4", "jsdom": "25.0.0", - "marked": "17.0.5" + "marked": "17.0.5", + "zod": "4.3.6" }, "devDependencies": { "@types/jsdom": "^21.1.7", diff --git a/packages/git-sync/src/engine/client.types.ts b/packages/git-sync/src/engine/client.types.ts new file mode 100644 index 00000000..873e9c2c --- /dev/null +++ b/packages/git-sync/src/engine/client.types.ts @@ -0,0 +1,132 @@ +/** + * The client seam (plan §3.1). Upstream `pull.ts`/`push.ts` reached into the + * REST `DocmostClient` from the `docmost-client` package via `Pick` subsets. That package is NOT vendored here (the gitmost server writes + * NATIVELY — through repositories + collab `openDirectConnection`, plan §3.2/§3.3), + * so the engine must depend on a narrow STRUCTURAL interface instead. + * + * `GitSyncClient` is that interface: the native datasource (server side, a later + * step) implements it, and the vendored engine only ever uses `Pick` subsets of it. 
The signatures below MIRROR exactly the methods the + * vendored `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine + * reads off each result) — verified against the upstream `DocmostClient` + * (packages/docmost-client/src/client.ts) so a real REST client is still + * structurally assignable, and so the native adapter has a precise contract. + */ + +/** + * A page node as returned by `listSpaceTree` (the sidebar/tree walk, no body). + * The engine layout (`buildVaultLayout`) consumes `PageNode` from `./layout`, + * which only requires `id` (+ optional `title`/`slugId`/`parentPageId`); this + * lite shape documents the fields the tree walk surfaces. Upstream nodes also + * carry `position`, `icon`, `hasChildren` — kept open via the index signature. + */ +export interface GitSyncPageNodeLite { + id: string; + slugId?: string; + title?: string; + parentPageId?: string | null; + hasChildren?: boolean; + /** Upstream `listSpaceTree` nodes carry extra fields (position, icon, …). */ + [key: string]: unknown; +} + +/** + * The structural client the engine depends on. Only `Pick` + * subsets are ever used: + * - pull reads: `getPageJson` (+ the tree walk's `listSpaceTree`), + * - push writes: `importPageMarkdown` / `createPage` / `deletePage` / + * `movePage` / `renamePage`, + * - continuous (phase B+): `listRecentSince` / `listTrash` / `restorePage`. + */ +export interface GitSyncClient { + // --- reads (pull) --------------------------------------------------------- + + /** + * Full tree of page nodes for the space (or the subtree rooted at + * `rootPageId`), each WITHOUT body content. `complete` is `false` when the + * walk was truncated / a fetch failed — the pull side suppresses absence + * deletions on an incomplete tree (SPEC §8). Native impl returns + * `complete: true` always (reads the DB, not a paginated REST endpoint). 
+ */ + listSpaceTree( + spaceId: string, + rootPageId?: string, + ): Promise<{ pages: GitSyncPageNodeLite[]; complete: boolean }>; + + /** + * One page WITH its ProseMirror body content. `applyPullActions` reads + * `id`, `slugId`, `title`, `parentPageId`, `spaceId` (for the file meta) and + * `content` (to stabilize/serialize). `updatedAt` is carried for the + * poll-suppression loop-guard. + */ + getPageJson(pageId: string): Promise<{ + id: string; + slugId: string; + title: string; + parentPageId: string | null; + spaceId: string; + updatedAt: string; + content: unknown; + }>; + + // --- writes (push) -------------------------------------------------------- + + /** + * Replace a page's body from a self-contained markdown file (meta + body). + * The collab/Yjs write path (SPEC §2/§15.6) — never a raw jsonb overwrite. + * `applyPushActions` reads only an optional `updatedAt` off the result + * (via `extractUpdatedAt`, tolerant of extra fields). + */ + importPageMarkdown( + pageId: string, + fullMarkdown: string, + ): Promise<{ updatedAt?: string; [key: string]: unknown }>; + + /** + * Create a new page and return the assigned id at `data.id` + * (`applyPushActions` reads `result.data.id`, then writes it back into the + * file's meta). An optional top-level/`data.updatedAt` feeds the loop-guard. + */ + createPage( + title: string, + content: string, + spaceId: string, + parentPageId?: string, + ): Promise<{ data: { id: string }; updatedAt?: string; [key: string]: unknown }>; + + /** Soft-delete a page to Trash (SPEC §8). Result is not inspected. */ + deletePage(pageId: string): Promise; + + /** + * Reparent a page (and optionally set its fractional-index `position`). The + * engine passes `position` UNDEFINED for now; the native impl computes a + * default between siblings (plan §3.2). Result is not inspected. + */ + movePage( + pageId: string, + parentPageId: string | null, + position?: string, + ): Promise; + + /** Change a page's title only (no body touch). 
Result is not inspected. */ + renamePage(pageId: string, title: string): Promise; + + // --- continuous (phase B+) ------------------------------------------------ + + /** + * Pages updated since `sinceIso` (the poll-safety reconciliation, SPEC §8). + * `spaceId` may be undefined (all spaces); `hardPageCap` bounds the walk. + */ + listRecentSince( + spaceId: string | undefined, + sinceIso: string | null, + hardPageCap?: number, + ): Promise; + + /** List soft-deleted (trashed) pages for the space (deletion detection). */ + listTrash(spaceId: string): Promise; + + /** Restore a soft-deleted page from Trash. Result is not inspected. */ + restorePage(pageId: string): Promise; +} diff --git a/packages/git-sync/src/engine/config-errors.ts b/packages/git-sync/src/engine/config-errors.ts new file mode 100644 index 00000000..a4c7a21b --- /dev/null +++ b/packages/git-sync/src/engine/config-errors.ts @@ -0,0 +1,46 @@ +import { ZodError } from 'zod'; + +// Turn a ZodError from settings validation into a clear, actionable startup +// message that names the offending env var(s), then exit(1) — no raw stack +// trace. Mirrors the Python new-project skeleton's load_settings_or_exit. +// A non-ZodError is left to propagate unchanged. +export function loadSettingsOrExit(factory: () => T): T { + try { + return factory(); + } catch (err) { + if (!(err instanceof ZodError)) throw err; + const missing: string[] = []; + const invalid: string[] = []; + for (const issue of err.issues) { + const name = issue.path.length ? String(issue.path[0]) : '?'; + // A missing required variable surfaces as an `invalid_type` issue whose + // received value was `undefined`. zod 3 exposed `issue.received` directly; + // zod 4 dropped that field and instead folds it into the message + // ("expected string, received undefined"). Detect both shapes so the + // missing-vs-invalid split holds across zod majors. 
NOTE: an invalid (but + // present) value uses a different code (invalid_format / invalid_value) or + // an `invalid_type` message that reports a non-undefined received (e.g. + // "received NaN" from a coerced number), so neither is misread as missing. + const i = issue as { received?: unknown; message?: string }; + const isMissing = + issue.code === 'invalid_type' && + (i.received === 'undefined' || + /received undefined/i.test(i.message ?? '')); + if (isMissing) missing.push(name); + else invalid.push(`${name}: ${issue.message}`); + } + const lines = ['Configuration error in environment / .env:']; + if (missing.length) { + lines.push(' Missing required variable(s):'); + for (const n of [...new Set(missing)]) lines.push(` - ${n}`); + } + if (invalid.length) { + lines.push(' Invalid value(s):'); + for (const item of invalid) lines.push(` - ${item}`); + } + lines.push(''); + lines.push('Set them in .env (see .env.example) and try again.'); + process.stderr.write(lines.join('\n') + '\n'); + process.exit(1); + } +} diff --git a/packages/git-sync/src/engine/git.ts b/packages/git-sync/src/engine/git.ts new file mode 100644 index 00000000..8a9a5947 --- /dev/null +++ b/packages/git-sync/src/engine/git.ts @@ -0,0 +1,663 @@ +/** + * Thin async wrapper over the system `git` binary (SPEC §5: state store = git). + * + * IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`, + * which is the vault's OWN git repository (default `data/vault`), SEPARATE from + * the docmost-sync source repo. This module MUST NEVER run git against the + * source repo. `data/` is gitignored by the source repo, so a nested repo under + * `data/vault` is safe. The pull cycle is READ-ONLY toward Docmost; this module + * only touches the local vault git, never a git remote (push is deferred, see + * SPEC §7). 
+ * + * Implementation notes: + * - We shell out via `node:child_process` `execFile` (promisified), passing + * ARGS AS AN ARRAY — no shell, so there is no command injection surface even + * if a page title / branch name contains shell metacharacters. + * - EVERY git invocation funnels through the single `runRaw` primitive, which + * ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git + * never blocks on a pager and always prints verbatim UTF-8 paths). There is + * no exception — even the `git --version` preflight goes through `runRaw`. + * - "nothing to commit" is treated as a graceful no-op, not an error. + */ +import { execFile } from "node:child_process"; +import { mkdir } from "node:fs/promises"; +import { promisify } from "node:util"; + +const execFileAsync = promisify(execFile); + +/** Bot identity used for engine-authored vault commits (SPEC §7.3). */ +export const BOT_AUTHOR_NAME = "Docmost Sync"; +export const BOT_AUTHOR_EMAIL = "docmost-sync@local"; + +/** Default branch the vault repo is initialized on. */ +export const DEFAULT_BRANCH = "main"; + +/** + * One row of `git diff --name-status` (SPEC §6 "ФС → Docmost"). `status` is the + * single-letter change code (`-M` rename detection on), `path` is the (new) file + * path; for a rename/copy (`R`/`C`) `oldPath` is the source and `path` is the + * destination, with `score` carrying git's similarity index (0–100). + */ +export interface DiffEntry { + status: "A" | "M" | "D" | "R" | "C"; + /** New (destination) path. For A/M/D it is the only path. */ + path: string; + /** Source path — present only for R/C. */ + oldPath?: string; + /** Rename/copy similarity score (0–100) — present only for R/C. */ + score?: number; +} + +/** Result of a `merge`: whether it succeeded cleanly or left conflict markers. */ +export interface MergeResult { + /** True when the merge applied cleanly (fast-forward or clean 3-way). 
*/ + ok: boolean; + /** True when the merge stopped on conflicts (markers left in the worktree). */ + conflict: boolean; + /** Raw combined stdout+stderr, for logging/diagnostics. */ + output: string; +} + +/** Options for an engine-authored commit (provenance, SPEC §7.3). */ +export interface CommitOptions { + authorName: string; + authorEmail: string; + /** + * Trailer lines appended to the commit message body (e.g. + * `Docmost-Sync-Source: docmost`). These are the machine-readable provenance + * the loop-guard keys on (SPEC §12, "commit-attribution"). + */ + trailers?: string[]; +} + +/** + * A git wrapper bound to a single vault path. Construct once per vault; every + * method runs git with `cwd = vaultPath`. + */ +export class VaultGit { + constructor(private readonly vaultPath: string) {} + + /** + * Preflight: verify a runnable `git` binary is on PATH. The daemon shells out + * to system `git` for every vault operation, so a missing binary (e.g. a slim + * container image without git) must fail fast with an actionable message + * rather than a cryptic ENOENT deep inside the first real git call. Presence + * check only — we do NOT gate on a specific version. Runs `git --version` + * with NO `cwd` (the vault dir may not exist yet at preflight time). + */ + async assertGitAvailable(): Promise { + // Goes through the single `runRaw` primitive like every other invocation. + // `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at + // preflight time, so we must not point git at a missing directory. + const r = await this.runRaw(["--version"], { cwd: null }); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error( + "git binary not found or not runnable — install git (the vault state " + + `store requires it). Underlying error: ${detail}`, + ); + } + } + + /** + * Run a git command in the vault and return trimmed stdout. 
THIN wrapper over + * the single `runRaw` primitive: throws a clear, unified Error (including + * stderr/stdout) on a non-zero exit. + */ + private async run( + args: string[], + opts?: { cwd?: string | null; env?: Record }, + ): Promise { + const r = await this.runRaw(args, opts); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git ${args.join(" ")} failed: ${detail}`); + } + return r.stdout.trim(); + } + + /** + * The ONE primitive every git invocation in this module flows through. Builds + * the full argv (`--no-pager -c core.quotepath=false `), env, cwd, and + * maxBuffer, runs git, and NEVER throws — it returns the exit info so callers + * can treat a non-zero exit as either an error (`run`) or a meaningful state + * (e.g. a merge conflict, a porcelain diff that "fails" deliberately). + * + * - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never + * blocks on a pager and always prints verbatim UTF-8 paths (no octal + * escaping/quoting). `quotepath=false` is the baseline for ALL path- + * printing commands (ls-files, diff --name-only, …). + * - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the + * vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`. + * - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras). + * - On a spawn/exec error we capture the error `message` too, so a failure + * before git could write to stderr (e.g. ENOENT) is NOT lost. + */ + private async runRaw( + args: string[], + opts?: { cwd?: string | null; env?: Record }, + ): Promise<{ code: number; stdout: string; stderr: string }> { + const cwd = opts?.cwd === null ? undefined : (opts?.cwd ?? this.vaultPath); + try { + const { stdout, stderr } = await execFileAsync( + "git", + ["--no-pager", "-c", "core.quotepath=false", ...args], + { + // Generous buffer: file listings / porcelain output on a large vault + // can be sizable. + ...(cwd !== undefined ? 
{ cwd } : {}),
          maxBuffer: 64 * 1024 * 1024,
          env: vaultGitEnv(opts?.env),
        },
      );
      return { code: 0, stdout, stderr };
    } catch (err: unknown) {
      const e = err as {
        code?: number;
        stdout?: string;
        stderr?: string;
        message?: string;
      };
      return {
        code: typeof e.code === "number" ? e.code : 1,
        stdout: e.stdout ?? "",
        // Preserve the error message when there is no stderr (e.g. a spawn
        // failure like ENOENT, where promisified execFile sets stderr to an
        // EMPTY STRING — so `||`, not `??`, to fall through to `message`).
        stderr: e.stderr || e.message || "",
      };
    }
  }

  /**
   * Ensure the vault directory exists and is an initialized git repo on `main`
   * with an initial (empty) commit so branches exist. Idempotent: safe to call
   * on every run. Sets a LOCAL bot identity for the vault repo if none is set
   * (so engine commits never fall back to a global/unset identity).
   *
   * @throws Error when the correctness-critical local git config cannot be
   *   pinned (e.g. an unwritable or locked `.git/config`).
   */
  async ensureRepo(): Promise<void> {
    await mkdir(this.vaultPath, { recursive: true });

    if (!(await this.isRepo())) {
      // `git init -b main` sets the initial branch on modern git; we still
      // guard the branch name below for safety on older binaries.
      await this.run(["init", "-b", DEFAULT_BRANCH]);
    }

    // Set a local identity for the vault repo if unset, so engine commits have
    // a deterministic committer even on a machine with no global git config.
    if (!(await this.hasLocalConfig("user.name"))) {
      await this.run(["config", "user.name", BOT_AUTHOR_NAME]);
    }
    if (!(await this.hasLocalConfig("user.email"))) {
      await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
    }

    // Neutralize correctness-affecting git config in the vault's LOCAL config so
    // a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just
    // output) and corrupt the vault. The vault is OUR dedicated repo, so LOCAL
    // values (which override global/system) are the right scope. Set
    // UNCONDITIONALLY every run — idempotent and cheap; `git config <key> <value>`
    // writes to `--local` by default inside the repo. These MUST be in place
    // before any add/commit/checkout that could be affected, hence they run
    // before the initial-commit block below.
    //  - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true
    //    would rewrite LF<->CRLF on add/checkout, making our deterministic,
    //    byte-stable markdown churn and breaking the round-trip invariant.
    //    `false` guarantees git stores/checks out verbatim bytes.
    //  - core.safecrlf=false — avoid CRLF-related warnings/aborts on add.
    //  - commit.gpgsign=false — the headless daemon must never try to GPG-sign
    //    a commit (would fail/hang; we already set GIT_TERMINAL_PROMPT=0).
    //  - core.attributesFile=/dev/null — neutralize the user's GLOBAL
    //    gitattributes so a global clean/smudge filter (filter.<name>.clean)
    //    cannot rewrite the STORED blob and break §11 byte-stability (a config
    //    that core.autocrlf=false does not cover). POSIX-only path, which is
    //    fine: the daemon runs on Linux (Docker) / macOS. A system
    //    /etc/gitattributes remains the host admin's domain (out of scope).
    // NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
    // human running git by hand in the vault must inherit the same neutralized
    // behavior; a transient `-c` would not persist. (core.quotepath, by
    // contrast, only affects OUR parsing of output and so is baked into the
    // `runRaw` argv baseline instead.)
    try {
      await this.run(["config", "core.autocrlf", "false"]);
      await this.run(["config", "core.safecrlf", "false"]);
      await this.run(["config", "commit.gpgsign", "false"]);
      await this.run(["config", "core.attributesFile", "/dev/null"]);
    } catch (err: unknown) {
      const detail = err instanceof Error ? err.message : String(err);
      throw new Error(
        `failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` +
          "/.git/config is writable and not locked (e.g. stale config.lock): " +
          detail,
      );
    }

    // Create the initial empty commit on `main` if the repo has no commits yet,
    // so both `main` and (later) `docmost` branches have a common base.
    if (!(await this.hasAnyCommit())) {
      // Make sure we are on the default branch before the first commit (covers
      // the older-git case where `init -b` was not honored).
      await this.run(["checkout", "-B", DEFAULT_BRANCH]);
      await this.commitRaw("init vault", {
        authorName: BOT_AUTHOR_NAME,
        authorEmail: BOT_AUTHOR_EMAIL,
        allowEmpty: true,
      });
    }
  }

  /**
   * True if `cwd` is the TOP LEVEL of a git work-tree, i.e. the vault itself is
   * an initialized repo. Merely being INSIDE some enclosing work-tree is not
   * enough: if the vault directory were nested in another repository, treating
   * it as initialized would skip `git init` and make every later config write
   * and commit land in that parent repo. `--show-prefix` prints an empty line
   * only at the top level, so a non-empty prefix means "nested, not ours".
   */
  private async isRepo(): Promise<boolean> {
    const r = await this.runRaw([
      "rev-parse",
      "--is-inside-work-tree",
      "--show-prefix",
    ]);
    if (r.code !== 0) return false;
    // rev-parse answers its options in order, one line each.
    const [inside = "", prefix = ""] = r.stdout.split("\n");
    return inside.trim() === "true" && prefix.trim() === "";
  }

  /** True if a LOCAL git config key is set (non-empty) in the vault repo. */
  private async hasLocalConfig(key: string): Promise<boolean> {
    const r = await this.runRaw(["config", "--local", "--get", key]);
    return r.code === 0 && r.stdout.trim().length > 0;
  }

  /** True if the repo has at least one commit (HEAD resolves). */
  private async hasAnyCommit(): Promise<boolean> {
    const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]);
    return r.code === 0;
  }

  /** True if a branch with the given name exists. */
  async branchExists(name: string): Promise<boolean> {
    const r = await this.runRaw([
      "rev-parse",
      "--verify",
      `refs/heads/${name}`,
    ]);
    return r.code === 0;
  }

  /**
   * Create `name` from `fromBranch` if it does not already exist. No-op (and no
   * checkout) when the branch is already present.
   */
  async ensureBranch(name: string, fromBranch: string): Promise<void> {
    if (await this.branchExists(name)) return;
    await this.run(["branch", name, fromBranch]);
  }

  /** Name of the currently checked-out branch.
*/ + async currentBranch(): Promise { + return this.run(["rev-parse", "--abbrev-ref", "HEAD"]); + } + + /** Check out an existing branch. */ + async checkout(name: string): Promise { + await this.run(["checkout", name]); + } + + /** Stage everything (adds, modifications, deletions). */ + async stageAll(): Promise { + await this.run(["add", "-A"]); + } + + /** + * True if the vault is mid-merge (an unresolved merge from a previous run, + * SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged + * (conflicted) index entries (`git ls-files -u`). The pull cycle checks this + * BEFORE any checkout so a left-over merge produces a clear, actionable + * message instead of a raw "you need to resolve your current index first" + * failure deep inside `checkout`. This is what makes re-runs converge + * (resumability, SPEC §12). + */ + async isMergeInProgress(): Promise { + // MERGE_HEAD exists exactly while a merge is in progress. + const mergeHead = await this.runRaw([ + "rev-parse", + "--verify", + "--quiet", + "MERGE_HEAD", + ]); + if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0) return true; + // Fallback / belt-and-suspenders: any unmerged index entries also mean the + // working tree is mid-conflict and a checkout would refuse. + const unmerged = await this.runRaw(["ls-files", "-u"]); + return unmerged.code === 0 && unmerged.stdout.trim().length > 0; + } + + /** + * Commit the currently STAGED changes with an explicit author/committer + * identity and the given trailers appended to the message body (SPEC §7.3 + * provenance). Returns `true` if a commit was made, `false` if there was + * nothing to commit (graceful no-op). The caller is expected to have staged + * its changes first (e.g. via `stageAll`). + */ + async commit(message: string, opts: CommitOptions): Promise { + // Nothing staged -> nothing to commit. 
Treat as a no-op (SPEC §11: a + // deterministic re-pull of unchanged pages produces identical bytes, so + // git sees no diff and we must not error). + const staged = await this.runRaw([ + "diff", + "--cached", + "--quiet", + ]); + // `diff --cached --quiet` exits 0 when the index matches HEAD (nothing + // staged), 1 when there are staged changes. + if (staged.code === 0) return false; + + await this.commitRaw(message, opts); + return true; + } + + /** + * Low-level commit used by both `commit` and `ensureRepo`'s initial commit. + * Builds the full message with appended trailers and sets author + committer + * identity via env vars (so the committer matches the author, not the repo + * default). + */ + private async commitRaw( + message: string, + opts: CommitOptions & { allowEmpty?: boolean }, + ): Promise { + const fullMessage = buildCommitMessage(message, opts.trailers); + // `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath + // (or any injected hook) must never interfere with engine commits in our + // dedicated vault repo. + const args = ["commit", "--no-verify", "-m", fullMessage]; + if (opts.allowEmpty) args.push("--allow-empty"); + + // Route through the single `runRaw` primitive; set author + committer + // identity via env vars (so the committer matches the author, not the repo + // default). Throw via the same unified message on a non-zero exit. + const r = await this.runRaw(args, { + env: { + GIT_AUTHOR_NAME: opts.authorName, + GIT_AUTHOR_EMAIL: opts.authorEmail, + GIT_COMMITTER_NAME: opts.authorName, + GIT_COMMITTER_EMAIL: opts.authorEmail, + }, + }); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git ${args.join(" ")} failed: ${detail}`); + } + } + + /** + * Merge `fromBranch` into the current branch (`git merge --no-edit`). + * Fast-forwards when possible; performs a real 3-way merge otherwise. 
Conflict + * state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict + * markers are left in the worktree for manual resolution by a later increment, + * and — critically — nothing is pushed to Docmost (we never write to Docmost + * anyway). + */ + async merge(fromBranch: string): Promise { + const r = await this.runRaw(["merge", "--no-edit", fromBranch]); + const output = `${r.stdout}\n${r.stderr}`.trim(); + if (r.code === 0) { + return { ok: true, conflict: false, output }; + } + // A non-zero exit on merge most commonly means a conflict. Confirm by + // checking for unmerged paths (porcelain "U" status) so we don't mislabel + // an unrelated failure as a conflict. + const conflict = await this.hasUnmergedPaths(); + return { ok: false, conflict, output }; + } + + /** True if the index has any unmerged (conflicted) paths. */ + private async hasUnmergedPaths(): Promise { + const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]); + return r.code === 0 && r.stdout.trim().length > 0; + } + + /** + * List tracked files on the current branch (paths relative to the vault + * root, forward-slash separated). An optional glob (a git pathspec) narrows + * the listing, e.g. `"*.md"`. + * + * The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic + * (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files` + * returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`), + * which `src/pull.ts` `readExisting` would then parse as garbage paths, + * breaking move/duplicate detection. We defeat that two ways at once: + * - `core.quotepath=false` disables the octal-escape/quoting. It is now the + * `runRaw` argv baseline (prepended to EVERY invocation), so we no longer + * pass it inline here. + * - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline + * ambiguity), which we split on `\0`. 
+ * We read the RAW stdout (NOT the trimming `run()` helper, which would mangle + * the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths + * are returned verbatim — git already emits forward slashes. + */ + async listTrackedFiles(glob?: string): Promise { + const r = await this.runRaw(["ls-files", "-z", ...(glob ? [glob] : [])]); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git ls-files failed: ${detail}`); + } + return r.stdout.split("\0").filter((p) => p.length > 0); + } + + /** + * Diff two refs with `--name-status -M -z` and parse the NUL-delimited output + * (SPEC §6: the FS→Docmost push direction diffs `main` against + * `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed + * file is reported as a single `R` row with both its old and new path instead + * of a delete+add pair — that distinction is what lets the push planner tell a + * move from a delete+create (SPEC §8 "Move vs delete"). + * + * `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has + * Cyrillic file names) with NO quoting/escaping. The record shape differs by + * status: + * - A/M/D: `status\0path\0` + * - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`) + * We read the RAW stdout (not the trimming `run()` helper, which would mangle + * the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the + * tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim. + */ + async diffNameStatus( + fromRef: string, + toRef: string, + ): Promise { + const r = await this.runRaw([ + "diff", + "--name-status", + "-M", + "-z", + fromRef, + toRef, + ]); + if (r.code !== 0) { + const detail = (r.stderr || r.stdout || "").trim(); + throw new Error(`git diff --name-status failed: ${detail}`); + } + // Tokens alternate: ... With `-z`, + // each token (status code AND each path) is its own NUL-delimited field. 
+ const tokens = r.stdout.split("\0").filter((t) => t.length > 0); + const entries: DiffEntry[] = []; + let i = 0; + while (i < tokens.length) { + const raw = tokens[i++]; + // The status token is e.g. `A`, `M`, `D`, or `R100` / `C075`. The leading + // letter is the change kind; any trailing digits are the similarity score. + const letter = raw[0] as DiffEntry["status"]; + if (letter === "R" || letter === "C") { + const score = Number.parseInt(raw.slice(1), 10); + const oldPath = tokens[i++]; + const path = tokens[i++]; + if (oldPath === undefined || path === undefined) break; // malformed tail + entries.push({ + status: letter, + path, + oldPath, + ...(Number.isFinite(score) ? { score } : {}), + }); + } else if (letter === "A" || letter === "M" || letter === "D") { + const path = tokens[i++]; + if (path === undefined) break; // malformed tail + entries.push({ status: letter, path }); + } else { + // Unknown/other status (e.g. T type-change, U unmerged) — consume one + // path token defensively so the walk stays aligned, but do not emit it + // (the push planner only handles A/M/D/R/C). + i++; + } + } + return entries; + } + + /** + * Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist. + * `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an + * unknown ref, so a non-zero exit maps cleanly to `null`. Used to read + * `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push. + */ + async revParse(ref: string): Promise { + const r = await this.runRaw(["rev-parse", "--verify", "--quiet", ref]); + if (r.code !== 0) return null; + const sha = r.stdout.trim(); + return sha.length > 0 ? sha : null; + } + + /** + * Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`, + * named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5: + * "что из `main` уже отражено в Docmost"). 
+ */ + async readRef(ref: string): Promise { + return this.revParse(ref); + } + + /** + * Point `ref` at `target` (`git update-ref `). Used to advance + * `refs/docmost/last-pushed` to the just-pushed `main` commit after a push + * (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts. + */ + async updateRef(ref: string, target: string): Promise { + await this.run(["update-ref", ref, target]); + } + + /** + * Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward, + * i.e. the current `branch` tip is an ancestor of `toCommit` (verified via + * `git merge-base --is-ancestor `). Used to advance the + * `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a + * push succeeds, Docmost already contains the pushed `main` content, so the + * mirror must reflect it — otherwise the NEXT pull would diff our own write + * back and re-pull it (loop-guard). + * + * SAFETY — never force, never clobber divergent history: + * - If `branch` IS an ancestor of `toCommit`, advance it with + * `git update-ref refs/heads/ `. The `docmost` branch is + * NOT checked out during a push (push works on `main`), so updating the ref + * directly is safe and avoids any working-tree touch. + * - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward), + * do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and + * let the caller log it. We must never overwrite a `docmost` history that + * has commits the push base does not contain. + * + * Returns `{ ok: true }` when the branch was advanced (or already at + * `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise. + * A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason. 
+ */ + async fastForwardBranch( + branch: string, + toCommit: string, + ): Promise<{ ok: boolean; reason?: string }> { + const branchRef = `refs/heads/${branch}`; + // Resolve both endpoints first so a missing ref is a clean refusal, not a + // confusing `merge-base` failure. + const branchSha = await this.revParse(branchRef); + if (branchSha === null) { + return { ok: false, reason: `branch ${branch} does not exist` }; + } + const targetSha = await this.revParse(toCommit); + if (targetSha === null) { + return { ok: false, reason: `target ${toCommit} does not resolve` }; + } + // Already at the target -> a no-op fast-forward (still ok). + if (branchSha === targetSha) return { ok: true }; + + // `merge-base --is-ancestor A B` exits 0 iff A is an ancestor of B. Only a + // true ancestor is a fast-forward; anything else is divergent and refused. + const ancestor = await this.runRaw([ + "merge-base", + "--is-ancestor", + branchSha, + targetSha, + ]); + if (ancestor.code !== 0) { + return { ok: false, reason: "not-fast-forward" }; + } + + // Safe to advance: the branch is not checked out during push, so a direct + // ref update avoids a checkout/working-tree touch. + await this.updateRef(branchRef, targetSha); + return { ok: true }; + } + + /** + * Read a file's content at a specific ref (`git show :`), or `null` + * if the path does not exist there. Used by the push direction to read the + * PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its + * `docmost:meta` — and therefore its `pageId` — can be recovered to translate + * the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones + * that had a pageId, are deleted in Docmost). A non-zero exit (path absent at + * that ref) maps to `null` rather than throwing. + */ + async showFileAtRef(ref: string, path: string): Promise { + // `git show :` requires the path relative to the repo root; pass + // it verbatim (forward-slash, matching `listTrackedFiles` / diff output). 
+ const r = await this.runRaw(["show", `${ref}:${path}`]); + if (r.code !== 0) return null; + return r.stdout; + } +} + +/** + * Build the environment for a vault git invocation (SPEC §12 cwd-isolation). + * Used by the single `runRaw` primitive every git command flows through, so + * these pins apply uniformly (including the `git --version` preflight). + * + * cwd-isolation is this module's central safety guarantee: every git command + * MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An + * inherited `GIT_DIR` / `GIT_WORK_TREE` in `process.env` would silently + * redirect the operation away from `cwd` (e.g. to the source repo or another + * checkout), defeating that guarantee. So we always strip them, regardless of + * whatever else the caller adds (author/committer identity, etc.). + * + * Exported for unit testing. + */ +export function vaultGitEnv( + extra?: Record, +): NodeJS.ProcessEnv { + const env: NodeJS.ProcessEnv = { + ...process.env, + // Locale-independent output (defense in depth). We never parse localized + // prose, but pinning the locale prevents a future regression where some + // git message we DO key on is translated by an inherited LC_ALL/LANG. + LC_ALL: "C", + LANG: "C", + // Never page (we already pass --no-pager, but a stray GIT_PAGER could still + // bite) and never block on an interactive prompt (e.g. credentials) — the + // daemon runs unattended and must not hang. + GIT_PAGER: "cat", + GIT_TERMINAL_PROMPT: "0", + ...extra, + }; + delete env.GIT_DIR; + delete env.GIT_WORK_TREE; + return env; +} + +/** + * Build a commit message body with trailer lines appended (SPEC §7.3). The + * trailers are separated from the subject by a blank line so `git interpret- + * trailers` / `git log --format=%(trailers)` parse them as trailers. + * Exported for unit testing. 
+ */ +export function buildCommitMessage( + subject: string, + trailers?: string[], +): string { + if (!trailers || trailers.length === 0) return subject; + return `${subject}\n\n${trailers.join("\n")}`; +} diff --git a/packages/git-sync/src/engine/pull.ts b/packages/git-sync/src/engine/pull.ts new file mode 100644 index 00000000..e40c0f10 --- /dev/null +++ b/packages/git-sync/src/engine/pull.ts @@ -0,0 +1,425 @@ +/** + * Pull cycle — Docmost -> vault (SPEC §6 "Docmost -> ФС"). + * + * This increment turns the read-only mirror into the git-backed pull cycle: + * + * 1. ensureRepo(vault); refuse if a merge is in progress (SPEC §9/§12); + * ensureBranch("docmost", "main") (SPEC §5 branches) + * 2. checkout docmost + * 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the + * desired `live` files (relPath via the pure sanitize/disambiguation layout) + * 4. parse `existing` tracked .md files (pageId + relPath from docmost:meta) + * 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete + * is absence-only, moves are separate + * 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree + * fetch (SPEC §8) and behind the mass-delete guard (defense in depth) + * 7. write each live page in its fixpoint form (normalize-on-write, SPEC §11); + * apply moved-old-path removals (only when the move write SUCCEEDED) and + * absence-delete removals (only when the decision allowed them) + * 8. stageAll + commit on `docmost` with the provenance trailer (SPEC §7.3) + * 9. checkout main + merge docmost (conflicts are surfaced, NOT auto-resolved, + * SPEC §9); push is deferred (SPEC §7) + * 10. one-line summary + * + * DIRECTION IS Docmost -> vault ONLY. Nothing here ever writes to Docmost + * (read-only: listSpaceTree + getPageJson). All git operations run against + * the vault repo (`cwd = vaultPath`), never the source repo (see ./git.ts). 
+ * + * VENDORED into gitmost (plan §2.1/§3.1): the client seam is the native + * `GitSyncClient` (`Pick`), not the upstream REST + * `DocmostClient`; the upstream CLI `main()` entry point is dropped (the gitmost + * server drives the engine in-process). Engine LOGIC is byte-identical. + */ +import { dirname } from "node:path"; +import { sep } from "node:path"; +import { parseDocmostMarkdown } from "../lib/index"; +import type { GitSyncClient } from "./client.types"; +import { buildVaultLayout, type PageNode } from "./layout"; +import { + VaultGit, + BOT_AUTHOR_NAME, + BOT_AUTHOR_EMAIL, + DEFAULT_BRANCH, +} from "./git"; +import { + planReconciliation, + decideAbsenceDeletions, + type LiveEntry, + type MovedEntry, + type DeletionDecision, +} from "./reconcile"; +import { stabilizePageFile, type PageMeta } from "./stabilize"; + +// Engine-only mirror branch (SPEC §5): the engine writes here, humans never do. +const DOCMOST_BRANCH = "docmost"; +// Machine-readable provenance the loop-guard keys on (SPEC §7.3 / §12). +const SOURCE_TRAILER = "Docmost-Sync-Source: docmost"; + +// Number of pages fetched/stabilized concurrently. Bounded so a large space +// does not open thousands of simultaneous requests/conversions at once. +const CONCURRENCY = 6; +// How often to log incremental progress (every N completed pages). +const PROGRESS_EVERY = 25; + +/** Convert a vault-relative path (forward-slash) to an absolute FS path. */ +function relToAbs(vaultRoot: string, relPath: string): string { + return [vaultRoot, ...relPath.split("/")].join("/"); +} + +/** Convert an absolute/relative segment list under the vault to a relPath. */ +function segmentsToRelPath(segments: string[], stem: string): string { + return [...segments, `${stem}.md`].join("/"); +} + +/** + * Injectable IO for `readExisting` (R-Pull-1, test-strategy report §5). 
The real + * `main` wires these to `git.listTrackedFiles("*.md")` and an `fs.readFile` + * rooted at the vault; tests pass fakes so the parsing/skip rules are unit- + * testable without a real git repo or filesystem. + */ +export interface ReadExistingDeps { + /** List tracked .md paths (forward-slash, vault-relative). */ + listTracked: () => Promise; + /** Read a tracked file's text by its (forward-slash) vault-relative path. */ + readFile: (relPath: string) => Promise; +} + +/** + * Read every tracked .md file in the vault and parse its `docmost:meta` to + * recover `{ pageId, relPath }`. Files without a parseable pageId in meta are + * skipped (they are not engine-tracked pages — e.g. a stray hand-written file). + * + * The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules: + * - a `readFile` rejection (tracked but missing on disk, a mid-operation race) + * -> skipped, NOT thrown; the next pull converges; + * - unparseable meta (`parseDocmostMarkdown` throws) -> skipped; + * - parseable but no `pageId` in meta -> skipped. + */ +export async function readExisting( + deps: ReadExistingDeps, +): Promise<{ pageId: string; relPath: string }[]> { + const tracked = await deps.listTracked(); + const existing: { pageId: string; relPath: string }[] = []; + for (const relPath of tracked) { + // git ls-files always emits forward-slash paths; normalize just in case. + const rel = relPath.split(sep).join("/"); + let text: string; + try { + text = await deps.readFile(rel); + } catch { + // Tracked but missing on disk (mid-operation race) — skip; the next pull + // converges. + continue; + } + let pageId: string | undefined; + try { + const { meta } = parseDocmostMarkdown(text); + pageId = meta?.pageId; + } catch { + // Unparseable meta — not engine-tracked; leave it alone. + pageId = undefined; + } + if (pageId) existing.push({ pageId, relPath: rel }); + } + return existing; +} + +/** + * Input to the PURE `computePullActions` (R-Pull-2). 
 * All data, no IO: the live tree nodes + completeness flag (from
 * `listSpaceTree`) and the parsed `existing` tracked files (from
 * `readExisting`).
 */
export interface PullActionsInput {
  /** Live page nodes for the space (from `listSpaceTree`). */
  pages: PageNode[];
  /** Whether the live tree fetch was COMPLETE (SPEC §8 suppression). */
  treeComplete: boolean;
  /** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */
  existing: { pageId: string; relPath: string }[];
}

/**
 * The PURE decisions object computed by `computePullActions` (no IO). It holds
 * the reconciliation plan plus the SPEC §8 absence-deletion decision, with the
 * suppression already folded in: `toDelete` is the POST-suppression set the
 * caller should actually remove (empty when `deletionDecision.apply` is false).
 */
export interface PullActions {
  /** Pages to (re)write at their relPath (add + update + move target). */
  toWrite: { pageId: string; relPath: string }[];
  /** Moves: write new path, then remove old path (only on a successful write). */
  moved: MovedEntry[];
  /**
   * Absence-based paths to delete AFTER suppression. Empty when the decision
   * suppressed deletions this cycle, so the caller can apply it unconditionally.
   */
  toDelete: string[];
  /** Why absence deletions were (or were not) applied (for logging + tests). */
  deletionDecision: DeletionDecision;
  /** Tracked-file count (for the suppression log messages). */
  existingCount: number;
  /** Planned absence-delete count BEFORE suppression (for the log message). */
  plannedDeleteCount: number;
}

/**
 * PURE pull-action planner (R-Pull-2, test-strategy report §5).
Takes the live + * tree nodes + completeness + existing tracked files and returns the full set of + * decisions with NO IO: + * + * - builds the vault layout (deterministic relPath per live page), + * - `planReconciliation` -> toWrite / moved / absence-toDelete, + * - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch + + * empty-live + mass-delete guard), folded IN here so `toDelete` is the + * POST-suppression set (empty when suppressed). + * + * Moves are NOT governed by the suppression: a moved page is present in `live`, + * so its old-path removal is real (the caller still gates it on the write + * succeeding). The expensive content fetch / file write / git ops happen in the + * thin `applyPullActions`. + */ +export function computePullActions(input: PullActionsInput): PullActions { + const { pages, treeComplete, existing } = input; + const layout = buildVaultLayout(pages); + + const live: LiveEntry[] = []; + for (const p of pages) { + if (!p || !p.id) continue; + const entry = layout.get(p.id); + if (!entry) continue; + live.push({ + pageId: p.id, + relPath: segmentsToRelPath(entry.segments, entry.stem), + }); + } + + // Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only; + // `plan.moved` carries move old-path removals separately. + const plan = planReconciliation(live, existing); + + // Decide whether the ABSENCE-based deletions may be applied this cycle + // (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard. + // Moves are NOT governed by this. + const deletionDecision = decideAbsenceDeletions({ + treeComplete, + liveCount: live.length, + existingCount: existing.length, + deleteCount: plan.toDelete.length, + }); + + return { + toWrite: plan.toWrite, + moved: plan.moved, + // Fold the suppression in: a suppressed cycle deletes nothing. + toDelete: deletionDecision.apply ? 
plan.toDelete : [], + deletionDecision, + existingCount: existing.length, + plannedDeleteCount: plan.toDelete.length, + }; +} + +/** + * Injectable IO for `applyPullActions` (R-Pull-2). The real `main` wires these + * to the live client, the vault git wrapper, and `node:fs/promises`; tests pass + * fakes that RECORD calls so the ordering + the move-on-success data-loss guard + * are testable without real git/fs/network. + */ +export interface ApplyPullActionsDeps { + client: Pick; + git: Pick; + /** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */ + writeFile: (absPath: string, text: string) => Promise; + /** Recursive mkdir of an ABSOLUTE directory path. */ + mkdir: (absDir: string) => Promise; + /** Remove a file by ABSOLUTE path (force: a missing file is a no-op). */ + rm: (absPath: string) => Promise; +} + +/** Outcome counters from `applyPullActions` (for the summary + tests). */ +export interface ApplyResult { + written: number; + movedApplied: number; + deleted: number; + failed: number; + committed: boolean; + merge: { ok: boolean; conflict: boolean; output: string }; +} + +/** + * THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current + * order, with all the original safety guards preserved bit-for-bit: + * + * 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize + * (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page + * never aborts the pull (bounded-concurrency pool, fault-tolerant). + * 2. apply MOVE old-path removals — ONLY when the planner marked the old path + * removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a + * failed move-write keeps the old path so the page never vanishes). + * 3. apply (post-suppression) absence deletes. + * 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted + * counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9). 
+ * + * `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps. + */ +export async function applyPullActions( + deps: ApplyPullActionsDeps, + actions: PullActions, + vaultRoot: string, +): Promise { + const { client, git } = deps; + + // Emit the SPEC §8 suppression warnings (preserved from the original `main`). + const decision = actions.deletionDecision; + if (!decision.apply) { + if (decision.reason === "incomplete-fetch") { + console.warn( + "pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)", + ); + } else if (decision.reason === "empty-live") { + console.warn( + `pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` + + `tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` + + `Docmost is reachable.`, + ); + } else { + console.warn( + `pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` + + `tracked file(s) (mass-delete guard) — deletions suppressed this ` + + `cycle (SPEC §8). Verify the live Docmost tree, then re-run.`, + ); + } + } + + // 1. Write each live page in its fixpoint form (normalize-on-write, SPEC §11). + let written = 0; + let failed = 0; + let completed = 0; + let nextIndex = 0; + // pageIds whose write FAILED. A moved page whose new-path write failed must + // NOT have its old path removed (otherwise the page vanishes entirely). + const failedPageIds = new Set(); + + const writeOne = async (w: { + pageId: string; + relPath: string; + }): Promise => { + try { + const page = await client.getPageJson(w.pageId); + const meta: PageMeta = { + version: 1, + pageId: page.id, + slugId: page.slugId, + title: page.title, + spaceId: page.spaceId, + parentPageId: page.parentPageId ?? 
null, + }; + const text = await stabilizePageFile(page.content, meta); + const abs = relToAbs(vaultRoot, w.relPath); + await deps.mkdir(dirname(abs)); + await deps.writeFile(abs, text); + written++; + } catch (err) { + failed++; + failedPageIds.add(w.pageId); + console.error( + `pull: failed page ${w.pageId}:`, + err instanceof Error ? err.message : String(err), + ); + } finally { + completed++; + if (completed % PROGRESS_EVERY === 0) { + console.log(`pulled ${completed}/${actions.toWrite.length}`); + } + } + }; + + // Bounded-concurrency pool (dependency-free): a fixed set of runners each + // take the next index until the write list is exhausted. One bad page never + // aborts the whole pull (mirrors the fault-tolerant tree walk). + const runner = async (): Promise => { + while (true) { + const i = nextIndex++; + if (i >= actions.toWrite.length) return; + await writeOne(actions.toWrite[i]); + } + }; + await Promise.all( + Array.from( + { length: Math.min(CONCURRENCY, actions.toWrite.length) || 1 }, + () => runner(), + ), + ); + + // Helper: `rm` with force:true is a no-op if the file is already gone. + const removePath = async (rel: string, what: string): Promise => { + try { + await deps.rm(relToAbs(vaultRoot, rel)); + return true; + } catch (err) { + console.error( + `pull: failed to ${what} ${rel}:`, + err instanceof Error ? err.message : String(err), + ); + return false; + } + }; + + // 2. Apply MOVE old-path removals. A moved page IS present in `live`, so its + // old path is genuinely stale — NOT subject to the incomplete-fetch + // suppression. BUT only remove the old path when (a) the planner marked it + // removable (not reused by another live page) AND (b) the new-path write + // actually SUCCEEDED — otherwise we would delete the only copy of a page + // whose move-write failed (⭐ data-loss guard). 
+ let movedApplied = 0; + for (const m of actions.moved) { + if (!m.removeOldPath) continue; + if (failedPageIds.has(m.pageId)) { + console.warn( + `pull: move write for ${m.pageId} failed — keeping old path ` + + `${m.fromRelPath} (SPEC §8)`, + ); + continue; + } + if (await removePath(m.fromRelPath, "remove moved old path")) movedApplied++; + } + + // 3. Apply ABSENCE-based deletions — `actions.toDelete` is ALREADY the + // post-suppression set (empty when the decision suppressed them, SPEC §8). + let deleted = 0; + for (const rel of actions.toDelete) { + if (await removePath(rel, "delete")) deleted++; + } + + // 4. Stage + commit on `docmost` (only if there is something to commit). + // Deterministic stabilized output means unchanged pages produce identical + // bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the + // ACTUAL work applied (pages written + files deleted), not the planned size, + // so a run with failures does not over-report (SPEC §5 nit). + const subject = + deleted > 0 + ? `docmost: sync ${written} page(s), ${deleted} deleted` + : `docmost: sync ${written} page(s)`; + await git.stageAll(); + const committed = await git.commit(subject, { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + trailers: [SOURCE_TRAILER], + }); + + // Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9); + // we never push to Docmost. Push to a git remote is deferred (SPEC §7). + await git.checkout(DEFAULT_BRANCH); + const merge = await git.merge(DOCMOST_BRANCH); + if (merge.conflict) { + console.error( + "pull: merge of docmost -> main CONFLICTED. Conflict markers were left " + + "in the vault for manual resolution (SPEC §9). Nothing is pushed to " + + "Docmost (read-only). 
Resolve locally, then re-run.", + ); + } else if (!merge.ok) { + console.error(`pull: merge of docmost -> main failed: ${merge.output}`); + } + console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7)."); + + return { written, movedApplied, deleted, failed, committed, merge }; +} diff --git a/packages/git-sync/src/engine/push.ts b/packages/git-sync/src/engine/push.ts new file mode 100644 index 00000000..0c037827 --- /dev/null +++ b/packages/git-sync/src/engine/push.ts @@ -0,0 +1,1268 @@ +/** + * Push cycle — vault -> Docmost (SPEC §6 "ФС → Docmost"), FIRST increment. + * + * This module mirrors the structure of `./pull.ts`: a set of VaultGit diff/ref + * primitives (in `./git.ts`), a PURE planner (`computePushActions`) that turns + * a git diff into a classified action set with NO IO, and a THIN injectable + * applier (`applyPushActions`) exercised in tests via fakes only. + * + * Direction is vault -> Docmost. The diff is `main` against + * `refs/docmost/last-pushed` (SPEC §6 step 2); each `A`/`M`/`D`/`R` row is + * translated into a Docmost mutation by `pageId` identity (SPEC §4): + * - A without pageId -> create_page (then write the assigned pageId back). + * - A with pageId -> update (restored/copied file; the page already exists). + * - M -> update content (collab/Yjs path, SPEC §2/§15.6). + * - D -> delete_page (pageId recovered from the PRE-IMAGE meta). + * - R -> rename/move (CLASSIFIED here, APPLIED in push #3). + * + * MOVE/RENAME APPLY (push #3) — DONE here. `classifyRenameMoves` (PURE) resolves + * each `renamesMoves` entry into the Docmost op(s) it needs, comparing the PATH- + * derived parent (SPEC §5: the file path is the source of truth for tree + * position, NOT stale `meta.parentPageId`) and the meta title; `applyPushActions` + * then calls `move_page` / `rename_page` (both for a reparent+retitle), or + * records a NO-OP for a cosmetic local-only file-path rename. 
/**
 * VENDORED into gitmost (plan §2.1/§3.1): the client seam is the native
 * `GitSyncClient` (consumed through `Pick<GitSyncClient, …>` subsets), not the
 * upstream REST `DocmostClient`; the upstream CLI `main()` entry point is
 * dropped (the gitmost server drives the engine in-process). Engine LOGIC is
 * byte-identical.
 */
import {
  parseDocmostMarkdown,
  serializeDocmostMarkdownBody,
  type DocmostMdMeta,
} from "../lib/index";
import type { GitSyncClient } from "./client.types";
import type { DiffEntry } from "./git";
import { VaultGit, DEFAULT_BRANCH } from "./git";
import { bodyHash } from "./loop-guard";
import { type Settings } from "./settings";

// Re-export so callers/tests can import the diff row shape from either module.
export type { DiffEntry } from "./git";

/** A page to CREATE in Docmost (a new local file whose meta has no pageId yet). */
export interface CreateAction {
  /** Vault-relative path of the new file. */
  path: string;
}

/** A page whose CONTENT changed (its meta carries the existing pageId). */
export interface UpdateAction {
  pageId: string;
  /** Vault-relative path of the changed file. */
  path: string;
}

/** A page to soft-delete in Docmost (Trash, SPEC §8). */
export interface DeleteAction {
  pageId: string;
}

/**
 * A renamed/moved page: same pageId, new path. This is the RAW record; which
 * Docmost op(s) it needs is decided later by `classifyRenameMoves`.
 */
export interface RenameMoveAction {
  pageId: string;
  oldPath: string;
  newPath: string;
}

/**
 * A CLASSIFIED rename/move (push #3): a `RenameMoveAction` resolved into the
 * Docmost op(s) it actually needs.
 *
 * The page identity is its pageId (SPEC §5: "the truth of the link is the
 * pageId, not the path"); the path is cosmetic and local. Tree position is read
 * from the file PATH, so the parent resolved from the new path is compared with
 * the parent resolved from the old path, and the title in the current meta with
 * the title in the previous meta. A sub-op is emitted ONLY when something real
 * changed:
 *  - `move`   — the resolved parent page changed (reparent in Docmost). A `null`
 *               `parentPageId` means the new parent is ROOT (the file sits at
 *               the space root, with no enclosing folder).
 *  - `rename` — the page title changed (a pure title edit in Docmost).
 *  - `noop`   — neither changed: a purely LOCAL file-path rename. Docmost is
 *               NOT called.
 * `move` and `rename` are independent and may BOTH be present.
 */
export interface RenameMoveActionClassified {
  pageId: string;
  oldPath: string;
  newPath: string;
  /** Present iff the resolved parent changed -> `move_page` (reparent). */
  move?: { parentPageId: string | null };
  /** Present iff the title changed -> `rename_page` (title-only). */
  rename?: { title: string };
  /** True iff neither parent nor title changed (cosmetic local-only rename). */
  noop?: true;
}

/**
 * Injected resolvers for the PURE `classifyRenameMoves` (push #3). Both are
 * synchronous lookups over a path + side; the applier wires them to prefetched
 * tree reads, tests pass plain lookups. SPEC §5 path-as-truth:
 *  - `metaAt`: the file's `docmost:meta` at that side (used for the title).
 *  - `resolveParentPageId`: the pageId of the page whose FILE is the parent
 *    FOLDER's `.md` (one level up from the given path), or `null` for ROOT.
 */
export interface ClassifyRenameMovesDeps {
  metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
  resolveParentPageId: (path: string, side: MetaSide) => string | null;
}

/**
 * PURE classifier for the `renamesMoves` produced by `computePushActions`
 * (push #3, SPEC §5/§6/§8). Performs NO IO — both resolvers are injected.
 *
 * Per entry, in this order:
 *  1. resolve the NEW parent from `newPath` on the `current` side and the OLD
 *     parent from `oldPath` on the `prev` side (the path, not the possibly
 *     stale `meta.parentPageId`, is the truth for tree position);
 *  2. read the new title from the `current` meta and the old title from the
 *     `prev` meta.
 * Then:
 *  - `move` is set iff the two resolved parents differ;
 *  - `rename` is set iff the new title is a NON-EMPTY string that differs from
 *    the old one (an empty/absent new title is never a rename);
 *  - `noop: true` is set iff neither of the above applied.
 *
 * @param renamesMoves Raw rename/move records from the planner.
 * @param deps Synchronous meta / parent resolvers.
 * @returns One classified record per input entry, in input order.
 */
export function classifyRenameMoves(
  renamesMoves: RenameMoveAction[],
  deps: ClassifyRenameMovesDeps,
): RenameMoveActionClassified[] {
  const classified: RenameMoveActionClassified[] = [];

  for (const entry of renamesMoves) {
    const { pageId, oldPath, newPath } = entry;

    // Resolver call order is kept stable: parents first, then titles.
    const parentNow = deps.resolveParentPageId(newPath, "current");
    const parentBefore = deps.resolveParentPageId(oldPath, "prev");
    const titleNow = deps.metaAt(newPath, "current")?.title;
    const titleBefore = deps.metaAt(oldPath, "prev")?.title;

    const result: RenameMoveActionClassified = { pageId, oldPath, newPath };

    // Reparent: the enclosing-folder page changed.
    if (parentNow !== parentBefore) {
      result.move = { parentPageId: parentNow };
    }

    // Retitle: only a real, non-empty new title that actually changed counts.
    if (
      typeof titleNow === "string" &&
      titleNow !== "" &&
      titleNow !== titleBefore
    ) {
      result.rename = { title: titleNow };
    }

    // Nothing Docmost cares about changed -> local-only path rename.
    if (result.move === undefined && result.rename === undefined) {
      result.noop = true;
    }

    classified.push(result);
  }

  return classified;
}

/** The classified set of push actions (PURE output of `computePushActions`). */
export interface PushActions {
  creates: CreateAction[];
  updates: UpdateAction[];
  deletes: DeleteAction[];
  renamesMoves: RenameMoveAction[];
  /**
   * Diff rows that could NOT be turned into an action, with a reason — e.g. a
   * deleted file whose PRE-IMAGE meta carried no recoverable pageId (the
   * untracked-file guard, SPEC §8: only files that were tracked with a pageId
   * are deleted in Docmost). Carried so the caller can log them.
   */
  skipped: { path: string; status: DiffEntry["status"]; reason: string }[];
}

/**
 * Which tree a `metaAt` lookup reads the file's `docmost:meta` from:
 *  - `current`: the current `main` tree (the live file content) — used for
 *    A/M/R, where the file still exists.
 *  - `prev`: the last-pushed PRE-IMAGE (e.g. `refs/docmost/last-pushed:<path>`)
 *    — used for D, where the file is gone from `main` but its pageId must be
 *    recovered from the version Docmost last knew (SPEC §6/§8).
 */
export type MetaSide = "current" | "prev";

/** Input to the PURE planner. `metaAt` is injected (no IO inside the planner). */
export interface PushActionsInput {
  /** Diff rows of `main` vs `refs/docmost/last-pushed` (SPEC §6 step 2). */
  changes: DiffEntry[];
  /**
   * Resolve a file's `docmost:meta` at a given side, or `null` if the file is
   * absent there / has no parseable meta. The real caller reads the working
   * tree (`current`) or `git show <ref>:<path>` (`prev`); tests pass a plain
   * lookup.
   */
  metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
}
/**
 * PURE push planner (SPEC §4/§6/§8). Turns each diff row into a Docmost action
 * keyed by `pageId` identity. Performs NO IO — the `metaAt` resolver is
 * injected.
 *
 * Classification rules:
 *  - `A` (added):
 *     - current meta HAS a pageId -> UPDATE (a restored/copied file whose page
 *       already exists; its content is pushed rather than creating a dup).
 *     - no pageId but a non-empty spaceId -> CREATE (a brand-new local file).
 *     - no pageId and no usable spaceId -> SKIP, reason
 *       `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
 *       (§16), and the planner refuses to guess a space (SPEC §8 guard spirit).
 *  - `M` (modified): current meta has a pageId -> UPDATE; otherwise SKIP (there
 *    is nothing to target).
 *  - `D` (deleted): the pageId is recovered from the PRE-IMAGE meta
 *    (`metaAt(path, 'prev')`) -> DELETE; with no recoverable pageId -> SKIP
 *    (untracked-file guard, SPEC §8: never delete an untracked page).
 *  - `R` (renamed/moved) and `C` (copy, recorded the same way): pageId from the
 *    current meta -> a raw rename/move record `{pageId, oldPath, newPath}`
 *    (`oldPath` falls back to the row's own path when the row carries none);
 *    move-vs-rename resolution is NOT done here. No pageId -> SKIP.
 *  - any other status -> SKIP with an `unhandled diff status` reason.
 *
 * @param input The diff rows plus the injected meta resolver.
 * @returns The classified action set, each list in diff-row order.
 */
export function computePushActions(input: PushActionsInput): PushActions {
  const { changes, metaAt } = input;
  const plan: PushActions = {
    creates: [],
    updates: [],
    deletes: [],
    renamesMoves: [],
    skipped: [],
  };

  // Record a row that produced no action, together with why.
  const skip = (row: DiffEntry, reason: string): void => {
    plan.skipped.push({ path: row.path, status: row.status, reason });
  };

  for (const row of changes) {
    const { status, path } = row;

    if (status === "D") {
      // The file is gone from `main`: identify the page from the version that
      // was last pushed, so the RIGHT page is deleted.
      const trackedId = metaAt(path, "prev")?.pageId;
      if (trackedId) {
        plan.deletes.push({ pageId: trackedId });
      } else {
        // Untracked-file guard (SPEC §8): never a Docmost page, never a delete.
        skip(row, "deleted file has no recoverable pageId (pre-image meta)");
      }
      continue;
    }

    if (status !== "A" && status !== "M" && status !== "R" && status !== "C") {
      // Defensive: not reachable for A/M/D/R/C, kept for any future status.
      skip(row, `unhandled diff status ${status}`);
      continue;
    }

    // A / M / R / C: the file still exists, so identity comes from its
    // CURRENT meta.
    const meta = metaAt(path, "current");
    const pageId = meta?.pageId;

    if (status === "R" || status === "C") {
      if (pageId) {
        // Same page, new path — only the raw record is kept at this stage.
        plan.renamesMoves.push({
          pageId,
          oldPath: row.oldPath ?? path,
          newPath: path,
        });
      } else {
        skip(row, "renamed/moved file has no pageId in meta");
      }
      continue;
    }

    if (pageId) {
      // Modified, or added-but-already-tracked: push content as an UPDATE,
      // never as a duplicate create.
      plan.updates.push({ pageId, path });
      continue;
    }

    if (status === "M") {
      // A modified file with no pageId has no Docmost target to update.
      skip(row, "modified file has no pageId in meta");
      continue;
    }

    // status === "A" without a pageId: a create needs a (truthy, hence
    // non-empty) spaceId; otherwise refuse rather than guess a space.
    if (meta?.spaceId) {
      plan.creates.push({ path });
    } else {
      skip(row, "create-without-spaceId");
    }
  }

  return plan;
}

// --- thin apply (create/update/delete), fakes-only in this increment ---------

/** The marker the push direction advances after a successful push (SPEC §5/§6). */
export const LAST_PUSHED_REF = "refs/docmost/last-pushed";

/**
 * The mirror branch fast-forwarded after a clean push (SPEC §5/§6 step 3). It
 * reflects "what Docmost currently contains"; advancing it to the pushed `main`
 * commit closes the loop so the next pull diffs empty for the pushed pages.
 */
export const DOCMOST_BRANCH = "docmost";
/**
 * Injectable IO for `applyPushActions`. The applier touches the outside world
 * only through these members, so tests can drive it entirely with FAKES (no
 * live destructive run).
 *  - `client`: the create/update/delete/move/rename subset of `GitSyncClient`.
 *  - `readFile`/`writeFile`: read a changed file's text / write a file back, by
 *    vault-relative path (the applier does not resolve absolute paths, so fakes
 *    stay trivial).
 *  - `git`: `updateRef` (advance `refs/docmost/last-pushed`),
 *    `fastForwardBranch` (advance the `docmost` mirror after a clean push, the
 *    loop-close — SPEC §6 step 3 / §10) and `showFileAtRef` (pre-image reads).
 */
export interface ApplyPushDeps {
  client: Pick<
    GitSyncClient,
    | "importPageMarkdown"
    | "createPage"
    | "deletePage"
    | "movePage"
    | "renamePage"
  >;
  /** Read a changed file's full text by its vault-relative path. */
  readFile: (path: string) => Promise<string>;
  /** Write a file's full text by its vault-relative path. */
  writeFile: (path: string, text: string) => Promise<void>;
  /**
   * `updateRef` advances `refs/docmost/last-pushed`; `fastForwardBranch`
   * advances the `docmost` mirror after a clean push. `showFileAtRef` reads a
   * file's text at a ref (used by the move/rename classifier to resolve the
   * PREVIOUS parent folder's `.md` at `refs/docmost/last-pushed`, SPEC §5
   * path-as-truth).
   */
  git: Pick<VaultGit, "updateRef" | "fastForwardBranch" | "showFileAtRef">;
}

/** A file whose meta was rewritten with a freshly-assigned pageId (post-create). */
export interface WrittenBackPage {
  path: string;
  pageId: string;
}

/**
 * The per-page push record meant for a FUTURE poll-suppression (SPEC §10): a
 * pulled page whose body hash + `updatedAt` match a record here is OUR OWN
 * write and must not be re-pulled. PRODUCED here; CONSUMED on the pull side
 * later.
 */
export interface PushedPageRecord {
  /** The Docmost pageId that was updated/created. */
  pageId: string;
  /**
   * The `updatedAt` from the create/update client result, when the result
   * exposed one. Absent when the (fake) client did not return it.
   */
  updatedAt?: string;
  /** Stable hash of the markdown BODY that was pushed (SPEC §10 "body hash"). */
  bodyHash: string;
}

/**
 * One page whose operation FAILED during apply (SPEC §12 resumability). The bad
 * page is isolated — recorded here — and the rest of the batch still runs; the
 * refs are NOT advanced when there is any failure, so a re-run retries cleanly.
 */
export interface PushFailure {
  kind: "update" | "create" | "delete" | "move" | "rename";
  /** The pageId for update/delete/move/rename; absent for a never-id'd create. */
  pageId?: string;
  /** The vault-relative path for create/update/move/rename; absent for delete. */
  path?: string;
  /** The error message captured from the thrown error. */
  error: string;
}

/**
 * A rename/move action that resolved to a NO-OP (push #3, SPEC §5): a purely
 * LOCAL file-path rename whose resolved parent AND title are both unchanged.
 * The page identity is its pageId and the path is cosmetic/local-only, so
 * Docmost is NOT called — the skip is recorded here (with the reason) for
 * logging.
 */
export interface PushNoop {
  pageId: string;
  oldPath: string;
  newPath: string;
  /** Why no Docmost op was emitted (currently always a path-only rename). */
  reason: "path-only-rename";
}

/** Structured outcome of `applyPushActions` (counts + write-backs + noops). */
export interface ApplyPushResult {
  created: number;
  updated: number;
  deleted: number;
  /** Pages reparented in Docmost via `move_page` (push #3, SPEC §5/§16). */
  moved: number;
  /** Pages retitled in Docmost via `rename_page` (push #3, SPEC §5/§6). */
  renamed: number;
  /**
   * Files whose `docmost:meta` was rewritten with the pageId Docmost assigned
   * on create — these now need a FOLLOW-UP commit (the meta on disk changed).
   * The commit itself is the caller's job; recorded here so it is not lost.
   */
  writtenBack: WrittenBackPage[];
  /**
   * Per-page push records (pageId + optional `updatedAt` + body hash) for every
   * page successfully updated, and for every created page that was assigned an
   * id — the §10 loop-guard data. Deletes are not included (no body was
   * pushed).
   */
  pushed: PushedPageRecord[];
  /**
   * Pages whose operation threw — isolated and recorded, the batch continued
   * (SPEC §12). Non-empty here means the refs were NOT advanced.
   */
  failures: PushFailure[];
  /**
   * Rename/move actions that resolved to a NO-OP — a purely LOCAL file-path
   * rename (same parent, same title). NO Docmost call was made for these (SPEC
   * §5: the page is its pageId, the path is local-only). Recorded for logging.
   */
  noops: PushNoop[];
  /** Diff rows the planner could not classify (carried through for logging). */
  skipped: PushActions["skipped"];
  /** Whether `refs/docmost/last-pushed` was advanced (only on a CLEAN push). */
  lastPushedAdvanced: boolean;
  /**
   * Result of fast-forwarding the `docmost` mirror branch after a CLEAN push
   * (the loop-close, SPEC §6 step 3 / §10). `null` when no advance was
   * attempted (no `pushedCommit`, or there were failures). `{ ok:false, reason }`
   * when a non-fast-forward was REFUSED (divergent `docmost` history is never
   * clobbered).
   */
  docmostFastForward: { ok: boolean; reason?: string } | null;
}
/**
 * THIN IO applier for a planned push (create / update / delete / rename-move).
 * All effects go through `deps`, so it is exercised via FAKES in tests.
 *
 *  - UPDATE: read the file, then `client.importPageMarkdown(pageId, text)`.
 *    This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw jsonb
 *    overwrite. The full self-contained markdown (meta + body) is sent as-is.
 *  - CREATE: derive title/spaceId/parentPageId from the file's current meta,
 *    call `client.createPage(...)`, take the assigned pageId from
 *    `result.data.id`, and write it BACK into the file's `docmost:meta`
 *    (re-serialized via `serializeDocmostMarkdownBody`, body preserved) so the
 *    file becomes tracked. The write-back is recorded in `writtenBack`.
 *  - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
 *  - RENAME/MOVE (push #3, SPEC §5/§6/§16): prefetch the parent-folder pageIds
 *    and metas, classify with `classifyRenameMoves`, then:
 *      - `move`   -> `client.movePage(pageId, parentPageId)` (no position is
 *                    passed; the client is expected to supply a default),
 *      - `rename` -> `client.renamePage(pageId, title)`,
 *      - BOTH     -> move FIRST, then rename,
 *      - `noop`   -> NO client call; recorded in `noops`.
 *
 * FAIL-SAFE / per-page isolation (SPEC §12). Every page's operation runs in its
 * own try/catch: a failing page is recorded in `failures` and the batch
 * CONTINUES. The refs are advanced ONLY when `failures` is empty, so a re-run
 * after a partial push retries the whole batch.
 *
 * LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push with a
 * `pushedCommit`: `refs/docmost/last-pushed` is set to it and the `docmost`
 * mirror is fast-forwarded to it via `git.fastForwardBranch`; that call's
 * result is surfaced as `docmostFastForward`.
 *
 * LOOP-GUARD DATA (SPEC §10). Each successful update, and each create that was
 * assigned an id, adds a `{ pageId, updatedAt?, bodyHash }` record to `pushed`.
 *
 * @param deps Injected client / file IO / git primitives.
 * @param actions The planner output to apply.
 * @param pushedCommit The `main` commit just reflected into Docmost (SHA or
 *   commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
 * @returns Counts, write-backs, loop-guard records, failures and ref status.
 */
export async function applyPushActions(
  deps: ApplyPushDeps,
  actions: PushActions,
  pushedCommit?: string,
): Promise<ApplyPushResult> {
  const { client, git } = deps;

  let created = 0;
  let updated = 0;
  let deleted = 0;
  let moved = 0;
  let renamed = 0;
  const writtenBack: WrittenBackPage[] = [];
  const pushed: PushedPageRecord[] = [];
  const failures: PushFailure[] = [];
  const noops: PushNoop[] = [];

  // 1. UPDATES — collab/Yjs write path (SPEC §2/§15.6), never a raw overwrite.
  //    Each update is isolated: a thrown page is recorded and the batch goes on.
  for (const u of actions.updates) {
    try {
      const fullMarkdown = await deps.readFile(u.path);
      const result = await client.importPageMarkdown(u.pageId, fullMarkdown);
      updated++;
      // §10 loop-guard data: hash what was pushed + capture `updatedAt`.
      pushed.push({
        pageId: u.pageId,
        ...extractUpdatedAt(result),
        bodyHash: bodyHash(fullMarkdown),
      });
    } catch (err: unknown) {
      failures.push({
        kind: "update",
        pageId: u.pageId,
        path: u.path,
        error: errMessage(err),
      });
    }
  }

  // 2. CREATES — create the page, then write the assigned pageId back to meta
  //    so the file becomes tracked (SPEC §4 "write the assigned pageId back").
  //    Isolated per page like updates.
  for (const c of actions.creates) {
    try {
      const text = await deps.readFile(c.path);
      const { meta, body } = parseDocmostMarkdown(text);
      // A new local file may have partial meta (e.g. title/spaceId only);
      // spaceId is required by Docmost (the planner already refuses a create
      // without one).
      const title = meta?.title ?? "";
      const spaceId = meta?.spaceId ?? "";
      const parentPageId = meta?.parentPageId ?? undefined;
      const result = await client.createPage(title, body, spaceId, parentPageId);
      // The assigned pageId is read from `result.data.id`.
      const assignedPageId: string | undefined = result?.data?.id;
      if (assignedPageId) {
        // Re-serialize the file with the pageId in meta, body preserved.
        const newMeta: DocmostMdMeta = {
          version: meta?.version ?? 1,
          ...meta,
          pageId: assignedPageId,
        };
        const rewritten = serializeDocmostMarkdownBody(newMeta, body);
        await deps.writeFile(c.path, rewritten);
        writtenBack.push({ path: c.path, pageId: assignedPageId });
        // §10 loop-guard data for the created page.
        pushed.push({
          pageId: assignedPageId,
          ...extractUpdatedAt(result),
          bodyHash: bodyHash(text),
        });
      }
      // NOTE: a create whose result carries no id is still counted here, but
      // nothing is written back and no loop-guard record is produced for it.
      created++;
    } catch (err: unknown) {
      failures.push({ kind: "create", path: c.path, error: errMessage(err) });
    }
  }

  // 3. DELETES — soft-delete to Trash (SPEC §8), reversible. Isolated per page.
  for (const d of actions.deletes) {
    try {
      await client.deletePage(d.pageId);
      deleted++;
    } catch (err: unknown) {
      failures.push({
        kind: "delete",
        pageId: d.pageId,
        error: errMessage(err),
      });
    }
  }

  // 4. RENAME/MOVE (push #3, SPEC §5/§6/§16). The NEW parent comes from the new
  //    path's enclosing folder `.md`, the OLD parent from the old path's at
  //    last-pushed — PATH is the truth, not stale `meta.parentPageId`; the
  //    title comes from the meta. Only the real ops are applied.
  if (actions.renamesMoves.length > 0) {
    // The classifier is PURE over sync resolvers; the tree reads are async, so
    // every (path, side) lookup it will make is prefetched into plain tables.
    const parentTable = new Map<string, string | null>();
    const metaTable = new Map<string, DocmostMdMeta | null>();
    // The tree helpers already swallow their own read errors; this per-entry
    // try/catch keeps the batch-isolation invariant (§12) holding regardless of
    // future changes to them.
    const prefetchFailed = new Set<string>();
    for (const rm of actions.renamesMoves) {
      // Keyed by `path|side`, so duplicate lookups fold into one entry.
      try {
        parentTable.set(
          `${rm.newPath}|current`,
          await resolveParentPageIdViaTree(deps, rm.newPath, "current"),
        );
        parentTable.set(
          `${rm.oldPath}|prev`,
          await resolveParentPageIdViaTree(deps, rm.oldPath, "prev"),
        );
        metaTable.set(
          `${rm.newPath}|current`,
          await metaAtViaTree(deps, rm.newPath, "current"),
        );
        metaTable.set(
          `${rm.oldPath}|prev`,
          await metaAtViaTree(deps, rm.oldPath, "prev"),
        );
      } catch (err: unknown) {
        prefetchFailed.add(rm.pageId);
        failures.push({
          kind: "move",
          pageId: rm.pageId,
          path: rm.newPath,
          error: errMessage(err),
        });
      }
    }
    const classified = classifyRenameMoves(
      actions.renamesMoves.filter((rm) => !prefetchFailed.has(rm.pageId)),
      {
        metaAt: (path, side) => metaTable.get(`${path}|${side}`) ?? null,
        resolveParentPageId: (path, side) =>
          parentTable.get(`${path}|${side}`) ?? null,
      },
    );

    for (const c of classified) {
      if (c.noop) {
        // Cosmetic local-only file-path rename — no Docmost op (SPEC §5).
        noops.push({
          pageId: c.pageId,
          oldPath: c.oldPath,
          newPath: c.newPath,
          reason: "path-only-rename",
        });
        continue;
      }
      // Track which op is in flight so a failure is attributed to the op that
      // ACTUALLY threw: a move that succeeds followed by a rename that throws
      // must be recorded as `rename`, not `move`.
      let failingKind: "move" | "rename" = c.move ? "move" : "rename";
      try {
        // Reparent FIRST so the page is in its new tree position, THEN retitle.
        if (c.move) {
          failingKind = "move";
          // TODO(next): compute a fractional-index position between siblings
          // (SPEC §16). No position is passed here. `parentPageId: null` is a
          // move to the space ROOT.
          await client.movePage(c.pageId, c.move.parentPageId);
          moved++;
        }
        if (c.rename) {
          failingKind = "rename";
          await client.renamePage(c.pageId, c.rename.title);
          renamed++;
        }
      } catch (err: unknown) {
        // A move that threw before its rename leaves the rename for the next
        // run; the refs are NOT advanced (below), so the re-run retries.
        failures.push({
          kind: failingKind,
          pageId: c.pageId,
          path: c.newPath,
          error: errMessage(err),
        });
      }
    }
  }

  // 5. Advance the refs ONLY on a CLEAN push (no failures) AND when a pushed
  //    commit is supplied. A partial push advances NEITHER ref (SPEC §12).
  let lastPushedAdvanced = false;
  let docmostFastForward: { ok: boolean; reason?: string } | null = null;
  if (pushedCommit && failures.length === 0) {
    await git.updateRef(LAST_PUSHED_REF, pushedCommit);
    lastPushedAdvanced = true;
    // The fast-forward outcome (including a refusal) is surfaced to the caller.
    docmostFastForward = await git.fastForwardBranch(
      DOCMOST_BRANCH,
      pushedCommit,
    );
  }

  return {
    created,
    updated,
    deleted,
    moved,
    renamed,
    writtenBack,
    pushed,
    failures,
    noops,
    skipped: actions.skipped,
    lastPushedAdvanced,
    docmostFastForward,
  };
}

/** Stringify a thrown value into a stable error message. */
function errMessage(err: unknown): string {
  return err instanceof Error ? err.message : String(err);
}
/**
 * SPEC §5 path-as-truth: the parent FOLDER's `.md` file for a vault-relative
 * (forward-slash) path. `buildVaultLayout` puts a page with children at
 * `<...>/Title.md` and nests its children under `<...>/Title/`, so for
 * `newPath = <folder>/Child.md` the parent page's file is `<folder>.md` (the
 * enclosing folder, one level up). A path with NO enclosing folder
 * (`Child.md`, at the space root) has no parent folder file -> `null` (the
 * parent is ROOT).
 *
 * @param path Vault-relative, forward-slash path of a page file.
 * @returns The parent page's file path, or `null` for a root-level file.
 */
export function parentFolderFile(path: string): string | null {
  const slash = path.lastIndexOf("/");
  if (slash < 0) return null; // root-level file: no enclosing folder.
  return `${path.slice(0, slash)}.md`;
}

/**
 * Resolve the pageId of the page that is the PARENT of `path` at a given side,
 * by reading the parent FOLDER's `.md` (SPEC §5 path-as-truth):
 *  - `current` -> `deps.readFile(<folder>.md)` (the live working tree),
 *  - `prev`    -> `git.showFileAtRef('refs/docmost/last-pushed', <folder>.md)`
 *                 (the last-pushed pre-image),
 * then parsing its `docmost:meta` and returning that page's pageId.
 *
 * A root-level path (no enclosing folder), a missing/unreadable parent file,
 * or a parent file with no parseable pageId all resolve to `null` (parent is
 * ROOT / unknown -> `parentPageId: null`, SPEC §16 "parentPageId: null -> to
 * the root").
 *
 * The read/parse (and its error swallowing) is delegated to `metaAtViaTree`, so
 * both helpers treat a bad file identically. Because the IO is async, call
 * sites prefetch the results; the classifier itself stays sync over a table.
 */
async function resolveParentPageIdViaTree(
  deps: Pick<ApplyPushDeps, "readFile" | "git">,
  path: string,
  side: MetaSide,
): Promise<string | null> {
  const parentFile = parentFolderFile(path);
  if (parentFile === null) return null; // root-level: parent is ROOT.
  const parentMeta = await metaAtViaTree(deps, parentFile, side);
  return parentMeta?.pageId ?? null;
}

/**
 * Resolve a file's `docmost:meta` at a side for the rename/move classifier (the
 * title comes from here): `current` reads the working tree via `deps.readFile`,
 * `prev` reads `refs/docmost/last-pushed` via `deps.git.showFileAtRef`.
 *
 * @returns The parsed meta, or `null` when the read throws, the file is absent
 *   at that ref (`showFileAtRef` yields `null`), the text cannot be parsed, or
 *   the parsed file carries no meta.
 */
async function metaAtViaTree(
  deps: Pick<ApplyPushDeps, "readFile" | "git">,
  path: string,
  side: MetaSide,
): Promise<DocmostMdMeta | null> {
  let text: string | null;
  try {
    text =
      side === "current"
        ? await deps.readFile(path)
        : await deps.git.showFileAtRef(LAST_PUSHED_REF, path);
  } catch {
    // Missing/unreadable at that side.
    return null;
  }
  if (text === null) return null;
  try {
    return parseDocmostMarkdown(text).meta ?? null;
  } catch {
    // Unparseable meta.
    return null;
  }
}

/**
 * Pull an `updatedAt` out of a create/update client result, if present. The
 * value may sit at `data.updatedAt` (nested) or at the top level; the nested
 * one is preferred. Only STRING values count: a non-string nested value no
 * longer hides a usable top-level string. When neither is a string the field
 * is omitted entirely (rather than set to `undefined`), so the result can be
 * spread into a record.
 *
 * @param result Any client result (tolerates `null`/`undefined`/extra fields).
 * @returns `{ updatedAt }` when a string value was found, otherwise `{}`.
 */
function extractUpdatedAt(result: unknown): { updatedAt?: string } {
  const r = result as
    | { updatedAt?: unknown; data?: { updatedAt?: unknown } }
    | null
    | undefined;
  // Preference order: nested first, then top-level.
  const candidates: unknown[] = [r?.data?.updatedAt, r?.updatedAt];
  const found = candidates.find(
    (candidate): candidate is string => typeof candidate === "string",
  );
  return found === undefined ? {} : { updatedAt: found };
}

// --- runnable push orchestration (`runPush`) ---------------------------------
//
// `runPush` is the FS->Docmost twin of `pull.ts`'s `main`: it wires the VaultGit
// diff/ref primitives + the PURE `computePushActions` planner + the THIN
// `applyPushActions` applier into one runnable cycle. SAFE BY DEFAULT — the
// engine's FIRST write path to Docmost defaults to DRY-RUN (plan only, NO
// Docmost writes, NO ref advance); an explicit `--apply` is the ONLY path that
// builds a client and mutates Docmost.
//
// Every external effect is injected (`PushDeps`) so the whole orchestration is
// driven by FAKES in tests — no live Docmost, git, fs, or network.

/**
 * The human ("local") git identity used for engine-made commits on `main` in the
 * push direction (SPEC §7.3). The provenance is carried by the trailer (below),
 * which the loop-guard keys on; the identity is for history readability only.
 * When the vault repo already has a configured `user.name`/`user.email`, git
 * uses that for the working-tree commit; this is the fallback the daemon stamps.
 */
export const LOCAL_AUTHOR_NAME = "Local";
export const LOCAL_AUTHOR_EMAIL = "local@local";

/** The provenance trailer marking a `main`-side (human/local) commit (SPEC §7.3). */
export const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";

/**
 * Injectable deps for `runPush` (mirrors `pull.ts`'s wiring; everything that
 * touches the outside world is here so tests pass fakes). `makeClient` is a
 * FACTORY, not a client — a dry-run must build NO client at all (it is never
 * called), and only `--apply` invokes it.
 */
export interface PushDeps {
  settings: Settings;
  git: Pick<
    VaultGit,
    | "assertGitAvailable"
    | "ensureRepo"
    | "isMergeInProgress"
    | "checkout"
    | "stageAll"
    | "commit"
    | "readRef"
    | "revParse"
    | "diffNameStatus"
    | "showFileAtRef"
    | "updateRef"
    | "fastForwardBranch"
  >;
  /** Build a real client — called ONLY on `--apply`, never on dry-run. */
  makeClient: (settings: Settings) => ApplyPushDeps["client"];
  /** Read a file's full text by its vault-relative (forward-slash) path. */
  readFile: (path: string) => Promise<string>;
  /**
   * Write a file's full text by its vault-relative path.
   * NOTE(review): the resolved type was missing in this copy of the file; `void`
   * is assumed because no caller here reads the result — confirm.
   */
  writeFile: (path: string, text: string) => Promise<void>;
  /** Structured logger (defaults to console in `main`; a recorder in tests). */
  log: (line: string) => void;
}

/** The structured outcome of a `runPush` cycle (returned + summarized). */
export interface PushRunResult {
  /** Which path ran: `dry-run` (plan only) or `apply` (Docmost mutated). */
  mode: "dry-run" | "apply";
  /** Why the cycle stopped before planning, if it did (e.g. a left-over merge). */
  aborted?: "merge-in-progress";
  /** The diff base the plan was computed against (`last-pushed` else `docmost`). */
  base?: { ref: string; source: "last-pushed" | "docmost"; sha: string | null };
  /** The `main` commit the plan targets (the would-be pushed commit). */
  pushedCommit?: string;
  /** Planned action counts from the PURE planner (present once a plan was built). */
  planned?: {
    creates: number;
    updates: number;
    deletes: number;
    renamesMoves: number;
    skipped: number;
  };
  /** The applier's structured result — ONLY present on the `--apply` path. */
  applied?: ApplyPushResult;
  /**
   * True when `applyPushActions` REFUSED to fast-forward a divergent `docmost`
   * mirror (SPEC §5 invariant broken). Escalated (logged prominently) and folded
   * into the CLI's non-zero exit.
   */
  divergentDocmost?: boolean;
  /** Per-page failures from the applier (empty/absent on a clean run). */
  failures?: PushFailure[];
}

/**
 * Run one FS->Docmost push cycle (SPEC §6 "ФС → Docmost"), DRY-RUN BY DEFAULT.
 *
 * Steps (mirrors `pull.ts`):
 *  1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (log + a result
 *     carrying `aborted: "merge-in-progress"`) if a merge is in progress — never
 *     push on top of an unresolved conflict (SPEC §9/§12).
 *  2. Checkout `main` (the human-facing branch the push reads from).
 *  3. Stage + commit the pending working-tree changes on `main` with the `local`
 *     provenance trailer (SPEC §7.3). This happens on a dry-run too and is
 *     logged either way.
 *  4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
 *     `docmost` mirror branch. Resolve `main`; return early if it has no commit.
 *  5. `diffNameStatus(base, main)` -> changes; prefetch every (path, side) meta
 *     into a table (current = working tree, prev = the file at the base ref)
 *     and run the PURE `computePushActions` over a sync resolver on that table.
 *  6. Log the plan. DRY-RUN: RETURN here — NO client, NO Docmost calls, NO ref
 *     advance.
 *  7. `--apply`: build the client, run `applyPushActions(..., pushedCommit)`,
 *     then (a) if pageIds were written back, commit them on `main` and — only
 *     when the applier advanced `last-pushed` — re-advance it and fast-forward
 *     `docmost` to the new commit; (b) ESCALATE a refused `docmost`
 *     fast-forward with a WARNING and `divergentDocmost: true`; (c) log a
 *     one-line summary.
 *
 * @param deps Injected git / fs / client-factory / logger effects.
 * @param opts `dryRun: true` stops after logging the plan.
 * @returns The structured outcome of the cycle. Errors thrown by the injected
 *   deps are not caught here.
 */
export async function runPush(
  deps: PushDeps,
  opts: { dryRun: boolean },
): Promise<PushRunResult> {
  const { git, settings, log } = deps;
  const dryRun = opts.dryRun;
  const mode: PushRunResult["mode"] = dryRun ? "dry-run" : "apply";

  // 1. Preflight git. Fail fast if the git binary is missing — the vault state
  //    store relies on it.
  await git.assertGitAvailable();
  await git.ensureRepo();

  // 1b. Refuse to push on top of an unresolved merge (SPEC §9/§12). Pushing now
  //     could leak conflict markers into Docmost, so detect it BEFORE any
  //     checkout/diff and stop with an actionable message.
  if (await git.isMergeInProgress()) {
    log(
      `push: vault has an unresolved merge at ${settings.vaultPath} — resolve ` +
        `it (or 'git merge --abort') and re-run. Nothing was pushed to Docmost ` +
        `(conflict markers must never reach Docmost, SPEC §9).`,
    );
    return { mode, aborted: "merge-in-progress" };
  }

  // 2. Work on `main` — the human-facing branch the push diffs FROM.
  await git.checkout(DEFAULT_BRANCH);

  // 3. Commit the pending working-tree changes on `main` with the `local`
  //    provenance trailer (SPEC §7.3). `commit` returning false means nothing
  //    changed. Even a dry-run commits (it is the only way to diff
  //    `base..main`), so the LOCAL git mutation is always made VISIBLE in the
  //    log: the commit is local-only and nothing is sent to Docmost.
  await git.stageAll();
  const committedWorkingTree = await git.commit("local: working-tree changes", {
    authorName: LOCAL_AUTHOR_NAME,
    authorEmail: LOCAL_AUTHOR_EMAIL,
    trailers: [LOCAL_SOURCE_TRAILER],
  });
  if (committedWorkingTree) {
    const sha = await git.revParse(DEFAULT_BRANCH);
    log(
      `push: committed local working-tree changes on main` +
        (sha ? ` as ${sha.slice(0, 8)}` : "") +
        ` (local git only — nothing sent to Docmost).`,
    );
  } else {
    log("push: working tree clean (no local changes to push).");
  }

  // 4. Pick the diff BASE (SPEC §5/§6): `refs/docmost/last-pushed` if it
  //    resolves, else fall back to the `docmost` mirror branch — which is what
  //    exists before the first push ever advanced last-pushed.
  let base: NonNullable<PushRunResult["base"]>;
  const lastPushedSha = await git.readRef(LAST_PUSHED_REF);
  if (lastPushedSha) {
    base = { ref: LAST_PUSHED_REF, source: "last-pushed", sha: lastPushedSha };
  } else {
    base = {
      ref: DOCMOST_BRANCH,
      source: "docmost",
      sha: await git.revParse(DOCMOST_BRANCH),
    };
  }
  const pushedCommit = await git.revParse(DEFAULT_BRANCH);
  if (!pushedCommit) {
    // `main` has no commit — defensive; nothing to diff.
    log("push: `main` has no commit to push — nothing to do.");
    return { mode, base };
  }

  // 5. Diff the base against `main` and build the `metaAt` resolver (PURE
  //    planner input). `computePushActions` is sync but the file/ref reads are
  //    async, so every (path, side) the diff can ask for is prefetched into a
  //    table first and resolved from it afterwards.
  const changes = await git.diffNameStatus(base.ref, DEFAULT_BRANCH);
  const metaTable = new Map<string, DocmostMdMeta | null>();
  for (const change of changes) {
    // `current` uses the change's path; `prev` uses the pre-rename path when the
    // diff reports one (so a DELETE / rename can recover the old meta).
    const currentPath = change.path;
    const prevPath = change.oldPath ?? change.path;
    if (!metaTable.has(`${currentPath}|current`)) {
      metaTable.set(
        `${currentPath}|current`,
        await readMetaCurrent(deps, currentPath),
      );
    }
    if (!metaTable.has(`${prevPath}|prev`)) {
      metaTable.set(
        `${prevPath}|prev`,
        await readMetaPrev(deps, base.ref, prevPath),
      );
    }
  }
  const metaAt = (path: string, side: MetaSide): DocmostMdMeta | null =>
    metaTable.get(`${path}|${side}`) ?? null;

  const actions = computePushActions({ changes, metaAt });
  const planned = {
    creates: actions.creates.length,
    updates: actions.updates.length,
    deletes: actions.deletes.length,
    renamesMoves: actions.renamesMoves.length,
    skipped: actions.skipped.length,
  };

  // 6. Log the full plan. DRY-RUN (default): RETURN — build NO client, make
  //    ZERO Docmost calls, advance NO refs.
  logPlan(log, base, pushedCommit, actions, planned, dryRun);
  if (dryRun) {
    return { mode: "dry-run", base, pushedCommit, planned };
  }

  // 7. --apply: build the REAL client and execute. This is the ONLY write path.
  const client = deps.makeClient(settings);
  const applied = await applyPushActions(
    {
      client,
      // Pass the WHOLE `git` object rather than bare method references
      // (`git.updateRef`, …): detached methods would lose their `this` binding
      // on a real `VaultGit`. Same approach as `pull.ts` for `applyPullActions`.
      git,
      readFile: deps.readFile,
      writeFile: deps.writeFile,
    },
    actions,
    pushedCommit,
  );

  // 7a. Persist freshly-assigned pageIds (creates) back into git: commit the
  //     rewritten files on `main` with the `local` trailer, then RE-advance
  //     `refs/docmost/last-pushed` to that commit so the ref and the mirror stay
  //     in lock-step. A divergent `docmost` mirror is tracked across BOTH
  //     fast-forward sites (here and 7b) and escalates identically.
  let divergentDocmost = false;
  if (applied.writtenBack.length > 0) {
    await git.stageAll();
    const recorded = await git.commit("local: record created pageIds", {
      authorName: LOCAL_AUTHOR_NAME,
      authorEmail: LOCAL_AUTHOR_EMAIL,
      trailers: [LOCAL_SOURCE_TRAILER],
    });
    if (recorded) {
      const newCommit = await git.revParse(DEFAULT_BRANCH);
      // Only re-advance when the original push was CLEAN (the applier already
      // advanced last-pushed); a partial push left the refs untouched so a
      // re-run retries the whole batch, and they must not move here either.
      if (newCommit && applied.lastPushedAdvanced) {
        await git.updateRef(LAST_PUSHED_REF, newCommit);
        const ff = await git.fastForwardBranch(DOCMOST_BRANCH, newCommit);
        if (!ff.ok) {
          divergentDocmost = true;
          log(
            `push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
              `fast-forwarded to the pageId write-back commit ` +
              `(${ff.reason ?? "not-fast-forward"}). The §5 invariant ('docmost' ` +
              `mirrors what Docmost contains) is broken: reconcile 'docmost' ` +
              `against the live Docmost tree before the next cycle.`,
          );
        }
      }
    }
  }

  // 7b. ESCALATE a divergent-`docmost` fast-forward refusal reported by the
  //     applier (SPEC §5 invariant broken): make it LOUD and fold it into the
  //     result flag.
  if (applied.docmostFastForward && !applied.docmostFastForward.ok) {
    divergentDocmost = true;
    log(
      `push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
        `fast-forwarded (${applied.docmostFastForward.reason ?? "not-fast-forward"}). ` +
        `The §5 invariant ('docmost' mirrors what Docmost contains) is broken: ` +
        `reconcile 'docmost' against the live Docmost tree before the next cycle.`,
    );
  }

  // 7c. One-line summary (mirrors pull.ts's summary line).
  log(
    `push complete: ${applied.created} created, ${applied.updated} updated, ` +
      `${applied.deleted} deleted, ${applied.moved} moved, ${applied.renamed} ` +
      `renamed, ${applied.noops.length} no-op(s), ${applied.skipped.length} ` +
      `skipped, ${applied.failures.length} failure(s)` +
      (divergentDocmost ? " [DIVERGENT docmost mirror]" : ""),
  );

  return {
    mode: "apply",
    base,
    pushedCommit,
    planned,
    applied,
    divergentDocmost,
    failures: applied.failures,
  };
}

/**
 * Parse a file's `docmost:meta` from the live working tree (`current` side).
 *
 * @returns The parsed meta, or `null` when the read throws (e.g. the path is
 *   absent on disk), parsing throws, or the file carries no meta.
 */
async function readMetaCurrent(
  deps: Pick<PushDeps, "readFile">,
  path: string,
): Promise<DocmostMdMeta | null> {
  let text: string;
  try {
    text = await deps.readFile(path);
  } catch {
    return null; // absent on disk (e.g. a D row's path) -> no current meta.
  }
  try {
    return parseDocmostMarkdown(text).meta ?? null;
  } catch {
    return null; // unparseable meta -> not engine-tracked.
  }
}

/**
 * Parse a file's `docmost:meta` from the base ref's pre-image (`prev` side).
 *
 * @returns The parsed meta, or `null` when `showFileAtRef` throws or yields
 *   `null` (path absent at the base ref), parsing throws, or no meta is present.
 */
async function readMetaPrev(
  deps: Pick<PushDeps, "git">,
  baseRef: string,
  path: string,
): Promise<DocmostMdMeta | null> {
  let text: string | null;
  try {
    text = await deps.git.showFileAtRef(baseRef, path);
  } catch {
    return null;
  }
  if (text === null) return null; // path absent at the base ref.
  try {
    return parseDocmostMarkdown(text).meta ?? null;
  } catch {
    return null;
  }
}

/**
 * Emit the full plan to the injected logger: a header line (mode, base, target
 * commit), a counts line, then one line per planned/skipped item.
 *
 * `planned` is typed as the NON-optional counts object: the only caller always
 * has one, which removes the need for non-null assertions in here.
 */
function logPlan(
  log: (line: string) => void,
  base: { ref: string; source: string; sha: string | null },
  pushedCommit: string,
  actions: PushActions,
  planned: NonNullable<PushRunResult["planned"]>,
  dryRun: boolean,
): void {
  log(
    `push plan (${dryRun ? "DRY-RUN — no Docmost writes" : "APPLY"}): base=` +
      `${base.ref} (${base.source}${base.sha ? ` ${base.sha.slice(0, 8)}` : ""}) ` +
      `-> main ${pushedCommit.slice(0, 8)}`,
  );
  log(
    `push plan counts: ${planned.creates} create, ${planned.updates} update, ` +
      `${planned.deletes} delete, ${planned.renamesMoves} rename/move, ` +
      `${planned.skipped} skipped`,
  );
  for (const c of actions.creates) log(` create: ${c.path}`);
  for (const u of actions.updates) log(` update: ${u.pageId} (${u.path})`);
  for (const d of actions.deletes) log(` delete: ${d.pageId}`);
  for (const rm of actions.renamesMoves)
    log(` rename/move: ${rm.oldPath} -> ${rm.newPath} (${rm.pageId})`);
  for (const s of actions.skipped)
    log(` skipped [${s.status}] ${s.path}: ${s.reason}`);
}

/** Parsed `push` CLI flags. DRY-RUN is the default; `--apply` opts into writes. */
export interface PushParsedArgs {
  /** True when `--apply` was passed (the ONLY path that writes to Docmost). */
  apply: boolean;
}

/**
 * Parse the `push` CLI flags. SAFE BY DEFAULT: without `--apply` the run is a
 * DRY-RUN (plan only). Exported so the flag handling is unit-testable.
 *
 * @param argv Raw argument list; only an exact `--apply` entry is recognised.
 */
export function parseArgs(argv: string[]): PushParsedArgs {
  return { apply: argv.includes("--apply") };
}
/**
 * Engine settings (plan §2.1 / §7.2 — ADAPTED for vendoring).
 *
 * Upstream this module also loaded `.env` (`dotenv`) and bound `parseSettings`
 * to `process.env` via a `loadSettings()` entry point. In gitmost the engine is
 * driven IN-PROCESS by the NestJS server, which builds the `Settings` object
 * from `EnvironmentService` (plan §7.2) — so the engine must NOT reach into
 * `process.env` here. We therefore vendor ONLY:
 *  - the `Settings` type the engine consumes, and
 *  - `parseSettings(env)` as a PURE function (validate a raw env object -> typed
 *    `Settings`), kept for unit tests and for the server to reuse if it wants
 *    to validate an env-shaped object.
 * The `loadSettings()` / `loadDotenv()` side-effecting entry point is dropped.
 */
import { z } from 'zod';

// Reusable schema pieces: a non-empty string, and a positive integer that is
// coerced from its raw value and falls back to `fallback` when unset.
const requiredText = z.string().min(1);
const positiveIntOr = (fallback: number) =>
  z.coerce.number().int().positive().default(fallback);

// The schema is keyed by the real ENV variable names so a validation error
// names the exact variable. The Docmost address, credentials and space id carry
// NO default — a missing value must fail validation, never silently fall back.
export const envSchema = z.object({
  // Docmost connection — address of our own instance (must be a URL).
  DOCMOST_API_URL: z.string().url(),
  // Credentials for /auth/login — never hardcoded.
  DOCMOST_EMAIL: requiredText,
  DOCMOST_PASSWORD: requiredText,
  // Which Docmost space to mirror.
  DOCMOST_SPACE_ID: requiredText,

  // Local git vault (state store); defaults under data/.
  VAULT_PATH: requiredText.default('data/vault'),
  // Optional git remote the vault pushes to; '' is normalised to "unset".
  GIT_REMOTE: z.preprocess(
    (raw) => (raw === '' ? undefined : raw),
    requiredText.optional(),
  ),

  // Non-secret tunables with defaults.
  POLL_INTERVAL_MS: positiveIntOr(15000),
  DEBOUNCE_MS: positiveIntOr(2000),
  LOG_LEVEL: z.enum(['debug', 'info', 'warn', 'error']).default('info'),
});

/** The typed, camelCased settings object the engine consumes. */
export type Settings = {
  docmostApiUrl: string;
  docmostEmail: string;
  docmostPassword: string;
  docmostSpaceId: string;
  vaultPath: string;
  gitRemote?: string;
  pollIntervalMs: number;
  debounceMs: number;
  logLevel: 'debug' | 'info' | 'warn' | 'error';
};

/**
 * Validate a raw environment-shaped object against `envSchema` and map it to a
 * typed `Settings`. Pure: it reads only its argument.
 *
 * @param env Raw key/value environment object.
 * @returns The validated settings with camelCased keys.
 * @throws ZodError (from `envSchema.parse`) on invalid or missing config.
 */
export function parseSettings(env: NodeJS.ProcessEnv): Settings {
  const {
    DOCMOST_API_URL: docmostApiUrl,
    DOCMOST_EMAIL: docmostEmail,
    DOCMOST_PASSWORD: docmostPassword,
    DOCMOST_SPACE_ID: docmostSpaceId,
    VAULT_PATH: vaultPath,
    GIT_REMOTE: gitRemote,
    POLL_INTERVAL_MS: pollIntervalMs,
    DEBOUNCE_MS: debounceMs,
    LOG_LEVEL: logLevel,
  } = envSchema.parse(env);
  return {
    docmostApiUrl,
    docmostEmail,
    docmostPassword,
    docmostSpaceId,
    vaultPath,
    gitRemote,
    pollIntervalMs,
    debounceMs,
    logLevel,
  };
}
+export type { GitSyncClient, GitSyncPageNodeLite } from "./engine/client.types"; + +export { + VaultGit, + vaultGitEnv, + buildCommitMessage, + BOT_AUTHOR_NAME, + BOT_AUTHOR_EMAIL, + DEFAULT_BRANCH, +} from "./engine/git"; +export type { DiffEntry, MergeResult, CommitOptions } from "./engine/git"; + +export { + readExisting, + computePullActions, + applyPullActions, +} from "./engine/pull"; +export type { + ReadExistingDeps, + PullActionsInput, + PullActions, + ApplyPullActionsDeps, + ApplyResult, +} from "./engine/pull"; + +export { + classifyRenameMoves, + computePushActions, + applyPushActions, + runPush, + parentFolderFile, + parseArgs, + LAST_PUSHED_REF, + DOCMOST_BRANCH, + LOCAL_AUTHOR_NAME, + LOCAL_AUTHOR_EMAIL, + LOCAL_SOURCE_TRAILER, +} from "./engine/push"; +export type { + CreateAction, + UpdateAction, + DeleteAction, + RenameMoveAction, + RenameMoveActionClassified, + ClassifyRenameMovesDeps, + PushActions, + PushActionsInput, + MetaSide, + ApplyPushDeps, + WrittenBackPage, + PushedPageRecord, + PushFailure, + PushNoop, + ApplyPushResult, + PushDeps, + PushRunResult, + PushParsedArgs, +} from "./engine/push"; + +export { parseSettings, envSchema } from "./engine/settings"; +export type { Settings } from "./engine/settings"; + +export { loadSettingsOrExit } from "./engine/config-errors"; diff --git a/packages/git-sync/test/apply-pull-actions.test.ts b/packages/git-sync/test/apply-pull-actions.test.ts new file mode 100644 index 00000000..1b7276fd --- /dev/null +++ b/packages/git-sync/test/apply-pull-actions.test.ts @@ -0,0 +1,417 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest'; +import { applyPullActions } from '../src/engine/pull'; +import type { + PullActions, + ApplyPullActionsDeps, +} from '../src/engine/pull'; +import type { DeletionDecision } from '../src/engine/reconcile'; + +// R-Pull-2 (test-strategy report §5): `applyPullActions` is the THIN IO half of +// the pull cycle. 
These tests drive it with FAKES that record every call — no +// real git, fs, or network — so the ordering and the ⭐ move-on-success +// data-loss guard are verifiable. SPEC §8 (delete suppression) + SPEC §5 (commit +// subject reflects ACTUAL counts) are asserted here. + +const VAULT = '/vault'; + +/** A getPageJson fake: returns a minimal page whose content stabilizes cheaply. */ +function makeClient(opts?: { failFor?: Set }) { + const calls: string[] = []; + const client = { + getPageJson: vi.fn(async (pageId: string) => { + calls.push(pageId); + if (opts?.failFor?.has(pageId)) { + throw new Error(`fetch failed for ${pageId}`); + } + return { + id: pageId, + slugId: `slug-${pageId}`, + title: `Title ${pageId}`, + spaceId: 'space', + parentPageId: null, + updatedAt: '2026-01-01T00:00:00.000Z', + // A trivial doc so stabilizePageFile (the real one) runs fast. + content: { + type: 'doc', + content: [ + { type: 'paragraph', content: [{ type: 'text', text: pageId }] }, + ], + }, + }; + }), + }; + return { client, calls }; +} + +/** A git fake recording the order of ops; merge result is configurable. */ +function makeGit(merge: { ok: boolean; conflict: boolean; output?: string } = { + ok: true, + conflict: false, +}) { + const order: string[] = []; + let committedSubject: string | undefined; + const git = { + stageAll: vi.fn(async () => { + order.push('stageAll'); + }), + commit: vi.fn(async (subject: string) => { + order.push(`commit:${subject}`); + committedSubject = subject; + return true; + }), + checkout: vi.fn(async (branch: string) => { + order.push(`checkout:${branch}`); + }), + merge: vi.fn(async () => { + order.push('merge'); + return { ok: merge.ok, conflict: merge.conflict, output: merge.output ?? '' }; + }), + }; + return { + git, + order, + get committedSubject() { + return committedSubject; + }, + }; +} + +/** A recording fs fake: writes/mkdirs/rms tracked in arrays. 
*/ +function makeFs(opts?: { failWriteFor?: Set }) { + const writes: { abs: string; text: string }[] = []; + const mkdirs: string[] = []; + const rms: string[] = []; + const fs = { + writeFile: vi.fn(async (abs: string, text: string) => { + // Fail a specific destination path if asked (to simulate a write failure). + if (opts?.failWriteFor?.has(abs)) { + throw new Error(`write failed for ${abs}`); + } + writes.push({ abs, text }); + }), + mkdir: vi.fn(async (abs: string) => { + mkdirs.push(abs); + }), + rm: vi.fn(async (abs: string) => { + rms.push(abs); + }), + }; + return { fs, writes, mkdirs, rms }; +} + +function deps( + client: any, + git: any, + fs: ReturnType, +): ApplyPullActionsDeps { + return { + client, + git, + writeFile: fs.fs.writeFile, + mkdir: fs.fs.mkdir, + rm: fs.fs.rm, + }; +} + +const APPLY: DeletionDecision = { apply: true }; + +function actions(partial: Partial): PullActions { + return { + toWrite: [], + moved: [], + toDelete: [], + deletionDecision: APPLY, + existingCount: 0, + plannedDeleteCount: 0, + ...partial, + }; +} + +beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); + vi.spyOn(console, 'error').mockImplementation(() => {}); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('applyPullActions — happy path (write + commit + merge)', () => { + it('fetches, writes each page, stages, commits, checks out main, merges', async () => { + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [ + { pageId: 'p1', relPath: 'A.md' }, + { pageId: 'p2', relPath: 'Sub/B.md' }, + ], + }), + VAULT, + ); + + expect(res.written).toBe(2); + expect(res.failed).toBe(0); + expect(res.committed).toBe(true); + expect(res.merge).toEqual({ ok: true, conflict: false, output: '' }); + + // Both pages were fetched and written at their absolute paths. 
+ expect(client.getPageJson).toHaveBeenCalledTimes(2); + const writtenPaths = fs.writes.map((w) => w.abs).sort(); + expect(writtenPaths).toEqual(['/vault/A.md', '/vault/Sub/B.md']); + + // The git op order is: stageAll -> commit -> checkout main -> merge. + expect(g.order).toEqual([ + 'stageAll', + `commit:docmost: sync 2 page(s)`, + 'checkout:main', + 'merge', + ]); + }); +}); + +describe('applyPullActions — ordering (write before move/delete before commit)', () => { + it('does writes, then move-old-path removals, then deletes, then commit/merge', async () => { + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'Old/M.md', + toRelPath: 'New/M.md', + removeOldPath: true, + }, + ], + toDelete: ['Dead.md'], + plannedDeleteCount: 1, + existingCount: 3, + }), + VAULT, + ); + + // The write to the new path happened (the page was fetched first). + expect(fs.writes.map((w) => w.abs)).toEqual(['/vault/New/M.md']); + // The move old-path removal AND the absence delete both ran, old path first. + expect(fs.rms).toEqual(['/vault/Old/M.md', '/vault/Dead.md']); + // git ops happen AFTER all fs work. + expect(g.order).toEqual([ + 'stageAll', + 'commit:docmost: sync 1 page(s), 1 deleted', + 'checkout:main', + 'merge', + ]); + }); +}); + +describe('applyPullActions — ⭐ data-loss guard (move-on-success)', () => { + it('does NOT remove the OLD path when the new-path write FAILS', async () => { + // The page "m" is being moved Old/M.md -> New/M.md, but its new-path write + // FAILS. Removing the old path now would erase the only copy of the page. + // The guard must KEEP the old path. 
+ const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs({ failWriteFor: new Set(['/vault/New/M.md']) }); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'Old/M.md', + toRelPath: 'New/M.md', + removeOldPath: true, + }, + ], + }), + VAULT, + ); + + // The write failed -> recorded as a failure, nothing written. + expect(res.failed).toBe(1); + expect(res.written).toBe(0); + expect(fs.writes).toEqual([]); + // ⭐ The OLD path was NOT removed: the data-loss guard kept it. + expect(fs.rms).not.toContain('/vault/Old/M.md'); + expect(fs.rms).toEqual([]); + expect(res.movedApplied).toBe(0); + + // The commit subject reflects ACTUAL counts: 0 written, 0 deleted. + expect(g.committedSubject).toBe('docmost: sync 0 page(s)'); + }); + + it('DOES remove the old path when the new-path write SUCCEEDS', async () => { + // Same move, but the write succeeds -> the old path is safely removed. This + // is the positive control proving the guard is keyed on write success. 
+ const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); // no write failures + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'Old/M.md', + toRelPath: 'New/M.md', + removeOldPath: true, + }, + ], + }), + VAULT, + ); + + expect(res.written).toBe(1); + expect(res.movedApplied).toBe(1); + expect(fs.rms).toContain('/vault/Old/M.md'); + expect(g.committedSubject).toBe('docmost: sync 1 page(s)'); + }); + + it('honours removeOldPath:false (path reused by another live page is kept)', async () => { + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'm', relPath: 'New/M.md' }], + moved: [ + { + pageId: 'm', + fromRelPath: 'X.md', + toRelPath: 'New/M.md', + removeOldPath: false, // X.md is a live target of another page + }, + ], + }), + VAULT, + ); + + // The reused old path is never removed. + expect(fs.rms).not.toContain('/vault/X.md'); + expect(fs.rms).toEqual([]); + }); +}); + +describe('applyPullActions — deletion suppression (SPEC §8)', () => { + it('skips deletions when the decision SUPPRESSES them (toDelete already empty)', async () => { + // computePullActions empties toDelete when suppressed, but assert the applier + // ALSO does no removals and the subject omits the deleted count. + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'p1', relPath: 'A.md' }], + // Suppressed: toDelete is empty even though 5 were planned. 
+ toDelete: [], + deletionDecision: { apply: false, reason: 'incomplete-fetch' }, + plannedDeleteCount: 5, + existingCount: 6, + }), + VAULT, + ); + + expect(res.deleted).toBe(0); + expect(fs.rms).toEqual([]); + // Subject reflects 0 deleted (no ", N deleted" suffix). + expect(g.committedSubject).toBe('docmost: sync 1 page(s)'); + // The suppression warning was emitted. + expect(console.warn).toHaveBeenCalledWith( + expect.stringMatching(/tree fetch incomplete/), + ); + }); + + it('applies deletions present in toDelete when the decision allows them', async () => { + const { client } = makeClient(); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [{ pageId: 'p1', relPath: 'A.md' }], + toDelete: ['Dead1.md', 'Dead2.md'], + deletionDecision: APPLY, + plannedDeleteCount: 2, + existingCount: 5, + }), + VAULT, + ); + + expect(res.deleted).toBe(2); + expect(fs.rms).toEqual(['/vault/Dead1.md', '/vault/Dead2.md']); + // Subject reflects ACTUAL written + deleted counts. + expect(g.committedSubject).toBe('docmost: sync 1 page(s), 2 deleted'); + }); +}); + +describe('applyPullActions — commit subject reflects ACTUAL counts', () => { + it('counts only SUCCESSFUL writes when some page fetches fail', async () => { + // p2 fetch fails; the subject must say 1 page (only p1 was written), not 2. 
+ const { client } = makeClient({ failFor: new Set(['p2']) }); + const g = makeGit(); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ + toWrite: [ + { pageId: 'p1', relPath: 'A.md' }, + { pageId: 'p2', relPath: 'B.md' }, + ], + }), + VAULT, + ); + + expect(res.written).toBe(1); + expect(res.failed).toBe(1); + expect(g.committedSubject).toBe('docmost: sync 1 page(s)'); + }); +}); + +describe('applyPullActions — merge result is surfaced, not swallowed', () => { + it('returns conflict:true on a conflicting merge (no auto-resolve)', async () => { + const { client } = makeClient(); + const g = makeGit({ ok: false, conflict: true, output: 'CONFLICT' }); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }), + VAULT, + ); + expect(res.merge.conflict).toBe(true); + expect(res.merge.ok).toBe(false); + }); + + it('returns ok:false conflict:false on a non-conflict merge failure', async () => { + const { client } = makeClient(); + const g = makeGit({ ok: false, conflict: false, output: 'some error' }); + const fs = makeFs(); + + const res = await applyPullActions( + deps(client, g.git, fs), + actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }), + VAULT, + ); + expect(res.merge.ok).toBe(false); + expect(res.merge.conflict).toBe(false); + }); +}); diff --git a/packages/git-sync/test/apply-push-actions.test.ts b/packages/git-sync/test/apply-push-actions.test.ts new file mode 100644 index 00000000..a56e8357 --- /dev/null +++ b/packages/git-sync/test/apply-push-actions.test.ts @@ -0,0 +1,655 @@ +import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest'; +import { applyPushActions, LAST_PUSHED_REF } from '../src/engine/push'; +import { bodyHash } from '../src/engine/loop-guard'; +import type { ApplyPushDeps, PushActions } from '../src/engine/push'; +import { + parseDocmostMarkdown, + serializeDocmostMarkdownBody, 
+} from '../src/lib/index'; + +// FS→Docmost push, FIRST increment (SPEC §6). `applyPushActions` is the THIN IO +// half: create/update/delete via FAKES that record every call — no real network, +// git, or fs. Asserts: update uses importPageMarkdown (collab path, SPEC +// §2/§15.6); create writes the assigned pageId BACK into the file meta; delete +// soft-deletes; rename/move calls movePage/renamePage (a path-only rename is a +// noop with NO client call); the last-pushed ref is advanced. + +/** A recording client fake; createPage returns a configurable assigned id. */ +function makeClient(opts?: { createId?: string }) { + const client = { + importPageMarkdown: vi.fn(async (_pageId: string, _md: string) => ({ + success: true, + })), + createPage: vi.fn( + async ( + title: string, + _content: string, + _spaceId: string, + _parentPageId?: string, + ) => ({ + // Mirrors the real `createPage` shape: `{ data: { id, ... }, success }`. + data: { id: opts?.createId ?? 'assigned-id', title }, + success: true, + }), + ), + deletePage: vi.fn(async (_pageId: string) => ({ success: true })), + movePage: vi.fn( + async ( + _pageId: string, + _parentPageId: string | null, + _position?: string, + ) => ({ success: true }), + ), + renamePage: vi.fn(async (pageId: string, title: string) => ({ + success: true, + pageId, + title, + })), + }; + return client; +} + +/** + * A recording git fake: `updateRef` (advance last-pushed) and `fastForwardBranch` + * (advance the `docmost` mirror, the loop-close). `ffResult` configures what the + * ff returns (default a successful advance). + */ +function makeGit(opts?: { + ffResult?: { ok: boolean; reason?: string }; + /** Pre-image tree at `refs/docmost/last-pushed` (path -> text). */ + prevTree?: Record; +}) { + const updateRefCalls: { ref: string; target: string }[] = []; + const ffCalls: { branch: string; toCommit: string }[] = []; + const prevTree = opts?.prevTree ??
{}; + const git = { + updateRef: vi.fn(async (ref: string, target: string) => { + updateRefCalls.push({ ref, target }); + }), + fastForwardBranch: vi.fn(async (branch: string, toCommit: string) => { + ffCalls.push({ branch, toCommit }); + return opts?.ffResult ?? { ok: true }; + }), + // The move/rename classifier reads the PREVIOUS parent folder's `.md` at + // refs/docmost/last-pushed via this; `null` when absent there (SPEC §5). + showFileAtRef: vi.fn(async (_ref: string, path: string) => + path in prevTree ? prevTree[path] : null, + ), + }; + return { git, updateRefCalls, ffCalls }; +} + +/** A recording fs fake over a path->text store. */ +function makeFs(initial: Record = {}) { + const store: Record = { ...initial }; + const writes: { path: string; text: string }[] = []; + const reads: string[] = []; + const fs = { + readFile: vi.fn(async (path: string) => { + reads.push(path); + if (!(path in store)) throw new Error(`no such file: ${path}`); + return store[path]; + }), + writeFile: vi.fn(async (path: string, text: string) => { + store[path] = text; + writes.push({ path, text }); + }), + }; + return { fs, store, writes, reads }; +} + +function deps(client: any, git: any, fs: ReturnType): ApplyPushDeps { + return { + client, + git, + readFile: fs.fs.readFile, + writeFile: fs.fs.writeFile, + }; +} + +function actions(partial: Partial): PushActions { + return { + creates: [], + updates: [], + deletes: [], + renamesMoves: [], + skipped: [], + ...partial, + }; +} + +beforeEach(() => { + vi.spyOn(console, 'log').mockImplementation(() => {}); + vi.spyOn(console, 'warn').mockImplementation(() => {}); +}); + +afterEach(() => { + vi.restoreAllMocks(); +}); + +describe('applyPushActions — update (collab path, SPEC §2/§15.6)', () => { + it('reads the file body and calls importPageMarkdown with it', async () => { + const fileBody = + '\n\nupdated body\n'; + const client = makeClient(); + const { git } = makeGit(); + const fs = makeFs({ 'Doc.md': fileBody }); + + const res 
= await applyPushActions( + deps(client, git, fs), + actions({ updates: [{ pageId: 'p-1', path: 'Doc.md' }] }), + ); + + expect(res.updated).toBe(1); + // The collab/Yjs write path is used — NOT a raw jsonb overwrite. + expect(client.importPageMarkdown).toHaveBeenCalledTimes(1); + expect(client.importPageMarkdown).toHaveBeenCalledWith('p-1', fileBody); + // No raw-overwrite path exists on the injected client surface at all. + expect((client as any).updatePageJson).toBeUndefined(); + expect(client.createPage).not.toHaveBeenCalled(); + expect(client.deletePage).not.toHaveBeenCalled(); + }); +}); + +describe('applyPushActions — create (assigned pageId written back to meta)', () => { + it('createPage is called and the new pageId is serialized back into the file', async () => { + // A brand-new local file: meta has title/spaceId but NO pageId yet. + const original = serializeDocmostMarkdownBody( + { version: 1, title: 'My New Page', spaceId: 'sp-7', parentPageId: 'parent-9' }, + '# My New Page\n\nbody text', + ); + const client = makeClient({ createId: 'page-new-42' }); + const { git } = makeGit(); + const fs = makeFs({ 'New.md': original }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ creates: [{ path: 'New.md' }] }), + ); + + expect(res.created).toBe(1); + // createPage was called with title/body/spaceId/parentPageId from meta. + expect(client.createPage).toHaveBeenCalledTimes(1); + const [title, content, spaceId, parentPageId] = + client.createPage.mock.calls[0]; + expect(title).toBe('My New Page'); + expect(spaceId).toBe('sp-7'); + expect(parentPageId).toBe('parent-9'); + expect(content).toContain('body text'); + + // The file was rewritten with the assigned pageId in meta... + expect(fs.writes.map((w) => w.path)).toEqual(['New.md']); + const rewritten = fs.store['New.md']; + const parsed = parseDocmostMarkdown(rewritten); + expect(parsed.meta?.pageId).toBe('page-new-42'); + // ...preserving the rest of the meta and the body. 
+ expect(parsed.meta?.title).toBe('My New Page'); + expect(parsed.meta?.spaceId).toBe('sp-7'); + expect(parsed.body).toContain('body text'); + + // The write-back is recorded so a follow-up commit can be made (NEXT inc). + expect(res.writtenBack).toEqual([{ path: 'New.md', pageId: 'page-new-42' }]); + }); +}); + +describe('applyPushActions — delete (soft-delete to Trash, SPEC §8)', () => { + it('calls deletePage(pageId)', async () => { + const client = makeClient(); + const { git } = makeGit(); + const fs = makeFs(); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ deletes: [{ pageId: 'p-del' }] }), + ); + + expect(res.deleted).toBe(1); + expect(client.deletePage).toHaveBeenCalledTimes(1); + expect(client.deletePage).toHaveBeenCalledWith('p-del'); + // No body read needed for a delete. + expect(fs.reads).toEqual([]); + }); +}); + +// FS→Docmost push #3 (SPEC §5/§6/§16): the move/rename APPLY. The classifier +// resolves the parent from the FILE PATH (the enclosing folder's `.md`), not +// stale `meta.parentPageId`, then `applyPushActions` calls move_page / rename_page +// (both for a reparent+retitle) or records a path-only NO-OP with NO client call. + +/** + * Helper: a self-contained file with the given pageId + title in its meta. Used + * both to seed the working tree (fs) and the prev tree (git.showFileAtRef). + */ +function fileWith(meta: { pageId: string; title?: string }): string { + return serializeDocmostMarkdownBody( + { version: 1, pageId: meta.pageId, ...(meta.title ? { title: meta.title } : {}) }, + 'body', + ); +} + +describe('applyPushActions — move (parent changed, title same; SPEC §5/§16)', () => { + it('calls movePage(pageId, newParent) and NOT renamePage', async () => { + // The page moved from the space root (Doc.md) under a folder (Parent/Doc.md). + // The new parent page's file is `Parent.md`; its meta carries the parent id. 
+ const client = makeClient(); + const { git } = makeGit({ + // Prev pre-image: the file used to sit at the root (parent ROOT). + prevTree: { 'Doc.md': fileWith({ pageId: 'p-mv', title: 'Doc' }) }, + }); + const fs = makeFs({ + // Current tree: the moved file + its new parent folder's `.md`. + 'Parent/Doc.md': fileWith({ pageId: 'p-mv', title: 'Doc' }), + 'Parent.md': fileWith({ pageId: 'parent-id', title: 'Parent' }), + }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ + renamesMoves: [ + { pageId: 'p-mv', oldPath: 'Doc.md', newPath: 'Parent/Doc.md' }, + ], + }), + ); + + expect(res.moved).toBe(1); + expect(res.renamed).toBe(0); + expect(client.movePage).toHaveBeenCalledTimes(1); + // Reparented under `parent-id`; position left UNDEFINED (client default). + expect(client.movePage).toHaveBeenCalledWith('p-mv', 'parent-id'); + expect(client.renamePage).not.toHaveBeenCalled(); + expect(res.noops).toEqual([]); + }); +}); + +describe('applyPushActions — move-to-root (newParent null; SPEC §16)', () => { + it('calls movePage(pageId, null) when the file lands at the space root', async () => { + const client = makeClient(); + const { git } = makeGit({ + // Prev: the file used to live under `Parent/`, so its old parent is the + // page whose file is `Parent.md` (parent-id). + prevTree: { + 'Parent/Doc.md': fileWith({ pageId: 'p-mv', title: 'Doc' }), + 'Parent.md': fileWith({ pageId: 'parent-id', title: 'Parent' }), + }, + }); + // Current: the file is now at the root -> no enclosing folder -> parent ROOT. 
+ const fs = makeFs({ 'Doc.md': fileWith({ pageId: 'p-mv', title: 'Doc' }) }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ + renamesMoves: [ + { pageId: 'p-mv', oldPath: 'Parent/Doc.md', newPath: 'Doc.md' }, + ], + }), + ); + + expect(res.moved).toBe(1); + expect(client.movePage).toHaveBeenCalledWith('p-mv', null); + expect(client.renamePage).not.toHaveBeenCalled(); + }); +}); + +describe('applyPushActions — rename (same parent, title changed; SPEC §5/§6)', () => { + it('calls renamePage(pageId, title) and NOT movePage', async () => { + // Same enclosing folder on both sides (parent unchanged), only the title + // changed in meta -> a pure rename. + const client = makeClient(); + const { git } = makeGit({ + prevTree: { + 'Folder/Old.md': fileWith({ pageId: 'p-rn', title: 'Old Title' }), + 'Folder.md': fileWith({ pageId: 'folder-id', title: 'Folder' }), + }, + }); + const fs = makeFs({ + 'Folder/New.md': fileWith({ pageId: 'p-rn', title: 'New Title' }), + 'Folder.md': fileWith({ pageId: 'folder-id', title: 'Folder' }), + }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ + renamesMoves: [ + { pageId: 'p-rn', oldPath: 'Folder/Old.md', newPath: 'Folder/New.md' }, + ], + }), + ); + + expect(res.renamed).toBe(1); + expect(res.moved).toBe(0); + expect(client.renamePage).toHaveBeenCalledTimes(1); + expect(client.renamePage).toHaveBeenCalledWith('p-rn', 'New Title'); + expect(client.movePage).not.toHaveBeenCalled(); + }); +}); + +describe('applyPushActions — both (reparent + retitle; move THEN rename)', () => { + it('calls movePage first, then renamePage', async () => { + const callOrder: string[] = []; + const client = makeClient(); + client.movePage.mockImplementation(async () => { + callOrder.push('move'); + return { success: true }; + }); + client.renamePage.mockImplementation(async (pageId: string, title: string) => { + callOrder.push('rename'); + return { success: true, pageId, title }; + }); + const { 
git } = makeGit({ + // Prev: at root (parent ROOT) with the old title. + prevTree: { 'Old.md': fileWith({ pageId: 'p-x', title: 'Old' }) }, + }); + const fs = makeFs({ + // Current: under a new folder AND retitled. + 'NewParent/New.md': fileWith({ pageId: 'p-x', title: 'New' }), + 'NewParent.md': fileWith({ pageId: 'np-id', title: 'NewParent' }), + }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ + renamesMoves: [ + { pageId: 'p-x', oldPath: 'Old.md', newPath: 'NewParent/New.md' }, + ], + }), + ); + + expect(res.moved).toBe(1); + expect(res.renamed).toBe(1); + expect(client.movePage).toHaveBeenCalledWith('p-x', 'np-id'); + expect(client.renamePage).toHaveBeenCalledWith('p-x', 'New'); + // Order matters: reparent FIRST, then retitle. + expect(callOrder).toEqual(['move', 'rename']); + }); +}); + +describe('applyPushActions — noop (path-only rename; NO Docmost call; SPEC §5)', () => { + it('calls NEITHER movePage NOR renamePage and records the noop', async () => { + // Same enclosing folder AND same title on both sides: a purely LOCAL file + // rename. The page is its pageId; the path is cosmetic -> Docmost untouched. + const client = makeClient(); + const { git } = makeGit({ + prevTree: { + 'Folder/A.md': fileWith({ pageId: 'p-noop', title: 'Same' }), + 'Folder.md': fileWith({ pageId: 'folder-id', title: 'Folder' }), + }, + }); + const fs = makeFs({ + 'Folder/B.md': fileWith({ pageId: 'p-noop', title: 'Same' }), + 'Folder.md': fileWith({ pageId: 'folder-id', title: 'Folder' }), + }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ + renamesMoves: [ + { pageId: 'p-noop', oldPath: 'Folder/A.md', newPath: 'Folder/B.md' }, + ], + }), + ); + + expect(res.moved).toBe(0); + expect(res.renamed).toBe(0); + // ZERO Docmost calls for a cosmetic rename. 
+ expect(client.movePage).not.toHaveBeenCalled(); + expect(client.renamePage).not.toHaveBeenCalled(); + expect(res.noops).toEqual([ + { + pageId: 'p-noop', + oldPath: 'Folder/A.md', + newPath: 'Folder/B.md', + reason: 'path-only-rename', + }, + ]); + }); +}); + +describe('applyPushActions — move whose client call throws (SPEC §12 isolation)', () => { + it('isolates the failure into `failures` and does NOT advance the refs', async () => { + const client = makeClient(); + client.movePage.mockImplementation(async () => { + throw new Error('move boom'); + }); + const { git, updateRefCalls, ffCalls } = makeGit({ + prevTree: { 'Doc.md': fileWith({ pageId: 'p-mv', title: 'Doc' }) }, + }); + const fs = makeFs({ + 'Parent/Doc.md': fileWith({ pageId: 'p-mv', title: 'Doc' }), + 'Parent.md': fileWith({ pageId: 'parent-id', title: 'Parent' }), + }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ + renamesMoves: [ + { pageId: 'p-mv', oldPath: 'Doc.md', newPath: 'Parent/Doc.md' }, + ], + }), + 'sha-move-fail', + ); + + expect(res.moved).toBe(0); + expect(res.failures).toEqual([ + { + kind: 'move', + pageId: 'p-mv', + path: 'Parent/Doc.md', + error: 'move boom', + }, + ]); + // A failure means the refs are NOT advanced — a re-run retries cleanly (§12). 
+ expect(res.lastPushedAdvanced).toBe(false); + expect(updateRefCalls).toEqual([]); + expect(ffCalls).toEqual([]); + expect(git.updateRef).not.toHaveBeenCalled(); + }); +}); + +describe('applyPushActions — loop-close: ref advance + docmost ff (SPEC §6 step 3 / §10)', () => { + it('advances last-pushed AND fast-forwards the docmost mirror on a clean push', async () => { + const client = makeClient(); + const { git, updateRefCalls, ffCalls } = makeGit(); + const fs = makeFs(); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ deletes: [{ pageId: 'p' }] }), + 'commit-sha-abc', + ); + + expect(res.lastPushedAdvanced).toBe(true); + expect(updateRefCalls).toEqual([ + { ref: LAST_PUSHED_REF, target: 'commit-sha-abc' }, + ]); + // The loop-close: the docmost mirror is fast-forwarded to the pushed commit. + expect(ffCalls).toEqual([{ branch: 'docmost', toCommit: 'commit-sha-abc' }]); + expect(res.docmostFastForward).toEqual({ ok: true }); + }); + + it('surfaces a REFUSED non-fast-forward (mirror NOT clobbered)', async () => { + const client = makeClient(); + // The ff is refused because docmost is not an ancestor of the pushed commit. + const { git, updateRefCalls, ffCalls } = makeGit({ + ffResult: { ok: false, reason: 'not-fast-forward' }, + }); + const fs = makeFs(); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ deletes: [{ pageId: 'p' }] }), + 'sha-div', + ); + + // last-pushed still advances (it is our own marker), but the ff result is + // surfaced so the caller can log the refusal. 
+ expect(res.lastPushedAdvanced).toBe(true); + expect(updateRefCalls).toEqual([{ ref: LAST_PUSHED_REF, target: 'sha-div' }]); + expect(ffCalls).toEqual([{ branch: 'docmost', toCommit: 'sha-div' }]); + expect(res.docmostFastForward).toEqual({ ok: false, reason: 'not-fast-forward' }); + }); + + it('does NOT advance either ref when no pushed commit is given', async () => { + const client = makeClient(); + const { git, updateRefCalls } = makeGit(); + const fs = makeFs(); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ updates: [] }), + ); + + expect(res.lastPushedAdvanced).toBe(false); + expect(updateRefCalls).toEqual([]); + expect(res.docmostFastForward).toBeNull(); + expect(git.updateRef).not.toHaveBeenCalled(); + expect(git.fastForwardBranch).not.toHaveBeenCalled(); + }); +}); + +describe('applyPushActions — per-page error isolation + refs gated on success (SPEC §12)', () => { + it('continues the batch when an update throws; records the failure; refs NOT advanced', async () => { + // A client whose 2nd importPageMarkdown call throws — the 1st and 3rd must + // still be applied, the 2nd recorded as a failure, and NO ref advanced. + let call = 0; + const client = { + importPageMarkdown: vi.fn(async (_pageId: string, _md: string) => { + call++; + if (call === 2) throw new Error('boom on page 2'); + return { success: true }; + }), + createPage: vi.fn(), + deletePage: vi.fn(), + }; + const { git, updateRefCalls, ffCalls } = makeGit(); + const fs = makeFs({ + 'A.md': 'a body', + 'B.md': 'b body', + 'C.md': 'c body', + }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ + updates: [ + { pageId: 'p-a', path: 'A.md' }, + { pageId: 'p-b', path: 'B.md' }, + { pageId: 'p-c', path: 'C.md' }, + ], + }), + 'sha-partial', + ); + + // The 1st and 3rd were applied; the 2nd threw. 
+ expect(res.updated).toBe(2); + expect(client.importPageMarkdown).toHaveBeenCalledTimes(3); + expect(client.importPageMarkdown).toHaveBeenNthCalledWith(1, 'p-a', 'a body'); + expect(client.importPageMarkdown).toHaveBeenNthCalledWith(3, 'p-c', 'c body'); + + // The failure is recorded with kind/pageId/path/error. + expect(res.failures).toEqual([ + { kind: 'update', pageId: 'p-b', path: 'B.md', error: 'boom on page 2' }, + ]); + + // Only the successful pages carry a loop-guard push record. + expect(res.pushed.map((p) => p.pageId)).toEqual(['p-a', 'p-c']); + + // A PARTIAL push advances NEITHER ref, so a re-run retries cleanly (§12). + expect(res.lastPushedAdvanced).toBe(false); + expect(updateRefCalls).toEqual([]); + expect(ffCalls).toEqual([]); + expect(res.docmostFastForward).toBeNull(); + expect(git.updateRef).not.toHaveBeenCalled(); + expect(git.fastForwardBranch).not.toHaveBeenCalled(); + }); +}); + +describe('applyPushActions — loop-guard push record (SPEC §10)', () => { + it('records pageId + updatedAt + bodyHash per applied update', async () => { + const fileBody = + '\n\nupdated body\n'; + const client = { + importPageMarkdown: vi.fn(async (_pageId: string, _md: string) => ({ + // The write returns an updatedAt the loop-guard records. + data: { updatedAt: '2026-06-20T10:00:00.000Z' }, + success: true, + })), + createPage: vi.fn(), + deletePage: vi.fn(), + }; + const { git } = makeGit(); + const fs = makeFs({ 'Doc.md': fileBody }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ updates: [{ pageId: 'p-1', path: 'Doc.md' }] }), + ); + + expect(res.pushed).toHaveLength(1); + expect(res.pushed[0].pageId).toBe('p-1'); + expect(res.pushed[0].updatedAt).toBe('2026-06-20T10:00:00.000Z'); + // The bodyHash is a stable sha256 hex of the pushed markdown. 
+ expect(res.pushed[0].bodyHash).toBe(bodyHash(fileBody)); + expect(res.pushed[0].bodyHash).toMatch(/^[0-9a-f]{64}$/); + }); + + it('omits updatedAt when the client result does not expose one', async () => { + const newFile = serializeDocmostMarkdownBody( + { version: 1, title: 'N', spaceId: 'sp' }, + 'fresh body', + ); + const client = makeClient({ createId: 'created-9' }); + const { git } = makeGit(); + const fs = makeFs({ 'N.md': newFile }); + + const res = await applyPushActions( + deps(client, git, fs), + actions({ creates: [{ path: 'N.md' }] }), + ); + + expect(res.pushed).toHaveLength(1); + expect(res.pushed[0].pageId).toBe('created-9'); + expect(res.pushed[0].updatedAt).toBeUndefined(); + // bodyHash of the ORIGINAL pushed file text (what createPage received). + expect(res.pushed[0].bodyHash).toBe(bodyHash(newFile)); + }); +}); + +describe('applyPushActions — mixed batch + skipped passthrough', () => { + it('applies update + create + delete and carries skipped rows through', async () => { + const updFile = + '\n\nupd\n'; + const newFile = serializeDocmostMarkdownBody( + { version: 1, title: 'N', spaceId: 'sp' }, + 'fresh body', + ); + const client = makeClient({ createId: 'created-1' }); + const { git, updateRefCalls } = makeGit(); + const fs = makeFs({ 'U.md': updFile, 'N.md': newFile }); + + const skipped = [ + { path: 'Stray.md', status: 'D' as const, reason: 'no recoverable pageId' }, + ]; + const res = await applyPushActions( + deps(client, git, fs), + actions({ + updates: [{ pageId: 'u-1', path: 'U.md' }], + creates: [{ path: 'N.md' }], + deletes: [{ pageId: 'd-1' }], + skipped, + }), + 'sha-9', + ); + + expect(res).toMatchObject({ + created: 1, + updated: 1, + deleted: 1, + lastPushedAdvanced: true, + }); + expect(res.writtenBack).toEqual([{ path: 'N.md', pageId: 'created-1' }]); + expect(res.skipped).toEqual(skipped); + expect(updateRefCalls).toEqual([{ ref: LAST_PUSHED_REF, target: 'sha-9' }]); + 
expect(client.importPageMarkdown).toHaveBeenCalledWith('u-1', updFile); + expect(client.deletePage).toHaveBeenCalledWith('d-1'); + }); +}); diff --git a/packages/git-sync/test/classify-rename-moves.test.ts b/packages/git-sync/test/classify-rename-moves.test.ts new file mode 100644 index 00000000..ceb00285 --- /dev/null +++ b/packages/git-sync/test/classify-rename-moves.test.ts @@ -0,0 +1,263 @@ +import { describe, expect, it } from 'vitest'; +import { classifyRenameMoves } from '../src/engine/push'; +import type { + ClassifyRenameMovesDeps, + MetaSide, + RenameMoveAction, +} from '../src/engine/push'; +import type { DocmostMdMeta } from '../src/lib/index'; + +// FS→Docmost push #3 (SPEC §5/§6/§16). `classifyRenameMoves` is the PURE half of +// the move/rename apply: it resolves each `{pageId, oldPath, newPath}` into the +// Docmost op(s) it needs, with NO IO (both resolvers are injected). The key +// design (SPEC §5) is that the file PATH is the source of truth for tree +// position — the NEW parent comes from the new path, the OLD parent from the old +// path — and the title comes from the meta. An op is emitted ONLY when something +// really changed; a path-only rename (same parent + same title) is a noop and +// NEVER calls Docmost. + +/** Build `metaAt` from a `path|side -> meta` table. */ +function metaTable( + table: Record, +): (path: string, side: MetaSide) => DocmostMdMeta | null { + return (path, side) => { + const key = `${path}|${side}`; + return key in table ? table[key] : null; + }; +} + +/** Build `resolveParentPageId` from a `path|side -> parentPageId|null` table. */ +function parentTable( + table: Record, +): (path: string, side: MetaSide) => string | null { + return (path, side) => { + const key = `${path}|${side}`; + return key in table ? 
table[key] : null; + }; +} + +function deps( + metas: Record, + parents: Record, +): ClassifyRenameMovesDeps { + return { + metaAt: metaTable(metas), + resolveParentPageId: parentTable(parents), + }; +} + +function meta(partial: Partial): DocmostMdMeta { + return { version: 1, ...partial }; +} + +describe('classifyRenameMoves — move-only (parent changed, title same)', () => { + it('emits move (new parent) and NO rename', () => { + const rms: RenameMoveAction[] = [ + { pageId: 'p1', oldPath: 'Doc.md', newPath: 'Parent/Doc.md' }, + ]; + const out = classifyRenameMoves( + rms, + deps( + { + // Same title on both sides. + 'Parent/Doc.md|current': meta({ title: 'Doc' }), + 'Doc.md|prev': meta({ title: 'Doc' }), + }, + { + // Parent changed: root (null) -> 'parent-id'. + 'Parent/Doc.md|current': 'parent-id', + 'Doc.md|prev': null, + }, + ), + ); + expect(out).toEqual([ + { + pageId: 'p1', + oldPath: 'Doc.md', + newPath: 'Parent/Doc.md', + move: { parentPageId: 'parent-id' }, + }, + ]); + expect(out[0].rename).toBeUndefined(); + expect(out[0].noop).toBeUndefined(); + }); +}); + +describe('classifyRenameMoves — rename-only (same parent, title changed)', () => { + it('emits rename (new title) and NO move', () => { + const rms: RenameMoveAction[] = [ + { pageId: 'p2', oldPath: 'Folder/Old.md', newPath: 'Folder/New.md' }, + ]; + const out = classifyRenameMoves( + rms, + deps( + { + 'Folder/New.md|current': meta({ title: 'New Title' }), + 'Folder/Old.md|prev': meta({ title: 'Old Title' }), + }, + { + // Same parent on both sides. 
+ 'Folder/New.md|current': 'folder-id', + 'Folder/Old.md|prev': 'folder-id', + }, + ), + ); + expect(out).toEqual([ + { + pageId: 'p2', + oldPath: 'Folder/Old.md', + newPath: 'Folder/New.md', + rename: { title: 'New Title' }, + }, + ]); + expect(out[0].move).toBeUndefined(); + expect(out[0].noop).toBeUndefined(); + }); +}); + +describe('classifyRenameMoves — both (parent AND title changed)', () => { + it('emits BOTH move and rename', () => { + const rms: RenameMoveAction[] = [ + { pageId: 'p3', oldPath: 'Old.md', newPath: 'NewParent/New.md' }, + ]; + const out = classifyRenameMoves( + rms, + deps( + { + 'NewParent/New.md|current': meta({ title: 'New' }), + 'Old.md|prev': meta({ title: 'Old' }), + }, + { + 'NewParent/New.md|current': 'np-id', + 'Old.md|prev': null, + }, + ), + ); + expect(out).toEqual([ + { + pageId: 'p3', + oldPath: 'Old.md', + newPath: 'NewParent/New.md', + move: { parentPageId: 'np-id' }, + rename: { title: 'New' }, + }, + ]); + expect(out[0].noop).toBeUndefined(); + }); +}); + +describe('classifyRenameMoves — noop (path-only rename, same parent + title)', () => { + it('emits noop and NEITHER move NOR rename (SPEC §5: page is its pageId)', () => { + const rms: RenameMoveAction[] = [ + { pageId: 'p4', oldPath: 'Folder/A.md', newPath: 'Folder/B.md' }, + ]; + const out = classifyRenameMoves( + rms, + deps( + { + 'Folder/B.md|current': meta({ title: 'Same' }), + 'Folder/A.md|prev': meta({ title: 'Same' }), + }, + { + 'Folder/B.md|current': 'folder-id', + 'Folder/A.md|prev': 'folder-id', + }, + ), + ); + expect(out).toEqual([ + { + pageId: 'p4', + oldPath: 'Folder/A.md', + newPath: 'Folder/B.md', + noop: true, + }, + ]); + expect(out[0].move).toBeUndefined(); + expect(out[0].rename).toBeUndefined(); + }); +}); + +describe('classifyRenameMoves — move-to-root (newParent null)', () => { + it('emits move with parentPageId null when the file lands at the space root', () => { + const rms: RenameMoveAction[] = [ + { pageId: 'p5', oldPath: 'Parent/Doc.md', 
newPath: 'Doc.md' }, + ]; + const out = classifyRenameMoves( + rms, + deps( + { + 'Doc.md|current': meta({ title: 'Doc' }), + 'Parent/Doc.md|prev': meta({ title: 'Doc' }), + }, + { + // New parent is ROOT (null), old parent was 'parent-id'. + 'Doc.md|current': null, + 'Parent/Doc.md|prev': 'parent-id', + }, + ), + ); + expect(out).toEqual([ + { + pageId: 'p5', + oldPath: 'Parent/Doc.md', + newPath: 'Doc.md', + move: { parentPageId: null }, + }, + ]); + expect(out[0].rename).toBeUndefined(); + expect(out[0].noop).toBeUndefined(); + }); +}); + +describe('classifyRenameMoves — title guards', () => { + it('an EMPTY new title is NOT a rename (even if it differs from old)', () => { + const rms: RenameMoveAction[] = [ + { pageId: 'p6', oldPath: 'Folder/A.md', newPath: 'Folder/B.md' }, + ]; + const out = classifyRenameMoves( + rms, + deps( + { + // New title is empty -> never a rename; same parent -> overall noop. + 'Folder/B.md|current': meta({ title: '' }), + 'Folder/A.md|prev': meta({ title: 'Had A Title' }), + }, + { + 'Folder/B.md|current': 'folder-id', + 'Folder/A.md|prev': 'folder-id', + }, + ), + ); + expect(out[0].rename).toBeUndefined(); + expect(out[0].move).toBeUndefined(); + expect(out[0].noop).toBe(true); + }); + + it('a missing new meta is NOT a rename; a parent change still yields a move', () => { + const rms: RenameMoveAction[] = [ + { pageId: 'p7', oldPath: 'Doc.md', newPath: 'Parent/Doc.md' }, + ]; + const out = classifyRenameMoves( + rms, + deps( + { + // No current meta entry at all (resolver returns null). 
+ 'Doc.md|prev': meta({ title: 'Doc' }), + }, + { + 'Parent/Doc.md|current': 'parent-id', + 'Doc.md|prev': null, + }, + ), + ); + expect(out[0].move).toEqual({ parentPageId: 'parent-id' }); + expect(out[0].rename).toBeUndefined(); + expect(out[0].noop).toBeUndefined(); + }); +}); + +describe('classifyRenameMoves — empty input', () => { + it('returns an empty array for no rename/move entries', () => { + expect(classifyRenameMoves([], deps({}, {}))).toEqual([]); + }); +}); diff --git a/packages/git-sync/test/compute-pull-actions.test.ts b/packages/git-sync/test/compute-pull-actions.test.ts new file mode 100644 index 00000000..df26d94b --- /dev/null +++ b/packages/git-sync/test/compute-pull-actions.test.ts @@ -0,0 +1,193 @@ +import { describe, expect, it } from 'vitest'; +import { computePullActions } from '../src/engine/pull'; +import type { PageNode } from '../src/engine/layout'; + +// R-Pull-2 (test-strategy report §5): `computePullActions` is the PURE half of +// the pull cycle — layout + planReconciliation + the SPEC §8 absence-deletion +// suppression decision, folded together, with NO IO. These tests exercise it +// without git/fs/network. The thin IO applier is covered in apply-pull-actions. + +/** A live tree node (only the fields the layout / reconciliation read). */ +function node( + id: string, + title: string, + parentPageId: string | null = null, + hasChildren = false, +): PageNode { + return { id, title, slugId: id, parentPageId, hasChildren }; +} + +describe('computePullActions — normal complete fetch', () => { + it('builds toWrite from the live layout and an empty existing set (all adds)', () => { + const pages = [ + node('root', 'Root', null, true), + node('child', 'Child', 'root'), + ]; + const actions = computePullActions({ + pages, + treeComplete: true, + existing: [], + }); + // Each live page is (re)written at its deterministic layout path. 
+ expect(actions.toWrite).toEqual([ + { pageId: 'root', relPath: 'Root.md' }, + { pageId: 'child', relPath: 'Root/Child.md' }, + ]); + expect(actions.moved).toEqual([]); + expect(actions.toDelete).toEqual([]); + expect(actions.deletionDecision).toEqual({ apply: true }); + }); + + it('plans toWrite / moved / toDelete correctly for a mixed reconciliation', () => { + const pages = [ + node('keep', 'Keep'), + node('mover', 'Mover'), + node('fresh', 'Fresh'), + ]; + // existing: keep (same path), mover (old path -> move), dead (absent -> delete). + const existing = [ + { pageId: 'keep', relPath: 'Keep.md' }, + { pageId: 'mover', relPath: 'Old/Mover.md' }, + { pageId: 'dead', relPath: 'Dead.md' }, + ]; + const actions = computePullActions({ pages, treeComplete: true, existing }); + + expect(actions.toWrite).toEqual([ + { pageId: 'keep', relPath: 'Keep.md' }, + { pageId: 'mover', relPath: 'Mover.md' }, + { pageId: 'fresh', relPath: 'Fresh.md' }, + ]); + // mover moved from Old/Mover.md to the new layout path Mover.md. + expect(actions.moved).toEqual([ + { + pageId: 'mover', + fromRelPath: 'Old/Mover.md', + toRelPath: 'Mover.md', + removeOldPath: true, + }, + ]); + // dead is absent from live -> an absence delete (decision applies it). + expect(actions.toDelete).toEqual(['Dead.md']); + expect(actions.deletionDecision).toEqual({ apply: true }); + }); + + it('a live page moved to a NEW path is in `moved`, its old path NOT in toDelete', () => { + const pages = [node('p1', 'Doc', 'newparent'), node('newparent', 'NewParent', null, true)]; + const existing = [{ pageId: 'p1', relPath: 'OldParent/Doc.md' }]; + const actions = computePullActions({ pages, treeComplete: true, existing }); + + const moved = actions.moved.find((m) => m.pageId === 'p1'); + expect(moved).toBeTruthy(); + expect(moved!.fromRelPath).toBe('OldParent/Doc.md'); + expect(moved!.toRelPath).toBe('NewParent/Doc.md'); + // The old path is a MOVE removal, NEVER an absence delete. 
+ expect(actions.toDelete).not.toContain('OldParent/Doc.md'); + expect(actions.toDelete).toEqual([]); + }); +}); + +describe('computePullActions — SPEC §8 suppression folded in', () => { + it('INCOMPLETE fetch (treeComplete:false) SUPPRESSES absence deletions', () => { + // dead is absent from the live tree, but the tree fetch was partial -> the + // missing pageId is NOT proof of deletion, so toDelete must be EMPTY and the + // decision must report apply:false / incomplete-fetch. + const pages = [node('keep', 'Keep')]; + const existing = [ + { pageId: 'keep', relPath: 'Keep.md' }, + { pageId: 'dead', relPath: 'Dead.md' }, + ]; + const actions = computePullActions({ + pages, + treeComplete: false, + existing, + }); + + expect(actions.deletionDecision).toEqual({ + apply: false, + reason: 'incomplete-fetch', + }); + // Suppressed: nothing to delete this cycle... + expect(actions.toDelete).toEqual([]); + // ...but the planned count is still reported (for the suppression log). + expect(actions.plannedDeleteCount).toBe(1); + // Writes/updates still happen regardless of the suppression. + expect(actions.toWrite).toEqual([{ pageId: 'keep', relPath: 'Keep.md' }]); + }); + + it('MASS-DELETE guard (>50% of a non-trivial vault) SUPPRESSES deletions', () => { + // 1 live page, 10 existing tracked, 9 of them absent -> 9/10 > 50% on a + // non-trivial (>=4) vault -> mass-delete suppression. 
+ const pages = [node('p0', 'P0')]; + const existing = [ + { pageId: 'p0', relPath: 'P0.md' }, + ...Array.from({ length: 9 }, (_, i) => ({ + pageId: `gone${i}`, + relPath: `Gone${i}.md`, + })), + ]; + const actions = computePullActions({ pages, treeComplete: true, existing }); + + expect(actions.deletionDecision).toEqual({ + apply: false, + reason: 'mass-delete', + }); + expect(actions.toDelete).toEqual([]); + expect(actions.plannedDeleteCount).toBe(9); + expect(actions.existingCount).toBe(10); + }); + + it('moves are NOT suppressed even on an incomplete fetch', () => { + // A moved page is PRESENT in live, so its move is real regardless of the + // suppression (which only governs ABSENCE deletes). + const pages = [node('m', 'Moved')]; + const existing = [{ pageId: 'm', relPath: 'Old/Moved.md' }]; + const actions = computePullActions({ + pages, + treeComplete: false, + existing, + }); + expect(actions.moved).toEqual([ + { + pageId: 'm', + fromRelPath: 'Old/Moved.md', + toRelPath: 'Moved.md', + removeOldPath: true, + }, + ]); + // No absence deletes were planned here, so the decision trivially applies. 
+ expect(actions.toDelete).toEqual([]); + }); + + it('empty-live with tracked files SUPPRESSES (failed fetch, not a real wipe)', () => { + const existing = [ + { pageId: 'a', relPath: 'A.md' }, + { pageId: 'b', relPath: 'B.md' }, + ]; + const actions = computePullActions({ + pages: [], + treeComplete: true, + existing, + }); + expect(actions.deletionDecision).toEqual({ + apply: false, + reason: 'empty-live', + }); + expect(actions.toDelete).toEqual([]); + expect(actions.toWrite).toEqual([]); + }); +}); + +describe('computePullActions — degenerate inputs', () => { + it('skips nodes without an id and nodes with no layout entry', () => { + const pages = [ + node('p1', 'Valid'), + { id: '', title: 'NoId' } as PageNode, // skipped (no id) + ]; + const actions = computePullActions({ + pages, + treeComplete: true, + existing: [], + }); + expect(actions.toWrite).toEqual([{ pageId: 'p1', relPath: 'Valid.md' }]); + }); +}); diff --git a/packages/git-sync/test/compute-push-actions.test.ts b/packages/git-sync/test/compute-push-actions.test.ts new file mode 100644 index 00000000..0ccfcbad --- /dev/null +++ b/packages/git-sync/test/compute-push-actions.test.ts @@ -0,0 +1,225 @@ +import { describe, expect, it } from 'vitest'; +import { computePushActions } from '../src/engine/push'; +import type { DiffEntry, MetaSide } from '../src/engine/push'; +import type { DocmostMdMeta } from '../src/lib/index'; + +// FS→Docmost push, FIRST increment (SPEC §6). `computePushActions` is the PURE +// half: it classifies each `git diff --name-status` row into a Docmost action by +// `pageId` identity (SPEC §4/§8), with NO IO — the `metaAt` resolver is injected. +// These tests cover every classification incl. edges. + +/** Build a `metaAt` resolver from a `path|side -> meta` table. */ +function metaTable( + table: Record<string, DocmostMdMeta>, +): (path: string, side: MetaSide) => DocmostMdMeta | null { + return (path, side) => { + const key = `${path}|${side}`; + return key in table ? 
table[key] : null; + }; +} + +function meta(partial: Partial<DocmostMdMeta>): DocmostMdMeta { + return { version: 1, ...partial }; +} + +describe('computePushActions — A (added)', () => { + it('added file with NO pageId -> create', () => { + const changes: DiffEntry[] = [{ status: 'A', path: 'New.md' }]; + const metaAt = metaTable({ + 'New.md|current': meta({ title: 'New', spaceId: 'sp1' }), + }); + const actions = computePushActions({ changes, metaAt }); + expect(actions.creates).toEqual([{ path: 'New.md' }]); + expect(actions.updates).toEqual([]); + expect(actions.deletes).toEqual([]); + expect(actions.renamesMoves).toEqual([]); + expect(actions.skipped).toEqual([]); + }); + + it('added file with NO meta at all -> skipped (a create needs a spaceId)', () => { + // No meta -> no spaceId -> cannot create (Docmost create_page requires it). + const changes: DiffEntry[] = [{ status: 'A', path: 'Plain.md' }]; + const actions = computePushActions({ changes, metaAt: metaTable({}) }); + expect(actions.creates).toEqual([]); + expect(actions.skipped).toEqual([ + { path: 'Plain.md', status: 'A', reason: 'create-without-spaceId' }, + ]); + }); + + it('added file with meta but NO spaceId -> skipped (create-without-spaceId)', () => { + // Partial human meta (title only, no spaceId) -> refuse to create. + const changes: DiffEntry[] = [{ status: 'A', path: 'Partial.md' }]; + const metaAt = metaTable({ + 'Partial.md|current': meta({ title: 'Partial' }), + }); + const actions = computePushActions({ changes, metaAt }); + expect(actions.creates).toEqual([]); + expect(actions.skipped).toEqual([ + { path: 'Partial.md', status: 'A', reason: 'create-without-spaceId' }, + ]); + }); + + it('added file with an EMPTY-string spaceId -> skipped (create-without-spaceId)', () => { + // An empty spaceId is not a usable target either. 
+ const changes: DiffEntry[] = [{ status: 'A', path: 'Empty.md' }]; + const metaAt = metaTable({ + 'Empty.md|current': meta({ title: 'E', spaceId: '' }), + }); + const actions = computePushActions({ changes, metaAt }); + expect(actions.creates).toEqual([]); + expect(actions.skipped).toEqual([ + { path: 'Empty.md', status: 'A', reason: 'create-without-spaceId' }, + ]); + }); + + it('added file WITH a pageId (restored/copied) -> update (page exists)', () => { + const changes: DiffEntry[] = [{ status: 'A', path: 'Restored.md' }]; + const metaAt = metaTable({ + 'Restored.md|current': meta({ pageId: 'p-restored', title: 'R' }), + }); + const actions = computePushActions({ changes, metaAt }); + // The page already exists -> push content as an UPDATE, never a duplicate. + expect(actions.updates).toEqual([ + { pageId: 'p-restored', path: 'Restored.md' }, + ]); + expect(actions.creates).toEqual([]); + }); +}); + +describe('computePushActions — M (modified)', () => { + it('modified file with a pageId -> update content', () => { + const changes: DiffEntry[] = [{ status: 'M', path: 'Doc.md' }]; + const metaAt = metaTable({ + 'Doc.md|current': meta({ pageId: 'p-doc' }), + }); + const actions = computePushActions({ changes, metaAt }); + expect(actions.updates).toEqual([{ pageId: 'p-doc', path: 'Doc.md' }]); + expect(actions.skipped).toEqual([]); + }); + + it('modified file with NO pageId -> skipped (no target to update)', () => { + const changes: DiffEntry[] = [{ status: 'M', path: 'Untracked.md' }]; + const actions = computePushActions({ changes, metaAt: metaTable({}) }); + expect(actions.updates).toEqual([]); + expect(actions.skipped).toEqual([ + { + path: 'Untracked.md', + status: 'M', + reason: 'modified file has no pageId in meta', + }, + ]); + }); +}); + +describe('computePushActions — D (deleted)', () => { + it('deleted file recovers pageId from the PRE-IMAGE meta -> delete', () => { + const changes: DiffEntry[] = [{ status: 'D', path: 'Gone.md' }]; + // The file is gone 
from `current`; its pageId lives in the `prev` pre-image. + const metaAt = metaTable({ + 'Gone.md|prev': meta({ pageId: 'p-gone' }), + }); + const actions = computePushActions({ changes, metaAt }); + expect(actions.deletes).toEqual([{ pageId: 'p-gone' }]); + expect(actions.skipped).toEqual([]); + }); + + it('deleted file with NO recoverable pageId -> skipped (untracked guard §8)', () => { + const changes: DiffEntry[] = [{ status: 'D', path: 'Stray.md' }]; + // No pre-image pageId -> the untracked-file guard skips it (never deletes a + // page that was never tracked, SPEC §8). + const actions = computePushActions({ changes, metaAt: metaTable({}) }); + expect(actions.deletes).toEqual([]); + expect(actions.skipped).toEqual([ + { + path: 'Stray.md', + status: 'D', + reason: 'deleted file has no recoverable pageId (pre-image meta)', + }, + ]); + }); + + it('uses the PREV side, not current, to recover the deleted pageId', () => { + const changes: DiffEntry[] = [{ status: 'D', path: 'Gone.md' }]; + // A stale `current` meta must NOT be used; only the pre-image counts. + const metaAt = metaTable({ + 'Gone.md|current': meta({ pageId: 'WRONG' }), + 'Gone.md|prev': meta({ pageId: 'p-correct' }), + }); + const actions = computePushActions({ changes, metaAt }); + expect(actions.deletes).toEqual([{ pageId: 'p-correct' }]); + }); +}); + +describe('computePushActions — R/C (renamed/moved)', () => { + it('renamed file -> renamesMoves (record only; resolution deferred)', () => { + const changes: DiffEntry[] = [ + { status: 'R', path: 'New/Path.md', oldPath: 'Old/Path.md', score: 100 }, + ]; + const metaAt = metaTable({ + 'New/Path.md|current': meta({ pageId: 'p-moved' }), + }); + const actions = computePushActions({ changes, metaAt }); + expect(actions.renamesMoves).toEqual([ + { pageId: 'p-moved', oldPath: 'Old/Path.md', newPath: 'New/Path.md' }, + ]); + // It is NOT also recorded as a create/update/delete. 
+ expect(actions.creates).toEqual([]); + expect(actions.updates).toEqual([]); + expect(actions.deletes).toEqual([]); + }); + + it('copy (C) is recorded like a rename for the deferred apply', () => { + const changes: DiffEntry[] = [ + { status: 'C', path: 'Copy.md', oldPath: 'Src.md', score: 90 }, + ]; + const metaAt = metaTable({ + 'Copy.md|current': meta({ pageId: 'p-copy' }), + }); + const actions = computePushActions({ changes, metaAt }); + expect(actions.renamesMoves).toEqual([ + { pageId: 'p-copy', oldPath: 'Src.md', newPath: 'Copy.md' }, + ]); + }); + + it('renamed file with NO pageId -> skipped', () => { + const changes: DiffEntry[] = [ + { status: 'R', path: 'New.md', oldPath: 'Old.md', score: 100 }, + ]; + const actions = computePushActions({ changes, metaAt: metaTable({}) }); + expect(actions.renamesMoves).toEqual([]); + expect(actions.skipped).toEqual([ + { path: 'New.md', status: 'R', reason: 'renamed/moved file has no pageId in meta' }, + ]); + }); +}); + +describe('computePushActions — mixed batch', () => { + it('classifies a realistic mixed diff in one pass', () => { + const changes: DiffEntry[] = [ + { status: 'A', path: 'Fresh.md' }, // create + { status: 'A', path: 'Restored.md' }, // update (has pageId) + { status: 'M', path: 'Edited.md' }, // update + { status: 'D', path: 'Removed.md' }, // delete + { status: 'R', path: 'Dst.md', oldPath: 'Srcc.md', score: 100 }, // move + ]; + const metaAt = metaTable({ + 'Fresh.md|current': meta({ title: 'Fresh', spaceId: 'sp' }), + 'Restored.md|current': meta({ pageId: 'p-rest' }), + 'Edited.md|current': meta({ pageId: 'p-edit' }), + 'Removed.md|prev': meta({ pageId: 'p-rm' }), + 'Dst.md|current': meta({ pageId: 'p-mv' }), + }); + const actions = computePushActions({ changes, metaAt }); + + expect(actions.creates).toEqual([{ path: 'Fresh.md' }]); + expect(actions.updates).toEqual([ + { pageId: 'p-rest', path: 'Restored.md' }, + { pageId: 'p-edit', path: 'Edited.md' }, + ]); + 
expect(actions.deletes).toEqual([{ pageId: 'p-rm' }]); + expect(actions.renamesMoves).toEqual([ + { pageId: 'p-mv', oldPath: 'Srcc.md', newPath: 'Dst.md' }, + ]); + expect(actions.skipped).toEqual([]); + }); +}); diff --git a/packages/git-sync/test/config-errors-invalid.test.ts b/packages/git-sync/test/config-errors-invalid.test.ts new file mode 100644 index 00000000..14fabe12 --- /dev/null +++ b/packages/git-sync/test/config-errors-invalid.test.ts @@ -0,0 +1,139 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { z, ZodError } from 'zod'; +import { loadSettingsOrExit } from '../src/engine/config-errors'; +import { envSchema } from '../src/engine/settings'; + +// Companion to test/config-errors.test.ts. That file covers the success path, +// the MISSING-required (undefined -> invalid_type) -> exit branch, and the +// non-ZodError passthrough. This file fills the remaining GAP: the +// INVALID-VALUE branch (config-errors.ts lines ~20, 27-30). A ZodError whose +// issue is a CONSTRAINT violation (bad URL, bad enum, too-short string) is NOT +// a missing key, so it must be routed into the `invalid` bucket and reported +// under the "Invalid value(s)" heading with a `<VARIABLE>: <message>` line — a +// distinct, operator-facing message from the missing-variable case. +describe('loadSettingsOrExit — invalid-value branch', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + // Stub process.exit so it throws (control stops at the exit point without + // killing the runner) and capture everything written to stderr. Mirrors the + // approach in the existing config-errors.test.ts. 
+ function stubExitAndStderr() { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation((( + code?: number, + ) => { + throw new Error(`exit:${code}`); + }) as never); + const writeSpy = vi + .spyOn(process.stderr, 'write') + .mockImplementation(() => true); + const written = () => writeSpy.mock.calls.map((c) => String(c[0])).join(''); + return { exitSpy, writeSpy, written }; + } + + it('exits(1) and reports an invalid value (bad URL) under "Invalid value(s)"', () => { + const { exitSpy, written } = stubExitAndStderr(); + + // A present-but-invalid DOCMOST_API_URL: the value exists (so it is NOT a + // missing-key issue), but fails the .url() constraint -> goes to `invalid`. + expect(() => + loadSettingsOrExit(() => + envSchema.parse({ + DOCMOST_API_URL: 'not-a-url', + DOCMOST_EMAIL: 'ops@example.com', + DOCMOST_PASSWORD: 'hunter2', + DOCMOST_SPACE_ID: 'space-1', + }), + ), + ).toThrow('exit:1'); + + expect(exitSpy).toHaveBeenCalledWith(1); + const out = written(); + // The invalid-value heading must appear... + expect(out).toContain('Invalid value(s)'); + // ...and it must name the offending variable on a `<VARIABLE>: <message>` line. + expect(out).toContain('DOCMOST_API_URL:'); + // The header line is always present. + expect(out).toContain('Configuration error in environment / .env:'); + // It must NOT misreport an invalid value as a missing one. + expect(out).not.toContain('Missing required variable(s)'); + }); + + it('exits(1) and reports an invalid enum value (LOG_LEVEL)', () => { + const { exitSpy, written } = stubExitAndStderr(); + + // All required vars present and valid; only LOG_LEVEL violates the enum. 
+ expect(() => + loadSettingsOrExit(() => + envSchema.parse({ + DOCMOST_API_URL: 'https://docs.example.com/api', + DOCMOST_EMAIL: 'ops@example.com', + DOCMOST_PASSWORD: 'hunter2', + DOCMOST_SPACE_ID: 'space-1', + LOG_LEVEL: 'verbose', // not in ['debug','info','warn','error'] + }), + ), + ).toThrow('exit:1'); + + expect(exitSpy).toHaveBeenCalledWith(1); + const out = written(); + expect(out).toContain('Invalid value(s)'); + expect(out).toContain('LOG_LEVEL:'); + expect(out).not.toContain('Missing required variable(s)'); + }); + + it('routes a hand-built constraint-violation ZodError into the invalid bucket', () => { + const { exitSpy, written } = stubExitAndStderr(); + + // Construct the ZodError directly from a min-length violation so the test + // does not depend on the project schema's exact field set. The issue has a + // non-empty path (so a variable name is printed) and code "too_small" + // (NOT invalid_type/undefined), so config-errors.ts classifies it as + // invalid rather than missing. + const zerr = new ZodError([ + { + code: 'too_small', + minimum: 1, + type: 'string', + inclusive: true, + path: ['DOCMOST_PASSWORD'], + message: 'String must contain at least 1 character(s)', + } as z.ZodIssue, + ]); + + expect(() => + loadSettingsOrExit(() => { + throw zerr; + }), + ).toThrow('exit:1'); + + expect(exitSpy).toHaveBeenCalledWith(1); + const out = written(); + expect(out).toContain('Invalid value(s)'); + expect(out).toContain('DOCMOST_PASSWORD: String must contain at least 1'); + expect(out).not.toContain('Missing required variable(s)'); + }); + + it('reports missing AND invalid in their own sections when both occur', () => { + const { exitSpy, written } = stubExitAndStderr(); + + // DOCMOST_API_URL present but invalid (-> invalid section); the three other + // required vars absent (-> missing section). Confirms the two branches are + // populated and emitted independently. 
+ expect(() => + loadSettingsOrExit(() => + envSchema.parse({ + DOCMOST_API_URL: 'not-a-url', + }), + ), + ).toThrow('exit:1'); + + expect(exitSpy).toHaveBeenCalledWith(1); + const out = written(); + expect(out).toContain('Missing required variable(s)'); + expect(out).toContain('Invalid value(s)'); + expect(out).toContain('DOCMOST_API_URL:'); + expect(out).toContain('DOCMOST_EMAIL'); + }); +}); diff --git a/packages/git-sync/test/config-errors.test.ts b/packages/git-sync/test/config-errors.test.ts new file mode 100644 index 00000000..6ecf7093 --- /dev/null +++ b/packages/git-sync/test/config-errors.test.ts @@ -0,0 +1,56 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; +import { z } from 'zod'; +import { loadSettingsOrExit } from '../src/engine/config-errors'; + +describe('loadSettingsOrExit', () => { + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('returns the factory value and does not exit on success', () => { + const exitSpy = vi + .spyOn(process, 'exit') + .mockImplementation((() => undefined) as never); + + const result = loadSettingsOrExit(() => ({ ok: true })); + + expect(result).toEqual({ ok: true }); + expect(exitSpy).not.toHaveBeenCalled(); + }); + + it('prints a named-variable message and exits(1) on a ZodError', () => { + // Mock process.exit to throw so control stops at the exit point, mirroring + // the real exit-the-process behaviour without killing the test runner. 
+ const exitSpy = vi.spyOn(process, 'exit').mockImplementation((( + code?: number, + ) => { + throw new Error(`exit:${code}`); + }) as never); + const writeSpy = vi + .spyOn(process.stderr, 'write') + .mockImplementation(() => true); + + expect(() => + loadSettingsOrExit(() => z.object({ FOO: z.string() }).parse({})), + ).toThrow('exit:1'); + + expect(exitSpy).toHaveBeenCalledWith(1); + const written = writeSpy.mock.calls.map((c) => String(c[0])).join(''); + expect(written).toContain('Missing required variable(s)'); + expect(written).toContain('FOO'); + }); + + it('propagates a non-ZodError without exiting', () => { + const exitSpy = vi + .spyOn(process, 'exit') + .mockImplementation((() => undefined) as never); + const boom = new Error('x'); + + expect(() => + loadSettingsOrExit(() => { + throw boom; + }), + ).toThrow(boom); + expect(exitSpy).not.toHaveBeenCalled(); + }); +}); diff --git a/packages/git-sync/test/git-merge.test.ts b/packages/git-sync/test/git-merge.test.ts new file mode 100644 index 00000000..a3826b0f --- /dev/null +++ b/packages/git-sync/test/git-merge.test.ts @@ -0,0 +1,151 @@ +import { execFile } from 'node:child_process'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { promisify } from 'node:util'; +import { afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { + VaultGit, + BOT_AUTHOR_NAME, + BOT_AUTHOR_EMAIL, +} from '../src/engine/git'; + +// git 3-way merge integration (test-strategy report §2 git gap). The existing +// git.test.ts covers a fast-forward merge and a conflicting merge; this file +// adds the two MISSING cases against a REAL temp git repo under os.tmpdir(): +// 1. a clean NON-fast-forward 3-way merge of non-overlapping changes -> +// { ok:true, conflict:false } and a real merge commit (two parents); +// 2. 
a NON-conflict merge FAILURE -> { ok:false, conflict:false } so the pull +// cycle does not mislabel it a "conflict markers in vault" situation. +// The conflicting-merge case (markers + conflict:true) already lives in +// git.test.ts and is NOT duplicated here. Skips gracefully if git is missing. + +const execFileAsync = promisify(execFile); + +async function gitAvailable(): Promise<boolean> { + try { + await execFileAsync('git', ['--version']); + return true; + } catch { + return false; + } +} + +/** Number of parents of HEAD (2 => a real merge commit). */ +async function headParentCount(dir: string): Promise<number> { + const { stdout } = await execFileAsync( + 'git', + ['--no-pager', 'rev-list', '--parents', '-n', '1', 'HEAD'], + { cwd: dir }, + ); + // Output: "<commit> <parent1> <parent2> ..." — parents are the trailing ids. + return stdout.trim().split(/\s+/).length - 1; +} + +describe('VaultGit.merge — 3-way merge integration (temp repo)', () => { + let available = false; + let dir: string; + + beforeAll(async () => { + available = await gitAvailable(); + }); + + afterEach(async () => { + if (dir) await rm(dir, { recursive: true, force: true }); + }); + + async function freshRepo(): Promise<{ vault: string; git: VaultGit }> { + dir = await mkdtemp(join(tmpdir(), 'docmost-merge-')); + const git = new VaultGit(dir); + await git.ensureRepo(); + await git.ensureBranch('docmost', 'main'); + return { vault: dir, git }; + } + + async function commit( + git: VaultGit, + subject: string, + author = { name: BOT_AUTHOR_NAME, email: BOT_AUTHOR_EMAIL }, + ): Promise<void> { + await git.stageAll(); + await git.commit(subject, { + authorName: author.name, + authorEmail: author.email, + }); + } + + it('clean NON-fast-forward 3-way merge of non-overlapping changes -> merge commit', async () => { + if (!available) return; // skip gracefully when git is unavailable + const { vault, git } = await freshRepo(); + + // Seed a shared base file on main so both branches diverge from a real + // merge-base (not an empty tree). 
+ await writeFile(join(vault, 'base.md'), 'shared base\n', 'utf8'); + await commit(git, 'base'); + // Re-create docmost from this base so the merge-base is `base`. + await execFileAsync('git', ['--no-pager', 'branch', '-f', 'docmost', 'main'], { + cwd: vault, + }); + + // docmost adds doc-only.md (a DIFFERENT file than main touches). + await git.checkout('docmost'); + await writeFile(join(vault, 'doc-only.md'), 'from docmost\n', 'utf8'); + await commit(git, 'docmost: add doc-only'); + + // main adds main-only.md AND advances past the merge-base, so the merge can + // NOT fast-forward — it must create a real 3-way merge commit. + await git.checkout('main'); + await writeFile(join(vault, 'main-only.md'), 'from main\n', 'utf8'); + await commit(git, 'local: add main-only', { + name: 'Human', + email: 'human@local', + }); + + const res = await git.merge('docmost'); + expect(res.ok).toBe(true); + expect(res.conflict).toBe(false); + + // A real (non-FF) merge: HEAD has TWO parents. + expect(await headParentCount(vault)).toBe(2); + + // Both non-overlapping changes are present on main after the merge. + const tracked = await git.listTrackedFiles(); + expect(new Set(tracked)).toEqual( + new Set(['base.md', 'main-only.md', 'doc-only.md']), + ); + }); + + it('NON-conflict merge FAILURE -> { ok:false, conflict:false } (not mislabeled a conflict)', async () => { + if (!available) return; + const { vault, git } = await freshRepo(); + + // base file on main, then fork docmost from this base. + await writeFile(join(vault, 'f.md'), 'base\n', 'utf8'); + await commit(git, 'base'); + await execFileAsync('git', ['--no-pager', 'branch', '-f', 'docmost', 'main'], { + cwd: vault, + }); + + // docmost modifies f.md (committed). + await git.checkout('docmost'); + await writeFile(join(vault, 'f.md'), 'docmost change\n', 'utf8'); + await commit(git, 'docmost: edit f'); + + // Back on main, leave an UNCOMMITTED local change to f.md. git refuses the + // merge ("Your local changes ... 
would be overwritten by merge") and exits + // non-zero — but there are NO unmerged index paths, so this is a clean + // FAILURE, not a conflict. `merge()` must report { ok:false, conflict:false } + // so pull.ts does not falsely claim conflict markers are in the vault. + await git.checkout('main'); + await writeFile(join(vault, 'f.md'), 'uncommitted local edit\n', 'utf8'); + // NOTE: deliberately NOT staged/committed. + + const res = await git.merge('docmost'); + expect(res.ok).toBe(false); + expect(res.conflict).toBe(false); + // The merge did not start: HEAD is still a single-parent commit. + expect(await headParentCount(vault)).toBe(1); + // And the repo is NOT left mid-merge (no MERGE_HEAD / unmerged paths). + expect(await git.isMergeInProgress()).toBe(false); + }); +}); diff --git a/packages/git-sync/test/git.test.ts b/packages/git-sync/test/git.test.ts new file mode 100644 index 00000000..1a914b2b --- /dev/null +++ b/packages/git-sync/test/git.test.ts @@ -0,0 +1,710 @@ +import { execFile } from 'node:child_process'; +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { promisify } from 'node:util'; +import { afterEach, beforeAll, describe, expect, it } from 'vitest'; +import { chmod } from 'node:fs/promises'; +import { + VaultGit, + BOT_AUTHOR_NAME, + BOT_AUTHOR_EMAIL, + buildCommitMessage, + vaultGitEnv, +} from '../src/engine/git'; + +const execFileAsync = promisify(execFile); + +/** True if a usable `git` binary is on PATH (skip the suite otherwise). */ +async function gitAvailable(): Promise<boolean> { + try { + await execFileAsync('git', ['--version']); + return true; + } catch { + return false; + } +} + +/** Read the full commit message of HEAD (subject + body) in a repo dir. 
*/ +async function headMessage(dir: string): Promise<string> { + const { stdout } = await execFileAsync( + 'git', + ['--no-pager', 'log', '-1', '--pretty=%B'], + { cwd: dir }, + ); + return stdout.trim(); +} + +/** Read the author "Name <email>" of HEAD in a repo dir. */ +async function headAuthor(dir: string): Promise<string> { + const { stdout } = await execFileAsync( + 'git', + ['--no-pager', 'log', '-1', '--pretty=%an <%ae>'], + { cwd: dir }, + ); + return stdout.trim(); +} + +describe('buildCommitMessage (pure)', () => { + it('returns the bare subject when there are no trailers', () => { + expect(buildCommitMessage('subject')).toBe('subject'); + expect(buildCommitMessage('subject', [])).toBe('subject'); + }); + + it('appends trailers separated from the subject by a blank line', () => { + expect(buildCommitMessage('subject', ['Docmost-Sync-Source: docmost'])).toBe( + 'subject\n\nDocmost-Sync-Source: docmost', + ); + }); +}); + +describe('vaultGitEnv (pure)', () => { + it('pins locale, pager and prompt, and strips GIT_DIR/GIT_WORK_TREE', () => { + // Seed inputs that MUST be neutralized/stripped: a redirecting GIT_DIR and + // GIT_WORK_TREE would defeat the cwd-isolation guarantee (SPEC §12). + process.env.GIT_DIR = '/somewhere/else/.git'; + process.env.GIT_WORK_TREE = '/somewhere/else'; + try { + const env = vaultGitEnv(); + // Locale-independent output. + expect(env.LC_ALL).toBe('C'); + expect(env.LANG).toBe('C'); + // Never page, never block on an interactive prompt. + expect(env.GIT_PAGER).toBe('cat'); + expect(env.GIT_TERMINAL_PROMPT).toBe('0'); + // The redirecting vars are removed regardless of what process.env held. + expect(env.GIT_DIR).toBeUndefined(); + expect(env.GIT_WORK_TREE).toBeUndefined(); + } finally { + delete process.env.GIT_DIR; + delete process.env.GIT_WORK_TREE; + } + }); + + it('passes through caller extras (e.g. 
author/committer identity)', () => { + const env = vaultGitEnv({ GIT_AUTHOR_NAME: 'X', GIT_AUTHOR_EMAIL: 'x@y' }); + expect(env.GIT_AUTHOR_NAME).toBe('X'); + expect(env.GIT_AUTHOR_EMAIL).toBe('x@y'); + // Still strips the redirecting vars even with extras present. + expect(env.GIT_DIR).toBeUndefined(); + expect(env.GIT_WORK_TREE).toBeUndefined(); + }); +}); + +describe('VaultGit (integration; temp repo)', () => { + let available = false; + let dir: string; + + beforeAll(async () => { + available = await gitAvailable(); + }); + + afterEach(async () => { + if (dir) { + await rm(dir, { recursive: true, force: true }); + } + }); + + /** Make a fresh temp dir for one test (under the OS tmpdir, NOT the repo). */ + async function freshDir(): Promise<string> { + dir = await mkdtemp(join(tmpdir(), 'docmost-vault-')); + return dir; + } + + it('ensureRepo creates .git + main + an initial commit', async () => { + if (!available) return; // skip gracefully when git is unavailable + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // It is a git work-tree now. + const { stdout: insideWt } = await execFileAsync( + 'git', + ['rev-parse', '--is-inside-work-tree'], + { cwd: vault }, + ); + expect(insideWt.trim()).toBe('true'); + + // On `main`. + expect(await git.currentBranch()).toBe('main'); + + // Has the initial commit. + expect(await headMessage(vault)).toBe('init vault'); + + // Idempotent: calling again does not create a second commit. 
+ await git.ensureRepo(); + const { stdout: count } = await execFileAsync( + 'git', + ['rev-list', '--count', 'HEAD'], + { cwd: vault }, + ); + expect(count.trim()).toBe('1'); + }); + + it('ensureRepo neutralizes correctness-affecting LOCAL config', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // These LOCAL values neutralize a hostile GLOBAL/system config that would + // otherwise change porcelain BEHAVIOR and corrupt the vault (SPEC §11 for + // core.autocrlf; gpgsign/safecrlf for the headless daemon). + const localConfig = async (key: string): Promise<string> => { + const { stdout } = await execFileAsync( + 'git', + ['config', '--local', '--get', key], + { cwd: vault }, + ); + return stdout.trim(); + }; + expect(await localConfig('core.autocrlf')).toBe('false'); + expect(await localConfig('commit.gpgsign')).toBe('false'); + expect(await localConfig('core.safecrlf')).toBe('false'); + expect(await localConfig('core.attributesFile')).toBe('/dev/null'); + + // Idempotent: a second run leaves the same single values (no duplicates). + await git.ensureRepo(); + expect(await localConfig('core.autocrlf')).toBe('false'); + expect(await localConfig('commit.gpgsign')).toBe('false'); + expect(await localConfig('core.safecrlf')).toBe('false'); + }); + + it('preserves LF bytes verbatim on commit (SPEC §11: autocrlf=false)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // Write content with explicit LF line endings. With a hostile + // core.autocrlf=true git would translate these to CRLF in the stored blob, + // breaking the byte-stable round-trip invariant. ensureRepo pins + // core.autocrlf=false locally, so the stored bytes must round-trip exactly. 
+ const fileName = 'lf.md'; + const content = 'line1\nline2\nline3\n'; + await writeFile(join(vault, fileName), content, 'utf8'); + await git.stageAll(); + const made = await git.commit('add LF file', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + }); + expect(made).toBe(true); + + // Read the STORED blob (not the worktree file) and assert verbatim bytes: + // still LF-only, no CRLF translation. + const { stdout: stored } = await execFileAsync( + 'git', + ['--no-pager', 'show', `HEAD:${fileName}`], + { cwd: vault, encoding: 'buffer' }, + ); + const storedBuf = stored as unknown as Buffer; + expect(storedBuf.includes(Buffer.from('\r\n'))).toBe(false); + expect(storedBuf.toString('utf8')).toBe(content); + }); + + it('ensureBranch creates the docmost branch from main', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + expect(await git.branchExists('docmost')).toBe(false); + await git.ensureBranch('docmost', 'main'); + expect(await git.branchExists('docmost')).toBe(true); + + // Idempotent. 
+ await git.ensureBranch('docmost', 'main'); + expect(await git.branchExists('docmost')).toBe(true); + }); + + it('commit writes a commit with the provenance trailer and the bot identity', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + await writeFile(join(vault, 'page.md'), 'hello\n', 'utf8'); + await git.stageAll(); + const made = await git.commit('docmost: sync 1 page(s)', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + trailers: ['Docmost-Sync-Source: docmost'], + }); + expect(made).toBe(true); + + const msg = await headMessage(vault); + expect(msg).toContain('docmost: sync 1 page(s)'); + expect(msg).toContain('Docmost-Sync-Source: docmost'); + + const author = await headAuthor(vault); + expect(author).toBe(`${BOT_AUTHOR_NAME} <${BOT_AUTHOR_EMAIL}>`); + + // The trailer is parseable by git itself. + const { stdout: trailers } = await execFileAsync( + 'git', + ['--no-pager', 'log', '-1', '--pretty=%(trailers:key=Docmost-Sync-Source,valueonly)'], + { cwd: vault }, + ); + expect(trailers.trim()).toBe('docmost'); + }); + + it('commit is a no-op when there is nothing to commit', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + await git.stageAll(); // nothing changed since the init commit + const made = await git.commit('docmost: sync 0 page(s)', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + trailers: ['Docmost-Sync-Source: docmost'], + }); + expect(made).toBe(false); + + // Still exactly one commit (the init one). 
+ const { stdout: count } = await execFileAsync( + 'git', + ['rev-list', '--count', 'HEAD'], + { cwd: vault }, + ); + expect(count.trim()).toBe('1'); + }); + + it('commit honors --no-verify (a failing pre-commit hook does not block it)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // Commit count BEFORE: just the init commit. + const countBefore = async (): Promise => { + const { stdout } = await execFileAsync( + 'git', + ['rev-list', '--count', 'HEAD'], + { cwd: vault }, + ); + return Number(stdout.trim()); + }; + const before = await countBefore(); + + // Install an EXECUTABLE pre-commit hook that always fails. Without + // `--no-verify`, `git commit` would run it, the hook would `exit 1`, and the + // commit would be ABORTED. So this test fails (no commit created, made !== + // true) the moment `--no-verify` is removed from commitRaw. + const hookPath = join(vault, '.git', 'hooks', 'pre-commit'); + await writeFile(hookPath, '#!/bin/sh\nexit 1\n', 'utf8'); + await chmod(hookPath, 0o755); + + await writeFile(join(vault, 'hooked.md'), 'content\n', 'utf8'); + await git.stageAll(); + const made = await git.commit('commit past a failing hook', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + trailers: ['Docmost-Sync-Source: docmost'], + }); + + // The commit was reported made AND actually landed (HEAD advanced by one). + expect(made).toBe(true); + expect(await countBefore()).toBe(before + 1); + expect(await headMessage(vault)).toContain('commit past a failing hook'); + }); + + it('merge fast-forwards main to docmost', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + await git.ensureBranch('docmost', 'main'); + + // Commit a file on docmost. 
+ await git.checkout('docmost'); + await writeFile(join(vault, 'a.md'), 'a\n', 'utf8'); + await git.stageAll(); + await git.commit('docmost: sync 1 page(s)', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + trailers: ['Docmost-Sync-Source: docmost'], + }); + + // main has not diverged, so the merge is a clean fast-forward. + await git.checkout('main'); + const res = await git.merge('docmost'); + expect(res.ok).toBe(true); + expect(res.conflict).toBe(false); + + // main now contains the file and the docmost commit. + const tracked = await git.listTrackedFiles(); + expect(tracked).toContain('a.md'); + expect(await headMessage(vault)).toContain('docmost: sync 1 page(s)'); + }); + + it('merge surfaces a conflict distinctly (no auto-resolve)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + await git.ensureBranch('docmost', 'main'); + + // Divergent edits to the SAME file on both branches -> real conflict. + await git.checkout('docmost'); + await writeFile(join(vault, 'c.md'), 'from docmost\n', 'utf8'); + await git.stageAll(); + await git.commit('docmost edit', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + }); + + await git.checkout('main'); + await writeFile(join(vault, 'c.md'), 'from main\n', 'utf8'); + await git.stageAll(); + await git.commit('main edit', { + authorName: 'Human', + authorEmail: 'human@local', + }); + + const res = await git.merge('docmost'); + expect(res.ok).toBe(false); + expect(res.conflict).toBe(true); + }); + + it('isMergeInProgress is false on a clean repo and true mid-merge', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + await git.ensureBranch('docmost', 'main'); + + // Clean repo, no merge in progress. 
+ expect(await git.isMergeInProgress()).toBe(false); + + // Create a REAL conflict: divergent edits to the same file on both branches. + await git.checkout('docmost'); + await writeFile(join(vault, 'c.md'), 'from docmost\n', 'utf8'); + await git.stageAll(); + await git.commit('docmost edit', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + }); + + await git.checkout('main'); + await writeFile(join(vault, 'c.md'), 'from main\n', 'utf8'); + await git.stageAll(); + await git.commit('main edit', { + authorName: 'Human', + authorEmail: 'human@local', + }); + + // Merge conflicts -> the repo is now left mid-merge. + const res = await git.merge('docmost'); + expect(res.conflict).toBe(true); + expect(await git.isMergeInProgress()).toBe(true); + + // Aborting the merge clears the in-progress state again. + await execFileAsync('git', ['--no-pager', 'merge', '--abort'], { cwd: vault }); + expect(await git.isMergeInProgress()).toBe(false); + }); + + it('listTrackedFiles supports a glob and returns forward-slash paths', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + await writeFile(join(vault, 'keep.md'), 'k\n', 'utf8'); + await writeFile(join(vault, 'note.txt'), 't\n', 'utf8'); + await git.stageAll(); + await git.commit('add files', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + }); + + const md = await git.listTrackedFiles('*.md'); + expect(md).toEqual(['keep.md']); + const all = await git.listTrackedFiles(); + expect(new Set(all)).toEqual(new Set(['keep.md', 'note.txt'])); + }); + + it('listTrackedFiles returns RAW UTF-8 Cyrillic paths (not octal-escaped/quoted)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // The target wiki is Russian, so file names contain Cyrillic. 
With git's + // DEFAULT core.quotepath=true these come back as `"\320\232..."` from + // ls-files; `listTrackedFiles` must return them verbatim as UTF-8. + const topName = 'Колонка.md'; + const nestedDir = 'Раздел'; + const nestedName = 'Подстраница.md'; + await writeFile(join(vault, topName), 'top\n', 'utf8'); + await mkdir(join(vault, nestedDir), { recursive: true }); + await writeFile(join(vault, nestedDir, nestedName), 'nested\n', 'utf8'); + await git.stageAll(); + await git.commit('add cyrillic files', { + authorName: BOT_AUTHOR_NAME, + authorEmail: BOT_AUTHOR_EMAIL, + }); + + const md = await git.listTrackedFiles('*.md'); + // Exact UTF-8 names, forward-slash separated for the nested one — NOT an + // escaped/quoted form like `"\320\232..."`. + expect(new Set(md)).toEqual( + new Set([topName, `${nestedDir}/${nestedName}`]), + ); + // Guard explicitly against the quotepath regression: no entry is quoted or + // contains a backslash escape sequence. + for (const p of md) { + expect(p.startsWith('"')).toBe(false); + expect(p.includes('\\')).toBe(false); + } + + // No-glob listing also returns the raw Cyrillic names. + const all = await git.listTrackedFiles(); + expect(all).toContain(topName); + expect(all).toContain(`${nestedDir}/${nestedName}`); + }); + + it('assertGitAvailable resolves when git is present', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + // No repo needed: it only probes `git --version` (and the vault dir need + // not even exist yet). + await expect(git.assertGitAvailable()).resolves.toBeUndefined(); + }); + + // --- Push-direction primitives (SPEC §6 "ФС → Docmost", FIRST increment) --- + + it('diffNameStatus parses A / M / D rows between two commits', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // Commit 1: two files (keep.md will be modified, gone.md will be deleted). 
+ await writeFile(join(vault, 'keep.md'), 'v1\n', 'utf8'); + await writeFile(join(vault, 'gone.md'), 'old\n', 'utf8'); + await git.stageAll(); + await git.commit('base', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + const base = await git.revParse('HEAD'); + expect(base).toBeTruthy(); + + // Commit 2: modify keep.md, add fresh.md, delete gone.md. + await writeFile(join(vault, 'keep.md'), 'v2\n', 'utf8'); + await writeFile(join(vault, 'fresh.md'), 'new\n', 'utf8'); + await rm(join(vault, 'gone.md')); + await git.stageAll(); + await git.commit('change', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + + const entries = await git.diffNameStatus(base!, 'HEAD'); + // Sort for deterministic assertion regardless of git's row order. + const byPath = new Map(entries.map((e) => [e.path, e])); + expect(byPath.get('keep.md')).toEqual({ status: 'M', path: 'keep.md' }); + expect(byPath.get('fresh.md')).toEqual({ status: 'A', path: 'fresh.md' }); + expect(byPath.get('gone.md')).toEqual({ status: 'D', path: 'gone.md' }); + expect(entries.length).toBe(3); + }); + + it('diffNameStatus parses a real rename (R) with old + new path', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // A file with enough content that git's -M rename detection ties the rename + // to the same blob (identical content -> R100). + const body = 'line a\nline b\nline c\nline d\n'; + await writeFile(join(vault, 'old-name.md'), body, 'utf8'); + await git.stageAll(); + await git.commit('add', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + const base = await git.revParse('HEAD'); + + // Rename it (same content) so -M detects a rename, not delete+add. 
+ await rm(join(vault, 'old-name.md')); + await writeFile(join(vault, 'new-name.md'), body, 'utf8'); + await git.stageAll(); + await git.commit('rename', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + + const entries = await git.diffNameStatus(base!, 'HEAD'); + expect(entries.length).toBe(1); + const r = entries[0]; + expect(r.status).toBe('R'); + expect(r.oldPath).toBe('old-name.md'); + expect(r.path).toBe('new-name.md'); + // Identical content -> a 100% similarity score. + expect(r.score).toBe(100); + }); + + it('diffNameStatus returns RAW UTF-8 Cyrillic paths (no quoting)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + const base = await git.revParse('HEAD'); + await writeFile(join(vault, 'Статья.md'), 'тело\n', 'utf8'); + await git.stageAll(); + await git.commit('add cyrillic', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + + const entries = await git.diffNameStatus(base!, 'HEAD'); + expect(entries).toEqual([{ status: 'A', path: 'Статья.md' }]); + }); + + it('revParse / readRef resolve a ref to a SHA, null when missing', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + const head = await git.revParse('HEAD'); + expect(head).toMatch(/^[0-9a-f]{40}$/); + // A non-existent ref resolves to null (not a throw). 
+ expect(await git.revParse('refs/docmost/last-pushed')).toBeNull(); + expect(await git.readRef('refs/docmost/last-pushed')).toBeNull(); + }); + + it('updateRef / readRef round-trip a custom ref', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + const head = await git.revParse('HEAD'); + expect(await git.readRef('refs/docmost/last-pushed')).toBeNull(); + + await git.updateRef('refs/docmost/last-pushed', head!); + // It now resolves to the same SHA as HEAD. + expect(await git.readRef('refs/docmost/last-pushed')).toBe(head); + expect(await git.revParse('refs/docmost/last-pushed')).toBe(head); + }); + + it('showFileAtRef returns a committed file content and null for a missing path', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + const content = 'hello at ref\nsecond line\n'; + await writeFile(join(vault, 'doc.md'), content, 'utf8'); + await git.stageAll(); + await git.commit('add doc', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + + // The committed file is readable at HEAD verbatim. + expect(await git.showFileAtRef('HEAD', 'doc.md')).toBe(content); + // A path that does not exist at that ref maps to null (not a throw). + expect(await git.showFileAtRef('HEAD', 'nope.md')).toBeNull(); + }); + + it('showFileAtRef reads a DELETED file pre-image at an earlier ref', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // Commit a tracked page, capture the ref, then delete it. 
+ const meta = + '\n\nbody\n'; + await writeFile(join(vault, 'tracked.md'), meta, 'utf8'); + await git.stageAll(); + await git.commit('add tracked', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + const beforeDelete = await git.revParse('HEAD'); + + await rm(join(vault, 'tracked.md')); + await git.stageAll(); + await git.commit('delete tracked', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + + // The pre-image (pageId) is recoverable at the earlier ref even though the + // file is gone from HEAD — this is how the push direction recovers the + // pageId of a deleted file (SPEC §6/§8). + expect(await git.showFileAtRef('HEAD', 'tracked.md')).toBeNull(); + const preImage = await git.showFileAtRef(beforeDelete!, 'tracked.md'); + expect(preImage).toBe(meta); + expect(preImage).toContain('page-123'); + }); + + it('fastForwardBranch advances a true fast-forward (the loop-close, SPEC §6 step 3)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // docmost branches off main at the initial commit; main then moves ahead. + await git.ensureBranch('docmost', 'main'); + const base = await git.revParse('refs/heads/docmost'); + + await writeFile(join(vault, 'page.md'), 'pushed content\n', 'utf8'); + await git.stageAll(); + await git.commit('push page', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + const mainTip = await git.revParse('HEAD'); + + // docmost is BEHIND main and an ancestor -> a true fast-forward advances it. + expect(await git.revParse('refs/heads/docmost')).toBe(base); + const res = await git.fastForwardBranch('docmost', mainTip!); + expect(res).toEqual({ ok: true }); + // The branch now points at the pushed main commit (mirror reflects Docmost). + expect(await git.revParse('refs/heads/docmost')).toBe(mainTip); + + // It does NOT touch the working tree / current branch (still on main). 
+ expect(await git.currentBranch()).toBe('main'); + }); + + it('fastForwardBranch is a no-op (ok) when the branch is already at the target', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + await git.ensureBranch('docmost', 'main'); + const mainTip = await git.revParse('HEAD'); + + // Already equal -> a degenerate fast-forward, still ok, branch unchanged. + const res = await git.fastForwardBranch('docmost', mainTip!); + expect(res).toEqual({ ok: true }); + expect(await git.revParse('refs/heads/docmost')).toBe(mainTip); + }); + + it('fastForwardBranch REFUSES a non-fast-forward (never clobbers divergent history)', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + + // Make docmost diverge: it has a commit that main does NOT contain. + await git.checkout('main'); // ensure on main first + await git.ensureBranch('docmost', 'main'); + await git.checkout('docmost'); + await writeFile(join(vault, 'only-on-docmost.md'), 'mirror-only\n', 'utf8'); + await git.stageAll(); + await git.commit('docmost-only commit', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + const docmostTip = await git.revParse('refs/heads/docmost'); + + // main moves ahead independently (divergent from docmost). + await git.checkout('main'); + await writeFile(join(vault, 'only-on-main.md'), 'main-only\n', 'utf8'); + await git.stageAll(); + await git.commit('main-only commit', { authorName: BOT_AUTHOR_NAME, authorEmail: BOT_AUTHOR_EMAIL }); + const mainTip = await git.revParse('HEAD'); + + // docmost is NOT an ancestor of main -> the ff is REFUSED, branch untouched. 
+ const res = await git.fastForwardBranch('docmost', mainTip!); + expect(res).toEqual({ ok: false, reason: 'not-fast-forward' }); + expect(await git.revParse('refs/heads/docmost')).toBe(docmostTip); + }); + + it('fastForwardBranch refuses a missing branch / unresolved target with a reason', async () => { + if (!available) return; + const vault = await freshDir(); + const git = new VaultGit(vault); + await git.ensureRepo(); + const mainTip = await git.revParse('HEAD'); + + const noBranch = await git.fastForwardBranch('nope', mainTip!); + expect(noBranch.ok).toBe(false); + expect(noBranch.reason).toContain('nope'); + + await git.ensureBranch('docmost', 'main'); + const noTarget = await git.fastForwardBranch('docmost', 'deadbeefdeadbeef'); + expect(noTarget.ok).toBe(false); + expect(noTarget.reason).toContain('deadbeefdeadbeef'); + }); +}); diff --git a/packages/git-sync/test/read-existing.test.ts b/packages/git-sync/test/read-existing.test.ts new file mode 100644 index 00000000..f6c9c92f --- /dev/null +++ b/packages/git-sync/test/read-existing.test.ts @@ -0,0 +1,120 @@ +import { describe, expect, it } from 'vitest'; +import { readExisting } from '../src/engine/pull'; + +// R-Pull-1 (test-strategy report §5): `readExisting` now takes injectable IO +// (`listTracked` / `readFile`), so its parsing + skip rules are unit-testable +// without a real git repo or filesystem. These tests pass fakes only — no git, +// no fs, no network. + +/** Build a valid self-contained file with a `docmost:meta` block. */ +function withMeta(meta: Record, body = '# Title\nbody\n'): string { + return `\n\n${body}`; +} + +/** A fake `readFile` backed by an in-memory map (rejects on a missing key). 
*/ +function fakeReadFile(files: Record) { + return async (rel: string): Promise => { + if (!(rel in files)) { + throw Object.assign(new Error(`ENOENT: ${rel}`), { code: 'ENOENT' }); + } + return files[rel]; + }; +} + +describe('readExisting (R-Pull-1, injected IO)', () => { + it('recovers { pageId, relPath } for valid tracked files', async () => { + const files = { + 'Space/A.md': withMeta({ version: 1, pageId: 'p1', title: 'A' }), + 'Space/Sub/B.md': withMeta({ version: 1, pageId: 'p2', title: 'B' }), + }; + const result = await readExisting({ + listTracked: async () => Object.keys(files), + readFile: fakeReadFile(files), + }); + expect(result).toEqual([ + { pageId: 'p1', relPath: 'Space/A.md' }, + { pageId: 'p2', relPath: 'Space/Sub/B.md' }, + ]); + }); + + it('SKIPS a file with no docmost:meta block (plain hand-written markdown)', async () => { + const files = { + 'tracked.md': withMeta({ version: 1, pageId: 'p1' }), + 'stray.md': '# Just a hand-written note\n\nNo meta here.\n', + }; + const result = await readExisting({ + listTracked: async () => Object.keys(files), + readFile: fakeReadFile(files), + }); + // Only the engine-tracked file (with a pageId) survives. + expect(result).toEqual([{ pageId: 'p1', relPath: 'tracked.md' }]); + }); + + it('SKIPS a file whose meta has no pageId', async () => { + const files = { + 'has-id.md': withMeta({ version: 1, pageId: 'keep' }), + 'no-id.md': withMeta({ version: 1, title: 'untitled', slugId: 's' }), + }; + const result = await readExisting({ + listTracked: async () => Object.keys(files), + readFile: fakeReadFile(files), + }); + expect(result).toEqual([{ pageId: 'keep', relPath: 'has-id.md' }]); + }); + + it('SKIPS a file with an unparseable (invalid-JSON) meta block, does not throw', async () => { + // Invalid JSON inside the meta block makes parseDocmostMarkdown throw; the + // skip-rule must swallow it and treat the file as not-engine-tracked. 
+ const files = { + 'good.md': withMeta({ version: 1, pageId: 'good' }), + 'broken.md': '\n\nbody\n', + }; + const result = await readExisting({ + listTracked: async () => Object.keys(files), + readFile: fakeReadFile(files), + }); + expect(result).toEqual([{ pageId: 'good', relPath: 'good.md' }]); + }); + + it('does NOT throw when readFile REJECTS (tracked but missing) — treats it as skipped', async () => { + const files = { + 'present.md': withMeta({ version: 1, pageId: 'present' }), + // "ghost.md" is listed as tracked but absent from the file map -> reject. + }; + const result = await readExisting({ + listTracked: async () => ['present.md', 'ghost.md'], + readFile: fakeReadFile(files), + }); + // The rejection is swallowed; the present file still comes through. + expect(result).toEqual([{ pageId: 'present', relPath: 'present.md' }]); + }); + + it('returns an empty list when nothing is tracked', async () => { + const result = await readExisting({ + listTracked: async () => [], + readFile: async () => { + throw new Error('should not be called'); + }, + }); + expect(result).toEqual([]); + }); + + it('combines all skip rules in one listing (only the valid files survive)', async () => { + const files = { + 'ok1.md': withMeta({ version: 1, pageId: 'a' }), + 'no-meta.md': 'plain\n', + 'no-id.md': withMeta({ version: 1, title: 'x' }), + 'broken.md': '\nbody\n', + 'ok2.md': withMeta({ version: 1, pageId: 'b' }), + // missing.md rejects on read. 
+ }; + const result = await readExisting({ + listTracked: async () => [...Object.keys(files), 'missing.md'], + readFile: fakeReadFile(files), + }); + expect(result).toEqual([ + { pageId: 'a', relPath: 'ok1.md' }, + { pageId: 'b', relPath: 'ok2.md' }, + ]); + }); +}); diff --git a/packages/git-sync/test/run-push-realgit.test.ts b/packages/git-sync/test/run-push-realgit.test.ts new file mode 100644 index 00000000..72d6ae8a --- /dev/null +++ b/packages/git-sync/test/run-push-realgit.test.ts @@ -0,0 +1,142 @@ +import { execFile } from 'node:child_process'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { promisify } from 'node:util'; +import { afterEach, beforeAll, describe, expect, it, vi } from 'vitest'; +import { runPush, LAST_PUSHED_REF } from '../src/engine/push'; +import type { PushDeps } from '../src/engine/push'; +import { VaultGit } from '../src/engine/git'; +import type { Settings } from '../src/engine/settings'; +import { serializeDocmostMarkdownBody } from '../src/lib/index'; + +const execFileAsync = promisify(execFile); + +// runPush `--apply` against a REAL VaultGit in a temp repo (NO Docmost — the +// client is faked). This guards the real-git BINDING contract that the plain- +// object git fakes in run-push.test.ts cannot catch: the applier's git deps +// (`updateRef`/`fastForwardBranch`/`showFileAtRef`) call `this.run`/`this.runRaw` +// internally, so they only work when their `this` receiver is preserved. Passing +// bare method references (`git.updateRef`, …) would throw `this.runRaw is not a +// function` here. Only the LOCAL temp git is mutated; nothing is sent to Docmost. + +/** True if a usable `git` binary is on PATH (skip the suite otherwise). 
*/ +async function gitAvailable(): Promise { + try { + await execFileAsync('git', ['--version']); + return true; + } catch { + return false; + } +} + +/** A minimal valid Settings fixture (only fields runPush reads matter). */ +function makeSettings(vaultPath: string): Settings { + return { + docmostApiUrl: 'https://docmost.example.com', + docmostEmail: 'you@example.com', + docmostPassword: 'secret', + docmostSpaceId: 'space-1', + vaultPath, + pollIntervalMs: 15000, + debounceMs: 2000, + logLevel: 'info', + }; +} + +/** A recording client fake; createPage returns an assigned id + updatedAt. */ +function makeClientFake() { + return { + importPageMarkdown: vi.fn(async () => ({ + data: { updatedAt: '2026-06-20T00:00:00.000Z' }, + success: true, + })), + createPage: vi.fn(async (title: string) => ({ + data: { id: 'new-id', title, updatedAt: '2026-06-20T00:00:00.000Z' }, + success: true, + })), + deletePage: vi.fn(async () => ({ success: true })), + movePage: vi.fn(async () => ({ success: true })), + renamePage: vi.fn(async () => ({ success: true })), + }; +} + +describe('runPush --apply against a REAL VaultGit (binding contract)', () => { + let available = false; + let dir: string; + + beforeAll(async () => { + available = await gitAvailable(); + }); + + afterEach(async () => { + if (dir) { + await rm(dir, { recursive: true, force: true }); + } + }); + + it('writes through real git: createPage runs, last-pushed advances, no throw', async () => { + if (!available) return; // skip gracefully when git is unavailable + + // Temp vault repo under the OS tmpdir (mirrors test/git.test.ts setup). + dir = await mkdtemp(join(tmpdir(), 'docmost-push-realgit-')); + const vault = dir; + const git = new VaultGit(vault); + await git.ensureRepo(); + // The `docmost` mirror branches off `main` at the initial commit; this is + // also the diff base (last-pushed is unset, so runPush falls back to it). 
+ await git.ensureBranch('docmost', 'main'); + + // A brand-new local file with meta carrying title + spaceId but NO pageId, + // committed on `main` AHEAD of the base -> computePushActions yields a CREATE. + const newFile = serializeDocmostMarkdownBody( + { version: 1, title: 'New', spaceId: 'sp-1' }, + 'fresh body', + ); + await writeFile(join(vault, 'New.md'), newFile, 'utf8'); + await git.stageAll(); + await git.commit('add New.md', { + authorName: 'Human', + authorEmail: 'human@local', + }); + + // last-pushed must be UNSET so the run actually advances it for the first time. + expect(await git.revParse(LAST_PUSHED_REF)).toBeNull(); + + const client = makeClientFake(); + const logs: string[] = []; + const deps: PushDeps = { + settings: makeSettings(vault), + // The WHOLE real VaultGit — its methods must keep their `this` binding. + git, + makeClient: () => client as any, + readFile: (path) => + import('node:fs/promises').then((fs) => + fs.readFile(join(vault, ...path.split('/')), 'utf8'), + ), + writeFile: async (path, text) => { + const fs = await import('node:fs/promises'); + await fs.writeFile(join(vault, ...path.split('/')), text, 'utf8'); + }, + log: (line) => logs.push(line), + }; + + // The run must NOT throw — this is what FAILS before Fix 1 (the bare-method + // git deps would throw `this.runRaw is not a function` on the real VaultGit). + const res = await runPush(deps, { dryRun: false }); + + expect(res.mode).toBe('apply'); + expect(res.failures).toEqual([]); + // The FAKE client was actually called (the write path ran). + expect(client.createPage).toHaveBeenCalledTimes(1); + expect(res.applied?.created).toBe(1); + // The assigned pageId was written back to disk + committed. + expect(res.applied?.writtenBack).toEqual([{ path: 'New.md', pageId: 'new-id' }]); + + // CRITICALLY: refs/docmost/last-pushed ACTUALLY advanced in the real repo — + // it now resolves to a real commit (proving updateRef ran with binding). 
+ const lastPushed = await git.revParse(LAST_PUSHED_REF); + expect(lastPushed).toMatch(/^[0-9a-f]{40}$/); + expect(res.divergentDocmost).toBe(false); + }); +}); diff --git a/packages/git-sync/test/run-push.test.ts b/packages/git-sync/test/run-push.test.ts new file mode 100644 index 00000000..29b31fe8 --- /dev/null +++ b/packages/git-sync/test/run-push.test.ts @@ -0,0 +1,398 @@ +import { describe, expect, it, vi } from 'vitest'; +import { runPush, LAST_PUSHED_REF, DOCMOST_BRANCH } from '../src/engine/push'; +import type { PushDeps } from '../src/engine/push'; +import type { Settings } from '../src/engine/settings'; +import { serializeDocmostMarkdownBody } from '../src/lib/index'; + +// runPush orchestration (SPEC §6 "ФС → Docmost"), DRY-RUN BY DEFAULT. Driven by +// FAKES only — no live Docmost, git, fs, or network. Asserts the SAFE-BY-DEFAULT +// contract: a dry-run builds NO client, makes ZERO Docmost calls, advances NO +// refs; `--apply` is the ONLY path that writes. Also covers the merge-in-progress +// abort, the divergent-`docmost` escalation, and the base selection fallback. + +/** A minimal valid Settings fixture (only fields runPush reads matter). */ +function makeSettings(): Settings { + return { + docmostApiUrl: 'https://docmost.example.com', + docmostEmail: 'you@example.com', + docmostPassword: 'secret', + docmostSpaceId: 'space-1', + vaultPath: '/vault', + pollIntervalMs: 15000, + debounceMs: 2000, + logLevel: 'info', + }; +} + +/** + * A recording git fake covering exactly the `PushDeps['git']` surface. Options + * configure the diff rows, which refs resolve, and what the ff returns. + */ +function makeGit(opts?: { + mergeInProgress?: boolean; + lastPushed?: string | null; + docmostSha?: string | null; + mainSha?: string; + /** Diff rows returned by diffNameStatus(base, main). */ + changes?: { status: 'A' | 'M' | 'D' | 'R' | 'C'; path: string; oldPath?: string }[]; + /** Pre-image tree at the base ref (path -> text) for showFileAtRef. 
*/ + prevTree?: Record; + ffResult?: { ok: boolean; reason?: string }; + /** When set, commit returns this per call (queue); defaults to always-true. */ + commitResults?: boolean[]; +}) { + const calls = { + assertGitAvailable: 0, + ensureRepo: 0, + checkout: [] as string[], + stageAll: 0, + commit: [] as string[], + updateRef: [] as { ref: string; target: string }[], + fastForwardBranch: [] as { branch: string; toCommit: string }[], + diffNameStatus: [] as { from: string; to: string }[], + }; + const prevTree = opts?.prevTree ?? {}; + const commitQueue = [...(opts?.commitResults ?? [])]; + let mainSha = opts?.mainSha ?? 'main-sha-1'; + + const git: PushDeps['git'] = { + assertGitAvailable: vi.fn(async () => { + calls.assertGitAvailable++; + }), + ensureRepo: vi.fn(async () => { + calls.ensureRepo++; + }), + isMergeInProgress: vi.fn(async () => opts?.mergeInProgress ?? false), + checkout: vi.fn(async (name: string) => { + calls.checkout.push(name); + }), + stageAll: vi.fn(async () => { + calls.stageAll++; + }), + commit: vi.fn(async (subject: string) => { + calls.commit.push(subject); + return commitQueue.length > 0 ? (commitQueue.shift() as boolean) : true; + }), + readRef: vi.fn(async (ref: string) => + ref === LAST_PUSHED_REF ? (opts?.lastPushed ?? null) : null, + ), + revParse: vi.fn(async (ref: string) => { + if (ref === DOCMOST_BRANCH) return opts?.docmostSha ?? null; + if (ref === 'main') return mainSha; + return null; + }), + diffNameStatus: vi.fn(async (from: string, to: string) => { + calls.diffNameStatus.push({ from, to }); + return opts?.changes ?? []; + }), + showFileAtRef: vi.fn(async (_ref: string, path: string) => + path in prevTree ? prevTree[path] : null, + ), + updateRef: vi.fn(async (ref: string, target: string) => { + calls.updateRef.push({ ref, target }); + }), + fastForwardBranch: vi.fn(async (branch: string, toCommit: string) => { + calls.fastForwardBranch.push({ branch, toCommit }); + return opts?.ffResult ?? 
{ ok: true }; + }), + }; + return { + git, + calls, + /** Advance the fake `main` HEAD (so a write-back commit yields a new sha). */ + setMainSha: (sha: string) => { + mainSha = sha; + }, + }; +} + +/** A recording client fake; createPage returns a configurable assigned id. */ +function makeClientFake(opts?: { createId?: string }) { + return { + importPageMarkdown: vi.fn(async () => ({ success: true })), + createPage: vi.fn(async (title: string) => ({ + data: { id: opts?.createId ?? 'assigned-id', title }, + success: true, + })), + deletePage: vi.fn(async () => ({ success: true })), + movePage: vi.fn(async () => ({ success: true })), + renamePage: vi.fn(async () => ({ success: true })), + }; +} + +/** A recording fs fake over a path->text store. */ +function makeFs(initial: Record = {}) { + const store: Record = { ...initial }; + const reads: string[] = []; + const writes: { path: string; text: string }[] = []; + return { + store, + reads, + writes, + readFile: vi.fn(async (path: string) => { + reads.push(path); + if (!(path in store)) throw new Error(`no such file: ${path}`); + return store[path]; + }), + writeFile: vi.fn(async (path: string, text: string) => { + store[path] = text; + writes.push({ path, text }); + }), + }; +} + +/** Assemble PushDeps with a recording logger and a makeClient FACTORY spy. */ +function makeDeps( + git: PushDeps['git'], + fs: ReturnType, + client?: ReturnType, +) { + const logs: string[] = []; + const makeClient = vi.fn(() => (client ?? 
makeClientFake()) as any); + const deps: PushDeps = { + settings: makeSettings(), + git, + makeClient, + readFile: fs.readFile, + writeFile: fs.writeFile, + log: (line) => logs.push(line), + }; + return { deps, logs, makeClient }; +} + +describe('runPush — dry-run is the DEFAULT (safe)', () => { + it('logs a plan, builds NO client, makes ZERO Docmost calls, advances NO refs', async () => { + const file = + '\n\nedited body\n'; + const { git, calls } = makeGit({ + lastPushed: 'base-sha', + changes: [{ status: 'M', path: 'Doc.md' }], + }); + const fs = makeFs({ 'Doc.md': file }); + const { deps, logs, makeClient } = makeDeps(git, fs); + + const res = await runPush(deps, { dryRun: true }); + + expect(res.mode).toBe('dry-run'); + expect(res.planned).toEqual({ + creates: 0, + updates: 1, + deletes: 0, + renamesMoves: 0, + skipped: 0, + }); + // The client FACTORY was never invoked -> zero Docmost contact. + expect(makeClient).not.toHaveBeenCalled(); + // No ref advance, no mirror ff. + expect(calls.updateRef).toEqual([]); + expect(calls.fastForwardBranch).toEqual([]); + // A plan WAS logged (counts + the per-item update line). + expect(logs.join('\n')).toMatch(/DRY-RUN/); + expect(logs.join('\n')).toMatch(/update: p-1 \(Doc\.md\)/); + // It still diffs the base against main and works on main. + expect(calls.diffNameStatus).toEqual([{ from: LAST_PUSHED_REF, to: 'main' }]); + expect(calls.checkout).toEqual(['main']); + }); + + it('commits the working tree with the local provenance trailer before diffing', async () => { + const { git, calls } = makeGit({ lastPushed: 'base-sha' }); + const fs = makeFs(); + const { deps } = makeDeps(git, fs); + + await runPush(deps, { dryRun: true }); + + // The first commit is the human working-tree commit on main (SPEC §7.3). 
+ expect(calls.commit[0]).toBe('local: working-tree changes'); + expect(calls.stageAll).toBeGreaterThanOrEqual(1); + const trailerArg = (git.commit as any).mock.calls[0][1]; + expect(trailerArg.trailers).toEqual(['Docmost-Sync-Source: local']); + }); +}); + +describe('runPush — --apply is the ONLY write path', () => { + it('builds the client, calls applyPushActions, records created pageIds, advances last-pushed', async () => { + // A brand-new local file: meta has title + spaceId but NO pageId yet. + const newFile = serializeDocmostMarkdownBody( + { version: 1, title: 'New', spaceId: 'sp-1' }, + 'fresh body', + ); + const { git, calls, setMainSha } = makeGit({ + lastPushed: 'base-sha', + mainSha: 'main-1', + changes: [{ status: 'A', path: 'New.md' }], + }); + const fs = makeFs({ 'New.md': newFile }); + const client = makeClientFake({ createId: 'page-new' }); + const { deps, makeClient } = makeDeps(git, fs, client); + // After the write-back commit, `main` moves to a new commit. + (git.commit as any).mockImplementation(async (subject: string) => { + calls.commit.push(subject); + if (subject === 'local: record created pageIds') setMainSha('main-2'); + return true; + }); + + const res = await runPush(deps, { dryRun: false }); + + expect(res.mode).toBe('apply'); + // The client factory WAS used and createPage ran (the write path). + expect(makeClient).toHaveBeenCalledTimes(1); + expect(client.createPage).toHaveBeenCalledTimes(1); + expect(res.applied?.created).toBe(1); + // The assigned pageId was written back into the file on disk. + expect(res.applied?.writtenBack).toEqual([{ path: 'New.md', pageId: 'page-new' }]); + expect(fs.store['New.md']).toMatch(/page-new/); + // A "record created pageIds" commit persisted the write-back. + expect(calls.commit).toContain('local: record created pageIds'); + // last-pushed was advanced — first by the applier (main-1), then re-advanced + // to the write-back commit (main-2). 
+ const lastPushedAdvances = calls.updateRef.filter( + (u) => u.ref === LAST_PUSHED_REF, + ); + expect(lastPushedAdvances.map((u) => u.target)).toEqual(['main-1', 'main-2']); + expect(res.divergentDocmost).toBe(false); + expect(res.failures).toEqual([]); + }); + + it('ESCALATES a divergent docmost mirror in the write-back branch too (SPEC §5, symmetric)', async () => { + // A create -> the pageId is written back and a "record created pageIds" + // commit is made, which triggers the write-back-branch ff. Here the applier's + // MAIN push ff succeeds (ok) but the WRITE-BACK ff diverges — the write-back + // branch must escalate identically to the main branch (set divergentDocmost, + // log the same prominent WARNING), so main() exits 1. + const newFile = serializeDocmostMarkdownBody( + { version: 1, title: 'New', spaceId: 'sp-1' }, + 'fresh body', + ); + const { git, calls, setMainSha } = makeGit({ + lastPushed: 'base-sha', + mainSha: 'main-1', + changes: [{ status: 'A', path: 'New.md' }], + }); + const fs = makeFs({ 'New.md': newFile }); + const client = makeClientFake({ createId: 'page-new' }); + const { deps, logs } = makeDeps(git, fs, client); + (git.commit as any).mockImplementation(async (subject: string) => { + calls.commit.push(subject); + if (subject === 'local: record created pageIds') setMainSha('main-2'); + return true; + }); + // First ff (applier 7b, main push) is OK; second ff (write-back) DIVERGES. + let ffCall = 0; + (git.fastForwardBranch as any).mockImplementation( + async (branch: string, toCommit: string) => { + calls.fastForwardBranch.push({ branch, toCommit }); + ffCall++; + return ffCall === 1 + ? { ok: true } + : { ok: false, reason: 'not-fast-forward' }; + }, + ); + + const res = await runPush(deps, { dryRun: false }); + + // The apply still happened, but the write-back divergence is escalated. 
+ expect(res.applied?.created).toBe(1); + expect(res.divergentDocmost).toBe(true); + // The SAME prominent WARNING (DIVERGED + §5) — not a soft warning. + expect(logs.join('\n')).toMatch(/WARNING/); + expect(logs.join('\n')).toMatch(/DIVERGED/); + expect(logs.join('\n')).toMatch(/write-back/); + }); + + it('an update goes through importPageMarkdown (collab path)', async () => { + const file = + '\n\nbody\n'; + const { git } = makeGit({ + lastPushed: 'base-sha', + changes: [{ status: 'M', path: 'Doc.md' }], + }); + const fs = makeFs({ 'Doc.md': file }); + const client = makeClientFake(); + const { deps } = makeDeps(git, fs, client); + + const res = await runPush(deps, { dryRun: false }); + + expect(client.importPageMarkdown).toHaveBeenCalledWith('p-9', file); + expect(res.applied?.updated).toBe(1); + }); +}); + +describe('runPush — merge-in-progress aborts (SPEC §9/§12)', () => { + it('stops with a clear message, no diff, no client, no apply', async () => { + const { git, calls } = makeGit({ mergeInProgress: true }); + const fs = makeFs(); + const { deps, logs, makeClient } = makeDeps(git, fs); + + const res = await runPush(deps, { dryRun: false }); + + expect(res.aborted).toBe('merge-in-progress'); + // Never diffed, never built a client, never checked out / committed. + expect(calls.diffNameStatus).toEqual([]); + expect(makeClient).not.toHaveBeenCalled(); + expect(calls.checkout).toEqual([]); + expect(logs.join('\n')).toMatch(/unresolved merge/); + expect(logs.join('\n')).toMatch(/SPEC §9/); + }); +}); + +describe('runPush — divergent docmost escalation (SPEC §5)', () => { + it('sets the escalation flag and logs a WARNING, but the apply still happened', async () => { + const file = + '\n\nbody\n'; + const { git } = makeGit({ + lastPushed: 'base-sha', + changes: [{ status: 'M', path: 'Doc.md' }], + // The applier refuses to clobber a divergent mirror. 
+ ffResult: { ok: false, reason: 'not-fast-forward' }, + }); + const fs = makeFs({ 'Doc.md': file }); + const client = makeClientFake(); + const { deps, logs } = makeDeps(git, fs, client); + + const res = await runPush(deps, { dryRun: false }); + + // The apply STILL happened (the page was updated)... + expect(res.applied?.updated).toBe(1); + expect(client.importPageMarkdown).toHaveBeenCalledTimes(1); + // ...but the divergence is escalated, not silent. + expect(res.divergentDocmost).toBe(true); + expect(logs.join('\n')).toMatch(/WARNING/); + expect(logs.join('\n')).toMatch(/DIVERGED/); + }); +}); + +describe('runPush — base selection (last-pushed else docmost)', () => { + it('uses refs/docmost/last-pushed when it resolves', async () => { + const { git, calls } = makeGit({ lastPushed: 'lp-sha' }); + const fs = makeFs(); + const { deps } = makeDeps(git, fs); + + const res = await runPush(deps, { dryRun: true }); + + expect(res.base).toEqual({ + ref: LAST_PUSHED_REF, + source: 'last-pushed', + sha: 'lp-sha', + }); + expect(calls.diffNameStatus[0].from).toBe(LAST_PUSHED_REF); + }); + + it('falls back to the docmost branch when last-pushed is missing', async () => { + const { git, calls } = makeGit({ + lastPushed: null, // last-pushed does not resolve -> fall back. + docmostSha: 'doc-sha', + }); + const fs = makeFs(); + const { deps } = makeDeps(git, fs); + + const res = await runPush(deps, { dryRun: true }); + + expect(res.base).toEqual({ + ref: DOCMOST_BRANCH, + source: 'docmost', + sha: 'doc-sha', + }); + // The diff is taken against the docmost mirror branch. 
+ expect(calls.diffNameStatus[0].from).toBe(DOCMOST_BRANCH); + }); +}); diff --git a/packages/git-sync/test/settings.test.ts b/packages/git-sync/test/settings.test.ts new file mode 100644 index 00000000..ed7efa01 --- /dev/null +++ b/packages/git-sync/test/settings.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from 'vitest'; +import { parseSettings } from '../src/engine/settings'; + +// A minimal valid environment with every required variable set. Tests clone and +// mutate this object so process.env is never touched (hermetic). +const baseEnv = { + DOCMOST_API_URL: 'https://docmost.example.com', + DOCMOST_EMAIL: 'you@example.com', + DOCMOST_PASSWORD: 'secret', + DOCMOST_SPACE_ID: 'space-123', +} as NodeJS.ProcessEnv; + +describe('parseSettings', () => { + it('maps a full valid env to the camelCase Settings object', () => { + const settings = parseSettings({ + ...baseEnv, + VAULT_PATH: 'data/custom-vault', + GIT_REMOTE: 'git@github.com:you/vault.git', + POLL_INTERVAL_MS: '5000', + DEBOUNCE_MS: '1000', + LOG_LEVEL: 'debug', + }); + + expect(settings).toEqual({ + docmostApiUrl: 'https://docmost.example.com', + docmostEmail: 'you@example.com', + docmostPassword: 'secret', + docmostSpaceId: 'space-123', + vaultPath: 'data/custom-vault', + gitRemote: 'git@github.com:you/vault.git', + pollIntervalMs: 5000, + debounceMs: 1000, + logLevel: 'debug', + }); + }); + + it('applies defaults when optional vars are omitted', () => { + const settings = parseSettings({ ...baseEnv }); + + expect(settings.vaultPath).toBe('data/vault'); + expect(settings.pollIntervalMs).toBe(15000); + expect(settings.debounceMs).toBe(2000); + expect(settings.logLevel).toBe('info'); + expect(settings.gitRemote).toBeUndefined(); + }); + + it('coerces numeric strings to numbers', () => { + const settings = parseSettings({ ...baseEnv, POLL_INTERVAL_MS: '3000' }); + + expect(settings.pollIntervalMs).toBe(3000); + expect(typeof settings.pollIntervalMs).toBe('number'); + }); + + it('throws when a 
required var is missing', () => { + const { DOCMOST_API_URL: _omit, ...rest } = baseEnv; + void _omit; + expect(() => parseSettings(rest as NodeJS.ProcessEnv)).toThrow(); + }); + + it('throws on an invalid LOG_LEVEL', () => { + expect(() => + parseSettings({ ...baseEnv, LOG_LEVEL: 'verbose' }), + ).toThrow(); + }); + + it('throws on a non-numeric POLL_INTERVAL_MS', () => { + expect(() => + parseSettings({ ...baseEnv, POLL_INTERVAL_MS: 'soon' }), + ).toThrow(); + }); + + it('treats an empty GIT_REMOTE as undefined', () => { + const settings = parseSettings({ ...baseEnv, GIT_REMOTE: '' }); + expect(settings.gitRemote).toBeUndefined(); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b90f234b..8cbab434 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -919,6 +919,9 @@ importers: marked: specifier: 17.0.5 version: 17.0.5 + zod: + specifier: 4.3.6 + version: 4.3.6 devDependencies: '@types/jsdom': specifier: ^21.1.7 @@ -5314,6 +5317,7 @@ packages: '@ungap/structured-clone@1.3.0': resolution: {integrity: sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==} + deprecated: Potential CWE-502 - Update to 1.3.1 or higher '@unrs/resolver-binding-android-arm-eabi@1.11.1': resolution: {integrity: sha512-ppLRUgHVaGRWUx0R0Ut06Mjo9gBaBkg3v/8AxusGLhsIotbBLuRk51rAzqLC8gq6NyyAojEXglNjzf6R948DNw==} @@ -9713,6 +9717,7 @@ packages: sliced@1.0.1: resolution: {integrity: sha512-VZBmZP8WU3sMOZm1bdgTadsQbcscK0UM8oKxKVBs4XAhUo2Xxzm/OFMGBkPusxw9xL3Uy8LrzEqGqJhclsr0yA==} + deprecated: Unsupported socket.io-adapter@2.5.4: resolution: {integrity: sha512-wDNHGXGewWAjQPt3pyeYBtpWSq9cLE5UW1ZUPL/2eGK9jtse/FpXib7epSTsz0Q0m+6sg6Y4KtcFTlah1bdOVg==}