feat(git-sync): vendor IO engine (pull/push/git/settings) with GitSyncClient seam (Phase A.3)

Vendor the IO engine from docmost-sync into packages/git-sync/src/engine:
- git.ts (VaultGit, execFile shell-out — verbatim)
- pull.ts (readExisting, computePullActions, applyPullActions)
- push.ts (classifyRenameMoves, computePushActions, applyPushActions, runPush)
- settings.ts adapted (pure parseSettings + Settings type; no process.env binding
  — the server builds Settings from EnvironmentService later), config-errors.ts.
CLI main()/import.meta entrypoints dropped (server drives in-process).

Client seam: new engine/client.types.ts defines GitSyncClient; pull.ts/push.ts
now use Pick<GitSyncClient, ...> instead of the non-vendored DocmostClient. Engine
logic byte-identical except a zod4-compat fix in config-errors (zod4 dropped the
issue.received==='undefined' signal; match /received undefined/ on the message).

Ported the engine unit tests (compute/apply pull+push actions, classify-rename-
moves, run-push, settings, config-errors) incl. real-git temp-repo tests: 431
pass / 3 expected-fail (was 314/3). REST/CLI-coupled upstream tests skipped
(noted). CJS build clean. No apps/server wiring yet (next step).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-21 14:39:38 +03:00
parent b0cd4bd6cf
commit 70bd0dba4d
32 changed files with 6222 additions and 2728 deletions

View File

@@ -1,109 +0,0 @@
/**
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
* rather than any concrete client, because the gitmost server writes NATIVELY —
* through repositories + collab `openDirectConnection`.
*
* `GitSyncClient` is that interface: the native datasource (server side)
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
* subsets of it. The signatures below MIRROR exactly the methods the engine's
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
* off each result), so a REST-style client is still structurally assignable and
* the native adapter has a precise contract.
*/
/**
* A page node as returned by `listSpaceTree` (the sidebar/tree walk, no body).
* The engine layout (`buildVaultLayout`) consumes `PageNode` from `./layout`,
* which only requires `id` (+ optional `title`/`slugId`/`parentPageId`); this
* lite shape documents the fields the tree walk surfaces. Real tree nodes also
* carry `position`, `icon`, `hasChildren` — kept open via the index signature.
*/
export interface GitSyncPageNodeLite {
id: string;
slugId?: string;
title?: string;
parentPageId?: string | null;
hasChildren?: boolean;
/** `listSpaceTree` nodes carry extra fields (position, icon, …). */
[key: string]: unknown;
}
/**
* The structural client the engine depends on. Only `Pick<GitSyncClient, ...>`
* subsets are ever used:
* - pull reads: `getPageJson` (+ the tree walk's `listSpaceTree`),
* - push writes: `importPageMarkdown` / `createPage` / `deletePage` /
* `movePage` / `renamePage`,
* - continuous (phase B+): `listRecentSince` / `listTrash` / `restorePage`.
*/
export interface GitSyncClient {
/**
* Full tree of page nodes for the space (or the subtree rooted at
* `rootPageId`), each WITHOUT body content. `complete` is `false` when the
* walk was truncated / a fetch failed — the pull side suppresses absence
* deletions on an incomplete tree (SPEC §8). Native impl returns
* `complete: true` always (reads the DB, not a paginated REST endpoint).
*/
listSpaceTree(spaceId: string, rootPageId?: string): Promise<{
pages: GitSyncPageNodeLite[];
complete: boolean;
}>;
/**
* One page WITH its ProseMirror body content. `applyPullActions` reads
* `id`, `slugId`, `title`, `parentPageId`, `spaceId` (for the file meta) and
* `content` (to stabilize/serialize). `updatedAt` is carried for the
* poll-suppression loop-guard.
*/
getPageJson(pageId: string): Promise<{
id: string;
slugId: string;
title: string;
parentPageId: string | null;
spaceId: string;
updatedAt: string;
content: unknown;
}>;
/**
* Merge a page's body from a self-contained markdown file (meta + body). The
* collab/Yjs write path (SPEC §2/§15.6) — never a raw jsonb overwrite.
* `applyPushActions` reads only an optional `updatedAt` off the result
* (via `extractUpdatedAt`, tolerant of extra fields).
*
* `baseMarkdown` is the last-synced version of the file (`refs/docmost/
* last-pushed`), the common ancestor for a THREE-WAY merge against the live
* doc so concurrent human edits survive (review #5). Optional/null -> 2-way.
*/
importPageMarkdown(pageId: string, fullMarkdown: string, baseMarkdown?: string | null): Promise<{
updatedAt?: string;
[key: string]: unknown;
}>;
/**
* Create a new page and return the assigned id at `data.id`
* (`applyPushActions` reads `result.data.id`, then writes it back into the
* file's meta). An optional top-level/`data.updatedAt` feeds the loop-guard.
*/
createPage(title: string, content: string, spaceId: string, parentPageId?: string): Promise<{
data: {
id: string;
};
updatedAt?: string;
[key: string]: unknown;
}>;
/** Soft-delete a page to Trash (SPEC §8). Result is not inspected. */
deletePage(pageId: string): Promise<unknown>;
/**
* Reparent a page (and optionally set its fractional-index `position`). The
* engine passes `position` UNDEFINED for now; the native impl computes a
* default between siblings. Result is not inspected.
*/
movePage(pageId: string, parentPageId: string | null, position?: string): Promise<unknown>;
/** Change a page's title only (no body touch). Result is not inspected. */
renamePage(pageId: string, title: string): Promise<unknown>;
/**
* Pages updated since `sinceIso` (the poll-safety reconciliation, SPEC §8).
* `spaceId` may be undefined (all spaces); `hardPageCap` bounds the walk.
*/
listRecentSince(spaceId: string | undefined, sinceIso: string | null, hardPageCap?: number): Promise<unknown[]>;
/** List soft-deleted (trashed) pages for the space (deletion detection). */
listTrash(spaceId: string): Promise<unknown[]>;
/** Restore a soft-deleted page from Trash. Result is not inspected. */
restorePage(pageId: string): Promise<unknown>;
}

View File

@@ -1,13 +0,0 @@
/**
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
* rather than any concrete client, because the gitmost server writes NATIVELY —
* through repositories + collab `openDirectConnection`.
*
* `GitSyncClient` is that interface: the native datasource (server side)
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
* subsets of it. The signatures below MIRROR exactly the methods the engine's
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
* off each result), so a REST-style client is still structurally assignable and
* the native adapter has a precise contract.
*/
export {};

View File

@@ -1,50 +0,0 @@
import { ZodError } from 'zod';
// Turn a ZodError from settings validation into a clear, actionable startup
// message that names the offending env var(s), then exit(1) — no raw stack
// trace. Mirrors the Python new-project skeleton's load_settings_or_exit.
// A non-ZodError is left to propagate unchanged.
export function loadSettingsOrExit(factory) {
try {
return factory();
}
catch (err) {
if (!(err instanceof ZodError))
throw err;
const missing = [];
const invalid = [];
for (const issue of err.issues) {
const name = issue.path.length ? String(issue.path[0]) : '?';
// A missing required variable surfaces as an `invalid_type` issue whose
// received value was `undefined`. zod 3 exposed `issue.received` directly;
// zod 4 dropped that field and instead folds it into the message
// ("expected string, received undefined"). Detect both shapes so the
// missing-vs-invalid split holds across zod majors. NOTE: an invalid (but
// present) value uses a different code (invalid_format / invalid_value) or
// an `invalid_type` message that reports a non-undefined received (e.g.
// "received NaN" from a coerced number), so neither is misread as missing.
const i = issue;
const isMissing = issue.code === 'invalid_type' &&
(i.received === 'undefined' ||
/received undefined/i.test(i.message ?? ''));
if (isMissing)
missing.push(name);
else
invalid.push(`${name}: ${issue.message}`);
}
const lines = ['Configuration error in environment / .env:'];
if (missing.length) {
lines.push(' Missing required variable(s):');
for (const n of [...new Set(missing)])
lines.push(` - ${n}`);
}
if (invalid.length) {
lines.push(' Invalid value(s):');
for (const item of invalid)
lines.push(` - ${item}`);
}
lines.push('');
lines.push('Set them in .env (see .env.example) and try again.');
process.stderr.write(lines.join('\n') + '\n');
process.exit(1);
}
}

View File

@@ -1,570 +0,0 @@
/**
* Thin async wrapper over the system `git` binary (SPEC §5: state store = git).
*
* IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`,
* which is the vault's OWN git repository (default `data/vault`), SEPARATE from
* the gitmost application repo. This module MUST NEVER run git against the
* application repo. `data/` is gitignored, so a nested repo under `data/vault`
* is safe. The pull cycle is READ-ONLY toward Docmost; this module only touches
* the local vault git, never a git remote (push is deferred, see SPEC §7).
*
* Implementation notes:
* - We shell out via `node:child_process` `execFile` (promisified), passing
* ARGS AS AN ARRAY — no shell, so there is no command injection surface even
* if a page title / branch name contains shell metacharacters.
* - EVERY git invocation funnels through the single `runRaw` primitive, which
* ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git
* never blocks on a pager and always prints verbatim UTF-8 paths). There is
* no exception — even the `git --version` preflight goes through `runRaw`.
* - "nothing to commit" is treated as a graceful no-op, not an error.
*/
import { execFile } from "node:child_process";
import { mkdir } from "node:fs/promises";
import { promisify } from "node:util";
const execFileAsync = promisify(execFile);
/** Bot identity used for engine-authored vault commits (SPEC §7.3). */
export const BOT_AUTHOR_NAME = "Docmost Sync";
export const BOT_AUTHOR_EMAIL = "docmost-sync@local";
/** Default branch the vault repo is initialized on. */
export const DEFAULT_BRANCH = "main";
/**
* A git wrapper bound to a single vault path. Construct once per vault; every
* method runs git with `cwd = vaultPath`.
*/
export class VaultGit {
vaultPath;
constructor(vaultPath) {
this.vaultPath = vaultPath;
}
/**
* Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
* to system `git` for every vault operation, so a missing binary (e.g. a slim
* container image without git) must fail fast with an actionable message
* rather than a cryptic ENOENT deep inside the first real git call. Presence
* check only — we do NOT gate on a specific version. Runs `git --version`
* with NO `cwd` (the vault dir may not exist yet at preflight time).
*/
async assertGitAvailable() {
// Goes through the single `runRaw` primitive like every other invocation.
// `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at
// preflight time, so we must not point git at a missing directory.
const r = await this.runRaw(["--version"], { cwd: null });
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error("git binary not found or not runnable — install git (the vault state " +
`store requires it). Underlying error: ${detail}`);
}
}
/**
* Run a git command in the vault and return trimmed stdout. THIN wrapper over
* the single `runRaw` primitive: throws a clear, unified Error (including
* stderr/stdout) on a non-zero exit.
*/
async run(args, opts) {
const r = await this.runRaw(args, opts);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
}
return r.stdout.trim();
}
/**
* The ONE primitive every git invocation in this module flows through. Builds
* the full argv (`--no-pager -c core.quotepath=false <args>`), env, cwd, and
* maxBuffer, runs git, and NEVER throws — it returns the exit info so callers
* can treat a non-zero exit as either an error (`run`) or a meaningful state
* (e.g. a merge conflict, a porcelain diff that "fails" deliberately).
*
* - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
* blocks on a pager and always prints verbatim UTF-8 paths (no octal
* escaping/quoting). `quotepath=false` is the baseline for ALL path-
* printing commands (ls-files, diff --name-only, …).
* - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
* vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
* - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
* - On a spawn/exec error we capture the error `message` too, so a failure
* before git could write to stderr (e.g. ENOENT) is NOT lost.
*/
async runRaw(args, opts) {
const cwd = opts?.cwd === null ? undefined : (opts?.cwd ?? this.vaultPath);
try {
const { stdout, stderr } = await execFileAsync("git", ["--no-pager", "-c", "core.quotepath=false", ...args], {
// Generous buffer: file listings / porcelain output on a large vault
// can be sizable.
...(cwd !== undefined ? { cwd } : {}),
maxBuffer: 64 * 1024 * 1024,
env: vaultGitEnv(opts?.env),
});
return { code: 0, stdout, stderr };
}
catch (err) {
const e = err;
return {
code: typeof e.code === "number" ? e.code : 1,
stdout: e.stdout ?? "",
// Preserve the error message when there is no stderr (e.g. a spawn
// failure like ENOENT, where promisified execFile sets stderr to an
// EMPTY STRING — so `||`, not `??`, to fall through to `message`).
stderr: e.stderr || e.message || "",
};
}
}
/**
* Ensure the vault directory exists and is an initialized git repo on `main`
* with an initial (empty) commit so branches exist. Idempotent: safe to call
* on every run. Sets a LOCAL bot identity for the vault repo if none is set
* (so engine commits never fall back to a global/unset identity).
*/
async ensureRepo() {
await mkdir(this.vaultPath, { recursive: true });
if (!(await this.isRepo())) {
// `git init -b main` sets the initial branch on modern git; we still
// guard the branch name below for safety on older binaries.
await this.run(["init", "-b", DEFAULT_BRANCH]);
}
// Set a local identity for the vault repo if unset, so engine commits have
// a deterministic committer even on a machine with no global git config.
if (!(await this.hasLocalConfig("user.name"))) {
await this.run(["config", "user.name", BOT_AUTHOR_NAME]);
}
if (!(await this.hasLocalConfig("user.email"))) {
await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
}
// Neutralize correctness-affecting git config in the vault's LOCAL config so
// a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just
// output) and corrupt the vault. The vault is OUR dedicated repo, so LOCAL
// values (which override global/system) are the right scope. Set
// UNCONDITIONALLY every run — idempotent and cheap; `git config <key>`
// writes to `--local` by default inside the repo. These MUST be in place
// before any add/commit/checkout that could be affected, hence they run
// before the initial-commit block below.
// - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true
// would rewrite LF<->CRLF on add/checkout, making our deterministic,
// byte-stable markdown churn and breaking the round-trip invariant.
// `false` guarantees git stores/checks out verbatim bytes.
// - core.safecrlf=false — avoid CRLF-related warnings/aborts on add.
// - commit.gpgsign=false — the headless daemon must never try to GPG-sign
// a commit (would fail/hang; we already set GIT_TERMINAL_PROMPT=0).
// - core.attributesFile=/dev/null — neutralize the user's GLOBAL
// gitattributes so a global clean/smudge filter (filter.<name>.clean)
// cannot rewrite the STORED blob and break §11 byte-stability (a config
// that core.autocrlf=false does not cover). POSIX-only path, which is
// fine: the daemon runs on Linux (Docker) / macOS. A system
// /etc/gitattributes remains the host admin's domain (out of scope).
// NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
// human running git by hand in the vault must inherit the same neutralized
// behavior; a transient `-c` would not persist. (core.quotepath, by
// contrast, only affects OUR parsing of output and so is baked into the
// `runRaw` argv baseline instead.)
try {
await this.run(["config", "core.autocrlf", "false"]);
await this.run(["config", "core.safecrlf", "false"]);
await this.run(["config", "commit.gpgsign", "false"]);
await this.run(["config", "core.attributesFile", "/dev/null"]);
}
catch (err) {
const detail = err instanceof Error ? err.message : String(err);
throw new Error(`failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` +
"/.git/config is writable and not locked (e.g. stale config.lock): " +
detail);
}
// Create the initial empty commit on `main` if the repo has no commits yet,
// so both `main` and (later) `docmost` branches have a common base.
if (!(await this.hasAnyCommit())) {
// Make sure we are on the default branch before the first commit (covers
// the older-git case where `init -b` was not honored).
await this.run(["checkout", "-B", DEFAULT_BRANCH]);
await this.commitRaw("init vault", {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
allowEmpty: true,
});
}
}
/** True if `cwd` is inside a git work-tree (the vault is initialized). */
async isRepo() {
const r = await this.runRaw(["rev-parse", "--is-inside-work-tree"]);
return r.code === 0 && r.stdout.trim() === "true";
}
/** True if a LOCAL git config key is set in the vault repo. */
async hasLocalConfig(key) {
const r = await this.runRaw(["config", "--local", "--get", key]);
return r.code === 0 && r.stdout.trim().length > 0;
}
/** True if the repo has at least one commit (HEAD resolves). */
async hasAnyCommit() {
const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]);
return r.code === 0;
}
/** True if a branch with the given name exists. */
async branchExists(name) {
const r = await this.runRaw([
"rev-parse",
"--verify",
`refs/heads/${name}`,
]);
return r.code === 0;
}
/**
* Create `name` from `fromBranch` if it does not already exist. No-op (and no
* checkout) when the branch is already present.
*/
async ensureBranch(name, fromBranch) {
if (await this.branchExists(name))
return;
await this.run(["branch", name, fromBranch]);
}
/** Name of the currently checked-out branch. */
async currentBranch() {
return this.run(["rev-parse", "--abbrev-ref", "HEAD"]);
}
/** Check out an existing branch. */
async checkout(name) {
await this.run(["checkout", name]);
}
/** Stage everything (adds, modifications, deletions). */
async stageAll() {
await this.run(["add", "-A"]);
}
/**
* True if the vault is mid-merge (an unresolved merge from a previous run,
* SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged
* (conflicted) index entries (`git ls-files -u`). The pull cycle checks this
* BEFORE any checkout so a left-over merge produces a clear, actionable
* message instead of a raw "you need to resolve your current index first"
* failure deep inside `checkout`. This is what makes re-runs converge
* (resumability, SPEC §12).
*/
async isMergeInProgress() {
// MERGE_HEAD exists exactly while a merge is in progress.
const mergeHead = await this.runRaw([
"rev-parse",
"--verify",
"--quiet",
"MERGE_HEAD",
]);
if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0)
return true;
// Fallback / belt-and-suspenders: any unmerged index entries also mean the
// working tree is mid-conflict and a checkout would refuse.
const unmerged = await this.runRaw(["ls-files", "-u"]);
return unmerged.code === 0 && unmerged.stdout.trim().length > 0;
}
/**
* Commit the currently STAGED changes with an explicit author/committer
* identity and the given trailers appended to the message body (SPEC §7.3
* provenance). Returns `true` if a commit was made, `false` if there was
* nothing to commit (graceful no-op). The caller is expected to have staged
* its changes first (e.g. via `stageAll`).
*/
async commit(message, opts) {
// Nothing staged -> nothing to commit. Treat as a no-op (SPEC §11: a
// deterministic re-pull of unchanged pages produces identical bytes, so
// git sees no diff and we must not error).
const staged = await this.runRaw([
"diff",
"--cached",
"--quiet",
]);
// `diff --cached --quiet` exits 0 when the index matches HEAD (nothing
// staged), 1 when there are staged changes.
if (staged.code === 0)
return false;
await this.commitRaw(message, opts);
return true;
}
/**
* Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
* Builds the full message with appended trailers and sets author + committer
* identity via env vars (so the committer matches the author, not the repo
* default).
*/
async commitRaw(message, opts) {
const fullMessage = buildCommitMessage(message, opts.trailers);
// `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath
// (or any injected hook) must never interfere with engine commits in our
// dedicated vault repo.
const args = ["commit", "--no-verify", "-m", fullMessage];
if (opts.allowEmpty)
args.push("--allow-empty");
// Route through the single `runRaw` primitive; set author + committer
// identity via env vars (so the committer matches the author, not the repo
// default). Throw via the same unified message on a non-zero exit.
const r = await this.runRaw(args, {
env: {
GIT_AUTHOR_NAME: opts.authorName,
GIT_AUTHOR_EMAIL: opts.authorEmail,
GIT_COMMITTER_NAME: opts.authorName,
GIT_COMMITTER_EMAIL: opts.authorEmail,
},
});
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
}
}
/**
* Merge `fromBranch` into the current branch (`git merge --no-edit`).
* Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict
* state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict
* markers are left in the worktree for manual resolution by a later increment,
* and — critically — nothing is pushed to Docmost (we never write to Docmost
* anyway).
*/
async merge(fromBranch) {
const r = await this.runRaw(["merge", "--no-edit", fromBranch]);
const output = `${r.stdout}\n${r.stderr}`.trim();
if (r.code === 0) {
return { ok: true, conflict: false, output };
}
// A non-zero exit on merge most commonly means a conflict. Confirm by
// checking for unmerged paths (porcelain "U" status) so we don't mislabel
// an unrelated failure as a conflict.
const conflict = await this.hasUnmergedPaths();
return { ok: false, conflict, output };
}
/** True if the index has any unmerged (conflicted) paths. */
async hasUnmergedPaths() {
const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]);
return r.code === 0 && r.stdout.trim().length > 0;
}
/**
* List tracked files on the current branch (paths relative to the vault
* root, forward-slash separated). An optional glob (a git pathspec) narrows
* the listing, e.g. `"*.md"`.
*
* The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
* (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
* returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
* which `src/pull.ts` `readExisting` would then parse as garbage paths,
* breaking move/duplicate detection. We defeat that two ways at once:
* - `core.quotepath=false` disables the octal-escape/quoting. It is now the
* `runRaw` argv baseline (prepended to EVERY invocation), so we no longer
* pass it inline here.
* - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
* ambiguity), which we split on `\0`.
* We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
* the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
* are returned verbatim — git already emits forward slashes.
*/
async listTrackedFiles(glob) {
const r = await this.runRaw(["ls-files", "-z", ...(glob ? [glob] : [])]);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ls-files failed: ${detail}`);
}
return r.stdout.split("\0").filter((p) => p.length > 0);
}
/**
* Diff two refs with `--name-status -M -z` and parse the NUL-delimited output
* (SPEC §6: the FS→Docmost push direction diffs `main` against
* `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed
* file is reported as a single `R` row with both its old and new path instead
* of a delete+add pair — that distinction is what lets the push planner tell a
* move from a delete+create (SPEC §8 "Move vs delete").
*
* `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has
* Cyrillic file names) with NO quoting/escaping. The record shape differs by
* status:
* - A/M/D: `status\0path\0`
* - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
* We read the RAW stdout (not the trimming `run()` helper, which would mangle
* the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the
* tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim.
*/
async diffNameStatus(fromRef, toRef) {
const r = await this.runRaw([
"diff",
"--name-status",
"-M",
"-z",
fromRef,
toRef,
]);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git diff --name-status failed: ${detail}`);
}
// Tokens alternate: <status> <path...> <status> <path...> ... With `-z`,
// each token (status code AND each path) is its own NUL-delimited field.
const tokens = r.stdout.split("\0").filter((t) => t.length > 0);
const entries = [];
let i = 0;
while (i < tokens.length) {
const raw = tokens[i++];
// The status token is e.g. `A`, `M`, `D`, or `R100` / `C075`. The leading
// letter is the change kind; any trailing digits are the similarity score.
const letter = raw[0];
if (letter === "R" || letter === "C") {
const score = Number.parseInt(raw.slice(1), 10);
const oldPath = tokens[i++];
const path = tokens[i++];
if (oldPath === undefined || path === undefined)
break; // malformed tail
entries.push({
status: letter,
path,
oldPath,
...(Number.isFinite(score) ? { score } : {}),
});
}
else if (letter === "A" || letter === "M" || letter === "D") {
const path = tokens[i++];
if (path === undefined)
break; // malformed tail
entries.push({ status: letter, path });
}
else {
// Unknown/other status (e.g. T type-change, U unmerged) — consume one
// path token defensively so the walk stays aligned, but do not emit it
// (the push planner only handles A/M/D/R/C).
i++;
}
}
return entries;
}
/**
* Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist.
* `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an
* unknown ref, so a non-zero exit maps cleanly to `null`. Used to read
* `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push.
*/
async revParse(ref) {
const r = await this.runRaw(["rev-parse", "--verify", "--quiet", ref]);
if (r.code !== 0)
return null;
const sha = r.stdout.trim();
return sha.length > 0 ? sha : null;
}
/**
* Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`,
* named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5:
* "что из `main` уже отражено в Docmost").
*/
async readRef(ref) {
return this.revParse(ref);
}
/**
* Point `ref` at `target` (`git update-ref <ref> <target>`). Used to advance
* `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
* (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts.
*/
async updateRef(ref, target) {
await this.run(["update-ref", ref, target]);
}
/**
* Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward,
* i.e. the current `branch` tip is an ancestor of `toCommit` (verified via
* `git merge-base --is-ancestor <branch> <toCommit>`). Used to advance the
* `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a
* push succeeds, Docmost already contains the pushed `main` content, so the
* mirror must reflect it — otherwise the NEXT pull would diff our own write
* back and re-pull it (loop-guard).
*
* SAFETY — never force, never clobber divergent history:
* - If `branch` IS an ancestor of `toCommit`, advance it with
* `git update-ref refs/heads/<branch> <toCommit>`. The `docmost` branch is
* NOT checked out during a push (push works on `main`), so updating the ref
* directly is safe and avoids any working-tree touch.
* - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward),
* do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and
* let the caller log it. We must never overwrite a `docmost` history that
* has commits the push base does not contain.
*
* Returns `{ ok: true }` when the branch was advanced (or already at
* `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise.
* A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason.
*/
async fastForwardBranch(branch, toCommit) {
const branchRef = `refs/heads/${branch}`;
// Resolve both endpoints first so a missing ref is a clean refusal, not a
// confusing `merge-base` failure.
const branchSha = await this.revParse(branchRef);
if (branchSha === null) {
return { ok: false, reason: `branch ${branch} does not exist` };
}
const targetSha = await this.revParse(toCommit);
if (targetSha === null) {
return { ok: false, reason: `target ${toCommit} does not resolve` };
}
// Already at the target -> a no-op fast-forward (still ok).
if (branchSha === targetSha)
return { ok: true };
// `merge-base --is-ancestor A B` exits 0 iff A is an ancestor of B. Only a
// true ancestor is a fast-forward; anything else is divergent and refused.
const ancestor = await this.runRaw([
"merge-base",
"--is-ancestor",
branchSha,
targetSha,
]);
if (ancestor.code !== 0) {
return { ok: false, reason: "not-fast-forward" };
}
// Safe to advance: the branch is not checked out during push, so a direct
// ref update avoids a checkout/working-tree touch.
await this.updateRef(branchRef, targetSha);
return { ok: true };
}
/**
* Read a file's content at a specific ref (`git show <ref>:<path>`), or `null`
* if the path does not exist there. Used by the push direction to read the
* PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its
* `docmost:meta` — and therefore its `pageId` — can be recovered to translate
* the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones
* that had a pageId, are deleted in Docmost). A non-zero exit (path absent at
* that ref) maps to `null` rather than throwing.
*/
async showFileAtRef(ref, path) {
// `git show <ref>:<path>` requires the path relative to the repo root; pass
// it verbatim (forward-slash, matching `listTrackedFiles` / diff output).
const r = await this.runRaw(["show", `${ref}:${path}`]);
if (r.code !== 0)
return null;
return r.stdout;
}
}
/**
* Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
* Used by the single `runRaw` primitive every git command flows through, so
* these pins apply uniformly (including the `git --version` preflight).
*
* cwd-isolation is this module's central safety guarantee: every git command
* MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An
* inherited `GIT_DIR` / `GIT_WORK_TREE` in `process.env` would silently
* redirect the operation away from `cwd` (e.g. to the source repo or another
* checkout), defeating that guarantee. So we always strip them, regardless of
* whatever else the caller adds (author/committer identity, etc.).
*
* Exported for unit testing.
*/
export function vaultGitEnv(extra) {
const env = {
...process.env,
// Locale-independent output (defense in depth). We never parse localized
// prose, but pinning the locale prevents a future regression where some
// git message we DO key on is translated by an inherited LC_ALL/LANG.
LC_ALL: "C",
LANG: "C",
// Never page (we already pass --no-pager, but a stray GIT_PAGER could still
// bite) and never block on an interactive prompt (e.g. credentials) — the
// daemon runs unattended and must not hang.
GIT_PAGER: "cat",
GIT_TERMINAL_PROMPT: "0",
...extra,
};
delete env.GIT_DIR;
delete env.GIT_WORK_TREE;
return env;
}
/**
* Build a commit message body with trailer lines appended (SPEC §7.3). The
* trailers are separated from the subject by a blank line so `git interpret-
* trailers` / `git log --format=%(trailers)` parse them as trailers.
* Exported for unit testing.
*/
export function buildCommitMessage(subject, trailers) {
if (!trailers || trailers.length === 0)
return subject;
return `${subject}\n\n${trailers.join("\n")}`;
}

View File

@@ -1,136 +0,0 @@
import type { GitSyncClient } from "./client.types.js";
import { type PageNode } from "./layout.js";
import { VaultGit } from "./git.js";
import { type MovedEntry, type DeletionDecision } from "./reconcile.js";
/**
* Injectable IO for `readExisting` (R-Pull-1, test-strategy report §5). The real
* `main` wires these to `git.listTrackedFiles("*.md")` and an `fs.readFile`
* rooted at the vault; tests pass fakes so the parsing/skip rules are unit-
* testable without a real git repo or filesystem.
*/
export interface ReadExistingDeps {
/** List tracked .md paths (forward-slash, vault-relative). */
listTracked: () => Promise<string[]>;
/** Read a tracked file's text by its (forward-slash) vault-relative path. */
readFile: (relPath: string) => Promise<string>;
}
/**
* Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
* its `gitmost_id` frontmatter (native-Obsidian format). Files without a
* `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
* hand-written Obsidian file; PUSH adopts those separately).
*
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
* -> skipped, NOT thrown; the next pull converges;
* - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
*/
export declare function readExisting(deps: ReadExistingDeps): Promise<{
pageId: string;
relPath: string;
}[]>;
/**
* Input to the PURE `computePullActions` (R-Pull-2). All data, no IO: the live
* tree nodes + completeness flag (from `listSpaceTree`) and the parsed
* `existing` tracked files (from `readExisting`).
*/
export interface PullActionsInput {
/** Live page nodes for the space (from `listSpaceTree`). */
pages: PageNode[];
/** Whether the live tree fetch was COMPLETE (SPEC §8 suppression). */
treeComplete: boolean;
/** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */
existing: {
pageId: string;
relPath: string;
}[];
}
/**
* The PURE decisions object computed by `computePullActions` (no IO). It holds
* the reconciliation plan plus the SPEC §8 absence-deletion decision, with the
* suppression already folded in: `toDelete` is the POST-suppression set the
* caller should actually remove (empty when `deletionDecision.apply` is false).
*/
export interface PullActions {
/** Pages to (re)write at their relPath (add + update + move target). */
toWrite: {
pageId: string;
relPath: string;
}[];
/** Moves: write new path, then remove old path (only on a successful write). */
moved: MovedEntry[];
/**
* Absence-based paths to delete AFTER suppression. Empty when the decision
* suppressed deletions this cycle, so the caller can apply it unconditionally.
*/
toDelete: string[];
/** Why absence deletions were (or were not) applied (for logging + tests). */
deletionDecision: DeletionDecision;
/** Tracked-file count (for the suppression log messages). */
existingCount: number;
/** Planned absence-delete count BEFORE suppression (for the log message). */
plannedDeleteCount: number;
}
/**
* PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live
* tree nodes + completeness + existing tracked files and returns the full set of
* decisions with NO IO:
*
* - builds the vault layout (deterministic relPath per live page),
* - `planReconciliation` -> toWrite / moved / absence-toDelete,
* - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch +
* empty-live + mass-delete guard), folded IN here so `toDelete` is the
* POST-suppression set (empty when suppressed).
*
* Moves are NOT governed by the suppression: a moved page is present in `live`,
* so its old-path removal is real (the caller still gates it on the write
* succeeding). The expensive content fetch / file write / git ops happen in the
* thin `applyPullActions`.
*/
export declare function computePullActions(input: PullActionsInput): PullActions;
/**
* Injectable IO for `applyPullActions` (R-Pull-2). The real `main` wires these
* to the live client, the vault git wrapper, and `node:fs/promises`; tests pass
* fakes that RECORD calls so the ordering + the move-on-success data-loss guard
* are testable without real git/fs/network.
*/
export interface ApplyPullActionsDeps {
client: Pick<GitSyncClient, "getPageJson">;
git: Pick<VaultGit, "stageAll" | "commit" | "checkout" | "merge">;
/** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */
writeFile: (absPath: string, text: string) => Promise<void>;
/** Recursive mkdir of an ABSOLUTE directory path. */
mkdir: (absDir: string) => Promise<void>;
/** Remove a file by ABSOLUTE path (force: a missing file is a no-op). */
rm: (absPath: string) => Promise<void>;
}
/** Outcome counters from `applyPullActions` (for the summary + tests). */
export interface ApplyResult {
written: number;
movedApplied: number;
deleted: number;
failed: number;
committed: boolean;
merge: {
ok: boolean;
conflict: boolean;
output: string;
};
}
/**
* THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current
* order, with all the original safety guards preserved bit-for-bit:
*
* 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
* (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
* never aborts the pull (bounded-concurrency pool, fault-tolerant).
* 2. apply MOVE old-path removals — ONLY when the planner marked the old path
* removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a
* failed move-write keeps the old path so the page never vanishes).
* 3. apply (post-suppression) absence deletes.
* 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
* counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
*
* `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps.
*/
export declare function applyPullActions(deps: ApplyPullActionsDeps, actions: PullActions, vaultRoot: string): Promise<ApplyResult>;

View File

@@ -1,284 +0,0 @@
/**
* Pull cycle — Docmost -> vault (SPEC §6 "Docmost -> ФС").
*
* This increment turns the read-only mirror into the git-backed pull cycle:
*
* 1. ensureRepo(vault); refuse if a merge is in progress (SPEC §9/§12);
* ensureBranch("docmost", "main") (SPEC §5 branches)
* 2. checkout docmost
* 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the
* desired `live` files (relPath via the pure sanitize/disambiguation layout)
* 4. parse `existing` tracked .md files (pageId + relPath from gitmost_id frontmatter)
* 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete
* is absence-only, moves are separate
* 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree
* fetch (SPEC §8) and behind the mass-delete guard (defense in depth)
* 7. write each live page in its fixpoint form (normalize-on-write, SPEC §11);
* apply moved-old-path removals (only when the move write SUCCEEDED) and
* absence-delete removals (only when the decision allowed them)
* 8. stageAll + commit on `docmost` with the provenance trailer (SPEC §7.3)
* 9. checkout main + merge docmost (conflicts are surfaced, NOT auto-resolved,
* SPEC §9); push is deferred (SPEC §7)
* 10. one-line summary
*
* DIRECTION IS Docmost -> vault ONLY. Nothing here ever writes to Docmost
* (read-only: listSpaceTree + getPageJson). All git operations run against
* the vault repo (`cwd = vaultPath`), never the source repo (see ./git.ts).
*
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
* the gitmost server drives the engine in-process (there is no standalone CLI
* entry point).
*/
import { dirname } from "node:path";
import { sep } from "node:path";
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
import { buildVaultLayout } from "./layout.js";
import { BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./git.js";
import { planReconciliation, decideAbsenceDeletions, } from "./reconcile.js";
import { stabilizePageBody } from "./stabilize.js";
// Engine-only mirror branch (SPEC §5): the engine writes here, humans never do.
const DOCMOST_BRANCH = "docmost";
// Machine-readable provenance the loop-guard keys on (SPEC §7.3 / §12).
const SOURCE_TRAILER = "Docmost-Sync-Source: docmost";
// Number of pages fetched/stabilized concurrently. Bounded so a large space
// does not open thousands of simultaneous requests/conversions at once.
const CONCURRENCY = 6;
// How often to log incremental progress (every N completed pages).
const PROGRESS_EVERY = 25;
/** Convert a vault-relative path (forward-slash) to an absolute FS path. */
function relToAbs(vaultRoot, relPath) {
return [vaultRoot, ...relPath.split("/")].join("/");
}
/** Convert an absolute/relative segment list under the vault to a relPath. */
function segmentsToRelPath(segments, stem) {
return [...segments, `${stem}.md`].join("/");
}
/**
* Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
* its `gitmost_id` frontmatter (native-Obsidian format). Files without a
* `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
* hand-written Obsidian file; PUSH adopts those separately).
*
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
* -> skipped, NOT thrown; the next pull converges;
* - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
*/
export async function readExisting(deps) {
const tracked = await deps.listTracked();
const existing = [];
for (const relPath of tracked) {
// git ls-files always emits forward-slash paths; normalize just in case.
const rel = relPath.split(sep).join("/");
let text;
try {
text = await deps.readFile(rel);
}
catch {
// Tracked but missing on disk (mid-operation race) — skip; the next pull
// converges.
continue;
}
const { id } = parsePageFile(text);
if (id)
existing.push({ pageId: id, relPath: rel });
}
return existing;
}
/**
* PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live
* tree nodes + completeness + existing tracked files and returns the full set of
* decisions with NO IO:
*
* - builds the vault layout (deterministic relPath per live page),
* - `planReconciliation` -> toWrite / moved / absence-toDelete,
* - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch +
* empty-live + mass-delete guard), folded IN here so `toDelete` is the
* POST-suppression set (empty when suppressed).
*
* Moves are NOT governed by the suppression: a moved page is present in `live`,
* so its old-path removal is real (the caller still gates it on the write
* succeeding). The expensive content fetch / file write / git ops happen in the
* thin `applyPullActions`.
*/
export function computePullActions(input) {
const { pages, treeComplete, existing } = input;
const layout = buildVaultLayout(pages);
const live = [];
for (const p of pages) {
if (!p || !p.id)
continue;
const entry = layout.get(p.id);
if (!entry)
continue;
live.push({
pageId: p.id,
relPath: segmentsToRelPath(entry.segments, entry.stem),
});
}
// Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
// `plan.moved` carries move old-path removals separately.
const plan = planReconciliation(live, existing);
// Decide whether the ABSENCE-based deletions may be applied this cycle
// (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard.
// Moves are NOT governed by this.
const deletionDecision = decideAbsenceDeletions({
treeComplete,
liveCount: live.length,
existingCount: existing.length,
deleteCount: plan.toDelete.length,
});
return {
toWrite: plan.toWrite,
moved: plan.moved,
// Fold the suppression in: a suppressed cycle deletes nothing.
toDelete: deletionDecision.apply ? plan.toDelete : [],
deletionDecision,
existingCount: existing.length,
plannedDeleteCount: plan.toDelete.length,
};
}
/**
* THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current
* order, with all the original safety guards preserved bit-for-bit:
*
* 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
* (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
* never aborts the pull (bounded-concurrency pool, fault-tolerant).
* 2. apply MOVE old-path removals — ONLY when the planner marked the old path
* removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a
* failed move-write keeps the old path so the page never vanishes).
* 3. apply (post-suppression) absence deletes.
* 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
* counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
*
* `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps.
*/
export async function applyPullActions(deps, actions, vaultRoot) {
const { client, git } = deps;
// Emit the SPEC §8 suppression warnings (preserved from the original `main`).
const decision = actions.deletionDecision;
if (!decision.apply) {
if (decision.reason === "incomplete-fetch") {
console.warn("pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)");
}
else if (decision.reason === "empty-live") {
console.warn(`pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` +
`tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
`Docmost is reachable.`);
}
else {
console.warn(`pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` +
`tracked file(s) (mass-delete guard) — deletions suppressed this ` +
`cycle (SPEC §8). Verify the live Docmost tree, then re-run.`);
}
}
// 1. Write each live page in its fixpoint form (normalize-on-write, SPEC §11).
let written = 0;
let failed = 0;
let completed = 0;
let nextIndex = 0;
// pageIds whose write FAILED. A moved page whose new-path write failed must
// NOT have its old path removed (otherwise the page vanishes entirely).
const failedPageIds = new Set();
const writeOne = async (w) => {
try {
const page = await client.getPageJson(w.pageId);
// Native-Obsidian format: a minimal `gitmost_id` frontmatter + the fixpoint
// markdown body. title/parent/space are DERIVED (filename / folder / repo),
// so nothing but the pageId is persisted as meta.
const text = serializePageFile(page.id, await stabilizePageBody(page.content));
const abs = relToAbs(vaultRoot, w.relPath);
await deps.mkdir(dirname(abs));
await deps.writeFile(abs, text);
written++;
}
catch (err) {
failed++;
failedPageIds.add(w.pageId);
console.error(`pull: failed page ${w.pageId}:`, err instanceof Error ? err.message : String(err));
}
finally {
completed++;
if (completed % PROGRESS_EVERY === 0) {
console.log(`pulled ${completed}/${actions.toWrite.length}`);
}
}
};
// Bounded-concurrency pool (dependency-free): a fixed set of runners each
// take the next index until the write list is exhausted. One bad page never
// aborts the whole pull (mirrors the fault-tolerant tree walk).
const runner = async () => {
while (true) {
const i = nextIndex++;
if (i >= actions.toWrite.length)
return;
await writeOne(actions.toWrite[i]);
}
};
await Promise.all(Array.from({ length: Math.min(CONCURRENCY, actions.toWrite.length) || 1 }, () => runner()));
// Helper: `rm` with force:true is a no-op if the file is already gone.
const removePath = async (rel, what) => {
try {
await deps.rm(relToAbs(vaultRoot, rel));
return true;
}
catch (err) {
console.error(`pull: failed to ${what} ${rel}:`, err instanceof Error ? err.message : String(err));
return false;
}
};
// 2. Apply MOVE old-path removals. A moved page IS present in `live`, so its
// old path is genuinely stale — NOT subject to the incomplete-fetch
// suppression. BUT only remove the old path when (a) the planner marked it
// removable (not reused by another live page) AND (b) the new-path write
// actually SUCCEEDED — otherwise we would delete the only copy of a page
// whose move-write failed (⭐ data-loss guard).
let movedApplied = 0;
for (const m of actions.moved) {
if (!m.removeOldPath)
continue;
if (failedPageIds.has(m.pageId)) {
console.warn(`pull: move write for ${m.pageId} failed — keeping old path ` +
`${m.fromRelPath} (SPEC §8)`);
continue;
}
if (await removePath(m.fromRelPath, "remove moved old path"))
movedApplied++;
}
// 3. Apply ABSENCE-based deletions — `actions.toDelete` is ALREADY the
// post-suppression set (empty when the decision suppressed them, SPEC §8).
let deleted = 0;
for (const rel of actions.toDelete) {
if (await removePath(rel, "delete"))
deleted++;
}
// 4. Stage + commit on `docmost` (only if there is something to commit).
// Deterministic stabilized output means unchanged pages produce identical
// bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the
// ACTUAL work applied (pages written + files deleted), not the planned size,
// so a run with failures does not over-report (SPEC §5 nit).
const subject = deleted > 0
? `docmost: sync ${written} page(s), ${deleted} deleted`
: `docmost: sync ${written} page(s)`;
await git.stageAll();
const committed = await git.commit(subject, {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
trailers: [SOURCE_TRAILER],
});
// Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9);
// we never push to Docmost. Push to a git remote is deferred (SPEC §7).
await git.checkout(DEFAULT_BRANCH);
const merge = await git.merge(DOCMOST_BRANCH);
if (merge.conflict) {
console.error("pull: merge of docmost -> main CONFLICTED. Conflict markers were left " +
"in the vault for manual resolution (SPEC §9). Nothing is pushed to " +
"Docmost (read-only). Resolve locally, then re-run.");
}
else if (!merge.ok) {
console.error(`pull: merge of docmost -> main failed: ${merge.output}`);
}
console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");
return { written, movedApplied, deleted, failed, committed, merge };
}

View File

@@ -1,504 +0,0 @@
/**
* Push cycle — vault -> Docmost (SPEC §6 "ФС → Docmost"), FIRST increment.
*
* This module mirrors the structure of `./pull.ts`: a set of VaultGit diff/ref
* primitives (in `./git.ts`), a PURE planner (`computePushActions`) that turns
* a git diff into a classified action set with NO IO, and a THIN injectable
* applier (`applyPushActions`) exercised in tests via fakes only.
*
* Direction is vault -> Docmost. The diff is `main` against
* `refs/docmost/last-pushed` (SPEC §6 step 2); each `A`/`M`/`D`/`R` row is
* translated into a Docmost mutation by `pageId` identity (SPEC §4):
* - A without pageId -> create_page (then write the assigned pageId back).
* - A with pageId -> update (restored/copied file; the page already exists).
* - M -> update content (collab/Yjs path, SPEC §2/§15.6).
* - D -> delete_page (pageId recovered from the PRE-IMAGE meta).
* - R -> rename/move (CLASSIFIED here, APPLIED in push #3).
*
* MOVE/RENAME APPLY (push #3) — DONE here. `classifyRenameMoves` (PURE) resolves
* each `renamesMoves` entry into the Docmost op(s) it needs, comparing the PATH-
* derived parent (SPEC §5: the file path is the source of truth for tree
* position, NOT stale `meta.parentPageId`) and the meta title; `applyPushActions`
* then calls `move_page` / `rename_page` (both for a reparent+retitle), or
* records a NO-OP for a cosmetic local-only file-path rename.
*
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
* the gitmost server drives the engine in-process (there is no standalone CLI
* entry point).
*/
import { type DocmostMdMeta } from "../lib/index.js";
import type { GitSyncClient } from "./client.types.js";
import type { DiffEntry } from "./git.js";
import { VaultGit } from "./git.js";
import { type Settings } from "./settings.js";
export type { DiffEntry } from "./git.js";
/** A page to CREATE in Docmost (new local file, meta has no pageId yet). */
export interface CreateAction {
/** Vault-relative path of the new file. */
path: string;
}
/** A page whose CONTENT changed (meta carries the existing pageId). */
export interface UpdateAction {
pageId: string;
/** Vault-relative path of the changed file. */
path: string;
}
/** A page to soft-delete in Docmost (Trash, SPEC §8). */
export interface DeleteAction {
pageId: string;
}
/** A renamed/moved page (same pageId, new path). Resolution DEFERRED. */
export interface RenameMoveAction {
pageId: string;
oldPath: string;
newPath: string;
}
/**
* A CLASSIFIED rename/move (push #3): a `RenameMoveAction` resolved into the
* Docmost op(s) it actually needs. The file PATH is the source of truth for tree
* position (SPEC §5: "истина связи — pageId, не путь" — the path is COSMETIC and
* LOCAL, the page identity is its pageId), so we compare the RESOLVED parent of
* the new path against the resolved parent of the old path, and the title in the
* current meta against the title in the previous meta. Each sub-op is emitted
* ONLY when something real changed:
* - `move` — the resolved parent page changed (reparent in Docmost). A `null`
* `parentPageId` means the new parent is ROOT (the file sits at the space
* root, no enclosing folder).
* - `rename` — the page title changed (a pure title edit in Docmost).
* - `noop` — neither changed: a purely LOCAL file-path rename (same parent,
* same title). The page identity is its pageId, so Docmost is NOT called.
* `move` and `rename` are independent and may BOTH be present (reparent + retitle).
*/
export interface RenameMoveActionClassified {
pageId: string;
oldPath: string;
newPath: string;
/** Present iff the resolved parent changed -> `move_page` (reparent). */
move?: {
parentPageId: string | null;
};
/** Present iff the title changed -> `rename_page` (title-only). */
rename?: {
title: string;
};
/** True iff neither parent nor title changed (cosmetic local-only rename). */
noop?: true;
}
/**
* Injected resolvers for the PURE `classifyRenameMoves` (push #3). Both are PURE
* given a path + side; the real `main` (a follow-up) wires them to the file tree
* (`readFile` for `current`, `git.showFileAtRef` for `prev`), tests pass plain
* lookups. SPEC §5 path-as-truth:
* - `metaAt`: the file's synthetic native meta at that side (title from the
* filename, pageId from the `gitmost_id` frontmatter).
* - `resolveParentPageId`: the pageId of the page whose FILE is the parent
* FOLDER's `.md` (one level up from the given path), or `null` for ROOT.
*/
export interface ClassifyRenameMovesDeps {
metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
resolveParentPageId: (path: string, side: MetaSide) => string | null;
}
/**
* PURE classifier for the `renamesMoves` produced by `computePushActions`
* (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
* Docmost op(s) it needs, with NO IO (both resolvers are injected).
*
* SPEC §5 — the file PATH is the source of truth for tree position, NOT the
* (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
* `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing
* folder, via `deps.resolveParentPageId`. The title comes from the meta.
*
* For each entry:
* - `newParent = resolveParentPageId(newPath, 'current')`,
* `oldParent = resolveParentPageId(oldPath, 'prev')`.
* - `newTitle = metaAt(newPath,'current')?.title`,
* `oldTitle = metaAt(oldPath,'prev')?.title`.
* - include `move` iff `newParent !== oldParent` (a real reparent),
* - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from
* `oldTitle` (a real title edit; an empty/absent new title is never a rename),
* - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename;
* the page is its pageId, so Docmost is not touched).
*/
export declare function classifyRenameMoves(renamesMoves: RenameMoveAction[], deps: ClassifyRenameMovesDeps): RenameMoveActionClassified[];
/** The classified set of push actions (PURE output of `computePushActions`). */
export interface PushActions {
creates: CreateAction[];
updates: UpdateAction[];
deletes: DeleteAction[];
renamesMoves: RenameMoveAction[];
/**
* Diff rows that could NOT be classified into an action, with a reason — e.g.
* a deleted file whose PRE-IMAGE meta carried no recoverable pageId (the
* untracked-file guard, SPEC §8: only files that were tracked with a pageId
* are deleted in Docmost). Carried so the caller can log them.
*/
skipped: {
path: string;
status: DiffEntry["status"];
reason: string;
}[];
}
/**
* Which tree a `metaAt` lookup reads the file's native meta from:
* - `current`: the current `main` tree (the live file content) — used for
* A/M/R, where the file still exists.
* - `prev`: the last-pushed PRE-IMAGE (e.g. `refs/docmost/last-pushed:<path>`)
* — used for D, where the file is gone from `main` but its pageId must be
* recovered from the version Docmost last knew (SPEC §6/§8).
*/
export type MetaSide = "current" | "prev";
/** Input to the PURE planner. `metaAt` is injected (no IO inside the planner). */
export interface PushActionsInput {
/** Diff rows of `main` vs `refs/docmost/last-pushed` (SPEC §6 step 2). */
changes: DiffEntry[];
/**
* Resolve a file's synthetic native meta at a given side, or `null` if the file is
* absent there / has no parseable meta. PURE injection: the real `main` reads
* the working tree (current) or `git show <last-pushed>:<path>` (prev); tests
* pass a plain lookup.
*/
metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
/**
* The pageIds present at ANY path in the current `main` tree (optional). When
* given, a deleted file whose pageId still lives somewhere in the tree is NOT
* a deletion but a MOVE — guards against trashing a live page when a layout
* reshuffle relocated its file (possibly across two cycles, so the matching
* add isn't in THIS diff). When omitted, only the in-diff D+A/M coalescing
* applies.
*/
currentPageIds?: Set<string>;
}
/**
* PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
* action by `pageId` identity, with NO IO (the `metaAt` resolver is injected).
*
* Classification rules:
* - `A` (added):
* - current meta HAS a pageId -> UPDATE (a restored/copied file whose
* page already exists; we push its content rather than create a dup).
* - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a
* brand-new local file; the page does not exist in Docmost yet).
* - current meta has NO pageId and NO usable spaceId -> SKIP with reason
* `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
* (§16), and a new local file may carry only partial human meta. We
* refuse to create rather than guess a space (SPEC §8 guard spirit).
* - `M` (modified): current meta has a pageId -> UPDATE content. (If a modified
* file somehow lost its pageId it is skipped — there is nothing to target.)
* - `D` (deleted): recover the pageId from the PRE-IMAGE meta (`metaAt(path,
* 'prev')`) -> DELETE. If no pageId can be recovered, SKIP with a reason
* (untracked-file guard, SPEC §8: never delete an untracked page).
* - `R` (renamed/moved): same pageId (from current meta), path changed ->
* RENAME/MOVE. Resolution of move-vs-rename + the new parentPageId is
* DEFERRED to the next increment; here we only record oldPath/newPath/
* pageId. If the renamed file has no recoverable pageId it is SKIPPED.
* (`C` copy is treated the same as `R` for recording purposes.)
*/
export declare function computePushActions(input: PushActionsInput): PushActions;
/** The marker the push direction advances after a successful push (SPEC §5/§6). */
export declare const LAST_PUSHED_REF = "refs/docmost/last-pushed";
/**
* The mirror branch fast-forwarded after a clean push (SPEC §5/§6 step 3). It
* reflects "what Docmost currently contains"; advancing it to the pushed `main`
* commit closes the loop so the next pull diffs empty for the pushed pages.
*/
export declare const DOCMOST_BRANCH = "docmost";
/**
* Injectable IO for `applyPushActions`. The real `main` (NEXT increment) wires
* these to the live client, `node:fs/promises`, and the vault git wrapper; this
* increment drives them only through FAKES in tests (no live destructive run).
* - `client`: the create/update/delete/move/rename subset of `GitSyncClient`.
* - `readFile`/`writeFile`: read a changed file's body / write a file back
* (by vault-relative path; the applier does not resolve absolute paths so
* fakes stay trivial).
* - `git`: `updateRef` (advance `refs/docmost/last-pushed`) and
* `fastForwardBranch` (advance the `docmost` mirror after a clean push, the
* loop-close — SPEC §6 step 3 / §10).
*/
export interface ApplyPushDeps {
client: Pick<GitSyncClient, "importPageMarkdown" | "createPage" | "deletePage" | "movePage" | "renamePage">;
/** Read a changed file's full text by its vault-relative path. */
readFile: (path: string) => Promise<string>;
/** Write a file's full text by its vault-relative path. */
writeFile: (path: string, text: string) => Promise<void>;
/**
* The Docmost spaceId this vault mirrors. A CREATE targets this space (the
* native file carries no spaceId — every file in the vault belongs to it), and
* it backs the synthetic native meta the classifier reads.
*/
spaceId: string;
/**
* `updateRef` advances `refs/docmost/last-pushed`; `fastForwardBranch` advances
* the `docmost` mirror after a clean push. `showFileAtRef` reads a file's text
* at a ref (used by the move/rename classifier to resolve the PREVIOUS parent
* folder's `.md` at `refs/docmost/last-pushed`, SPEC §5 path-as-truth).
*/
git: Pick<VaultGit, "updateRef" | "fastForwardBranch" | "showFileAtRef">;
}
/** A file whose meta was rewritten with a freshly-assigned pageId (post-create). */
export interface WrittenBackPage {
path: string;
pageId: string;
}
/**
* The per-page push record consulted by a FUTURE poll-suppression (SPEC §10): a
* pulled page whose body hash + `updatedAt` match a record here is OUR OWN write
* and must not be re-pulled. PRODUCED here; CONSUMED on the pull side later.
*/
export interface PushedPageRecord {
/** The Docmost pageId that was updated/created. */
pageId: string;
/**
* The `updatedAt` from the create/update client result, when the result
* exposed one. Absent when the (fake) client did not return it.
*/
updatedAt?: string;
/** Stable hash of the markdown BODY that was pushed (SPEC §10 "хэш тела"). */
bodyHash: string;
}
/**
* One page whose operation FAILED during apply (SPEC §12 resumability). The bad
* page is isolated — recorded here — and the rest of the batch still runs; the
* refs are NOT advanced when there is any failure, so a re-run retries cleanly.
*/
export interface PushFailure {
kind: "update" | "create" | "delete" | "move" | "rename";
/** The pageId for update/delete/move/rename; absent for a never-id'd create. */
pageId?: string;
/** The vault-relative path for create/update/move/rename; absent for delete. */
path?: string;
/** The error message captured from the thrown error. */
error: string;
}
/**
* A rename/move action that resolved to a NO-OP (push #3, SPEC §5): a purely
* LOCAL file-path rename whose resolved parent AND title are both unchanged. The
* page identity is its pageId and the path is COSMETIC/local-only, so Docmost is
* NOT called — the skip is recorded here (with the reason) for logging.
*/
export interface PushNoop {
pageId: string;
oldPath: string;
newPath: string;
/** Why no Docmost op was emitted (currently always a path-only rename). */
reason: "path-only-rename";
}
/** Structured outcome of `applyPushActions` (counts + write-backs + noops). */
export interface ApplyPushResult {
created: number;
updated: number;
deleted: number;
/** Pages reparented in Docmost via `move_page` (push #3, SPEC §5/§16). */
moved: number;
/** Pages retitled in Docmost via `rename_page` (push #3, SPEC §5/§6). */
renamed: number;
/**
* Files whose `gitmost_id` frontmatter was written with the pageId Docmost assigned on
* create — these now need a FOLLOW-UP commit (the meta on disk changed). The
* commit itself is the caller's job (NEXT increment); recorded here so it is
* not lost.
*/
writtenBack: WrittenBackPage[];
/**
* Per-page push records (pageId + optional `updatedAt` + body hash) for every
* page successfully updated/created — the §10 loop-guard data a future
* poll-suppression (pull side) will consult so it does not re-pull our own
* write. Deletes are not included (no body was pushed).
*/
pushed: PushedPageRecord[];
/**
* Pages whose operation threw — isolated and recorded, the batch continued
* (SPEC §12). Non-empty here means the refs were NOT advanced.
*/
failures: PushFailure[];
/**
* Rename/move actions that resolved to a NO-OP — a purely LOCAL file-path
* rename (same parent, same title). NO Docmost call was made for these (SPEC
* §5: the page is its pageId, the path is local-only). Recorded for logging.
*/
noops: PushNoop[];
/** Diff rows the planner could not classify (carried through for logging). */
skipped: PushActions["skipped"];
/** Whether `refs/docmost/last-pushed` was advanced (only on a CLEAN push). */
lastPushedAdvanced: boolean;
/**
* Result of fast-forwarding the `docmost` mirror branch after a CLEAN push
* (the loop-close, SPEC §6 step 3 / §10). `null` when no advance was attempted
* (no `pushedCommit`, or there were failures). `{ ok:false, reason }` when a
* non-fast-forward was REFUSED (divergent `docmost` history is never clobbered).
*/
docmostFastForward: {
ok: boolean;
reason?: string;
} | null;
}
/**
* THIN IO applier for the COMMON push cases (create/update/delete). Exercised
* via FAKES only in this increment — there is no live wiring.
*
* - UPDATE: read the file body, then `client.importPageMarkdown(pageId, body)`.
* This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw jsonb
* overwrite. The full self-contained markdown (meta + body) is sent as-is;
* `importPageMarkdown` parses the meta/body itself.
* - CREATE: derive title/spaceId/parentPageId from the file's current meta,
* `client.createPage(...)`, take the assigned pageId from the result, and
* write it BACK as the file's `gitmost_id` frontmatter (re-serialized via
* `serializePageFile`, body preserved) so the file becomes
* tracked. The write-back is recorded in `writtenBack` (a follow-up commit
* is needed — NEXT increment).
* - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
* - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry
* with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for
* the parent pageId — path-as-truth — and the meta for the title), then:
* - `move` -> `client.movePage(pageId, parentPageId, position?)` (reparent;
* `position` is UNDEFINED for now — the client supplies a default),
* - `rename` -> `client.renamePage(pageId, title)` (title-only),
* - BOTH -> move (reparent) THEN rename (title), in that order,
* - `noop` -> NO client call; recorded in `noops` (a cosmetic local-only
* file-path rename: the page is its pageId, the path is local, SPEC §5).
*
* FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation
* is wrapped in its own try/catch: a single failing page is recorded in
* `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES —
* one bad page must never block the rest. Crucially, the refs are advanced ONLY
* when `failures.length === 0`: a PARTIAL push must NOT advance
* `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the
* whole batch cleanly (the already-applied pages are idempotent re-applies).
*
* LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a
* `pushedCommit` is supplied:
* - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND
* - fast-forward the `docmost` mirror branch to it via
* `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects
* what Docmost now contains and the NEXT pull diffs EMPTY for these pages
* (it does not re-pull our own write). The ff is REFUSED (not forced) if
* `docmost` is not an ancestor of the pushed commit; the result is surfaced
* in `docmostFastForward`. On ANY failure, NEITHER ref is advanced.
*
* LOOP-GUARD DATA (SPEC §10). For every page successfully updated/created the
* result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the body
* hash of what was pushed plus the write's `updatedAt` (when the client returned
* one). A future pull-side poll-suppression consults this so it does not re-pull
* our own write; producing it is in scope here, consuming it is deferred.
*
* @param pushedCommit The `main` commit just reflected into Docmost (SHA or
* commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
*/
export declare function applyPushActions(deps: ApplyPushDeps, actions: PushActions, pushedCommit?: string): Promise<ApplyPushResult>;
/**
* SPEC §5 path-as-truth: the parent FOLDER's `.md` file for a vault-relative
* (forward-slash) path. `buildVaultLayout` puts a page with children at
* `<...>/Title.md` and nests its children under `<...>/Title/`, so for
* `newPath = <dir>/Child.md` the parent page's file is `<dir>.md` (the enclosing
* folder, one level up). A path with NO enclosing folder (`Child.md`, at the
* space root) has no parent folder file -> `null` (the parent is ROOT).
*/
export declare function parentFolderFile(path: string): string | null;
/**
* Whether a vault path is a Docmost PAGE file (design §"Адопция"): a `.md` file
* with NO dot-segment anywhere in its path. This excludes `.obsidian/` config,
* `.trash/`, dotfiles (`.foo.md`), and every non-`.md` file (attachments, JSON,
* …) — Obsidian owns those; they live in the vault but are never pages. Used to
* screen the PUSH diff so non-page files are never created/updated/deleted in
* Docmost (and never get a `gitmost_id` frontmatter written into them).
*/
export declare function isPageFile(path: string): boolean;
/**
* The human ("local") git identity used for engine-made commits on `main` in the
* push direction (SPEC §7.3). The provenance is carried by the trailer (below),
* which the loop-guard keys on; the identity is for history readability only.
* When the vault repo already has a configured `user.name`/`user.email`, git
* uses that for the working-tree commit; this is the fallback the daemon stamps.
*/
export declare const LOCAL_AUTHOR_NAME = "Local";
export declare const LOCAL_AUTHOR_EMAIL = "local@local";
/** The provenance trailer marking a `main`-side (human/local) commit (SPEC §7.3). */
export declare const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
/**
* Injectable deps for `runPush` (mirrors `pull.ts`'s wiring; everything that
* touches the outside world is here so tests pass fakes). `makeClient` is a
* FACTORY, not a client — a dry-run must build NO client at all (it is never
* called), and only `--apply` invokes it.
*/
export interface PushDeps {
settings: Settings;
git: Pick<VaultGit, "assertGitAvailable" | "ensureRepo" | "isMergeInProgress" | "checkout" | "stageAll" | "commit" | "readRef" | "revParse" | "diffNameStatus" | "showFileAtRef" | "updateRef" | "fastForwardBranch" | "listTrackedFiles">;
/** Build a real client — called ONLY on `--apply`, never on dry-run. */
makeClient: (settings: Settings) => ApplyPushDeps["client"];
/** Read a file's full text by its vault-relative (forward-slash) path. */
readFile: (path: string) => Promise<string>;
/** Write a file's full text by its vault-relative path. */
writeFile: (path: string, text: string) => Promise<void>;
/** Structured logger (defaults to console in `main`; a recorder in tests). */
log: (line: string) => void;
}
/** The structured outcome of a `runPush` cycle (returned + summarized). */
export interface PushRunResult {
/** Which path ran: `dry-run` (plan only) or `apply` (Docmost mutated). */
mode: "dry-run" | "apply";
/** Why the cycle stopped before planning, if it did (e.g. a left-over merge). */
aborted?: "merge-in-progress";
/** The diff base the plan was computed against (`last-pushed` else `docmost`). */
base?: {
ref: string;
source: "last-pushed" | "docmost";
sha: string | null;
};
/** The `main` commit the plan targets (the would-be pushed commit). */
pushedCommit?: string;
/** Planned action counts from the PURE planner (present once a plan was built). */
planned?: {
creates: number;
updates: number;
deletes: number;
renamesMoves: number;
skipped: number;
};
/** The applier's structured result — ONLY present on the `--apply` path. */
applied?: ApplyPushResult;
/**
* True when `applyPushActions` REFUSED to fast-forward a divergent `docmost`
* mirror (SPEC §5 invariant broken). Escalated (logged prominently) and folded
* into the CLI's non-zero exit.
*/
divergentDocmost?: boolean;
/** Per-page failures from the applier (empty/absent on a clean run). */
failures?: PushFailure[];
}
/**
* Run one FS->Docmost push cycle (SPEC §6 "ФС → Docmost"), DRY-RUN BY DEFAULT.
*
* Steps (mirrors `pull.ts`):
* 1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
* non-zero-ish result) if a merge is in progress — never push on top of an
* unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
* Docmost (SPEC §9).
* 2. Checkout `main` (the human-facing branch the push reads from).
* 3. Commit the human's pending working-tree changes on `main` with the
* `local` provenance trailer (SPEC §7.3). A no-op when nothing changed.
* 4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
* `docmost` mirror branch (what Docmost currently has). Resolve `main`.
* 5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)`
* resolver (current = working tree, prev = `git show <base>:<path>`); run
* the PURE `computePushActions`.
* 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
* calls, NO ref advance.
* 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`,
* then (a) if any pageIds were written back (creates), commit them on `main`
* with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the
* new commit so the recorded pageIds are persisted in what Docmost mirrors;
* (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
* WARNING and a non-zero-ish flag. Then log a one-line summary.
*/
export declare function runPush(deps: PushDeps, opts: {
dryRun: boolean;
}): Promise<PushRunResult>;
/** Parsed `push` CLI flags. DRY-RUN is the default; `--apply` opts into writes. */
export interface PushParsedArgs {
/** True when `--apply` was passed (the ONLY path that writes to Docmost). */
apply: boolean;
}
/**
* Parse the `push` CLI flags. SAFE BY DEFAULT: without `--apply` the run is a
* DRY-RUN (plan only). Exported so the flag handling is unit-testable.
*/
export declare function parseArgs(argv: string[]): PushParsedArgs;

View File

@@ -1,971 +0,0 @@
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
import { DEFAULT_BRANCH } from "./git.js";
import { bodyHash } from "./loop-guard.js";
/**
* PURE classifier for the `renamesMoves` produced by `computePushActions`
* (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
* Docmost op(s) it needs, with NO IO (both resolvers are injected).
*
* SPEC §5 — the file PATH is the source of truth for tree position, NOT the
* (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
* `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing
* folder, via `deps.resolveParentPageId`. The title comes from the meta.
*
* For each entry:
* - `newParent = resolveParentPageId(newPath, 'current')`,
* `oldParent = resolveParentPageId(oldPath, 'prev')`.
* - `newTitle = metaAt(newPath,'current')?.title`,
* `oldTitle = metaAt(oldPath,'prev')?.title`.
* - include `move` iff `newParent !== oldParent` (a real reparent),
* - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from
* `oldTitle` (a real title edit; an empty/absent new title is never a rename),
* - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename;
* the page is its pageId, so Docmost is not touched).
*/
export function classifyRenameMoves(renamesMoves, deps) {
return renamesMoves.map((rm) => {
const newParent = deps.resolveParentPageId(rm.newPath, "current");
const oldParent = deps.resolveParentPageId(rm.oldPath, "prev");
const newTitle = deps.metaAt(rm.newPath, "current")?.title;
const oldTitle = deps.metaAt(rm.oldPath, "prev")?.title;
const out = {
pageId: rm.pageId,
oldPath: rm.oldPath,
newPath: rm.newPath,
};
// A reparent: the new path's resolved parent page differs from the old's.
if (newParent !== oldParent) {
out.move = { parentPageId: newParent };
}
// A title edit: only when there is a real, non-empty new title that changed.
if (typeof newTitle === "string" &&
newTitle.length > 0 &&
newTitle !== oldTitle) {
out.rename = { title: newTitle };
}
// Neither changed -> a purely LOCAL file-path rename; do NOT call Docmost.
if (!out.move && !out.rename) {
out.noop = true;
}
return out;
});
}
/**
* PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
* action by `pageId` identity, with NO IO (the `metaAt` resolver is injected).
*
* Classification rules:
* - `A` (added):
* - current meta HAS a pageId -> UPDATE (a restored/copied file whose
* page already exists; we push its content rather than create a dup).
* - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a
* brand-new local file; the page does not exist in Docmost yet).
* - current meta has NO pageId and NO usable spaceId -> SKIP with reason
* `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
* (§16), and a new local file may carry only partial human meta. We
* refuse to create rather than guess a space (SPEC §8 guard spirit).
* - `M` (modified): current meta has a pageId -> UPDATE content. (If a modified
* file somehow lost its pageId it is skipped — there is nothing to target.)
* - `D` (deleted): recover the pageId from the PRE-IMAGE meta (`metaAt(path,
* 'prev')`) -> DELETE. If no pageId can be recovered, SKIP with a reason
* (untracked-file guard, SPEC §8: never delete an untracked page).
* - `R` (renamed/moved): same pageId (from current meta), path changed ->
* RENAME/MOVE. Resolution of move-vs-rename + the new parentPageId is
* DEFERRED to the next increment; here we only record oldPath/newPath/
* pageId. If the renamed file has no recoverable pageId it is SKIPPED.
* (`C` copy is treated the same as `R` for recording purposes.)
*/
export function computePushActions(input) {
const { metaAt, currentPageIds } = input;
// PAGE-FILE FILTER (design §"Адопция"): only `.md` files OUTSIDE any dot-folder
// are Docmost pages. `.obsidian/*`, attachments, and other non-page files are
// committed to the vault (no `.gitignore`) and so appear in the diff, but they
// are NEVER pages — Obsidian owns them. Without this filter every ADDED such
// file would be mis-classified as a CREATE (nativeMeta always supplies a
// spaceId, so the old `create-without-spaceId` skip no longer screens them),
// creating junk pages in Docmost and corrupting the file with a `gitmost_id`
// frontmatter. Filter BEFORE any classification so non-page A/M/D/R are ignored.
const changes = input.changes.filter((c) => isPageFile(c.path));
const actions = {
creates: [],
updates: [],
deletes: [],
renamesMoves: [],
skipped: [],
};
// GHOST-MOVE coalescing (⭐ data-loss guard). git's rename detection (`-M`)
// can miss a move when the two files are too dissimilar — which is exactly the
// case for the tiny meta-only files a layout RESHUFFLE produces (e.g.
// several untitled pages sharing the `_` fallback name; retitling one frees the
// bare `_` and another page's file relocates `_ ~slug.md` -> `_.md`). git then
// reports the move as a DELETE of the old path + an ADD of the new one. Taken
// literally that soft-deletes a page that merely MOVED — a live page vanishing
// into Trash. Identity is the pageId, not git's heuristic: a pageId that is
// BOTH deleted (pre-image) and added (current) is one page that relocated, so
// we classify it as a rename/move and NEVER as a delete.
// A pageId can land at its new path two ways: as an ADD (the path was free) or
// as a MODIFY (the path was occupied by ANOTHER page that left — the reshuffle
// case, where `_.md`'s occupant changes pageId). Both are "the page survives at
// a new path", so the surviving side is the CURRENT-meta pageId of A *and* M.
const deletedPath = new Map();
const survivingPath = new Map();
for (const change of changes) {
if (change.status === "D") {
const pid = metaAt(change.path, "prev")?.pageId;
if (pid)
deletedPath.set(pid, change.path);
}
else if (change.status === "A" || change.status === "M") {
const pid = metaAt(change.path, "current")?.pageId;
if (pid)
survivingPath.set(pid, change.path);
}
}
const ghostMove = new Map();
for (const [pid, oldPath] of deletedPath) {
const newPath = survivingPath.get(pid);
if (newPath && newPath !== oldPath) {
ghostMove.set(pid, { oldPath, newPath });
}
}
for (const change of changes) {
switch (change.status) {
case "A": {
const meta = metaAt(change.path, "current");
const pageId = meta?.pageId;
if (pageId && ghostMove.has(pageId)) {
// Half of a git-undetected move (a matching DELETE exists): record it
// as a rename/move (like a real `R`), NOT an update — the `D` side is
// suppressed so the page is never soft-deleted.
actions.renamesMoves.push({
pageId,
oldPath: ghostMove.get(pageId).oldPath,
newPath: change.path,
});
}
else if (pageId) {
// Added but already carries a pageId (restored/copied file): the page
// exists in Docmost, so push content as an UPDATE — never a duplicate.
actions.updates.push({ pageId, path: change.path });
}
else if (meta?.spaceId) {
// Brand-new local file with a target space -> create the page, then
// write the assigned pageId back into its meta (in `applyPushActions`).
// `meta.spaceId` is truthy here, so empty-string is also rejected.
actions.creates.push({ path: change.path });
}
else {
// A create needs a spaceId (Docmost `create_page` requires it, §16). A
// new file with partial meta and no usable spaceId is SKIPPED rather
// than created into a guessed space (SPEC §8 guard spirit).
actions.skipped.push({
path: change.path,
status: "A",
reason: "create-without-spaceId",
});
}
break;
}
case "M": {
const meta = metaAt(change.path, "current");
const pageId = meta?.pageId;
if (pageId && ghostMove.has(pageId)) {
// This path's occupant changed pageId: the previous page left and THIS
// page relocated here (a reshuffle). Its old file was DELETED elsewhere
// — coalesce into a rename/move so the page is never trashed.
actions.renamesMoves.push({
pageId,
oldPath: ghostMove.get(pageId).oldPath,
newPath: change.path,
});
}
else if (pageId) {
actions.updates.push({ pageId, path: change.path });
}
else {
// A modified file with no pageId has no Docmost target to update.
actions.skipped.push({
path: change.path,
status: "M",
reason: "modified file has no pageId in meta",
});
}
break;
}
case "D": {
// The file is gone from `main`; recover its pageId from the PRE-IMAGE
// (the version last pushed to Docmost) so we delete the RIGHT page.
const prevMeta = metaAt(change.path, "prev");
const pageId = prevMeta?.pageId;
if (pageId && ghostMove.has(pageId)) {
// The same pageId was re-ADDED at a new path: this is a git-undetected
// MOVE, handled by the `A` branch above. Suppress the delete so a moved
// page is never trashed (⭐ data-loss guard).
actions.skipped.push({
path: change.path,
status: "D",
reason: "ghost-move (re-added at a new path) — not a deletion",
});
}
else if (pageId && currentPageIds?.has(pageId)) {
// The pageId still EXISTS elsewhere in the current tree: the file moved
// (a layout reshuffle whose matching add was in an earlier cycle, so it
// is not in this diff). A live page must never be trashed because its
// FILENAME changed — identity is the pageId (⭐ data-loss guard).
actions.skipped.push({
path: change.path,
status: "D",
reason: "pageId still present in the tree (moved) — not a deletion",
});
}
else if (pageId) {
actions.deletes.push({ pageId });
}
else {
// Untracked-file guard (SPEC §8): a file with no recoverable pageId was
// never a Docmost page — do NOT translate its removal into a delete.
actions.skipped.push({
path: change.path,
status: "D",
reason: "deleted file has no recoverable pageId (pre-image meta)",
});
}
break;
}
case "R":
case "C": {
// Same page, new path. Identity comes from the CURRENT (post-rename) meta
// since the file still exists. RESOLUTION (move vs rename, parentPageId)
// is deferred — record oldPath/newPath/pageId only.
const meta = metaAt(change.path, "current");
const pageId = meta?.pageId;
const oldPath = change.oldPath ?? change.path;
if (pageId) {
actions.renamesMoves.push({
pageId,
oldPath,
newPath: change.path,
});
}
else {
actions.skipped.push({
path: change.path,
status: change.status,
reason: "renamed/moved file has no pageId in meta",
});
}
break;
}
default: {
// Unreachable for A/M/D/R/C; defensive for any future status.
actions.skipped.push({
path: change.path,
status: change.status,
reason: `unhandled diff status ${change.status}`,
});
}
}
}
return actions;
}
// --- thin apply (create/update/delete), fakes-only in this increment ---------
/** The marker the push direction advances after a successful push (SPEC §5/§6). */
export const LAST_PUSHED_REF = "refs/docmost/last-pushed";
/**
* The mirror branch fast-forwarded after a clean push (SPEC §5/§6 step 3). It
* reflects "what Docmost currently contains"; advancing it to the pushed `main`
* commit closes the loop so the next pull diffs empty for the pushed pages.
*/
export const DOCMOST_BRANCH = "docmost";
/**
* THIN IO applier for the COMMON push cases (create/update/delete). Exercised
* via FAKES only in this increment — there is no live wiring.
*
* - UPDATE: read the file body, then `client.importPageMarkdown(pageId, body)`.
* This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw jsonb
* overwrite. The full self-contained markdown (meta + body) is sent as-is;
* `importPageMarkdown` parses the meta/body itself.
* - CREATE: derive title/spaceId/parentPageId from the file's current meta,
* `client.createPage(...)`, take the assigned pageId from the result, and
* write it BACK as the file's `gitmost_id` frontmatter (re-serialized via
* `serializePageFile`, body preserved) so the file becomes
* tracked. The write-back is recorded in `writtenBack` (a follow-up commit
* is needed — NEXT increment).
* - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
* - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry
* with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for
* the parent pageId — path-as-truth — and the meta for the title), then:
* - `move` -> `client.movePage(pageId, parentPageId, position?)` (reparent;
* `position` is UNDEFINED for now — the client supplies a default),
* - `rename` -> `client.renamePage(pageId, title)` (title-only),
* - BOTH -> move (reparent) THEN rename (title), in that order,
* - `noop` -> NO client call; recorded in `noops` (a cosmetic local-only
* file-path rename: the page is its pageId, the path is local, SPEC §5).
*
* FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation
* is wrapped in its own try/catch: a single failing page is recorded in
* `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES —
* one bad page must never block the rest. Crucially, the refs are advanced ONLY
* when `failures.length === 0`: a PARTIAL push must NOT advance
* `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the
* whole batch cleanly (the already-applied pages are idempotent re-applies).
*
* LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a
* `pushedCommit` is supplied:
* - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND
* - fast-forward the `docmost` mirror branch to it via
* `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects
* what Docmost now contains and the NEXT pull diffs EMPTY for these pages
* (it does not re-pull our own write). The ff is REFUSED (not forced) if
* `docmost` is not an ancestor of the pushed commit; the result is surfaced
* in `docmostFastForward`. On ANY failure, NEITHER ref is advanced.
*
* LOOP-GUARD DATA (SPEC §10). For every page successfully updated/created the
* result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the body
* hash of what was pushed plus the write's `updatedAt` (when the client returned
* one). A future pull-side poll-suppression consults this so it does not re-pull
* our own write; producing it is in scope here, consuming it is deferred.
*
* @param pushedCommit The `main` commit just reflected into Docmost (SHA or
* commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
*/
export async function applyPushActions(deps, actions, pushedCommit) {
const { client, git } = deps;
let created = 0;
let updated = 0;
let deleted = 0;
let moved = 0;
let renamed = 0;
const writtenBack = [];
const pushed = [];
const failures = [];
const noops = [];
// 1. UPDATES — collab/Yjs write path (SPEC §2/§15.6), never a raw overwrite.
// Each update is isolated: a thrown page is recorded and the batch goes on.
for (const u of actions.updates) {
try {
// Push the CLEAN body only (no `gitmost_id` frontmatter): the frontmatter
// is engine metadata, never page content. The server converts the markdown
// it receives verbatim, so stripping here keeps the id out of Docmost.
const body = parsePageFile(await deps.readFile(u.path)).body;
// The last-synced version of this file (pre-image) is the common ancestor
// for a 3-way merge against the live page, so concurrent human edits are
// not clobbered (review #5). Null when the file is new at last-pushed. Its
// body is stripped the SAME way so the merge compares body-to-body.
const baseFull = await deps.git.showFileAtRef(LAST_PUSHED_REF, u.path);
const baseMarkdown = baseFull === null ? null : parsePageFile(baseFull).body;
const result = await client.importPageMarkdown(u.pageId, body, baseMarkdown);
updated++;
// §10 loop-guard data: hash the BODY we pushed + capture `updatedAt`.
pushed.push({
pageId: u.pageId,
...extractUpdatedAt(result),
bodyHash: bodyHash(body),
});
}
catch (err) {
failures.push({
kind: "update",
pageId: u.pageId,
path: u.path,
error: errMessage(err),
});
}
}
// 2. CREATES — create the page, then write the assigned pageId back to meta so
// the file becomes tracked (SPEC §4 "записать присвоенный pageId обратно").
// Isolated per page like updates.
for (const c of actions.creates) {
try {
const text = await deps.readFile(c.path);
const { body } = parsePageFile(text);
// Derive create args from the PATH (native-Obsidian, SPEC §5): title from
// the filename, parent from the enclosing folder's folder-note, space from
// the run (the vault's space). `parentPageId: null` -> created at ROOT.
const title = titleFromPath(c.path);
const parentPageId = (await resolveParentPageIdViaTree(deps, c.path, "current")) ?? undefined;
const result = await client.createPage(title, body, deps.spaceId, parentPageId);
// `createPage` returns `{ data: { id, ... }, success }`; the assigned
// pageId is at `result.data.id`.
const assignedPageId = result?.data?.id;
if (assignedPageId) {
// Write the assigned pageId back as the `gitmost_id` frontmatter, body
// preserved — the file becomes engine-tracked (SPEC §4).
const rewritten = serializePageFile(assignedPageId, body);
await deps.writeFile(c.path, rewritten);
writtenBack.push({ path: c.path, pageId: assignedPageId });
// §10 loop-guard data for the created page (hash the pushed BODY).
pushed.push({
pageId: assignedPageId,
...extractUpdatedAt(result),
bodyHash: bodyHash(body),
});
}
created++;
}
catch (err) {
failures.push({ kind: "create", path: c.path, error: errMessage(err) });
}
}
// 3. DELETES — soft-delete to Trash (SPEC §8), reversible. Isolated per page.
for (const d of actions.deletes) {
try {
await client.deletePage(d.pageId);
deleted++;
}
catch (err) {
failures.push({
kind: "delete",
pageId: d.pageId,
error: errMessage(err),
});
}
}
// 4. RENAME/MOVE (push #3, SPEC §5/§6/§16). Classify each entry against the
// tree-backed resolvers (the NEW parent comes from the new path's enclosing
// folder `.md`, the OLD parent from the old path's at last-pushed — PATH is
// the truth, not stale `meta.parentPageId`; the title from the meta), then
// apply only the real ops. Each page is isolated like the cases above: a
// thrown op is recorded in `failures` and the batch continues. ORDER for a
// page that needs both: reparent (move) FIRST, then retitle (rename).
if (actions.renamesMoves.length > 0) {
// The classifier is PURE over sync resolvers; the tree reads are async, so
// prefetch every (path, side) lookup it will make into plain tables first.
const parentTable = new Map();
const metaTable = new Map();
// A tree read (readFile / git.showFileAtRef) throwing must isolate THAT page
// into `failures`, NOT abort the whole batch (§12 resumability). The helpers
// already swallow their own errors, but this per-entry try/catch keeps the
// batch-isolation invariant holding regardless of future changes to them.
const prefetchFailed = new Set();
for (const rm of actions.renamesMoves) {
// newParent + newTitle from the CURRENT tree; oldParent + oldTitle from the
// last-pushed pre-image (`prev`). Keyed by `path|side` so duplicates fold.
try {
parentTable.set(`${rm.newPath}|current`, await resolveParentPageIdViaTree(deps, rm.newPath, "current"));
parentTable.set(`${rm.oldPath}|prev`, await resolveParentPageIdViaTree(deps, rm.oldPath, "prev"));
metaTable.set(`${rm.newPath}|current`, await metaAtViaTree(deps, rm.newPath, "current", deps.spaceId));
metaTable.set(`${rm.oldPath}|prev`, await metaAtViaTree(deps, rm.oldPath, "prev", deps.spaceId));
}
catch (err) {
prefetchFailed.add(rm.pageId);
failures.push({
kind: "move",
pageId: rm.pageId,
path: rm.newPath,
error: errMessage(err),
});
}
}
const classified = classifyRenameMoves(actions.renamesMoves.filter((rm) => !prefetchFailed.has(rm.pageId)), {
metaAt: (path, side) => metaTable.get(`${path}|${side}`) ?? null,
resolveParentPageId: (path, side) => parentTable.get(`${path}|${side}`) ?? null,
});
for (const c of classified) {
if (c.noop) {
// Cosmetic local-only file-path rename — no Docmost op (SPEC §5).
noops.push({
pageId: c.pageId,
oldPath: c.oldPath,
newPath: c.newPath,
reason: "path-only-rename",
});
continue;
}
// Track which op is in flight so a failure is attributed to the op that
// ACTUALLY threw: for a page needing both, a move that succeeds then a
// rename that throws must be recorded as `rename`, not `move`.
let failingKind = c.move ? "move" : "rename";
try {
// Reparent FIRST so the page is in its new tree position, THEN retitle.
if (c.move) {
failingKind = "move";
// TODO(next): compute a fractional-index position between siblings
// (SPEC §16). `position` is UNDEFINED here; the client supplies a valid
// default. Pass `parentPageId: null` for a move to the space ROOT.
await client.movePage(c.pageId, c.move.parentPageId);
moved++;
}
if (c.rename) {
failingKind = "rename";
await client.renamePage(c.pageId, c.rename.title);
renamed++;
}
}
catch (err) {
// Isolate the failed page: the op that ACTUALLY threw is recorded so a
// re-run can retry. A move that threw before its rename leaves `rename`
// for the next run (idempotent re-apply); refs are NOT advanced (below).
failures.push({
kind: failingKind,
pageId: c.pageId,
path: c.newPath,
error: errMessage(err),
});
}
}
}
// 5. Advance the refs ONLY on a CLEAN push (no failures) AND when a pushed
// commit is supplied. A partial push must advance NEITHER ref, so a re-run
// retries the whole batch (SPEC §12). The loop-close (SPEC §6 step 3 / §10):
// advance `refs/docmost/last-pushed` AND fast-forward the `docmost` mirror,
// so Docmost's new content is mirrored and the next pull diffs empty.
let lastPushedAdvanced = false;
let docmostFastForward = null;
if (pushedCommit && failures.length === 0) {
await git.updateRef(LAST_PUSHED_REF, pushedCommit);
lastPushedAdvanced = true;
// Fast-forward the mirror (refused, not forced, on a non-fast-forward — the
// caller logs the reason). Surfaced in the result.
docmostFastForward = await git.fastForwardBranch(DOCMOST_BRANCH, pushedCommit);
}
return {
created,
updated,
deleted,
moved,
renamed,
writtenBack,
pushed,
failures,
noops,
skipped: actions.skipped,
lastPushedAdvanced,
docmostFastForward,
};
}
/** Stringify a thrown value into a stable error message. */
function errMessage(err) {
return err instanceof Error ? err.message : String(err);
}
/**
* SPEC §5 path-as-truth: the parent FOLDER's `.md` file for a vault-relative
* (forward-slash) path. `buildVaultLayout` puts a page with children at
* `<...>/Title.md` and nests its children under `<...>/Title/`, so for
* `newPath = <dir>/Child.md` the parent page's file is `<dir>.md` (the enclosing
* folder, one level up). A path with NO enclosing folder (`Child.md`, at the
* space root) has no parent folder file -> `null` (the parent is ROOT).
*/
export function parentFolderFile(path) {
const slash = path.lastIndexOf("/");
if (slash < 0)
return null; // root-level file: parent is ROOT.
const dir = path.slice(0, slash); // the enclosing folder
// The page that OWNS the enclosing folder is its folder-note `<dir>/<base>.md`.
const folderNote = `${dir}/${baseSegment(dir)}.md`;
if (path === folderNote) {
// This path IS its folder's folder-note, so its parent is ONE LEVEL UP: the
// folder-note of the grandparent folder (or ROOT at the top level).
const up = dir.lastIndexOf("/");
if (up < 0)
return null; // top-level folder -> parent is ROOT.
const grandDir = dir.slice(0, up);
return `${grandDir}/${baseSegment(grandDir)}.md`;
}
// A leaf (or a nested folder-note) sitting inside `dir`: its parent is `dir`'s
// folder-note.
return folderNote;
}
/**
* Whether a vault path is a Docmost PAGE file (design §"Адопция"): a `.md` file
* with NO dot-segment anywhere in its path. This excludes `.obsidian/` config,
* `.trash/`, dotfiles (`.foo.md`), and every non-`.md` file (attachments, JSON,
* …) — Obsidian owns those; they live in the vault but are never pages. Used to
* screen the PUSH diff so non-page files are never created/updated/deleted in
* Docmost (and never get a `gitmost_id` frontmatter written into them).
*/
export function isPageFile(path) {
if (!path.endsWith(".md"))
return false;
return !path.split("/").some((seg) => seg.startsWith("."));
}
/** The last path segment of a forward-slash path (the folder/file base name). */
function baseSegment(path) {
const slash = path.lastIndexOf("/");
return slash < 0 ? path : path.slice(slash + 1);
}
/**
* The page TITLE derived from a vault path: the file's base name without the
* `.md` extension. In the native-Obsidian layout the filename IS the title — for
* a folder-note `<dir>/<base>.md` that base equals the folder name, so the same
* rule yields the folder's title. Self-consistent across pull/push: a pulled
* (possibly disambiguated) filename round-trips to the same title, so a stable
* file never pushes a spurious rename.
*/
function titleFromPath(path) {
const base = baseSegment(path);
return base.endsWith(".md") ? base.slice(0, -3) : base;
}
/**
* Build the synthetic `DocmostMdMeta` the planner/classifier consume, from the
* NATIVE format: `pageId` from the `gitmost_id` frontmatter, `title` from the
* filename, `spaceId` from the run (the vault's space — every file belongs to
* it). `parentPageId` is intentionally absent: tree position is resolved from the
* PATH (`resolveParentPageId`), never from a stored field (SPEC §5).
*/
function nativeMeta(text, path, spaceId) {
const { id } = parsePageFile(text);
const meta = { version: 1, title: titleFromPath(path), spaceId };
if (id)
meta.pageId = id;
return meta;
}
/**
* Build the `resolveParentPageId(path, side)` resolver `classifyRenameMoves`
* needs, reading the PARENT FOLDER's `.md` (SPEC §5 path-as-truth):
* - `current` -> `deps.readFile(<dir>.md)` (the live working tree),
* - `prev` -> `git.showFileAtRef('refs/docmost/last-pushed', <dir>.md)` (the
* last-pushed pre-image),
* then read its `gitmost_id` frontmatter and return that page's pageId. A root-level path
* (no enclosing folder), a missing/unreadable parent file, or a parent file with
* no parseable pageId all resolve to `null` (parent is ROOT / unknown ->
* `parentPageId: null`, SPEC §16 "parentPageId: null -> в корень").
*
* The IO is async, so this returns an ASYNC resolver; the call sites prefetch the
* parent pageIds (the classifier itself stays pure/sync over a plain table).
*/
async function resolveParentPageIdViaTree(deps, path, side) {
const parentFile = parentFolderFile(path);
if (parentFile === null)
return null; // root-level: parent is ROOT.
let text;
try {
text =
side === "current"
? await deps.readFile(parentFile)
: await deps.git.showFileAtRef(LAST_PUSHED_REF, parentFile);
}
catch {
// Parent folder file missing/unreadable at that side -> treat as ROOT.
return null;
}
if (text === null)
return null; // showFileAtRef returns null when absent.
// The parent page's identity is its `gitmost_id` frontmatter; folder position
// is irrelevant here, only the pageId.
return parsePageFile(text).id;
}
/**
* Resolve the synthetic native meta at a side for the rename/move classifier (the
* title — derived from the path — comes from here). Mirrors
* `resolveParentPageIdViaTree`'s IO sides: `current` reads the working tree,
* `prev` reads `refs/docmost/last-pushed`. Returns `null` only when the file is
* missing/unreadable at that side (a real absence the classifier must see).
*/
async function metaAtViaTree(deps, path, side, spaceId) {
let text;
try {
text =
side === "current"
? await deps.readFile(path)
: await deps.git.showFileAtRef(LAST_PUSHED_REF, path);
}
catch {
return null;
}
if (text === null)
return null;
return nativeMeta(text, path, spaceId);
}
/**
* Pull an `updatedAt` out of a create/update client result, if present. The
* shape is `{ data: { updatedAt? }, ... }` (createPage) or a flatter object;
* absent in the simple fakes, so the field is omitted rather than `undefined`.
*/
function extractUpdatedAt(result) {
const r = result;
const raw = r?.data?.updatedAt ?? r?.updatedAt;
return typeof raw === "string" ? { updatedAt: raw } : {};
}
// --- runnable push orchestration (`runPush`) ---------------------------------
//
// `runPush` is the FS->Docmost twin of `pull.ts`'s `main`: it wires the VaultGit
// diff/ref primitives + the PURE `computePushActions` planner + the THIN
// `applyPushActions` applier into one runnable cycle. SAFE BY DEFAULT — the
// engine's FIRST write path to Docmost defaults to DRY-RUN (plan only, NO
// Docmost writes, NO ref advance); an explicit `--apply` is the ONLY path that
// builds a client and mutates Docmost.
//
// Every external effect is injected (`PushDeps`) so the whole orchestration is
// driven by FAKES in tests — no live Docmost, git, fs, or network.
/**
* The human ("local") git identity used for engine-made commits on `main` in the
* push direction (SPEC §7.3). The provenance is carried by the trailer (below),
* which the loop-guard keys on; the identity is for history readability only.
* When the vault repo already has a configured `user.name`/`user.email`, git
* uses that for the working-tree commit; this is the fallback the daemon stamps.
*/
export const LOCAL_AUTHOR_NAME = "Local";
export const LOCAL_AUTHOR_EMAIL = "local@local";
/** The provenance trailer marking a `main`-side (human/local) commit (SPEC §7.3). */
export const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
/**
* Run one FS->Docmost push cycle (SPEC §6 "ФС → Docmost"), DRY-RUN BY DEFAULT.
*
* Steps (mirrors `pull.ts`):
* 1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
* non-zero-ish result) if a merge is in progress — never push on top of an
* unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
* Docmost (SPEC §9).
* 2. Checkout `main` (the human-facing branch the push reads from).
* 3. Commit the human's pending working-tree changes on `main` with the
* `local` provenance trailer (SPEC §7.3). A no-op when nothing changed.
* 4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
* `docmost` mirror branch (what Docmost currently has). Resolve `main`.
* 5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)`
* resolver (current = working tree, prev = `git show <base>:<path>`); run
* the PURE `computePushActions`.
* 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
* calls, NO ref advance.
* 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`,
* then (a) if any pageIds were written back (creates), commit them on `main`
* with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the
* new commit so the recorded pageIds are persisted in what Docmost mirrors;
* (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
* WARNING and a non-zero-ish flag. Then log a one-line summary.
*/
export async function runPush(deps, opts) {
const { git, settings, log } = deps;
const dryRun = opts.dryRun;
// 1. Preflight git. Fail fast (actionable message via main().catch) if the git
// binary is missing — the vault state store relies on it.
await git.assertGitAvailable();
await git.ensureRepo();
// 1b. Refuse to push on top of an unresolved merge (SPEC §9/§12). A previous
// conflicting pull leaves the vault mid-merge; pushing now could leak
// conflict markers into Docmost (SPEC §9, the cardinal invariant). Detect
// it BEFORE any checkout/diff and stop with a clear, actionable message so
// re-runs converge once the human resolves (or aborts) the merge.
if (await git.isMergeInProgress()) {
log(`push: vault has an unresolved merge at ${settings.vaultPath} — resolve ` +
`it (or 'git merge --abort') and re-run. Nothing was pushed to Docmost ` +
`(conflict markers must never reach Docmost, SPEC §9).`);
return { mode: dryRun ? "dry-run" : "apply", aborted: "merge-in-progress" };
}
// 2. Work on `main` — the human-facing branch the push diffs FROM.
await git.checkout(DEFAULT_BRANCH);
// 3. Commit the human's pending working-tree changes on `main` with the `local`
// provenance trailer (SPEC §7.3). A no-op commit when nothing changed is
// fine (`commit` returns false). The loop-guard keys on the trailer.
// Even on a "plan only" dry-run this commits the working tree (it is the
// only way to diff `base..main`, acceptable §6.1 behavior) — so make that
// LOCAL git mutation VISIBLE, never silent: a created commit is local-only
// and nothing is sent to Docmost.
await git.stageAll();
const committedWorkingTree = await git.commit("local: working-tree changes", {
authorName: LOCAL_AUTHOR_NAME,
authorEmail: LOCAL_AUTHOR_EMAIL,
trailers: [LOCAL_SOURCE_TRAILER],
});
if (committedWorkingTree) {
const sha = await git.revParse(DEFAULT_BRANCH);
log(`push: committed local working-tree changes on main` +
(sha ? ` as ${sha.slice(0, 8)}` : "") +
` (local git only — nothing sent to Docmost).`);
}
else {
log("push: working tree clean (no local changes to push).");
}
// 4. Pick the diff BASE (SPEC §5/§6): `refs/docmost/last-pushed` if it resolves
// (the marker of what `main` is already in Docmost), else fall back to the
// `docmost` mirror branch (the mirror of what Docmost currently has) — which
// is what exists before the first push ever advanced last-pushed.
let base;
const lastPushedSha = await git.readRef(LAST_PUSHED_REF);
if (lastPushedSha) {
base = { ref: LAST_PUSHED_REF, source: "last-pushed", sha: lastPushedSha };
}
else {
base = {
ref: DOCMOST_BRANCH,
source: "docmost",
sha: await git.revParse(DOCMOST_BRANCH),
};
}
const pushedCommit = await git.revParse(DEFAULT_BRANCH);
if (!pushedCommit) {
// `main` has no commit — `ensureRepo` always makes an initial one, so this is
// defensive. Nothing to diff.
log("push: `main` has no commit to push — nothing to do.");
return { mode: dryRun ? "dry-run" : "apply", base };
}
// 5. Diff the base against `main` and build the `metaAt` resolver (PURE planner
// input). `current` reads the live working tree; `prev` reads the base ref's
// pre-image via `git show <base>:<path>` (so a DELETE recovers its pageId).
const changes = await git.diffNameStatus(base.ref, DEFAULT_BRANCH);
// Synchronous resolver over PREFETCHED meta tables: `computePushActions` is
// PURE/sync, but the file/ref reads are async — so we prefetch every (path,
// side) the diff will ask for into a table first, then resolve from it.
const metaTable = new Map();
for (const change of changes) {
// `current`: A/M/R/C still have the file on `main`. `prev`: D needs the
// pre-image; R/C also benefit (old title). Prefetch both sides per path.
const currentPath = change.path;
const prevPath = change.oldPath ?? change.path;
if (!metaTable.has(`${currentPath}|current`)) {
metaTable.set(`${currentPath}|current`, await readMetaCurrent(deps, currentPath, settings.docmostSpaceId));
}
if (!metaTable.has(`${prevPath}|prev`)) {
metaTable.set(`${prevPath}|prev`, await readMetaPrev(deps, base.ref, prevPath, settings.docmostSpaceId));
}
}
const metaAt = (path, side) => metaTable.get(`${path}|${side}`) ?? null;
// The set of pageIds that STILL EXIST somewhere in the current `main` tree.
// Identity is the pageId, NOT the filename: a file vanishing from one path
// while the SAME pageId lives at another path is a MOVE (often a layout
// reshuffle of `_`-fallback names, whose two halves can even land in separate
// cycles), never a deletion. Built only when the diff contains deletes — the
// guard's whole job is to stop a phantom delete from trashing a live page.
let currentPageIds;
if (changes.some((c) => c.status === "D")) {
currentPageIds = new Set();
for (const relPath of await git.listTrackedFiles("*.md")) {
const pid = (await readMetaCurrent(deps, relPath, settings.docmostSpaceId))
?.pageId;
if (pid)
currentPageIds.add(pid);
}
}
const actions = computePushActions({ changes, metaAt, currentPageIds });
const planned = {
creates: actions.creates.length,
updates: actions.updates.length,
deletes: actions.deletes.length,
renamesMoves: actions.renamesMoves.length,
skipped: actions.skipped.length,
};
// 6. DRY-RUN (default): log the full plan and RETURN — build NO client, make
// ZERO Docmost calls, advance NO refs. This is the SAFE default.
logPlan(log, base, pushedCommit, actions, planned, dryRun);
if (dryRun) {
return { mode: "dry-run", base, pushedCommit, planned };
}
// 7. --apply: build the REAL client and execute. This is the ONLY write path.
const client = deps.makeClient(settings);
const applied = await applyPushActions({
client,
// Pass the WHOLE `git` object (it satisfies the applier's
// `Pick<VaultGit, ...>` deps surface). Passing bare method references
// (`git.updateRef`, …) would lose their `this` binding, so on a REAL
// `VaultGit` they would throw `this.runRaw is not a function`. Hand over
// the object so the methods keep their receiver — exactly as `pull.ts`
// does for `applyPullActions`.
git,
readFile: deps.readFile,
writeFile: deps.writeFile,
spaceId: settings.docmostSpaceId,
}, actions, pushedCommit);
// 7a. Persist freshly-assigned pageIds (creates) back into git. `applyPushActions`
// rewrote those files on disk; commit them on `main` with the `local` trailer
// so the new pageIds are recorded, then RE-advance `refs/docmost/last-pushed`
// to the new commit so what Docmost mirrors and what last-pushed points at
// stay in lock-step (the write-back commit is part of `main` now).
// Track a divergent-`docmost` mirror across BOTH ff sites (the applier's main
// push ff in 7b, and the write-back ff here). A divergent mirror is a §5
// invariant breach in EITHER branch and must escalate identically (exit 1).
let divergentDocmost = false;
if (applied.writtenBack.length > 0) {
await git.stageAll();
const recorded = await git.commit("local: record created pageIds", {
authorName: LOCAL_AUTHOR_NAME,
authorEmail: LOCAL_AUTHOR_EMAIL,
trailers: [LOCAL_SOURCE_TRAILER],
});
if (recorded) {
const newCommit = await git.revParse(DEFAULT_BRANCH);
// Only re-advance when the original push was CLEAN (last-pushed was already
// advanced by the applier); a partial push left the refs untouched and a
// re-run retries the whole batch, so we must not move them either.
if (newCommit && applied.lastPushedAdvanced) {
await git.updateRef(LAST_PUSHED_REF, newCommit);
const ff = await git.fastForwardBranch(DOCMOST_BRANCH, newCommit);
if (!ff.ok) {
// SYMMETRIC with the main escalation (7b): a divergent mirror in the
// write-back branch is the SAME §5 invariant breach and must escalate
// (exit 1), not just log a soft warning.
divergentDocmost = true;
log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
`fast-forwarded to the pageId write-back commit ` +
`(${ff.reason ?? "not-fast-forward"}). The §5 invariant ('docmost' ` +
`mirrors what Docmost contains) is broken: reconcile 'docmost' ` +
`against the live Docmost tree before the next cycle.`);
}
}
}
}
// 7b. ESCALATE a divergent-`docmost` fast-forward refusal (SPEC §5 invariant
// broken). The applier already refused to clobber a divergent mirror; make
// it LOUD (not silent) so the operator notices, and fold it into the exit.
if (applied.docmostFastForward && !applied.docmostFastForward.ok) {
divergentDocmost = true;
log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
`fast-forwarded (${applied.docmostFastForward.reason ?? "not-fast-forward"}). ` +
`The §5 invariant ('docmost' mirrors what Docmost contains) is broken: ` +
`reconcile 'docmost' against the live Docmost tree before the next cycle.`);
}
// 7c. One-line summary (mirrors pull.ts's summary line).
log(`push complete: ${applied.created} created, ${applied.updated} updated, ` +
`${applied.deleted} deleted, ${applied.moved} moved, ${applied.renamed} ` +
`renamed, ${applied.noops.length} no-op(s), ${applied.skipped.length} ` +
`skipped, ${applied.failures.length} failure(s)` +
(divergentDocmost ? " [DIVERGENT docmost mirror]" : ""));
return {
mode: "apply",
base,
pushedCommit,
planned,
applied,
divergentDocmost,
failures: applied.failures,
};
}
/** Synthetic native meta from the live working tree (`current` side). */
async function readMetaCurrent(deps, path, spaceId) {
let text;
try {
text = await deps.readFile(path);
}
catch {
return null; // absent on disk (e.g. a D row's path) -> no current meta.
}
return nativeMeta(text, path, spaceId);
}
/** Synthetic native meta from the base ref's pre-image (`prev` side). */
async function readMetaPrev(deps, baseRef, path, spaceId) {
let text;
try {
text = await deps.git.showFileAtRef(baseRef, path);
}
catch {
return null;
}
if (text === null)
return null; // path absent at the base ref.
return nativeMeta(text, path, spaceId);
}
/** Emit the full plan (counts + per-item) to the injected logger. */
function logPlan(log, base, pushedCommit, actions, planned, dryRun) {
log(`push plan (${dryRun ? "DRY-RUN — no Docmost writes" : "APPLY"}): base=` +
`${base.ref} (${base.source}${base.sha ? ` ${base.sha.slice(0, 8)}` : ""}) ` +
`-> main ${pushedCommit.slice(0, 8)}`);
log(`push plan counts: ${planned.creates} create, ${planned.updates} update, ` +
`${planned.deletes} delete, ${planned.renamesMoves} rename/move, ` +
`${planned.skipped} skipped`);
for (const c of actions.creates)
log(` create: ${c.path}`);
for (const u of actions.updates)
log(` update: ${u.pageId} (${u.path})`);
for (const d of actions.deletes)
log(` delete: ${d.pageId}`);
for (const rm of actions.renamesMoves)
log(` rename/move: ${rm.oldPath} -> ${rm.newPath} (${rm.pageId})`);
for (const s of actions.skipped)
log(` skipped [${s.status}] ${s.path}: ${s.reason}`);
}
/**
* Parse the `push` CLI flags. SAFE BY DEFAULT: without `--apply` the run is a
* DRY-RUN (plan only). Exported so the flag handling is unit-testable.
*/
export function parseArgs(argv) {
return { apply: argv.includes("--apply") };
}

View File

@@ -1,41 +0,0 @@
/**
* Engine settings.
*
* The engine is driven IN-PROCESS by the NestJS server, which builds the
* `Settings` object from `EnvironmentService` — so this module must NOT reach
* into `process.env`. It exposes only:
* - the `Settings` type the engine consumes, and
* - `parseSettings(env)` as a PURE function (validate a raw env object -> typed
* `Settings`), kept for unit tests and for the server to reuse if it wants
* to validate an env-shaped object.
* There is no `.env`-loading side-effecting entry point.
*/
import { z } from 'zod';
export declare const envSchema: z.ZodObject<{
DOCMOST_API_URL: z.ZodString;
DOCMOST_EMAIL: z.ZodString;
DOCMOST_PASSWORD: z.ZodString;
DOCMOST_SPACE_ID: z.ZodString;
VAULT_PATH: z.ZodDefault<z.ZodString>;
GIT_REMOTE: z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodOptional<z.ZodString>>;
POLL_INTERVAL_MS: z.ZodDefault<z.ZodCoercedNumber<unknown>>;
DEBOUNCE_MS: z.ZodDefault<z.ZodCoercedNumber<unknown>>;
LOG_LEVEL: z.ZodDefault<z.ZodEnum<{
info: "info";
error: "error";
debug: "debug";
warn: "warn";
}>>;
}, z.core.$strip>;
export type Settings = {
docmostApiUrl: string;
docmostEmail: string;
docmostPassword: string;
docmostSpaceId: string;
vaultPath: string;
gitRemote?: string;
pollIntervalMs: number;
debounceMs: number;
logLevel: 'debug' | 'info' | 'warn' | 'error';
};
export declare function parseSettings(env: NodeJS.ProcessEnv): Settings;

View File

@@ -1,49 +0,0 @@
/**
* Engine settings.
*
* The engine is driven IN-PROCESS by the NestJS server, which builds the
* `Settings` object from `EnvironmentService` — so this module must NOT reach
* into `process.env`. It exposes only:
* - the `Settings` type the engine consumes, and
* - `parseSettings(env)` as a PURE function (validate a raw env object -> typed
* `Settings`), kept for unit tests and for the server to reuse if it wants
* to validate an env-shaped object.
* There is no `.env`-loading side-effecting entry point.
*/
import { z } from 'zod';
// Schema keyed by the real ENV variable names so validation errors name the
// exact variable. Credentials and the address of our OWN Docmost instance have
// NO default — a missing value must fail at startup, never silently fall back.
export const envSchema = z.object({
// Docmost connection — address of our own instance, no default.
DOCMOST_API_URL: z.string().url(),
// Credentials for /auth/login — no default, never hardcoded.
DOCMOST_EMAIL: z.string().min(1),
DOCMOST_PASSWORD: z.string().min(1),
// Which Docmost space to mirror.
DOCMOST_SPACE_ID: z.string().min(1),
// Local git vault (state store) — kept under data/ so the volume persists it.
VAULT_PATH: z.string().min(1).default('data/vault'),
// Optional git remote the vault pushes to. Empty string is treated as unset.
GIT_REMOTE: z.preprocess((v) => (v === '' ? undefined : v), z.string().min(1).optional()),
// Non-secret tunables — sensible defaults are fine.
POLL_INTERVAL_MS: z.coerce.number().int().positive().default(15000),
DEBOUNCE_MS: z.coerce.number().int().positive().default(2000),
LOG_LEVEL: z.enum(['debug', 'info', 'warn', 'error']).default('info'),
});
// Pure: validate a raw environment object and map it to a typed Settings.
// Throws ZodError on bad config. No side effects — safe to import in tests.
export function parseSettings(env) {
const e = envSchema.parse(env);
return {
docmostApiUrl: e.DOCMOST_API_URL,
docmostEmail: e.DOCMOST_EMAIL,
docmostPassword: e.DOCMOST_PASSWORD,
docmostSpaceId: e.DOCMOST_SPACE_ID,
vaultPath: e.VAULT_PATH,
gitRemote: e.GIT_REMOTE,
pollIntervalMs: e.POLL_INTERVAL_MS,
debounceMs: e.DEBOUNCE_MS,
logLevel: e.LOG_LEVEL,
};
}