build(git-sync): пакет @docmost/git-sync в develop, code-only (#326 step 1 / PR-A) #327

Merged
vvzvlad merged 2 commits from feat/293-A-git-sync-package into develop 2026-07-04 07:02:26 +03:00
161 changed files with 24316 additions and 7897 deletions
+5 -1
View File
@@ -4,7 +4,11 @@
data
# compiled output
/dist
/node_modules
node_modules/
# git-sync compiled output (built in CI/Docker via `pnpm build`, never committed,
# so src/ and prod can never silently diverge).
packages/git-sync/build/
# Logs
logs
-109
View File
@@ -1,109 +0,0 @@
/**
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
* rather than any concrete client, because the gitmost server writes NATIVELY —
* through repositories + collab `openDirectConnection`.
*
* `GitSyncClient` is that interface: the native datasource (server side)
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
* subsets of it. The signatures below MIRROR exactly the methods the engine's
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
* off each result), so a REST-style client is still structurally assignable and
* the native adapter has a precise contract.
*/
/**
* A page node as returned by `listSpaceTree` (the sidebar/tree walk, no body).
* The engine layout (`buildVaultLayout`) consumes `PageNode` from `./layout`,
* which only requires `id` (+ optional `title`/`slugId`/`parentPageId`); this
* lite shape documents the fields the tree walk surfaces. Real tree nodes also
* carry `position`, `icon`, `hasChildren` — kept open via the index signature.
*/
export interface GitSyncPageNodeLite {
id: string;
slugId?: string;
title?: string;
parentPageId?: string | null;
hasChildren?: boolean;
/** `listSpaceTree` nodes carry extra fields (position, icon, …). */
[key: string]: unknown;
}
/**
* The structural client the engine depends on. Only `Pick<GitSyncClient, ...>`
* subsets are ever used:
* - pull reads: `getPageJson` (+ the tree walk's `listSpaceTree`),
* - push writes: `importPageMarkdown` / `createPage` / `deletePage` /
* `movePage` / `renamePage`,
* - continuous (phase B+): `listRecentSince` / `listTrash` / `restorePage`.
*/
export interface GitSyncClient {
/**
* Full tree of page nodes for the space (or the subtree rooted at
* `rootPageId`), each WITHOUT body content. `complete` is `false` when the
* walk was truncated / a fetch failed — the pull side suppresses absence
* deletions on an incomplete tree (SPEC §8). Native impl returns
* `complete: true` always (reads the DB, not a paginated REST endpoint).
*/
listSpaceTree(spaceId: string, rootPageId?: string): Promise<{
pages: GitSyncPageNodeLite[];
complete: boolean;
}>;
/**
* One page WITH its ProseMirror body content. `applyPullActions` reads
* `id`, `slugId`, `title`, `parentPageId`, `spaceId` (for the file meta) and
* `content` (to stabilize/serialize). `updatedAt` is carried for the
* poll-suppression loop-guard.
*/
getPageJson(pageId: string): Promise<{
id: string;
slugId: string;
title: string;
parentPageId: string | null;
spaceId: string;
updatedAt: string;
content: unknown;
}>;
/**
* Merge a page's body from a self-contained markdown file (meta + body). The
* collab/Yjs write path (SPEC §2/§15.6) — never a raw jsonb overwrite.
* `applyPushActions` reads only an optional `updatedAt` off the result
* (via `extractUpdatedAt`, tolerant of extra fields).
*
* `baseMarkdown` is the last-synced version of the file (`refs/docmost/
* last-pushed`), the common ancestor for a THREE-WAY merge against the live
* doc so concurrent human edits survive (review #5). Optional/null -> 2-way.
*/
importPageMarkdown(pageId: string, fullMarkdown: string, baseMarkdown?: string | null): Promise<{
updatedAt?: string;
[key: string]: unknown;
}>;
/**
* Create a new page and return the assigned id at `data.id`
* (`applyPushActions` reads `result.data.id`, then writes it back into the
* file's meta). An optional top-level/`data.updatedAt` feeds the loop-guard.
*/
createPage(title: string, content: string, spaceId: string, parentPageId?: string): Promise<{
data: {
id: string;
};
updatedAt?: string;
[key: string]: unknown;
}>;
/** Soft-delete a page to Trash (SPEC §8). Result is not inspected. */
deletePage(pageId: string): Promise<unknown>;
/**
* Reparent a page (and optionally set its fractional-index `position`). The
* engine passes `position` UNDEFINED for now; the native impl computes a
* default between siblings. Result is not inspected.
*/
movePage(pageId: string, parentPageId: string | null, position?: string): Promise<unknown>;
/** Change a page's title only (no body touch). Result is not inspected. */
renamePage(pageId: string, title: string): Promise<unknown>;
/**
* Pages updated since `sinceIso` (the poll-safety reconciliation, SPEC §8).
* `spaceId` may be undefined (all spaces); `hardPageCap` bounds the walk.
*/
listRecentSince(spaceId: string | undefined, sinceIso: string | null, hardPageCap?: number): Promise<unknown[]>;
/** List soft-deleted (trashed) pages for the space (deletion detection). */
listTrash(spaceId: string): Promise<unknown[]>;
/** Restore a soft-deleted page from Trash. Result is not inspected. */
restorePage(pageId: string): Promise<unknown>;
}
@@ -1,13 +0,0 @@
/**
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
* rather than any concrete client, because the gitmost server writes NATIVELY —
* through repositories + collab `openDirectConnection`.
*
* `GitSyncClient` is that interface: the native datasource (server side)
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
* subsets of it. The signatures below MIRROR exactly the methods the engine's
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
* off each result), so a REST-style client is still structurally assignable and
* the native adapter has a precise contract.
*/
export {};
-1
View File
@@ -1 +0,0 @@
export declare function loadSettingsOrExit<T>(factory: () => T): T;
@@ -1,50 +0,0 @@
import { ZodError } from 'zod';
// Turn a ZodError from settings validation into a clear, actionable startup
// message that names the offending env var(s), then exit(1) — no raw stack
// trace. Mirrors the Python new-project skeleton's load_settings_or_exit.
// A non-ZodError is left to propagate unchanged.
export function loadSettingsOrExit(factory) {
try {
return factory();
}
catch (err) {
if (!(err instanceof ZodError))
throw err;
const missing = [];
const invalid = [];
for (const issue of err.issues) {
const name = issue.path.length ? String(issue.path[0]) : '?';
// A missing required variable surfaces as an `invalid_type` issue whose
// received value was `undefined`. zod 3 exposed `issue.received` directly;
// zod 4 dropped that field and instead folds it into the message
// ("expected string, received undefined"). Detect both shapes so the
// missing-vs-invalid split holds across zod majors. NOTE: an invalid (but
// present) value uses a different code (invalid_format / invalid_value) or
// an `invalid_type` message that reports a non-undefined received (e.g.
// "received NaN" from a coerced number), so neither is misread as missing.
const i = issue;
const isMissing = issue.code === 'invalid_type' &&
(i.received === 'undefined' ||
/received undefined/i.test(i.message ?? ''));
if (isMissing)
missing.push(name);
else
invalid.push(`${name}: ${issue.message}`);
}
const lines = ['Configuration error in environment / .env:'];
if (missing.length) {
lines.push(' Missing required variable(s):');
for (const n of [...new Set(missing)])
lines.push(` - ${n}`);
}
if (invalid.length) {
lines.push(' Invalid value(s):');
for (const item of invalid)
lines.push(` - ${item}`);
}
lines.push('');
lines.push('Set them in .env (see .env.example) and try again.');
process.stderr.write(lines.join('\n') + '\n');
process.exit(1);
}
}
-70
View File
@@ -1,70 +0,0 @@
import { VaultGit } from "./git.js";
import { GitSyncClient } from "./client.types.js";
import { Settings } from "./settings.js";
/**
* Absolute-path filesystem primitives the cycle needs. Injected (not imported)
* so the engine stays IO-free and unit-testable. `mkdir` is recursive; `rm` is
* force (a missing file is a no-op).
*/
export interface CycleFs {
readFile: (absPath: string) => Promise<string>;
writeFile: (absPath: string, text: string) => Promise<void>;
mkdir: (absDir: string) => Promise<void>;
rm: (absPath: string) => Promise<void>;
}
export interface RunCycleDeps {
spaceId: string;
/** The Docmost seam (reads for pull, writes for push). */
client: GitSyncClient;
/** The per-space git vault (a real working repo). */
vault: VaultGit;
/** Engine settings; `vaultPath` roots the relPath -> absolute-path mapping. */
settings: Settings;
fs: CycleFs;
log: (line: string) => void;
/**
* Delete-cap hook (the ONLY caller-specific policy). Called with the push
* dry-run's planned delete count (`Number.POSITIVE_INFINITY` when the dry-run
* itself failed, so the hook can fail safe) and the live client; returns the
* client to use for the REAL apply. The default (omitted) applies every op
* unmodified. gitmost uses it to neutralize deletes when over its cap.
*
* When omitted, NO dry-run is performed (one fewer push planning pass).
*/
resolveApplyClient?: (plannedDeletes: number, client: GitSyncClient) => GitSyncClient;
}
export interface RunCycleResult {
ran: boolean;
/** Set when the cycle short-circuited without running pull/push. */
skipped?: "merge-in-progress";
pull?: {
written: number;
deleted: number;
conflict: boolean;
};
push?: {
mode: string;
failures: number;
};
}
/**
* Run ONE full reconcile cycle for a space: PULL (Docmost -> vault) then PUSH
* (vault -> Docmost), under the engine's required branch choreography. This is
* the single entry point the app drives — it owns the staging order so it can
* never drift from the engine it ships with.
*
* Staging (the ⭐ data-loss-critical order, SPEC §6/§9):
* 1. assertGitAvailable + ensureRepo (the git state store must exist).
* 2. refuse on an unresolved merge (a prior conflicting pull); next checkout
* would fail otherwise.
* 3. ensureBranch('docmost','main') + checkout('docmost'). Pull writes MUST
* land on `docmost`, not `main`: applyPullActions commits on `docmost`,
* then checks out `main` and merges docmost -> main. Writing Docmost
* content straight onto `main` would clobber local file edits before push
* can diff them.
* 4. PULL: readExisting -> listSpaceTree -> computePullActions -> apply.
* 5. PUSH: optional dry-run to feed the delete-cap hook, then the real apply.
*
* Lock + cap POLICY live in the caller; this owns only the mechanics.
*/
export declare function runCycle(deps: RunCycleDeps): Promise<RunCycleResult>;
-97
View File
@@ -1,97 +0,0 @@
import { readExisting, computePullActions, applyPullActions } from "./pull.js";
import { runPush } from "./push.js";
/**
* Run ONE full reconcile cycle for a space: PULL (Docmost -> vault) then PUSH
* (vault -> Docmost), under the engine's required branch choreography. This is
* the single entry point the app drives — it owns the staging order so it can
* never drift from the engine it ships with.
*
* Staging (the ⭐ data-loss-critical order, SPEC §6/§9):
* 1. assertGitAvailable + ensureRepo (the git state store must exist).
* 2. refuse on an unresolved merge (a prior conflicting pull); next checkout
* would fail otherwise.
* 3. ensureBranch('docmost','main') + checkout('docmost'). Pull writes MUST
* land on `docmost`, not `main`: applyPullActions commits on `docmost`,
* then checks out `main` and merges docmost -> main. Writing Docmost
* content straight onto `main` would clobber local file edits before push
* can diff them.
* 4. PULL: readExisting -> listSpaceTree -> computePullActions -> apply.
* 5. PUSH: optional dry-run to feed the delete-cap hook, then the real apply.
*
* Lock + cap POLICY live in the caller; this owns only the mechanics.
*/
export async function runCycle(deps) {
const { spaceId, client, vault, settings, fs, log, resolveApplyClient } = deps;
const vaultRoot = settings.vaultPath;
const abs = (relPath) => `${vaultRoot}/${relPath}`;
// 1. The engine state store is git: make sure the repo + branches exist
// before any tracked-file listing or diff.
await vault.assertGitAvailable();
await vault.ensureRepo();
// 2. Refuse to run on top of an unresolved merge (SPEC §9): a prior
// conflicting pull leaves the vault mid-merge; the next checkout would fail.
if (await vault.isMergeInProgress()) {
log(`vault has an unresolved merge — resolve it (or 'git merge --abort') ` +
`and re-run (SPEC §9); skipping cycle.`);
return { ran: false, skipped: "merge-in-progress" };
}
// 3. Pull writes happen on `docmost`; be on it BEFORE applying (see docstring).
await vault.ensureBranch("docmost", "main");
await vault.checkout("docmost");
// 4. PULL --------------------------------------------------------------------
const existing = await readExisting({
listTracked: () => vault.listTrackedFiles("*.md"),
readFile: (relPath) => fs.readFile(abs(relPath)),
});
const tree = await client.listSpaceTree(spaceId);
const pullActions = computePullActions({
pages: tree.pages,
treeComplete: tree.complete,
existing,
});
const pullResult = await applyPullActions({
client,
git: vault,
writeFile: (absPath, text) => fs.writeFile(absPath, text),
mkdir: (absDir) => fs.mkdir(absDir),
rm: (absPath) => fs.rm(absPath),
}, pullActions, vaultRoot);
// 5. PUSH --------------------------------------------------------------------
const pushDeps = {
settings,
git: vault,
makeClient: () => client,
readFile: (relPath) => fs.readFile(abs(relPath)),
writeFile: (relPath, text) => fs.writeFile(abs(relPath), text),
log,
};
let applyClient = client;
if (resolveApplyClient) {
// Plan the push as a DRY-RUN first to read the delete count, then let the
// caller decide the apply client (e.g. neutralize deletes over a cap). A
// failed dry-run yields Infinity so the hook can fail safe.
let plannedDeletes;
try {
const dry = await runPush(pushDeps, { dryRun: true });
plannedDeletes = dry.planned?.deletes ?? 0;
}
catch (err) {
log(`push dry-run planning failed (${err instanceof Error ? err.message : String(err)}); deferring deletion policy to the cap hook (fail-safe).`);
plannedDeletes = Number.POSITIVE_INFINITY;
}
applyClient = resolveApplyClient(plannedDeletes, client);
}
const pushResult = await runPush({ ...pushDeps, makeClient: () => applyClient }, { dryRun: false });
return {
ran: true,
pull: {
written: pullResult.written,
deleted: pullResult.deleted,
conflict: pullResult.merge.conflict,
},
push: {
mode: pushResult.mode,
failures: pushResult.failures?.length ?? 0,
},
};
}
-259
View File
@@ -1,259 +0,0 @@
/** Bot identity used for engine-authored vault commits (SPEC §7.3). */
export declare const BOT_AUTHOR_NAME = "Docmost Sync";
export declare const BOT_AUTHOR_EMAIL = "docmost-sync@local";
/** Default branch the vault repo is initialized on. */
export declare const DEFAULT_BRANCH = "main";
/**
* One row of `git diff --name-status` (SPEC §6 "ФС → Docmost"). `status` is the
* single-letter change code (`-M` rename detection on), `path` is the (new) file
* path; for a rename/copy (`R`/`C`) `oldPath` is the source and `path` is the
* destination, with `score` carrying git's similarity index (0–100).
*/
export interface DiffEntry {
status: "A" | "M" | "D" | "R" | "C";
/** New (destination) path. For A/M/D it is the only path. */
path: string;
/** Source path — present only for R/C. */
oldPath?: string;
/** Rename/copy similarity score (0–100) — present only for R/C. */
score?: number;
}
/** Result of a `merge`: whether it succeeded cleanly or left conflict markers. */
export interface MergeResult {
/** True when the merge applied cleanly (fast-forward or clean 3-way). */
ok: boolean;
/** True when the merge stopped on conflicts (markers left in the worktree). */
conflict: boolean;
/** Raw combined stdout+stderr, for logging/diagnostics. */
output: string;
}
/** Options for an engine-authored commit (provenance, SPEC §7.3). */
export interface CommitOptions {
authorName: string;
authorEmail: string;
/**
* Trailer lines appended to the commit message body (e.g.
* `Docmost-Sync-Source: docmost`). These are the machine-readable provenance
* the loop-guard keys on (SPEC §12, "commit-attribution").
*/
trailers?: string[];
}
/**
* A git wrapper bound to a single vault path. Construct once per vault; every
* method runs git with `cwd = vaultPath`.
*/
export declare class VaultGit {
private readonly vaultPath;
constructor(vaultPath: string);
/**
* Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
* to system `git` for every vault operation, so a missing binary (e.g. a slim
* container image without git) must fail fast with an actionable message
* rather than a cryptic ENOENT deep inside the first real git call. Presence
* check only — we do NOT gate on a specific version. Runs `git --version`
* with NO `cwd` (the vault dir may not exist yet at preflight time).
*/
assertGitAvailable(): Promise<void>;
/**
* Run a git command in the vault and return trimmed stdout. THIN wrapper over
* the single `runRaw` primitive: throws a clear, unified Error (including
* stderr/stdout) on a non-zero exit.
*/
private run;
/**
* The ONE primitive every git invocation in this module flows through. Builds
* the full argv (`--no-pager -c core.quotepath=false <args>`), env, cwd, and
* maxBuffer, runs git, and NEVER throws — it returns the exit info so callers
* can treat a non-zero exit as either an error (`run`) or a meaningful state
* (e.g. a merge conflict, a porcelain diff that "fails" deliberately).
*
* - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
* blocks on a pager and always prints verbatim UTF-8 paths (no octal
* escaping/quoting). `quotepath=false` is the baseline for ALL path-
* printing commands (ls-files, diff --name-only, …).
* - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
* vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
* - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
* - On a spawn/exec error we capture the error `message` too, so a failure
* before git could write to stderr (e.g. ENOENT) is NOT lost.
*/
private runRaw;
/**
* Ensure the vault directory exists and is an initialized git repo on `main`
* with an initial (empty) commit so branches exist. Idempotent: safe to call
* on every run. Sets a LOCAL bot identity for the vault repo if none is set
* (so engine commits never fall back to a global/unset identity).
*/
ensureRepo(): Promise<void>;
/** True if `cwd` is inside a git work-tree (the vault is initialized). */
private isRepo;
/** True if a LOCAL git config key is set in the vault repo. */
private hasLocalConfig;
/** True if the repo has at least one commit (HEAD resolves). */
private hasAnyCommit;
/** True if a branch with the given name exists. */
branchExists(name: string): Promise<boolean>;
/**
* Create `name` from `fromBranch` if it does not already exist. No-op (and no
* checkout) when the branch is already present.
*/
ensureBranch(name: string, fromBranch: string): Promise<void>;
/** Name of the currently checked-out branch. */
currentBranch(): Promise<string>;
/** Check out an existing branch. */
checkout(name: string): Promise<void>;
/** Stage everything (adds, modifications, deletions). */
stageAll(): Promise<void>;
/**
* True if the vault is mid-merge (an unresolved merge from a previous run,
* SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged
* (conflicted) index entries (`git ls-files -u`). The pull cycle checks this
* BEFORE any checkout so a left-over merge produces a clear, actionable
* message instead of a raw "you need to resolve your current index first"
* failure deep inside `checkout`. This is what makes re-runs converge
* (resumability, SPEC §12).
*/
isMergeInProgress(): Promise<boolean>;
/**
* Commit the currently STAGED changes with an explicit author/committer
* identity and the given trailers appended to the message body (SPEC §7.3
* provenance). Returns `true` if a commit was made, `false` if there was
* nothing to commit (graceful no-op). The caller is expected to have staged
* its changes first (e.g. via `stageAll`).
*/
commit(message: string, opts: CommitOptions): Promise<boolean>;
/**
* Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
* Builds the full message with appended trailers and sets author + committer
* identity via env vars (so the committer matches the author, not the repo
* default).
*/
private commitRaw;
/**
* Merge `fromBranch` into the current branch (`git merge --no-edit`).
* Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict
* state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict
* markers are left in the worktree for manual resolution by a later increment,
* and — critically — nothing is pushed to Docmost (we never write to Docmost
* anyway).
*/
merge(fromBranch: string): Promise<MergeResult>;
/** True if the index has any unmerged (conflicted) paths. */
private hasUnmergedPaths;
/**
* List tracked files on the current branch (paths relative to the vault
* root, forward-slash separated). An optional glob (a git pathspec) narrows
* the listing, e.g. `"*.md"`.
*
* The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
* (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
* returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
* which `src/pull.ts` `readExisting` would then parse as garbage paths,
* breaking move/duplicate detection. We defeat that two ways at once:
* - `core.quotepath=false` disables the octal-escape/quoting. It is now the
* `runRaw` argv baseline (prepended to EVERY invocation), so we no longer
* pass it inline here.
* - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
* ambiguity), which we split on `\0`.
* We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
* the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
* are returned verbatim — git already emits forward slashes.
*/
listTrackedFiles(glob?: string): Promise<string[]>;
/**
* Diff two refs with `--name-status -M -z` and parse the NUL-delimited output
* (SPEC §6: the FS→Docmost push direction diffs `main` against
* `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed
* file is reported as a single `R` row with both its old and new path instead
* of a delete+add pair — that distinction is what lets the push planner tell a
* move from a delete+create (SPEC §8 "Move vs delete").
*
* `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has
* Cyrillic file names) with NO quoting/escaping. The record shape differs by
* status:
* - A/M/D: `status\0path\0`
* - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
* We read the RAW stdout (not the trimming `run()` helper, which would mangle
* the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the
* tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim.
*/
diffNameStatus(fromRef: string, toRef: string): Promise<DiffEntry[]>;
/**
* Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist.
* `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an
* unknown ref, so a non-zero exit maps cleanly to `null`. Used to read
* `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push.
*/
revParse(ref: string): Promise<string | null>;
/**
* Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`,
* named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5:
* "что из `main` уже отражено в Docmost").
*/
readRef(ref: string): Promise<string | null>;
/**
* Point `ref` at `target` (`git update-ref <ref> <target>`). Used to advance
* `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
* (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts.
*/
updateRef(ref: string, target: string): Promise<void>;
/**
* Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward,
* i.e. the current `branch` tip is an ancestor of `toCommit` (verified via
* `git merge-base --is-ancestor <branch> <toCommit>`). Used to advance the
* `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a
* push succeeds, Docmost already contains the pushed `main` content, so the
* mirror must reflect it — otherwise the NEXT pull would diff our own write
* back and re-pull it (loop-guard).
*
* SAFETY — never force, never clobber divergent history:
* - If `branch` IS an ancestor of `toCommit`, advance it with
* `git update-ref refs/heads/<branch> <toCommit>`. The `docmost` branch is
* NOT checked out during a push (push works on `main`), so updating the ref
* directly is safe and avoids any working-tree touch.
* - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward),
* do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and
* let the caller log it. We must never overwrite a `docmost` history that
* has commits the push base does not contain.
*
* Returns `{ ok: true }` when the branch was advanced (or already at
* `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise.
* A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason.
*/
fastForwardBranch(branch: string, toCommit: string): Promise<{
ok: boolean;
reason?: string;
}>;
/**
* Read a file's content at a specific ref (`git show <ref>:<path>`), or `null`
* if the path does not exist there. Used by the push direction to read the
* PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its
* `docmost:meta` — and therefore its `pageId` — can be recovered to translate
* the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones
* that had a pageId, are deleted in Docmost). A non-zero exit (path absent at
* that ref) maps to `null` rather than throwing.
*/
showFileAtRef(ref: string, path: string): Promise<string | null>;
}
/**
* Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
* Used by the single `runRaw` primitive every git command flows through, so
* these pins apply uniformly (including the `git --version` preflight).
*
* cwd-isolation is this module's central safety guarantee: every git command
* MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An
* inherited `GIT_DIR` / `GIT_WORK_TREE` in `process.env` would silently
* redirect the operation away from `cwd` (e.g. to the source repo or another
* checkout), defeating that guarantee. So we always strip them, regardless of
* whatever else the caller adds (author/committer identity, etc.).
*
* Exported for unit testing.
*/
export declare function vaultGitEnv(extra?: Record<string, string>): NodeJS.ProcessEnv;
/**
* Build a commit message body with trailer lines appended (SPEC §7.3). The
* trailers are separated from the subject by a blank line so `git interpret-
* trailers` / `git log --format=%(trailers)` parse them as trailers.
* Exported for unit testing.
*/
export declare function buildCommitMessage(subject: string, trailers?: string[]): string;
-570
View File
@@ -1,570 +0,0 @@
/**
* Thin async wrapper over the system `git` binary (SPEC §5: state store = git).
*
* IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`,
* which is the vault's OWN git repository (default `data/vault`), SEPARATE from
* the gitmost application repo. This module MUST NEVER run git against the
* application repo. `data/` is gitignored, so a nested repo under `data/vault`
* is safe. The pull cycle is READ-ONLY toward Docmost; this module only touches
* the local vault git, never a git remote (push is deferred, see SPEC §7).
*
* Implementation notes:
* - We shell out via `node:child_process` `execFile` (promisified), passing
* ARGS AS AN ARRAY — no shell, so there is no command injection surface even
* if a page title / branch name contains shell metacharacters.
* - EVERY git invocation funnels through the single `runRaw` primitive, which
* ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git
* never blocks on a pager and always prints verbatim UTF-8 paths). There is
* no exception — even the `git --version` preflight goes through `runRaw`.
* - "nothing to commit" is treated as a graceful no-op, not an error.
*/
import { execFile } from "node:child_process";
import { mkdir } from "node:fs/promises";
import { promisify } from "node:util";
const execFileAsync = promisify(execFile);
/** Bot identity used for engine-authored vault commits (SPEC §7.3). */
export const BOT_AUTHOR_NAME = "Docmost Sync";
export const BOT_AUTHOR_EMAIL = "docmost-sync@local";
/** Default branch the vault repo is initialized on. */
export const DEFAULT_BRANCH = "main";
/**
* A git wrapper bound to a single vault path. Construct once per vault; every
* method runs git with `cwd = vaultPath`.
*/
export class VaultGit {
vaultPath;
constructor(vaultPath) {
this.vaultPath = vaultPath;
}
/**
* Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
* to system `git` for every vault operation, so a missing binary (e.g. a slim
* container image without git) must fail fast with an actionable message
* rather than a cryptic ENOENT deep inside the first real git call. Presence
* check only — we do NOT gate on a specific version. Runs `git --version`
* with NO `cwd` (the vault dir may not exist yet at preflight time).
*/
async assertGitAvailable() {
// Goes through the single `runRaw` primitive like every other invocation.
// `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at
// preflight time, so we must not point git at a missing directory.
const r = await this.runRaw(["--version"], { cwd: null });
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error("git binary not found or not runnable — install git (the vault state " +
`store requires it). Underlying error: ${detail}`);
}
}
/**
* Run a git command in the vault and return trimmed stdout. THIN wrapper over
* the single `runRaw` primitive: throws a clear, unified Error (including
* stderr/stdout) on a non-zero exit.
*/
async run(args, opts) {
const r = await this.runRaw(args, opts);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
}
return r.stdout.trim();
}
/**
* The ONE primitive every git invocation in this module flows through. Builds
* the full argv (`--no-pager -c core.quotepath=false <args>`), env, cwd, and
* maxBuffer, runs git, and NEVER throws — it returns the exit info so callers
* can treat a non-zero exit as either an error (`run`) or a meaningful state
* (e.g. a merge conflict, a porcelain diff that "fails" deliberately).
*
* - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
* blocks on a pager and always prints verbatim UTF-8 paths (no octal
* escaping/quoting). `quotepath=false` is the baseline for ALL path-
* printing commands (ls-files, diff --name-only, …).
* - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
* vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
* - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
* - On a spawn/exec error we capture the error `message` too, so a failure
* before git could write to stderr (e.g. ENOENT) is NOT lost.
*/
async runRaw(args, opts) {
const cwd = opts?.cwd === null ? undefined : (opts?.cwd ?? this.vaultPath);
try {
const { stdout, stderr } = await execFileAsync("git", ["--no-pager", "-c", "core.quotepath=false", ...args], {
// Generous buffer: file listings / porcelain output on a large vault
// can be sizable.
...(cwd !== undefined ? { cwd } : {}),
maxBuffer: 64 * 1024 * 1024,
env: vaultGitEnv(opts?.env),
});
return { code: 0, stdout, stderr };
}
catch (err) {
const e = err;
return {
code: typeof e.code === "number" ? e.code : 1,
stdout: e.stdout ?? "",
// Preserve the error message when there is no stderr (e.g. a spawn
// failure like ENOENT, where promisified execFile sets stderr to an
// EMPTY STRING — so `||`, not `??`, to fall through to `message`).
stderr: e.stderr || e.message || "",
};
}
}
/**
* Ensure the vault directory exists and is an initialized git repo on `main`
* with an initial (empty) commit so branches exist. Idempotent: safe to call
* on every run. Sets a LOCAL bot identity for the vault repo if none is set
* (so engine commits never fall back to a global/unset identity).
*/
async ensureRepo() {
await mkdir(this.vaultPath, { recursive: true });
if (!(await this.isRepo())) {
// `git init -b main` sets the initial branch on modern git; we still
// guard the branch name below for safety on older binaries.
await this.run(["init", "-b", DEFAULT_BRANCH]);
}
// Set a local identity for the vault repo if unset, so engine commits have
// a deterministic committer even on a machine with no global git config.
if (!(await this.hasLocalConfig("user.name"))) {
await this.run(["config", "user.name", BOT_AUTHOR_NAME]);
}
if (!(await this.hasLocalConfig("user.email"))) {
await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
}
// Neutralize correctness-affecting git config in the vault's LOCAL config so
// a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just
// output) and corrupt the vault. The vault is OUR dedicated repo, so LOCAL
// values (which override global/system) are the right scope. Set
// UNCONDITIONALLY every run — idempotent and cheap; `git config <key>`
// writes to `--local` by default inside the repo. These MUST be in place
// before any add/commit/checkout that could be affected, hence they run
// before the initial-commit block below.
// - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true
// would rewrite LF<->CRLF on add/checkout, making our deterministic,
// byte-stable markdown churn and breaking the round-trip invariant.
// `false` guarantees git stores/checks out verbatim bytes.
// - core.safecrlf=false — avoid CRLF-related warnings/aborts on add.
// - commit.gpgsign=false — the headless daemon must never try to GPG-sign
// a commit (would fail/hang; we already set GIT_TERMINAL_PROMPT=0).
// - core.attributesFile=/dev/null — neutralize the user's GLOBAL
// gitattributes so a global clean/smudge filter (filter.<name>.clean)
// cannot rewrite the STORED blob and break §11 byte-stability (a config
// that core.autocrlf=false does not cover). POSIX-only path, which is
// fine: the daemon runs on Linux (Docker) / macOS. A system
// /etc/gitattributes remains the host admin's domain (out of scope).
// NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
// human running git by hand in the vault must inherit the same neutralized
// behavior; a transient `-c` would not persist. (core.quotepath, by
// contrast, only affects OUR parsing of output and so is baked into the
// `runRaw` argv baseline instead.)
try {
await this.run(["config", "core.autocrlf", "false"]);
await this.run(["config", "core.safecrlf", "false"]);
await this.run(["config", "commit.gpgsign", "false"]);
await this.run(["config", "core.attributesFile", "/dev/null"]);
}
catch (err) {
const detail = err instanceof Error ? err.message : String(err);
throw new Error(`failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` +
"/.git/config is writable and not locked (e.g. stale config.lock): " +
detail);
}
// Create the initial empty commit on `main` if the repo has no commits yet,
// so both `main` and (later) `docmost` branches have a common base.
if (!(await this.hasAnyCommit())) {
// Make sure we are on the default branch before the first commit (covers
// the older-git case where `init -b` was not honored).
await this.run(["checkout", "-B", DEFAULT_BRANCH]);
await this.commitRaw("init vault", {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
allowEmpty: true,
});
}
}
/** True if `cwd` is inside a git work-tree (the vault is initialized). */
async isRepo() {
const r = await this.runRaw(["rev-parse", "--is-inside-work-tree"]);
return r.code === 0 && r.stdout.trim() === "true";
}
/** True if a LOCAL git config key is set in the vault repo. */
async hasLocalConfig(key) {
const r = await this.runRaw(["config", "--local", "--get", key]);
return r.code === 0 && r.stdout.trim().length > 0;
}
/** True if the repo has at least one commit (HEAD resolves). */
async hasAnyCommit() {
const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]);
return r.code === 0;
}
/** True if a branch with the given name exists. */
async branchExists(name) {
const r = await this.runRaw([
"rev-parse",
"--verify",
`refs/heads/${name}`,
]);
return r.code === 0;
}
/**
* Create `name` from `fromBranch` if it does not already exist. No-op (and no
* checkout) when the branch is already present.
*/
async ensureBranch(name, fromBranch) {
if (await this.branchExists(name))
return;
await this.run(["branch", name, fromBranch]);
}
/** Name of the currently checked-out branch. */
async currentBranch() {
return this.run(["rev-parse", "--abbrev-ref", "HEAD"]);
}
/** Check out an existing branch. */
async checkout(name) {
await this.run(["checkout", name]);
}
/** Stage everything (adds, modifications, deletions). */
async stageAll() {
await this.run(["add", "-A"]);
}
/**
* True if the vault is mid-merge (an unresolved merge from a previous run,
* SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged
* (conflicted) index entries (`git ls-files -u`). The pull cycle checks this
* BEFORE any checkout so a left-over merge produces a clear, actionable
* message instead of a raw "you need to resolve your current index first"
* failure deep inside `checkout`. This is what makes re-runs converge
* (resumability, SPEC §12).
*/
async isMergeInProgress() {
// MERGE_HEAD exists exactly while a merge is in progress.
const mergeHead = await this.runRaw([
"rev-parse",
"--verify",
"--quiet",
"MERGE_HEAD",
]);
if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0)
return true;
// Fallback / belt-and-suspenders: any unmerged index entries also mean the
// working tree is mid-conflict and a checkout would refuse.
const unmerged = await this.runRaw(["ls-files", "-u"]);
return unmerged.code === 0 && unmerged.stdout.trim().length > 0;
}
/**
* Commit the currently STAGED changes with an explicit author/committer
* identity and the given trailers appended to the message body (SPEC §7.3
* provenance). Returns `true` if a commit was made, `false` if there was
* nothing to commit (graceful no-op). The caller is expected to have staged
* its changes first (e.g. via `stageAll`).
*/
async commit(message, opts) {
// Nothing staged -> nothing to commit. Treat as a no-op (SPEC §11: a
// deterministic re-pull of unchanged pages produces identical bytes, so
// git sees no diff and we must not error).
const staged = await this.runRaw([
"diff",
"--cached",
"--quiet",
]);
// `diff --cached --quiet` exits 0 when the index matches HEAD (nothing
// staged), 1 when there are staged changes.
if (staged.code === 0)
return false;
await this.commitRaw(message, opts);
return true;
}
/**
* Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
* Builds the full message with appended trailers and sets author + committer
* identity via env vars (so the committer matches the author, not the repo
* default).
*/
async commitRaw(message, opts) {
const fullMessage = buildCommitMessage(message, opts.trailers);
// `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath
// (or any injected hook) must never interfere with engine commits in our
// dedicated vault repo.
const args = ["commit", "--no-verify", "-m", fullMessage];
if (opts.allowEmpty)
args.push("--allow-empty");
// Route through the single `runRaw` primitive; set author + committer
// identity via env vars (so the committer matches the author, not the repo
// default). Throw via the same unified message on a non-zero exit.
const r = await this.runRaw(args, {
env: {
GIT_AUTHOR_NAME: opts.authorName,
GIT_AUTHOR_EMAIL: opts.authorEmail,
GIT_COMMITTER_NAME: opts.authorName,
GIT_COMMITTER_EMAIL: opts.authorEmail,
},
});
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
}
}
/**
* Merge `fromBranch` into the current branch (`git merge --no-edit`).
* Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict
* state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict
* markers are left in the worktree for manual resolution by a later increment,
* and — critically — nothing is pushed to Docmost (we never write to Docmost
* anyway).
*/
async merge(fromBranch) {
const r = await this.runRaw(["merge", "--no-edit", fromBranch]);
const output = `${r.stdout}\n${r.stderr}`.trim();
if (r.code === 0) {
return { ok: true, conflict: false, output };
}
// A non-zero exit on merge most commonly means a conflict. Confirm by
// checking for unmerged paths (porcelain "U" status) so we don't mislabel
// an unrelated failure as a conflict.
const conflict = await this.hasUnmergedPaths();
return { ok: false, conflict, output };
}
/** True if the index has any unmerged (conflicted) paths. */
async hasUnmergedPaths() {
const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]);
return r.code === 0 && r.stdout.trim().length > 0;
}
/**
* List tracked files on the current branch (paths relative to the vault
* root, forward-slash separated). An optional glob (a git pathspec) narrows
* the listing, e.g. `"*.md"`.
*
* The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
* (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
* returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
* which `src/pull.ts` `readExisting` would then parse as garbage paths,
* breaking move/duplicate detection. We defeat that two ways at once:
* - `core.quotepath=false` disables the octal-escape/quoting. It is now the
* `runRaw` argv baseline (prepended to EVERY invocation), so we no longer
* pass it inline here.
* - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
* ambiguity), which we split on `\0`.
* We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
* the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
* are returned verbatim — git already emits forward slashes.
*/
async listTrackedFiles(glob) {
const r = await this.runRaw(["ls-files", "-z", ...(glob ? [glob] : [])]);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ls-files failed: ${detail}`);
}
return r.stdout.split("\0").filter((p) => p.length > 0);
}
/**
* Diff two refs with `--name-status -M -z` and parse the NUL-delimited output
* (SPEC §6: the FS→Docmost push direction diffs `main` against
* `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed
* file is reported as a single `R` row with both its old and new path instead
* of a delete+add pair — that distinction is what lets the push planner tell a
* move from a delete+create (SPEC §8 "Move vs delete").
*
* `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has
* Cyrillic file names) with NO quoting/escaping. The record shape differs by
* status:
* - A/M/D: `status\0path\0`
* - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
* We read the RAW stdout (not the trimming `run()` helper, which would mangle
* the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the
* tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim.
*/
async diffNameStatus(fromRef, toRef) {
const r = await this.runRaw([
"diff",
"--name-status",
"-M",
"-z",
fromRef,
toRef,
]);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git diff --name-status failed: ${detail}`);
}
// Tokens alternate: <status> <path...> <status> <path...> ... With `-z`,
// each token (status code AND each path) is its own NUL-delimited field.
const tokens = r.stdout.split("\0").filter((t) => t.length > 0);
const entries = [];
let i = 0;
while (i < tokens.length) {
const raw = tokens[i++];
// The status token is e.g. `A`, `M`, `D`, or `R100` / `C075`. The leading
// letter is the change kind; any trailing digits are the similarity score.
const letter = raw[0];
if (letter === "R" || letter === "C") {
const score = Number.parseInt(raw.slice(1), 10);
const oldPath = tokens[i++];
const path = tokens[i++];
if (oldPath === undefined || path === undefined)
break; // malformed tail
entries.push({
status: letter,
path,
oldPath,
...(Number.isFinite(score) ? { score } : {}),
});
}
else if (letter === "A" || letter === "M" || letter === "D") {
const path = tokens[i++];
if (path === undefined)
break; // malformed tail
entries.push({ status: letter, path });
}
else {
// Unknown/other status (e.g. T type-change, U unmerged) — consume one
// path token defensively so the walk stays aligned, but do not emit it
// (the push planner only handles A/M/D/R/C).
i++;
}
}
return entries;
}
/**
* Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist.
* `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an
* unknown ref, so a non-zero exit maps cleanly to `null`. Used to read
* `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push.
*/
async revParse(ref) {
const r = await this.runRaw(["rev-parse", "--verify", "--quiet", ref]);
if (r.code !== 0)
return null;
const sha = r.stdout.trim();
return sha.length > 0 ? sha : null;
}
/**
* Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`,
* named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5:
* "что из `main` уже отражено в Docmost").
*/
async readRef(ref) {
return this.revParse(ref);
}
/**
* Point `ref` at `target` (`git update-ref <ref> <target>`). Used to advance
* `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
* (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts.
*/
async updateRef(ref, target) {
await this.run(["update-ref", ref, target]);
}
/**
* Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward,
* i.e. the current `branch` tip is an ancestor of `toCommit` (verified via
* `git merge-base --is-ancestor <branch> <toCommit>`). Used to advance the
* `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a
* push succeeds, Docmost already contains the pushed `main` content, so the
* mirror must reflect it — otherwise the NEXT pull would diff our own write
* back and re-pull it (loop-guard).
*
* SAFETY — never force, never clobber divergent history:
* - If `branch` IS an ancestor of `toCommit`, advance it with
* `git update-ref refs/heads/<branch> <toCommit>`. The `docmost` branch is
* NOT checked out during a push (push works on `main`), so updating the ref
* directly is safe and avoids any working-tree touch.
* - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward),
* do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and
* let the caller log it. We must never overwrite a `docmost` history that
* has commits the push base does not contain.
*
* Returns `{ ok: true }` when the branch was advanced (or already at
* `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise.
* A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason.
*/
async fastForwardBranch(branch, toCommit) {
const branchRef = `refs/heads/${branch}`;
// Resolve both endpoints first so a missing ref is a clean refusal, not a
// confusing `merge-base` failure.
const branchSha = await this.revParse(branchRef);
if (branchSha === null) {
return { ok: false, reason: `branch ${branch} does not exist` };
}
const targetSha = await this.revParse(toCommit);
if (targetSha === null) {
return { ok: false, reason: `target ${toCommit} does not resolve` };
}
// Already at the target -> a no-op fast-forward (still ok).
if (branchSha === targetSha)
return { ok: true };
// `merge-base --is-ancestor A B` exits 0 iff A is an ancestor of B. Only a
// true ancestor is a fast-forward; anything else is divergent and refused.
const ancestor = await this.runRaw([
"merge-base",
"--is-ancestor",
branchSha,
targetSha,
]);
if (ancestor.code !== 0) {
return { ok: false, reason: "not-fast-forward" };
}
// Safe to advance: the branch is not checked out during push, so a direct
// ref update avoids a checkout/working-tree touch.
await this.updateRef(branchRef, targetSha);
return { ok: true };
}
/**
* Read a file's content at a specific ref (`git show <ref>:<path>`), or `null`
* if the path does not exist there. Used by the push direction to read the
* PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its
* `docmost:meta` — and therefore its `pageId` — can be recovered to translate
* the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones
* that had a pageId, are deleted in Docmost). A non-zero exit (path absent at
* that ref) maps to `null` rather than throwing.
*/
async showFileAtRef(ref, path) {
// `git show <ref>:<path>` requires the path relative to the repo root; pass
// it verbatim (forward-slash, matching `listTrackedFiles` / diff output).
const r = await this.runRaw(["show", `${ref}:${path}`]);
if (r.code !== 0)
return null;
return r.stdout;
}
}
/**
* Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
* Used by the single `runRaw` primitive every git command flows through, so
* these pins apply uniformly (including the `git --version` preflight).
*
* cwd-isolation is this module's central safety guarantee: every git command
* MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An
* inherited `GIT_DIR` / `GIT_WORK_TREE` in `process.env` would silently
* redirect the operation away from `cwd` (e.g. to the source repo or another
* checkout), defeating that guarantee. So we always strip them, regardless of
* whatever else the caller adds (author/committer identity, etc.).
*
* Exported for unit testing.
*/
export function vaultGitEnv(extra) {
const env = {
...process.env,
// Locale-independent output (defense in depth). We never parse localized
// prose, but pinning the locale prevents a future regression where some
// git message we DO key on is translated by an inherited LC_ALL/LANG.
LC_ALL: "C",
LANG: "C",
// Never page (we already pass --no-pager, but a stray GIT_PAGER could still
// bite) and never block on an interactive prompt (e.g. credentials) — the
// daemon runs unattended and must not hang.
GIT_PAGER: "cat",
GIT_TERMINAL_PROMPT: "0",
...extra,
};
delete env.GIT_DIR;
delete env.GIT_WORK_TREE;
return env;
}
/**
* Build a commit message body with trailer lines appended (SPEC §7.3). The
* trailers are separated from the subject by a blank line so `git interpret-
* trailers` / `git log --format=%(trailers)` parse them as trailers.
* Exported for unit testing.
*/
export function buildCommitMessage(subject, trailers) {
if (!trailers || trailers.length === 0)
return subject;
return `${subject}\n\n${trailers.join("\n")}`;
}
-44
View File
@@ -1,44 +0,0 @@
/**
* Pure page-tree -> vault path mapping (SPEC §12).
*
* Given the flat list of page nodes for a space (as returned by
* `listAllSpacePages`), compute for every page a deterministic, collision-free
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
* page's own name, no extension). This module is intentionally PURE and
* dependency-free apart from the sanitization helpers, so the whole tree ->
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
* lives in each file's meta block (pageId / slugId).
*/
/** Flat page node as returned by `listAllSpacePages` (no content). */
export interface PageNode {
id: string;
title?: string;
slugId?: string;
parentPageId?: string | null;
hasChildren?: boolean;
}
/** A page's resolved vault destination: folder path + file stem. */
export interface VaultEntry {
/** Folder path, root -> leaf (the page's ancestors). Empty for a root page. */
segments: string[];
/** The page's own file name without extension. */
stem: string;
}
/**
* Build the full vault layout for a space.
*
* Returns a Map keyed by pageId -> `{ segments, stem }`. The result is
* deterministic for a given input and guarantees every full destination path
* (`[...segments, stem].join("/")`) is unique, so no page can silently overwrite
* another.
*
* Disambiguation is layered:
* 1. Sibling collisions (same sanitized title under the same parent) are
* resolved with a stable ` ~<slugId>` suffix (the suffix is itself
* sanitized, since slugId/id is untrusted data that must never inject a
* path separator).
* 2. A final full-path pass catches residual collisions that sibling-scoping
* cannot see — e.g. two pages whose parents are BOTH outside the input set
* both bucket at the root with `segments: []`.
*/
export declare function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry>;
-170
View File
@@ -1,170 +0,0 @@
/**
* Pure page-tree -> vault path mapping (SPEC §12).
*
* Given the flat list of page nodes for a space (as returned by
* `listAllSpacePages`), compute for every page a deterministic, collision-free
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
* page's own name, no extension). This module is intentionally PURE and
* dependency-free apart from the sanitization helpers, so the whole tree ->
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
* lives in each file's meta block (pageId / slugId).
*/
import { sanitizeTitle, disambiguate } from "./sanitize.js";
/**
* Build the full vault layout for a space.
*
* Returns a Map keyed by pageId -> `{ segments, stem }`. The result is
* deterministic for a given input and guarantees every full destination path
* (`[...segments, stem].join("/")`) is unique, so no page can silently overwrite
* another.
*
* Disambiguation is layered:
* 1. Sibling collisions (same sanitized title under the same parent) are
* resolved with a stable ` ~<slugId>` suffix (the suffix is itself
* sanitized, since slugId/id is untrusted data that must never inject a
* path separator).
* 2. A final full-path pass catches residual collisions that sibling-scoping
* cannot see — e.g. two pages whose parents are BOTH outside the input set
* both bucket at the root with `segments: []`.
*/
export function buildVaultLayout(pages) {
// Index pages by id so the parent chain can be walked. Guard against
// duplicate ids in the input (first one wins).
const byId = new Map();
for (const p of pages) {
if (p && p.id && !byId.has(p.id))
byId.set(p.id, p);
}
// Resolve each node's display name once, deterministically, tracking sibling
// collisions per parent. `usedBySibling` maps a parent key -> set of names
// already taken under that parent. The bucket key is the node's parent ONLY
// when that parent is actually present in `byId`; otherwise (null parent, or
// an orphan whose parent is outside the input set) the node buckets at
// `"__root__"`. This is critical: orphans land at the vault root (see
// `folderSegmentsFor`), so they MUST share the root bucket with real root
// pages to be disambiguated against each other here — making `nameById` final
// before any `segments` are computed, so no ancestor name can drift later.
const usedBySibling = new Map();
const nameById = new Map();
for (const p of pages) {
if (p && p.id && !nameById.has(p.id)) {
const parentKey = p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
}
}
// Every id we index above MUST get a resolved name; this helper returns it
// and THROWS if it is somehow absent, rather than silently recomputing a
// DIFFERENT, non-disambiguated name (which would desync a folder segment from
// its target file).
const nameOf = (id) => {
const name = nameById.get(id);
if (name === undefined) {
throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
}
return name;
};
// Build the folder path for a page by walking parentPageId to the root. The
// page's OWN name is the file stem; its ancestors become folders. A `visited`
// guard prevents an infinite loop on a malformed parent cycle.
const folderSegmentsFor = (node) => {
const ancestors = [];
const visited = new Set();
let current = node.parentPageId
? byId.get(node.parentPageId)
: undefined;
while (current && current.id && !visited.has(current.id)) {
visited.add(current.id);
ancestors.unshift(nameOf(current.id));
current = current.parentPageId
? byId.get(current.parentPageId)
: undefined;
}
return ancestors;
};
// First pass: compute the provisional { segments, stem } for every node.
const layout = new Map();
for (const p of pages) {
if (!p || !p.id || layout.has(p.id))
continue;
layout.set(p.id, {
segments: folderSegmentsFor(p),
stem: nameOf(p.id),
});
}
// FOLDER-NOTE transform (native-Obsidian layout): a page WITH CHILDREN lives at
// `<…>/<stem>/<stem>.md` — its body is the folder-note INSIDE its own folder
// (LostPaul Folder Notes convention), and its children sit alongside it in that
// folder. A leaf stays `<…>/<stem>.md`. Children's segments already point into
// the parent's folder (folderSegmentsFor walks ancestor NAMES), so only the
// parent's own file relocates here; the sibling name pass above already made
// the parent name unique, so folder == file name stays consistent.
for (const p of pages) {
if (!p || !p.id)
continue;
const entry = layout.get(p.id);
if (entry && p.hasChildren) {
entry.segments = [...entry.segments, entry.stem];
}
}
// Final full-path uniqueness pass — a belt-and-suspenders safety net. Note
// that cross-bucket (orphan/root) collisions are now resolved in the name pass
// above (orphans share the "__root__" bucket), so ancestor names are final
// before `segments` are built and this pass should rarely/never re-stem an
// ancestor. It only re-stems the colliding LATER leaf via the sanitized
// slugId/id, then (if still colliding) appends the id.
//
// Process FOLDER-NOTES (pages with children) FIRST so a parent claims its
// canonical `<name>/<name>.md` before a same-named CHILD — the child (a leaf)
// is the one that disambiguates, never the folder-note.
const usedPaths = new Set();
const seenIds = new Set();
const pathKey = (e) => [...e.segments, e.stem].join("/");
const ordered = pages
.filter((p) => Boolean(p && p.id))
.sort((a, b) => Number(Boolean(b.hasChildren)) - Number(Boolean(a.hasChildren)));
for (const p of ordered) {
if (seenIds.has(p.id))
continue;
seenIds.add(p.id);
const entry = layout.get(p.id);
if (!entry)
continue;
if (usedPaths.has(pathKey(entry))) {
// First attempt: disambiguate the stem with the sanitized slugId (or id).
entry.stem = disambiguate(entry.stem, sanitizeTitle(p.slugId ?? p.id));
if (usedPaths.has(pathKey(entry))) {
// Still colliding: append the (sanitized) id as a last resort. The id
// is globally unique, so this always resolves the collision.
entry.stem = disambiguate(entry.stem, sanitizeTitle(p.id));
}
}
usedPaths.add(pathKey(entry));
}
return layout;
}
/**
* Compute a deterministic, collision-free name for a node among its SIBLINGS.
* `usedBySibling` maps a parent key -> set of names already taken, so two
* siblings that sanitize to the same name get a stable ` ~slugId` suffix
* (SPEC §12). The suffix is itself passed through `sanitizeTitle`, because the
* slugId/id is a second untrusted-data channel that must never leak a path
* separator into the name. `parentKey` is supplied by the caller (it resolves
* to `"__root__"` for root pages AND for orphans whose parent is outside the
* input set, so they share one bucket). The name is COSMETIC; identity lives in
* the meta block.
*/
function nameForNode(node, parentKey, usedBySibling) {
let used = usedBySibling.get(parentKey);
if (!used) {
used = new Set();
usedBySibling.set(parentKey, used);
}
let name = sanitizeTitle(node.title ?? "");
if (used.has(name)) {
// Sibling collision: disambiguate with the stable, sanitized slugId (fall
// back to the sanitized pageId if no slugId is present).
name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id));
}
used.add(name);
return name;
}
-13
View File
@@ -1,13 +0,0 @@
/**
* Stable hash of a page's markdown BODY (SPEC §10 "хэш тела"). Deterministic:
* the same input string always yields the same digest, a different input a
* different one. Used to recognize our own write later (loop suppression).
*
* We hash the body STRING as-is (UTF-8) with SHA-256 and return lowercase hex.
* SPEC §10 keys on the body hash rather than file bytes; callers decide WHAT
* counts as "the body" (here it is the exact string passed in — typically the
* self-contained markdown that was pushed). No normalization is applied: the
* caller is responsible for passing a canonical/stable representation if it
* wants hash equality across cosmetic-only differences.
*/
export declare function bodyHash(markdownBody: string): string;
-136
View File
@@ -1,136 +0,0 @@
import type { GitSyncClient } from "./client.types.js";
import { type PageNode } from "./layout.js";
import { VaultGit } from "./git.js";
import { type MovedEntry, type DeletionDecision } from "./reconcile.js";
/**
* Injectable IO for `readExisting` (R-Pull-1, test-strategy report §5). The real
* `main` wires these to `git.listTrackedFiles("*.md")` and an `fs.readFile`
* rooted at the vault; tests pass fakes so the parsing/skip rules are unit-
* testable without a real git repo or filesystem.
*/
export interface ReadExistingDeps {
/** List tracked .md paths (forward-slash, vault-relative). */
listTracked: () => Promise<string[]>;
/** Read a tracked file's text by its (forward-slash) vault-relative path. */
readFile: (relPath: string) => Promise<string>;
}
/**
* Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
* its `gitmost_id` frontmatter (native-Obsidian format). Files without a
* `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
* hand-written Obsidian file; PUSH adopts those separately).
*
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
* -> skipped, NOT thrown; the next pull converges;
* - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
*/
export declare function readExisting(deps: ReadExistingDeps): Promise<{
pageId: string;
relPath: string;
}[]>;
/**
* Input to the PURE `computePullActions` (R-Pull-2). All data, no IO: the live
* tree nodes + completeness flag (from `listSpaceTree`) and the parsed
* `existing` tracked files (from `readExisting`).
*/
export interface PullActionsInput {
/** Live page nodes for the space (from `listSpaceTree`). */
pages: PageNode[];
/** Whether the live tree fetch was COMPLETE (SPEC §8 suppression). */
treeComplete: boolean;
/** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */
existing: {
pageId: string;
relPath: string;
}[];
}
/**
* The PURE decisions object computed by `computePullActions` (no IO). It holds
* the reconciliation plan plus the SPEC §8 absence-deletion decision, with the
* suppression already folded in: `toDelete` is the POST-suppression set the
* caller should actually remove (empty when `deletionDecision.apply` is false).
*/
export interface PullActions {
/** Pages to (re)write at their relPath (add + update + move target). */
toWrite: {
pageId: string;
relPath: string;
}[];
/** Moves: write new path, then remove old path (only on a successful write). */
moved: MovedEntry[];
/**
* Absence-based paths to delete AFTER suppression. Empty when the decision
* suppressed deletions this cycle, so the caller can apply it unconditionally.
*/
toDelete: string[];
/** Why absence deletions were (or were not) applied (for logging + tests). */
deletionDecision: DeletionDecision;
/** Tracked-file count (for the suppression log messages). */
existingCount: number;
/** Planned absence-delete count BEFORE suppression (for the log message). */
plannedDeleteCount: number;
}
/**
* PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live
* tree nodes + completeness + existing tracked files and returns the full set of
* decisions with NO IO:
*
* - builds the vault layout (deterministic relPath per live page),
* - `planReconciliation` -> toWrite / moved / absence-toDelete,
* - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch +
* empty-live + mass-delete guard), folded IN here so `toDelete` is the
* POST-suppression set (empty when suppressed).
*
* Moves are NOT governed by the suppression: a moved page is present in `live`,
* so its old-path removal is real (the caller still gates it on the write
* succeeding). The expensive content fetch / file write / git ops happen in the
* thin `applyPullActions`.
*/
export declare function computePullActions(input: PullActionsInput): PullActions;
/**
* Injectable IO for `applyPullActions` (R-Pull-2). The real `main` wires these
* to the live client, the vault git wrapper, and `node:fs/promises`; tests pass
* fakes that RECORD calls so the ordering + the move-on-success data-loss guard
* are testable without real git/fs/network.
*/
export interface ApplyPullActionsDeps {
client: Pick<GitSyncClient, "getPageJson">;
git: Pick<VaultGit, "stageAll" | "commit" | "checkout" | "merge">;
/** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */
writeFile: (absPath: string, text: string) => Promise<void>;
/** Recursive mkdir of an ABSOLUTE directory path. */
mkdir: (absDir: string) => Promise<void>;
/** Remove a file by ABSOLUTE path (force: a missing file is a no-op). */
rm: (absPath: string) => Promise<void>;
}
/** Outcome counters from `applyPullActions` (for the summary + tests). */
export interface ApplyResult {
written: number;
movedApplied: number;
deleted: number;
failed: number;
committed: boolean;
merge: {
ok: boolean;
conflict: boolean;
output: string;
};
}
/**
* THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current
* order, with all the original safety guards preserved bit-for-bit:
*
* 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
* (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
* never aborts the pull (bounded-concurrency pool, fault-tolerant).
* 2. apply MOVE old-path removals — ONLY when the planner marked the old path
* removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a
* failed move-write keeps the old path so the page never vanishes).
* 3. apply (post-suppression) absence deletes.
* 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
* counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
*
* `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps.
*/
export declare function applyPullActions(deps: ApplyPullActionsDeps, actions: PullActions, vaultRoot: string): Promise<ApplyResult>;
-284
View File
@@ -1,284 +0,0 @@
/**
* Pull cycle — Docmost -> vault (SPEC §6 "Docmost -> ФС").
*
* This increment turns the read-only mirror into the git-backed pull cycle:
*
* 1. ensureRepo(vault); refuse if a merge is in progress (SPEC §9/§12);
* ensureBranch("docmost", "main") (SPEC §5 branches)
* 2. checkout docmost
* 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the
* desired `live` files (relPath via the pure sanitize/disambiguation layout)
* 4. parse `existing` tracked .md files (pageId + relPath from gitmost_id frontmatter)
* 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete
* is absence-only, moves are separate
* 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree
* fetch (SPEC §8) and behind the mass-delete guard (defense in depth)
* 7. write each live page in its fixpoint form (normalize-on-write, SPEC §11);
* apply moved-old-path removals (only when the move write SUCCEEDED) and
* absence-delete removals (only when the decision allowed them)
* 8. stageAll + commit on `docmost` with the provenance trailer (SPEC §7.3)
* 9. checkout main + merge docmost (conflicts are surfaced, NOT auto-resolved,
* SPEC §9); push is deferred (SPEC §7)
* 10. one-line summary
*
* DIRECTION IS Docmost -> vault ONLY. Nothing here ever writes to Docmost
* (read-only: listSpaceTree + getPageJson). All git operations run against
* the vault repo (`cwd = vaultPath`), never the source repo (see ./git.ts).
*
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
* the gitmost server drives the engine in-process (there is no standalone CLI
* entry point).
*/
import { dirname } from "node:path";
import { sep } from "node:path";
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
import { buildVaultLayout } from "./layout.js";
import { BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./git.js";
import { planReconciliation, decideAbsenceDeletions, } from "./reconcile.js";
import { stabilizePageBody } from "./stabilize.js";
// Engine-only mirror branch (SPEC §5): the engine writes here, humans never do.
const DOCMOST_BRANCH = "docmost";
// Machine-readable provenance the loop-guard keys on (SPEC §7.3 / §12).
const SOURCE_TRAILER = "Docmost-Sync-Source: docmost";
// Number of pages fetched/stabilized concurrently. Bounded so a large space
// does not open thousands of simultaneous requests/conversions at once.
const CONCURRENCY = 6;
// How often to log incremental progress (every N completed pages).
const PROGRESS_EVERY = 25;
/** Convert a vault-relative path (forward-slash) to an absolute FS path. */
function relToAbs(vaultRoot, relPath) {
return [vaultRoot, ...relPath.split("/")].join("/");
}
/** Convert an absolute/relative segment list under the vault to a relPath. */
function segmentsToRelPath(segments, stem) {
return [...segments, `${stem}.md`].join("/");
}
/**
* Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
* its `gitmost_id` frontmatter (native-Obsidian format). Files without a
* `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
* hand-written Obsidian file; PUSH adopts those separately).
*
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
* -> skipped, NOT thrown; the next pull converges;
* - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
*/
export async function readExisting(deps) {
const tracked = await deps.listTracked();
const existing = [];
for (const relPath of tracked) {
// git ls-files always emits forward-slash paths; normalize just in case.
const rel = relPath.split(sep).join("/");
let text;
try {
text = await deps.readFile(rel);
}
catch {
// Tracked but missing on disk (mid-operation race) — skip; the next pull
// converges.
continue;
}
const { id } = parsePageFile(text);
if (id)
existing.push({ pageId: id, relPath: rel });
}
return existing;
}
/**
* PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live
* tree nodes + completeness + existing tracked files and returns the full set of
* decisions with NO IO:
*
* - builds the vault layout (deterministic relPath per live page),
* - `planReconciliation` -> toWrite / moved / absence-toDelete,
* - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch +
* empty-live + mass-delete guard), folded IN here so `toDelete` is the
* POST-suppression set (empty when suppressed).
*
* Moves are NOT governed by the suppression: a moved page is present in `live`,
* so its old-path removal is real (the caller still gates it on the write
* succeeding). The expensive content fetch / file write / git ops happen in the
* thin `applyPullActions`.
*/
export function computePullActions(input) {
const { pages, treeComplete, existing } = input;
const layout = buildVaultLayout(pages);
const live = [];
for (const p of pages) {
if (!p || !p.id)
continue;
const entry = layout.get(p.id);
if (!entry)
continue;
live.push({
pageId: p.id,
relPath: segmentsToRelPath(entry.segments, entry.stem),
});
}
// Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
// `plan.moved` carries move old-path removals separately.
const plan = planReconciliation(live, existing);
// Decide whether the ABSENCE-based deletions may be applied this cycle
// (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard.
// Moves are NOT governed by this.
const deletionDecision = decideAbsenceDeletions({
treeComplete,
liveCount: live.length,
existingCount: existing.length,
deleteCount: plan.toDelete.length,
});
return {
toWrite: plan.toWrite,
moved: plan.moved,
// Fold the suppression in: a suppressed cycle deletes nothing.
toDelete: deletionDecision.apply ? plan.toDelete : [],
deletionDecision,
existingCount: existing.length,
plannedDeleteCount: plan.toDelete.length,
};
}
/**
* THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current
* order, with all the original safety guards preserved bit-for-bit:
*
* 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
* (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
* never aborts the pull (bounded-concurrency pool, fault-tolerant).
* 2. apply MOVE old-path removals — ONLY when the planner marked the old path
* removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a
* failed move-write keeps the old path so the page never vanishes).
* 3. apply (post-suppression) absence deletes.
* 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
* counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
*
* `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps.
*/
export async function applyPullActions(deps, actions, vaultRoot) {
const { client, git } = deps;
// Emit the SPEC §8 suppression warnings (preserved from the original `main`).
const decision = actions.deletionDecision;
if (!decision.apply) {
if (decision.reason === "incomplete-fetch") {
console.warn("pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)");
}
else if (decision.reason === "empty-live") {
console.warn(`pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` +
`tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
`Docmost is reachable.`);
}
else {
console.warn(`pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` +
`tracked file(s) (mass-delete guard) — deletions suppressed this ` +
`cycle (SPEC §8). Verify the live Docmost tree, then re-run.`);
}
}
// 1. Write each live page in its fixpoint form (normalize-on-write, SPEC §11).
let written = 0;
let failed = 0;
let completed = 0;
let nextIndex = 0;
// pageIds whose write FAILED. A moved page whose new-path write failed must
// NOT have its old path removed (otherwise the page vanishes entirely).
const failedPageIds = new Set();
const writeOne = async (w) => {
try {
const page = await client.getPageJson(w.pageId);
// Native-Obsidian format: a minimal `gitmost_id` frontmatter + the fixpoint
// markdown body. title/parent/space are DERIVED (filename / folder / repo),
// so nothing but the pageId is persisted as meta.
const text = serializePageFile(page.id, await stabilizePageBody(page.content));
const abs = relToAbs(vaultRoot, w.relPath);
await deps.mkdir(dirname(abs));
await deps.writeFile(abs, text);
written++;
}
catch (err) {
failed++;
failedPageIds.add(w.pageId);
console.error(`pull: failed page ${w.pageId}:`, err instanceof Error ? err.message : String(err));
}
finally {
completed++;
if (completed % PROGRESS_EVERY === 0) {
console.log(`pulled ${completed}/${actions.toWrite.length}`);
}
}
};
// Bounded-concurrency pool (dependency-free): a fixed set of runners each
// take the next index until the write list is exhausted. One bad page never
// aborts the whole pull (mirrors the fault-tolerant tree walk).
const runner = async () => {
while (true) {
const i = nextIndex++;
if (i >= actions.toWrite.length)
return;
await writeOne(actions.toWrite[i]);
}
};
await Promise.all(Array.from({ length: Math.min(CONCURRENCY, actions.toWrite.length) || 1 }, () => runner()));
// Helper: `rm` with force:true is a no-op if the file is already gone.
const removePath = async (rel, what) => {
try {
await deps.rm(relToAbs(vaultRoot, rel));
return true;
}
catch (err) {
console.error(`pull: failed to ${what} ${rel}:`, err instanceof Error ? err.message : String(err));
return false;
}
};
// 2. Apply MOVE old-path removals. A moved page IS present in `live`, so its
// old path is genuinely stale — NOT subject to the incomplete-fetch
// suppression. BUT only remove the old path when (a) the planner marked it
// removable (not reused by another live page) AND (b) the new-path write
// actually SUCCEEDED — otherwise we would delete the only copy of a page
// whose move-write failed (⭐ data-loss guard).
let movedApplied = 0;
for (const m of actions.moved) {
if (!m.removeOldPath)
continue;
if (failedPageIds.has(m.pageId)) {
console.warn(`pull: move write for ${m.pageId} failed — keeping old path ` +
`${m.fromRelPath} (SPEC §8)`);
continue;
}
if (await removePath(m.fromRelPath, "remove moved old path"))
movedApplied++;
}
// 3. Apply ABSENCE-based deletions — `actions.toDelete` is ALREADY the
// post-suppression set (empty when the decision suppressed them, SPEC §8).
let deleted = 0;
for (const rel of actions.toDelete) {
if (await removePath(rel, "delete"))
deleted++;
}
// 4. Stage + commit on `docmost` (only if there is something to commit).
// Deterministic stabilized output means unchanged pages produce identical
// bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the
// ACTUAL work applied (pages written + files deleted), not the planned size,
// so a run with failures does not over-report (SPEC §5 nit).
const subject = deleted > 0
? `docmost: sync ${written} page(s), ${deleted} deleted`
: `docmost: sync ${written} page(s)`;
await git.stageAll();
const committed = await git.commit(subject, {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
trailers: [SOURCE_TRAILER],
});
// Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9);
// we never push to Docmost. Push to a git remote is deferred (SPEC §7).
await git.checkout(DEFAULT_BRANCH);
const merge = await git.merge(DOCMOST_BRANCH);
if (merge.conflict) {
console.error("pull: merge of docmost -> main CONFLICTED. Conflict markers were left " +
"in the vault for manual resolution (SPEC §9). Nothing is pushed to " +
"Docmost (read-only). Resolve locally, then re-run.");
}
else if (!merge.ok) {
console.error(`pull: merge of docmost -> main failed: ${merge.output}`);
}
console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");
return { written, movedApplied, deleted, failed, committed, merge };
}
-504
View File
@@ -1,504 +0,0 @@
/**
* Push cycle — vault -> Docmost (SPEC §6 "ФС → Docmost"), FIRST increment.
*
* This module mirrors the structure of `./pull.ts`: a set of VaultGit diff/ref
* primitives (in `./git.ts`), a PURE planner (`computePushActions`) that turns
* a git diff into a classified action set with NO IO, and a THIN injectable
* applier (`applyPushActions`) exercised in tests via fakes only.
*
* Direction is vault -> Docmost. The diff is `main` against
* `refs/docmost/last-pushed` (SPEC §6 step 2); each `A`/`M`/`D`/`R` row is
* translated into a Docmost mutation by `pageId` identity (SPEC §4):
* - A without pageId -> create_page (then write the assigned pageId back).
* - A with pageId -> update (restored/copied file; the page already exists).
* - M -> update content (collab/Yjs path, SPEC §2/§15.6).
* - D -> delete_page (pageId recovered from the PRE-IMAGE meta).
* - R -> rename/move (CLASSIFIED here, APPLIED in push #3).
*
* MOVE/RENAME APPLY (push #3) — DONE here. `classifyRenameMoves` (PURE) resolves
* each `renamesMoves` entry into the Docmost op(s) it needs, comparing the PATH-
* derived parent (SPEC §5: the file path is the source of truth for tree
* position, NOT stale `meta.parentPageId`) and the meta title; `applyPushActions`
* then calls `move_page` / `rename_page` (both for a reparent+retitle), or
* records a NO-OP for a cosmetic local-only file-path rename.
*
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
* the gitmost server drives the engine in-process (there is no standalone CLI
* entry point).
*/
import { type DocmostMdMeta } from "../lib/index.js";
import type { GitSyncClient } from "./client.types.js";
import type { DiffEntry } from "./git.js";
import { VaultGit } from "./git.js";
import { type Settings } from "./settings.js";
export type { DiffEntry } from "./git.js";
/** A page to CREATE in Docmost (new local file, meta has no pageId yet). */
export interface CreateAction {
/** Vault-relative path of the new file. */
path: string;
}
/** A page whose CONTENT changed (meta carries the existing pageId). */
export interface UpdateAction {
pageId: string;
/** Vault-relative path of the changed file. */
path: string;
}
/** A page to soft-delete in Docmost (Trash, SPEC §8). */
export interface DeleteAction {
pageId: string;
}
/** A renamed/moved page (same pageId, new path). Resolution DEFERRED. */
export interface RenameMoveAction {
pageId: string;
oldPath: string;
newPath: string;
}
/**
* A CLASSIFIED rename/move (push #3): a `RenameMoveAction` resolved into the
* Docmost op(s) it actually needs. The file PATH is the source of truth for tree
* position (SPEC §5: "истина связи — pageId, не путь" — the path is COSMETIC and
* LOCAL, the page identity is its pageId), so we compare the RESOLVED parent of
* the new path against the resolved parent of the old path, and the title in the
* current meta against the title in the previous meta. Each sub-op is emitted
* ONLY when something real changed:
* - `move` — the resolved parent page changed (reparent in Docmost). A `null`
* `parentPageId` means the new parent is ROOT (the file sits at the space
* root, no enclosing folder).
* - `rename` — the page title changed (a pure title edit in Docmost).
* - `noop` — neither changed: a purely LOCAL file-path rename (same parent,
* same title). The page identity is its pageId, so Docmost is NOT called.
* `move` and `rename` are independent and may BOTH be present (reparent + retitle).
*/
export interface RenameMoveActionClassified {
pageId: string;
oldPath: string;
newPath: string;
/** Present iff the resolved parent changed -> `move_page` (reparent). */
move?: {
parentPageId: string | null;
};
/** Present iff the title changed -> `rename_page` (title-only). */
rename?: {
title: string;
};
/** True iff neither parent nor title changed (cosmetic local-only rename). */
noop?: true;
}
/**
* Injected resolvers for the PURE `classifyRenameMoves` (push #3). Both are PURE
* given a path + side; the real `main` (a follow-up) wires them to the file tree
* (`readFile` for `current`, `git.showFileAtRef` for `prev`), tests pass plain
* lookups. SPEC §5 path-as-truth:
* - `metaAt`: the file's synthetic native meta at that side (title from the
* filename, pageId from the `gitmost_id` frontmatter).
* - `resolveParentPageId`: the pageId of the page whose FILE is the parent
* FOLDER's `.md` (one level up from the given path), or `null` for ROOT.
*/
export interface ClassifyRenameMovesDeps {
metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
resolveParentPageId: (path: string, side: MetaSide) => string | null;
}
/**
* PURE classifier for the `renamesMoves` produced by `computePushActions`
* (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
* Docmost op(s) it needs, with NO IO (both resolvers are injected).
*
* SPEC §5 — the file PATH is the source of truth for tree position, NOT the
* (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
* `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing
* folder, via `deps.resolveParentPageId`. The title comes from the meta.
*
* For each entry:
* - `newParent = resolveParentPageId(newPath, 'current')`,
* `oldParent = resolveParentPageId(oldPath, 'prev')`.
* - `newTitle = metaAt(newPath,'current')?.title`,
* `oldTitle = metaAt(oldPath,'prev')?.title`.
* - include `move` iff `newParent !== oldParent` (a real reparent),
* - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from
* `oldTitle` (a real title edit; an empty/absent new title is never a rename),
* - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename;
* the page is its pageId, so Docmost is not touched).
*/
export declare function classifyRenameMoves(renamesMoves: RenameMoveAction[], deps: ClassifyRenameMovesDeps): RenameMoveActionClassified[];
/** The classified set of push actions (PURE output of `computePushActions`). */
export interface PushActions {
creates: CreateAction[];
updates: UpdateAction[];
deletes: DeleteAction[];
renamesMoves: RenameMoveAction[];
/**
* Diff rows that could NOT be classified into an action, with a reason — e.g.
* a deleted file whose PRE-IMAGE meta carried no recoverable pageId (the
* untracked-file guard, SPEC §8: only files that were tracked with a pageId
* are deleted in Docmost). Carried so the caller can log them.
*/
skipped: {
path: string;
status: DiffEntry["status"];
reason: string;
}[];
}
/**
* Which tree a `metaAt` lookup reads the file's native meta from:
* - `current`: the current `main` tree (the live file content) — used for
* A/M/R, where the file still exists.
* - `prev`: the last-pushed PRE-IMAGE (e.g. `refs/docmost/last-pushed:<path>`)
* — used for D, where the file is gone from `main` but its pageId must be
* recovered from the version Docmost last knew (SPEC §6/§8).
*/
export type MetaSide = "current" | "prev";
/** Input to the PURE planner. `metaAt` is injected (no IO inside the planner). */
export interface PushActionsInput {
/** Diff rows of `main` vs `refs/docmost/last-pushed` (SPEC §6 step 2). */
changes: DiffEntry[];
/**
* Resolve a file's synthetic native meta at a given side, or `null` if the file is
* absent there / has no parseable meta. PURE injection: the real `main` reads
* the working tree (current) or `git show <last-pushed>:<path>` (prev); tests
* pass a plain lookup.
*/
metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
/**
* The pageIds present at ANY path in the current `main` tree (optional). When
* given, a deleted file whose pageId still lives somewhere in the tree is NOT
* a deletion but a MOVE — guards against trashing a live page when a layout
* reshuffle relocated its file (possibly across two cycles, so the matching
* add isn't in THIS diff). When omitted, only the in-diff D+A/M coalescing
* applies.
*/
currentPageIds?: Set<string>;
}
/**
* PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
* action by `pageId` identity, with NO IO (the `metaAt` resolver is injected).
*
* Classification rules:
* - `A` (added):
* - current meta HAS a pageId -> UPDATE (a restored/copied file whose
* page already exists; we push its content rather than create a dup).
* - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a
* brand-new local file; the page does not exist in Docmost yet).
* - current meta has NO pageId and NO usable spaceId -> SKIP with reason
* `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
* (§16), and a new local file may carry only partial human meta. We
* refuse to create rather than guess a space (SPEC §8 guard spirit).
* - `M` (modified): current meta has a pageId -> UPDATE content. (If a modified
* file somehow lost its pageId it is skipped — there is nothing to target.)
* - `D` (deleted): recover the pageId from the PRE-IMAGE meta (`metaAt(path,
* 'prev')`) -> DELETE. If no pageId can be recovered, SKIP with a reason
* (untracked-file guard, SPEC §8: never delete an untracked page).
* - `R` (renamed/moved): same pageId (from current meta), path changed ->
* RENAME/MOVE. Resolution of move-vs-rename + the new parentPageId is
* DEFERRED to the next increment; here we only record oldPath/newPath/
* pageId. If the renamed file has no recoverable pageId it is SKIPPED.
* (`C` copy is treated the same as `R` for recording purposes.)
*/
export declare function computePushActions(input: PushActionsInput): PushActions;
/** The marker the push direction advances after a successful push (SPEC §5/§6). */
export declare const LAST_PUSHED_REF = "refs/docmost/last-pushed";
/**
* The mirror branch fast-forwarded after a clean push (SPEC §5/§6 step 3). It
* reflects "what Docmost currently contains"; advancing it to the pushed `main`
* commit closes the loop so the next pull diffs empty for the pushed pages.
*/
export declare const DOCMOST_BRANCH = "docmost";
/**
* Injectable IO for `applyPushActions`. The real `main` (NEXT increment) wires
* these to the live client, `node:fs/promises`, and the vault git wrapper; this
* increment drives them only through FAKES in tests (no live destructive run).
* - `client`: the create/update/delete/move/rename subset of `GitSyncClient`.
* - `readFile`/`writeFile`: read a changed file's body / write a file back
* (by vault-relative path; the applier does not resolve absolute paths so
* fakes stay trivial).
* - `git`: `updateRef` (advance `refs/docmost/last-pushed`) and
* `fastForwardBranch` (advance the `docmost` mirror after a clean push, the
* loop-close — SPEC §6 step 3 / §10).
*/
export interface ApplyPushDeps {
client: Pick<GitSyncClient, "importPageMarkdown" | "createPage" | "deletePage" | "movePage" | "renamePage">;
/** Read a changed file's full text by its vault-relative path. */
readFile: (path: string) => Promise<string>;
/** Write a file's full text by its vault-relative path. */
writeFile: (path: string, text: string) => Promise<void>;
/**
* The Docmost spaceId this vault mirrors. A CREATE targets this space (the
* native file carries no spaceId — every file in the vault belongs to it), and
* it backs the synthetic native meta the classifier reads.
*/
spaceId: string;
/**
* `updateRef` advances `refs/docmost/last-pushed`; `fastForwardBranch` advances
* the `docmost` mirror after a clean push. `showFileAtRef` reads a file's text
* at a ref (used by the move/rename classifier to resolve the PREVIOUS parent
* folder's `.md` at `refs/docmost/last-pushed`, SPEC §5 path-as-truth).
*/
git: Pick<VaultGit, "updateRef" | "fastForwardBranch" | "showFileAtRef">;
}
/** A file whose meta was rewritten with a freshly-assigned pageId (post-create). */
export interface WrittenBackPage {
path: string;
pageId: string;
}
/**
* The per-page push record consulted by a FUTURE poll-suppression (SPEC §10): a
* pulled page whose body hash + `updatedAt` match a record here is OUR OWN write
* and must not be re-pulled. PRODUCED here; CONSUMED on the pull side later.
*/
export interface PushedPageRecord {
/** The Docmost pageId that was updated/created. */
pageId: string;
/**
* The `updatedAt` from the create/update client result, when the result
* exposed one. Absent when the (fake) client did not return it.
*/
updatedAt?: string;
/** Stable hash of the markdown BODY that was pushed (SPEC §10 "хэш тела"). */
bodyHash: string;
}
/**
* One page whose operation FAILED during apply (SPEC §12 resumability). The bad
* page is isolated — recorded here — and the rest of the batch still runs; the
* refs are NOT advanced when there is any failure, so a re-run retries cleanly.
*/
export interface PushFailure {
kind: "update" | "create" | "delete" | "move" | "rename";
/** The pageId for update/delete/move/rename; absent for a never-id'd create. */
pageId?: string;
/** The vault-relative path for create/update/move/rename; absent for delete. */
path?: string;
/** The error message captured from the thrown error. */
error: string;
}
/**
* A rename/move action that resolved to a NO-OP (push #3, SPEC §5): a purely
* LOCAL file-path rename whose resolved parent AND title are both unchanged. The
* page identity is its pageId and the path is COSMETIC/local-only, so Docmost is
* NOT called — the skip is recorded here (with the reason) for logging.
*/
export interface PushNoop {
pageId: string;
oldPath: string;
newPath: string;
/** Why no Docmost op was emitted (currently always a path-only rename). */
reason: "path-only-rename";
}
/** Structured outcome of `applyPushActions` (counts + write-backs + noops). */
export interface ApplyPushResult {
created: number;
updated: number;
deleted: number;
/** Pages reparented in Docmost via `move_page` (push #3, SPEC §5/§16). */
moved: number;
/** Pages retitled in Docmost via `rename_page` (push #3, SPEC §5/§6). */
renamed: number;
/**
* Files whose `gitmost_id` frontmatter was written with the pageId Docmost assigned on
* create — these now need a FOLLOW-UP commit (the meta on disk changed). The
* commit itself is the caller's job (NEXT increment); recorded here so it is
* not lost.
*/
writtenBack: WrittenBackPage[];
/**
* Per-page push records (pageId + optional `updatedAt` + body hash) for every
* page successfully updated/created — the §10 loop-guard data a future
* poll-suppression (pull side) will consult so it does not re-pull our own
* write. Deletes are not included (no body was pushed).
*/
pushed: PushedPageRecord[];
/**
* Pages whose operation threw — isolated and recorded, the batch continued
* (SPEC §12). Non-empty here means the refs were NOT advanced.
*/
failures: PushFailure[];
/**
* Rename/move actions that resolved to a NO-OP — a purely LOCAL file-path
* rename (same parent, same title). NO Docmost call was made for these (SPEC
* §5: the page is its pageId, the path is local-only). Recorded for logging.
*/
noops: PushNoop[];
/** Diff rows the planner could not classify (carried through for logging). */
skipped: PushActions["skipped"];
/** Whether `refs/docmost/last-pushed` was advanced (only on a CLEAN push). */
lastPushedAdvanced: boolean;
/**
* Result of fast-forwarding the `docmost` mirror branch after a CLEAN push
* (the loop-close, SPEC §6 step 3 / §10). `null` when no advance was attempted
* (no `pushedCommit`, or there were failures). `{ ok:false, reason }` when a
* non-fast-forward was REFUSED (divergent `docmost` history is never clobbered).
*/
docmostFastForward: {
ok: boolean;
reason?: string;
} | null;
}
/**
* THIN IO applier for the COMMON push cases (create/update/delete). Exercised
* via FAKES only in this increment — there is no live wiring.
*
* - UPDATE: read the file body, then `client.importPageMarkdown(pageId, body)`.
* This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw jsonb
* overwrite. The full self-contained markdown (meta + body) is sent as-is;
* `importPageMarkdown` parses the meta/body itself.
* - CREATE: derive title/spaceId/parentPageId from the file's current meta,
* `client.createPage(...)`, take the assigned pageId from the result, and
* write it BACK as the file's `gitmost_id` frontmatter (re-serialized via
* `serializePageFile`, body preserved) so the file becomes
* tracked. The write-back is recorded in `writtenBack` (a follow-up commit
* is needed — NEXT increment).
* - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
* - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry
* with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for
* the parent pageId — path-as-truth — and the meta for the title), then:
* - `move` -> `client.movePage(pageId, parentPageId, position?)` (reparent;
* `position` is UNDEFINED for now — the client supplies a default),
* - `rename` -> `client.renamePage(pageId, title)` (title-only),
* - BOTH -> move (reparent) THEN rename (title), in that order,
* - `noop` -> NO client call; recorded in `noops` (a cosmetic local-only
* file-path rename: the page is its pageId, the path is local, SPEC §5).
*
* FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation
* is wrapped in its own try/catch: a single failing page is recorded in
* `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES —
* one bad page must never block the rest. Crucially, the refs are advanced ONLY
* when `failures.length === 0`: a PARTIAL push must NOT advance
* `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the
* whole batch cleanly (the already-applied pages are idempotent re-applies).
*
* LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a
* `pushedCommit` is supplied:
* - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND
* - fast-forward the `docmost` mirror branch to it via
* `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects
* what Docmost now contains and the NEXT pull diffs EMPTY for these pages
* (it does not re-pull our own write). The ff is REFUSED (not forced) if
* `docmost` is not an ancestor of the pushed commit; the result is surfaced
* in `docmostFastForward`. On ANY failure, NEITHER ref is advanced.
*
* LOOP-GUARD DATA (SPEC §10). For every page successfully updated/created the
* result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the body
* hash of what was pushed plus the write's `updatedAt` (when the client returned
* one). A future pull-side poll-suppression consults this so it does not re-pull
* our own write; producing it is in scope here, consuming it is deferred.
*
* @param pushedCommit The `main` commit just reflected into Docmost (SHA or
* commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
*/
export declare function applyPushActions(deps: ApplyPushDeps, actions: PushActions, pushedCommit?: string): Promise<ApplyPushResult>;
/**
* SPEC §5 path-as-truth: the parent FOLDER's `.md` file for a vault-relative
* (forward-slash) path. `buildVaultLayout` puts a page with children at
* `<...>/Title.md` and nests its children under `<...>/Title/`, so for
* `newPath = <dir>/Child.md` the parent page's file is `<dir>.md` (the enclosing
* folder, one level up). A path with NO enclosing folder (`Child.md`, at the
* space root) has no parent folder file -> `null` (the parent is ROOT).
*/
export declare function parentFolderFile(path: string): string | null;
/**
* Whether a vault path is a Docmost PAGE file (design §"Адопция"): a `.md` file
* with NO dot-segment anywhere in its path. This excludes `.obsidian/` config,
* `.trash/`, dotfiles (`.foo.md`), and every non-`.md` file (attachments, JSON,
* …) — Obsidian owns those; they live in the vault but are never pages. Used to
* screen the PUSH diff so non-page files are never created/updated/deleted in
* Docmost (and never get a `gitmost_id` frontmatter written into them).
*/
export declare function isPageFile(path: string): boolean;
/**
* The human ("local") git identity used for engine-made commits on `main` in the
* push direction (SPEC §7.3). The provenance is carried by the trailer (below),
* which the loop-guard keys on; the identity is for history readability only.
* When the vault repo already has a configured `user.name`/`user.email`, git
* uses that for the working-tree commit; this is the fallback the daemon stamps.
*/
export declare const LOCAL_AUTHOR_NAME = "Local";
export declare const LOCAL_AUTHOR_EMAIL = "local@local";
/** The provenance trailer marking a `main`-side (human/local) commit (SPEC §7.3). */
export declare const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
/**
* Injectable deps for `runPush` (mirrors `pull.ts`'s wiring; everything that
* touches the outside world is here so tests pass fakes). `makeClient` is a
* FACTORY, not a client — a dry-run must build NO client at all (it is never
* called), and only `--apply` invokes it.
*/
export interface PushDeps {
settings: Settings;
git: Pick<VaultGit, "assertGitAvailable" | "ensureRepo" | "isMergeInProgress" | "checkout" | "stageAll" | "commit" | "readRef" | "revParse" | "diffNameStatus" | "showFileAtRef" | "updateRef" | "fastForwardBranch" | "listTrackedFiles">;
/** Build a real client — called ONLY on `--apply`, never on dry-run. */
makeClient: (settings: Settings) => ApplyPushDeps["client"];
/** Read a file's full text by its vault-relative (forward-slash) path. */
readFile: (path: string) => Promise<string>;
/** Write a file's full text by its vault-relative path. */
writeFile: (path: string, text: string) => Promise<void>;
/** Structured logger (defaults to console in `main`; a recorder in tests). */
log: (line: string) => void;
}
/** The structured outcome of a `runPush` cycle (returned + summarized). */
export interface PushRunResult {
/** Which path ran: `dry-run` (plan only) or `apply` (Docmost mutated). */
mode: "dry-run" | "apply";
/** Why the cycle stopped before planning, if it did (e.g. a left-over merge). */
aborted?: "merge-in-progress";
/** The diff base the plan was computed against (`last-pushed` else `docmost`). */
base?: {
ref: string;
source: "last-pushed" | "docmost";
sha: string | null;
};
/** The `main` commit the plan targets (the would-be pushed commit). */
pushedCommit?: string;
/** Planned action counts from the PURE planner (present once a plan was built). */
planned?: {
creates: number;
updates: number;
deletes: number;
renamesMoves: number;
skipped: number;
};
/** The applier's structured result — ONLY present on the `--apply` path. */
applied?: ApplyPushResult;
/**
* True when `applyPushActions` REFUSED to fast-forward a divergent `docmost`
* mirror (SPEC §5 invariant broken). Escalated (logged prominently) and folded
* into the CLI's non-zero exit.
*/
divergentDocmost?: boolean;
/** Per-page failures from the applier (empty/absent on a clean run). */
failures?: PushFailure[];
}
/**
* Run one FS->Docmost push cycle (SPEC §6 "ФС → Docmost"), DRY-RUN BY DEFAULT.
*
* Steps (mirrors `pull.ts`):
* 1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
* non-zero-ish result) if a merge is in progress — never push on top of an
* unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
* Docmost (SPEC §9).
* 2. Checkout `main` (the human-facing branch the push reads from).
* 3. Commit the human's pending working-tree changes on `main` with the
* `local` provenance trailer (SPEC §7.3). A no-op when nothing changed.
* 4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
* `docmost` mirror branch (what Docmost currently has). Resolve `main`.
* 5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)`
* resolver (current = working tree, prev = `git show <base>:<path>`); run
* the PURE `computePushActions`.
* 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
* calls, NO ref advance.
* 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`,
* then (a) if any pageIds were written back (creates), commit them on `main`
* with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the
* new commit so the recorded pageIds are persisted in what Docmost mirrors;
* (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
* WARNING and a non-zero-ish flag. Then log a one-line summary.
*/
export declare function runPush(deps: PushDeps, opts: {
dryRun: boolean;
}): Promise<PushRunResult>;
/** Parsed `push` CLI flags. DRY-RUN is the default; `--apply` opts into writes. */
export interface PushParsedArgs {
/** True when `--apply` was passed (the ONLY path that writes to Docmost). */
apply: boolean;
}
/**
* Parse the `push` CLI flags. SAFE BY DEFAULT: without `--apply` the run is a
* DRY-RUN (plan only). Exported so the flag handling is unit-testable.
*/
export declare function parseArgs(argv: string[]): PushParsedArgs;
-971
View File
@@ -1,971 +0,0 @@
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
import { DEFAULT_BRANCH } from "./git.js";
import { bodyHash } from "./loop-guard.js";
/**
* PURE classifier for the `renamesMoves` produced by `computePushActions`
* (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
* Docmost op(s) it needs, with NO IO (both resolvers are injected).
*
* SPEC §5 — the file PATH is the source of truth for tree position, NOT the
* (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
* `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing
* folder, via `deps.resolveParentPageId`. The title comes from the meta.
*
* For each entry:
* - `newParent = resolveParentPageId(newPath, 'current')`,
* `oldParent = resolveParentPageId(oldPath, 'prev')`.
* - `newTitle = metaAt(newPath,'current')?.title`,
* `oldTitle = metaAt(oldPath,'prev')?.title`.
* - include `move` iff `newParent !== oldParent` (a real reparent),
* - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from
* `oldTitle` (a real title edit; an empty/absent new title is never a rename),
* - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename;
* the page is its pageId, so Docmost is not touched).
*/
export function classifyRenameMoves(renamesMoves, deps) {
return renamesMoves.map((rm) => {
const newParent = deps.resolveParentPageId(rm.newPath, "current");
const oldParent = deps.resolveParentPageId(rm.oldPath, "prev");
const newTitle = deps.metaAt(rm.newPath, "current")?.title;
const oldTitle = deps.metaAt(rm.oldPath, "prev")?.title;
const out = {
pageId: rm.pageId,
oldPath: rm.oldPath,
newPath: rm.newPath,
};
// A reparent: the new path's resolved parent page differs from the old's.
if (newParent !== oldParent) {
out.move = { parentPageId: newParent };
}
// A title edit: only when there is a real, non-empty new title that changed.
if (typeof newTitle === "string" &&
newTitle.length > 0 &&
newTitle !== oldTitle) {
out.rename = { title: newTitle };
}
// Neither changed -> a purely LOCAL file-path rename; do NOT call Docmost.
if (!out.move && !out.rename) {
out.noop = true;
}
return out;
});
}
/**
* PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
* action by `pageId` identity, with NO IO (the `metaAt` resolver is injected).
*
* Classification rules:
* - `A` (added):
* - current meta HAS a pageId -> UPDATE (a restored/copied file whose
* page already exists; we push its content rather than create a dup).
* - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a
* brand-new local file; the page does not exist in Docmost yet).
* - current meta has NO pageId and NO usable spaceId -> SKIP with reason
* `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
* (§16), and a new local file may carry only partial human meta. We
* refuse to create rather than guess a space (SPEC §8 guard spirit).
* - `M` (modified): current meta has a pageId -> UPDATE content. (If a modified
* file somehow lost its pageId it is skipped — there is nothing to target.)
* - `D` (deleted): recover the pageId from the PRE-IMAGE meta (`metaAt(path,
* 'prev')`) -> DELETE. If no pageId can be recovered, SKIP with a reason
* (untracked-file guard, SPEC §8: never delete an untracked page).
* - `R` (renamed/moved): same pageId (from current meta), path changed ->
* RENAME/MOVE. Resolution of move-vs-rename + the new parentPageId is
* DEFERRED to the next increment; here we only record oldPath/newPath/
* pageId. If the renamed file has no recoverable pageId it is SKIPPED.
* (`C` copy is treated the same as `R` for recording purposes.)
*/
export function computePushActions(input) {
const { metaAt, currentPageIds } = input;
// PAGE-FILE FILTER (design §"Адопция"): only `.md` files OUTSIDE any dot-folder
// are Docmost pages. `.obsidian/*`, attachments, and other non-page files are
// committed to the vault (no `.gitignore`) and so appear in the diff, but they
// are NEVER pages — Obsidian owns them. Without this filter every ADDED such
// file would be mis-classified as a CREATE (nativeMeta always supplies a
// spaceId, so the old `create-without-spaceId` skip no longer screens them),
// creating junk pages in Docmost and corrupting the file with a `gitmost_id`
// frontmatter. Filter BEFORE any classification so non-page A/M/D/R are ignored.
const changes = input.changes.filter((c) => isPageFile(c.path));
const actions = {
creates: [],
updates: [],
deletes: [],
renamesMoves: [],
skipped: [],
};
// GHOST-MOVE coalescing (⭐ data-loss guard). git's rename detection (`-M`)
// can miss a move when the two files are too dissimilar — which is exactly the
// case for the tiny meta-only files a layout RESHUFFLE produces (e.g.
// several untitled pages sharing the `_` fallback name; retitling one frees the
// bare `_` and another page's file relocates `_ ~slug.md` -> `_.md`). git then
// reports the move as a DELETE of the old path + an ADD of the new one. Taken
// literally that soft-deletes a page that merely MOVED — a live page vanishing
// into Trash. Identity is the pageId, not git's heuristic: a pageId that is
// BOTH deleted (pre-image) and added (current) is one page that relocated, so
// we classify it as a rename/move and NEVER as a delete.
// A pageId can land at its new path two ways: as an ADD (the path was free) or
// as a MODIFY (the path was occupied by ANOTHER page that left — the reshuffle
// case, where `_.md`'s occupant changes pageId). Both are "the page survives at
// a new path", so the surviving side is the CURRENT-meta pageId of A *and* M.
const deletedPath = new Map();
const survivingPath = new Map();
for (const change of changes) {
if (change.status === "D") {
const pid = metaAt(change.path, "prev")?.pageId;
if (pid)
deletedPath.set(pid, change.path);
}
else if (change.status === "A" || change.status === "M") {
const pid = metaAt(change.path, "current")?.pageId;
if (pid)
survivingPath.set(pid, change.path);
}
}
const ghostMove = new Map();
for (const [pid, oldPath] of deletedPath) {
const newPath = survivingPath.get(pid);
if (newPath && newPath !== oldPath) {
ghostMove.set(pid, { oldPath, newPath });
}
}
for (const change of changes) {
switch (change.status) {
case "A": {
const meta = metaAt(change.path, "current");
const pageId = meta?.pageId;
if (pageId && ghostMove.has(pageId)) {
// Half of a git-undetected move (a matching DELETE exists): record it
// as a rename/move (like a real `R`), NOT an update — the `D` side is
// suppressed so the page is never soft-deleted.
actions.renamesMoves.push({
pageId,
oldPath: ghostMove.get(pageId).oldPath,
newPath: change.path,
});
}
else if (pageId) {
// Added but already carries a pageId (restored/copied file): the page
// exists in Docmost, so push content as an UPDATE — never a duplicate.
actions.updates.push({ pageId, path: change.path });
}
else if (meta?.spaceId) {
// Brand-new local file with a target space -> create the page, then
// write the assigned pageId back into its meta (in `applyPushActions`).
// `meta.spaceId` is truthy here, so empty-string is also rejected.
actions.creates.push({ path: change.path });
}
else {
// A create needs a spaceId (Docmost `create_page` requires it, §16). A
// new file with partial meta and no usable spaceId is SKIPPED rather
// than created into a guessed space (SPEC §8 guard spirit).
actions.skipped.push({
path: change.path,
status: "A",
reason: "create-without-spaceId",
});
}
break;
}
case "M": {
const meta = metaAt(change.path, "current");
const pageId = meta?.pageId;
if (pageId && ghostMove.has(pageId)) {
// This path's occupant changed pageId: the previous page left and THIS
// page relocated here (a reshuffle). Its old file was DELETED elsewhere
// — coalesce into a rename/move so the page is never trashed.
actions.renamesMoves.push({
pageId,
oldPath: ghostMove.get(pageId).oldPath,
newPath: change.path,
});
}
else if (pageId) {
actions.updates.push({ pageId, path: change.path });
}
else {
// A modified file with no pageId has no Docmost target to update.
actions.skipped.push({
path: change.path,
status: "M",
reason: "modified file has no pageId in meta",
});
}
break;
}
case "D": {
// The file is gone from `main`; recover its pageId from the PRE-IMAGE
// (the version last pushed to Docmost) so we delete the RIGHT page.
const prevMeta = metaAt(change.path, "prev");
const pageId = prevMeta?.pageId;
if (pageId && ghostMove.has(pageId)) {
// The same pageId was re-ADDED at a new path: this is a git-undetected
// MOVE, handled by the `A` branch above. Suppress the delete so a moved
// page is never trashed (⭐ data-loss guard).
actions.skipped.push({
path: change.path,
status: "D",
reason: "ghost-move (re-added at a new path) — not a deletion",
});
}
else if (pageId && currentPageIds?.has(pageId)) {
// The pageId still EXISTS elsewhere in the current tree: the file moved
// (a layout reshuffle whose matching add was in an earlier cycle, so it
// is not in this diff). A live page must never be trashed because its
// FILENAME changed — identity is the pageId (⭐ data-loss guard).
actions.skipped.push({
path: change.path,
status: "D",
reason: "pageId still present in the tree (moved) — not a deletion",
});
}
else if (pageId) {
actions.deletes.push({ pageId });
}
else {
// Untracked-file guard (SPEC §8): a file with no recoverable pageId was
// never a Docmost page — do NOT translate its removal into a delete.
actions.skipped.push({
path: change.path,
status: "D",
reason: "deleted file has no recoverable pageId (pre-image meta)",
});
}
break;
}
case "R":
case "C": {
// Same page, new path. Identity comes from the CURRENT (post-rename) meta
// since the file still exists. RESOLUTION (move vs rename, parentPageId)
// is deferred — record oldPath/newPath/pageId only.
const meta = metaAt(change.path, "current");
const pageId = meta?.pageId;
const oldPath = change.oldPath ?? change.path;
if (pageId) {
actions.renamesMoves.push({
pageId,
oldPath,
newPath: change.path,
});
}
else {
actions.skipped.push({
path: change.path,
status: change.status,
reason: "renamed/moved file has no pageId in meta",
});
}
break;
}
default: {
// Unreachable for A/M/D/R/C; defensive for any future status.
actions.skipped.push({
path: change.path,
status: change.status,
reason: `unhandled diff status ${change.status}`,
});
}
}
}
return actions;
}
// --- thin apply (create/update/delete), fakes-only in this increment ---------
/** The marker the push direction advances after a successful push (SPEC §5/§6). */
export const LAST_PUSHED_REF = "refs/docmost/last-pushed";
/**
* The mirror branch fast-forwarded after a clean push (SPEC §5/§6 step 3). It
* reflects "what Docmost currently contains"; advancing it to the pushed `main`
* commit closes the loop so the next pull diffs empty for the pushed pages.
*/
export const DOCMOST_BRANCH = "docmost";
/**
* THIN IO applier for the COMMON push cases (create/update/delete). Exercised
* via FAKES only in this increment — there is no live wiring.
*
* - UPDATE: read the file body, then `client.importPageMarkdown(pageId, body)`.
* This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw jsonb
* overwrite. The full self-contained markdown (meta + body) is sent as-is;
* `importPageMarkdown` parses the meta/body itself.
* - CREATE: derive title/spaceId/parentPageId from the file's current meta,
* `client.createPage(...)`, take the assigned pageId from the result, and
* write it BACK as the file's `gitmost_id` frontmatter (re-serialized via
* `serializePageFile`, body preserved) so the file becomes
* tracked. The write-back is recorded in `writtenBack` (a follow-up commit
* is needed — NEXT increment).
* - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
* - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry
* with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for
* the parent pageId — path-as-truth — and the meta for the title), then:
* - `move` -> `client.movePage(pageId, parentPageId, position?)` (reparent;
* `position` is UNDEFINED for now — the client supplies a default),
* - `rename` -> `client.renamePage(pageId, title)` (title-only),
* - BOTH -> move (reparent) THEN rename (title), in that order,
* - `noop` -> NO client call; recorded in `noops` (a cosmetic local-only
* file-path rename: the page is its pageId, the path is local, SPEC §5).
*
* FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation
* is wrapped in its own try/catch: a single failing page is recorded in
* `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES —
* one bad page must never block the rest. Crucially, the refs are advanced ONLY
* when `failures.length === 0`: a PARTIAL push must NOT advance
* `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the
* whole batch cleanly (the already-applied pages are idempotent re-applies).
*
* LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a
* `pushedCommit` is supplied:
* - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND
* - fast-forward the `docmost` mirror branch to it via
* `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects
* what Docmost now contains and the NEXT pull diffs EMPTY for these pages
* (it does not re-pull our own write). The ff is REFUSED (not forced) if
* `docmost` is not an ancestor of the pushed commit; the result is surfaced
* in `docmostFastForward`. On ANY failure, NEITHER ref is advanced.
*
* LOOP-GUARD DATA (SPEC §10). For every page successfully updated/created the
* result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the body
* hash of what was pushed plus the write's `updatedAt` (when the client returned
* one). A future pull-side poll-suppression consults this so it does not re-pull
* our own write; producing it is in scope here, consuming it is deferred.
*
* @param pushedCommit The `main` commit just reflected into Docmost (SHA or
* commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
*/
export async function applyPushActions(deps, actions, pushedCommit) {
const { client, git } = deps;
let created = 0;
let updated = 0;
let deleted = 0;
let moved = 0;
let renamed = 0;
const writtenBack = [];
const pushed = [];
const failures = [];
const noops = [];
// 1. UPDATES — collab/Yjs write path (SPEC §2/§15.6), never a raw overwrite.
// Each update is isolated: a thrown page is recorded and the batch goes on.
for (const u of actions.updates) {
try {
// Push the CLEAN body only (no `gitmost_id` frontmatter): the frontmatter
// is engine metadata, never page content. The server converts the markdown
// it receives verbatim, so stripping here keeps the id out of Docmost.
const body = parsePageFile(await deps.readFile(u.path)).body;
// The last-synced version of this file (pre-image) is the common ancestor
// for a 3-way merge against the live page, so concurrent human edits are
// not clobbered (review #5). Null when the file is new at last-pushed. Its
// body is stripped the SAME way so the merge compares body-to-body.
const baseFull = await deps.git.showFileAtRef(LAST_PUSHED_REF, u.path);
const baseMarkdown = baseFull === null ? null : parsePageFile(baseFull).body;
const result = await client.importPageMarkdown(u.pageId, body, baseMarkdown);
updated++;
// §10 loop-guard data: hash the BODY we pushed + capture `updatedAt`.
pushed.push({
pageId: u.pageId,
...extractUpdatedAt(result),
bodyHash: bodyHash(body),
});
}
catch (err) {
failures.push({
kind: "update",
pageId: u.pageId,
path: u.path,
error: errMessage(err),
});
}
}
// 2. CREATES — create the page, then write the assigned pageId back to meta so
// the file becomes tracked (SPEC §4 "записать присвоенный pageId обратно").
// Isolated per page like updates.
for (const c of actions.creates) {
try {
const text = await deps.readFile(c.path);
const { body } = parsePageFile(text);
// Derive create args from the PATH (native-Obsidian, SPEC §5): title from
// the filename, parent from the enclosing folder's folder-note, space from
// the run (the vault's space). `parentPageId: null` -> created at ROOT.
const title = titleFromPath(c.path);
const parentPageId = (await resolveParentPageIdViaTree(deps, c.path, "current")) ?? undefined;
const result = await client.createPage(title, body, deps.spaceId, parentPageId);
// `createPage` returns `{ data: { id, ... }, success }`; the assigned
// pageId is at `result.data.id`.
const assignedPageId = result?.data?.id;
if (assignedPageId) {
// Write the assigned pageId back as the `gitmost_id` frontmatter, body
// preserved — the file becomes engine-tracked (SPEC §4).
const rewritten = serializePageFile(assignedPageId, body);
await deps.writeFile(c.path, rewritten);
writtenBack.push({ path: c.path, pageId: assignedPageId });
// §10 loop-guard data for the created page (hash the pushed BODY).
pushed.push({
pageId: assignedPageId,
...extractUpdatedAt(result),
bodyHash: bodyHash(body),
});
}
created++;
}
catch (err) {
failures.push({ kind: "create", path: c.path, error: errMessage(err) });
}
}
// 3. DELETES — soft-delete to Trash (SPEC §8), reversible. Isolated per page.
for (const d of actions.deletes) {
try {
await client.deletePage(d.pageId);
deleted++;
}
catch (err) {
failures.push({
kind: "delete",
pageId: d.pageId,
error: errMessage(err),
});
}
}
// 4. RENAME/MOVE (push #3, SPEC §5/§6/§16). Classify each entry against the
// tree-backed resolvers (the NEW parent comes from the new path's enclosing
// folder `.md`, the OLD parent from the old path's at last-pushed — PATH is
// the truth, not stale `meta.parentPageId`; the title from the meta), then
// apply only the real ops. Each page is isolated like the cases above: a
// thrown op is recorded in `failures` and the batch continues. ORDER for a
// page that needs both: reparent (move) FIRST, then retitle (rename).
if (actions.renamesMoves.length > 0) {
// The classifier is PURE over sync resolvers; the tree reads are async, so
// prefetch every (path, side) lookup it will make into plain tables first.
const parentTable = new Map();
const metaTable = new Map();
// A tree read (readFile / git.showFileAtRef) throwing must isolate THAT page
// into `failures`, NOT abort the whole batch (§12 resumability). The helpers
// already swallow their own errors, but this per-entry try/catch keeps the
// batch-isolation invariant holding regardless of future changes to them.
const prefetchFailed = new Set();
for (const rm of actions.renamesMoves) {
// newParent + newTitle from the CURRENT tree; oldParent + oldTitle from the
// last-pushed pre-image (`prev`). Keyed by `path|side` so duplicates fold.
try {
parentTable.set(`${rm.newPath}|current`, await resolveParentPageIdViaTree(deps, rm.newPath, "current"));
parentTable.set(`${rm.oldPath}|prev`, await resolveParentPageIdViaTree(deps, rm.oldPath, "prev"));
metaTable.set(`${rm.newPath}|current`, await metaAtViaTree(deps, rm.newPath, "current", deps.spaceId));
metaTable.set(`${rm.oldPath}|prev`, await metaAtViaTree(deps, rm.oldPath, "prev", deps.spaceId));
}
catch (err) {
prefetchFailed.add(rm.pageId);
failures.push({
kind: "move",
pageId: rm.pageId,
path: rm.newPath,
error: errMessage(err),
});
}
}
const classified = classifyRenameMoves(actions.renamesMoves.filter((rm) => !prefetchFailed.has(rm.pageId)), {
metaAt: (path, side) => metaTable.get(`${path}|${side}`) ?? null,
resolveParentPageId: (path, side) => parentTable.get(`${path}|${side}`) ?? null,
});
for (const c of classified) {
if (c.noop) {
// Cosmetic local-only file-path rename — no Docmost op (SPEC §5).
noops.push({
pageId: c.pageId,
oldPath: c.oldPath,
newPath: c.newPath,
reason: "path-only-rename",
});
continue;
}
// Track which op is in flight so a failure is attributed to the op that
// ACTUALLY threw: for a page needing both, a move that succeeds then a
// rename that throws must be recorded as `rename`, not `move`.
let failingKind = c.move ? "move" : "rename";
try {
// Reparent FIRST so the page is in its new tree position, THEN retitle.
if (c.move) {
failingKind = "move";
// TODO(next): compute a fractional-index position between siblings
// (SPEC §16). `position` is UNDEFINED here; the client supplies a valid
// default. Pass `parentPageId: null` for a move to the space ROOT.
await client.movePage(c.pageId, c.move.parentPageId);
moved++;
}
if (c.rename) {
failingKind = "rename";
await client.renamePage(c.pageId, c.rename.title);
renamed++;
}
}
catch (err) {
// Isolate the failed page: the op that ACTUALLY threw is recorded so a
// re-run can retry. A move that threw before its rename leaves `rename`
// for the next run (idempotent re-apply); refs are NOT advanced (below).
failures.push({
kind: failingKind,
pageId: c.pageId,
path: c.newPath,
error: errMessage(err),
});
}
}
}
// 5. Advance the refs ONLY on a CLEAN push (no failures) AND when a pushed
// commit is supplied. A partial push must advance NEITHER ref, so a re-run
// retries the whole batch (SPEC §12). The loop-close (SPEC §6 step 3 / §10):
// advance `refs/docmost/last-pushed` AND fast-forward the `docmost` mirror,
// so Docmost's new content is mirrored and the next pull diffs empty.
let lastPushedAdvanced = false;
let docmostFastForward = null;
if (pushedCommit && failures.length === 0) {
await git.updateRef(LAST_PUSHED_REF, pushedCommit);
lastPushedAdvanced = true;
// Fast-forward the mirror (refused, not forced, on a non-fast-forward — the
// caller logs the reason). Surfaced in the result.
docmostFastForward = await git.fastForwardBranch(DOCMOST_BRANCH, pushedCommit);
}
return {
created,
updated,
deleted,
moved,
renamed,
writtenBack,
pushed,
failures,
noops,
skipped: actions.skipped,
lastPushedAdvanced,
docmostFastForward,
};
}
/** Stringify a thrown value into a stable error message. */
function errMessage(err) {
return err instanceof Error ? err.message : String(err);
}
/**
* SPEC §5 path-as-truth: the parent FOLDER's `.md` file for a vault-relative
* (forward-slash) path. `buildVaultLayout` puts a page with children at
* `<...>/Title.md` and nests its children under `<...>/Title/`, so for
* `newPath = <dir>/Child.md` the parent page's file is `<dir>.md` (the enclosing
* folder, one level up). A path with NO enclosing folder (`Child.md`, at the
* space root) has no parent folder file -> `null` (the parent is ROOT).
*/
export function parentFolderFile(path) {
const slash = path.lastIndexOf("/");
if (slash < 0)
return null; // root-level file: parent is ROOT.
const dir = path.slice(0, slash); // the enclosing folder
// The page that OWNS the enclosing folder is its folder-note `<dir>/<base>.md`.
const folderNote = `${dir}/${baseSegment(dir)}.md`;
if (path === folderNote) {
// This path IS its folder's folder-note, so its parent is ONE LEVEL UP: the
// folder-note of the grandparent folder (or ROOT at the top level).
const up = dir.lastIndexOf("/");
if (up < 0)
return null; // top-level folder -> parent is ROOT.
const grandDir = dir.slice(0, up);
return `${grandDir}/${baseSegment(grandDir)}.md`;
}
// A leaf (or a nested folder-note) sitting inside `dir`: its parent is `dir`'s
// folder-note.
return folderNote;
}
/**
* Whether a vault path is a Docmost PAGE file (design §"Адопция"): a `.md` file
* with NO dot-segment anywhere in its path. This excludes `.obsidian/` config,
* `.trash/`, dotfiles (`.foo.md`), and every non-`.md` file (attachments, JSON,
* …) — Obsidian owns those; they live in the vault but are never pages. Used to
* screen the PUSH diff so non-page files are never created/updated/deleted in
* Docmost (and never get a `gitmost_id` frontmatter written into them).
*/
export function isPageFile(path) {
if (!path.endsWith(".md"))
return false;
return !path.split("/").some((seg) => seg.startsWith("."));
}
/** The last path segment of a forward-slash path (the folder/file base name). */
function baseSegment(path) {
const slash = path.lastIndexOf("/");
return slash < 0 ? path : path.slice(slash + 1);
}
/**
* The page TITLE derived from a vault path: the file's base name without the
* `.md` extension. In the native-Obsidian layout the filename IS the title — for
* a folder-note `<dir>/<base>.md` that base equals the folder name, so the same
* rule yields the folder's title. Self-consistent across pull/push: a pulled
* (possibly disambiguated) filename round-trips to the same title, so a stable
* file never pushes a spurious rename.
*/
function titleFromPath(path) {
const base = baseSegment(path);
return base.endsWith(".md") ? base.slice(0, -3) : base;
}
/**
* Build the synthetic `DocmostMdMeta` the planner/classifier consume, from the
* NATIVE format: `pageId` from the `gitmost_id` frontmatter, `title` from the
* filename, `spaceId` from the run (the vault's space — every file belongs to
* it). `parentPageId` is intentionally absent: tree position is resolved from the
* PATH (`resolveParentPageId`), never from a stored field (SPEC §5).
*/
function nativeMeta(text, path, spaceId) {
const { id } = parsePageFile(text);
const meta = { version: 1, title: titleFromPath(path), spaceId };
if (id)
meta.pageId = id;
return meta;
}
/**
* Build the `resolveParentPageId(path, side)` resolver `classifyRenameMoves`
* needs, reading the PARENT FOLDER's `.md` (SPEC §5 path-as-truth):
* - `current` -> `deps.readFile(<dir>.md)` (the live working tree),
* - `prev` -> `git.showFileAtRef('refs/docmost/last-pushed', <dir>.md)` (the
* last-pushed pre-image),
* then read its `gitmost_id` frontmatter and return that page's pageId. A root-level path
* (no enclosing folder), a missing/unreadable parent file, or a parent file with
* no parseable pageId all resolve to `null` (parent is ROOT / unknown ->
* `parentPageId: null`, SPEC §16 "parentPageId: null -> в корень").
*
* The IO is async, so this returns an ASYNC resolver; the call sites prefetch the
* parent pageIds (the classifier itself stays pure/sync over a plain table).
*/
async function resolveParentPageIdViaTree(deps, path, side) {
const parentFile = parentFolderFile(path);
if (parentFile === null)
return null; // root-level: parent is ROOT.
let text;
try {
text =
side === "current"
? await deps.readFile(parentFile)
: await deps.git.showFileAtRef(LAST_PUSHED_REF, parentFile);
}
catch {
// Parent folder file missing/unreadable at that side -> treat as ROOT.
return null;
}
if (text === null)
return null; // showFileAtRef returns null when absent.
// The parent page's identity is its `gitmost_id` frontmatter; folder position
// is irrelevant here, only the pageId.
return parsePageFile(text).id;
}
/**
* Resolve the synthetic native meta at a side for the rename/move classifier (the
* title — derived from the path — comes from here). Mirrors
* `resolveParentPageIdViaTree`'s IO sides: `current` reads the working tree,
* `prev` reads `refs/docmost/last-pushed`. Returns `null` only when the file is
* missing/unreadable at that side (a real absence the classifier must see).
*/
async function metaAtViaTree(deps, path, side, spaceId) {
let text;
try {
text =
side === "current"
? await deps.readFile(path)
: await deps.git.showFileAtRef(LAST_PUSHED_REF, path);
}
catch {
return null;
}
if (text === null)
return null;
return nativeMeta(text, path, spaceId);
}
/**
* Pull an `updatedAt` out of a create/update client result, if present. The
* shape is `{ data: { updatedAt? }, ... }` (createPage) or a flatter object;
* absent in the simple fakes, so the field is omitted rather than `undefined`.
*/
function extractUpdatedAt(result) {
const r = result;
const raw = r?.data?.updatedAt ?? r?.updatedAt;
return typeof raw === "string" ? { updatedAt: raw } : {};
}
// --- runnable push orchestration (`runPush`) ---------------------------------
//
// `runPush` is the FS->Docmost twin of `pull.ts`'s `main`: it wires the VaultGit
// diff/ref primitives + the PURE `computePushActions` planner + the THIN
// `applyPushActions` applier into one runnable cycle. SAFE BY DEFAULT — the
// engine's FIRST write path to Docmost defaults to DRY-RUN (plan only, NO
// Docmost writes, NO ref advance); an explicit `--apply` is the ONLY path that
// builds a client and mutates Docmost.
//
// Every external effect is injected (`PushDeps`) so the whole orchestration is
// driven by FAKES in tests — no live Docmost, git, fs, or network.
/**
* The human ("local") git identity used for engine-made commits on `main` in the
* push direction (SPEC §7.3). The provenance is carried by the trailer (below),
* which the loop-guard keys on; the identity is for history readability only.
* When the vault repo already has a configured `user.name`/`user.email`, git
* uses that for the working-tree commit; this is the fallback the daemon stamps.
*/
export const LOCAL_AUTHOR_NAME = "Local";
export const LOCAL_AUTHOR_EMAIL = "local@local";
/** The provenance trailer marking a `main`-side (human/local) commit (SPEC §7.3). */
export const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
/**
* Run one FS->Docmost push cycle (SPEC §6 "ФС → Docmost"), DRY-RUN BY DEFAULT.
*
* Steps (mirrors `pull.ts`):
* 1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
* non-zero-ish result) if a merge is in progress — never push on top of an
* unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
* Docmost (SPEC §9).
* 2. Checkout `main` (the human-facing branch the push reads from).
* 3. Commit the human's pending working-tree changes on `main` with the
* `local` provenance trailer (SPEC §7.3). A no-op when nothing changed.
* 4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
* `docmost` mirror branch (what Docmost currently has). Resolve `main`.
* 5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)`
* resolver (current = working tree, prev = `git show <base>:<path>`); run
* the PURE `computePushActions`.
* 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
* calls, NO ref advance.
* 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`,
* then (a) if any pageIds were written back (creates), commit them on `main`
* with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the
* new commit so the recorded pageIds are persisted in what Docmost mirrors;
* (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
* WARNING and a non-zero-ish flag. Then log a one-line summary.
*/
export async function runPush(deps, opts) {
const { git, settings, log } = deps;
const dryRun = opts.dryRun;
// 1. Preflight git. Fail fast (actionable message via main().catch) if the git
// binary is missing — the vault state store relies on it.
await git.assertGitAvailable();
await git.ensureRepo();
// 1b. Refuse to push on top of an unresolved merge (SPEC §9/§12). A previous
// conflicting pull leaves the vault mid-merge; pushing now could leak
// conflict markers into Docmost (SPEC §9, the cardinal invariant). Detect
// it BEFORE any checkout/diff and stop with a clear, actionable message so
// re-runs converge once the human resolves (or aborts) the merge.
if (await git.isMergeInProgress()) {
log(`push: vault has an unresolved merge at ${settings.vaultPath} — resolve ` +
`it (or 'git merge --abort') and re-run. Nothing was pushed to Docmost ` +
`(conflict markers must never reach Docmost, SPEC §9).`);
return { mode: dryRun ? "dry-run" : "apply", aborted: "merge-in-progress" };
}
// 2. Work on `main` — the human-facing branch the push diffs FROM.
await git.checkout(DEFAULT_BRANCH);
// 3. Commit the human's pending working-tree changes on `main` with the `local`
// provenance trailer (SPEC §7.3). A no-op commit when nothing changed is
// fine (`commit` returns false). The loop-guard keys on the trailer.
// Even on a "plan only" dry-run this commits the working tree (it is the
// only way to diff `base..main`, acceptable §6.1 behavior) — so make that
// LOCAL git mutation VISIBLE, never silent: a created commit is local-only
// and nothing is sent to Docmost.
await git.stageAll();
const committedWorkingTree = await git.commit("local: working-tree changes", {
authorName: LOCAL_AUTHOR_NAME,
authorEmail: LOCAL_AUTHOR_EMAIL,
trailers: [LOCAL_SOURCE_TRAILER],
});
if (committedWorkingTree) {
const sha = await git.revParse(DEFAULT_BRANCH);
log(`push: committed local working-tree changes on main` +
(sha ? ` as ${sha.slice(0, 8)}` : "") +
` (local git only — nothing sent to Docmost).`);
}
else {
log("push: working tree clean (no local changes to push).");
}
// 4. Pick the diff BASE (SPEC §5/§6): `refs/docmost/last-pushed` if it resolves
// (the marker of what `main` is already in Docmost), else fall back to the
// `docmost` mirror branch (the mirror of what Docmost currently has) — which
// is what exists before the first push ever advanced last-pushed.
let base;
const lastPushedSha = await git.readRef(LAST_PUSHED_REF);
if (lastPushedSha) {
base = { ref: LAST_PUSHED_REF, source: "last-pushed", sha: lastPushedSha };
}
else {
base = {
ref: DOCMOST_BRANCH,
source: "docmost",
sha: await git.revParse(DOCMOST_BRANCH),
};
}
const pushedCommit = await git.revParse(DEFAULT_BRANCH);
if (!pushedCommit) {
// `main` has no commit — `ensureRepo` always makes an initial one, so this is
// defensive. Nothing to diff.
log("push: `main` has no commit to push — nothing to do.");
return { mode: dryRun ? "dry-run" : "apply", base };
}
// 5. Diff the base against `main` and build the `metaAt` resolver (PURE planner
// input). `current` reads the live working tree; `prev` reads the base ref's
// pre-image via `git show <base>:<path>` (so a DELETE recovers its pageId).
const changes = await git.diffNameStatus(base.ref, DEFAULT_BRANCH);
// Synchronous resolver over PREFETCHED meta tables: `computePushActions` is
// PURE/sync, but the file/ref reads are async — so we prefetch every (path,
// side) the diff will ask for into a table first, then resolve from it.
const metaTable = new Map();
for (const change of changes) {
// `current`: A/M/R/C still have the file on `main`. `prev`: D needs the
// pre-image; R/C also benefit (old title). Prefetch both sides per path.
const currentPath = change.path;
const prevPath = change.oldPath ?? change.path;
if (!metaTable.has(`${currentPath}|current`)) {
metaTable.set(`${currentPath}|current`, await readMetaCurrent(deps, currentPath, settings.docmostSpaceId));
}
if (!metaTable.has(`${prevPath}|prev`)) {
metaTable.set(`${prevPath}|prev`, await readMetaPrev(deps, base.ref, prevPath, settings.docmostSpaceId));
}
}
const metaAt = (path, side) => metaTable.get(`${path}|${side}`) ?? null;
// The set of pageIds that STILL EXIST somewhere in the current `main` tree.
// Identity is the pageId, NOT the filename: a file vanishing from one path
// while the SAME pageId lives at another path is a MOVE (often a layout
// reshuffle of `_`-fallback names, whose two halves can even land in separate
// cycles), never a deletion. Built only when the diff contains deletes — the
// guard's whole job is to stop a phantom delete from trashing a live page.
let currentPageIds;
if (changes.some((c) => c.status === "D")) {
currentPageIds = new Set();
for (const relPath of await git.listTrackedFiles("*.md")) {
const pid = (await readMetaCurrent(deps, relPath, settings.docmostSpaceId))
?.pageId;
if (pid)
currentPageIds.add(pid);
}
}
const actions = computePushActions({ changes, metaAt, currentPageIds });
const planned = {
creates: actions.creates.length,
updates: actions.updates.length,
deletes: actions.deletes.length,
renamesMoves: actions.renamesMoves.length,
skipped: actions.skipped.length,
};
// 6. DRY-RUN (default): log the full plan and RETURN — build NO client, make
// ZERO Docmost calls, advance NO refs. This is the SAFE default.
logPlan(log, base, pushedCommit, actions, planned, dryRun);
if (dryRun) {
return { mode: "dry-run", base, pushedCommit, planned };
}
// 7. --apply: build the REAL client and execute. This is the ONLY write path.
const client = deps.makeClient(settings);
const applied = await applyPushActions({
client,
// Pass the WHOLE `git` object (it satisfies the applier's
// `Pick<VaultGit, ...>` deps surface). Passing bare method references
// (`git.updateRef`, …) would lose their `this` binding, so on a REAL
// `VaultGit` they would throw `this.runRaw is not a function`. Hand over
// the object so the methods keep their receiver — exactly as `pull.ts`
// does for `applyPullActions`.
git,
readFile: deps.readFile,
writeFile: deps.writeFile,
spaceId: settings.docmostSpaceId,
}, actions, pushedCommit);
// 7a. Persist freshly-assigned pageIds (creates) back into git. `applyPushActions`
// rewrote those files on disk; commit them on `main` with the `local` trailer
// so the new pageIds are recorded, then RE-advance `refs/docmost/last-pushed`
// to the new commit so what Docmost mirrors and what last-pushed points at
// stay in lock-step (the write-back commit is part of `main` now).
// Track a divergent-`docmost` mirror across BOTH ff sites (the applier's main
// push ff in 7b, and the write-back ff here). A divergent mirror is a §5
// invariant breach in EITHER branch and must escalate identically (exit 1).
let divergentDocmost = false;
if (applied.writtenBack.length > 0) {
await git.stageAll();
const recorded = await git.commit("local: record created pageIds", {
authorName: LOCAL_AUTHOR_NAME,
authorEmail: LOCAL_AUTHOR_EMAIL,
trailers: [LOCAL_SOURCE_TRAILER],
});
if (recorded) {
const newCommit = await git.revParse(DEFAULT_BRANCH);
// Only re-advance when the original push was CLEAN (last-pushed was already
// advanced by the applier); a partial push left the refs untouched and a
// re-run retries the whole batch, so we must not move them either.
if (newCommit && applied.lastPushedAdvanced) {
await git.updateRef(LAST_PUSHED_REF, newCommit);
const ff = await git.fastForwardBranch(DOCMOST_BRANCH, newCommit);
if (!ff.ok) {
// SYMMETRIC with the main escalation (7b): a divergent mirror in the
// write-back branch is the SAME §5 invariant breach and must escalate
// (exit 1), not just log a soft warning.
divergentDocmost = true;
log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
`fast-forwarded to the pageId write-back commit ` +
`(${ff.reason ?? "not-fast-forward"}). The §5 invariant ('docmost' ` +
`mirrors what Docmost contains) is broken: reconcile 'docmost' ` +
`against the live Docmost tree before the next cycle.`);
}
}
}
}
// 7b. ESCALATE a divergent-`docmost` fast-forward refusal (SPEC §5 invariant
// broken). The applier already refused to clobber a divergent mirror; make
// it LOUD (not silent) so the operator notices, and fold it into the exit.
if (applied.docmostFastForward && !applied.docmostFastForward.ok) {
divergentDocmost = true;
log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
`fast-forwarded (${applied.docmostFastForward.reason ?? "not-fast-forward"}). ` +
`The §5 invariant ('docmost' mirrors what Docmost contains) is broken: ` +
`reconcile 'docmost' against the live Docmost tree before the next cycle.`);
}
// 7c. One-line summary (mirrors pull.ts's summary line).
log(`push complete: ${applied.created} created, ${applied.updated} updated, ` +
`${applied.deleted} deleted, ${applied.moved} moved, ${applied.renamed} ` +
`renamed, ${applied.noops.length} no-op(s), ${applied.skipped.length} ` +
`skipped, ${applied.failures.length} failure(s)` +
(divergentDocmost ? " [DIVERGENT docmost mirror]" : ""));
return {
mode: "apply",
base,
pushedCommit,
planned,
applied,
divergentDocmost,
failures: applied.failures,
};
}
/** Synthetic native meta from the live working tree (`current` side). */
async function readMetaCurrent(deps, path, spaceId) {
let text;
try {
text = await deps.readFile(path);
}
catch {
return null; // absent on disk (e.g. a D row's path) -> no current meta.
}
return nativeMeta(text, path, spaceId);
}
/** Synthetic native meta from the base ref's pre-image (`prev` side). */
async function readMetaPrev(deps, baseRef, path, spaceId) {
let text;
try {
text = await deps.git.showFileAtRef(baseRef, path);
}
catch {
return null;
}
if (text === null)
return null; // path absent at the base ref.
return nativeMeta(text, path, spaceId);
}
/** Emit the full plan (counts + per-item) to the injected logger. */
function logPlan(log, base, pushedCommit, actions, planned, dryRun) {
log(`push plan (${dryRun ? "DRY-RUN — no Docmost writes" : "APPLY"}): base=` +
`${base.ref} (${base.source}${base.sha ? ` ${base.sha.slice(0, 8)}` : ""}) ` +
`-> main ${pushedCommit.slice(0, 8)}`);
log(`push plan counts: ${planned.creates} create, ${planned.updates} update, ` +
`${planned.deletes} delete, ${planned.renamesMoves} rename/move, ` +
`${planned.skipped} skipped`);
for (const c of actions.creates)
log(` create: ${c.path}`);
for (const u of actions.updates)
log(` update: ${u.pageId} (${u.path})`);
for (const d of actions.deletes)
log(` delete: ${d.pageId}`);
for (const rm of actions.renamesMoves)
log(` rename/move: ${rm.oldPath} -> ${rm.newPath} (${rm.pageId})`);
for (const s of actions.skipped)
log(` skipped [${s.status}] ${s.path}: ${s.reason}`);
}
/**
* Parse the `push` CLI flags. SAFE BY DEFAULT: without `--apply` the run is a
* DRY-RUN (plan only). Exported so the flag handling is unit-testable.
*/
export function parseArgs(argv) {
return { apply: argv.includes("--apply") };
}
-126
View File
@@ -1,126 +0,0 @@
/**
* Pure reconciliation planner (SPEC §5/§6/§8).
*
* Given the desired live set of files (computed from the current Docmost tree)
* and the set of files currently tracked in the vault, compute what to write,
* what to move (old path to remove), and what to delete. Identity is `pageId`
* (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
* changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
* the live tree is a DELETE.
*
* This module is intentionally PURE (no IO, no git) so the whole plan is
* unit-testable. The actual file writing / git operations happen in pull.ts.
*/
/** A page that SHOULD exist in the vault at a given path. */
export interface LiveEntry {
pageId: string;
/** Vault-relative path (forward-slash), e.g. `Space/Parent/Child.md`. */
relPath: string;
}
/** A page currently tracked in the vault (pageId parsed from its meta). */
export interface ExistingEntry {
pageId: string;
/** Vault-relative path (forward-slash) of the tracked file. */
relPath: string;
}
/** A page to (re)write at its destination path. */
export interface WriteEntry {
pageId: string;
relPath: string;
}
/** A page that moved: written at its NEW relPath, with the OLD path removed. */
export interface MovedEntry {
pageId: string;
fromRelPath: string;
toRelPath: string;
/**
* Whether the old path (`fromRelPath`) is SAFE to remove. False when another
* live page will (re)write that exact path (path reuse): removing it would
* destroy real data, so the caller must skip the removal. The move itself is
* still recorded (the new path is written regardless).
*/
removeOldPath: boolean;
}
/** The full reconciliation plan. */
export interface ReconciliationPlan {
/**
* Pages present in `live` -> (re)write at their relPath. This naturally
* covers add, content-update (same path) AND move (same pageId, new path),
* since every live page is (re)written regardless of whether it existed.
*/
toWrite: WriteEntry[];
/**
* Vault-relative paths to delete because their tracked pageId is ABSENT from
* `live` (page removed/trashed). This set is ONLY absence-based deletions —
* the OLD paths of moved pages are NOT here (they live in `moved` and are
* applied separately by the caller). Keeping the two apart lets pull.ts gate
* absence deletions behind the incomplete-fetch suppression + mass-delete
* guard (SPEC §8) while still applying real moves.
*/
toDelete: string[];
/**
* Tracked pages whose relPath changed. The caller writes the page at
* `toRelPath`, then removes `fromRelPath` — but ONLY after the new-path write
* succeeded. The old path is NOT in `toDelete`.
*/
moved: MovedEntry[];
}
/**
* Compute the reconciliation plan.
*
* Rules:
* - Every `live` page is written at its relPath (covers add + update + move).
* - A tracked pageId present in `live` whose relPath changed is `moved`; its
* OLD relPath goes into `moved` ONLY (the caller removes it after the new
* path is written) and is NEVER added to `toDelete`.
* - A tracked pageId NOT present in `live` is an ABSENCE delete; its relPath
* is added to `toDelete`.
*
* Notes:
* - Safety filter (no data loss): no path that is a live TARGET path of any
* page is ever deleted/removed (a write owns it). This applies to BOTH the
* absence `toDelete` set AND a moved page's old-path removal — if a moved
* page's OLD path is reused by ANOTHER live page, the move records no old
* path to remove, because that path will be (re)written.
* - `existing` may legitimately contain duplicate pageIds (two stray files
* carrying the same meta pageId); each such file that is not the live target
* path is removed (as an absence/move) so the vault converges to exactly the
* live set.
*/
export declare function planReconciliation(live: LiveEntry[], existing: ExistingEntry[]): ReconciliationPlan;
/**
* Below this many tracked files the mass-delete fraction guard is not applied
* (a tiny vault where deleting "most" files is normal, e.g. 1-of-2).
*/
export declare const MASS_DELETE_MIN_EXISTING = 4;
/** Fraction of tracked files above which a delete plan is a suspected wipe. */
export declare const MASS_DELETE_FRACTION = 0.5;
/** Why absence-based deletions were (or were not) applied this cycle. */
export type DeletionDecision = {
apply: true;
} | {
apply: false;
reason: "incomplete-fetch" | "empty-live" | "mass-delete";
};
/**
* Pure decision: should the ABSENCE-based deletions (`plan.toDelete`) be applied
* this cycle? Encapsulates the SPEC §8 safety invariants so they are unit-
* testable without live creds or git:
*
* - `treeComplete === false` (a partial Docmost tree fetch) -> SUPPRESS. A page
* missing from a partial tree is NOT proof of deletion (SPEC §8); we must not
* delete merely-absent files this cycle. (Writes/updates/moves still happen.)
* - The live fetch returned 0 pages while files are tracked -> SUPPRESS
* (almost always a failed fetch, never a real "delete everything").
* - The plan would delete more than `MASS_DELETE_FRACTION` of a non-trivial
* vault -> SUPPRESS as a mass-deletion guard (defense in depth).
*
* Moves are NOT governed by this decision: a moved page IS present in `live`, so
* its old-path removal is real (handled by the caller separately).
*/
export declare function decideAbsenceDeletions(args: {
treeComplete: boolean;
liveCount: number;
existingCount: number;
deleteCount: number;
}): DeletionDecision;
-117
View File
@@ -1,117 +0,0 @@
/**
* Pure reconciliation planner (SPEC §5/§6/§8).
*
* Given the desired live set of files (computed from the current Docmost tree)
* and the set of files currently tracked in the vault, compute what to write,
* what to move (old path to remove), and what to delete. Identity is `pageId`
* (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
* changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
* the live tree is a DELETE.
*
* This module is intentionally PURE (no IO, no git) so the whole plan is
* unit-testable. The actual file writing / git operations happen in pull.ts.
*/
/**
* Compute the reconciliation plan.
*
* Rules:
* - Every `live` page is written at its relPath (covers add + update + move).
* - A tracked pageId present in `live` whose relPath changed is `moved`; its
* OLD relPath goes into `moved` ONLY (the caller removes it after the new
* path is written) and is NEVER added to `toDelete`.
* - A tracked pageId NOT present in `live` is an ABSENCE delete; its relPath
* is added to `toDelete`.
*
* Notes:
* - Safety filter (no data loss): no path that is a live TARGET path of any
* page is ever deleted/removed (a write owns it). This applies to BOTH the
* absence `toDelete` set AND a moved page's old-path removal — if a moved
* page's OLD path is reused by ANOTHER live page, the move records no old
* path to remove, because that path will be (re)written.
* - `existing` may legitimately contain duplicate pageIds (two stray files
* carrying the same meta pageId); each such file that is not the live target
* path is removed (as an absence/move) so the vault converges to exactly the
* live set.
*/
export function planReconciliation(live, existing) {
// Desired path for each live pageId.
const liveByPageId = new Map();
// Set of all paths that WILL be written (never delete/remove one of these).
const liveTargetPaths = new Set();
for (const e of live) {
liveByPageId.set(e.pageId, e.relPath);
liveTargetPaths.add(e.relPath);
}
const toWrite = live.map((e) => ({
pageId: e.pageId,
relPath: e.relPath,
}));
const moved = [];
// Absence-based deletions ONLY (tracked pageId absent from `live`). Use a Set
// so the same path coming from multiple existing rows is queued only once.
const toDeleteSet = new Set();
for (const ex of existing) {
const liveRel = liveByPageId.get(ex.pageId);
if (liveRel === undefined) {
// Tracked page is gone from the live tree -> absence delete.
// Never queue a path a live page will (re)write (path reuse -> no loss).
if (!liveTargetPaths.has(ex.relPath))
toDeleteSet.add(ex.relPath);
continue;
}
if (liveRel !== ex.relPath) {
// Same pageId, different path -> a MOVE. Record it so the caller can write
// the new path first, then remove the old one. If the old path is itself a
// live target (reused by another page), it must NOT be removed — the write
// owns it — so flag `removeOldPath: false` (move still recorded).
moved.push({
pageId: ex.pageId,
fromRelPath: ex.relPath,
toRelPath: liveRel,
removeOldPath: !liveTargetPaths.has(ex.relPath),
});
}
// liveRel === ex.relPath -> content-update in place; nothing extra to do
// (the write above re-emits the file; identical bytes => git no-op).
}
const toDelete = [...toDeleteSet];
return { toWrite, toDelete, moved };
}
/**
* Below this many tracked files the mass-delete fraction guard is not applied
* (a tiny vault where deleting "most" files is normal, e.g. 1-of-2).
*/
export const MASS_DELETE_MIN_EXISTING = 4;
/** Fraction of tracked files above which a delete plan is a suspected wipe. */
export const MASS_DELETE_FRACTION = 0.5;
/**
* Pure decision: should the ABSENCE-based deletions (`plan.toDelete`) be applied
* this cycle? Encapsulates the SPEC §8 safety invariants so they are unit-
* testable without live creds or git:
*
* - `treeComplete === false` (a partial Docmost tree fetch) -> SUPPRESS. A page
* missing from a partial tree is NOT proof of deletion (SPEC §8); we must not
* delete merely-absent files this cycle. (Writes/updates/moves still happen.)
* - The live fetch returned 0 pages while files are tracked -> SUPPRESS
* (almost always a failed fetch, never a real "delete everything").
* - The plan would delete more than `MASS_DELETE_FRACTION` of a non-trivial
* vault -> SUPPRESS as a mass-deletion guard (defense in depth).
*
* Moves are NOT governed by this decision: a moved page IS present in `live`, so
* its old-path removal is real (handled by the caller separately).
*/
export function decideAbsenceDeletions(args) {
const { treeComplete, liveCount, existingCount, deleteCount } = args;
// No tracked files, or nothing to delete -> trivially fine to "apply".
if (existingCount === 0 || deleteCount === 0)
return { apply: true };
if (!treeComplete)
return { apply: false, reason: "incomplete-fetch" };
if (liveCount === 0)
return { apply: false, reason: "empty-live" };
if (existingCount >= MASS_DELETE_MIN_EXISTING &&
deleteCount > existingCount * MASS_DELETE_FRACTION) {
return { apply: false, reason: "mass-delete" };
}
return { apply: true };
}
-21
View File
@@ -1,21 +0,0 @@
/**
* Pure, IO-free comparison helpers for the idempotency round-trip checks. The
* round-trip harness that drives these lives in the package's tests, not in the
* engine.
*/
/**
* Recursively strip every `attrs.id` from a ProseMirror node tree. Block ids
* are regenerated by `markdownToProseMirror` (SPEC §11), so they must be
* ignored when comparing the semantic shape of two documents. Returns a NEW
* tree; the input is not mutated.
*/
export declare function stripBlockIds(node: any): any;
/**
* Find the first divergence between two values via a recursive deep compare.
* Returns a short path + the two differing values, or null if they are equal.
*/
export declare function firstDivergence(a: any, b: any, path?: string): {
path: string;
a: any;
b: any;
} | null;
@@ -1,70 +0,0 @@
/**
* Pure, IO-free comparison helpers for the idempotency round-trip checks. The
* round-trip harness that drives these lives in the package's tests, not in the
* engine.
*/
/**
* Recursively strip every `attrs.id` from a ProseMirror node tree. Block ids
* are regenerated by `markdownToProseMirror` (SPEC §11), so they must be
* ignored when comparing the semantic shape of two documents. Returns a NEW
* tree; the input is not mutated.
*/
export function stripBlockIds(node) {
if (Array.isArray(node)) {
return node.map(stripBlockIds);
}
if (node && typeof node === "object") {
const out = {};
for (const key of Object.keys(node)) {
if (key === "attrs" && node.attrs && typeof node.attrs === "object") {
// Drop the `id` attr; keep every other attribute.
const { id, ...rest } = node.attrs;
void id;
out.attrs = stripBlockIds(rest);
}
else {
out[key] = stripBlockIds(node[key]);
}
}
return out;
}
return node;
}
/**
* Find the first divergence between two values via a recursive deep compare.
* Returns a short path + the two differing values, or null if they are equal.
*/
export function firstDivergence(a, b, path = "$") {
if (a === b)
return null;
const ta = typeof a;
const tb = typeof b;
if (ta !== tb || a === null || b === null) {
return { path, a, b };
}
if (ta !== "object") {
return { path, a, b };
}
const aIsArr = Array.isArray(a);
const bIsArr = Array.isArray(b);
if (aIsArr !== bIsArr)
return { path, a, b };
if (aIsArr) {
if (a.length !== b.length) {
return { path: `${path}.length`, a: a.length, b: b.length };
}
for (let i = 0; i < a.length; i++) {
const d = firstDivergence(a[i], b[i], `${path}[${i}]`);
if (d)
return d;
}
return null;
}
const keys = new Set([...Object.keys(a), ...Object.keys(b)]);
for (const k of keys) {
const d = firstDivergence(a[k], b[k], `${path}.${k}`);
if (d)
return d;
}
return null;
}
-23
View File
@@ -1,23 +0,0 @@
/**
* Deterministic filename strategy (SPEC §12).
*
* The file name is COSMETIC — the source of truth for the file<->page link is
* `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
* functions are intentionally dependency-free and pure, so they are trivially
* unit-testable.
*/
/**
* Sanitize a page title into a safe file-name component (WITHOUT extension).
*
* Steps: replace forbidden / control characters with "-", collapse whitespace
* runs to a single space, trim, cap the length, then guard against an empty
* result, an all-dots result, or a reserved Windows device name by prefixing
* with "_".
*/
export declare function sanitizeTitle(title: string): string;
/**
* Disambiguate a sanitized name when two siblings in the same folder collapse
* to the same name. Appends a stable suffix built from the page's `slugId`, so
* the result stays deterministic across runs (SPEC §12: `Title ~slugId`).
*/
export declare function disambiguate(name: string, slugId: string): string;
-41
View File
@@ -1,41 +0,0 @@
/**
* Engine settings.
*
* The engine is driven IN-PROCESS by the NestJS server, which builds the
* `Settings` object from `EnvironmentService` — so this module must NOT reach
* into `process.env`. It exposes only:
* - the `Settings` type the engine consumes, and
* - `parseSettings(env)` as a PURE function (validate a raw env object -> typed
* `Settings`), kept for unit tests and for the server to reuse if it wants
* to validate an env-shaped object.
* There is no `.env`-loading side-effecting entry point.
*/
import { z } from 'zod';
export declare const envSchema: z.ZodObject<{
DOCMOST_API_URL: z.ZodString;
DOCMOST_EMAIL: z.ZodString;
DOCMOST_PASSWORD: z.ZodString;
DOCMOST_SPACE_ID: z.ZodString;
VAULT_PATH: z.ZodDefault<z.ZodString>;
GIT_REMOTE: z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodOptional<z.ZodString>>;
POLL_INTERVAL_MS: z.ZodDefault<z.ZodCoercedNumber<unknown>>;
DEBOUNCE_MS: z.ZodDefault<z.ZodCoercedNumber<unknown>>;
LOG_LEVEL: z.ZodDefault<z.ZodEnum<{
info: "info";
error: "error";
debug: "debug";
warn: "warn";
}>>;
}, z.core.$strip>;
export type Settings = {
docmostApiUrl: string;
docmostEmail: string;
docmostPassword: string;
docmostSpaceId: string;
vaultPath: string;
gitRemote?: string;
pollIntervalMs: number;
debounceMs: number;
logLevel: 'debug' | 'info' | 'warn' | 'error';
};
export declare function parseSettings(env: NodeJS.ProcessEnv): Settings;
@@ -1,49 +0,0 @@
/**
* Engine settings.
*
* The engine is driven IN-PROCESS by the NestJS server, which builds the
* `Settings` object from `EnvironmentService` — so this module must NOT reach
* into `process.env`. It exposes only:
* - the `Settings` type the engine consumes, and
* - `parseSettings(env)` as a PURE function (validate a raw env object -> typed
* `Settings`), kept for unit tests and for the server to reuse if it wants
* to validate an env-shaped object.
* There is no `.env`-loading side-effecting entry point.
*/
import { z } from 'zod';
// Schema keyed by the real ENV variable names so validation errors name the
// exact variable. Credentials and the address of our OWN Docmost instance have
// NO default — a missing value must fail at startup, never silently fall back.
export const envSchema = z.object({
// Docmost connection — address of our own instance, no default.
DOCMOST_API_URL: z.string().url(),
// Credentials for /auth/login — no default, never hardcoded.
DOCMOST_EMAIL: z.string().min(1),
DOCMOST_PASSWORD: z.string().min(1),
// Which Docmost space to mirror.
DOCMOST_SPACE_ID: z.string().min(1),
// Local git vault (state store) — kept under data/ so the volume persists it.
VAULT_PATH: z.string().min(1).default('data/vault'),
// Optional git remote the vault pushes to. Empty string is treated as unset.
GIT_REMOTE: z.preprocess((v) => (v === '' ? undefined : v), z.string().min(1).optional()),
// Non-secret tunables — sensible defaults are fine.
POLL_INTERVAL_MS: z.coerce.number().int().positive().default(15000),
DEBOUNCE_MS: z.coerce.number().int().positive().default(2000),
LOG_LEVEL: z.enum(['debug', 'info', 'warn', 'error']).default('info'),
});
// Pure: validate a raw environment object and map it to a typed Settings.
// Throws ZodError on bad config. No side effects — safe to import in tests.
export function parseSettings(env) {
const e = envSchema.parse(env);
return {
docmostApiUrl: e.DOCMOST_API_URL,
docmostEmail: e.DOCMOST_EMAIL,
docmostPassword: e.DOCMOST_PASSWORD,
docmostSpaceId: e.DOCMOST_SPACE_ID,
vaultPath: e.VAULT_PATH,
gitRemote: e.GIT_REMOTE,
pollIntervalMs: e.POLL_INTERVAL_MS,
debounceMs: e.DEBOUNCE_MS,
logLevel: e.LOG_LEVEL,
};
}
-41
View File
@@ -1,41 +0,0 @@
/**
* Meta object as `exportPageBody` builds it (SPEC §4). Kept byte-for-byte
* compatible so files produced here match `exportPageBody`'s output exactly.
*/
export interface PageMeta {
version: 1;
pageId: string;
slugId: string;
title: string;
spaceId: string;
parentPageId: string | null;
}
/**
* Produce the self-contained `.md` file text for a page from its raw
* ProseMirror `content` + identity meta, in the verified fixpoint form.
*
* md1 = convertProseMirrorToMarkdown(content)
* doc2 = markdownToProseMirror(md1) // one import...
* stableBody = convertProseMirrorToMarkdown(doc2) // ...and re-export
* file = serializeDocmostMarkdownBody(meta, stableBody)
*
* The single export->import->export pass is the verified fixpoint (SPEC §11):
* idempotent for already-stable content, and the convergence point for the
* known converter asymmetries.
*/
export declare function stabilizePageFile(content: unknown, meta: PageMeta): Promise<string>;
/**
* The fixpoint markdown BODY for a page's ProseMirror `content`, WITHOUT any meta
* envelope:
*
* md1 = convertProseMirrorToMarkdown(content) // export...
* doc2 = markdownToProseMirror(md1) // ...import...
* stableBody = convertProseMirrorToMarkdown(doc2) // ...re-export
*
* The single export->import->export pass is the verified fixpoint (SPEC §11):
* idempotent for already-stable content, and the convergence point for the known
* converter asymmetries. The native-Obsidian writer (`serializePageFile`) wraps
* this body with a minimal `gitmost_id` frontmatter; determinism here is what
* keeps re-pulls of an unchanged page byte-identical (no churn, loop-guard).
*/
export declare function stabilizePageBody(content: unknown): Promise<string>;
-31
View File
@@ -1,31 +0,0 @@
/**
* Public surface of `@docmost/git-sync`.
*
* Exposes the pure converter (markdown <-> ProseMirror, file envelope,
* canonicalization) and the sync engine (reconcile planner, vault layout,
* pull/push, the git wrapper, and the settings parser) that the gitmost server
* drives in-process.
*/
export { serializeDocmostMarkdown, serializeDocmostMarkdownBody, parseDocmostMarkdown, convertProseMirrorToMarkdown, markdownToProseMirror, canonicalizeContent, docsCanonicallyEqual, } from "./lib/index.js";
export type { DocmostMdMeta } from "./lib/index.js";
export { planReconciliation, decideAbsenceDeletions, MASS_DELETE_MIN_EXISTING, MASS_DELETE_FRACTION, } from "./engine/reconcile.js";
export type { LiveEntry, ExistingEntry, WriteEntry, MovedEntry, ReconciliationPlan, DeletionDecision, } from "./engine/reconcile.js";
export { buildVaultLayout } from "./engine/layout.js";
export type { PageNode, VaultEntry } from "./engine/layout.js";
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
export { stabilizePageFile } from "./engine/stabilize.js";
export type { PageMeta } from "./engine/stabilize.js";
export { bodyHash } from "./engine/loop-guard.js";
export type { GitSyncClient, GitSyncPageNodeLite } from "./engine/client.types.js";
export { VaultGit, vaultGitEnv, buildCommitMessage, BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./engine/git.js";
export type { DiffEntry, MergeResult, CommitOptions } from "./engine/git.js";
export { readExisting, computePullActions, applyPullActions, } from "./engine/pull.js";
export type { ReadExistingDeps, PullActionsInput, PullActions, ApplyPullActionsDeps, ApplyResult, } from "./engine/pull.js";
export { classifyRenameMoves, computePushActions, applyPushActions, runPush, parentFolderFile, parseArgs, LAST_PUSHED_REF, DOCMOST_BRANCH, LOCAL_AUTHOR_NAME, LOCAL_AUTHOR_EMAIL, LOCAL_SOURCE_TRAILER, } from "./engine/push.js";
export type { CreateAction, UpdateAction, DeleteAction, RenameMoveAction, RenameMoveActionClassified, ClassifyRenameMovesDeps, PushActions, PushActionsInput, MetaSide, ApplyPushDeps, WrittenBackPage, PushedPageRecord, PushFailure, PushNoop, ApplyPushResult, PushDeps, PushRunResult, PushParsedArgs, } from "./engine/push.js";
export { parseSettings, envSchema } from "./engine/settings.js";
export type { Settings } from "./engine/settings.js";
export { loadSettingsOrExit } from "./engine/config-errors.js";
export { runCycle } from "./engine/cycle.js";
export type { RunCycleDeps, RunCycleResult, CycleFs, } from "./engine/cycle.js";
export { parsePageFile, serializePageFile } from "./lib/page-file.js";
-24
View File
@@ -1,24 +0,0 @@
/**
* Public surface of `@docmost/git-sync`.
*
* Exposes the pure converter (markdown <-> ProseMirror, file envelope,
* canonicalization) and the sync engine (reconcile planner, vault layout,
* pull/push, the git wrapper, and the settings parser) that the gitmost server
* drives in-process.
*/
// Pure converter (markdown <-> ProseMirror, file envelope, canonicalization).
export { serializeDocmostMarkdown, serializeDocmostMarkdownBody, parseDocmostMarkdown, convertProseMirrorToMarkdown, markdownToProseMirror, canonicalizeContent, docsCanonicallyEqual, } from "./lib/index.js";
// Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize,
// loop-guard body hash.
export { planReconciliation, decideAbsenceDeletions, MASS_DELETE_MIN_EXISTING, MASS_DELETE_FRACTION, } from "./engine/reconcile.js";
export { buildVaultLayout } from "./engine/layout.js";
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
export { stabilizePageFile } from "./engine/stabilize.js";
export { bodyHash } from "./engine/loop-guard.js";
export { VaultGit, vaultGitEnv, buildCommitMessage, BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./engine/git.js";
export { readExisting, computePullActions, applyPullActions, } from "./engine/pull.js";
export { classifyRenameMoves, computePushActions, applyPushActions, runPush, parentFolderFile, parseArgs, LAST_PUSHED_REF, DOCMOST_BRANCH, LOCAL_AUTHOR_NAME, LOCAL_AUTHOR_EMAIL, LOCAL_SOURCE_TRAILER, } from "./engine/push.js";
export { parseSettings, envSchema } from "./engine/settings.js";
export { loadSettingsOrExit } from "./engine/config-errors.js";
export { runCycle } from "./engine/cycle.js";
export { parsePageFile, serializePageFile } from "./lib/page-file.js";
-38
View File
@@ -1,38 +0,0 @@
/**
* Semantic canonicalization of ProseMirror/TipTap documents for the round-trip
* idempotency check (SPEC §11, "Задача №0", option (б): compare a CANONICALIZED
* form rather than raw bytes).
*
* `markdownToProseMirror` reconstructs schema DEFAULT attributes (e.g.
* `indent: null` where the source omitted it) and regenerates per-block ids on
* every import. A raw deep-equal of the source doc against the re-imported doc
* therefore diverges even when the two are semantically identical. This module
* normalizes a document so that two semantically-equal docs compare deep-equal
* regardless of block ids and absent-vs-explicit-default-null attributes.
*
* It is a self-contained module with no external dependencies.
*/
/**
* Return a DEEP COPY of a ProseMirror node tree, canonicalized so that two
* semantically-equal documents compare deep-equal. Rules (applied recursively
* to the node, its `content`, and its `marks`):
*
* 1. Remove node-level `attrs.id` (regenerated on import). Mark attrs are NOT
* touched for `id` (marks carry no block id; only their meaningful attrs).
* 2. In any `attrs` object (node OR mark) drop keys whose value is `null`/
* `undefined` (absent ≡ explicit default null) OR equals that node/mark
* type's known non-null schema default (absent ≡ explicit default).
* Keep every non-default value. The type is passed into the attrs
* normalizer so it can look up `KNOWN_DEFAULTS`.
* 3. If an `attrs` object becomes empty after pruning, drop the `attrs` key.
* 4. Preserve `marks` (including the `comment` mark and its `commentId` — a
* meaningful anchor per SPEC §3; never strip it).
* 5. Preserve `text`, `type`, and `content` order exactly.
* 6. Never mutate the input.
*/
export declare function canonicalizeContent(node: any): any;
/**
* True when two ProseMirror documents are semantically equal: equal after
* canonicalization (block ids stripped, absent-vs-default-null normalized).
*/
export declare function docsCanonicallyEqual(a: any, b: any): boolean;
-54
View File
@@ -1,54 +0,0 @@
/**
* Headless, Docmost-equivalent document diff.
*
* Docmost's history editor computes a change set with the exact pipeline below
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
* editor decorations. This module runs the SAME computation but serializes the
* result to text + integrity counts instead of decorations, so a diff can be
* previewed without a browser.
*
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
* maintained published fork of the MIT prosemirror-recreate-steps source that
* Docmost vendors in @docmost/editor-ext; it exposes the identical
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
* signature.
*
* If recreateTransform / the changeset throws on a pathological document pair,
* we fall back to a coarse block-level text diff so the tool never hard-fails.
*/
/** A single inserted/deleted change with its containing-block context. */
export interface DiffChange {
op: "insert" | "delete";
/** Lead (plain) text of the block that contains the change, for context. */
block: string;
/** The inserted or deleted text. */
text: string;
}
/** Integrity counts as [old, new] tuples; footnoteMarkers as [oldList, newList]. */
export interface DiffIntegrity {
images: [number, number];
links: [number, number];
tables: [number, number];
callouts: [number, number];
footnoteMarkers: [number[], number[]];
}
export interface DiffResult {
summary: {
inserted: number;
deleted: number;
blocksChanged: number;
};
integrity: DiffIntegrity;
changes: DiffChange[];
/** Human-readable unified-ish summary. */
markdown: string;
}
/**
* Diff two ProseMirror JSON documents the way Docmost's history editor does and
* serialize the result to text + integrity counts.
*
* @param oldDocJson the earlier document
* @param newDocJson the later document
* @param notesHeading heading delimiting body from notes for footnote counting
*/
export declare function diffDocs(oldDocJson: any, newDocJson: any, notesHeading?: string): DiffResult;
-273
View File
@@ -1,273 +0,0 @@
/**
* Headless, Docmost-equivalent document diff.
*
* Docmost's history editor computes a change set with the exact pipeline below
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
* editor decorations. This module runs the SAME computation but serializes the
* result to text + integrity counts instead of decorations, so a diff can be
* previewed without a browser.
*
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
* maintained published fork of the MIT prosemirror-recreate-steps source that
* Docmost vendors in @docmost/editor-ext; it exposes the identical
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
* signature.
*
* If recreateTransform / the changeset throws on a pathological document pair,
* we fall back to a coarse block-level text diff so the tool never hard-fails.
*/
import { getSchema } from "@tiptap/core";
import { Node } from "@tiptap/pm/model";
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
import { docmostExtensions } from "./docmost-schema.js";
/** Build the schema once; it is pure and reused across calls. */
const schema = getSchema(docmostExtensions);
/** Recursively concatenate the plain text of a JSON node. */
function plainText(node) {
if (!node || typeof node !== "object")
return "";
let out = "";
if (typeof node.text === "string")
out += node.text;
if (Array.isArray(node.content)) {
for (const child of node.content)
out += plainText(child);
}
return out;
}
/** Count nodes in a JSON doc that satisfy `pred` (recursive). */
function countNodes(doc, pred) {
let n = 0;
const visit = (node) => {
if (!node || typeof node !== "object")
return;
if (pred(node))
n++;
if (Array.isArray(node.content))
for (const c of node.content)
visit(c);
};
visit(doc);
return n;
}
/**
* Count UNIQUE links in a JSON doc by their `href`. A single link can be split
* across several adjacent text runs (e.g. a "link+bold" run followed by a "link"
* run); counting link-bearing runs would over-count it. Walking the tree and
* collecting hrefs into a Set keys each distinct link once. Link marks with a
* missing/empty href are bucketed under a single "" key so a malformed link is
* still counted as one.
*/
function countUniqueLinks(doc) {
const hrefs = new Set();
const visit = (node) => {
if (!node || typeof node !== "object")
return;
if (node.type === "text" && Array.isArray(node.marks)) {
for (const m of node.marks) {
if (m && m.type === "link") {
const href = m.attrs && typeof m.attrs.href === "string" ? m.attrs.href : "";
hrefs.add(href);
}
}
}
if (Array.isArray(node.content))
for (const c of node.content)
visit(c);
};
visit(doc);
return hrefs.size;
}
/**
* Parse the ordered list of integers from `[N]` footnote markers found in the
* BODY only (every top-level block before the first "Примечания..." notes
* heading; if no such heading, the whole doc). Returned in reading order.
*/
function footnoteMarkers(doc, notesHeading) {
const top = Array.isArray(doc?.content) ? doc.content : [];
const notesIdx = top.findIndex((n) => n &&
n.type === "heading" &&
plainText(n).trim() === notesHeading);
const bodyBlocks = notesIdx >= 0 ? top.slice(0, notesIdx) : top;
const markers = [];
const re = /\[(\d+)\]/g;
for (const block of bodyBlocks) {
const text = plainText(block);
let m;
re.lastIndex = 0;
while ((m = re.exec(text)) !== null) {
markers.push(Number(m[1]));
}
}
return markers;
}
/** Compute the [old,new] integrity tuples for two JSON docs. */
function computeIntegrity(oldDoc, newDoc, notesHeading) {
const images = [
countNodes(oldDoc, (n) => n.type === "image"),
countNodes(newDoc, (n) => n.type === "image"),
];
const links = [
countUniqueLinks(oldDoc),
countUniqueLinks(newDoc),
];
const tables = [
countNodes(oldDoc, (n) => n.type === "table"),
countNodes(newDoc, (n) => n.type === "table"),
];
const callouts = [
countNodes(oldDoc, (n) => n.type === "callout"),
countNodes(newDoc, (n) => n.type === "callout"),
];
const fns = [
footnoteMarkers(oldDoc, notesHeading),
footnoteMarkers(newDoc, notesHeading),
];
return { images, links, tables, callouts, footnoteMarkers: fns };
}
/**
* Resolve the lead text of the top-level block in a ProseMirror Node that
* contains the given document position. Returns "" when out of range.
*/
function blockContextAt(node, pos) {
try {
const clamped = Math.max(0, Math.min(pos, node.content.size));
const $pos = node.resolve(clamped);
// depth 1 is the top-level block in a doc node.
const block = $pos.depth >= 1 ? $pos.node(1) : $pos.node(0);
const text = block.textContent || "";
return text.length > 80 ? text.slice(0, 77) + "..." : text;
}
catch {
return "";
}
}
/** Truncate a string for the markdown summary. */
function truncate(s, n = 120) {
return s.length > n ? s.slice(0, n - 3) + "..." : s;
}
/**
* Coarse fallback: a block-by-block plain-text diff. Used only when the precise
* changeset pipeline throws, so the tool degrades gracefully instead of failing.
*/
function coarseDiff(oldDoc, newDoc) {
const oldBlocks = Array.isArray(oldDoc?.content) ? oldDoc.content : [];
const newBlocks = Array.isArray(newDoc?.content) ? newDoc.content : [];
const oldTexts = oldBlocks.map(plainText);
const newTexts = newBlocks.map(plainText);
const oldSet = new Set(oldTexts);
const newSet = new Set(newTexts);
const changes = [];
for (const t of oldTexts) {
if (!newSet.has(t) && t.trim() !== "") {
changes.push({ op: "delete", block: truncate(t, 80), text: t });
}
}
for (const t of newTexts) {
if (!oldSet.has(t) && t.trim() !== "") {
changes.push({ op: "insert", block: truncate(t, 80), text: t });
}
}
return changes;
}
/** Build the human-readable unified-ish markdown summary. */
function renderMarkdown(result, fellBack) {
const lines = [];
const { summary, integrity, changes } = result;
lines.push(`# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`);
if (fellBack) {
lines.push("");
lines.push("> note: precise diff failed; coarse block-level diff shown.");
}
lines.push("");
lines.push("## Integrity (old -> new)");
lines.push(`- images: ${integrity.images[0]} -> ${integrity.images[1]}`);
lines.push(`- links: ${integrity.links[0]} -> ${integrity.links[1]}`);
lines.push(`- tables: ${integrity.tables[0]} -> ${integrity.tables[1]}`);
lines.push(`- callouts: ${integrity.callouts[0]} -> ${integrity.callouts[1]}`);
lines.push(`- footnoteMarkers: [${integrity.footnoteMarkers[0].join(", ")}] -> [${integrity.footnoteMarkers[1].join(", ")}]`);
lines.push("");
lines.push("## Changes");
if (changes.length === 0) {
lines.push("(no textual changes)");
}
else {
for (const c of changes) {
const sign = c.op === "insert" ? "+" : "-";
const ctx = c.block ? ` @ ${truncate(c.block, 60)}` : "";
lines.push(`${sign} ${truncate(c.text)}${ctx}`);
}
}
return lines.join("\n");
}
/**
* Diff two ProseMirror JSON documents the way Docmost's history editor does and
* serialize the result to text + integrity counts.
*
* @param oldDocJson the earlier document
* @param newDocJson the later document
* @param notesHeading heading delimiting body from notes for footnote counting
*/
export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примечания переводчика") {
const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);
let changes = [];
let inserted = 0;
let deleted = 0;
let fellBack = false;
const changedBlocks = new Set();
try {
const oldNode = Node.fromJSON(schema, oldDocJson);
const newNode = Node.fromJSON(schema, newDocJson);
const tr = recreateTransform(oldNode, newNode, {
complexSteps: false,
wordDiffs: true,
simplifyDiff: true,
});
const changeSet = ChangeSet.create(oldNode).addSteps(tr.doc, tr.mapping.maps, []);
const simplified = simplifyChanges(changeSet.changes, newNode);
for (const change of simplified) {
// Deleted text lives in the OLD doc coordinate range [fromA, toA).
if (change.toA > change.fromA) {
const text = oldNode.textBetween(change.fromA, change.toA, "\n", " ");
if (text.length > 0) {
deleted += text.length;
const block = blockContextAt(oldNode, change.fromA);
changes.push({ op: "delete", block, text });
if (block)
changedBlocks.add("d:" + block);
}
}
// Inserted text lives in the NEW doc coordinate range [fromB, toB).
if (change.toB > change.fromB) {
const text = newNode.textBetween(change.fromB, change.toB, "\n", " ");
if (text.length > 0) {
inserted += text.length;
const block = blockContextAt(newNode, change.fromB);
changes.push({ op: "insert", block, text });
if (block)
changedBlocks.add("i:" + block);
}
}
}
}
catch {
// Pathological pair: degrade to a coarse block-level diff so we never throw.
fellBack = true;
changes = coarseDiff(oldDocJson, newDocJson);
for (const c of changes) {
if (c.op === "insert")
inserted += c.text.length;
else
deleted += c.text.length;
if (c.block)
changedBlocks.add(c.op[0] + ":" + c.block);
}
}
const partial = {
summary: { inserted, deleted, blocksChanged: changedBlocks.size },
integrity,
changes,
};
return { ...partial, markdown: renderMarkdown(partial, fellBack) };
}
-9
View File
@@ -1,9 +0,0 @@
import { Node, Extension, Mark } from "@tiptap/core";
export declare const clampCalloutType: (value: string | null | undefined) => string;
export declare const sanitizeCssColor: (value: string | null | undefined) => string | null;
/**
* Full extension list. Image is block-level (matches Docmost); the
* ProseMirror DOM parser hoists <img> found inside <p> automatically.
* StarterKit v3 already bundles the link extension, configured here.
*/
export declare const docmostExtensions: (Node<any, any> | Mark<any, any> | Extension<any, any> | Extension<import("@tiptap/starter-kit").StarterKitOptions, any> | Node<import("@tiptap/extension-image").ImageOptions, any> | Node<import("@tiptap/extension-task-list").TaskListOptions, any> | Node<import("@tiptap/extension-task-item").TaskItemOptions, any> | Mark<import("@tiptap/extension-highlight").HighlightOptions, any> | Mark<import("@tiptap/extension-subscript").SubscriptExtensionOptions, any>)[];
@@ -1,999 +0,0 @@
/**
* Full TipTap extension set matching the real Docmost document schema.
*
* The default StarterKit-only schema silently destroys Docmost-specific
* nodes (callout, table) and drops attributes it does not know about
* (node ids, image sizing, link targets). Every code path that converts
* to or from ProseMirror JSON must use THIS set, otherwise a round-trip
* loses content.
*/
import StarterKit from "@tiptap/starter-kit";
import Image from "@tiptap/extension-image";
import TaskList from "@tiptap/extension-task-list";
import TaskItem from "@tiptap/extension-task-item";
import Highlight from "@tiptap/extension-highlight";
import Subscript from "@tiptap/extension-subscript";
import Superscript from "@tiptap/extension-superscript";
import { Node, Extension, Mark } from "@tiptap/core";
// Inlined from @tiptap/core's getStyleProperty (added after 3.20.x) so this
// package can stay on the same @tiptap/core version as the editor and avoid a
// duplicate-tiptap version split in the monorepo. Reads a single declaration
// from an element's inline `style` attribute, last-wins, case-insensitive.
function getStyleProperty(element, propertyName) {
const styleAttr = element.getAttribute("style");
if (!styleAttr) {
return null;
}
const decls = styleAttr.split(";").map((decl) => decl.trim()).filter(Boolean);
const target = propertyName.toLowerCase();
for (let i = decls.length - 1; i >= 0; i -= 1) {
const decl = decls[i];
const colonIndex = decl.indexOf(":");
if (colonIndex === -1) {
continue;
}
const prop = decl.slice(0, colonIndex).trim().toLowerCase();
if (prop === target) {
return decl.slice(colonIndex + 1).trim();
}
}
return null;
}
/** Allowed Docmost callout types; anything else falls back to "info". */
const CALLOUT_TYPES = ["info", "warning", "danger", "success"];
export const clampCalloutType = (value) => value && CALLOUT_TYPES.includes(value.toLowerCase())
? value.toLowerCase()
: "info";
/**
* Allowlist guard for CSS color values imported from HTML.
*
* Docmost interpolates stored mark colors straight into an inline style
* attribute (e.g. style="background-color: ${color}" / "color: ${color}").
* An unsanitized value such as `red; --x: url(...)` or `red"><script>` would
* let a crafted document break out of the style attribute. We therefore only
* accept a narrow, well-formed subset of CSS <color> syntax and reject (-> null)
* anything else.
*
* Accepted forms:
* - named colors: letters only, e.g. "red", "rebeccapurple"
* - hex: #rgb, #rgba, #rrggbb, #rrggbbaa
* - functional notation: rgb()/rgba()/hsl()/hsla() containing only
* digits, %, ., commas, spaces and slashes
*/
const SAFE_COLOR_RE = /^(?:[a-zA-Z]+|#(?:[0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|(?:rgb|rgba|hsl|hsla)\([0-9.,%/\s]+\))$/;
export const sanitizeCssColor = (value) => {
if (typeof value !== "string")
return null;
const color = value.trim();
return color && SAFE_COLOR_RE.test(color) ? color : null;
};
/** Docmost callout (info/warning/danger/success banner). */
const Callout = Node.create({
name: "callout",
group: "block",
content: "block+",
defining: true,
addAttributes() {
return {
// Read the type from data-callout-type so generateJSON(html) preserves
// it; without an explicit parseHTML every imported callout became "info".
type: {
default: "info",
parseHTML: (el) => clampCalloutType(el.getAttribute("data-callout-type")),
renderHTML: (attrs) => ({
"data-callout-type": clampCalloutType(attrs.type),
}),
},
icon: {
default: null,
parseHTML: (el) => el.getAttribute("data-icon"),
renderHTML: (attrs) => attrs.icon ? { "data-icon": attrs.icon } : {},
},
};
},
parseHTML() {
return [{ tag: 'div[data-type="callout"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "callout", ...HTMLAttributes }, 0];
},
});
/** Minimal table family: enough for schema round-trips and HTML parsing. */
const Table = Node.create({
name: "table",
group: "block",
content: "tableRow+",
isolating: true,
parseHTML() {
return [{ tag: "table" }];
},
renderHTML() {
return ["table", ["tbody", 0]];
},
});
const TableRow = Node.create({
name: "tableRow",
content: "(tableCell | tableHeader)*",
parseHTML() {
return [{ tag: "tr" }];
},
renderHTML() {
return ["tr", 0];
},
});
const cellAttributes = () => ({
colspan: { default: 1 },
rowspan: { default: 1 },
colwidth: { default: null },
backgroundColor: { default: null },
backgroundColorName: { default: null },
// Column alignment so GFM aligned tables (|:--|:-:|--:|) round-trip.
align: {
default: null,
parseHTML: (el) => el.getAttribute("align") || el.style.textAlign || null,
renderHTML: (attrs) => attrs.align ? { align: attrs.align } : {},
},
});
const TableCell = Node.create({
name: "tableCell",
content: "block+",
isolating: true,
addAttributes: cellAttributes,
parseHTML() {
return [{ tag: "td" }];
},
renderHTML() {
return ["td", 0];
},
});
const TableHeader = Node.create({
name: "tableHeader",
content: "block+",
isolating: true,
addAttributes: cellAttributes,
parseHTML() {
return [{ tag: "th" }];
},
renderHTML() {
return ["th", 0];
},
});
/**
* Attributes Docmost stores on standard nodes that the stock extensions
* do not declare. Without these, Node.fromJSON silently drops them —
* including the block ids that heading anchors rely on.
*/
const DocmostAttributes = Extension.create({
name: "docmostAttributes",
addGlobalAttributes() {
return [
{
types: ["heading", "paragraph"],
attributes: {
id: { default: null },
indent: { default: null },
textAlign: { default: null },
},
},
{
types: ["image"],
attributes: {
align: { default: null },
attachmentId: { default: null },
aspectRatio: { default: null },
height: { default: null },
placeholder: { default: null },
size: { default: null },
width: { default: null },
},
},
{
types: ["orderedList"],
attributes: { type: { default: null } },
},
{
types: ["link"],
attributes: { internal: { default: null }, title: { default: null } },
},
];
},
});
/**
* Docmost inline comment mark. Anchors a comment thread to a text range via
* `commentId`. Without it, any document containing comment highlights fails to
* round-trip through the schema ("There is no mark type comment in this schema"),
* which breaks update_page_json and edit_page_text on every commented page.
* Mirrors Docmost's @docmost/editor-ext comment mark (commentId / resolved).
*/
const Comment = Mark.create({
name: "comment",
exitable: true,
inclusive: false,
addAttributes() {
return {
commentId: {
default: null,
parseHTML: (el) => el.getAttribute("data-comment-id"),
renderHTML: (attrs) => attrs.commentId ? { "data-comment-id": attrs.commentId } : {},
},
resolved: {
default: false,
parseHTML: (el) => el.getAttribute("data-resolved") === "true",
renderHTML: (attrs) => attrs.resolved ? { "data-resolved": "true" } : {},
},
};
},
parseHTML() {
return [{ tag: "span[data-comment-id]" }];
},
renderHTML({ HTMLAttributes }) {
return ["span", { class: "comment-mark", ...HTMLAttributes }, 0];
},
});
/**
* Text color mark. The markdown-converter emits colored text as
* <span style="color: ...">, but with no mark parsing it back the color was
* silently dropped on import. This mirrors TipTap's @tiptap/extension-text-style
* `textStyle` mark (the name Docmost expects) and carries a single `color`
* attribute. The parsed color is passed through the allowlist guard so a crafted
* style cannot break out of the attribute when Docmost re-renders it.
*/
const TextStyle = Mark.create({
name: "textStyle",
addAttributes() {
return {
color: {
default: null,
parseHTML: (el) => sanitizeCssColor(el.style.color || el.getAttribute("data-color")),
renderHTML: (attrs) => {
const color = sanitizeCssColor(attrs.color);
return color ? { style: `color: ${color}` } : {};
},
},
};
},
parseHTML() {
return [
{
tag: "span",
// Only claim a plain colored span. Do NOT match spans that are already a
// comment mark (data-comment-id) or a mention node (data-type=mention),
// otherwise importing such HTML would silently drop the comment/mention.
getAttrs: (el) => el.style.color &&
!el.getAttribute("data-comment-id") &&
el.getAttribute("data-type") !== "mention"
? {}
: false,
},
];
},
renderHTML({ HTMLAttributes }) {
return ["span", HTMLAttributes, 0];
},
});
/**
* Passthrough definitions for the remaining Docmost-specific nodes.
*
* TiptapTransformer.toYdoc (the write path every mutation uses) throws
* "Unknown node type: X" for any node not registered here, so editing ANY
* page that contains one of these nodes used to fail outright. The read path
* (fromYdoc) accepts them, which is why they appear in real documents.
*
* Each node below mirrors the real @docmost/editor-ext definition's name,
* group, content, inline/atom flags and attribute keys (with the same data-*
* HTML mapping) so that a fromYdoc -> transform -> toYdoc round-trip both
* validates and preserves attributes faithfully. Interactive concerns
* (node views, commands, keyboard shortcuts, input rules, suggestion plugins)
* are intentionally omitted: the MCP server never renders these nodes, it only
* needs the schema to accept and carry them. The Callout node above is the
* pattern these follow.
*/
/** Docmost @mention (user/page reference). Inline atom. */
const Mention = Node.create({
name: "mention",
group: "inline",
inline: true,
selectable: true,
atom: true,
draggable: true,
addAttributes() {
return {
id: {
default: null,
parseHTML: (el) => el.getAttribute("data-id"),
renderHTML: (attrs) => attrs.id ? { "data-id": attrs.id } : {},
},
label: {
default: null,
parseHTML: (el) => el.getAttribute("data-label"),
renderHTML: (attrs) => attrs.label ? { "data-label": attrs.label } : {},
},
entityType: {
default: null,
parseHTML: (el) => el.getAttribute("data-entity-type"),
renderHTML: (attrs) => attrs.entityType ? { "data-entity-type": attrs.entityType } : {},
},
entityId: {
default: null,
parseHTML: (el) => el.getAttribute("data-entity-id"),
renderHTML: (attrs) => attrs.entityId ? { "data-entity-id": attrs.entityId } : {},
},
slugId: {
default: null,
parseHTML: (el) => el.getAttribute("data-slug-id"),
renderHTML: (attrs) => attrs.slugId ? { "data-slug-id": attrs.slugId } : {},
},
creatorId: {
default: null,
parseHTML: (el) => el.getAttribute("data-creator-id"),
renderHTML: (attrs) => attrs.creatorId ? { "data-creator-id": attrs.creatorId } : {},
},
anchorId: {
default: null,
parseHTML: (el) => el.getAttribute("data-anchor-id"),
renderHTML: (attrs) => attrs.anchorId ? { "data-anchor-id": attrs.anchorId } : {},
},
};
},
parseHTML() {
return [{ tag: 'span[data-type="mention"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["span", { "data-type": "mention", ...HTMLAttributes }, 0];
},
});
/** Inline KaTeX expression. Carries the LaTeX source in `text`. */
const MathInline = Node.create({
name: "mathInline",
group: "inline",
inline: true,
atom: true,
addAttributes() {
return {
text: { default: "" },
};
},
parseHTML() {
return [{ tag: 'span[data-type="mathInline"]' }];
},
renderHTML({ HTMLAttributes }) {
return [
"span",
{ "data-type": "mathInline", "data-katex": "true" },
`${HTMLAttributes.text ?? ""}`,
];
},
});
/** Block KaTeX expression. Carries the LaTeX source in `text`. */
const MathBlock = Node.create({
name: "mathBlock",
group: "block",
atom: true,
isolating: true,
addAttributes() {
return {
text: { default: "" },
};
},
parseHTML() {
return [{ tag: 'div[data-type="mathBlock"]' }];
},
renderHTML({ HTMLAttributes }) {
return [
"div",
{ "data-type": "mathBlock", "data-katex": "true" },
`${HTMLAttributes.text ?? ""}`,
];
},
});
/** Collapsible <details> wrapper: summary + content children. */
const Details = Node.create({
name: "details",
group: "block",
content: "detailsSummary detailsContent",
defining: true,
isolating: true,
addAttributes() {
return {
open: {
default: false,
parseHTML: (el) => el.getAttribute("open"),
renderHTML: (attrs) => attrs.open ? { open: "" } : {},
},
};
},
parseHTML() {
return [{ tag: "details" }];
},
renderHTML({ HTMLAttributes }) {
return ["details", { ...HTMLAttributes }, 0];
},
});
/** Clickable summary line of a <details> block. */
const DetailsSummary = Node.create({
name: "detailsSummary",
group: "block",
content: "inline*",
defining: true,
isolating: true,
selectable: false,
parseHTML() {
return [{ tag: "summary" }];
},
renderHTML({ HTMLAttributes }) {
return ["summary", { "data-type": "detailsSummary", ...HTMLAttributes }, 0];
},
});
/** Body of a <details> block. Permissive content so fromYdoc output validates. */
const DetailsContent = Node.create({
name: "detailsContent",
group: "block",
// Docmost declares block* (an empty details body is valid); block+ would
// reject a collapsed/empty details on round-trip.
content: "block*",
defining: true,
selectable: false,
parseHTML() {
return [{ tag: 'div[data-type="detailsContent"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "detailsContent", ...HTMLAttributes }, 0];
},
});
/** File attachment card (non-image upload). Block atom. */
const Attachment = Node.create({
name: "attachment",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
addAttributes() {
return {
url: {
default: "",
parseHTML: (el) => el.getAttribute("data-attachment-url"),
renderHTML: (attrs) => ({
"data-attachment-url": attrs.url ?? "",
}),
},
name: {
default: null,
parseHTML: (el) => el.getAttribute("data-attachment-name"),
renderHTML: (attrs) => attrs.name ? { "data-attachment-name": attrs.name } : {},
},
mime: {
default: null,
parseHTML: (el) => el.getAttribute("data-attachment-mime"),
renderHTML: (attrs) => attrs.mime ? { "data-attachment-mime": attrs.mime } : {},
},
size: {
default: null,
parseHTML: (el) => el.getAttribute("data-attachment-size"),
renderHTML: (attrs) => attrs.size != null ? { "data-attachment-size": attrs.size } : {},
},
attachmentId: {
default: null,
parseHTML: (el) => el.getAttribute("data-attachment-id"),
renderHTML: (attrs) => attrs.attachmentId
? { "data-attachment-id": attrs.attachmentId }
: {},
},
// Docmost declares `placeholder` (a transient upload key, not rendered
// to HTML). Carry it so a round-trip never hits "Unsupported attribute".
placeholder: { default: null },
};
},
parseHTML() {
return [{ tag: 'div[data-type="attachment"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "attachment", ...HTMLAttributes }, 0];
},
});
/** Uploaded <video> player. Block atom. */
const Video = Node.create({
name: "video",
group: "block",
isolating: true,
atom: true,
defining: true,
draggable: true,
addAttributes() {
return {
src: {
default: "",
parseHTML: (el) => el.getAttribute("src"),
renderHTML: (attrs) => ({ src: attrs.src ?? "" }),
},
alt: {
default: null,
parseHTML: (el) => el.getAttribute("aria-label"),
renderHTML: (attrs) => attrs.alt ? { "aria-label": attrs.alt } : {},
},
attachmentId: {
default: null,
parseHTML: (el) => el.getAttribute("data-attachment-id"),
renderHTML: (attrs) => attrs.attachmentId
? { "data-attachment-id": attrs.attachmentId }
: {},
},
width: {
default: null,
parseHTML: (el) => el.getAttribute("width"),
renderHTML: (attrs) => attrs.width != null ? { width: attrs.width } : {},
},
height: {
default: null,
parseHTML: (el) => el.getAttribute("height"),
renderHTML: (attrs) => attrs.height != null ? { height: attrs.height } : {},
},
size: {
default: null,
parseHTML: (el) => el.getAttribute("data-size"),
renderHTML: (attrs) => attrs.size != null ? { "data-size": attrs.size } : {},
},
align: {
default: "center",
parseHTML: (el) => el.getAttribute("data-align"),
renderHTML: (attrs) => attrs.align ? { "data-align": attrs.align } : {},
},
aspectRatio: {
default: null,
parseHTML: (el) => el.getAttribute("data-aspect-ratio"),
renderHTML: (attrs) => attrs.aspectRatio != null
? { "data-aspect-ratio": attrs.aspectRatio }
: {},
},
// Docmost declares `placeholder` (a transient upload key, not rendered
// to HTML). Carry it so a round-trip never hits "Unsupported attribute".
placeholder: { default: null },
};
},
parseHTML() {
return [{ tag: "video" }];
},
renderHTML({ HTMLAttributes }) {
return ["video", { controls: "true", ...HTMLAttributes }];
},
});
/**
* Defensive passthrough for a `youtube` node. Docmost itself has no dedicated
* youtube node (YouTube is handled via `embed`), but the converter read path
* references this type, so accept it as a generic block atom that preserves
* its src so legacy/external documents survive a round-trip.
*/
const Youtube = Node.create({
name: "youtube",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
addAttributes() {
return {
src: {
default: "",
parseHTML: (el) => el.getAttribute("data-src"),
renderHTML: (attrs) => ({
"data-src": attrs.src ?? "",
}),
},
width: {
default: null,
parseHTML: (el) => el.getAttribute("data-width"),
renderHTML: (attrs) => attrs.width != null ? { "data-width": attrs.width } : {},
},
height: {
default: null,
parseHTML: (el) => el.getAttribute("data-height"),
renderHTML: (attrs) => attrs.height != null ? { "data-height": attrs.height } : {},
},
align: {
default: "center",
parseHTML: (el) => el.getAttribute("data-align"),
renderHTML: (attrs) => attrs.align ? { "data-align": attrs.align } : {},
},
};
},
parseHTML() {
return [{ tag: 'div[data-type="youtube"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "youtube", ...HTMLAttributes }, 0];
},
});
/** Generic embed (provider iframe). Block atom. */
const Embed = Node.create({
name: "embed",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
addAttributes() {
return {
src: {
default: "",
parseHTML: (el) => el.getAttribute("data-src"),
renderHTML: (attrs) => ({
"data-src": attrs.src ?? "",
}),
},
provider: {
default: "",
parseHTML: (el) => el.getAttribute("data-provider"),
renderHTML: (attrs) => ({
"data-provider": attrs.provider ?? "",
}),
},
align: {
default: "center",
parseHTML: (el) => el.getAttribute("data-align"),
renderHTML: (attrs) => ({
"data-align": attrs.align ?? "center",
}),
},
width: {
default: 800,
parseHTML: (el) => el.getAttribute("data-width"),
renderHTML: (attrs) => ({
"data-width": attrs.width,
}),
},
height: {
default: 600,
parseHTML: (el) => el.getAttribute("data-height"),
renderHTML: (attrs) => ({
"data-height": attrs.height,
}),
},
};
},
parseHTML() {
return [{ tag: 'div[data-type="embed"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "embed", ...HTMLAttributes }, 0];
},
});
/** Shared attribute set for drawio/excalidraw diagram nodes. */
const diagramAttributes = () => ({
src: {
default: "",
parseHTML: (el) => el.getAttribute("data-src"),
renderHTML: (attrs) => ({
"data-src": attrs.src ?? "",
}),
},
title: {
default: null,
parseHTML: (el) => el.getAttribute("data-title"),
renderHTML: (attrs) => attrs.title ? { "data-title": attrs.title } : {},
},
alt: {
default: null,
parseHTML: (el) => el.getAttribute("data-alt"),
renderHTML: (attrs) => attrs.alt ? { "data-alt": attrs.alt } : {},
},
width: {
default: null,
parseHTML: (el) => el.getAttribute("data-width"),
renderHTML: (attrs) => attrs.width != null ? { "data-width": attrs.width } : {},
},
height: {
default: null,
parseHTML: (el) => el.getAttribute("data-height"),
renderHTML: (attrs) => attrs.height != null ? { "data-height": attrs.height } : {},
},
size: {
default: null,
parseHTML: (el) => el.getAttribute("data-size"),
renderHTML: (attrs) => attrs.size != null ? { "data-size": attrs.size } : {},
},
aspectRatio: {
default: null,
parseHTML: (el) => el.getAttribute("data-aspect-ratio"),
renderHTML: (attrs) => attrs.aspectRatio != null
? { "data-aspect-ratio": attrs.aspectRatio }
: {},
},
align: {
default: "center",
parseHTML: (el) => el.getAttribute("data-align"),
renderHTML: (attrs) => attrs.align ? { "data-align": attrs.align } : {},
},
attachmentId: {
default: null,
parseHTML: (el) => el.getAttribute("data-attachment-id"),
renderHTML: (attrs) => attrs.attachmentId ? { "data-attachment-id": attrs.attachmentId } : {},
},
});
/** draw.io diagram. Block atom (image-backed). */
const Drawio = Node.create({
name: "drawio",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
addAttributes: diagramAttributes,
parseHTML() {
return [{ tag: 'div[data-type="drawio"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "drawio", ...HTMLAttributes }, 0];
},
});
/** Excalidraw diagram. Block atom (image-backed). */
const Excalidraw = Node.create({
name: "excalidraw",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
addAttributes: diagramAttributes,
parseHTML() {
return [{ tag: 'div[data-type="excalidraw"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "excalidraw", ...HTMLAttributes }, 0];
},
});
/** Multi-column layout container holding one or more `column` children. */
const Columns = Node.create({
name: "columns",
group: "block",
content: "column+",
defining: true,
isolating: true,
addAttributes() {
return {
layout: {
default: "two_equal",
parseHTML: (el) => el.getAttribute("data-layout"),
renderHTML: (attrs) => attrs.layout ? { "data-layout": attrs.layout } : {},
},
widthMode: {
default: "normal",
parseHTML: (el) => el.getAttribute("data-width-mode") || "normal",
renderHTML: (attrs) => attrs.widthMode && attrs.widthMode !== "normal"
? { "data-width-mode": attrs.widthMode }
: {},
},
};
},
parseHTML() {
return [{ tag: 'div[data-type="columns"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "columns", ...HTMLAttributes }, 0];
},
});
/** Single column within a `columns` layout. */
const Column = Node.create({
name: "column",
group: "block",
content: "block+",
defining: true,
isolating: true,
selectable: false,
addAttributes() {
return {
width: {
default: null,
parseHTML: (el) => {
const value = el.getAttribute("data-width");
return value ? parseFloat(value) : null;
},
renderHTML: (attrs) => attrs.width ? { "data-width": attrs.width } : {},
},
};
},
parseHTML() {
return [{ tag: 'div[data-type="column"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "column", ...HTMLAttributes }, 0];
},
});
/**
* Subpages listing block (auto-generated index of child pages). Docmost
* declares no attributes; the markdown-converter has a `case "subpages"`, so
* the read path can emit it and toYdoc must accept it. Block atom.
*/
const Subpages = Node.create({
name: "subpages",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
parseHTML() {
return [{ tag: 'div[data-type="subpages"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "subpages", ...HTMLAttributes }, 0];
},
});
/** Uploaded <audio> player. Block atom. Mirrors Docmost audio attrs. */
const Audio = Node.create({
name: "audio",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
addAttributes() {
return {
src: {
default: "",
parseHTML: (el) => el.getAttribute("src"),
renderHTML: (attrs) => ({ src: attrs.src ?? "" }),
},
attachmentId: {
default: null,
parseHTML: (el) => el.getAttribute("data-attachment-id"),
renderHTML: (attrs) => attrs.attachmentId
? { "data-attachment-id": attrs.attachmentId }
: {},
},
size: {
default: null,
parseHTML: (el) => el.getAttribute("data-size"),
renderHTML: (attrs) => attrs.size != null ? { "data-size": attrs.size } : {},
},
// Transient upload key Docmost declares with rendered:false; carried so
// a round-trip never hits "Unsupported attribute".
placeholder: { default: null },
};
},
parseHTML() {
return [{ tag: "audio" }];
},
renderHTML({ HTMLAttributes }) {
return ["audio", { controls: "true", ...HTMLAttributes }];
},
});
/** Embedded PDF viewer. Block atom. Mirrors Docmost pdf attrs. */
const Pdf = Node.create({
name: "pdf",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
addAttributes() {
return {
src: {
default: "",
parseHTML: (el) => el.getAttribute("src"),
renderHTML: (attrs) => ({ src: attrs.src ?? "" }),
},
name: {
default: null,
parseHTML: (el) => el.getAttribute("data-name"),
renderHTML: (attrs) => attrs.name ? { "data-name": attrs.name } : {},
},
attachmentId: {
default: null,
parseHTML: (el) => el.getAttribute("data-attachment-id"),
renderHTML: (attrs) => attrs.attachmentId
? { "data-attachment-id": attrs.attachmentId }
: {},
},
size: {
default: null,
parseHTML: (el) => el.getAttribute("data-size"),
renderHTML: (attrs) => attrs.size != null ? { "data-size": attrs.size } : {},
},
width: {
default: null,
parseHTML: (el) => el.getAttribute("width"),
renderHTML: (attrs) => attrs.width != null ? { width: attrs.width } : {},
},
height: {
default: null,
parseHTML: (el) => el.getAttribute("height"),
renderHTML: (attrs) => attrs.height != null ? { height: attrs.height } : {},
},
// Transient upload key Docmost declares with rendered:false; carried so
// a round-trip never hits "Unsupported attribute".
placeholder: { default: null },
};
},
parseHTML() {
return [{ tag: 'div[data-type="pdf"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "pdf", ...HTMLAttributes }, 0];
},
});
/** Page break (print/export divider). Block atom; Docmost declares no attrs. */
const PageBreak = Node.create({
name: "pageBreak",
group: "block",
inline: false,
isolating: true,
atom: true,
defining: true,
draggable: true,
parseHTML() {
return [{ tag: 'div[data-type="pageBreak"]' }];
},
renderHTML({ HTMLAttributes }) {
return ["div", { "data-type": "pageBreak", ...HTMLAttributes }];
},
});
/**
* Full extension list. Image is block-level (matches Docmost); the
* ProseMirror DOM parser hoists <img> found inside <p> automatically.
* StarterKit v3 already bundles the link extension, configured here.
*/
export const docmostExtensions = [
StarterKit.configure({
codeBlock: {},
heading: {},
link: { openOnClick: false },
}),
Image.configure({ inline: false }),
TaskList,
TaskItem.configure({ nested: true }),
// Highlight stores its color unescaped and Docmost interpolates it into
// style="background-color: ${color}". Wrap the color attribute's parseHTML
// with the same allowlist guard used by textStyle so a crafted import color
// cannot break out of the style attribute. Multicolor behavior is preserved.
Highlight.extend({
addAttributes() {
const parent = this.parent?.() ?? {};
return {
...parent,
color: {
...parent.color,
parseHTML: (el) => sanitizeCssColor(el.getAttribute("data-color") ||
getStyleProperty(el, "background-color") ||
el.style.backgroundColor),
},
};
},
}).configure({ multicolor: true }),
Subscript,
Superscript,
// StarterKit does not provide a textStyle mark, so register ours; without it
// generateJSON drops <span style="color: ...">, defeating the color import.
TextStyle,
Comment,
Callout,
Table,
TableRow,
TableCell,
TableHeader,
Mention,
MathInline,
MathBlock,
Details,
DetailsSummary,
DetailsContent,
Attachment,
Video,
Youtube,
Embed,
Drawio,
Excalidraw,
Columns,
Column,
Subpages,
Audio,
Pdf,
PageBreak,
DocmostAttributes,
];
-15
View File
@@ -1,15 +0,0 @@
/**
* Public surface of the pure converter (`lib/`). This barrel re-exports the
* PURE, IO-free pieces the sync engine needs: the self-contained markdown
* (de)serializers, the lossless ProseMirror <-> Markdown converter, the
* markdown -> ProseMirror import path, and semantic canonicalization for the
* round-trip idempotency check (SPEC §11).
*
* There is no REST client, websocket/collab write-path, auth-utils or page-lock
* here — the gitmost server writes natively.
*/
export { serializeDocmostMarkdown, parseDocmostMarkdown, serializeDocmostMarkdownBody, } from "./markdown-document.js";
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
export { canonicalizeContent, docsCanonicallyEqual, } from "./canonicalize.js";
export { parsePageFile, serializePageFile } from "./page-file.js";
-5
View File
@@ -1,5 +0,0 @@
/**
* Convert ProseMirror/TipTap JSON content to Markdown
* Supports all Docmost-specific node types and extensions
*/
export declare function convertProseMirrorToMarkdown(content: any): string;
@@ -1,801 +0,0 @@
/**
* Convert ProseMirror/TipTap JSON content to Markdown
* Supports all Docmost-specific node types and extensions
*/
export function convertProseMirrorToMarkdown(content) {
if (!content || !content.content)
return "";
// Escape a value interpolated into an HTML double-quoted attribute value
// (textAlign, colors, image src, math `text`, all data-* attrs, etc.). In the
// ATTRIBUTE context only the quote that delimits the value and the ampersand
// that starts an entity are special, so we escape ONLY & " (and ' for safety
// when single-quoted delimiters are used). We deliberately do NOT escape < or
// >: the HTML re-parser (parse5/jsdom via @tiptap/html) does NOT decode
// &lt;/&gt; back inside attribute values, so escaping them would corrupt the
// stored data (e.g. a math node's LaTeX `a < b`) and ACCUMULATE escapes on
// every round-trip (`a < b` -> `a &lt; b` -> `a &amp;lt; b`). Escaping & "
// keeps the value inert against attribute-injection while staying idempotent.
// NOTE: escape ONLY & and " here. The value is always wrapped in double
// quotes, so " is the only delimiter; ' is NOT special in a double-quoted
// value, and parse5 does not decode &#39; back inside attribute values, so
// escaping ' would (like < >) corrupt the value and accumulate &amp; on every
// round-trip. Escaping & and " is idempotent (parse5 decodes them back).
const escapeAttr = (value) => String(value)
.replace(/&/g, "&amp;")
.replace(/"/g, "&quot;");
// Escape a value placed as HTML element TEXT content (between tags), where
// <, >, and & are all significant. Used for text rendered inside raw-HTML
// blocks (table cells / columns) so stored characters cannot inject markup.
const escapeHtmlText = (value) => String(value)
.replace(/&/g, "&amp;")
.replace(/</g, "&lt;")
.replace(/>/g, "&gt;");
// Percent-encode characters that would break out of a markdown URL target
// (...) — whitespace/newlines and parentheses — so a stored src stays a
// single inert token (used for image/video/youtube srcs).
const encodeMdUrl = (value) => String(value || "")
.replace(/\s/g, (c) => (c === " " ? "%20" : encodeURIComponent(c)))
.replace(/\(/g, "%28")
.replace(/\)/g, "%29");
const processNode = (node) => {
const type = node.type;
const nodeContent = node.content || [];
switch (type) {
case "doc":
return nodeContent.map(processNode).join("\n\n");
case "paragraph":
const text = nodeContent.map(processNode).join("");
const align = node.attrs?.textAlign;
if (align && align !== "left") {
return `<div align="${escapeAttr(align)}">${text}</div>`;
}
return text || "";
case "heading":
const level = node.attrs?.level || 1;
const headingText = nodeContent.map(processNode).join("");
return "#".repeat(level) + " " + headingText;
case "text":
let textContent = node.text || "";
// Apply marks (bold, italic, code, etc.)
if (node.marks) {
// The schema's `code` mark declares `excludes: "_"` — it excludes every
// other inline mark — so the editor can NEVER produce a text run that
// carries `code` together with another mark, and on import any
// co-occurring mark is always dropped (the run comes back as code-only).
// The lossless, byte-stable behavior is therefore: when a run has the
// `code` mark, emit ONLY the backtick code span and ignore every other
// mark, so md1 is already code-only and md2 === md1. Runs WITHOUT a code
// mark are rendered exactly as before.
const markTypes = node.marks.map((m) => m.type);
const hasCode = markTypes.includes("code");
if (hasCode) {
textContent = `\`${textContent}\``;
return textContent;
}
const codeCombined = false;
for (const mark of node.marks) {
switch (mark.type) {
case "bold":
textContent = codeCombined
? `<strong>${textContent}</strong>`
: `**${textContent}**`;
break;
case "italic":
textContent = codeCombined
? `<em>${textContent}</em>`
: `*${textContent}*`;
break;
case "code":
// When combined with another mark, wrap as <code> so the
// surrounding HTML marks can nest around it; otherwise use the
// plain backtick span.
textContent = codeCombined
? `<code>${textContent}</code>`
: `\`${textContent}\``;
break;
case "link": {
const href = mark.attrs?.href || "";
const title = mark.attrs?.title;
if (codeCombined) {
// Emit an HTML anchor so it can wrap the nested <code>.
const safeHref = escapeAttr(href);
if (title) {
textContent = `<a href="${safeHref}" title="${escapeAttr(String(title))}">${textContent}</a>`;
}
else {
textContent = `<a href="${safeHref}">${textContent}</a>`;
}
}
else if (title) {
// Emit the optional markdown link title; escape an embedded
// double-quote so it cannot terminate the title string early.
const safeTitle = String(title).replace(/"/g, '\\"');
textContent = `[${textContent}](${href} "${safeTitle}")`;
}
else {
textContent = `[${textContent}](${href})`;
}
break;
}
case "strike":
textContent = codeCombined
? `<s>${textContent}</s>`
: `~~${textContent}~~`;
break;
case "underline":
textContent = `<u>${textContent}</u>`;
break;
case "subscript":
textContent = `<sub>${textContent}</sub>`;
break;
case "superscript":
textContent = `<sup>${textContent}</sup>`;
break;
case "highlight": {
// Preserve a null/empty color as a plain highlight (a bare
// <mark> with no background-color); only emit the style when a
// color is actually set, so a plain highlight is not forced to
// yellow on export.
const color = mark.attrs?.color;
textContent = color
? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
: `<mark>${textContent}</mark>`;
break;
}
case "textStyle":
if (mark.attrs?.color) {
textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
}
break;
case "comment": {
// Emit the inline comment anchor so highlights round-trip. The
// schema's Comment mark parses span[data-comment-id] (attrs
// commentId/resolved).
const cid = mark.attrs?.commentId;
if (cid) {
const resolvedAttr = mark.attrs?.resolved
? ` data-resolved="true"`
: "";
textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
}
break;
}
}
}
}
return textContent;
case "codeBlock":
const language = node.attrs?.language || "";
// Strip ALL trailing newlines so the export is idempotent: marked
// re-adds exactly one trailing "\n" on import, so trimming only one
// here would let the text grow by "\n" on each round-trip. Removing
// every trailing newline makes repeated cycles stable.
const code = nodeContent
.map(processNode)
.join("")
.replace(/\n+$/, "");
return "```" + language + "\n" + code + "\n```";
case "bulletList":
return nodeContent
.map((item) => processListItem(item, "-"))
.join("\n");
case "orderedList":
return nodeContent
.map((item, index) => processListItem(item, `${index + 1}.`))
.join("\n");
case "taskList":
return nodeContent.map((item) => processTaskItem(item)).join("\n");
case "taskItem":
// Delegate to the same helper used by taskList so multi-block and
// nested task items render and indent consistently.
return processTaskItem(node);
case "listItem":
return nodeContent.map(processNode).join("\n");
case "blockquote":
// Prefix EVERY line of EVERY child with "> " and separate block-level
// children with a blank ">" line so code blocks / multi-paragraph
// quotes round-trip correctly.
return nodeContent
.map((n) => processNode(n)
.split("\n")
.map((line) => (line.length ? `> ${line}` : ">"))
.join("\n"))
.join("\n>\n");
case "horizontalRule":
return "---";
case "hardBreak":
// Two trailing spaces before the newline encode a markdown hard break;
// a bare "\n" would be reimported as a soft break and lost.
return " \n";
case "image":
const imgAlt = node.attrs?.alt || "";
// Neutralize characters that could break out of the markdown image
// URL: spaces/newlines and parentheses would terminate the (...) target
// and let a stored src inject following markdown/HTML. Percent-encode
// them so the URL stays a single inert token.
const imgSrc = encodeMdUrl(node.attrs?.src);
// No "caption" attribute exists in the Docmost image schema, so we do
// not emit one (the previous caption branch was dead).
return `![${imgAlt}](${imgSrc})`;
case "video": {
// Emit the schema-matching <video> element so generateJSON rebuilds the
// node with its attrs intact. The schema's parseHTML reads src/aria-label
// from the standard attributes and the remaining attrs from data-*.
const attrs = node.attrs || {};
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
if (attrs.alt)
parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
if (attrs.width != null)
parts.push(`width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`height="${escapeAttr(attrs.height)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
if (attrs.align)
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
if (attrs.aspectRatio != null)
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
// Wrap in a block <div> so marked treats it as a block (a bare <video>
// is inline-level HTML and marked wraps it in <p>, leaving a spurious
// empty paragraph beside the hoisted block atom). The wrapper has no
// data-type, so the schema parser ignores it and just hoists the video.
return `<div><video ${parts.join(" ")}></video></div>`;
}
case "youtube": {
// Emit the schema-matching div[data-type="youtube"]; the schema reads
// src from data-src and width/height/align from data-* attributes.
const attrs = node.attrs || {};
const parts = [
`data-type="youtube"`,
`data-src="${escapeAttr(attrs.src ?? "")}"`,
];
if (attrs.width != null)
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
if (attrs.align)
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "table": {
// A GFM pipe table cannot represent merged cells. If ANY cell carries
// colspan>1 or rowspan>1, a pipe table would corrupt the grid on
// re-import, so emit the WHOLE table as raw HTML <table> instead: the
// schema's table family parseHTML (tag table/tr/td/th, with colspan/
// rowspan read from the same-named HTML attrs and align via parseHTML)
// round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
const tableRows = nodeContent;
if (tableRows.length === 0)
return "";
const hasSpan = tableRows.some((row) => (row.content || []).some((cell) => (cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1));
if (hasSpan) {
// Render each cell's block children to HTML (marked does NOT parse
// markdown inside a raw HTML block, so emitting markdown here would
// leak literal ** / `` into the cell). blockToHtml mirrors the schema
// HTML so inner formatting re-parses into the right marks/nodes.
const renderHtmlCell = (cell) => {
const tag = cell.type === "tableHeader" ? "th" : "td";
const a = cell.attrs || {};
const cellParts = [];
if ((a.colspan ?? 1) > 1)
cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
if ((a.rowspan ?? 1) > 1)
cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
if (a.align)
cellParts.push(`align="${escapeAttr(a.align)}"`);
const open = cellParts.length
? `<${tag} ${cellParts.join(" ")}>`
: `<${tag}>`;
const inner = (cell.content || [])
.map((block) => blockToHtml(block))
.join("");
return `${open}${inner}</${tag}>`;
};
const htmlRows = tableRows
.map((row) => `<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`)
.join("");
return `<table><tbody>${htmlRows}</tbody></table>`;
}
// No merged cells: emit a GFM table (header row + separator) so the
// markdown can be parsed back into a table on re-import.
const rows = tableRows.map(processNode);
const headerCells = tableRows[0]?.content || [];
const columns = headerCells.length || 1;
// Derive alignment markers (:--, :-:, --:) from each header cell.
const markers = Array.from({ length: columns }, (_, i) => {
const align = headerCells[i]?.attrs?.align;
switch (align) {
case "left":
return ":--";
case "center":
return ":-:";
case "right":
return "--:";
default:
return "---";
}
});
const separator = "| " + markers.join(" | ") + " |";
return [rows[0], separator, ...rows.slice(1)].join("\n");
}
case "tableRow":
return "| " + nodeContent.map(processNode).join(" | ") + " |";
case "tableCell":
case "tableHeader": {
// Join multiple block children with a space (not "") so adjacent blocks
// like a paragraph followed by a list don't collide into "line1- a".
// Then collapse newlines and escape pipes so a cell containing "|" or a
// line break cannot corrupt the surrounding GFM row.
return nodeContent
.map(processNode)
.join(" ")
.replace(/\r?\n/g, " ")
.replace(/\|/g, "\\|");
}
case "callout":
const calloutType = node.attrs?.type || "info";
const calloutContent = nodeContent.map(processNode).join("\n");
return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
case "details":
return nodeContent.map(processNode).join("\n");
case "detailsSummary":
const summaryText = nodeContent.map(processNode).join("");
return `<details>\n<summary>${summaryText}</summary>\n`;
case "detailsContent":
const detailsText = nodeContent.map(processNode).join("\n");
return `${detailsText}\n</details>`;
case "mathInline": {
// The schema's `text` attribute has no parseHTML, so TipTap's default
// parser reads it from the `text` HTML attribute (NOT the element's text
// content). Emit span[data-type="mathInline"] carrying the LaTeX in a
// `text="..."` attribute so it round-trips. marked cannot parse $...$
// back, so the previous form was lossy.
const inlineMath = node.attrs?.text || "";
return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
}
case "mathBlock": {
// Same as mathInline: the LaTeX must ride in the `text` HTML attribute
// for the schema's default parser to recover it.
const blockMath = node.attrs?.text || "";
return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
}
case "mention": {
// Emit span[data-type="mention"] with the schema's data-* attributes so
// generateJSON rebuilds the mention node instead of leaving "@label"
// plain text that cannot re-parse.
const attrs = node.attrs || {};
const parts = [`data-type="mention"`];
if (attrs.id)
parts.push(`data-id="${escapeAttr(attrs.id)}"`);
if (attrs.label)
parts.push(`data-label="${escapeAttr(attrs.label)}"`);
if (attrs.entityType)
parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
if (attrs.entityId)
parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
if (attrs.slugId)
parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
if (attrs.creatorId)
parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
if (attrs.anchorId)
parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
// Keep the label as visible text content too; the schema reads attrs
// from data-*, so the inner text is purely cosmetic and harmless.
const mentionLabel = attrs.label || attrs.id || "";
// The label is visible element TEXT content here (the data-* attrs above
// carry the real values), so escape it for the text context, not attrs.
return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
}
case "attachment": {
// BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
// the schema stores name/url (plus mime/size/attachmentId). Emit the
// schema-matching div[data-type="attachment"] with data-attachment-*
// attrs so the node round-trips instead of degrading to a markdown link.
const attrs = node.attrs || {};
const parts = [
`data-type="attachment"`,
`data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
];
if (attrs.name)
parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
if (attrs.mime)
parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
if (attrs.size != null)
parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "drawio":
case "excalidraw": {
// Emit the schema-matching div[data-type=...] carrying the diagram's
// attrs as data-* (the schema's diagramAttributes reads src/title/alt/
// width/height/size/aspectRatio/align/attachmentId from data-*), so the
// diagram round-trips instead of degrading to a lossy placeholder.
const attrs = node.attrs || {};
const parts = [
`data-type="${type}"`,
`data-src="${escapeAttr(attrs.src ?? "")}"`,
];
if (attrs.title != null)
parts.push(`data-title="${escapeAttr(attrs.title)}"`);
if (attrs.alt != null)
parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
if (attrs.width != null)
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
if (attrs.aspectRatio != null)
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
if (attrs.align)
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "embed": {
// Emit the schema-matching div[data-type="embed"]; the schema reads
// src/provider/align/width/height from data-* attributes so the node
// (and its provider iframe info) survives the round-trip.
const attrs = node.attrs || {};
const parts = [
`data-type="embed"`,
`data-src="${escapeAttr(attrs.src ?? "")}"`,
`data-provider="${escapeAttr(attrs.provider ?? "")}"`,
];
if (attrs.align)
parts.push(`data-align="${escapeAttr(attrs.align)}"`);
if (attrs.width != null)
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "audio": {
// Emit the schema-matching <audio> element (was emitting nothing). The
// schema reads src from src and attachmentId/size from data-*.
const attrs = node.attrs || {};
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
// Wrap in a block <div> for the same reason as video: a bare <audio> is
// inline-level HTML that marked would wrap in <p>.
return `<div><audio ${parts.join(" ")}></audio></div>`;
}
case "pdf": {
// Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
// The schema reads src/width/height from standard attrs and name/
// attachmentId/size from data-*.
const attrs = node.attrs || {};
const parts = [
`data-type="pdf"`,
`src="${escapeAttr(attrs.src ?? "")}"`,
];
if (attrs.name)
parts.push(`data-name="${escapeAttr(attrs.name)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
if (attrs.width != null)
parts.push(`width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`height="${escapeAttr(attrs.height)}"`);
return `<div ${parts.join(" ")}></div>`;
}
case "columns": {
// Emit the schema-matching div[data-type="columns"] wrapper so the
// multi-column layout survives. Without a case the children were
// concatenated with no separator and the text merged. The schema reads
// layout from data-layout and widthMode from data-width-mode. The whole
// block is raw HTML, so render children via blockToHtml (NOT markdown,
// which marked would not re-parse inside a raw HTML block).
const attrs = node.attrs || {};
const parts = [`data-type="columns"`];
if (attrs.layout)
parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
if (attrs.widthMode && attrs.widthMode !== "normal")
parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
const inner = nodeContent.map((n) => blockToHtml(n)).join("");
return `<div ${parts.join(" ")}>${inner}</div>`;
}
case "column": {
// Emit the schema-matching div[data-type="column"]; the schema reads the
// column width from data-width. Children are rendered as HTML so their
// formatting survives inside this raw HTML block.
const attrs = node.attrs || {};
const parts = [`data-type="column"`];
if (attrs.width)
parts.push(`data-width="${escapeAttr(attrs.width)}"`);
const inner = nodeContent.map((n) => blockToHtml(n)).join("");
return `<div ${parts.join(" ")}>${inner}</div>`;
}
case "pageBreak":
// Emit the schema-matching div[data-type="pageBreak"] so marked passes
// it through as a block and generateJSON rebuilds the pageBreak atom.
// Without this case the node fell through to `default` and rendered ""
// (the divider silently disappeared and could not round-trip).
return `<div data-type="pageBreak"></div>`;
case "subpages":
return "{{SUBPAGES}}";
default:
// Fallback: process children
return nodeContent.map(processNode).join("");
}
};
// Render inline content (text runs + their marks) to HTML. Used by the raw
// HTML fallbacks (spanned tables, columns) where marked will NOT re-parse
// markdown, so backtick/asterisk/bracket syntax would otherwise leak as
// literal characters. Each mark is mirrored to the HTML the schema's parseHTML
// accepts so it re-imports as the matching ProseMirror mark.
const inlineToHtml = (inlineNodes) => (inlineNodes || [])
.map((n) => {
if (n.type === "hardBreak")
return "<br>";
if (n.type !== "text") {
// Inline atoms (mention, mathInline) already emit schema HTML.
return processNode(n);
}
let t = escapeHtmlText(n.text || "");
for (const mark of n.marks || []) {
switch (mark.type) {
case "bold":
t = `<strong>${t}</strong>`;
break;
case "italic":
t = `<em>${t}</em>`;
break;
case "code":
t = `<code>${t}</code>`;
break;
case "strike":
t = `<s>${t}</s>`;
break;
case "underline":
t = `<u>${t}</u>`;
break;
case "subscript":
t = `<sub>${t}</sub>`;
break;
case "superscript":
t = `<sup>${t}</sup>`;
break;
case "link":
t = `<a href="${escapeAttr(mark.attrs?.href || "")}">${t}</a>`;
break;
case "highlight":
t = mark.attrs?.color
? `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${t}</mark>`
: `<mark>${t}</mark>`;
break;
case "textStyle":
if (mark.attrs?.color)
t = `<span style="color: ${escapeAttr(mark.attrs.color)}">${t}</span>`;
break;
case "comment":
// Inline comment anchor inside a raw-HTML container (columns /
// spanned table cells), so commented text there also round-trips.
if (mark.attrs?.commentId) {
const r = mark.attrs?.resolved ? ` data-resolved="true"` : "";
t = `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${r}>${t}</span>`;
}
break;
}
}
return t;
})
.join("");
// Emit the schema-matching <img> for an image node. Shared so the image is
// emitted as real HTML wherever a raw-HTML container needs it (inside a column
// or a spanned table cell), where markdown `![](...)` would NOT be re-parsed
// and would survive as literal text. The Image extension reads src/alt from
// the standard attributes; the Docmost extra attrs (width/height/align/size/
// attachmentId/aspectRatio) are global attributes read from same-named DOM
// attributes, so emit them by name.
const imageToHtml = (node) => {
const attrs = node.attrs || {};
const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
if (attrs.alt)
parts.push(`alt="${escapeAttr(attrs.alt)}"`);
if (attrs.title)
parts.push(`title="${escapeAttr(attrs.title)}"`);
if (attrs.width != null)
parts.push(`width="${escapeAttr(attrs.width)}"`);
if (attrs.height != null)
parts.push(`height="${escapeAttr(attrs.height)}"`);
if (attrs.align)
parts.push(`align="${escapeAttr(attrs.align)}"`);
if (attrs.size != null)
parts.push(`data-size="${escapeAttr(attrs.size)}"`);
if (attrs.attachmentId)
parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
if (attrs.aspectRatio != null)
parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
return `<img ${parts.join(" ")}>`;
};
// Emit the schema-matching div[data-type="callout"] for a callout node. The
// schema reads the banner type from data-callout-type. Children are rendered
// as HTML so they survive inside a raw-HTML container.
const calloutToHtml = (node) => {
const type = (node.attrs?.type || "info").toLowerCase();
const inner = (node.content || []).map(blockToHtml).join("");
return `<div data-type="callout" data-callout-type="${escapeAttr(type)}">${inner}</div>`;
};
// Emit a schema-matching <details> tree. The schema parses <details>,
// summary[data-type="detailsSummary"], and div[data-type="detailsContent"].
const detailsToHtml = (node) => {
const inner = (node.content || []).map(blockToHtml).join("");
return `<details>${inner}</details>`;
};
const detailsSummaryToHtml = (node) => `<summary data-type="detailsSummary">${inlineToHtml(node.content || [])}</summary>`;
const detailsContentToHtml = (node) => {
const inner = (node.content || []).map(blockToHtml).join("");
return `<div data-type="detailsContent">${inner}</div>`;
};
// Emit the schema-matching taskList/taskItem HTML. bridgeTaskLists (in
// collaboration.ts) recognizes ul[data-type="taskList"] with
// li[data-type="taskItem"][data-checked]; emitting that directly here keeps
// task lists inside columns/cells from degrading to literal "- [ ]" text.
const taskListToHtml = (node) => {
const items = (node.content || [])
.map((it) => {
const checked = it.attrs?.checked ? "true" : "false";
return `<li data-type="taskItem" data-checked="${checked}">${blockChildrenToHtml(it)}</li>`;
})
.join("");
return `<ul data-type="taskList">${items}</ul>`;
};
// Render a block node to HTML for the raw-HTML containers (spanned tables,
// columns). marked does NOT re-parse markdown inside a raw-HTML block, so
// EVERY block type that can appear inside a column or a spanned cell must be
// emitted as schema-matching HTML here — never as markdown, or it would land
// as literal text on re-import. Nodes whose processNode case already produces
// schema-matching HTML (math/media/embed/attachment/nested columns/spanned
// table) are delegated to processNode; the markdown-emitting cases
// (image/blockquote/callout/details/hr/taskList) get explicit HTML here.
const blockToHtml = (block) => {
const children = block.content || [];
switch (block.type) {
case "paragraph":
return `<p>${inlineToHtml(children)}</p>`;
case "heading": {
const level = block.attrs?.level || 1;
return `<h${level}>${inlineToHtml(children)}</h${level}>`;
}
case "bulletList":
return `<ul>${children
.map((li) => `<li>${blockChildrenToHtml(li)}</li>`)
.join("")}</ul>`;
case "orderedList":
return `<ol>${children
.map((li) => `<li>${blockChildrenToHtml(li)}</li>`)
.join("")}</ol>`;
case "codeBlock": {
const lang = block.attrs?.language || "";
// The code itself is element TEXT content (between <code> tags), so it
// must escape < > & — NOT the attribute escaper. The language rides in
// a class ATTRIBUTE, so it uses escapeAttr.
const code = escapeHtmlText(children
.map(processNode)
.join("")
.replace(/\n+$/, ""));
const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
return `<pre><code${cls}>${code}</code></pre>`;
}
case "image":
return imageToHtml(block);
case "blockquote":
return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`;
case "horizontalRule":
return "<hr>";
case "callout":
return calloutToHtml(block);
case "details":
return detailsToHtml(block);
case "detailsSummary":
return detailsSummaryToHtml(block);
case "detailsContent":
return detailsContentToHtml(block);
case "taskList":
return taskListToHtml(block);
case "taskItem":
// A bare taskItem (outside a taskList) still needs a wrapping list so
// the schema parses it; wrap it in a single-item taskList.
return taskListToHtml({ content: [block] });
// table (incl. spanned), columns/column, math, media, embed, attachment,
// mention, etc. already emit schema-matching HTML from processNode.
case "table":
case "columns":
case "column":
case "mathBlock":
case "video":
case "audio":
case "pdf":
case "youtube":
case "embed":
case "attachment":
case "drawio":
case "excalidraw":
return processNode(block);
default:
// Any still-unhandled block type: NEVER fall back to markdown inside a
// raw-HTML block (it would become literal text). Wrap its rendered
// children in a <div> so their content is preserved; if it has no block
// children, render its inline content instead.
if (children.length && children.some((c) => c.type !== "text")) {
return `<div>${children.map(blockToHtml).join("")}</div>`;
}
return `<div>${inlineToHtml(children)}</div>`;
}
};
// Render the block children of a list item to HTML (a listItem holds block+
// content). Mirrors processListItem but for the HTML fallback path.
const blockChildrenToHtml = (item) => (item.content || []).map((b) => blockToHtml(b)).join("");
// Indent the rendered children of a list item under a marker prefix.
// Each child block is a (possibly multi-line) string. The very first physical
// line of the first child carries the marker (e.g. "- " or "1. "); EVERY
// other line — the remaining lines of the first child AND all lines of every
// subsequent child (nested lists, code blocks, extra paragraphs) — is indented
// to align under the marker. Without indenting these continuation lines, the
// 2nd/3rd line of a nested child collapses to column 0 and escapes the list.
//
// The continuation indent MUST equal the LIST marker width, which is not the
// same as the visible prefix width:
// - bullet "- " -> 2 columns
// - task "- [ ] " -> marker is still "- " (the "[ ] " is content), 2
// - ordered "1. "/"10. " -> 3/4 columns, scaling with the number's digits
// CommonMark anchors nested content to the marker column, so an ordered item
// indented to only 2 columns would be re-parsed as a sibling/loose content on
// re-import. Callers therefore pass the exact indent width to use.
const indentItemChildren = (childStrings, prefix, indentWidth) => {
const indent = " ".repeat(indentWidth);
const lines = [];
childStrings.forEach((child, childIndex) => {
child.split("\n").forEach((line, lineIndex) => {
if (childIndex === 0 && lineIndex === 0) {
// First physical line of the first block gets the marker.
lines.push(`${prefix} ${line}`);
}
else {
// Indent every continuation line by the marker width; keep blank
// lines blank rather than emitting trailing whitespace.
lines.push(line.length ? `${indent}${line}` : "");
}
});
});
return lines.join("\n");
};
const processListItem = (item, prefix) => {
const itemContent = item.content || [];
const childStrings = itemContent.map(processNode);
if (childStrings.length === 0)
return prefix;
// The rendered marker is `${prefix} ` (prefix + one space), so its width —
// and thus the continuation indent — is prefix.length + 1. This is correct
// for both bullet ("-" -> 2) and ordered ("1." -> 3, "10." -> 4) markers,
// since for those the visible prefix IS the list marker.
return indentItemChildren(childStrings, prefix, prefix.length + 1);
};
const processTaskItem = (item) => {
const checked = item.attrs?.checked || false;
const checkbox = checked ? "[x]" : "[ ]";
const prefix = `- ${checkbox}`;
const itemContent = item.content || [];
const childStrings = itemContent.map(processNode);
// An empty task item still needs its checkbox marker; without this guard
// the indent below produces "" and the "- [ ]"/"- [x]" row disappears.
if (childStrings.length === 0)
return prefix;
// The list marker for a task item is just "- " (2 columns); the "[ ] "/"[x] "
// checkbox is item content, NOT part of the marker. So the continuation
// indent is a fixed 2 — do NOT derive it from the wider prefix.length.
return indentItemChildren(childStrings, prefix, 2);
};
return processNode(content).trim();
}
-68
View File
@@ -1,68 +0,0 @@
/**
* Self-contained Docmost-flavoured Markdown document (custom extensions).
*
* A single `.md` file that packages everything needed to losslessly round-trip
* a page through "download -> edit body -> re-upload":
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
* - a trailing `docmost:comments` block: a one-line JSON array of comment
* threads.
*
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
* importer without first stripping the blocks, the metadata cannot leak into the
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
* codeBlock node, so a fenced block is deliberately NOT used.)
*
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
* re-pastes an exported `.md` into a page, or a page documents this very
* format). To stay robust, parsing treats only the FINAL, document-ending
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
* opener whose closing `-->` sits at the very end of the file. Any earlier
* literal occurrence is left in the body untouched.
*
* NOTE on comments: in this version the comment THREAD records are preserved in
* the file but are NOT pushed back to the server on import — only the inline
* comment marks (anchors) embedded in the body are restored. Managing comment
* records stays with the comment tools/UI.
*/
export interface DocmostMdMeta {
version: number;
pageId?: string;
slugId?: string;
title?: string;
spaceId?: string;
parentPageId?: string | null;
}
/**
* Assemble the full self-contained markdown file: meta block, body, and the
* comments block. The meta block is always emitted; the comments block is always
* emitted too (with `[]` when there are no comments) so the format stays uniform
* and parsing stays simple.
*/
export declare function serializeDocmostMarkdown(meta: DocmostMdMeta, body: string, comments: any[]): string;
/**
* Split a self-contained file back into its parts. Tolerant: if the meta or
* comments block is missing (e.g. a hand-written plain-markdown file), the
* corresponding value is returned as `null` and the whole input is treated as
* the body. This never throws on a MISSING block; only a `JSON.parse` failure
* inside a block that IS present is surfaced as a thrown Error with a clear
* message. Robust to `\r\n` line endings.
*/
export declare function parseDocmostMarkdown(full: string): {
meta: DocmostMdMeta | null;
body: string;
comments: any[] | null;
};
/**
* Serialize a self-contained markdown file with the meta block + body ONLY —
* NO trailing `docmost:comments` block. The sync engine never touches
* `/comments` (SPEC §3): the synced file carries just page identity (meta) and
* the body, where comment threads survive only as inline `<span
* data-comment-id>` anchor marks inside the body.
*
* `parseDocmostMarkdown` already tolerates a missing comments block (it returns
* `comments: null` and treats the rest as body), so a file produced here
* round-trips cleanly through the parser.
*/
export declare function serializeDocmostMarkdownBody(meta: DocmostMdMeta, body: string): string;
@@ -1,2 +0,0 @@
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
export declare function markdownToProseMirror(markdownContent: string): Promise<any>;
@@ -1,306 +0,0 @@
/**
* Pure markdown -> ProseMirror conversion.
*
* The converter path is `markdownToProseMirror` (marked -> HTML ->
* generateJSON) plus the two pre/post processors it needs (`preprocessCallouts`,
* `bridgeTaskLists`). The gitmost server writes the resulting page bodies
* natively through the collab gateway, so no websocket/Yjs write-path lives
* here.
*/
import { generateJSON } from "@tiptap/html";
import { JSDOM } from "jsdom";
import { marked } from "marked";
import { docmostExtensions } from "./docmost-schema.js";
// Setup DOM environment for Tiptap HTML parsing in Node.js
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
global.window = dom.window;
global.document = dom.window.document;
// @ts-ignore
global.Element = dom.window.Element;
/**
* Hard ceiling above which we skip callout preprocessing entirely. The linear
* scanner below has no quadratic blow-up, but we still cap input defensively so
* a pathological multi-megabyte payload cannot tie up the event loop; in that
* case the markdown is passed through verbatim (callouts are simply not
* detected) rather than risking a slow scan.
*/
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
/** Matches an opening callout fence: `:::type` (type captured, lower-cased). */
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
/** Matches a bare closing callout fence: `:::`. */
const CALLOUT_CLOSE_RE = /^:::\s*$/;
/** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
/**
* Pre-process Docmost-flavoured markdown: convert `:::type ... :::`
* callout blocks (the syntax our markdown export produces) into HTML
* divs that the callout extension parses. The inner content is rendered
* through marked as regular markdown.
*
* Implemented as a single linear pass over the lines (no quadratic regex
* rescan). It:
* - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
* `:::` line that lives inside a code fence as a callout delimiter, so a
* callout body that itself contains a fenced code block with a `:::` line is
* no longer corrupted;
* - matches an opening `:::type` line with the next CLOSING `:::` at the SAME
* nesting level, supporting NESTED callouts via a depth counter (an inner
* `:::type` opens a deeper level and consumes a matching `:::`);
* - emits the same `<div data-type="callout" data-callout-type="TYPE">` output
* (inner rendered through marked) as the previous regex implementation.
*/
async function preprocessCallouts(markdown) {
// Defensive cap: skip preprocessing for pathologically large inputs.
if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
return markdown;
}
// Recursively transform a slice of lines, converting top-level callouts in
// that slice into <div> blocks and rendering their inner content (which may
// itself contain nested callouts) through this same function.
const transform = async (lines) => {
const out = [];
let inCodeFence = false;
let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it
let i = 0;
while (i < lines.length) {
const line = lines[i];
// Inside a code fence, only its matching closing fence is significant;
// everything else (including `:::` lines) is copied through verbatim.
if (inCodeFence) {
out.push(line);
const fence = line.match(CODE_FENCE_RE);
if (fence && fence[2].startsWith(codeFenceMarker[0]) &&
fence[2].length >= codeFenceMarker.length) {
inCodeFence = false;
codeFenceMarker = "";
}
i++;
continue;
}
// A code fence opening outside any callout body: enter code-fence mode.
const fenceOpen = line.match(CODE_FENCE_RE);
if (fenceOpen) {
inCodeFence = true;
codeFenceMarker = fenceOpen[2];
out.push(line);
i++;
continue;
}
// An opening callout fence: scan forward (with code-fence and nested
// callout awareness) for its matching closing `:::` at the same level.
const open = line.match(CALLOUT_OPEN_RE);
if (open) {
const type = open[1].toLowerCase();
const bodyLines = [];
let depth = 1;
let innerInCodeFence = false;
let innerCodeFenceMarker = "";
let j = i + 1;
for (; j < lines.length; j++) {
const bl = lines[j];
if (innerInCodeFence) {
const f = bl.match(CODE_FENCE_RE);
if (f && f[2].startsWith(innerCodeFenceMarker[0]) &&
f[2].length >= innerCodeFenceMarker.length) {
innerInCodeFence = false;
innerCodeFenceMarker = "";
}
bodyLines.push(bl);
continue;
}
const innerFence = bl.match(CODE_FENCE_RE);
if (innerFence) {
innerInCodeFence = true;
innerCodeFenceMarker = innerFence[2];
bodyLines.push(bl);
continue;
}
if (CALLOUT_OPEN_RE.test(bl)) {
depth++;
bodyLines.push(bl);
continue;
}
if (CALLOUT_CLOSE_RE.test(bl)) {
depth--;
if (depth === 0)
break; // matching close for THIS callout
bodyLines.push(bl);
continue;
}
bodyLines.push(bl);
}
if (j < lines.length) {
// Found the matching closing fence: render the body (recursively, so
// nested callouts are handled) and emit the callout div.
const inner = await transform(bodyLines);
const renderedInner = await marked.parse(inner);
out.push(`\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`);
i = j + 1; // skip past the closing `:::`
continue;
}
// No matching close (unterminated callout): treat the opener as a
// literal line and continue, preserving the original text.
out.push(line);
i++;
continue;
}
out.push(line);
i++;
}
return out.join("\n");
};
return transform(markdown.split("\n"));
}
/**
* Bridge marked's checkbox lists to TipTap task lists.
*
* marked renders GitHub task list items (`- [x] done`) as a plain
* `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` WITHOUT the
* markup TipTap's TaskList/TaskItem extensions parse. This rewrites such lists
* into the shape those extensions expect:
* TaskList parseHTML matches `ul[data-type="taskList"]`,
* TaskItem matches `li[data-type="taskItem"]`,
* the checked state is read from `data-checked === "true"`.
*
* A list is only converted when it has at least one `<li>` and EVERY direct
* `<li>` contains a checkbox input. Both `<ul>` and `<ol>` are considered: a
* numbered checklist (`1. [x] a`, which marked renders as an `<ol>` of checkbox
* `<li>`s) would otherwise lose its task state. TipTap task lists are unordered,
* so a matching `<ol>` is emitted as `data-type="taskList"` exactly like a
* `<ul>`. Mixed or ordinary lists (including ordinary `<ol>` lists) are left
* untouched so they keep rendering as bullet/numbered lists. The marked `<p>`
* wrapper is kept inside the `<li>` because TaskItem content allows paragraphs.
*/
function bridgeTaskLists(html) {
// Cheap early-out: if the markup contains no checkbox input at all there is
// nothing to bridge, so skip the expensive JSDOM parse entirely. This is the
// common case (most pages have no task lists).
if (!/type=["']?checkbox/i.test(html)) {
return html;
}
// Defensive cap (consistent with preprocessCallouts): skip the bridge for
// pathologically large inputs rather than running a second expensive JSDOM
// parse on a multi-megabyte payload. The markup is passed through verbatim.
if (html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
return html;
}
const dom = new JSDOM(html);
const document = dom.window.document;
// Collect the checkbox(es) that belong to THIS <li> directly: either direct
// child <input type="checkbox"> elements or ones inside the <li>'s direct <p>
// child (the shape marked emits: `<li><p><input type="checkbox"> text</p></li>`).
// Checkboxes nested deeper (e.g. inside a child <ul>/<ol>) are excluded so a
// bullet <li> that merely contains a nested task sublist is not misdetected.
// Raw inline HTML can put more than one checkbox in a single <li>; we gather
// ALL of them so none survive into the converted item.
const directCheckboxes = (li) => {
const found = [];
for (const child of Array.from(li.children)) {
if (child.tagName === "INPUT" &&
child.getAttribute("type") === "checkbox") {
found.push(child);
continue;
}
if (child.tagName === "P") {
for (const inp of Array.from(child.querySelectorAll(":scope > input[type='checkbox']"))) {
found.push(inp);
}
}
}
return found;
};
// Both <ul> and <ol> are candidates: an <ol> whose every direct <li> carries
// its own checkbox is a numbered checklist that must also become a taskList.
const lists = Array.from(document.querySelectorAll("ul, ol"));
for (const list of lists) {
// Only consider DIRECT child <li> elements; nested lists are handled by
// their own iteration of the outer loop.
const items = Array.from(list.children).filter((child) => child.tagName === "LI");
if (items.length === 0)
continue;
const itemCheckboxes = items.map((li) => directCheckboxes(li));
// Convert only when every direct <li> carries at least one OWN checkbox.
if (!itemCheckboxes.every((boxes) => boxes.length > 0))
continue;
// A numbered checklist arrives as an <ol>. We must NOT leave the tag as
// <ol> while tagging it data-type="taskList": generateJSON would then match
// BOTH the orderedList rule (tag ol) and the taskList rule (data-type),
// emitting a phantom empty orderedList beside the real taskList. So rename a
// qualifying <ol> to a <ul> — move its <li> children over and replace it —
// leaving only the taskList rule to match. Already-<ul> lists are unchanged.
let target = list;
if (list.tagName === "OL") {
const ul = document.createElement("ul");
// Carry over existing attributes (e.g. class) so nothing is silently lost.
for (const attr of Array.from(list.attributes)) {
ul.setAttribute(attr.name, attr.value);
}
// Move every child node (including the <li>s we collected) into the <ul>.
while (list.firstChild) {
ul.appendChild(list.firstChild);
}
list.replaceWith(ul);
target = ul;
}
target.setAttribute("data-type", "taskList");
items.forEach((li, index) => {
const boxes = itemCheckboxes[index];
// The first checkbox determines the checked state (matches the previous
// single-checkbox behaviour); any extras only need removing.
const input = boxes[0] ?? null;
li.setAttribute("data-type", "taskItem");
const checked = input != null &&
(input.hasAttribute("checked") || input.checked);
li.setAttribute("data-checked", checked ? "true" : "false");
// Remove ALL direct checkbox inputs so none survive into the content
// (a raw-inline-HTML <li> may carry more than one).
for (const box of boxes) {
box.remove();
}
});
}
return document.body.innerHTML;
}
/**
* Recursively strip content-less paragraph nodes from a generated doc.
*
* A block-level atom whose markdown form is INLINE (e.g. the block `image`'s
* `![](url)`, or a bare media element) is wrapped by marked in a <p>; the schema
* then HOISTS the block atom out of that paragraph, leaving an EMPTY paragraph
* sibling. On the next export that empty `<p>` renders to "" and the doc "\n\n"
* join injects a phantom blank gap, so the markdown is not byte-stable.
*
* Markdown blank lines are separators, never content, so generateJSON only ever
* produces an empty paragraph as such a hoist artifact — removing them is safe
* and general (it also subsumes the <div>-wrapper workaround the `video` case
* uses). We remove ONLY `type === 'paragraph'` nodes whose `content` is absent
* or an empty array; every other node (including atoms without `content`) is
* preserved, and we recurse into the content of any node that has children.
*/
function stripEmptyParagraphs(node) {
if (!node || !Array.isArray(node.content)) {
// Atom / leaf node (no children to recurse into): keep as-is.
return node;
}
const mapped = node.content.map((child) => stripEmptyParagraphs(child));
const isEmptyParagraph = (child) => !!child &&
child.type === "paragraph" &&
(!Array.isArray(child.content) || child.content.length === 0);
const filtered = mapped.filter((child) => !isEmptyParagraph(child));
// Schema-validity guard: several nodes require NON-empty block content
// (`content: "block+"` — tableCell, tableHeader, blockquote, column, callout,
// and the doc root). For an empty one of those, generateJSON materializes a
// single empty paragraph as its OBLIGATORY content — that is not a hoist
// artifact. If stripping would empty the container, keep ONE empty paragraph
// so the result stays schema-valid (an empty cell/quote must not become `[]`).
const cleaned = filtered.length === 0 && mapped.length > 0 ? [mapped[0]] : filtered;
return { ...node, content: cleaned };
}
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
export async function markdownToProseMirror(markdownContent) {
const withCallouts = await preprocessCallouts(markdownContent);
const html = await marked.parse(withCallouts);
const bridged = bridgeTaskLists(html);
const doc = generateJSON(bridged, docmostExtensions);
return stripEmptyParagraphs(doc);
}
-194
View File
@@ -1,194 +0,0 @@
/**
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
* tree by node id.
*
* A ProseMirror node here is a plain JSON object of the shape produced by
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
* table cells hold their children in `content` just like any other block, so a
* single recursive walk reaches them all.
*
* Every exported function operates on a DEEP CLONE of the input document and
* returns the new document. The input doc and any `newNode`/`node` argument are
* never mutated. All functions are defensively null-safe: missing/!Array
* `content`, non-object nodes, and absent `attrs` are tolerated.
*/
/**
* Recursively concatenate all text contained in a node.
*
* Text nodes contribute their `text` string; container nodes contribute the
* joined `blockPlainText` of their `content` children. Returns "" for nullish
* or non-object inputs.
*/
export declare function blockPlainText(node: any): string;
/** One compact outline entry for a single top-level block. */
export interface OutlineEntry {
index: number;
type: string | undefined;
id: string | null;
firstText: string;
/** Present for headings only. */
level?: number | null;
/** Present for tables only. */
rows?: number;
cols?: number;
header?: string[];
/** Present for list blocks only (bulletList/orderedList/taskList). */
items?: number;
}
/**
* Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
* `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
* table cells — compactness is the point; use `getNodeByRef` to drill into a
* specific block.
*
* Each entry carries `{ index, type, id, firstText }`, plus type-specific
* extras: headings add `level`; tables add `rows`/`cols` and the first row's
* cell texts as `header`; list blocks (types ending in "List") add `items`.
* `firstText` is the block's plain text truncated to 100 chars. Null-safe:
* a missing or non-object doc/content yields `[]`.
*/
export declare function buildOutline(doc: any): OutlineEntry[];
/**
* Resolve a single node by reference and return `{ node, path, type }`, or
* `null` when nothing matches.
*
* - `ref` of the form `#<n>` (e.g. `#2`) selects the TOP-LEVEL block at index
* `n` in `doc.content`. This is the only way to address table/tableRow/
* tableCell nodes, which carry no `attrs.id`.
* - Otherwise `ref` is treated as a block id: the FIRST node anywhere in the
* tree with `attrs.id === ref` is returned.
*
* `path` is the array of child indices from the doc root down to the node
* (so a top-level block is `[index]`). The returned `node` is a DEEP CLONE,
* so callers can mutate it without touching the input doc. Null-safe.
*/
export declare function getNodeByRef(doc: any, ref: string): {
node: any;
path: number[];
type: string | undefined;
} | null;
/**
* Replace EVERY node whose `attrs.id === nodeId` with a deep clone of
* `newNode`, anywhere in the tree (including inside callouts and table cells).
*
* Operates on a clone of `doc`; returns `{ doc, replaced }` where `replaced`
* is the number of nodes substituted. A fresh clone of `newNode` is used for
* each match so they do not share references.
*/
export declare function replaceNodeById(doc: any, nodeId: string, newNode: any): {
doc: any;
replaced: number;
};
/**
* Remove EVERY node whose `attrs.id === nodeId` from its parent `content`
* array, anywhere in the tree (recursive, including callouts and tables).
*
* Operates on a clone of `doc`; returns `{ doc, deleted }` where `deleted` is
* the number of nodes removed.
*/
export declare function deleteNodeById(doc: any, nodeId: string): {
doc: any;
deleted: number;
};
/**
* Deep-clone `doc` and strip every node/mark attribute whose value is strictly
* `undefined`, so the result is safe to hand to Yjs (which throws an opaque
* "Unexpected content type" when asked to store an `undefined` attribute value).
*
* Only `undefined` keys are removed; `null`, `false`, `0`, and `""` are all
* legitimate JSON-storable values and are preserved. Operates on a clone and
* returns it; the input is never mutated. Defensively null-safe like the rest
* of the file.
*/
export declare function sanitizeForYjs(doc: any): any;
/**
* Diagnostics helper: walk the tree and return a human-readable path string for
* the FIRST attribute value (in any `node.attrs` or `mark.attrs`) that Yjs
* cannot store — i.e. `undefined`, a `function`, a `symbol`, or a `bigint`
* (e.g. `content[3].content[0].attrs.indent (undefined)`). Returns `null` when
* every attribute is storable. Null-safe.
*/
export declare function findUnstorableAttr(doc: any): string | null;
/** Options controlling where `insertNodeRelative` places the new node. */
export interface InsertOptions {
position: "before" | "after" | "append";
/** Resolve the anchor by node id anywhere in the tree (preferred). */
anchorNodeId?: string;
/** Fallback: first TOP-LEVEL block whose plain text includes this string. */
anchorText?: string;
}
/**
* Insert a deep clone of `node` relative to an anchor.
*
* - position "append": push the node onto the top-level `doc.content`.
* - position "before"/"after": locate the anchor and splice the node into the
* anchor's parent `content` array immediately before / after it.
*
* Anchor resolution for before/after:
* - if `anchorNodeId` is given, find the node with `attrs.id === anchorNodeId`
* anywhere in the tree (recursive);
* - otherwise, if `anchorText` is given, scan only TOP-LEVEL `doc.content`
* blocks and pick the first whose `blockPlainText` includes `anchorText`.
*
* Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is
* false when the anchor could not be resolved (the doc is returned unchanged
* apart from being cloned).
*/
export declare function insertNodeRelative(doc: any, node: any, opts: InsertOptions): {
doc: any;
inserted: boolean;
};
/**
* Read a table as a matrix. Returns null when `tableRef` resolves to no table.
*
* - `rows`/`cols`: the table's row count and the column count of its FIRST row.
* Tables may be ragged (rows of differing length), so `cols` reflects only
* row 0; use the per-row length of `cells`/`cellIds` for each row's actual
* width.
* - `cells`: `string[][]` of each cell's `blockPlainText`.
* - `cellIds`: `(string|null)[][]` of each cell's FIRST paragraph id (or null),
* so callers can `patch_node` a cell for rich-formatted edits.
* - `path`: index path of the table within the doc.
*/
export declare function readTable(doc: any, tableRef: string): {
rows: number;
cols: number;
cells: string[][];
cellIds: (string | null)[][];
path: number[];
} | null;
/**
* Insert a row of plain-text cells into a table. Returns `{ doc, inserted }`.
*
* The row is padded to the table's column count (`cells[i] ?? ""`); supplying
* MORE cells than columns throws. Each new cell copies `colwidth` for its
* column from the header row when present, gets a fresh-id paragraph, and a
* `colspan:1, rowspan:1` attrs. `index` (when an integer in `[0, rows]`) splices
* the row there; otherwise the row is appended at the end.
*/
export declare function insertTableRow(doc: any, tableRef: string, cells: string[], index?: number): {
doc: any;
inserted: boolean;
};
/**
* Delete the row at 0-based `index` from a table. Returns `{ doc, deleted }`.
* `deleted` is false only when the table cannot be located. Throws on an
* out-of-range index, and refuses to delete the table's only row.
*/
export declare function deleteTableRow(doc: any, tableRef: string, index: number): {
doc: any;
deleted: boolean;
};
/**
* Set the plain-text content of cell `[row, col]` (0-based) to `text`. Returns
* `{ doc, updated }`; `updated` is false only when the table cannot be located.
* Throws when `row`/`col` is out of range. The cell's own attrs (colspan/
* rowspan/colwidth) are preserved; its content becomes a single text paragraph
* that reuses the cell's existing first-paragraph id when present, else a fresh
* one.
*/
export declare function updateTableCell(doc: any, tableRef: string, row: number, col: number, text: string): {
doc: any;
updated: boolean;
};
-770
View File
@@ -1,770 +0,0 @@
/**
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
* tree by node id.
*
* A ProseMirror node here is a plain JSON object of the shape produced by
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
* table cells hold their children in `content` just like any other block, so a
* single recursive walk reaches them all.
*
* Every exported function operates on a DEEP CLONE of the input document and
* returns the new document. The input doc and any `newNode`/`node` argument are
* never mutated. All functions are defensively null-safe: missing/!Array
* `content`, non-object nodes, and absent `attrs` are tolerated.
*/
/** Deep-clone a JSON-serializable value without mutating the original. */
function clone(value) {
if (typeof structuredClone === "function") {
return structuredClone(value);
}
// Fallback for environments without structuredClone.
return JSON.parse(JSON.stringify(value));
}
/** True if `value` is a non-null object (and not an array). */
function isObject(value) {
return value != null && typeof value === "object" && !Array.isArray(value);
}
/** True if `node` carries the given id in `node.attrs.id`. */
function matchesId(node, nodeId) {
return isObject(node) && isObject(node.attrs) && node.attrs.id === nodeId;
}
/**
* Recursively concatenate all text contained in a node.
*
* Text nodes contribute their `text` string; container nodes contribute the
* joined `blockPlainText` of their `content` children. Returns "" for nullish
* or non-object inputs.
*/
export function blockPlainText(node) {
if (!isObject(node))
return "";
let out = "";
if (typeof node.text === "string") {
out += node.text;
}
if (Array.isArray(node.content)) {
for (const child of node.content) {
out += blockPlainText(child);
}
}
return out;
}
/** Truncate `text` to at most `n` chars, appending an ellipsis when cut. */
function truncate(text, n) {
return text.length > n ? text.slice(0, n) + "…" : text;
}
/**
* Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
* `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
* table cells — compactness is the point; use `getNodeByRef` to drill into a
* specific block.
*
* Each entry carries `{ index, type, id, firstText }`, plus type-specific
* extras: headings add `level`; tables add `rows`/`cols` and the first row's
* cell texts as `header`; list blocks (types ending in "List") add `items`.
* `firstText` is the block's plain text truncated to 100 chars. Null-safe:
* a missing or non-object doc/content yields `[]`.
*/
export function buildOutline(doc) {
if (!isObject(doc) || !Array.isArray(doc.content))
return [];
const out = [];
for (let i = 0; i < doc.content.length; i++) {
const block = doc.content[i];
const type = isObject(block) ? block.type : undefined;
const entry = {
index: i,
type,
id: isObject(block) && isObject(block.attrs) ? block.attrs.id ?? null : null,
firstText: truncate(blockPlainText(block), 100),
};
if (type === "heading") {
entry.level = isObject(block.attrs) ? block.attrs.level ?? null : null;
}
else if (type === "table") {
const headerRow = block.content?.[0]?.content ?? [];
entry.rows = block.content?.length ?? 0;
entry.cols = block.content?.[0]?.content?.length ?? 0;
entry.header = headerRow.map((cell) => truncate(blockPlainText(cell), 40));
}
else if (typeof type === "string" && type.endsWith("List")) {
entry.items = block.content?.length ?? 0;
}
out.push(entry);
}
return out;
}
/**
* Resolve a single node by reference and return `{ node, path, type }`, or
* `null` when nothing matches.
*
* - `ref` of the form `#<n>` (e.g. `#2`) selects the TOP-LEVEL block at index
* `n` in `doc.content`. This is the only way to address table/tableRow/
* tableCell nodes, which carry no `attrs.id`.
* - Otherwise `ref` is treated as a block id: the FIRST node anywhere in the
* tree with `attrs.id === ref` is returned.
*
* `path` is the array of child indices from the doc root down to the node
* (so a top-level block is `[index]`). The returned `node` is a DEEP CLONE,
* so callers can mutate it without touching the input doc. Null-safe.
*/
export function getNodeByRef(doc, ref) {
if (!isObject(doc))
return null;
// "#<n>": index into the top-level content array.
const indexMatch = typeof ref === "string" ? ref.match(/^#(\d+)$/) : null;
if (indexMatch) {
const index = Number(indexMatch[1]);
const block = Array.isArray(doc.content) ? doc.content[index] : undefined;
if (!isObject(block))
return null;
return { node: clone(block), path: [index], type: block.type };
}
// Otherwise: depth-first search for the first node with attrs.id === ref.
const search = (node, trail) => {
if (!isObject(node))
return null;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const child = node.content[i];
const path = [...trail, i];
if (matchesId(child, ref)) {
return { node: clone(child), path, type: child.type };
}
const hit = search(child, path);
if (hit != null)
return hit;
}
}
return null;
};
return search(doc, []);
}
/**
* Replace EVERY node whose `attrs.id === nodeId` with a deep clone of
* `newNode`, anywhere in the tree (including inside callouts and table cells).
*
* Operates on a clone of `doc`; returns `{ doc, replaced }` where `replaced`
* is the number of nodes substituted. A fresh clone of `newNode` is used for
* each match so they do not share references.
*/
export function replaceNodeById(doc, nodeId, newNode) {
const out = clone(doc);
let replaced = 0;
// Walk a content array, replacing direct matches and recursing into the
// (possibly new) children of non-matching nodes.
const walkContent = (content) => {
for (let i = 0; i < content.length; i++) {
const child = content[i];
if (matchesId(child, nodeId)) {
content[i] = clone(newNode);
replaced++;
// Do not recurse into a freshly substituted node.
continue;
}
if (isObject(child) && Array.isArray(child.content)) {
walkContent(child.content);
}
}
};
if (isObject(out) && Array.isArray(out.content)) {
walkContent(out.content);
}
return { doc: out, replaced };
}
/**
* Remove EVERY node whose `attrs.id === nodeId` from its parent `content`
* array, anywhere in the tree (recursive, including callouts and tables).
*
* Operates on a clone of `doc`; returns `{ doc, deleted }` where `deleted` is
* the number of nodes removed.
*/
export function deleteNodeById(doc, nodeId) {
const out = clone(doc);
let deleted = 0;
// Filter a content array in place, dropping matches and recursing into the
// surviving children.
const walkContent = (content) => {
const kept = [];
for (const child of content) {
if (matchesId(child, nodeId)) {
deleted++;
continue;
}
if (isObject(child) && Array.isArray(child.content)) {
child.content = walkContent(child.content);
}
kept.push(child);
}
return kept;
};
if (isObject(out) && Array.isArray(out.content)) {
out.content = walkContent(out.content);
}
return { doc: out, deleted };
}
/**
* Deep-clone `doc` and strip every node/mark attribute whose value is strictly
* `undefined`, so the result is safe to hand to Yjs (which throws an opaque
* "Unexpected content type" when asked to store an `undefined` attribute value).
*
* Only `undefined` keys are removed; `null`, `false`, `0`, and `""` are all
* legitimate JSON-storable values and are preserved. Operates on a clone and
* returns it; the input is never mutated. Defensively null-safe like the rest
* of the file.
*/
export function sanitizeForYjs(doc) {
const out = clone(doc);
// Drop every key whose value is strictly `undefined` from an attrs object.
const stripUndefined = (attrs) => {
if (!isObject(attrs))
return;
for (const key of Object.keys(attrs)) {
if (attrs[key] === undefined) {
delete attrs[key];
}
}
};
const walk = (node) => {
if (!isObject(node))
return;
stripUndefined(node.attrs);
if (Array.isArray(node.marks)) {
for (const mark of node.marks) {
if (isObject(mark))
stripUndefined(mark.attrs);
}
}
if (Array.isArray(node.content)) {
for (const child of node.content) {
walk(child);
}
}
};
walk(out);
return out;
}
/**
* Diagnostics helper: walk the tree and return a human-readable path string for
* the FIRST attribute value (in any `node.attrs` or `mark.attrs`) that Yjs
* cannot store — i.e. `undefined`, a `function`, a `symbol`, or a `bigint`
* (e.g. `content[3].content[0].attrs.indent (undefined)`). Returns `null` when
* every attribute is storable. Null-safe.
*/
export function findUnstorableAttr(doc) {
const isUnstorable = (value) => {
if (value === undefined)
return "undefined";
const t = typeof value;
if (t === "function")
return "function";
if (t === "symbol")
return "symbol";
if (t === "bigint")
return "bigint";
return null;
};
// Check an attrs object; return the offending sub-path or null.
const checkAttrs = (attrs, basePath) => {
if (!isObject(attrs))
return null;
for (const key of Object.keys(attrs)) {
const kind = isUnstorable(attrs[key]);
if (kind != null)
return `${basePath}.${key} (${kind})`;
}
return null;
};
const walk = (node, path) => {
if (!isObject(node))
return null;
const attrHit = checkAttrs(node.attrs, `${path}.attrs`);
if (attrHit != null)
return attrHit;
if (Array.isArray(node.marks)) {
for (let i = 0; i < node.marks.length; i++) {
const markHit = checkAttrs(node.marks[i]?.attrs, `${path}.marks[${i}].attrs`);
if (markHit != null)
return markHit;
}
}
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const childHit = walk(node.content[i], `${path}.content[${i}]`);
if (childHit != null)
return childHit;
}
}
return null;
};
// The root doc node carries no useful index, so start the path at "doc".
if (!isObject(doc))
return null;
const attrHit = checkAttrs(doc.attrs, "attrs");
if (attrHit != null)
return attrHit;
if (Array.isArray(doc.content)) {
for (let i = 0; i < doc.content.length; i++) {
const childHit = walk(doc.content[i], `content[${i}]`);
if (childHit != null)
return childHit;
}
}
return null;
}
/**
* Table structural node types and the container each must live directly inside.
* Used by `insertNodeRelative` to splice rows/cells into the correct ancestor
* rather than blindly into the anchor's direct parent (which would corrupt the
* table's nesting).
*/
const STRUCTURAL_TYPES = new Set(["tableRow", "tableCell", "tableHeader"]);
const REQUIRED_CONTAINER = {
tableRow: "table",
tableCell: "tableRow",
tableHeader: "tableRow",
};
/**
* Locate an anchor and return its ancestor chain (from `doc` down to and
* including the matched node). Each chain entry is `{ node, index }` where
* `index` is the node's position inside its parent's `content` array (the root
* doc has index -1). Returns `null` when the anchor cannot be resolved.
*/
function findAnchorChain(doc, opts) {
if (!isObject(doc))
return null;
// DFS by id anywhere in the tree, accumulating the path.
if (opts.anchorNodeId != null) {
const targetId = opts.anchorNodeId;
const search = (node, index, trail) => {
if (!isObject(node))
return null;
const here = [...trail, { node, index }];
if (matchesId(node, targetId))
return here;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const hit = search(node.content[i], i, here);
if (hit != null)
return hit;
}
}
return null;
};
return search(doc, -1, []);
}
// By text: only top-level blocks are scanned (same rule as the JSON path).
if (opts.anchorText != null && Array.isArray(doc.content)) {
for (let i = 0; i < doc.content.length; i++) {
if (blockPlainText(doc.content[i]).includes(opts.anchorText)) {
return [
{ node: doc, index: -1 },
{ node: doc.content[i], index: i },
];
}
}
}
return null;
}
/**
* Insert a deep clone of `node` relative to an anchor.
*
* - position "append": push the node onto the top-level `doc.content`.
* - position "before"/"after": locate the anchor and splice the node into the
* anchor's parent `content` array immediately before / after it.
*
* Anchor resolution for before/after:
* - if `anchorNodeId` is given, find the node with `attrs.id === anchorNodeId`
* anywhere in the tree (recursive);
* - otherwise, if `anchorText` is given, scan only TOP-LEVEL `doc.content`
* blocks and pick the first whose `blockPlainText` includes `anchorText`.
*
* Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is
* false when the anchor could not be resolved (the doc is returned unchanged
* apart from being cloned).
*/
export function insertNodeRelative(doc, node, opts) {
const out = clone(doc);
const fresh = clone(node);
// Defensive: stay null-safe like the other exports — a missing opts means
// there is nothing actionable to do.
if (!isObject(opts))
return { doc: out, inserted: false };
const isStructural = isObject(node) && STRUCTURAL_TYPES.has(node.type);
// "append": top-level push.
if (opts.position === "append") {
// Structural table nodes (tableRow/tableCell/tableHeader) cannot live at the
// top level — appending one would produce invalid nesting.
if (isStructural) {
throw new Error(`insert_node: cannot append a ${node.type} at the top level; use ` +
`position before/after with an anchor inside the target table`);
}
if (isObject(out)) {
if (!Array.isArray(out.content))
out.content = [];
out.content.push(fresh);
return { doc: out, inserted: true };
}
return { doc: out, inserted: false };
}
const offset = opts.position === "after" ? 1 : 0;
// Structural insert (before/after a tableRow/tableCell/tableHeader): splice
// into the nearest enclosing table/tableRow rather than the anchor's direct
// parent, so the row/cell lands at the correct level of the table.
if (isStructural) {
const containerType = REQUIRED_CONTAINER[node.type];
const chain = findAnchorChain(out, opts);
// Anchor not resolved at all — keep the existing "anchor not found" path.
if (chain == null)
return { doc: out, inserted: false };
// Find the DEEPEST ancestor (including the anchor itself) of the required
// container type.
let containerIdx = -1;
for (let i = chain.length - 1; i >= 0; i--) {
if (isObject(chain[i].node) && chain[i].node.type === containerType) {
containerIdx = i;
break;
}
}
if (containerIdx === -1) {
throw new Error(`insert_node: cannot insert a ${node.type} here — the anchor is not ` +
`inside a ${containerType}. Anchor on a cell's text or a block id ` +
`that lives inside the target table.`);
}
const container = chain[containerIdx].node;
if (!Array.isArray(container.content))
container.content = [];
if (containerIdx === chain.length - 1) {
// The matched container IS the anchor node itself (e.g. anchorText
// resolved to the table block): append/prepend within it.
const at = opts.position === "after" ? container.content.length : 0;
container.content.splice(at, 0, fresh);
}
else {
// The immediate child on the path leading to the anchor is the row/cell
// to splice next to.
const enclosingChildIndex = chain[containerIdx + 1].index;
container.content.splice(enclosingChildIndex + offset, 0, fresh);
}
return { doc: out, inserted: true };
}
// Resolve by id anywhere in the tree: splice into the parent content array.
if (opts.anchorNodeId != null) {
let inserted = false;
const walkContent = (content) => {
for (let i = 0; i < content.length; i++) {
const child = content[i];
if (matchesId(child, opts.anchorNodeId)) {
content.splice(i + offset, 0, fresh);
inserted = true;
return;
}
if (isObject(child) && Array.isArray(child.content)) {
walkContent(child.content);
if (inserted)
return;
}
}
};
if (isObject(out) && Array.isArray(out.content)) {
walkContent(out.content);
}
return { doc: out, inserted };
}
// Resolve by text: only top-level doc.content blocks are scanned.
if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
for (let i = 0; i < out.content.length; i++) {
if (blockPlainText(out.content[i]).includes(opts.anchorText)) {
out.content.splice(i + offset, 0, fresh);
return { doc: out, inserted: true };
}
}
}
return { doc: out, inserted: false };
}
// ===========================================================================
// Table editing helpers
//
// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
// table -> { type:"table", content:[tableRow...] }
// row -> { type:"tableRow", content:[tableCell|tableHeader...] }
// cell -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
// content:[paragraph...] }
// para -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
// of the input doc (via `clone`) and never mutate their inputs.
// ===========================================================================
/**
* Collect EVERY `attrs.id` present anywhere in `node` into `used`. Used to seed
* `makeFreshId` so generated paragraph ids never collide with existing ones.
*/
function collectIds(node, used) {
if (!isObject(node))
return;
if (isObject(node.attrs) && typeof node.attrs.id === "string") {
used.add(node.attrs.id);
}
if (Array.isArray(node.content)) {
for (const child of node.content)
collectIds(child, used);
}
}
/**
* Fresh-id generator: returns a random Docmost-style id (12 chars from
* lowercase `a-z0-9`) that is not already in `used`, and records it. On the
* rare collision the id is regenerated. Callers rely on uniqueness, not on the
* exact string, so randomness is fine — and unlike a module-local counter it
* needs no reset and cannot become predictable across calls.
*/
function makeFreshId(used) {
const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
let id;
do {
id = "";
for (let i = 0; i < 12; i++) {
id += alphabet[Math.floor(Math.random() * alphabet.length)];
}
} while (used.has(id) || id === "");
used.add(id);
return id;
}
/**
* Resolve a table reference against an ALREADY-CLONED doc and return the LIVE
* table node (a reference inside `rootClone`, so the caller may mutate it) plus
* its index path. Returns null when no table matches.
*
* - `#<n>`: the top-level block at index `n`, only if its `type === "table"`.
* - otherwise: DFS for the node with `attrs.id === tableRef`, then walk UP its
* ancestor chain to the nearest `type === "table"` ancestor.
*/
function locateTable(rootClone, tableRef) {
if (!isObject(rootClone))
return null;
// "#<n>": index into the top-level content array; must be a table.
const indexMatch = typeof tableRef === "string" ? tableRef.match(/^#(\d+)$/) : null;
if (indexMatch) {
const index = Number(indexMatch[1]);
const block = Array.isArray(rootClone.content)
? rootClone.content[index]
: undefined;
if (isObject(block) && block.type === "table") {
return { table: block, path: [index] };
}
return null;
}
// Otherwise: DFS for attrs.id === tableRef, tracking the ancestor chain, then
// climb to the nearest enclosing table.
const search = (node, trail) => {
if (!isObject(node))
return null;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const child = node.content[i];
const here = [...trail, { node: child, index: i }];
if (matchesId(child, tableRef)) {
// Walk UP to the nearest table ancestor (including the match itself).
for (let j = here.length - 1; j >= 0; j--) {
if (isObject(here[j].node) && here[j].node.type === "table") {
return {
table: here[j].node,
path: here.slice(0, j + 1).map((e) => e.index),
};
}
}
return null; // id found but no enclosing table
}
const hit = search(child, here);
if (hit != null)
return hit;
}
}
return null;
};
return search(rootClone, []);
}
/** Build the plain-text → single-paragraph cell content used by all writers. */
function makeCellParagraph(id, text) {
return {
type: "paragraph",
attrs: { id, indent: 0 },
// Empty string → a paragraph with an empty content array.
content: text ? [{ type: "text", text }] : [],
};
}
/**
* Read a table as a matrix. Returns null when `tableRef` resolves to no table.
*
* - `rows`/`cols`: the table's row count and the column count of its FIRST row.
* Tables may be ragged (rows of differing length), so `cols` reflects only
* row 0; use the per-row length of `cells`/`cellIds` for each row's actual
* width.
* - `cells`: `string[][]` of each cell's `blockPlainText`.
* - `cellIds`: `(string|null)[][]` of each cell's FIRST paragraph id (or null),
* so callers can `patch_node` a cell for rich-formatted edits.
* - `path`: index path of the table within the doc.
*/
export function readTable(doc, tableRef) {
const root = clone(doc);
const located = locateTable(root, tableRef);
if (located == null)
return null;
const { table, path } = located;
const rowNodes = Array.isArray(table.content) ? table.content : [];
const rows = rowNodes.length;
const cols = rowNodes[0]?.content?.length ?? 0;
const cells = [];
const cellIds = [];
for (const rowNode of rowNodes) {
const cellNodes = Array.isArray(rowNode?.content) ? rowNode.content : [];
const rowText = [];
const rowIds = [];
for (const cellNode of cellNodes) {
rowText.push(blockPlainText(cellNode));
// The cell's first paragraph carries the id used for patch_node.
const firstPara = Array.isArray(cellNode?.content)
? cellNode.content[0]
: undefined;
const id = isObject(firstPara) && isObject(firstPara.attrs)
? firstPara.attrs.id ?? null
: null;
rowIds.push(id);
}
cells.push(rowText);
cellIds.push(rowIds);
}
return { rows, cols, cells, cellIds, path };
}
/**
* Insert a row of plain-text cells into a table. Returns `{ doc, inserted }`.
*
* The row is padded to the table's column count (`cells[i] ?? ""`); supplying
* MORE cells than columns throws. Each new cell copies `colwidth` for its
* column from the header row when present, gets a fresh-id paragraph, and a
* `colspan:1, rowspan:1` attrs. `index` (when an integer in `[0, rows]`) splices
* the row there; otherwise the row is appended at the end.
*/
export function insertTableRow(doc, tableRef, cells, index) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)
return { doc: out, inserted: false };
const { table } = located;
if (!Array.isArray(table.content))
table.content = [];
const rows = table.content.length;
const headerRow = table.content[0];
const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : [];
// Column count is the WIDEST existing row, so the guard below stays
// meaningful for ragged tables and the new row matches the table's width.
// Fall back to the supplied cell count only when the table has no rows.
let colCount = 0;
for (const r of table.content) {
if (isObject(r) && Array.isArray(r.content))
colCount = Math.max(colCount, r.content.length);
}
if (colCount === 0)
colCount = Array.isArray(cells) ? cells.length : 0;
if (Array.isArray(cells) && cells.length > colCount) {
throw new Error(`table_insert_row: got ${cells.length} cell(s) but the table has ${colCount} column(s)`);
}
// Resolve the landing index up front so the cell-type decision and the splice
// below agree: a valid integer in [0, rows] splices there, else we append.
const landingIndex = typeof index === "number" && Number.isInteger(index) && index >= 0 && index <= rows
? index
: rows;
// Seed the id generator with every id already in the doc so the new cell
// paragraph ids are unique within the whole document.
const used = new Set();
collectIds(out, used);
const newCells = [];
for (let i = 0; i < colCount; i++) {
const text = (Array.isArray(cells) ? cells[i] : undefined) ?? "";
const attrs = { colspan: 1, rowspan: 1 };
// Copy this column's colwidth from the header row's cell when present.
const colwidth = headerCells[i]?.attrs?.colwidth;
if (colwidth !== undefined)
attrs.colwidth = colwidth;
// A row landing at index 0 becomes the new header row, so inherit the
// current header cell's type per column (Docmost uses "tableHeader" there);
// every other position is a plain data cell.
const cellType = landingIndex === 0 ? headerCells[i]?.type ?? "tableCell" : "tableCell";
newCells.push({
type: cellType,
attrs,
content: [makeCellParagraph(makeFreshId(used), text)],
});
}
const newRow = { type: "tableRow", content: newCells };
// Splice at the resolved landing index (append when index was omitted/invalid).
table.content.splice(landingIndex, 0, newRow);
return { doc: out, inserted: true };
}
/**
* Delete the row at 0-based `index` from a table. Returns `{ doc, deleted }`.
* `deleted` is false only when the table cannot be located. Throws on an
* out-of-range index, and refuses to delete the table's only row.
*/
export function deleteTableRow(doc, tableRef, index) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)
return { doc: out, deleted: false };
const { table } = located;
if (!Array.isArray(table.content))
table.content = [];
const rows = table.content.length;
if (!Number.isInteger(index) || index < 0 || index >= rows) {
throw new Error(`table_delete_row: row index ${index} out of range (table has ${rows} row(s))`);
}
if (rows <= 1) {
throw new Error("table_delete_row: refusing to delete the only row of the table");
}
table.content.splice(index, 1);
return { doc: out, deleted: true };
}
/**
* Set the plain-text content of cell `[row, col]` (0-based) to `text`. Returns
* `{ doc, updated }`; `updated` is false only when the table cannot be located.
* Throws when `row`/`col` is out of range. The cell's own attrs (colspan/
* rowspan/colwidth) are preserved; its content becomes a single text paragraph
* that reuses the cell's existing first-paragraph id when present, else a fresh
* one.
*/
export function updateTableCell(doc, tableRef, row, col, text) {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null)
return { doc: out, updated: false };
const { table } = located;
const rowNodes = Array.isArray(table.content) ? table.content : [];
const rows = rowNodes.length;
const rowNode = rowNodes[row];
const cols = isObject(rowNode) && Array.isArray(rowNode.content)
? rowNode.content.length
: 0;
if (!Number.isInteger(row) ||
row < 0 ||
row >= rows ||
!Number.isInteger(col) ||
col < 0 ||
col >= cols) {
throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
}
const cellNode = rowNode.content[col];
// Reuse the cell's existing first-paragraph id, or mint a fresh unique one.
const existingPara = Array.isArray(cellNode?.content)
? cellNode.content[0]
: undefined;
let id = isObject(existingPara) && isObject(existingPara.attrs)
? existingPara.attrs.id
: undefined;
if (typeof id !== "string" || id.length === 0) {
const used = new Set();
collectIds(out, used);
id = makeFreshId(used);
}
cellNode.content = [makeCellParagraph(id, text)];
return { doc: out, updated: true };
}
-50
View File
@@ -1,50 +0,0 @@
/**
* The native-Obsidian page-file format (design: docs/backlog/git-sync-thin-meta.md).
* A page file is CLEAN markdown with a minimal YAML frontmatter carrying ONLY the
* page's durable identity:
*
* ---
* gitmost_id: 019ef6fc-2638-7ce1-9ce3-2756ce038480
* ---
* <clean markdown body>
*
* Everything else is derived (title = filename, parentPageId = enclosing folder,
* spaceId = the vault, updatedAt = git). `gitmost_id` (a Docmost pageId) is the
* only non-derivable bit and travels WITH the file so identity survives any move,
* even one git's rename detection misses. Third-party editors (Obsidian, …) see
* clean markdown; the frontmatter is hidden in their preview.
*
* No backward-compat with the old `docmost:meta` format: vaults are a cache, wiped
* and rebuilt native. A file WITHOUT a `gitmost_id` frontmatter is an un-tracked
* (e.g. hand-written) file -> the caller ADOPTS it (creates a page, writes the id).
*/
/**
* The frontmatter key carrying the Docmost pageId. NAMESPACED (not a bare `id`)
* so it never collides with a user's own frontmatter fields.
*/
export declare const ID_KEY = "gitmost_id";
/**
* Parse a page file into its identity (`id`) and clean markdown `body`. Tolerant:
* a file with no frontmatter (a hand-written third-party file) returns `id: null`
* and the whole text as the body — the caller then ADOPTS it (creates a page,
* writes the id back).
*
* KNOWN LIMITATION (phase 4 — adoption, see docs/backlog/git-sync-thin-meta.md):
* a leading frontmatter block is stripped from `body` even when it carries NO
* `gitmost_id` but DOES carry the user's own Obsidian properties (`tags:` etc.).
* On adoption those fields are not yet round-tripped — `serializePageFile`
* write-back persists only `gitmost_id`. Preserving arbitrary user frontmatter
* across the Docmost round-trip (BOTH adoption write-back AND the next pull's
* re-serialize) is deferred to the adoption phase; until then, do NOT roll the
* native format onto a real Obsidian vault whose notes carry properties.
*/
export declare function parsePageFile(full: string): {
id: string | null;
body: string;
};
/**
* Serialize a page into the thin format: `id` frontmatter + a blank line + the
* clean body + a trailing newline. Deterministic so an unchanged page re-syncs to
* byte-identical output (no churn — the loop-guard relies on it).
*/
export declare function serializePageFile(id: string, body: string): string;
-14
View File
@@ -1,14 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules/esbuild/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules/esbuild/node_modules:/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules/esbuild/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules/esbuild/node_modules:/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
"$basedir/../../../../node_modules/.pnpm/esbuild@0.28.0/node_modules/esbuild/bin/esbuild" "$@"
exit $?
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/lib/node_modules:/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/node_modules:/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/lib/node_modules:/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/node_modules:/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../../../../node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/lib/jiti-cli.mjs" "$@"
else
exec node "$basedir/../../../../node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/lib/jiti-cli.mjs" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules/less/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules/less/node_modules:/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules/less/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules/less/node_modules:/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../../../../node_modules/.pnpm/less@4.2.0/node_modules/less/bin/lessc" "$@"
else
exec node "$basedir/../../../../node_modules/.pnpm/less@4.2.0/node_modules/less/bin/lessc" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/node_modules:/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/node_modules:/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../marked/bin/marked.js" "$@"
else
exec node "$basedir/../marked/bin/marked.js" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules/terser/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules/terser/node_modules:/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules/terser/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules/terser/node_modules:/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../../../../node_modules/.pnpm/terser@5.39.0/node_modules/terser/bin/terser" "$@"
else
exec node "$basedir/../../../../node_modules/.pnpm/terser@5.39.0/node_modules/terser/bin/terser" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../typescript/bin/tsc" "$@"
else
exec node "$basedir/../typescript/bin/tsc" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../typescript/bin/tsserver" "$@"
else
exec node "$basedir/../typescript/bin/tsserver" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/dist/node_modules:/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/node_modules:/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/dist/node_modules:/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/node_modules:/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../../../../node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/dist/cli.mjs" "$@"
else
exec node "$basedir/../../../../node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/dist/cli.mjs" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/node_modules:/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/node_modules:/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../../../../node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/bin/vite.js" "$@"
else
exec node "$basedir/../../../../node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/bin/vite.js" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules/vitest/node_modules:/home/claude/gitmost/node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules/vitest/node_modules:/home/claude/gitmost/node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../vitest/vitest.mjs" "$@"
else
exec node "$basedir/../vitest/vitest.mjs" "$@"
fi
-17
View File
@@ -1,17 +0,0 @@
#!/bin/sh
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
case `uname` in
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
esac
if [ -z "$NODE_PATH" ]; then
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/yaml@2.8.3/node_modules/yaml/node_modules:/home/claude/gitmost/node_modules/.pnpm/yaml@2.8.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
else
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/yaml@2.8.3/node_modules/yaml/node_modules:/home/claude/gitmost/node_modules/.pnpm/yaml@2.8.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
fi
if [ -x "$basedir/node" ]; then
exec "$basedir/node" "$basedir/../../../../node_modules/.pnpm/yaml@2.8.3/node_modules/yaml/bin.mjs" "$@"
else
exec node "$basedir/../../../../node_modules/.pnpm/yaml@2.8.3/node_modules/yaml/bin.mjs" "$@"
fi
@@ -1 +0,0 @@
{"version":"4.1.6","results":[[":test/node-ops.test.ts",{"duration":73.83617300000003,"failed":false}],[":test/markdown-converter.test.ts",{"duration":52.24364600000001,"failed":false}],[":test/diff.test.ts",{"duration":48.002140000000054,"failed":false}],[":test/node-ops-extra.test.ts",{"duration":64.79457399999995,"failed":false}],[":test/reconcile.test.ts",{"duration":13.454662000000042,"failed":false}],[":test/canonicalize.test.ts",{"duration":15.510864999999967,"failed":false}],[":test/markdown-roundtrip.property.test.ts",{"duration":10142.778976,"failed":false}],[":test/stabilize.test.ts",{"duration":180.60366900000008,"failed":false}],[":test/canonicalize-extra.test.ts",{"duration":265.1806279999996,"failed":false}],[":test/loop-guard.test.ts",{"duration":9.12148000000002,"failed":false}],[":test/markdown-document.test.ts",{"duration":9.338571000000002,"failed":false}],[":test/sanitize.test.ts",{"duration":20.903294999999957,"failed":false}],[":test/markdown-converter-golden.test.ts",{"duration":20.178874000000008,"failed":false}],[":test/roundtrip-corpus.test.ts",{"duration":375.9727969999999,"failed":false}],[":test/layout.test.ts",{"duration":25.806564999999978,"failed":false}],[":test/markdown-document-envelope.test.ts",{"duration":17.760928999999976,"failed":false}],[":test/roundtrip.test.ts",{"duration":202.1052659999998,"failed":false}],[":test/compute-push-actions.test.ts",{"duration":18.895632999999975,"failed":false}],[":test/apply-pull-actions.test.ts",{"duration":312.7543149999997,"failed":false}],[":test/git.test.ts",{"duration":2510.628562,"failed":false}],[":test/run-push.test.ts",{"duration":52.35109799999998,"failed":false}],[":test/compute-pull-actions.test.ts",{"duration":12.83178799999996,"failed":false}],[":test/apply-push-actions.test.ts",{"duration":40.049105,"failed":false}],[":test/classify-rename-moves.test.ts",{"duration":11.772115999999983,"failed":false}],[":test/git-merge.test.ts",{"duration":394.734729,"failed":false}],[":test/read-existing.test.ts",{"duration":9.485771000000113,"failed":false}],[":test/config-errors-invalid.test.ts",{"duration":22.83441799999997,"failed":false}],[":test/run-push-realgit.test.ts",{"duration":341.63427,"failed":false}],[":test/settings.test.ts",{"duration":18.815516000000002,"failed":false}],[":test/config-errors.test.ts",{"duration":22.358415000000036,"failed":false}],[":test/git-sync-client.contract.test-d.ts",{"duration":0,"failed":false}],[":test/engine-gaps.test.ts",{"duration":107.23285100000021,"failed":false}],[":test/markdown-converter-gaps.test.ts",{"duration":397.53935699999965,"failed":false}],[":test/git-integration-gaps.test.ts",{"duration":401.41072199999996,"failed":false}],[":test/markdown-to-prosemirror-gaps.test.ts",{"duration":446.77069600000004,"failed":false}],[":test/zzprobe.test.ts",{"duration":206.321958,"failed":false}],[":test/_probe_rt.test.ts",{"duration":113.90998200000013,"failed":false}],[":test/_probe2.test.ts",{"duration":87.88095900000008,"failed":false}],[":test/zz-probe.test.ts",{"duration":61.425263000000086,"failed":false}],[":test/zzz-probe.test.ts",{"duration":128.94683599999985,"failed":true}],[":test/_probe.test.ts",{"duration":135.79946900000004,"failed":false}],[":test/__probe.test.ts",{"duration":5.685652999999945,"failed":false}],[":test/markdown-converter-html-marks.test.ts",{"duration":10.321619999999996,"failed":false}],[":test/_probe/probe.test.ts",{"duration":71.38958900000011,"failed":false}],[":test/media-roundtrip.test.ts",{"duration":196.99739999999997,"failed":false}],[":test/diagram-roundtrip.test.ts",{"duration":82.55217999999968,"failed":false}],[":test/git-error-paths.test.ts",{"duration":303.43118300000003,"failed":false}],[":test/zzprobe2.test.ts",{"duration":54.94561099999987,"failed":false}],[":test/zzprobe3.test.ts",{"duration":77.88595900000018,"failed":false}],[":test/docmost-schema-attrs.test.ts",{"duration":10.282551000000012,"failed":false}],[":test/_valid_probe.test.ts",{"duration":92.35715300000015,"failed":false}],[":test/strip-empty-paragraphs-validity.test.ts",{"duration":127.7716620000001,"failed":false}],[":test/cycle.test.ts",{"duration":17.375657000000047,"failed":false}],[":test/cycle-roundtrip.test.ts",{"duration":582.6821960000002,"failed":false}],[":test/vault-index.test.ts",{"duration":9.033900000000017,"failed":false}],[":test/page-file.test.ts",{"duration":7.111135999999988,"failed":false}]]}
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@fellow+prosemirror-recreate-transform@1.2.3/node_modules/@fellow/prosemirror-recreate-transform
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+core@3.20.4_@tiptap+pm@3.20.4/node_modules/@tiptap/core
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-highlight@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4_/node_modules/@tiptap/extension-highlight
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-image@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4_/node_modules/@tiptap/extension-image
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-subscript@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-subscript
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-superscript@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-superscript
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-task-item@3.20.4_@tiptap+extension-list@3.20.4_@tiptap+core@3.20.4_@t_f120fce1a3d9fc85461b67496f03c362/node_modules/@tiptap/extension-task-item
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+extension-task-list@3.20.4_@tiptap+extension-list@3.20.4_@tiptap+core@3.20.4_@t_c94f69f56aee3556ec680ab7491aa1d4/node_modules/@tiptap/extension-task-list
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+html@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4_happy-dom@20.8.9/node_modules/@tiptap/html
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+pm@3.20.4/node_modules/@tiptap/pm
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@tiptap+starter-kit@3.20.4/node_modules/@tiptap/starter-kit
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@types+jsdom@21.1.7/node_modules/@types/jsdom
-1
View File
@@ -1 +0,0 @@
../../../../node_modules/.pnpm/@types+node@20.19.43/node_modules/@types/node
-1
View File
@@ -1 +0,0 @@
../../../node_modules/.pnpm/fast-check@4.8.0/node_modules/fast-check
-1
View File
@@ -1 +0,0 @@
../../../node_modules/.pnpm/jsdom@25.0.0/node_modules/jsdom
-1
View File
@@ -1 +0,0 @@
../../../node_modules/.pnpm/marked@17.0.5/node_modules/marked
-1
View File
@@ -1 +0,0 @@
../../../node_modules/.pnpm/typescript@5.9.3/node_modules/typescript
-1
View File
@@ -1 +0,0 @@
../../../node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules/vitest
-1
View File
@@ -1 +0,0 @@
../../../node_modules/.pnpm/zod@4.3.6/node_modules/zod
+45
View File
@@ -0,0 +1,45 @@
{
"name": "@docmost/git-sync",
"version": "0.1.0",
"description": "Pure converter + pure sync engine for the Docmost <-> git Markdown sync. See docs/backlog/git-sync-thin-meta.md.",
"private": true,
"type": "module",
"main": "./build/index.js",
"types": "./build/index.d.ts",
"exports": {
".": {
"types": "./build/index.d.ts",
"default": "./build/index.js"
}
},
"scripts": {
"build": "tsc",
"watch": "tsc --watch",
"test": "vitest run",
"test:watch": "vitest"
},
"license": "MIT",
"dependencies": {
"@tiptap/core": "3.20.4",
"@tiptap/extension-highlight": "3.20.4",
"@tiptap/extension-image": "3.20.4",
"@tiptap/extension-subscript": "3.20.4",
"@tiptap/extension-superscript": "3.20.4",
"@tiptap/extension-task-item": "3.20.4",
"@tiptap/extension-task-list": "3.20.4",
"@tiptap/html": "3.20.4",
"@tiptap/pm": "3.20.4",
"@tiptap/starter-kit": "3.20.4",
"jsdom": "25.0.0",
"marked": "17.0.5",
"zod": "4.3.6"
},
"devDependencies": {
"@docmost/editor-ext": "workspace:*",
"@types/jsdom": "^21.1.7",
"@types/node": "^20.0.0",
"fast-check": "^4.8.0",
"typescript": "^5.0.0",
"vitest": "4.1.6"
}
}
@@ -0,0 +1,136 @@
/**
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
* rather than any concrete client, because the gitmost server writes NATIVELY —
* through repositories + collab `openDirectConnection`.
*
* `GitSyncClient` is that interface: the native datasource (server side)
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
* subsets of it. The signatures below MIRROR exactly the methods the engine's
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
* off each result), so a REST-style client is still structurally assignable and
* the native adapter has a precise contract.
*/
/**
* A page node as returned by `listSpaceTree` (the sidebar/tree walk, no body).
* The engine layout (`buildVaultLayout`) consumes `PageNode` from `./layout`,
* which only requires `id` (+ optional `title`/`slugId`/`parentPageId`); this
* lite shape documents the fields the tree walk surfaces. Real tree nodes also
* carry `position`, `icon`, `hasChildren` — kept open via the index signature.
*/
export interface GitSyncPageNodeLite {
id: string;
slugId?: string;
title?: string;
parentPageId?: string | null;
hasChildren?: boolean;
/** `listSpaceTree` nodes carry extra fields (position, icon, …). */
[key: string]: unknown;
}
/**
* The structural client the engine depends on. Only `Pick<GitSyncClient, ...>`
* subsets are ever used:
* - pull reads: `getPageJson` (+ the tree walk's `listSpaceTree`),
* - push writes: `importPageMarkdown` / `createPage` / `deletePage` /
* `movePage` / `renamePage`,
* - continuous (phase B+): `listRecentSince` / `listTrash` / `restorePage`.
*/
export interface GitSyncClient {
// --- reads (pull) ---------------------------------------------------------
/**
* Full tree of page nodes for the space (or the subtree rooted at
* `rootPageId`), each WITHOUT body content. `complete` is `false` when the
* walk was truncated / a fetch failed — the pull side suppresses absence
* deletions on an incomplete tree (SPEC §8). Native impl returns
* `complete: true` always (reads the DB, not a paginated REST endpoint).
*/
listSpaceTree(
spaceId: string,
rootPageId?: string,
): Promise<{ pages: GitSyncPageNodeLite[]; complete: boolean }>;
/**
* One page WITH its ProseMirror body content. `applyPullActions` reads
* `id`, `slugId`, `title`, `parentPageId`, `spaceId` (for the file meta) and
* `content` (to stabilize/serialize). `updatedAt` is carried for the
* poll-suppression loop-guard.
*/
getPageJson(pageId: string): Promise<{
id: string;
slugId: string;
title: string;
parentPageId: string | null;
spaceId: string;
updatedAt: string;
content: unknown;
}>;
// --- writes (push) --------------------------------------------------------
/**
* Merge a page's markdown BODY into the live page. `applyPushActions` passes
* the file's body with the frontmatter AND any git conflict markers already
* stripped — NOT the raw self-contained file — so `fullMarkdown` here is clean
* body text (the datasource re-parses defensively). The collab/Yjs write path
* (SPEC §2/§15.6) — never a raw jsonb overwrite. `applyPushActions` reads only
* an optional `updatedAt` off the result (via `extractUpdatedAt`).
*
* `baseMarkdown` is the last-synced body (from `refs/docmost/last-pushed`,
* likewise stripped), the common ancestor for a THREE-WAY merge against the
* live doc so concurrent human edits survive (review #5). Optional/null -> 2-way.
*/
importPageMarkdown(
pageId: string,
fullMarkdown: string,
baseMarkdown?: string | null,
): Promise<{ updatedAt?: string; [key: string]: unknown }>;
/**
* Create a new page and return the assigned id at `data.id`
* (`applyPushActions` reads `result.data.id`, then writes it back into the
* file's meta). An optional top-level/`data.updatedAt` feeds the loop-guard.
*/
createPage(
title: string,
content: string,
spaceId: string,
parentPageId?: string,
): Promise<{ data: { id: string }; updatedAt?: string; [key: string]: unknown }>;
/** Soft-delete a page to Trash (SPEC §8). Result is not inspected. */
deletePage(pageId: string): Promise<unknown>;
/**
* Reparent a page (and optionally set its fractional-index `position`). The
* engine passes `position` UNDEFINED for now; the native impl computes a
* default between siblings. Result is not inspected.
*/
movePage(
pageId: string,
parentPageId: string | null,
position?: string,
): Promise<unknown>;
/** Change a page's title only (no body touch). Result is not inspected. */
renamePage(pageId: string, title: string): Promise<unknown>;
// --- continuous (phase B+) ------------------------------------------------
/**
* Pages updated since `sinceIso` (the poll-safety reconciliation, SPEC §8).
* `spaceId` may be undefined (all spaces); `hardPageCap` bounds the walk.
*/
listRecentSince(
spaceId: string | undefined,
sinceIso: string | null,
hardPageCap?: number,
): Promise<unknown[]>;
/** List soft-deleted (trashed) pages for the space (deletion detection). */
listTrash(spaceId: string): Promise<unknown[]>;
/** Restore a soft-deleted page from Trash. Result is not inspected. */
restorePage(pageId: string): Promise<unknown>;
}
+244
View File
@@ -0,0 +1,244 @@
import { VaultGit, DEFAULT_BRANCH } from "./git.js";
import { GitSyncClient } from "./client.types.js";
import { Settings } from "./settings.js";
import { readExisting, computePullActions, applyPullActions } from "./pull.js";
import { runPush } from "./push.js";
import { assertVaultPathSafe, type PathGuardIo } from "./path-guard.js";
/**
* Absolute-path filesystem primitives the cycle needs. Injected (not imported)
* so the engine stays IO-free and unit-testable. `mkdir` is recursive; `rm` is
* force (a missing file is a no-op).
*
* `lstat`/`realpath` back the SYMLINK GUARD (see ./path-guard.ts): every
* read/write/mkdir is screened so a pushed symlink (e.g. `leak.md -> /etc/passwd`
* or `-> .env`) cannot be followed to publish or overwrite a file outside the
* vault. Both MUST resolve to `null` on ENOENT and reject on any other error.
*/
export interface CycleFs extends PathGuardIo {
readFile: (absPath: string) => Promise<string>;
writeFile: (absPath: string, text: string) => Promise<void>;
mkdir: (absDir: string) => Promise<void>;
rm: (absPath: string) => Promise<void>;
}
export interface RunCycleDeps {
spaceId: string;
/** The Docmost seam (reads for pull, writes for push). */
client: GitSyncClient;
/** The per-space git vault (a real working repo). */
vault: VaultGit;
/** Engine settings; `vaultPath` roots the relPath -> absolute-path mapping. */
settings: Settings;
fs: CycleFs;
log: (line: string) => void;
/**
* Optional cooperative-abort signal. The caller (orchestrator) wires this to
* the per-space lock: if a heartbeat refresh cannot CONFIRM the lock is still
* held (CAS-miss / Redis error), the signal is aborted and the cycle bails at
* its next checkpoint (before the pull-apply and before the push-apply — the
* two destructive write phases) instead of writing blind after a possible
* lock loss. This is a COARSE best-effort guard; a fully fenced cross-process
* single-writer still needs the fencing-token redesign (follow-up).
*/
signal?: AbortSignal;
}
export interface RunCycleResult {
ran: boolean;
/** Set when the cycle short-circuited without running pull/push. */
skipped?: "merge-in-progress";
pull?: { written: number; deleted: number; conflict: boolean };
push?: { mode: string; failures: number };
/**
* Forwarded from the push result: `true` when the push REFUSED to fast-forward
* a divergent `docmost` mirror (the §5 invariant — `docmost` mirrors what
* Docmost contains — is broken). Surfaced here so a caller driving `runCycle`
* can detect the breach without scraping logs (red-team #15).
*/
divergentDocmost?: boolean;
}
/**
* Run ONE full reconcile cycle for a space: PULL (Docmost -> vault) then PUSH
* (vault -> Docmost), under the engine's required branch choreography. This is
* the single entry point the app drives — it owns the staging order so it can
* never drift from the engine it ships with.
*
* Staging (the ⭐ data-loss-critical order, SPEC §6/§9):
* 1. assertGitAvailable + ensureRepo (the git state store must exist).
* 2. refuse on an unresolved merge (a prior conflicting pull); next checkout
* would fail otherwise.
* 3. ensureBranch('docmost','main') + checkout('docmost'). Pull writes MUST
* land on `docmost`, not `main`: applyPullActions commits on `docmost`,
* then checks out `main` and merges docmost -> main. Writing Docmost
* content straight onto `main` would clobber local file edits before push
* can diff them.
* 4. PULL: readExisting -> listSpaceTree -> computePullActions -> apply.
* 5. PUSH: vault -> Docmost apply.
*
* Lock POLICY lives in the caller; this owns only the mechanics. Deletes are
* soft (Trash, reversible) and always logged, so there is no per-cycle
* delete-cap — engine convergence is the guard against phantom deletions.
*/
export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
const { spaceId, client, vault, settings, fs, log, signal } = deps;
const vaultRoot = settings.vaultPath;
const abs = (relPath: string) => `${vaultRoot}/${relPath}`;
// SYMLINK GUARD (defense-in-depth, see ./path-guard.ts). Wrap the injected
// read/write/mkdir primitives so EVERY engine file access is screened: a path
// that is — or traverses — a symlink, or whose realpath escapes the vault, is
// refused. `rm` is deliberately NOT wrapped: removing a path only deletes the
// link itself (force, non-recursive), never the target, and we WANT to be able
// to clean up a stray pushed symlink. A refusal THROWS; the pull/push loops
// already isolate per-file errors (skip + log), so a single poisoned entry is
// skipped while the rest of the space keeps syncing.
const guard = (p: string) => assertVaultPathSafe(fs, vaultRoot, p);
const safeFs = {
readFile: async (p: string): Promise<string> => {
await guard(p);
return fs.readFile(p);
},
writeFile: async (p: string, text: string): Promise<void> => {
await guard(p);
return fs.writeFile(p, text);
},
mkdir: async (p: string): Promise<void> => {
await guard(p);
return fs.mkdir(p);
},
rm: (p: string): Promise<void> => fs.rm(p),
};
// 1. The engine state store is git: make sure the repo + branches exist
// before any tracked-file listing or diff.
await vault.assertGitAvailable();
await vault.ensureRepo();
// 1b. CLEAR stale git lock files left by an interrupted git op (bug D3-N3). A
// hard crash / OOM-kill / abrupt container stop mid `git add`/`commit`/
// `checkout` leaves a `.git/index.lock` (or a ref `*.lock`); git then refuses
// every later op ("Unable to create '…/index.lock': File exists"), wedging the
// space forever with no self-heal. Only locks OLDER than the staleness
// threshold are removed (a fresh lock from a concurrent replica in the
// TTL-lapse window is preserved), before the merge check + any checkout/diff
// below.
await vault.clearStaleGitLocks();
// 1c. RESTORE a missing `main` branch (bug D3-N1). Ref-store damage can leave an
// existing repo without `main`; the ensureBranch("docmost","main") + checkout
// below would then throw every cycle ("pathspec 'main' did not match"),
// wedging the space forever. Re-create it from `docmost`/HEAD before use.
await vault.ensureMainBranch();
// 2. RECOVER from a vault left mid-merge by a PRIOR cycle (SPEC §9 wedge fix).
// A leftover merge used to WEDGE THE WHOLE SPACE: this check returned
// `skipped: "merge-in-progress"` so EVERY later cycle skipped the entire
// space (all pages, both directions) forever, with no recovery. The pull
// phase below no longer leaves the vault mid-merge (it commits a conflicting
// merge with markers and isolates the one bad page), but a vault wedged by a
// PRE-FIX build (or a manual/interrupted git op) must still self-heal.
// So instead of skipping, ABORT the stale half-merge and continue — the
// fresh pull re-runs and, on a real conflict, commits-with-markers rather
// than re-wedging. A stray unmerged index that `merge --abort` can't clear
// (no MERGE_HEAD) is force-cleared with a hard reset to HEAD.
if (await vault.isMergeInProgress()) {
log(
`vault was left mid-merge by a prior cycle — aborting the stale merge and ` +
`continuing so the space is not wedged (SPEC §9 recovery).`,
);
await vault.abortMerge();
if (await vault.isMergeInProgress()) {
log(
`vault still mid-merge after 'merge --abort' — hard-resetting to HEAD ` +
`to recover (SPEC §9).`,
);
await vault.resetHardToHead();
}
}
try {
// 3. Pull writes happen on `docmost`; be on it BEFORE applying (see docstring).
await vault.ensureBranch("docmost", "main");
await vault.checkout("docmost");
// 4. PULL ------------------------------------------------------------------
const existing = await readExisting({
listTracked: () => vault.listTrackedFiles("*.md"),
readFile: (relPath) => safeFs.readFile(abs(relPath)),
});
const tree = await client.listSpaceTree(spaceId);
const pullActions = computePullActions({
pages: tree.pages,
treeComplete: tree.complete,
existing,
});
// Bail before the first destructive write phase if the lock was lost.
signal?.throwIfAborted();
const pullResult = await applyPullActions(
{
client,
git: vault,
writeFile: (absPath, text) => safeFs.writeFile(absPath, text),
mkdir: (absDir) => safeFs.mkdir(absDir),
rm: (absPath) => safeFs.rm(absPath),
log,
},
pullActions,
vaultRoot,
);
// 5. PUSH ------------------------------------------------------------------
const pushDeps = {
settings,
git: vault,
makeClient: () => client,
readFile: (relPath: string) => safeFs.readFile(abs(relPath)),
writeFile: (relPath: string, text: string) =>
safeFs.writeFile(abs(relPath), text),
log,
};
// Bail before pushing to Docmost if the lock was lost during pull.
signal?.throwIfAborted();
const pushResult = await runPush(pushDeps, { dryRun: false });
return {
ran: true,
pull: {
written: pullResult.written,
deleted: pullResult.deleted,
conflict: pullResult.merge.conflict,
},
push: {
mode: pushResult.mode,
failures: pushResult.failures?.length ?? 0,
},
// Forward a divergent-`docmost` escalation so the caller can act on the §5
// invariant breach without scraping logs (red-team #15).
divergentDocmost: pushResult.divergentDocmost ?? false,
};
} finally {
// STABLE SERVED HEAD (bug #3). The pull transiently checks out the read-only
// `docmost` mirror, and the smart-HTTP host advertises whatever HEAD resolves
// to — so a clone racing a cycle could default to `docmost`. The happy path
// already ends on `main` (runPush), but a throw mid-pull would leave HEAD on
// `docmost`; restore it here so the advertised default branch is `main` BETWEEN
// cycles. Best-effort: skipped if the lock was lost (do not write the working
// tree after a possible takeover), and a failing checkout (e.g. a dirty tree
// from an aborted write) is swallowed — the next cycle's recovery resyncs and
// the read advertisement pins HEAD under the lock regardless.
if (!signal?.aborted) {
try {
await vault.checkout(DEFAULT_BRANCH);
} catch {
/* best-effort: next cycle recovers; advertisement pins HEAD under lock */
}
}
}
}
+850
View File
@@ -0,0 +1,850 @@
/**
* Thin async wrapper over the system `git` binary (SPEC §5: state store = git).
*
* IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`,
* which is the vault's OWN git repository (default `data/vault`), SEPARATE from
* the gitmost application repo. This module MUST NEVER run git against the
* application repo. `data/` is gitignored, so a nested repo under `data/vault`
* is safe. The pull cycle is READ-ONLY toward Docmost; this module only touches
* the local vault git, never a git remote (push is deferred, see SPEC §7).
*
* Implementation notes:
* - We shell out via `node:child_process` `execFile` (promisified), passing
* ARGS AS AN ARRAY — no shell, so there is no command injection surface even
* if a page title / branch name contains shell metacharacters.
* - EVERY git invocation funnels through the single `runRaw` primitive, which
* ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git
* never blocks on a pager and always prints verbatim UTF-8 paths). There is
* no exception — even the `git --version` preflight goes through `runRaw`.
* - "nothing to commit" is treated as a graceful no-op, not an error.
*/
import { execFile } from "node:child_process";
import { mkdir, rm, stat } from "node:fs/promises";
import { promisify } from "node:util";
const execFileAsync = promisify(execFile);
// Safety net: kill a hung git subprocess. This engine performs only LOCAL git
// operations (no network pushes), so a legitimate call never approaches this
// bound; it only prevents an indefinitely-stuck subprocess from wedging a sync
// cycle (the same risk the http-backend watchdog guards on the server side).
const GIT_EXEC_TIMEOUT_MS = 120_000;
// A live git op holds a lock for at most GIT_EXEC_TIMEOUT_MS (then it is killed),
// so a lock whose mtime is older than a few multiples of that cannot have a live
// holder — it is a genuine crash-leftover, safe to remove. A fresh lock from a
// concurrently-running replica (mtime within the timeout) is preserved so we never
// delete a lock a live git process still holds.
const STALE_LOCK_MIN_AGE_MS = GIT_EXEC_TIMEOUT_MS * 3; // 6 min, >> max git-op duration
/** Bot identity used for engine-authored vault commits (SPEC §7.3). */
export const BOT_AUTHOR_NAME = "Docmost Sync";
export const BOT_AUTHOR_EMAIL = "docmost-sync@local";
/** Default branch the vault repo is initialized on. */
export const DEFAULT_BRANCH = "main";
/**
* One row of `git diff --name-status` (SPEC §6 "FS -> Docmost"). `status` is the
* single-letter change code (`-M` rename detection on), `path` is the (new) file
* path; for a rename/copy (`R`/`C`) `oldPath` is the source and `path` is the
* destination, with `score` carrying git's similarity index (0–100).
*/
export interface DiffEntry {
status: "A" | "M" | "D" | "R" | "C";
/** New (destination) path. For A/M/D it is the only path. */
path: string;
/** Source path — present only for R/C. */
oldPath?: string;
/** Rename/copy similarity score (0–100) — present only for R/C. */
score?: number;
}
/** Result of a `merge`: whether it succeeded cleanly or left conflict markers. */
export interface MergeResult {
/** True when the merge applied cleanly (fast-forward or clean 3-way). */
ok: boolean;
/** True when the merge stopped on conflicts (markers left in the worktree). */
conflict: boolean;
/** Raw combined stdout+stderr, for logging/diagnostics. */
output: string;
}
/** Options for an engine-authored commit (provenance, SPEC §7.3). */
export interface CommitOptions {
authorName: string;
authorEmail: string;
/**
* Trailer lines appended to the commit message body (e.g.
* `Docmost-Sync-Source: docmost`). These are the machine-readable provenance
* the loop-guard keys on (SPEC §12, "commit-attribution").
*/
trailers?: string[];
}
/**
* A git wrapper bound to a single vault path. Construct once per vault; every
* method runs git with `cwd = vaultPath`.
*/
export class VaultGit {
constructor(private readonly vaultPath: string) {}
/**
* Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
* to system `git` for every vault operation, so a missing binary (e.g. a slim
* container image without git) must fail fast with an actionable message
* rather than a cryptic ENOENT deep inside the first real git call. Presence
* check only — we do NOT gate on a specific version. Runs `git --version`
* with NO `cwd` (the vault dir may not exist yet at preflight time).
*/
async assertGitAvailable(): Promise<void> {
// Goes through the single `runRaw` primitive like every other invocation.
// `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at
// preflight time, so we must not point git at a missing directory.
const r = await this.runRaw(["--version"], { cwd: null });
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(
"git binary not found or not runnable — install git (the vault state " +
`store requires it). Underlying error: ${detail}`,
);
}
}
/**
* Run a git command in the vault and return trimmed stdout. THIN wrapper over
* the single `runRaw` primitive: throws a clear, unified Error (including
* stderr/stdout) on a non-zero exit.
*/
private async run(
args: string[],
opts?: { cwd?: string | null; env?: Record<string, string> },
): Promise<string> {
const r = await this.runRaw(args, opts);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
}
return r.stdout.trim();
}
/**
* The ONE primitive every git invocation in this module flows through. Builds
* the full argv (`--no-pager -c core.quotepath=false <args>`), env, cwd, and
* maxBuffer, runs git, and NEVER throws — it returns the exit info so callers
* can treat a non-zero exit as either an error (`run`) or a meaningful state
* (e.g. a merge conflict, a porcelain diff that "fails" deliberately).
*
* - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
* blocks on a pager and always prints verbatim UTF-8 paths (no octal
* escaping/quoting). `quotepath=false` is the baseline for ALL path-
* printing commands (ls-files, diff --name-only, …).
* - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
* vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
* - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
* - On a spawn/exec error we capture the error `message` too, so a failure
* before git could write to stderr (e.g. ENOENT) is NOT lost.
*/
private async runRaw(
args: string[],
opts?: { cwd?: string | null; env?: Record<string, string> },
): Promise<{ code: number; stdout: string; stderr: string }> {
const cwd = opts?.cwd === null ? undefined : (opts?.cwd ?? this.vaultPath);
try {
const { stdout, stderr } = await execFileAsync(
"git",
["--no-pager", "-c", "core.quotepath=false", ...args],
{
// Generous buffer: file listings / porcelain output on a large vault
// can be sizable.
...(cwd !== undefined ? { cwd } : {}),
maxBuffer: 64 * 1024 * 1024,
timeout: GIT_EXEC_TIMEOUT_MS,
env: vaultGitEnv(opts?.env),
},
);
return { code: 0, stdout, stderr };
} catch (err: unknown) {
const e = err as {
code?: number;
stdout?: string;
stderr?: string;
message?: string;
};
return {
code: typeof e.code === "number" ? e.code : 1,
stdout: e.stdout ?? "",
// Preserve the error message when there is no stderr (e.g. a spawn
// failure like ENOENT, where promisified execFile sets stderr to an
// EMPTY STRING — so `||`, not `??`, to fall through to `message`).
stderr: e.stderr || e.message || "",
};
}
}
/**
* Ensure the vault directory exists and is an initialized git repo on `main`
* with an initial (empty) commit so branches exist. Idempotent: safe to call
* on every run. Sets a LOCAL bot identity for the vault repo if none is set
* (so engine commits never fall back to a global/unset identity).
*/
async ensureRepo(): Promise<void> {
await mkdir(this.vaultPath, { recursive: true });
if (!(await this.isRepo())) {
// `git init -b main` sets the initial branch on modern git; we still
// guard the branch name below for safety on older binaries.
await this.run(["init", "-b", DEFAULT_BRANCH]);
}
// Set a local identity for the vault repo if unset, so engine commits have
// a deterministic committer even on a machine with no global git config.
if (!(await this.hasLocalConfig("user.name"))) {
await this.run(["config", "user.name", BOT_AUTHOR_NAME]);
}
if (!(await this.hasLocalConfig("user.email"))) {
await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
}
// Neutralize correctness-affecting git config in the vault's LOCAL config so
// a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just
// output) and corrupt the vault. The vault is OUR dedicated repo, so LOCAL
// values (which override global/system) are the right scope. Set
// UNCONDITIONALLY every run — idempotent and cheap; `git config <key>`
// writes to `--local` by default inside the repo. These MUST be in place
// before any add/commit/checkout that could be affected, hence they run
// before the initial-commit block below.
// - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true
// would rewrite LF<->CRLF on add/checkout, making our deterministic,
// byte-stable markdown churn and breaking the round-trip invariant.
// `false` guarantees git stores/checks out verbatim bytes.
// - core.safecrlf=false — avoid CRLF-related warnings/aborts on add.
// - commit.gpgsign=false — the headless daemon must never try to GPG-sign
// a commit (would fail/hang; we already set GIT_TERMINAL_PROMPT=0).
// - core.attributesFile=/dev/null — neutralize the user's GLOBAL
// gitattributes so a global clean/smudge filter (filter.<name>.clean)
// cannot rewrite the STORED blob and break §11 byte-stability (a config
// that core.autocrlf=false does not cover). POSIX-only path, which is
// fine: the daemon runs on Linux (Docker) / macOS. A system
// /etc/gitattributes remains the host admin's domain (out of scope).
// - merge.conflictStyle=merge — CRITICAL (SPEC §9, conflict-marker leak):
// a global `merge.conflictStyle=diff3`/`zdiff3` makes a conflicting merge
// emit an EXTRA `|||||||` base-marker section. The conflict-marker
// scrub on the push side (`stripConflictMarkers`) handles `|||||||` too,
// but pinning the classic `merge` style keeps the markers the engine
// produces to the canonical three (`<<<<<<<`/`=======`/`>>>>>>>`) so
// behavior is deterministic regardless of the operator's global config.
// NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
// human running git by hand in the vault must inherit the same neutralized
// behavior; a transient `-c` would not persist. (core.quotepath, by
// contrast, only affects OUR parsing of output and so is baked into the
// `runRaw` argv baseline instead.)
try {
await this.run(["config", "core.autocrlf", "false"]);
await this.run(["config", "core.safecrlf", "false"]);
await this.run(["config", "commit.gpgsign", "false"]);
await this.run(["config", "core.attributesFile", "/dev/null"]);
await this.run(["config", "merge.conflictStyle", "merge"]);
} catch (err: unknown) {
const detail = err instanceof Error ? err.message : String(err);
throw new Error(
`failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` +
"/.git/config is writable and not locked (e.g. stale config.lock): " +
detail,
);
}
// Create the initial empty commit on `main` if the repo has no commits yet,
// so both `main` and (later) `docmost` branches have a common base.
if (!(await this.hasAnyCommit())) {
// Make sure we are on the default branch before the first commit (covers
// the older-git case where `init -b` was not honored).
await this.run(["checkout", "-B", DEFAULT_BRANCH]);
await this.commitRaw("init vault", {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
allowEmpty: true,
});
}
}
/** True if `cwd` is inside a git work-tree (the vault is initialized). */
private async isRepo(): Promise<boolean> {
const r = await this.runRaw(["rev-parse", "--is-inside-work-tree"]);
return r.code === 0 && r.stdout.trim() === "true";
}
/** True if a LOCAL git config key is set in the vault repo. */
private async hasLocalConfig(key: string): Promise<boolean> {
const r = await this.runRaw(["config", "--local", "--get", key]);
return r.code === 0 && r.stdout.trim().length > 0;
}
/** True if the repo has at least one commit (HEAD resolves). */
private async hasAnyCommit(): Promise<boolean> {
const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]);
return r.code === 0;
}
/** True if a branch with the given name exists. */
async branchExists(name: string): Promise<boolean> {
const r = await this.runRaw([
"rev-parse",
"--verify",
`refs/heads/${name}`,
]);
return r.code === 0;
}
/**
* Create `name` from `fromBranch` if it does not already exist. No-op (and no
* checkout) when the branch is already present.
*/
async ensureBranch(name: string, fromBranch: string): Promise<void> {
if (await this.branchExists(name)) return;
await this.run(["branch", name, fromBranch]);
}
/**
* Re-create a MISSING `main` branch (bug D3-N1). Ref-store damage (a deleted
* `refs/heads/main`, a bad ref update) can leave an existing repo without
* `main`. Every cycle then throws (`ensureBranch("docmost","main")` /
* `checkout main` -> "pathspec 'main' did not match"), wedging the space
* FOREVER with no self-heal — `ensureRepo` only creates branches on a FRESH
* `git init`. Restore `main` in the preflight from the best available source:
* the `docmost` mirror branch if present (they track each other), else the
* current `HEAD` commit. If the repo has no commit at all, ensureRepo's
* fresh-init path owns it — nothing to do here.
*/
async ensureMainBranch(): Promise<void> {
if (await this.branchExists(DEFAULT_BRANCH)) return;
if (await this.branchExists("docmost")) {
await this.run(["branch", DEFAULT_BRANCH, "docmost"]);
return;
}
const head = await this.runRaw(["rev-parse", "--verify", "--quiet", "HEAD"]);
if (head.code === 0 && head.stdout.trim().length > 0) {
await this.run(["branch", DEFAULT_BRANCH, head.stdout.trim()]);
}
}
/** Name of the currently checked-out branch. */
async currentBranch(): Promise<string> {
return this.run(["rev-parse", "--abbrev-ref", "HEAD"]);
}
/** Check out an existing branch. */
async checkout(name: string): Promise<void> {
await this.run(["checkout", name]);
}
/** Stage everything (adds, modifications, deletions). */
async stageAll(): Promise<void> {
await this.run(["add", "-A"]);
}
/**
* True if the vault is mid-merge (an unresolved merge from a previous run,
* SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged
* (conflicted) index entries (`git ls-files -u`). The pull cycle checks this
* BEFORE any checkout so a left-over merge produces a clear, actionable
* message instead of a raw "you need to resolve your current index first"
* failure deep inside `checkout`. This is what makes re-runs converge
* (resumability, SPEC §12).
*/
async isMergeInProgress(): Promise<boolean> {
// MERGE_HEAD exists exactly while a merge is in progress.
const mergeHead = await this.runRaw([
"rev-parse",
"--verify",
"--quiet",
"MERGE_HEAD",
]);
if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0) return true;
// Fallback / belt-and-suspenders: any unmerged index entries also mean the
// working tree is mid-conflict and a checkout would refuse.
const unmerged = await this.runRaw(["ls-files", "-u"]);
return unmerged.code === 0 && unmerged.stdout.trim().length > 0;
}
/**
* Remove STALE git lock files left by an INTERRUPTED git operation (a hard
* crash / OOM-kill / abrupt container stop mid `git add`/`commit`/`checkout`
* leaves `.git/index.lock`; interrupted ref updates leave `*.lock` files). Git
* then refuses EVERY subsequent operation ("Unable to create '…/index.lock':
* File exists"), which WEDGES the space's sync loop indefinitely with no
* self-heal (bug D3-N3). We target the known/fixed set of index + ref lock
* files this engine itself writes (a hardcoded list, NOT a `*.lock` glob), and
* remove a lock ONLY when it is provably stale by mtime: a lock older than
* STALE_LOCK_MIN_AGE_MS cannot have a live holder (a live git op is killed
* after GIT_EXEC_TIMEOUT_MS), so it is a genuine crash-leftover. A FRESH lock —
* e.g. one a concurrently-running replica still holds during the documented
* multi-replica TTL-lapse window — is PRESERVED, so we never delete a lock a
* live git process is still using (which would corrupt the index/refs). Clear
* them best-effort in the cycle preflight, alongside the mid-merge recovery.
* Missing files are a no-op.
*/
async clearStaleGitLocks(): Promise<void> {
const gitDir = `${this.vaultPath}/.git`;
const locks = [
"index.lock",
"HEAD.lock",
"config.lock",
"packed-refs.lock",
"MERGE_HEAD.lock",
"ORIG_HEAD.lock",
"refs/heads/main.lock",
"refs/heads/docmost.lock",
"refs/docmost/last-pushed.lock",
];
await Promise.all(
locks.map(async (rel) => {
const path = `${gitDir}/${rel}`;
try {
const stats = await stat(path);
// Only remove a lock old enough that no live git process can hold it.
// A fresh lock (mtime within the staleness window) is left in place.
if (Date.now() - stats.mtimeMs >= STALE_LOCK_MIN_AGE_MS) {
await rm(path, { force: true }).catch(() => undefined);
}
} catch {
// Missing lock (ENOENT) or unreadable — nothing to clear.
}
}),
);
}
/**
* Commit the currently STAGED changes with an explicit author/committer
* identity and the given trailers appended to the message body (SPEC §7.3
* provenance). Returns `true` if a commit was made, `false` if there was
* nothing to commit (graceful no-op). The caller is expected to have staged
* its changes first (e.g. via `stageAll`).
*/
async commit(message: string, opts: CommitOptions): Promise<boolean> {
// Nothing staged -> nothing to commit. Treat as a no-op (SPEC §11: a
// deterministic re-pull of unchanged pages produces identical bytes, so
// git sees no diff and we must not error).
const staged = await this.runRaw([
"diff",
"--cached",
"--quiet",
]);
// `diff --cached --quiet` exits 0 when the index matches HEAD (nothing
// staged), 1 when there are staged changes.
if (staged.code === 0) return false;
await this.commitRaw(message, opts);
return true;
}
/**
* Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
* Builds the full message with appended trailers and sets author + committer
* identity via env vars (so the committer matches the author, not the repo
* default).
*/
private async commitRaw(
message: string,
opts: CommitOptions & { allowEmpty?: boolean },
): Promise<void> {
const fullMessage = buildCommitMessage(message, opts.trailers);
// `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath
// (or any injected hook) must never interfere with engine commits in our
// dedicated vault repo.
const args = ["commit", "--no-verify", "-m", fullMessage];
if (opts.allowEmpty) args.push("--allow-empty");
// Route through the single `runRaw` primitive; set author + committer
// identity via env vars (so the committer matches the author, not the repo
// default). Throw via the same unified message on a non-zero exit.
const r = await this.runRaw(args, {
env: {
GIT_AUTHOR_NAME: opts.authorName,
GIT_AUTHOR_EMAIL: opts.authorEmail,
GIT_COMMITTER_NAME: opts.authorName,
GIT_COMMITTER_EMAIL: opts.authorEmail,
},
});
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
}
}
/**
* Merge `fromBranch` into the current branch (`git merge --no-edit`).
* Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict
* state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict
* markers are left in the worktree for manual resolution by a later increment,
* and — critically — nothing is pushed to Docmost (we never write to Docmost
* anyway).
*/
async merge(fromBranch: string): Promise<MergeResult> {
const r = await this.runRaw(["merge", "--no-edit", fromBranch]);
const output = `${r.stdout}\n${r.stderr}`.trim();
if (r.code === 0) {
return { ok: true, conflict: false, output };
}
// A non-zero exit on merge most commonly means a conflict. Confirm by
// checking for unmerged paths (porcelain "U" status) so we don't mislabel
// an unrelated failure as a conflict.
const conflict = await this.hasUnmergedPaths();
return { ok: false, conflict, output };
}
/** True if the index has any unmerged (conflicted) paths. */
private async hasUnmergedPaths(): Promise<boolean> {
const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]);
return r.code === 0 && r.stdout.trim().length > 0;
}
/**
* The vault-relative (forward-slash) paths with UNMERGED (conflicted) index
* entries after a conflicting merge. NUL-delimited + `core.quotepath=false`
* (the `runRaw` baseline) so Cyrillic/space paths come back verbatim. Used by
* the pull cycle to LOG and ISOLATE the conflicted page(s) when it commits a
* conflicted merge instead of leaving the whole vault wedged (SPEC §9 wedge
* fix). Returns `[]` on any error (best-effort diagnostics).
*/
async listUnmergedPaths(): Promise<string[]> {
const r = await this.runRaw([
"diff",
"--name-only",
"--diff-filter=U",
"-z",
]);
if (r.code !== 0) return [];
return r.stdout.split("\0").filter((p) => p.length > 0);
}
/**
* Commit an IN-PROGRESS (conflicted) merge AS-IS so the vault is NOT left
* wedged mid-merge (SPEC §9 wedge fix). A `git merge` that conflicts leaves
* `MERGE_HEAD` + unmerged index entries; the next cycle's `isMergeInProgress`
* check would then skip the ENTIRE space forever (the reported wedge). Instead
* we stage everything — including the conflicted file(s), whose conflict
* markers are PRESERVED in the committed tree — and record the two-parent merge
* commit. The cleanly-merged pages land normally; the conflicted page carries
* its markers on `main`, where the push side isolates it (a per-page push
* failure when `autoMergeConflicts` is off; the markers never reach Docmost)
* while every other page keeps syncing. Recovery: resolve the markers in git
* and the next push sends the clean body.
*
* `--allow-empty` guards the degenerate case where the staged conflict
* resolution nets to no tree change; while `MERGE_HEAD` exists `git commit`
* still records the merge commit so the half-merge is cleared.
*/
async commitMerge(message: string, opts: CommitOptions): Promise<void> {
await this.run(["add", "-A"]);
await this.commitRaw(message, { ...opts, allowEmpty: true });
}
/**
* Abort an in-progress merge (`git merge --abort`), restoring the pre-merge
* working tree + index. Best-effort: a non-zero exit (e.g. no MERGE_HEAD) is
* swallowed. Used by the cycle's RECOVERY path to unwedge a vault that a
* PRIOR (pre-fix) cycle left mid-merge, so the fresh pull can re-run instead of
* skipping the space forever (SPEC §9 wedge recovery).
*/
async abortMerge(): Promise<void> {
await this.runRaw(["merge", "--abort"]);
}
/** Hard-reset the working tree + index to HEAD (drops a stray half-merge that
* `merge --abort` could not clear — no MERGE_HEAD but lingering unmerged
* entries). Best-effort recovery primitive (SPEC §9). */
async resetHardToHead(): Promise<void> {
await this.runRaw(["reset", "--hard", "HEAD"]);
}
/**
* List tracked files on the current branch (paths relative to the vault
* root, forward-slash separated). An optional glob (a git pathspec) narrows
* the listing, e.g. `"*.md"`.
*
* The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
* (e.g. `Column.md` in Cyrillic). With git's DEFAULT `core.quotepath=true`, `ls-files`
* returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
* which `src/pull.ts` `readExisting` would then parse as garbage paths,
* breaking move/duplicate detection. We defeat that two ways at once:
* - `core.quotepath=false` disables the octal-escape/quoting. It is now the
* `runRaw` argv baseline (prepended to EVERY invocation), so we no longer
* pass it inline here.
* - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
* ambiguity), which we split on `\0`.
* We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
* the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
* are returned verbatim — git already emits forward slashes.
*/
async listTrackedFiles(glob?: string): Promise<string[]> {
const r = await this.runRaw(["ls-files", "-z", ...(glob ? [glob] : [])]);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git ls-files failed: ${detail}`);
}
return r.stdout.split("\0").filter((p) => p.length > 0);
}
/**
* Diff two refs with `--name-status -M -z` and parse the NUL-delimited output
* (SPEC §6: the FS→Docmost push direction diffs `main` against
* `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed
* file is reported as a single `R` row with both its old and new path instead
* of a delete+add pair — that distinction is what lets the push planner tell a
* move from a delete+create (SPEC §8 "Move vs delete").
*
* `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has
* Cyrillic file names) with NO quoting/escaping. The record shape differs by
* status:
* - A/M/D: `status\0path\0`
* - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
* We read the RAW stdout (not the trimming `run()` helper, which would mangle
* the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the
* tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim.
*/
async diffNameStatus(
fromRef: string,
toRef: string,
): Promise<DiffEntry[]> {
const r = await this.runRaw([
"diff",
"--name-status",
"-M",
"-z",
fromRef,
toRef,
]);
if (r.code !== 0) {
const detail = (r.stderr || r.stdout || "").trim();
throw new Error(`git diff --name-status failed: ${detail}`);
}
// Tokens alternate: <status> <path...> <status> <path...> ... With `-z`,
// each token (status code AND each path) is its own NUL-delimited field.
const tokens = r.stdout.split("\0").filter((t) => t.length > 0);
const entries: DiffEntry[] = [];
let i = 0;
while (i < tokens.length) {
const raw = tokens[i++];
// The status token is e.g. `A`, `M`, `D`, or `R100` / `C075`. The leading
// letter is the change kind; any trailing digits are the similarity score.
const letter = raw[0] as DiffEntry["status"];
if (letter === "R" || letter === "C") {
const score = Number.parseInt(raw.slice(1), 10);
const oldPath = tokens[i++];
const path = tokens[i++];
if (oldPath === undefined || path === undefined) break; // malformed tail
entries.push({
status: letter,
path,
oldPath,
...(Number.isFinite(score) ? { score } : {}),
});
} else if (letter === "A" || letter === "M" || letter === "D") {
const path = tokens[i++];
if (path === undefined) break; // malformed tail
entries.push({ status: letter, path });
} else {
// Unknown/other status (e.g. T type-change, U unmerged) — consume one
// path token defensively so the walk stays aligned, but do not emit it
// (the push planner only handles A/M/D/R/C).
i++;
}
}
return entries;
}
/**
* Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist.
* `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an
* unknown ref, so a non-zero exit maps cleanly to `null`. Used to read
* `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push.
*/
async revParse(ref: string): Promise<string | null> {
const r = await this.runRaw(["rev-parse", "--verify", "--quiet", ref]);
if (r.code !== 0) return null;
const sha = r.stdout.trim();
return sha.length > 0 ? sha : null;
}
/**
* Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`,
* named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5:
* "what of `main` is already reflected in Docmost").
*/
async readRef(ref: string): Promise<string | null> {
return this.revParse(ref);
}
/**
* Point `ref` at `target` (`git update-ref <ref> <target>`). Used to advance
* `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
* (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts.
*/
async updateRef(ref: string, target: string): Promise<void> {
await this.run(["update-ref", ref, target]);
}
/**
* Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward,
* i.e. the current `branch` tip is an ancestor of `toCommit` (verified via
* `git merge-base --is-ancestor <branch> <toCommit>`). Used to advance the
* `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a
* push succeeds, Docmost already contains the pushed `main` content, so the
* mirror must reflect it — otherwise the NEXT pull would diff our own write
* back and re-pull it (loop-guard).
*
* SAFETY — never force, never clobber divergent history:
* - If `branch` IS an ancestor of `toCommit`, advance it with
* `git update-ref refs/heads/<branch> <toCommit>`. The `docmost` branch is
* NOT checked out during a push (push works on `main`), so updating the ref
* directly is safe and avoids any working-tree touch.
* - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward),
* do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and
* let the caller log it. We must never overwrite a `docmost` history that
* has commits the push base does not contain.
*
* Returns `{ ok: true }` when the branch was advanced (or already at
* `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise.
* A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason.
*/
async fastForwardBranch(
branch: string,
toCommit: string,
): Promise<{ ok: boolean; reason?: string }> {
const branchRef = `refs/heads/${branch}`;
// Resolve both endpoints first so a missing ref is a clean refusal, not a
// confusing `merge-base` failure.
const branchSha = await this.revParse(branchRef);
if (branchSha === null) {
return { ok: false, reason: `branch ${branch} does not exist` };
}
const targetSha = await this.revParse(toCommit);
if (targetSha === null) {
return { ok: false, reason: `target ${toCommit} does not resolve` };
}
// Already at the target -> a no-op fast-forward (still ok).
if (branchSha === targetSha) return { ok: true };
// `merge-base --is-ancestor A B` exits 0 iff A is an ancestor of B. Only a
// true ancestor is a fast-forward; anything else is divergent and refused.
const ancestor = await this.runRaw([
"merge-base",
"--is-ancestor",
branchSha,
targetSha,
]);
if (ancestor.code !== 0) {
return { ok: false, reason: "not-fast-forward" };
}
// Safe to advance: the branch is not checked out during push, so a direct
// ref update avoids a checkout/working-tree touch.
await this.updateRef(branchRef, targetSha);
return { ok: true };
}
/**
* Read a file's content at a specific ref (`git show <ref>:<path>`), or `null`
* if the path does not exist there. Used by the push direction to read the
* PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its
* `docmost:meta` — and therefore its `pageId` — can be recovered to translate
* the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones
* that had a pageId, are deleted in Docmost). A non-zero exit (path absent at
* that ref) maps to `null` rather than throwing.
*/
async showFileAtRef(ref: string, path: string): Promise<string | null> {
// `git show <ref>:<path>` requires the path relative to the repo root; pass
// it verbatim (forward-slash, matching `listTrackedFiles` / diff output).
const r = await this.runRaw(["show", `${ref}:${path}`]);
if (r.code !== 0) return null;
return r.stdout;
}
/**
* Read ONE side of a conflicted file from the merge index (`git show :N:path`),
* where the stage `N` is the standard 3-way merge slot:
* 1 = merge BASE (common ancestor), 2 = OURS (the current branch = `main`),
* 3 = THEIRS (the merged-in branch = `docmost`).
* Returns the blob text, or `null` when that stage is absent (e.g. an add/add
* conflict has no base, a modify/delete conflict has only one content side).
*
* Used by the pull cycle (SPEC §9) to RESOLVE a conflicted docmost->main merge
* deterministically instead of committing raw conflict markers onto the
* published `main`: a conflict whose two sides differ ONLY in trailing/empty
* lines is SPURIOUS (normalize -> identical -> clean), and a genuine conflict is
* resolved to a clean side (no `<<<<<<<`/`>>>>>>>` markers ever reach `main`).
*/
async showStage(stage: 1 | 2 | 3, path: string): Promise<string | null> {
const r = await this.runRaw(["show", `:${stage}:${path}`]);
if (r.code !== 0) return null;
return r.stdout;
}
/**
* Pin the repo's symbolic `HEAD` to `main` WITHOUT touching the working tree or
* index (`git symbolic-ref HEAD refs/heads/main`). The smart-HTTP host advertises
* whatever `HEAD` resolves to as the clone's default branch, so a clone that
* races a cycle mid-pull (when the engine has transiently checked out the
* read-only `docmost` mirror) would otherwise default to `docmost`. Pinning HEAD
* back to the canonical writable branch makes the advertised symref deterministic.
*
* symbolic-ref only rewrites `.git/HEAD`; it does NOT move the working tree, so
* it must only ever run when the working tree is ALREADY on `main` (between
* cycles / under the per-space lock with no cycle in flight) — otherwise HEAD and
* the index would desync. Callers serialize this with the engine via the lock.
*/
async pinHeadToMain(): Promise<void> {
await this.run(["symbolic-ref", "HEAD", `refs/heads/${DEFAULT_BRANCH}`]);
}
}
/**
* Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
* Used by the single `runRaw` primitive every git command flows through, so
* these pins apply uniformly (including the `git --version` preflight).
*
* cwd-isolation is this module's central safety guarantee: every git command
* MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An
* inherited `GIT_DIR` / `GIT_WORK_TREE` in `process.env` would silently
* redirect the operation away from `cwd` (e.g. to the source repo or another
* checkout), defeating that guarantee. So we always strip them, regardless of
* whatever else the caller adds (author/committer identity, etc.).
*
* Exported for unit testing.
*/
export function vaultGitEnv(
extra?: Record<string, string>,
): NodeJS.ProcessEnv {
const env: NodeJS.ProcessEnv = {
...process.env,
// Locale-independent output (defense in depth). We never parse localized
// prose, but pinning the locale prevents a future regression where some
// git message we DO key on is translated by an inherited LC_ALL/LANG.
LC_ALL: "C",
LANG: "C",
// Never page (we already pass --no-pager, but a stray GIT_PAGER could still
// bite) and never block on an interactive prompt (e.g. credentials) — the
// daemon runs unattended and must not hang.
GIT_PAGER: "cat",
GIT_TERMINAL_PROMPT: "0",
...extra,
};
delete env.GIT_DIR;
delete env.GIT_WORK_TREE;
return env;
}
/**
* Build a commit message body with trailer lines appended (SPEC §7.3). The
* trailers are separated from the subject by a blank line so `git interpret-
* trailers` / `git log --format=%(trailers)` parse them as trailers.
* Exported for unit testing.
*/
export function buildCommitMessage(
subject: string,
trailers?: string[],
): string {
if (!trailers || trailers.length === 0) return subject;
return `${subject}\n\n${trailers.join("\n")}`;
}
+202
View File
@@ -0,0 +1,202 @@
/**
* Pure page-tree -> vault path mapping (SPEC §12).
*
* Given the flat list of page nodes for a space (as returned by
* `listAllSpacePages`), compute for every page a deterministic, collision-free
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
* page's own name, no extension). This module is intentionally PURE and
* dependency-free apart from the sanitization helpers, so the whole tree ->
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
* lives in each file's meta block (pageId / slugId).
*/
import { sanitizeTitle, disambiguate } from "./sanitize.js";
/** Flat page node as returned by `listAllSpacePages` (no content). */
export interface PageNode {
id: string;
title?: string;
slugId?: string;
parentPageId?: string | null;
hasChildren?: boolean;
}
/** A page's resolved vault destination: folder path + file stem. */
export interface VaultEntry {
/** Folder path, root -> leaf (the page's ancestors). Empty for a root page. */
segments: string[];
/** The page's own file name without extension. */
stem: string;
}
/**
* Build the full vault layout for a space.
*
* Returns a Map keyed by pageId -> `{ segments, stem }`. The result is
* deterministic for a given input and guarantees every full destination path
* (`[...segments, stem].join("/")`) is unique, so no page can silently overwrite
* another.
*
* Disambiguation is layered:
* 1. Sibling collisions (same sanitized title under the same parent) are
* resolved with a stable ` ~<slugId>` suffix (the suffix is itself
* sanitized, since slugId/id is untrusted data that must never inject a
* path separator).
* 2. A final full-path pass catches residual collisions that sibling-scoping
* cannot see — e.g. two pages whose parents are BOTH outside the input set
* both bucket at the root with `segments: []`.
*/
export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
// Index pages by id so the parent chain can be walked. Guard against
// duplicate ids in the input (first one wins).
const byId = new Map<string, PageNode>();
for (const p of pages) {
if (p && p.id && !byId.has(p.id)) byId.set(p.id, p);
}
// Resolve each node's display name once, deterministically. The bucket key is
// the node's parent ONLY when that parent is actually present in `byId`;
// otherwise (null parent, or an orphan whose parent is outside the input set)
// the node buckets at `"__root__"`. This is critical: orphans land at the vault
// root (see `folderSegmentsFor`), so they MUST share the root bucket with real
// root pages to be disambiguated against each other here — making `nameById`
// final before any `segments` are computed, so no ancestor name can drift.
const parentKeyOf = (p: PageNode): string =>
p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
// Group nodes by (parentKey, sanitized base title) so sibling collisions are
// resolved by a STABLE rule that does NOT depend on input array order. Dedupe
// ids (first occurrence wins, matching `byId`).
const siblingGroups = new Map<string, PageNode[]>();
const namedIds = new Set<string>();
for (const p of pages) {
if (!p || !p.id || namedIds.has(p.id)) continue;
namedIds.add(p.id);
const key = `${parentKeyOf(p)}\u0000${sanitizeTitle(p.title ?? "")}`;
const bucket = siblingGroups.get(key);
if (bucket) bucket.push(p);
else siblingGroups.set(key, [p]);
}
// Assign each node its display name. Within a colliding group, sort the
// siblings by their stable disambiguation key (`slugId` else `id`) and let the
// FIRST keep the bare sanitized title; every OTHER gets the ` ~<slugId>`
// suffix. This makes `nameById` a pure function of the page SET — reordering
// the input never moves the suffix onto a different page (red-team #4a). The
// suffix is itself sanitized (the slugId/id is untrusted and must never inject
// a path separator).
const nameById = new Map<string, string>();
const disambKeyOf = (p: PageNode): string => p.slugId ?? p.id;
for (const bucket of siblingGroups.values()) {
const base = sanitizeTitle(bucket[0].title ?? "");
if (bucket.length === 1) {
nameById.set(bucket[0].id, base);
continue;
}
const sorted = [...bucket].sort((a, b) => {
const ka = disambKeyOf(a);
const kb = disambKeyOf(b);
return ka < kb ? -1 : ka > kb ? 1 : 0;
});
sorted.forEach((p, i) => {
nameById.set(
p.id,
i === 0 ? base : disambiguate(base, sanitizeTitle(disambKeyOf(p))),
);
});
}
// Every id we index above MUST get a resolved name; this helper returns it
// and THROWS if it is somehow absent, rather than silently recomputing a
// DIFFERENT, non-disambiguated name (which would desync a folder segment from
// its target file).
const nameOf = (id: string): string => {
const name = nameById.get(id);
if (name === undefined) {
throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
}
return name;
};
// Build the folder path for a page by walking parentPageId to the root. The
// page's OWN name is the file stem; its ancestors become folders. A `visited`
// guard prevents an infinite loop on a malformed parent cycle.
const folderSegmentsFor = (node: PageNode): string[] => {
const ancestors: string[] = [];
const visited = new Set<string>();
let current: PageNode | undefined = node.parentPageId
? byId.get(node.parentPageId)
: undefined;
while (current && current.id && !visited.has(current.id)) {
visited.add(current.id);
ancestors.unshift(nameOf(current.id));
current = current.parentPageId
? byId.get(current.parentPageId)
: undefined;
}
return ancestors;
};
// First pass: compute the provisional { segments, stem } for every node.
const layout = new Map<string, VaultEntry>();
for (const p of pages) {
if (!p || !p.id || layout.has(p.id)) continue;
layout.set(p.id, {
segments: folderSegmentsFor(p),
stem: nameOf(p.id),
});
}
// FOLDER-NOTE transform (native-Obsidian layout): a page WITH CHILDREN lives at
// `<…>/<stem>/<stem>.md` — its body is the folder-note INSIDE its own folder
// (LostPaul Folder Notes convention), and its children sit alongside it in that
// folder. A leaf stays `<…>/<stem>.md`. Children's segments already point into
// the parent's folder (folderSegmentsFor walks ancestor NAMES), so only the
// parent's own file relocates here; the sibling name pass above already made
// the parent name unique, so folder == file name stays consistent.
for (const p of pages) {
if (!p || !p.id) continue;
const entry = layout.get(p.id);
if (entry && p.hasChildren) {
entry.segments = [...entry.segments, entry.stem];
}
}
// Final full-path uniqueness pass — a belt-and-suspenders safety net. Note
// that cross-bucket (orphan/root) collisions are now resolved in the name pass
// above (orphans share the "__root__" bucket), so ancestor names are final
// before `segments` are built and this pass should rarely/never re-stem an
// ancestor. It only re-stems the colliding LATER leaf via the sanitized
// slugId/id, then (if still colliding) appends the id.
//
// Process FOLDER-NOTES (pages with children) FIRST so a parent claims its
// canonical `<name>/<name>.md` before a same-named CHILD — the child (a leaf)
// is the one that disambiguates, never the folder-note.
const usedPaths = new Set<string>();
const seenIds = new Set<string>();
const pathKey = (e: VaultEntry): string => [...e.segments, e.stem].join("/");
const ordered = pages
.filter((p): p is PageNode => Boolean(p && p.id))
.sort(
(a, b) =>
Number(Boolean(b.hasChildren)) - Number(Boolean(a.hasChildren)),
);
for (const p of ordered) {
if (seenIds.has(p.id)) continue;
seenIds.add(p.id);
const entry = layout.get(p.id);
if (!entry) continue;
if (usedPaths.has(pathKey(entry))) {
// First attempt: disambiguate the stem with the sanitized slugId (or id).
entry.stem = disambiguate(entry.stem, sanitizeTitle(p.slugId ?? p.id));
if (usedPaths.has(pathKey(entry))) {
// Still colliding: append the (sanitized) id as a last resort. The id
// is globally unique, so this always resolves the collision.
entry.stem = disambiguate(entry.stem, sanitizeTitle(p.id));
}
}
usedPaths.add(pathKey(entry));
}
return layout;
}
@@ -11,8 +11,9 @@
* only PRODUCE the hash and the per-page push record (see `src/push.ts`).
*/
import { createHash } from "node:crypto";
/**
* Stable hash of a page's markdown BODY (SPEC §10 "хэш тела"). Deterministic:
* Stable hash of a page's markdown BODY (SPEC §10 "body hash"). Deterministic:
* the same input string always yields the same digest, a different input a
* different one. Used to recognize our own write later (loop suppression).
*
@@ -23,6 +24,6 @@ import { createHash } from "node:crypto";
* caller is responsible for passing a canonical/stable representation if it
* wants hash equality across cosmetic-only differences.
*/
export function bodyHash(markdownBody) {
return createHash("sha256").update(markdownBody, "utf8").digest("hex");
export function bodyHash(markdownBody: string): string {
return createHash("sha256").update(markdownBody, "utf8").digest("hex");
}
+132
View File
@@ -0,0 +1,132 @@
/**
* Vault path guard (security, defense-in-depth).
*
* A user with push access to a git-sync space could commit a `.md` entry that is
* a SYMLINK (e.g. `leak.md -> /etc/passwd` or `-> <server>/.env`). On the next
* cycle a naive `fs.readFile` would follow the link and PUBLISH the target's
* contents as a Docmost page (a read primitive that escalates a writer to
* arbitrary server-file disclosure — including the JWT secret / DB creds in
* `.env`); a symlinked DIRECTORY gives the inverse write-outside-the-vault
* primitive on pull. The primary defense is `core.symlinks=false` in each
* vault's git config (git then materializes a pushed symlink as a PLAIN FILE
* holding the link text, never a real link). This module is the second layer:
* before every engine read/write/mkdir we reject a path that IS — or traverses —
* a symlink, or whose real location escapes the vault root.
*
* IO-free by construction: the `lstat`/`realpath` primitives are injected
* (mirroring the rest of the engine) so the rules are unit-testable with fakes
* and the engine never imports `node:fs`. Path math uses `node:path`, which is
* pure.
*/
import { isAbsolute, relative, resolve, sep } from "node:path";
/** Why a path was refused. */
export type VaultPathUnsafeReason = "symlink" | "escape";
/**
* Thrown when a path is refused by the guard. Engine read/write loops already
* isolate per-file errors (skip + log), so throwing here yields the review's
* required "skip+log" behavior without a separate control channel.
*/
export class VaultPathUnsafeError extends Error {
constructor(
readonly absPath: string,
readonly reason: VaultPathUnsafeReason,
readonly vaultRoot: string,
) {
super(
reason === "symlink"
? `git-sync: refusing to access '${absPath}' — it is (or traverses) a ` +
`symlink under vault '${vaultRoot}' (symlink guard)`
: `git-sync: refusing to access '${absPath}' — it resolves outside ` +
`vault '${vaultRoot}' (symlink guard)`,
);
this.name = "VaultPathUnsafeError";
}
}
/**
* The injected IO the guard needs. Both MUST resolve to `null` on ENOENT (the
* normal case for a not-yet-created file on a write/mkdir) and reject on any
* other error.
*/
export interface PathGuardIo {
/** lstat WITHOUT following the final symlink. `null` when the path is absent. */
lstat: (absPath: string) => Promise<{ isSymbolicLink: boolean } | null>;
/** realpath (follows symlinks). `null` when the path is absent. */
realpath: (absPath: string) => Promise<string | null>;
}
/**
* Lexical containment: is `target` EQUAL to, or NESTED under, `root`? Catches a
* `..` traversal baked into a relPath before any IO. Both operands are resolved
* first so `.`/`..` segments are normalized.
*/
export function isWithinRoot(root: string, target: string): boolean {
const r = resolve(root);
const t = resolve(target);
if (t === r) return true;
const rel = relative(r, t);
return rel.length > 0 && !rel.startsWith(`..${sep}`) && rel !== ".." && !isAbsolute(rel);
}
/**
* Reject `absPath` (resolving silently when it is safe) if it:
* - escapes `vaultRoot` lexically (a `..` traversal), OR
* - IS, or traverses, a symlink at any EXISTING segment from the root down
* (a symlinked ancestor dir, or the target file/dir itself), OR
* - resolves (realpath of its deepest existing ancestor) outside the vault.
*
* Absent leaf segments — the normal case when writing/mkdir'ing a NEW file — are
* safe: the walk stops at the first non-existent segment (nothing to follow).
*/
export async function assertVaultPathSafe(
io: PathGuardIo,
vaultRoot: string,
absPath: string,
): Promise<void> {
const root = resolve(vaultRoot);
const target = resolve(absPath);
// 1. Lexical containment — a `..` in a relPath never even reaches an lstat.
if (!isWithinRoot(root, target)) {
throw new VaultPathUnsafeError(absPath, "escape", vaultRoot);
}
// 2. lstat-walk: reject a symlink at ANY existing level between the root and
// the target (inclusive). A symlinked ancestor or a symlinked target both
// let a follow-the-link read/write escape; rejecting the link itself is the
// surgical guard.
if (target !== root) {
const segments = relative(root, target)
.split(sep)
.filter((s) => s.length > 0);
let cur = root;
for (const segment of segments) {
cur = resolve(cur, segment);
const st = await io.lstat(cur);
if (st === null) break; // absent from here down — nothing left to follow
if (st.isSymbolicLink) {
throw new VaultPathUnsafeError(cur, "symlink", vaultRoot);
}
}
}
// 3. realpath belt-and-suspenders: the deepest EXISTING ancestor must resolve
// inside the vault root's realpath. Catches an ancestor relocated via a
// symlink the lexical check would miss (e.g. the data dir itself being a
// link farm) and bounds the lstat→use TOCTOU window.
const realRoot = await io.realpath(root);
if (realRoot === null) return; // root absent — ensureRepo creates it first
let probe = target;
let realProbe = await io.realpath(probe);
while (realProbe === null && probe !== root) {
const parent = resolve(probe, "..");
if (parent === probe) break; // reached the filesystem root
probe = parent;
realProbe = await io.realpath(probe);
}
if (realProbe !== null && !isWithinRoot(realRoot, realProbe)) {
throw new VaultPathUnsafeError(absPath, "escape", vaultRoot);
}
}
+545
View File
@@ -0,0 +1,545 @@
/**
* Pull cycle — Docmost -> vault (SPEC §6 "Docmost -> FS").
*
* This increment turns the read-only mirror into the git-backed pull cycle:
*
* 1. ensureRepo(vault); refuse if a merge is in progress (SPEC §9/§12);
* ensureBranch("docmost", "main") (SPEC §5 branches)
* 2. checkout docmost
* 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the
* desired `live` files (relPath via the pure sanitize/disambiguation layout)
* 4. parse `existing` tracked .md files (pageId + relPath from gitmost_id frontmatter)
* 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete
* is absence-only, moves are separate
* 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree
* fetch (SPEC §8) and behind the mass-delete guard (defense in depth)
* 7. write each live page in its fixpoint form (normalize-on-write, SPEC §11);
* apply moved-old-path removals (only when the move write SUCCEEDED) and
* absence-delete removals (only when the decision allowed them)
* 8. stageAll + commit on `docmost` with the provenance trailer (SPEC §7.3)
* 9. checkout main + merge docmost (conflicts are surfaced, NOT auto-resolved,
* SPEC §9); push is deferred (SPEC §7)
* 10. one-line summary
*
* DIRECTION IS Docmost -> vault ONLY. Nothing here ever writes to Docmost
* (read-only: listSpaceTree + getPageJson). All git operations run against
* the vault repo (`cwd = vaultPath`), never the source repo (see ./git.ts).
*
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
* the gitmost server drives the engine in-process (there is no standalone CLI
* entry point).
*/
import { dirname } from "node:path";
import { sep } from "node:path";
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
import type { GitSyncClient } from "./client.types.js";
import { buildVaultLayout, type PageNode } from "./layout.js";
import {
VaultGit,
BOT_AUTHOR_NAME,
BOT_AUTHOR_EMAIL,
DEFAULT_BRANCH,
} from "./git.js";
import {
planReconciliation,
decideAbsenceDeletions,
type LiveEntry,
type MovedEntry,
type DeletionDecision,
} from "./reconcile.js";
import { stabilizePageBody } from "./stabilize.js";
// Engine-only mirror branch (SPEC §5): the engine writes here, humans never do.
const DOCMOST_BRANCH = "docmost";
// Machine-readable provenance the loop-guard keys on (SPEC §7.3 / §12).
const SOURCE_TRAILER = "Docmost-Sync-Source: docmost";
// Number of pages fetched/stabilized concurrently. Bounded so a large space
// does not open thousands of simultaneous requests/conversions at once.
const CONCURRENCY = 6;
// How often to log incremental progress (every N completed pages).
const PROGRESS_EVERY = 25;
/** Convert a vault-relative path (forward-slash) to an absolute FS path. */
function relToAbs(vaultRoot: string, relPath: string): string {
return [vaultRoot, ...relPath.split("/")].join("/");
}
/**
* Canonicalize a file's TRAILING whitespace: drop any trailing blank /
* whitespace-only lines (and trailing spaces on the last line) and end with
* exactly one newline; an empty body becomes a single "\n". This matches
* `serializePageFile`'s trailing form (`body.trim()` + a single "\n").
*
* Why (SPEC §9 spurious-conflict fix): the engine writes pages in their
* normalize-on-write form (one trailing newline), but a user can push a `.md` to
* `main` with EXTRA trailing/empty lines (e.g. a double-blank-line append). When
* the docmost mirror (normalized) and `main` (raw) both change near end-of-file,
* git's line-based 3-way merge reports a CONFLICT even though the only difference
* is trailing blank lines. Normalizing BOTH sides before comparing collapses that
* difference to nothing, so the pull cycle can recognize the conflict as SPURIOUS
* and resolve it cleanly instead of committing raw conflict markers onto `main`.
*/
function normalizeTrailingWhitespace(text: string): string {
const body = text.replace(/[\s]+$/, "");
return body.length > 0 ? `${body}\n` : "\n";
}
/** Convert an absolute/relative segment list under the vault to a relPath. */
function segmentsToRelPath(segments: string[], stem: string): string {
return [...segments, `${stem}.md`].join("/");
}
/**
* Injectable IO for `readExisting` (R-Pull-1, test-strategy report §5). The real
* `main` wires these to `git.listTrackedFiles("*.md")` and an `fs.readFile`
* rooted at the vault; tests pass fakes so the parsing/skip rules are unit-
* testable without a real git repo or filesystem.
*/
export interface ReadExistingDeps {
/** List tracked .md paths (forward-slash, vault-relative). */
listTracked: () => Promise<string[]>;
/** Read a tracked file's text by its (forward-slash) vault-relative path. */
readFile: (relPath: string) => Promise<string>;
}
/**
* Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
* its `gitmost_id` frontmatter (native-Obsidian format). Files without a
* `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
* hand-written Obsidian file; PUSH adopts those separately).
*
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
* -> skipped, NOT thrown; the next pull converges;
* - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
*/
export async function readExisting(
deps: ReadExistingDeps,
): Promise<{ pageId: string; relPath: string }[]> {
const tracked = await deps.listTracked();
const existing: { pageId: string; relPath: string }[] = [];
for (const relPath of tracked) {
// git ls-files always emits forward-slash paths; normalize just in case.
const rel = relPath.split(sep).join("/");
let text: string;
try {
text = await deps.readFile(rel);
} catch {
// Tracked but missing on disk (mid-operation race) — skip; the next pull
// converges.
continue;
}
const { id } = parsePageFile(text);
if (id) existing.push({ pageId: id, relPath: rel });
}
return existing;
}
/**
* Input to the PURE `computePullActions` (R-Pull-2). All data, no IO: the live
* tree nodes + completeness flag (from `listSpaceTree`) and the parsed
* `existing` tracked files (from `readExisting`).
*/
export interface PullActionsInput {
/** Live page nodes for the space (from `listSpaceTree`). */
pages: PageNode[];
/** Whether the live tree fetch was COMPLETE (SPEC §8 suppression). */
treeComplete: boolean;
/** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */
existing: { pageId: string; relPath: string }[];
}
/**
* The PURE decisions object computed by `computePullActions` (no IO). It holds
* the reconciliation plan plus the SPEC §8 absence-deletion decision, with the
* suppression already folded in: `toDelete` is the POST-suppression set the
* caller should actually remove (empty when `deletionDecision.apply` is false).
*/
export interface PullActions {
/** Pages to (re)write at their relPath (add + update + move target). */
toWrite: { pageId: string; relPath: string }[];
/** Moves: write new path, then remove old path (only on a successful write). */
moved: MovedEntry[];
/**
* Absence-based paths to delete AFTER suppression. Empty when the decision
* suppressed deletions this cycle, so the caller can apply it unconditionally.
*/
toDelete: string[];
/** Why absence deletions were (or were not) applied (for logging + tests). */
deletionDecision: DeletionDecision;
/** Tracked-file count (for the suppression log messages). */
existingCount: number;
/** Planned absence-delete count BEFORE suppression (for the log message). */
plannedDeleteCount: number;
}
/**
* PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live
* tree nodes + completeness + existing tracked files and returns the full set of
* decisions with NO IO:
*
* - builds the vault layout (deterministic relPath per live page),
* - `planReconciliation` -> toWrite / moved / absence-toDelete,
* - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch +
* empty-live + mass-delete guard), folded IN here so `toDelete` is the
* POST-suppression set (empty when suppressed).
*
* Moves are NOT governed by the suppression: a moved page is present in `live`,
* so its old-path removal is real (the caller still gates it on the write
* succeeding). The expensive content fetch / file write / git ops happen in the
* thin `applyPullActions`.
*/
export function computePullActions(input: PullActionsInput): PullActions {
const { pages, treeComplete, existing } = input;
const layout = buildVaultLayout(pages);
const live: LiveEntry[] = [];
for (const p of pages) {
if (!p || !p.id) continue;
const entry = layout.get(p.id);
if (!entry) continue;
live.push({
pageId: p.id,
relPath: segmentsToRelPath(entry.segments, entry.stem),
});
}
// Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
// `plan.moved` carries move old-path removals separately.
const plan = planReconciliation(live, existing);
// Decide whether the ABSENCE-based deletions may be applied this cycle
// (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard.
// Moves are NOT governed by this.
const deletionDecision = decideAbsenceDeletions({
treeComplete,
liveCount: live.length,
existingCount: existing.length,
deleteCount: plan.toDelete.length,
});
return {
toWrite: plan.toWrite,
moved: plan.moved,
// Fold the suppression in: a suppressed cycle deletes nothing.
toDelete: deletionDecision.apply ? plan.toDelete : [],
deletionDecision,
existingCount: existing.length,
plannedDeleteCount: plan.toDelete.length,
};
}
/**
* Injectable IO for `applyPullActions` (R-Pull-2). The real `main` wires these
* to the live client, the vault git wrapper, and `node:fs/promises`; tests pass
* fakes that RECORD calls so the ordering + the move-on-success data-loss guard
* are testable without real git/fs/network.
*/
export interface ApplyPullActionsDeps {
client: Pick<GitSyncClient, "getPageJson">;
git: Pick<
VaultGit,
| "stageAll"
| "commit"
| "checkout"
| "merge"
| "listUnmergedPaths"
| "commitMerge"
| "showStage"
>;
/** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */
writeFile: (absPath: string, text: string) => Promise<void>;
/** Recursive mkdir of an ABSOLUTE directory path. */
mkdir: (absDir: string) => Promise<void>;
/** Remove a file by ABSOLUTE path (force: a missing file is a no-op). */
rm: (absPath: string) => Promise<void>;
/**
* Injected logger for cycle diagnostics (mirrors the push side). Optional —
* falls back to `console.log` so existing callers stay green.
*/
log?: (line: string) => void;
}
/** Outcome counters from `applyPullActions` (for the summary + tests). */
export interface ApplyResult {
written: number;
movedApplied: number;
deleted: number;
failed: number;
committed: boolean;
merge: { ok: boolean; conflict: boolean; output: string };
/**
* Vault-relative paths of the page(s) that had a GENUINE same-block conflict in
* the docmost -> main merge and were AUTO-RESOLVED to the git/main side (git
* wins, SPEC §9) — committed CLEAN, never with raw conflict markers. Empty on a
* clean merge AND when the only conflicts were spurious trailing-whitespace
* differences (those are normalized, not reported). Surfaced for logging /
* /status visibility; the docmost-side content stays recoverable via the
* `docmost` branch + page history.
*/
conflictedPaths: string[];
}
/**
* THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current
* order, with all the original safety guards preserved bit-for-bit:
*
* 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
* (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
* never aborts the pull (bounded-concurrency pool, fault-tolerant).
* 2. apply MOVE old-path removals — ONLY when the planner marked the old path
* removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a
* failed move-write keeps the old path so the page never vanishes).
* 3. apply (post-suppression) absence deletes.
* 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
* counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
*
* `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps.
*/
export async function applyPullActions(
deps: ApplyPullActionsDeps,
actions: PullActions,
vaultRoot: string,
): Promise<ApplyResult> {
const { client, git } = deps;
// One channel, mirroring the push side: route every cycle diagnostic through
// the injected logger; fall back to `console.log` when none is supplied.
const log = deps.log ?? ((line: string) => console.log(line));
// Emit the SPEC §8 suppression warnings (preserved from the original `main`).
const decision = actions.deletionDecision;
if (!decision.apply) {
if (decision.reason === "incomplete-fetch") {
log(
"pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)",
);
} else if (decision.reason === "empty-live") {
log(
`pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` +
`tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
`Docmost is reachable.`,
);
} else {
log(
`pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` +
`tracked file(s) (mass-delete guard) — deletions suppressed this ` +
`cycle (SPEC §8). Verify the live Docmost tree, then re-run.`,
);
}
}
// 1. Write each live page in its fixpoint form (normalize-on-write, SPEC §11).
let written = 0;
let failed = 0;
let completed = 0;
let nextIndex = 0;
// pageIds whose write FAILED. A moved page whose new-path write failed must
// NOT have its old path removed (otherwise the page vanishes entirely).
const failedPageIds = new Set<string>();
const writeOne = async (w: {
pageId: string;
relPath: string;
}): Promise<void> => {
try {
const page = await client.getPageJson(w.pageId);
// Native-Obsidian format: a minimal `gitmost_id` frontmatter + the fixpoint
// markdown body. title/parent/space are DERIVED (filename / folder / repo),
// so nothing but the pageId is persisted as meta.
const text = serializePageFile(
page.id,
await stabilizePageBody(page.content),
);
const abs = relToAbs(vaultRoot, w.relPath);
await deps.mkdir(dirname(abs));
await deps.writeFile(abs, text);
written++;
} catch (err) {
failed++;
failedPageIds.add(w.pageId);
log(
`pull: failed page ${w.pageId}: ` +
(err instanceof Error ? err.message : String(err)),
);
} finally {
completed++;
if (completed % PROGRESS_EVERY === 0) {
log(`pulled ${completed}/${actions.toWrite.length}`);
}
}
};
// Bounded-concurrency pool (dependency-free): a fixed set of runners each
// take the next index until the write list is exhausted. One bad page never
// aborts the whole pull (mirrors the fault-tolerant tree walk).
const runner = async (): Promise<void> => {
while (true) {
const i = nextIndex++;
if (i >= actions.toWrite.length) return;
await writeOne(actions.toWrite[i]);
}
};
await Promise.all(
Array.from(
{ length: Math.min(CONCURRENCY, actions.toWrite.length) || 1 },
() => runner(),
),
);
// Helper: `rm` with force:true is a no-op if the file is already gone.
const removePath = async (rel: string, what: string): Promise<boolean> => {
try {
await deps.rm(relToAbs(vaultRoot, rel));
return true;
} catch (err) {
log(
`pull: failed to ${what} ${rel}: ` +
(err instanceof Error ? err.message : String(err)),
);
return false;
}
};
// 2. Apply MOVE old-path removals. A moved page IS present in `live`, so its
// old path is genuinely stale — NOT subject to the incomplete-fetch
// suppression. BUT only remove the old path when (a) the planner marked it
// removable (not reused by another live page) AND (b) the new-path write
// actually SUCCEEDED — otherwise we would delete the only copy of a page
// whose move-write failed (⭐ data-loss guard).
let movedApplied = 0;
for (const m of actions.moved) {
if (!m.removeOldPath) continue;
if (failedPageIds.has(m.pageId)) {
log(
`pull: move write for ${m.pageId} failed — keeping old path ` +
`${m.fromRelPath} (SPEC §8)`,
);
continue;
}
if (await removePath(m.fromRelPath, "remove moved old path")) movedApplied++;
}
// 3. Apply ABSENCE-based deletions — `actions.toDelete` is ALREADY the
// post-suppression set (empty when the decision suppressed them, SPEC §8).
let deleted = 0;
for (const rel of actions.toDelete) {
if (await removePath(rel, "delete")) deleted++;
}
// 4. Stage + commit on `docmost` (only if there is something to commit).
// Deterministic stabilized output means unchanged pages produce identical
// bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the
// ACTUAL work applied (pages written + files deleted), not the planned size,
// so a run with failures does not over-report (SPEC §5 nit).
const subject =
deleted > 0
? `docmost: sync ${written} page(s), ${deleted} deleted`
: `docmost: sync ${written} page(s)`;
await git.stageAll();
const committed = await git.commit(subject, {
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
trailers: [SOURCE_TRAILER],
});
// Merge docmost -> main. A CONFLICT must NOT wedge the whole space (the
// reported bug: ONE same-line conflict on ONE page froze sync for EVERY page
// in both directions because the next cycle's `isMergeInProgress` check kept
// skipping the entire space). It must ALSO never commit raw `<<<<<<<`/`>>>>>>>`
// markers onto the published `main` (round-1 round-2: external clones would see
// the markers AND the body re-conflicts every cycle while git and Docmost
// silently diverge). So on a conflict we RESOLVE each conflicted file to a
// clean, marker-free form and commit that (SPEC §9):
//
// - SPURIOUS conflict — the ROOT CAUSE of the leak: the two sides differ ONLY
// in trailing/empty-line normalization (the engine writes one trailing
// newline; a user pushed extra blank lines). Once both sides are
// `normalizeTrailingWhitespace`d they are IDENTICAL, so this is no real
// conflict at all: write the normalized form. Content stays in sync; git
// and the page never diverge.
// - GENUINE same-block conflict: resolve to OURS (the `main`/git side), so git
// wins the published branch — mirroring the live-doc 3-way "git wins" rule.
// The docmost-side content is preserved on the `docmost` branch and remains
// recoverable via page history; the next push carries git's body to Docmost,
// so both sides converge. No markers ever reach `main`.
await git.checkout(DEFAULT_BRANCH);
const merge = await git.merge(DOCMOST_BRANCH);
let conflictedPaths: string[] = [];
let mergeResult = merge;
if (merge.conflict) {
const unmerged = await git.listUnmergedPaths();
const genuine: string[] = [];
for (const rel of unmerged) {
const ours = await git.showStage(2, rel); // main side
const theirs = await git.showStage(3, rel); // docmost side
if (
ours !== null &&
theirs !== null &&
normalizeTrailingWhitespace(ours) === normalizeTrailingWhitespace(theirs)
) {
// SPURIOUS: identical once trailing/empty-line normalization is applied.
// Commit the canonical (normalized) form — no conflict, no markers.
await deps.writeFile(
relToAbs(vaultRoot, rel),
normalizeTrailingWhitespace(theirs),
);
} else {
// GENUINE conflict: resolve to the non-null side (OURS preferred so git
// wins the published branch; THEIRS kept when OURS is absent — e.g. a
// modify/delete conflict — to avoid dropping the remaining content). If
// BOTH are null (delete/delete) leave it; commitMerge's `git add -A`
// stages the deletion.
genuine.push(rel);
const resolved = ours ?? theirs;
if (resolved !== null) {
await deps.writeFile(relToAbs(vaultRoot, rel), resolved);
}
}
}
conflictedPaths = genuine;
await git.commitMerge(
genuine.length > 0
? `docmost: sync, ${genuine.length} page(s) auto-resolved (git wins, SPEC §9)`
: `docmost: sync (trailing-whitespace conflicts normalized, SPEC §9)`,
{
authorName: BOT_AUTHOR_NAME,
authorEmail: BOT_AUTHOR_EMAIL,
trailers: [SOURCE_TRAILER],
},
);
// The committed tree is CLEAN (every conflicted file was overwritten with a
// marker-free resolution). `conflict` now reflects only the GENUINE conflicts
// that were auto-resolved (git won); a merge that conflicted ONLY on trailing
// whitespace is reported as clean so /status does not cry wolf.
mergeResult = { ok: true, conflict: genuine.length > 0, output: merge.output };
if (genuine.length > 0) {
log(
`pull: merge of docmost -> main had ${genuine.length} GENUINE conflict(s) ` +
`auto-resolved to the git/main side (git wins, SPEC §9): ` +
`${genuine.join(", ")}. NO conflict markers were written to main; the ` +
`docmost-side content is on the 'docmost' branch and recoverable via ` +
`page history, and the next push reconciles Docmost to the git body.`,
);
} else {
log(
`pull: merge of docmost -> main conflicted ONLY on trailing/empty-line ` +
`normalization (${unmerged.length} file(s)) — auto-normalized, no ` +
`markers, content stays in sync (SPEC §9 spurious-conflict fix).`,
);
}
} else if (!merge.ok) {
log(`pull: merge of docmost -> main failed: ${merge.output}`);
}
log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");
return {
written,
movedApplied,
deleted,
failed,
committed,
merge: mergeResult,
conflictedPaths,
};
}
File diff suppressed because it is too large Load Diff
+200
View File
@@ -0,0 +1,200 @@
/**
* Pure reconciliation planner (SPEC §5/§6/§8).
*
* Given the desired live set of files (computed from the current Docmost tree)
* and the set of files currently tracked in the vault, compute what to write,
* what to move (old path to remove), and what to delete. Identity is `pageId`
* (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
* changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
* the live tree is a DELETE.
*
* This module is intentionally PURE (no IO, no git) so the whole plan is
* unit-testable. The actual file writing / git operations happen in pull.ts.
*/
/** A page that SHOULD exist in the vault at a given path. */
export interface LiveEntry {
pageId: string;
/** Vault-relative path (forward-slash), e.g. `Space/Parent/Child.md`. */
relPath: string;
}
/** A page currently tracked in the vault (pageId parsed from its meta). */
export interface ExistingEntry {
pageId: string;
/** Vault-relative path (forward-slash) of the tracked file. */
relPath: string;
}
/** A page to (re)write at its destination path. */
export interface WriteEntry {
pageId: string;
relPath: string;
}
/** A page that moved: written at its NEW relPath, with the OLD path removed. */
export interface MovedEntry {
pageId: string;
fromRelPath: string;
toRelPath: string;
/**
* Whether the old path (`fromRelPath`) is SAFE to remove. False when another
* live page will (re)write that exact path (path reuse): removing it would
* destroy real data, so the caller must skip the removal. The move itself is
* still recorded (the new path is written regardless).
*/
removeOldPath: boolean;
}
/** The full reconciliation plan. */
export interface ReconciliationPlan {
/**
* Pages present in `live` -> (re)write at their relPath. This naturally
* covers add, content-update (same path) AND move (same pageId, new path),
* since every live page is (re)written regardless of whether it existed.
*/
toWrite: WriteEntry[];
/**
* Vault-relative paths to delete because their tracked pageId is ABSENT from
* `live` (page removed/trashed). This set is ONLY absence-based deletions —
* the OLD paths of moved pages are NOT here (they live in `moved` and are
* applied separately by the caller). Keeping the two apart lets pull.ts gate
* absence deletions behind the incomplete-fetch suppression + mass-delete
* guard (SPEC §8) while still applying real moves.
*/
toDelete: string[];
/**
* Tracked pages whose relPath changed. The caller writes the page at
* `toRelPath`, then removes `fromRelPath` — but ONLY after the new-path write
* succeeded. The old path is NOT in `toDelete`.
*/
moved: MovedEntry[];
}
/**
* Compute the reconciliation plan.
*
* Rules:
* - Every `live` page is written at its relPath (covers add + update + move).
* - A tracked pageId present in `live` whose relPath changed is `moved`; its
* OLD relPath goes into `moved` ONLY (the caller removes it after the new
* path is written) and is NEVER added to `toDelete`.
* - A tracked pageId NOT present in `live` is an ABSENCE delete; its relPath
* is added to `toDelete`.
*
* Notes:
* - Safety filter (no data loss): no path that is a live TARGET path of any
* page is ever deleted/removed (a write owns it). This applies to BOTH the
* absence `toDelete` set AND a moved page's old-path removal — if a moved
* page's OLD path is reused by ANOTHER live page, the move records no old
* path to remove, because that path will be (re)written.
* - `existing` may legitimately contain duplicate pageIds (two stray files
* carrying the same meta pageId); each such file that is not the live target
* path is removed (as an absence/move) so the vault converges to exactly the
* live set.
*/
export function planReconciliation(
live: LiveEntry[],
existing: ExistingEntry[],
): ReconciliationPlan {
// Desired path for each live pageId.
const liveByPageId = new Map<string, string>();
// Set of all paths that WILL be written (never delete/remove one of these).
const liveTargetPaths = new Set<string>();
for (const e of live) {
liveByPageId.set(e.pageId, e.relPath);
liveTargetPaths.add(e.relPath);
}
const toWrite: WriteEntry[] = live.map((e) => ({
pageId: e.pageId,
relPath: e.relPath,
}));
const moved: MovedEntry[] = [];
// Absence-based deletions ONLY (tracked pageId absent from `live`). Use a Set
// so the same path coming from multiple existing rows is queued only once.
const toDeleteSet = new Set<string>();
for (const ex of existing) {
const liveRel = liveByPageId.get(ex.pageId);
if (liveRel === undefined) {
// Tracked page is gone from the live tree -> absence delete.
// Never queue a path a live page will (re)write (path reuse -> no loss).
if (!liveTargetPaths.has(ex.relPath)) toDeleteSet.add(ex.relPath);
continue;
}
if (liveRel !== ex.relPath) {
// Same pageId, different path -> a MOVE. Record it so the caller can write
// the new path first, then remove the old one. If the old path is itself a
// live target (reused by another page), it must NOT be removed — the write
// owns it — so flag `removeOldPath: false` (move still recorded).
moved.push({
pageId: ex.pageId,
fromRelPath: ex.relPath,
toRelPath: liveRel,
removeOldPath: !liveTargetPaths.has(ex.relPath),
});
}
// liveRel === ex.relPath -> content-update in place; nothing extra to do
// (the write above re-emits the file; identical bytes => git no-op).
}
const toDelete = [...toDeleteSet];
return { toWrite, toDelete, moved };
}
/**
* Below this many tracked files the mass-delete fraction guard is not applied
* (a tiny vault where deleting "most" files is normal, e.g. 1-of-2).
*/
export const MASS_DELETE_MIN_EXISTING = 4;
/** Fraction of tracked files above which a delete plan is a suspected wipe. */
export const MASS_DELETE_FRACTION = 0.5;
/** Why absence-based deletions were (or were not) applied this cycle. */
export type DeletionDecision =
| { apply: true }
| { apply: false; reason: "incomplete-fetch" | "empty-live" | "mass-delete" };
/**
* Pure decision: should the ABSENCE-based deletions (`plan.toDelete`) be applied
* this cycle? Encapsulates the SPEC §8 safety invariants so they are unit-
* testable without live creds or git:
*
* - `treeComplete === false` (a partial Docmost tree fetch) -> SUPPRESS. A page
* missing from a partial tree is NOT proof of deletion (SPEC §8); we must not
* delete merely-absent files this cycle. (Writes/updates/moves still happen.)
* - The live fetch returned 0 pages while files are tracked -> SUPPRESS
* (almost always a failed fetch, never a real "delete everything").
* - The plan would delete more than `MASS_DELETE_FRACTION` of a non-trivial
* vault -> SUPPRESS as a mass-deletion guard (defense in depth).
*
* Moves are NOT governed by this decision: a moved page IS present in `live`, so
* its old-path removal is real (handled by the caller separately).
*/
export function decideAbsenceDeletions(args: {
treeComplete: boolean;
liveCount: number;
existingCount: number;
deleteCount: number;
}): DeletionDecision {
const { treeComplete, liveCount, existingCount, deleteCount } = args;
// No tracked files, or nothing to delete -> trivially fine to "apply".
if (existingCount === 0 || deleteCount === 0) return { apply: true };
if (!treeComplete) return { apply: false, reason: "incomplete-fetch" };
if (liveCount === 0) return { apply: false, reason: "empty-live" };
if (
existingCount >= MASS_DELETE_MIN_EXISTING &&
deleteCount > existingCount * MASS_DELETE_FRACTION
) {
return { apply: false, reason: "mass-delete" };
}
return { apply: true };
}
@@ -6,56 +6,62 @@
* functions are intentionally dependency-free and pure, so they are trivially
* unit-testable.
*/
// Printable characters forbidden in file names on common filesystems (mainly
// Windows): / \ < > : " | ? *. Each match is replaced with a single "-".
// Spaces are NOT in this set; whitespace is normalized separately below.
// ASCII control characters (code points 0..31) are stripped in a separate pass
// (see stripControlChars) to keep this literal free of embedded control bytes.
const FORBIDDEN_PRINTABLE_RE = /[/\\<>:"|?*]/g;
// Runs of whitespace (including tabs/newlines) collapse to a single space.
const WHITESPACE_RUN_RE = /\s+/g;
// Reserved Windows device names (case-insensitive). A bare match (with or
// without an extension) is unusable as a file name, so it is prefixed with "_".
const RESERVED_WINDOWS_NAMES = new Set([
"con",
"prn",
"aux",
"nul",
"com1",
"com2",
"com3",
"com4",
"com5",
"com6",
"com7",
"com8",
"com9",
"lpt1",
"lpt2",
"lpt3",
"lpt4",
"lpt5",
"lpt6",
"lpt7",
"lpt8",
"lpt9",
"con",
"prn",
"aux",
"nul",
"com1",
"com2",
"com3",
"com4",
"com5",
"com6",
"com7",
"com8",
"com9",
"lpt1",
"lpt2",
"lpt3",
"lpt4",
"lpt5",
"lpt6",
"lpt7",
"lpt8",
"lpt9",
]);
// Cap on the sanitized length to stay well within filesystem path-component
// limits (255 bytes on most FSes) while leaving room for an extension and a
// disambiguation suffix.
const MAX_LENGTH = 120;
/**
* Replace every ASCII control character (code points 0..31) with "-". Done by
* scanning code points rather than a control-range regex literal, so the source
* file carries no embedded control bytes.
*/
function stripControlChars(input) {
let out = "";
for (let i = 0; i < input.length; i++) {
out += input.charCodeAt(i) < 32 ? "-" : input[i];
}
return out;
function stripControlChars(input: string): string {
let out = "";
for (let i = 0; i < input.length; i++) {
out += input.charCodeAt(i) < 32 ? "-" : input[i];
}
return out;
}
/**
* Sanitize a page title into a safe file-name component (WITHOUT extension).
*
@@ -64,34 +70,40 @@ function stripControlChars(input) {
* result, an all-dots result, or a reserved Windows device name by prefixing
* with "_".
*/
export function sanitizeTitle(title) {
let name = stripControlChars(title ?? "")
.replace(FORBIDDEN_PRINTABLE_RE, "-")
.replace(WHITESPACE_RUN_RE, " ")
.trim();
if (name.length > MAX_LENGTH) {
name = name.slice(0, MAX_LENGTH).trim();
}
// Compare the base name (before the first dot) against reserved names, so
// both "CON" and "con.md" are caught.
const base = name.split(".")[0]?.toLowerCase() ?? "";
// A name that is empty, consists only of dots ("." / ".." / "..."), or is a
// reserved Windows device name is unusable as a path component. The all-dots
// case is a path-traversal hazard in particular: an unprefixed ".." would
// become a parent-directory segment and let a page escape the vault, so it
// MUST be neutralized here (becomes "_..", which is a literal file name).
if (name.length === 0 ||
/^\.+$/.test(name) ||
RESERVED_WINDOWS_NAMES.has(base)) {
name = "_" + name;
}
return name;
export function sanitizeTitle(title: string): string {
let name = stripControlChars(title ?? "")
.replace(FORBIDDEN_PRINTABLE_RE, "-")
.replace(WHITESPACE_RUN_RE, " ")
.trim();
if (name.length > MAX_LENGTH) {
name = name.slice(0, MAX_LENGTH).trim();
}
// Compare the base name (before the first dot) against reserved names, so
// both "CON" and "con.md" are caught.
const base = name.split(".")[0]?.toLowerCase() ?? "";
// A name that is empty, consists only of dots ("." / ".." / "..."), or is a
// reserved Windows device name is unusable as a path component. The all-dots
// case is a path-traversal hazard in particular: an unprefixed ".." would
// become a parent-directory segment and let a page escape the vault, so it
// MUST be neutralized here (becomes "_..", which is a literal file name).
if (
name.length === 0 ||
/^\.+$/.test(name) ||
RESERVED_WINDOWS_NAMES.has(base)
) {
name = "_" + name;
}
return name;
}
/**
* Disambiguate a sanitized name when two siblings in the same folder collapse
* to the same name. Appends a stable suffix built from the page's `slugId`, so
* the result stays deterministic across runs (SPEC §12: `Title ~slugId`).
*/
export function disambiguate(name, slugId) {
return `${name} ~${slugId}`;
export function disambiguate(name: string, slugId: string): string {
return `${name} ~${slugId}`;
}
+28
View File
@@ -0,0 +1,28 @@
/**
* Engine settings.
*
* The engine is driven IN-PROCESS by the NestJS server, which builds the
* `Settings` object from `EnvironmentService`. This module therefore exposes
* ONLY the `Settings` type the engine consumes — there is no `.env`-loading
* side-effecting entry point and no env-validation here (the server owns that).
*/
export type Settings = {
docmostSpaceId: string;
vaultPath: string;
gitRemote?: string;
pollIntervalMs: number;
debounceMs: number;
logLevel: 'debug' | 'info' | 'warn' | 'error';
/**
* Per-space PUSH policy for a page whose committed body still contains
* unresolved git conflict markers (`<<<<<<<` / `=======` / `>>>>>>>`):
* - false (DEFAULT, SAFE): SKIP that page's push (it is recorded as a push
* failure, so refs are NOT advanced) — the user must resolve the git
* conflict first before the page reaches Docmost.
* - true: strip the marker lines and push BOTH sides' content (the
* `stripConflictMarkers` behavior).
* Optional/undefined is treated as false.
*/
autoMergeConflicts?: boolean;
};
@@ -1,5 +1,5 @@
/**
* Normalize-on-write helper (SPEC §11 "Резолюция").
* Normalize-on-write helper (SPEC §11 "Resolution").
*
* git diffs byte-for-byte, so writing a page in a NON-fixpoint markdown form
* would make the next pull re-export it to a slightly different (but stable)
@@ -12,7 +12,26 @@
* Already-stable content is unaffected (the pass is idempotent), so re-pulls of
* unchanged pages produce identical bytes and git sees no diff.
*/
import { convertProseMirrorToMarkdown, markdownToProseMirror, serializeDocmostMarkdownBody, } from "../lib/index.js";
import {
convertProseMirrorToMarkdown,
markdownToProseMirror,
serializeDocmostMarkdownBody,
type DocmostMdMeta,
} from "../lib/index.js";
/**
* Meta object as `exportPageBody` builds it (SPEC §4). Kept byte-for-byte
* compatible so files produced here match `exportPageBody`'s output exactly.
*/
export interface PageMeta {
version: 1;
pageId: string;
slugId: string;
title: string;
spaceId: string;
parentPageId: string | null;
}
/**
* Produce the self-contained `.md` file text for a page from its raw
* ProseMirror `content` + identity meta, in the verified fixpoint form.
@@ -26,11 +45,18 @@ import { convertProseMirrorToMarkdown, markdownToProseMirror, serializeDocmostMa
* idempotent for already-stable content, and the convergence point for the
* known converter asymmetries.
*/
export async function stabilizePageFile(content, meta) {
// The meta shape is exactly what `exportPageBody` writes; cast to the lib's
// DocmostMdMeta (a superset with optional fields) for the serializer.
return serializeDocmostMarkdownBody(meta, await stabilizePageBody(content));
export async function stabilizePageFile(
content: unknown,
meta: PageMeta,
): Promise<string> {
// The meta shape is exactly what `exportPageBody` writes; cast to the lib's
// DocmostMdMeta (a superset with optional fields) for the serializer.
return serializeDocmostMarkdownBody(
meta as DocmostMdMeta,
await stabilizePageBody(content),
);
}
/**
* The fixpoint markdown BODY for a page's ProseMirror `content`, WITHOUT any meta
* envelope:
@@ -45,8 +71,8 @@ export async function stabilizePageFile(content, meta) {
* this body with a minimal `gitmost_id` frontmatter; determinism here is what
* keeps re-pulls of an unchanged page byte-identical (no churn, loop-guard).
*/
export async function stabilizePageBody(content) {
const md1 = convertProseMirrorToMarkdown(content);
const doc2 = await markdownToProseMirror(md1);
return convertProseMirrorToMarkdown(doc2);
export async function stabilizePageBody(content: unknown): Promise<string> {
const md1 = convertProseMirrorToMarkdown(content);
const doc2 = await markdownToProseMirror(md1);
return convertProseMirrorToMarkdown(doc2);
}
+126
View File
@@ -0,0 +1,126 @@
/**
* Public surface of `@docmost/git-sync`.
*
* Exposes the pure converter (markdown <-> ProseMirror, file envelope,
* canonicalization) and the sync engine (reconcile planner, vault layout,
* pull/push, the git wrapper, and the settings parser) that the gitmost server
* drives in-process.
*/
// Pure converter (markdown <-> ProseMirror, file envelope, canonicalization).
export {
serializeDocmostMarkdown,
serializeDocmostMarkdownBody,
parseDocmostMarkdown,
convertProseMirrorToMarkdown,
markdownToProseMirror,
canonicalizeContent,
docsCanonicallyEqual,
} from "./lib/index.js";
export type { DocmostMdMeta } from "./lib/index.js";
// Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize,
// loop-guard body hash.
export {
planReconciliation,
decideAbsenceDeletions,
MASS_DELETE_MIN_EXISTING,
MASS_DELETE_FRACTION,
} from "./engine/reconcile.js";
export type {
LiveEntry,
ExistingEntry,
WriteEntry,
MovedEntry,
ReconciliationPlan,
DeletionDecision,
} from "./engine/reconcile.js";
export { buildVaultLayout } from "./engine/layout.js";
export type { PageNode, VaultEntry } from "./engine/layout.js";
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
export { stabilizePageFile } from "./engine/stabilize.js";
export type { PageMeta } from "./engine/stabilize.js";
export { bodyHash } from "./engine/loop-guard.js";
// IO engine: the client seam, the VaultGit git wrapper, the
// pull (Docmost->FS) + push (FS->Docmost) planners/appliers, and the (pure)
// settings parser. The engine consumes the native `GitSyncClient` seam (the
// server implements it) rather than any REST client.
export type { GitSyncClient, GitSyncPageNodeLite } from "./engine/client.types.js";
export {
VaultGit,
vaultGitEnv,
buildCommitMessage,
BOT_AUTHOR_NAME,
BOT_AUTHOR_EMAIL,
DEFAULT_BRANCH,
} from "./engine/git.js";
export type { DiffEntry, MergeResult, CommitOptions } from "./engine/git.js";
export {
readExisting,
computePullActions,
applyPullActions,
} from "./engine/pull.js";
export type {
ReadExistingDeps,
PullActionsInput,
PullActions,
ApplyPullActionsDeps,
ApplyResult,
} from "./engine/pull.js";
export {
classifyRenameMoves,
computePushActions,
applyPushActions,
runPush,
parentFolderFile,
LAST_PUSHED_REF,
DOCMOST_BRANCH,
LOCAL_AUTHOR_NAME,
LOCAL_AUTHOR_EMAIL,
LOCAL_SOURCE_TRAILER,
} from "./engine/push.js";
export type {
CreateAction,
UpdateAction,
DeleteAction,
RenameMoveAction,
RenameMoveActionClassified,
ClassifyRenameMovesDeps,
PushActions,
PushActionsInput,
MetaSide,
ApplyPushDeps,
WrittenBackPage,
PushedPageRecord,
PushFailure,
PushNoop,
ApplyPushResult,
PushDeps,
PushRunResult,
} from "./engine/push.js";
export type { Settings } from "./engine/settings.js";
export { runCycle } from "./engine/cycle.js";
export type {
RunCycleDeps,
RunCycleResult,
CycleFs,
} from "./engine/cycle.js";
export {
assertVaultPathSafe,
isWithinRoot,
VaultPathUnsafeError,
} from "./engine/path-guard.js";
export type { PathGuardIo, VaultPathUnsafeReason } from "./engine/path-guard.js";
export { parsePageFile, serializePageFile } from "./lib/page-file.js";
@@ -1,6 +1,6 @@
/**
* Semantic canonicalization of ProseMirror/TipTap documents for the round-trip
* idempotency check (SPEC §11, "Задача №0", option (б): compare a CANONICALIZED
* idempotency check (SPEC §11, "Task #0", option (b): compare a CANONICALIZED
* form rather than raw bytes).
*
* `markdownToProseMirror` reconstructs schema DEFAULT attributes (e.g.
@@ -12,6 +12,7 @@
*
* It is a self-contained module with no external dependencies.
*/
/**
* Known NON-NULL schema defaults that `markdownToProseMirror` materializes on
* import, keyed by node/mark type { attr: defaultValue }.
@@ -48,35 +49,36 @@
* (docmost-schema.ts L174), so it is already handled by the null-drop rule and
* is intentionally NOT listed here.
*/
const KNOWN_DEFAULTS = {
// mark types
link: {
target: "_blank",
rel: "noopener noreferrer nofollow",
},
comment: {
resolved: false,
},
// node types
orderedList: {
start: 1,
},
drawio: {
align: "center",
},
excalidraw: {
align: "center",
},
video: {
align: "center",
},
youtube: {
align: "center",
},
embed: {
align: "center",
},
const KNOWN_DEFAULTS: Record<string, Record<string, unknown>> = {
// mark types
link: {
target: "_blank",
rel: "noopener noreferrer nofollow",
},
comment: {
resolved: false,
},
// node types
orderedList: {
start: 1,
},
drawio: {
align: "center",
},
excalidraw: {
align: "center",
},
video: {
align: "center",
},
youtube: {
align: "center",
},
embed: {
align: "center",
},
};
/**
* Prune an `attrs` object in place on a fresh copy: drop keys whose value is
* `null` or `undefined` (an absent attribute and an explicit default of `null`
@@ -93,29 +95,31 @@ const KNOWN_DEFAULTS = {
* left (so the caller can drop the `attrs` key entirely: `{attrs:{}}` no
* attrs).
*/
function canonicalizeAttrs(attrs, dropId, type) {
const defaults = type ? KNOWN_DEFAULTS[type] : undefined;
const out = {};
// Stable key order so a JSON.stringify of the canonical form is comparable
// regardless of the input's key order.
for (const key of Object.keys(attrs).sort()) {
// Block ids are regenerated on import; drop them on NODE attrs only.
if (dropId && key === "id")
continue;
const value = attrs[key];
// Absent ≡ explicit-default-null/undefined.
if (value === null || value === undefined)
continue;
// Absent ≡ explicit known non-null default (e.g. link.target="_blank").
// A non-default value (e.g. orderedList.start=5) does NOT match, so it is
// kept. The `comment` mark's `commentId` is never a default, so it always
// survives (SPEC §3); only its `resolved: false` default is normalized away.
if (defaults && key in defaults && value === defaults[key])
continue;
out[key] = value;
}
return Object.keys(out).length > 0 ? out : undefined;
function canonicalizeAttrs(
attrs: Record<string, unknown>,
dropId: boolean,
type: string | undefined,
): Record<string, unknown> | undefined {
const defaults = type ? KNOWN_DEFAULTS[type] : undefined;
const out: Record<string, unknown> = {};
// Stable key order so a JSON.stringify of the canonical form is comparable
// regardless of the input's key order.
for (const key of Object.keys(attrs).sort()) {
// Block ids are regenerated on import; drop them on NODE attrs only.
if (dropId && key === "id") continue;
const value = attrs[key];
// Absent ≡ explicit-default-null/undefined.
if (value === null || value === undefined) continue;
// Absent ≡ explicit known non-null default (e.g. link.target="_blank").
// A non-default value (e.g. orderedList.start=5) does NOT match, so it is
// kept. The `comment` mark's `commentId` is never a default, so it always
// survives (SPEC §3); only its `resolved: false` default is normalized away.
if (defaults && key in defaults && value === defaults[key]) continue;
out[key] = value;
}
return Object.keys(out).length > 0 ? out : undefined;
}
/**
* Return a DEEP COPY of a ProseMirror node tree, canonicalized so that two
* semantically-equal documents compare deep-equal. Rules (applied recursively
@@ -134,43 +138,45 @@ function canonicalizeAttrs(attrs, dropId, type) {
* 5. Preserve `text`, `type`, and `content` order exactly.
* 6. Never mutate the input.
*/
export function canonicalizeContent(node) {
if (Array.isArray(node)) {
return node.map((child) => canonicalizeContent(child));
export function canonicalizeContent(node: any): any {
if (Array.isArray(node)) {
return node.map((child) => canonicalizeContent(child));
}
if (node === null || typeof node !== "object") {
// Primitive leaf (string/number/boolean/null): returned as-is.
return node;
}
// A node is a mark when it has a `type` but never carries block `content`
// and lives inside a `marks` array. We cannot tell from the node alone, so
// we distinguish at the recursion site: node `attrs` drop `id`, mark `attrs`
// do not. This is handled by passing a `dropId` flag down for the `attrs`
// key specifically (nodes) vs the `marks[].attrs` path (marks).
const out: Record<string, unknown> = {};
for (const key of Object.keys(node)) {
if (key === "attrs" && node.attrs && typeof node.attrs === "object") {
// Node-level attrs: drop the block id, null/undefined attrs, and any
// attr at this node type's known non-null schema default.
const canon = canonicalizeAttrs(
node.attrs as Record<string, unknown>,
true,
typeof node.type === "string" ? node.type : undefined,
);
if (canon !== undefined) out.attrs = canon;
// else: drop the `attrs` key entirely (rule 3).
} else if (key === "marks" && Array.isArray(node.marks)) {
// Marks: keep them all (incl. comment); canonicalize their attrs but do
// NOT drop `id` (a mark's `id` would be a meaningful attr, not a block
// id). An empty marks array is dropped so `marks:[]` ≡ no marks.
const marks = (node.marks as any[]).map((mark) => canonicalizeMark(mark));
if (marks.length > 0) out.marks = marks;
} else {
out[key] = canonicalizeContent(node[key]);
}
if (node === null || typeof node !== "object") {
// Primitive leaf (string/number/boolean/null): returned as-is.
return node;
}
// A node is a mark when it has a `type` but never carries block `content`
// and lives inside a `marks` array. We cannot tell from the node alone, so
// we distinguish at the recursion site: node `attrs` drop `id`, mark `attrs`
// do not. This is handled by passing a `dropId` flag down for the `attrs`
// key specifically (nodes) vs the `marks[].attrs` path (marks).
const out = {};
for (const key of Object.keys(node)) {
if (key === "attrs" && node.attrs && typeof node.attrs === "object") {
// Node-level attrs: drop the block id, null/undefined attrs, and any
// attr at this node type's known non-null schema default.
const canon = canonicalizeAttrs(node.attrs, true, typeof node.type === "string" ? node.type : undefined);
if (canon !== undefined)
out.attrs = canon;
// else: drop the `attrs` key entirely (rule 3).
}
else if (key === "marks" && Array.isArray(node.marks)) {
// Marks: keep them all (incl. comment); canonicalize their attrs but do
// NOT drop `id` (a mark's `id` would be a meaningful attr, not a block
// id). An empty marks array is dropped so `marks:[]` ≡ no marks.
const marks = node.marks.map((mark) => canonicalizeMark(mark));
if (marks.length > 0)
out.marks = marks;
}
else {
out[key] = canonicalizeContent(node[key]);
}
}
return out;
}
return out;
}
/**
* Canonicalize a single mark: keep `type`, prune its `attrs` (null/undefined
* AND known non-null defaults dropped, empty attrs removed) but NEVER drop a
@@ -180,66 +186,62 @@ export function canonicalizeContent(node) {
* survives SPEC §3); only known defaults like `link.target="_blank"`,
* `link.rel="noopener…"` and `comment.resolved=false` are normalized away.
*/
function canonicalizeMark(mark) {
if (mark === null || typeof mark !== "object")
return mark;
const out = {};
for (const key of Object.keys(mark)) {
if (key === "attrs" && mark.attrs && typeof mark.attrs === "object") {
const canon = canonicalizeAttrs(mark.attrs, false, typeof mark.type === "string" ? mark.type : undefined);
if (canon !== undefined)
out.attrs = canon;
}
else {
out[key] = canonicalizeContent(mark[key]);
}
function canonicalizeMark(mark: any): any {
if (mark === null || typeof mark !== "object") return mark;
const out: Record<string, unknown> = {};
for (const key of Object.keys(mark)) {
if (key === "attrs" && mark.attrs && typeof mark.attrs === "object") {
const canon = canonicalizeAttrs(
mark.attrs as Record<string, unknown>,
false,
typeof mark.type === "string" ? mark.type : undefined,
);
if (canon !== undefined) out.attrs = canon;
} else {
out[key] = canonicalizeContent(mark[key]);
}
return out;
}
return out;
}
/**
* Deep structural equality of two values that is key-order-insensitive.
* Used to compare canonical forms. (`canonicalizeContent` already emits
* `attrs` in a stable key order, but the top-level node keys preserve input
* order, so we compare structurally rather than by string.)
*/
function deepEqual(a, b) {
if (a === b)
return true;
if (typeof a !== typeof b)
return false;
if (a === null || b === null)
return a === b;
if (typeof a !== "object")
return false;
const aIsArr = Array.isArray(a);
const bIsArr = Array.isArray(b);
if (aIsArr !== bIsArr)
return false;
if (aIsArr) {
if (a.length !== b.length)
return false;
for (let i = 0; i < a.length; i++) {
if (!deepEqual(a[i], b[i]))
return false;
}
return true;
}
const aKeys = Object.keys(a);
const bKeys = Object.keys(b);
if (aKeys.length !== bKeys.length)
return false;
for (const k of aKeys) {
if (!Object.prototype.hasOwnProperty.call(b, k))
return false;
if (!deepEqual(a[k], b[k]))
return false;
function deepEqual(a: any, b: any): boolean {
if (a === b) return true;
if (typeof a !== typeof b) return false;
if (a === null || b === null) return a === b;
if (typeof a !== "object") return false;
const aIsArr = Array.isArray(a);
const bIsArr = Array.isArray(b);
if (aIsArr !== bIsArr) return false;
if (aIsArr) {
if (a.length !== b.length) return false;
for (let i = 0; i < a.length; i++) {
if (!deepEqual(a[i], b[i])) return false;
}
return true;
}
const aKeys = Object.keys(a);
const bKeys = Object.keys(b);
if (aKeys.length !== bKeys.length) return false;
for (const k of aKeys) {
if (!Object.prototype.hasOwnProperty.call(b, k)) return false;
if (!deepEqual(a[k], b[k])) return false;
}
return true;
}
/**
* True when two ProseMirror documents are semantically equal: equal after
* canonicalization (block ids stripped, absent-vs-default-null normalized).
*/
export function docsCanonicallyEqual(a, b) {
return deepEqual(canonicalizeContent(a), canonicalizeContent(b));
export function docsCanonicallyEqual(a: any, b: any): boolean {
return deepEqual(canonicalizeContent(a), canonicalizeContent(b));
}
File diff suppressed because it is too large Load Diff
@@ -8,9 +8,19 @@
* There is no REST client, websocket/collab write-path, auth-utils or page-lock
* here the gitmost server writes natively.
*/
export { serializeDocmostMarkdown, parseDocmostMarkdown, serializeDocmostMarkdownBody, } from "./markdown-document.js";
export {
serializeDocmostMarkdown,
parseDocmostMarkdown,
serializeDocmostMarkdownBody,
} from "./markdown-document.js";
export type { DocmostMdMeta } from "./markdown-document.js";
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
export { canonicalizeContent, docsCanonicallyEqual, } from "./canonicalize.js";
export {
canonicalizeContent,
docsCanonicallyEqual,
} from "./canonicalize.js";
export { parsePageFile, serializePageFile } from "./page-file.js";
File diff suppressed because it is too large Load Diff
@@ -26,6 +26,16 @@
* comment marks (anchors) embedded in the body are restored. Managing comment
* records stays with the comment tools/UI.
*/
export interface DocmostMdMeta {
version: number;
pageId?: string;
slugId?: string;
title?: string;
spaceId?: string;
parentPageId?: string | null;
}
// Match the leading meta block (allow leading whitespace). Capture group 1 is
// the JSON text between the markers.
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
@@ -33,20 +43,28 @@ const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
// rather than end-anchoring a single regex (which would mis-capture across a
// literal opener that appears earlier in the body).
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
/**
* Assemble the full self-contained markdown file: meta block, body, and the
* comments block. The meta block is always emitted; the comments block is always
* emitted too (with `[]` when there are no comments) so the format stays uniform
* and parsing stays simple.
*/
export function serializeDocmostMarkdown(meta, body, comments) {
const metaJson = JSON.stringify(meta);
const commentsJson = JSON.stringify(Array.isArray(comments) ? comments : []);
const trimmedBody = (body ?? "").trim();
return (`<!-- docmost:meta\n${metaJson}\n-->\n\n` +
`${trimmedBody}\n\n` +
`<!-- docmost:comments\n${commentsJson}\n-->\n`);
export function serializeDocmostMarkdown(
meta: DocmostMdMeta,
body: string,
comments: any[],
): string {
const metaJson = JSON.stringify(meta);
const commentsJson = JSON.stringify(Array.isArray(comments) ? comments : []);
const trimmedBody = (body ?? "").trim();
return (
`<!-- docmost:meta\n${metaJson}\n-->\n\n` +
`${trimmedBody}\n\n` +
`<!-- docmost:comments\n${commentsJson}\n-->\n`
);
}
/**
* Split a self-contained file back into its parts. Tolerant: if the meta or
* comments block is missing (e.g. a hand-written plain-markdown file), the
@@ -55,53 +73,68 @@ export function serializeDocmostMarkdown(meta, body, comments) {
* inside a block that IS present is surfaced as a thrown Error with a clear
* message. Robust to `\r\n` line endings.
*/
export function parseDocmostMarkdown(full) {
// Normalize line endings so the anchored regexes work regardless of CRLF.
const normalized = (full ?? "").replace(/\r\n/g, "\n");
// Extract the leading meta block (start-anchored — already unambiguous).
let meta = null;
let metaEnd = 0;
const metaMatch = normalized.match(META_RE);
if (metaMatch) {
try {
meta = JSON.parse(metaMatch[1]);
}
catch (e) {
throw new Error(`Invalid docmost:meta JSON block: ${e instanceof Error ? e.message : String(e)}`);
}
// Body starts right after the matched meta block.
metaEnd = (metaMatch.index ?? 0) + metaMatch[0].length;
export function parseDocmostMarkdown(full: string): {
meta: DocmostMdMeta | null;
body: string;
comments: any[] | null;
} {
// Normalize line endings so the anchored regexes work regardless of CRLF.
const normalized = (full ?? "").replace(/\r\n/g, "\n");
// Extract the leading meta block (start-anchored — already unambiguous).
let meta: DocmostMdMeta | null = null;
let metaEnd = 0;
const metaMatch = normalized.match(META_RE);
if (metaMatch) {
try {
meta = JSON.parse(metaMatch[1]);
} catch (e) {
throw new Error(
`Invalid docmost:meta JSON block: ${
e instanceof Error ? e.message : String(e)
}`,
);
}
// Find the LAST `<!-- docmost:comments` opener; the real file-level block is
// the final one whose closing `-->` ends the document. Any earlier literal
// occurrence inside the body (e.g. a re-pasted export) is left in the body.
let lastOpenStart = -1;
let lastOpenEnd = -1;
let m;
COMMENTS_OPEN_RE.lastIndex = 0;
while ((m = COMMENTS_OPEN_RE.exec(normalized)) !== null) {
lastOpenStart = m.index;
lastOpenEnd = m.index + m[0].length;
// Body starts right after the matched meta block.
metaEnd = (metaMatch.index ?? 0) + metaMatch[0].length;
}
// Find the LAST `<!-- docmost:comments` opener; the real file-level block is
// the final one whose closing `-->` ends the document. Any earlier literal
// occurrence inside the body (e.g. a re-pasted export) is left in the body.
let lastOpenStart = -1;
let lastOpenEnd = -1;
let m: RegExpExecArray | null;
COMMENTS_OPEN_RE.lastIndex = 0;
while ((m = COMMENTS_OPEN_RE.exec(normalized)) !== null) {
lastOpenStart = m.index;
lastOpenEnd = m.index + m[0].length;
}
let comments: any[] | null = null;
let bodyEnd = normalized.length;
if (lastOpenStart !== -1) {
const rest = normalized.slice(lastOpenEnd);
const close = rest.match(/\r?\n-->[ \t]*\r?\n?\s*$/); // closer must end the doc
if (close) {
const jsonText = rest.slice(0, close.index);
try {
comments = JSON.parse(jsonText);
} catch (e) {
throw new Error(
`Invalid docmost:comments JSON block: ${
e instanceof Error ? e.message : String(e)
}`,
);
}
bodyEnd = lastOpenStart; // strip from the opener to end of document
}
let comments = null;
let bodyEnd = normalized.length;
if (lastOpenStart !== -1) {
const rest = normalized.slice(lastOpenEnd);
const close = rest.match(/\r?\n-->[ \t]*\r?\n?\s*$/); // closer must end the doc
if (close) {
const jsonText = rest.slice(0, close.index);
try {
comments = JSON.parse(jsonText);
}
catch (e) {
throw new Error(`Invalid docmost:comments JSON block: ${e instanceof Error ? e.message : String(e)}`);
}
bodyEnd = lastOpenStart; // strip from the opener to end of document
}
}
const body = normalized.slice(metaEnd, bodyEnd).trim();
return { meta, body, comments };
}
const body = normalized.slice(metaEnd, bodyEnd).trim();
return { meta, body, comments };
}
/**
* Serialize a self-contained markdown file with the meta block + body ONLY
* NO trailing `docmost:comments` block. The sync engine never touches
@@ -113,6 +146,9 @@ export function parseDocmostMarkdown(full) {
* `comments: null` and treats the rest as body), so a file produced here
* round-trips cleanly through the parser.
*/
export function serializeDocmostMarkdownBody(meta, body) {
return `<!-- docmost:meta\n${JSON.stringify(meta)}\n-->\n\n${(body ?? "").trim()}\n`;
export function serializeDocmostMarkdownBody(
meta: DocmostMdMeta,
body: string,
): string {
return `<!-- docmost:meta\n${JSON.stringify(meta)}\n-->\n\n${(body ?? "").trim()}\n`;
}
@@ -0,0 +1,365 @@
/**
* Pure markdown -> ProseMirror conversion.
*
* The converter path is `markdownToProseMirror` (marked -> HTML ->
* generateJSON) plus the two pre/post processors it needs (`preprocessCallouts`,
* `bridgeTaskLists`). The gitmost server writes the resulting page bodies
* natively through the collab gateway, so no websocket/Yjs write-path lives
* here.
*/
import { generateJSON } from "@tiptap/html";
import { JSDOM } from "jsdom";
import { marked } from "marked";
import { docmostExtensions } from "./docmost-schema.js";
// Setup DOM environment for Tiptap HTML parsing in Node.js
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
global.window = dom.window as any;
global.document = dom.window.document;
// @ts-ignore
global.Element = dom.window.Element;
/**
* Hard ceiling above which we skip callout preprocessing entirely. The linear
* scanner below has no quadratic blow-up, but we still cap input defensively so
* a pathological multi-megabyte payload cannot tie up the event loop; in that
* case the markdown is passed through verbatim (callouts are simply not
* detected) rather than risking a slow scan.
*/
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
/** Matches an opening callout fence: `:::type` (type captured, lower-cased). */
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
/** Matches a bare closing callout fence: `:::`. */
const CALLOUT_CLOSE_RE = /^:::\s*$/;
/**
* Matches an Obsidian-native callout opener: `> [!type]` (type captured). An
* optional title after the type is allowed but ignored (the Docmost callout
* schema has no title). The body is the following contiguous blockquote lines.
*/
const CALLOUT_BQ_OPEN_RE = /^>\s*\[!(\w+)\]/;
/** Matches any blockquote continuation line (`>` … ). */
const BLOCKQUOTE_LINE_RE = /^>/;
/** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
/**
* Pre-process Docmost-flavoured markdown: convert `:::type ... :::`
* callout blocks (the syntax our markdown export produces) into HTML
* divs that the callout extension parses. The inner content is rendered
* through marked as regular markdown.
*
* Implemented as a single linear pass over the lines (no quadratic regex
* rescan). It:
* - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
* `:::` line that lives inside a code fence as a callout delimiter, so a
* callout body that itself contains a fenced code block with a `:::` line is
* no longer corrupted;
* - matches an opening `:::type` line with the next CLOSING `:::` at the SAME
* nesting level, supporting NESTED callouts via a depth counter (an inner
* `:::type` opens a deeper level and consumes a matching `:::`);
* - emits the same `<div data-type="callout" data-callout-type="TYPE">` output
* (inner rendered through marked) as the previous regex implementation.
*/
async function preprocessCallouts(markdown: string): Promise<string> {
// Defensive cap: skip preprocessing for pathologically large inputs.
if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
return markdown;
}
// Recursively transform a slice of lines, converting top-level callouts in
// that slice into <div> blocks and rendering their inner content (which may
// itself contain nested callouts) through this same function.
const transform = async (lines: string[]): Promise<string> => {
const out: string[] = [];
let inCodeFence = false;
let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it
let i = 0;
while (i < lines.length) {
const line = lines[i];
// Inside a code fence, only its matching closing fence is significant;
// everything else (including `:::` lines) is copied through verbatim.
if (inCodeFence) {
out.push(line);
const fence = line.match(CODE_FENCE_RE);
if (fence && fence[2].startsWith(codeFenceMarker[0]) &&
fence[2].length >= codeFenceMarker.length) {
inCodeFence = false;
codeFenceMarker = "";
}
i++;
continue;
}
// A code fence opening outside any callout body: enter code-fence mode.
const fenceOpen = line.match(CODE_FENCE_RE);
if (fenceOpen) {
inCodeFence = true;
codeFenceMarker = fenceOpen[2];
out.push(line);
i++;
continue;
}
// An opening callout fence: scan forward (with code-fence and nested
// callout awareness) for its matching closing `:::` at the same level.
const open = line.match(CALLOUT_OPEN_RE);
if (open) {
const type = open[1].toLowerCase();
const bodyLines: string[] = [];
let depth = 1;
let innerInCodeFence = false;
let innerCodeFenceMarker = "";
let j = i + 1;
for (; j < lines.length; j++) {
const bl = lines[j];
if (innerInCodeFence) {
const f = bl.match(CODE_FENCE_RE);
if (f && f[2].startsWith(innerCodeFenceMarker[0]) &&
f[2].length >= innerCodeFenceMarker.length) {
innerInCodeFence = false;
innerCodeFenceMarker = "";
}
bodyLines.push(bl);
continue;
}
const innerFence = bl.match(CODE_FENCE_RE);
if (innerFence) {
innerInCodeFence = true;
innerCodeFenceMarker = innerFence[2];
bodyLines.push(bl);
continue;
}
if (CALLOUT_OPEN_RE.test(bl)) {
depth++;
bodyLines.push(bl);
continue;
}
if (CALLOUT_CLOSE_RE.test(bl)) {
depth--;
if (depth === 0) break; // matching close for THIS callout
bodyLines.push(bl);
continue;
}
bodyLines.push(bl);
}
if (j < lines.length) {
// Found the matching closing fence: render the body (recursively, so
// nested callouts are handled) and emit the callout div.
const inner = await transform(bodyLines);
const renderedInner = await marked.parse(inner);
out.push(
`\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`,
);
i = j + 1; // skip past the closing `:::`
continue;
}
// No matching close (unterminated callout): treat the opener as a
// literal line and continue, preserving the original text.
out.push(line);
i++;
continue;
}
// An Obsidian-native callout: `> [!type]` opener; the body is the following
// CONTIGUOUS blockquote (`>`-prefixed) lines. Strip ONE blockquote level and
// recurse so nested callouts (`> > [!type]`) are handled, then emit the same
// callout div the `:::` path produces. A normal blockquote (no `[!type]` on
// its first line) does not match and stays a blockquote.
const bqOpen = line.match(CALLOUT_BQ_OPEN_RE);
if (bqOpen) {
const type = bqOpen[1].toLowerCase();
const bodyLines: string[] = [];
let j = i + 1;
for (; j < lines.length; j++) {
if (!BLOCKQUOTE_LINE_RE.test(lines[j])) break;
bodyLines.push(lines[j].replace(/^>\s?/, ""));
}
const inner = await transform(bodyLines);
const renderedInner = await marked.parse(inner);
out.push(
`\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`,
);
i = j;
continue;
}
out.push(line);
i++;
}
return out.join("\n");
};
return transform(markdown.split("\n"));
}
/**
* Bridge marked's checkbox lists to TipTap task lists.
*
* marked renders GitHub task list items (`- [x] done`) as a plain
* `<ul><li><p><input type="checkbox" checked> text</p></li></ul>` WITHOUT the
* markup TipTap's TaskList/TaskItem extensions parse. This rewrites such lists
* into the shape those extensions expect:
* TaskList parseHTML matches `ul[data-type="taskList"]`,
* TaskItem matches `li[data-type="taskItem"]`,
* the checked state is read from `data-checked === "true"`.
*
* A list is only converted when it has at least one `<li>` and EVERY direct
* `<li>` contains a checkbox input. Both `<ul>` and `<ol>` are considered: a
* numbered checklist (`1. [x] a`, which marked renders as an `<ol>` of checkbox
* `<li>`s) would otherwise lose its task state. TipTap task lists are unordered,
* so a matching `<ol>` is emitted as `data-type="taskList"` exactly like a
* `<ul>`. Mixed or ordinary lists (including ordinary `<ol>` lists) are left
* untouched so they keep rendering as bullet/numbered lists. The marked `<p>`
* wrapper is kept inside the `<li>` because TaskItem content allows paragraphs.
*/
function bridgeTaskLists(html: string): string {
// Cheap early-out: if the markup contains no checkbox input at all there is
// nothing to bridge, so skip the expensive JSDOM parse entirely. This is the
// common case (most pages have no task lists).
if (!/type=["']?checkbox/i.test(html)) {
return html;
}
// Defensive cap (consistent with preprocessCallouts): skip the bridge for
// pathologically large inputs rather than running a second expensive JSDOM
// parse on a multi-megabyte payload. The markup is passed through verbatim.
if (html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
return html;
}
const dom = new JSDOM(html);
const document = dom.window.document;
// Collect the checkbox(es) that belong to THIS <li> directly: either direct
// child <input type="checkbox"> elements or ones inside the <li>'s direct <p>
// child (the shape marked emits: `<li><p><input type="checkbox"> text</p></li>`).
// Checkboxes nested deeper (e.g. inside a child <ul>/<ol>) are excluded so a
// bullet <li> that merely contains a nested task sublist is not misdetected.
// Raw inline HTML can put more than one checkbox in a single <li>; we gather
// ALL of them so none survive into the converted item.
const directCheckboxes = (li: Element): Element[] => {
const found: Element[] = [];
for (const child of Array.from(li.children)) {
if (
child.tagName === "INPUT" &&
child.getAttribute("type") === "checkbox"
) {
found.push(child);
continue;
}
if (child.tagName === "P") {
for (const inp of Array.from(
child.querySelectorAll(":scope > input[type='checkbox']"),
)) {
found.push(inp);
}
}
}
return found;
};
// Both <ul> and <ol> are candidates: an <ol> whose every direct <li> carries
// its own checkbox is a numbered checklist that must also become a taskList.
const lists = Array.from(document.querySelectorAll("ul, ol"));
for (const list of lists) {
// Only consider DIRECT child <li> elements; nested lists are handled by
// their own iteration of the outer loop.
const items = Array.from(list.children).filter(
(child) => child.tagName === "LI",
);
if (items.length === 0) continue;
const itemCheckboxes = items.map((li) => directCheckboxes(li));
// Convert only when every direct <li> carries at least one OWN checkbox.
if (!itemCheckboxes.every((boxes) => boxes.length > 0)) continue;
// A numbered checklist arrives as an <ol>. We must NOT leave the tag as
// <ol> while tagging it data-type="taskList": generateJSON would then match
// BOTH the orderedList rule (tag ol) and the taskList rule (data-type),
// emitting a phantom empty orderedList beside the real taskList. So rename a
// qualifying <ol> to a <ul> — move its <li> children over and replace it —
// leaving only the taskList rule to match. Already-<ul> lists are unchanged.
let target: Element = list;
if (list.tagName === "OL") {
const ul = document.createElement("ul");
// Carry over existing attributes (e.g. class) so nothing is silently lost.
for (const attr of Array.from(list.attributes)) {
ul.setAttribute(attr.name, attr.value);
}
// Move every child node (including the <li>s we collected) into the <ul>.
while (list.firstChild) {
ul.appendChild(list.firstChild);
}
list.replaceWith(ul);
target = ul;
}
target.setAttribute("data-type", "taskList");
items.forEach((li, index) => {
const boxes = itemCheckboxes[index];
// The first checkbox determines the checked state (matches the previous
// single-checkbox behaviour); any extras only need removing.
const input = boxes[0] ?? null;
li.setAttribute("data-type", "taskItem");
const checked =
input != null &&
(input.hasAttribute("checked") || (input as any).checked);
li.setAttribute("data-checked", checked ? "true" : "false");
// Remove ALL direct checkbox inputs so none survive into the content
// (a raw-inline-HTML <li> may carry more than one).
for (const box of boxes) {
box.remove();
}
});
}
return document.body.innerHTML;
}
/**
* Recursively strip content-less paragraph nodes from a generated doc.
*
* A block-level atom whose markdown form is INLINE (e.g. the block `image`'s
* `![](url)`, or a bare media element) is wrapped by marked in a <p>; the schema
* then HOISTS the block atom out of that paragraph, leaving an EMPTY paragraph
* sibling. On the next export that empty `<p>` renders to "" and the doc "\n\n"
* join injects a phantom blank gap, so the markdown is not byte-stable.
*
* Markdown blank lines are separators, never content, so generateJSON only ever
* produces an empty paragraph as such a hoist artifact — removing them is safe
* and general (it also subsumes the <div>-wrapper workaround the `video` case
* uses). We remove ONLY `type === 'paragraph'` nodes whose `content` is absent
* or an empty array; every other node (including atoms without `content`) is
* preserved, and we recurse into the content of any node that has children.
*/
function stripEmptyParagraphs(node: any): any {
if (!node || !Array.isArray(node.content)) {
// Atom / leaf node (no children to recurse into): keep as-is.
return node;
}
const mapped = node.content.map((child: any) => stripEmptyParagraphs(child));
const isEmptyParagraph = (child: any): boolean =>
!!child &&
child.type === "paragraph" &&
(!Array.isArray(child.content) || child.content.length === 0);
const filtered = mapped.filter((child: any) => !isEmptyParagraph(child));
// Schema-validity guard: several nodes require NON-empty block content
// (`content: "block+"` — tableCell, tableHeader, blockquote, column, callout,
// and the doc root). For an empty one of those, generateJSON materializes a
// single empty paragraph as its OBLIGATORY content — that is not a hoist
// artifact. If stripping would empty the container, keep ONE empty paragraph
// so the result stays schema-valid (an empty cell/quote must not become `[]`).
const cleaned =
filtered.length === 0 && mapped.length > 0 ? [mapped[0]] : filtered;
return { ...node, content: cleaned };
}
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
export async function markdownToProseMirror(
markdownContent: string,
): Promise<any> {
const withCallouts = await preprocessCallouts(markdownContent);
const html = await marked.parse(withCallouts);
const bridged = bridgeTaskLists(html);
const doc = generateJSON(bridged, docmostExtensions);
return stripEmptyParagraphs(doc);
}
+897
View File
@@ -0,0 +1,897 @@
/**
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
* tree by node id.
*
* A ProseMirror node here is a plain JSON object of the shape produced by
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
* table cells hold their children in `content` just like any other block, so a
* single recursive walk reaches them all.
*
* Every exported function operates on a DEEP CLONE of the input document and
* returns the new document. The input doc and any `newNode`/`node` argument are
* never mutated. All functions are defensively null-safe: missing/!Array
* `content`, non-object nodes, and absent `attrs` are tolerated.
*/
/** Deep-clone a JSON-serializable value without mutating the original. */
function clone<T>(value: T): T {
if (typeof structuredClone === "function") {
return structuredClone(value);
}
// Fallback for environments without structuredClone.
return JSON.parse(JSON.stringify(value)) as T;
}
/** True if `value` is a non-null object (and not an array). */
function isObject(value: any): value is Record<string, any> {
return value != null && typeof value === "object" && !Array.isArray(value);
}
/** True if `node` carries the given id in `node.attrs.id`. */
function matchesId(node: any, nodeId: string): boolean {
return isObject(node) && isObject(node.attrs) && node.attrs.id === nodeId;
}
/**
* Recursively concatenate all text contained in a node.
*
* Text nodes contribute their `text` string; container nodes contribute the
* joined `blockPlainText` of their `content` children. Returns "" for nullish
* or non-object inputs.
*/
export function blockPlainText(node: any): string {
if (!isObject(node)) return "";
let out = "";
if (typeof node.text === "string") {
out += node.text;
}
if (Array.isArray(node.content)) {
for (const child of node.content) {
out += blockPlainText(child);
}
}
return out;
}
/** Truncate `text` to at most `n` chars, appending an ellipsis when cut. */
function truncate(text: string, n: number): string {
return text.length > n ? text.slice(0, n) + "…" : text;
}
/** One compact outline entry for a single top-level block. */
export interface OutlineEntry {
index: number;
type: string | undefined;
id: string | null;
firstText: string;
/** Present for headings only. */
level?: number | null;
/** Present for tables only. */
rows?: number;
cols?: number;
header?: string[];
/** Present for list blocks only (bulletList/orderedList/taskList). */
items?: number;
}
/**
* Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
* `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
* table cells — compactness is the point; use `getNodeByRef` to drill into a
* specific block.
*
* Each entry carries `{ index, type, id, firstText }`, plus type-specific
* extras: headings add `level`; tables add `rows`/`cols` and the first row's
* cell texts as `header`; list blocks (types ending in "List") add `items`.
* `firstText` is the block's plain text truncated to 100 chars. Null-safe:
* a missing or non-object doc/content yields `[]`.
*/
export function buildOutline(doc: any): OutlineEntry[] {
if (!isObject(doc) || !Array.isArray(doc.content)) return [];
const out: OutlineEntry[] = [];
for (let i = 0; i < doc.content.length; i++) {
const block = doc.content[i];
const type = isObject(block) ? block.type : undefined;
const entry: OutlineEntry = {
index: i,
type,
id: isObject(block) && isObject(block.attrs) ? block.attrs.id ?? null : null,
firstText: truncate(blockPlainText(block), 100),
};
if (type === "heading") {
entry.level = isObject(block.attrs) ? block.attrs.level ?? null : null;
} else if (type === "table") {
const headerRow = block.content?.[0]?.content ?? [];
entry.rows = block.content?.length ?? 0;
entry.cols = block.content?.[0]?.content?.length ?? 0;
entry.header = headerRow.map((cell: any) =>
truncate(blockPlainText(cell), 40),
);
} else if (typeof type === "string" && type.endsWith("List")) {
entry.items = block.content?.length ?? 0;
}
out.push(entry);
}
return out;
}
/**
* Resolve a single node by reference and return `{ node, path, type }`, or
* `null` when nothing matches.
*
* - `ref` of the form `#<n>` (e.g. `#2`) selects the TOP-LEVEL block at index
* `n` in `doc.content`. This is the only way to address table/tableRow/
* tableCell nodes, which carry no `attrs.id`.
* - Otherwise `ref` is treated as a block id: the FIRST node anywhere in the
* tree with `attrs.id === ref` is returned.
*
* `path` is the array of child indices from the doc root down to the node
* (so a top-level block is `[index]`). The returned `node` is a DEEP CLONE,
* so callers can mutate it without touching the input doc. Null-safe.
*/
export function getNodeByRef(
doc: any,
ref: string,
): { node: any; path: number[]; type: string | undefined } | null {
if (!isObject(doc)) return null;
// "#<n>": index into the top-level content array.
const indexMatch = typeof ref === "string" ? ref.match(/^#(\d+)$/) : null;
if (indexMatch) {
const index = Number(indexMatch[1]);
const block = Array.isArray(doc.content) ? doc.content[index] : undefined;
if (!isObject(block)) return null;
return { node: clone(block), path: [index], type: block.type };
}
// Otherwise: depth-first search for the first node with attrs.id === ref.
const search = (
node: any,
trail: number[],
): { node: any; path: number[]; type: string } | null => {
if (!isObject(node)) return null;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const child = node.content[i];
const path = [...trail, i];
if (matchesId(child, ref)) {
return { node: clone(child), path, type: child.type };
}
const hit = search(child, path);
if (hit != null) return hit;
}
}
return null;
};
return search(doc, []);
}
/**
* Replace EVERY node whose `attrs.id === nodeId` with a deep clone of
* `newNode`, anywhere in the tree (including inside callouts and table cells).
*
* Operates on a clone of `doc`; returns `{ doc, replaced }` where `replaced`
* is the number of nodes substituted. A fresh clone of `newNode` is used for
* each match so they do not share references.
*/
export function replaceNodeById(
doc: any,
nodeId: string,
newNode: any,
): { doc: any; replaced: number } {
const out = clone(doc);
let replaced = 0;
// Walk a content array, replacing direct matches and recursing into the
// (possibly new) children of non-matching nodes.
const walkContent = (content: any[]): void => {
for (let i = 0; i < content.length; i++) {
const child = content[i];
if (matchesId(child, nodeId)) {
content[i] = clone(newNode);
replaced++;
// Do not recurse into a freshly substituted node.
continue;
}
if (isObject(child) && Array.isArray(child.content)) {
walkContent(child.content);
}
}
};
if (isObject(out) && Array.isArray(out.content)) {
walkContent(out.content);
}
return { doc: out, replaced };
}
/**
* Remove EVERY node whose `attrs.id === nodeId` from its parent `content`
* array, anywhere in the tree (recursive, including callouts and tables).
*
* Operates on a clone of `doc`; returns `{ doc, deleted }` where `deleted` is
* the number of nodes removed.
*/
export function deleteNodeById(
doc: any,
nodeId: string,
): { doc: any; deleted: number } {
const out = clone(doc);
let deleted = 0;
// Filter a content array in place, dropping matches and recursing into the
// surviving children.
const walkContent = (content: any[]): any[] => {
const kept: any[] = [];
for (const child of content) {
if (matchesId(child, nodeId)) {
deleted++;
continue;
}
if (isObject(child) && Array.isArray(child.content)) {
child.content = walkContent(child.content);
}
kept.push(child);
}
return kept;
};
if (isObject(out) && Array.isArray(out.content)) {
out.content = walkContent(out.content);
}
return { doc: out, deleted };
}
/**
* Deep-clone `doc` and strip every node/mark attribute whose value is strictly
* `undefined`, so the result is safe to hand to Yjs (which throws an opaque
* "Unexpected content type" when asked to store an `undefined` attribute value).
*
* Only `undefined` keys are removed; `null`, `false`, `0`, and `""` are all
* legitimate JSON-storable values and are preserved. Operates on a clone and
* returns it; the input is never mutated. Defensively null-safe like the rest
* of the file.
*/
export function sanitizeForYjs(doc: any): any {
const out = clone(doc);
// Drop every key whose value is strictly `undefined` from an attrs object.
const stripUndefined = (attrs: any): void => {
if (!isObject(attrs)) return;
for (const key of Object.keys(attrs)) {
if (attrs[key] === undefined) {
delete attrs[key];
}
}
};
const walk = (node: any): void => {
if (!isObject(node)) return;
stripUndefined(node.attrs);
if (Array.isArray(node.marks)) {
for (const mark of node.marks) {
if (isObject(mark)) stripUndefined(mark.attrs);
}
}
if (Array.isArray(node.content)) {
for (const child of node.content) {
walk(child);
}
}
};
walk(out);
return out;
}
/**
* Diagnostics helper: walk the tree and return a human-readable path string for
* the FIRST attribute value (in any `node.attrs` or `mark.attrs`) that Yjs
* cannot store — i.e. `undefined`, a `function`, a `symbol`, or a `bigint`
* (e.g. `content[3].content[0].attrs.indent (undefined)`). Returns `null` when
* every attribute is storable. Null-safe.
*/
export function findUnstorableAttr(doc: any): string | null {
const isUnstorable = (value: any): string | null => {
if (value === undefined) return "undefined";
const t = typeof value;
if (t === "function") return "function";
if (t === "symbol") return "symbol";
if (t === "bigint") return "bigint";
return null;
};
// Check an attrs object; return the offending sub-path or null.
const checkAttrs = (attrs: any, basePath: string): string | null => {
if (!isObject(attrs)) return null;
for (const key of Object.keys(attrs)) {
const kind = isUnstorable(attrs[key]);
if (kind != null) return `${basePath}.${key} (${kind})`;
}
return null;
};
const walk = (node: any, path: string): string | null => {
if (!isObject(node)) return null;
const attrHit = checkAttrs(node.attrs, `${path}.attrs`);
if (attrHit != null) return attrHit;
if (Array.isArray(node.marks)) {
for (let i = 0; i < node.marks.length; i++) {
const markHit = checkAttrs(
node.marks[i]?.attrs,
`${path}.marks[${i}].attrs`,
);
if (markHit != null) return markHit;
}
}
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const childHit = walk(node.content[i], `${path}.content[${i}]`);
if (childHit != null) return childHit;
}
}
return null;
};
// The root doc node carries no useful index, so start the path at "doc".
if (!isObject(doc)) return null;
const attrHit = checkAttrs(doc.attrs, "attrs");
if (attrHit != null) return attrHit;
if (Array.isArray(doc.content)) {
for (let i = 0; i < doc.content.length; i++) {
const childHit = walk(doc.content[i], `content[${i}]`);
if (childHit != null) return childHit;
}
}
return null;
}
/**
* Table structural node types and the container each must live directly inside.
* Used by `insertNodeRelative` to splice rows/cells into the correct ancestor
* rather than blindly into the anchor's direct parent (which would corrupt the
* table's nesting).
*/
const STRUCTURAL_TYPES = new Set(["tableRow", "tableCell", "tableHeader"]);
const REQUIRED_CONTAINER: Record<string, string> = {
tableRow: "table",
tableCell: "tableRow",
tableHeader: "tableRow",
};
/**
* Locate an anchor and return its ancestor chain (from `doc` down to and
* including the matched node). Each chain entry is `{ node, index }` where
* `index` is the node's position inside its parent's `content` array (the root
* doc has index -1). Returns `null` when the anchor cannot be resolved.
*/
function findAnchorChain(
doc: any,
opts: InsertOptions,
): { node: any; index: number }[] | null {
if (!isObject(doc)) return null;
// DFS by id anywhere in the tree, accumulating the path.
if (opts.anchorNodeId != null) {
const targetId = opts.anchorNodeId;
const search = (
node: any,
index: number,
trail: { node: any; index: number }[],
): { node: any; index: number }[] | null => {
if (!isObject(node)) return null;
const here = [...trail, { node, index }];
if (matchesId(node, targetId)) return here;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const hit = search(node.content[i], i, here);
if (hit != null) return hit;
}
}
return null;
};
return search(doc, -1, []);
}
// By text: only top-level blocks are scanned (same rule as the JSON path).
if (opts.anchorText != null && Array.isArray(doc.content)) {
for (let i = 0; i < doc.content.length; i++) {
if (blockPlainText(doc.content[i]).includes(opts.anchorText)) {
return [
{ node: doc, index: -1 },
{ node: doc.content[i], index: i },
];
}
}
}
return null;
}
/** Options controlling where `insertNodeRelative` places the new node. */
export interface InsertOptions {
position: "before" | "after" | "append";
/** Resolve the anchor by node id anywhere in the tree (preferred). */
anchorNodeId?: string;
/** Fallback: first TOP-LEVEL block whose plain text includes this string. */
anchorText?: string;
}
/**
* Insert a deep clone of `node` relative to an anchor.
*
* - position "append": push the node onto the top-level `doc.content`.
* - position "before"/"after": locate the anchor and splice the node into the
* anchor's parent `content` array immediately before / after it.
*
* Anchor resolution for before/after:
* - if `anchorNodeId` is given, find the node with `attrs.id === anchorNodeId`
* anywhere in the tree (recursive);
* - otherwise, if `anchorText` is given, scan only TOP-LEVEL `doc.content`
* blocks and pick the first whose `blockPlainText` includes `anchorText`.
*
* Operates on a clone of `doc`; returns `{ doc, inserted }`. `inserted` is
* false when the anchor could not be resolved (the doc is returned unchanged
* apart from being cloned).
*/
export function insertNodeRelative(
doc: any,
node: any,
opts: InsertOptions,
): { doc: any; inserted: boolean } {
const out = clone(doc);
const fresh = clone(node);
// Defensive: stay null-safe like the other exports — a missing opts means
// there is nothing actionable to do.
if (!isObject(opts)) return { doc: out, inserted: false };
const isStructural = isObject(node) && STRUCTURAL_TYPES.has(node.type);
// "append": top-level push.
if (opts.position === "append") {
// Structural table nodes (tableRow/tableCell/tableHeader) cannot live at the
// top level — appending one would produce invalid nesting.
if (isStructural) {
throw new Error(
`insert_node: cannot append a ${node.type} at the top level; use ` +
`position before/after with an anchor inside the target table`,
);
}
if (isObject(out)) {
if (!Array.isArray(out.content)) out.content = [];
out.content.push(fresh);
return { doc: out, inserted: true };
}
return { doc: out, inserted: false };
}
const offset = opts.position === "after" ? 1 : 0;
// Structural insert (before/after a tableRow/tableCell/tableHeader): splice
// into the nearest enclosing table/tableRow rather than the anchor's direct
// parent, so the row/cell lands at the correct level of the table.
if (isStructural) {
const containerType = REQUIRED_CONTAINER[node.type];
const chain = findAnchorChain(out, opts);
// Anchor not resolved at all — keep the existing "anchor not found" path.
if (chain == null) return { doc: out, inserted: false };
// Find the DEEPEST ancestor (including the anchor itself) of the required
// container type.
let containerIdx = -1;
for (let i = chain.length - 1; i >= 0; i--) {
if (isObject(chain[i].node) && chain[i].node.type === containerType) {
containerIdx = i;
break;
}
}
if (containerIdx === -1) {
throw new Error(
`insert_node: cannot insert a ${node.type} here — the anchor is not ` +
`inside a ${containerType}. Anchor on a cell's text or a block id ` +
`that lives inside the target table.`,
);
}
const container = chain[containerIdx].node;
if (!Array.isArray(container.content)) container.content = [];
if (containerIdx === chain.length - 1) {
// The matched container IS the anchor node itself (e.g. anchorText
// resolved to the table block): append/prepend within it.
const at = opts.position === "after" ? container.content.length : 0;
container.content.splice(at, 0, fresh);
} else {
// The immediate child on the path leading to the anchor is the row/cell
// to splice next to.
const enclosingChildIndex = chain[containerIdx + 1].index;
container.content.splice(enclosingChildIndex + offset, 0, fresh);
}
return { doc: out, inserted: true };
}
// Resolve by id anywhere in the tree: splice into the parent content array.
if (opts.anchorNodeId != null) {
let inserted = false;
const walkContent = (content: any[]): void => {
for (let i = 0; i < content.length; i++) {
const child = content[i];
if (matchesId(child, opts.anchorNodeId as string)) {
content.splice(i + offset, 0, fresh);
inserted = true;
return;
}
if (isObject(child) && Array.isArray(child.content)) {
walkContent(child.content);
if (inserted) return;
}
}
};
if (isObject(out) && Array.isArray(out.content)) {
walkContent(out.content);
}
return { doc: out, inserted };
}
// Resolve by text: only top-level doc.content blocks are scanned.
if (opts.anchorText != null && isObject(out) && Array.isArray(out.content)) {
for (let i = 0; i < out.content.length; i++) {
if (blockPlainText(out.content[i]).includes(opts.anchorText)) {
out.content.splice(i + offset, 0, fresh);
return { doc: out, inserted: true };
}
}
}
return { doc: out, inserted: false };
}
// ===========================================================================
// Table editing helpers
//
// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
// table -> { type:"table", content:[tableRow...] }
// row -> { type:"tableRow", content:[tableCell|tableHeader...] }
// cell -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
// content:[paragraph...] }
// para -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
// of the input doc (via `clone`) and never mutate their inputs.
// ===========================================================================
/**
* Collect EVERY `attrs.id` present anywhere in `node` into `used`. Used to seed
* `makeFreshId` so generated paragraph ids never collide with existing ones.
*/
function collectIds(node: any, used: Set<string>): void {
if (!isObject(node)) return;
if (isObject(node.attrs) && typeof node.attrs.id === "string") {
used.add(node.attrs.id);
}
if (Array.isArray(node.content)) {
for (const child of node.content) collectIds(child, used);
}
}
/**
* Fresh-id generator: returns a random Docmost-style id (12 chars from
* lowercase `a-z0-9`) that is not already in `used`, and records it. On the
* rare collision the id is regenerated. Callers rely on uniqueness, not on the
* exact string, so randomness is fine — and unlike a module-local counter it
* needs no reset and cannot become predictable across calls.
*/
function makeFreshId(used: Set<string>): string {
const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
let id: string;
do {
id = "";
for (let i = 0; i < 12; i++) {
id += alphabet[Math.floor(Math.random() * alphabet.length)];
}
} while (used.has(id) || id === "");
used.add(id);
return id;
}
/**
* Resolve a table reference against an ALREADY-CLONED doc and return the LIVE
* table node (a reference inside `rootClone`, so the caller may mutate it) plus
* its index path. Returns null when no table matches.
*
* - `#<n>`: the top-level block at index `n`, only if its `type === "table"`.
* - otherwise: DFS for the node with `attrs.id === tableRef`, then walk UP its
* ancestor chain to the nearest `type === "table"` ancestor.
*/
function locateTable(
rootClone: any,
tableRef: string,
): { table: any; path: number[] } | null {
if (!isObject(rootClone)) return null;
// "#<n>": index into the top-level content array; must be a table.
const indexMatch = typeof tableRef === "string" ? tableRef.match(/^#(\d+)$/) : null;
if (indexMatch) {
const index = Number(indexMatch[1]);
const block = Array.isArray(rootClone.content)
? rootClone.content[index]
: undefined;
if (isObject(block) && block.type === "table") {
return { table: block, path: [index] };
}
return null;
}
// Otherwise: DFS for attrs.id === tableRef, tracking the ancestor chain, then
// climb to the nearest enclosing table.
const search = (
node: any,
trail: { node: any; index: number }[],
): { table: any; path: number[] } | null => {
if (!isObject(node)) return null;
if (Array.isArray(node.content)) {
for (let i = 0; i < node.content.length; i++) {
const child = node.content[i];
const here = [...trail, { node: child, index: i }];
if (matchesId(child, tableRef)) {
// Walk UP to the nearest table ancestor (including the match itself).
for (let j = here.length - 1; j >= 0; j--) {
if (isObject(here[j].node) && here[j].node.type === "table") {
return {
table: here[j].node,
path: here.slice(0, j + 1).map((e) => e.index),
};
}
}
return null; // id found but no enclosing table
}
const hit = search(child, here);
if (hit != null) return hit;
}
}
return null;
};
return search(rootClone, []);
}
/** Build the plain-text → single-paragraph cell content used by all writers. */
function makeCellParagraph(id: string, text: string): any {
return {
type: "paragraph",
attrs: { id, indent: 0 },
// Empty string → a paragraph with an empty content array.
content: text ? [{ type: "text", text }] : [],
};
}
/**
* Read a table as a matrix. Returns null when `tableRef` resolves to no table.
*
* - `rows`/`cols`: the table's row count and the column count of its FIRST row.
* Tables may be ragged (rows of differing length), so `cols` reflects only
* row 0; use the per-row length of `cells`/`cellIds` for each row's actual
* width.
* - `cells`: `string[][]` of each cell's `blockPlainText`.
* - `cellIds`: `(string|null)[][]` of each cell's FIRST paragraph id (or null),
* so callers can `patch_node` a cell for rich-formatted edits.
* - `path`: index path of the table within the doc.
*/
export function readTable(
doc: any,
tableRef: string,
): {
rows: number;
cols: number;
cells: string[][];
cellIds: (string | null)[][];
path: number[];
} | null {
const root = clone(doc);
const located = locateTable(root, tableRef);
if (located == null) return null;
const { table, path } = located;
const rowNodes = Array.isArray(table.content) ? table.content : [];
const rows = rowNodes.length;
const cols = rowNodes[0]?.content?.length ?? 0;
const cells: string[][] = [];
const cellIds: (string | null)[][] = [];
for (const rowNode of rowNodes) {
const cellNodes = Array.isArray(rowNode?.content) ? rowNode.content : [];
const rowText: string[] = [];
const rowIds: (string | null)[] = [];
for (const cellNode of cellNodes) {
rowText.push(blockPlainText(cellNode));
// The cell's first paragraph carries the id used for patch_node.
const firstPara = Array.isArray(cellNode?.content)
? cellNode.content[0]
: undefined;
const id =
isObject(firstPara) && isObject(firstPara.attrs)
? firstPara.attrs.id ?? null
: null;
rowIds.push(id);
}
cells.push(rowText);
cellIds.push(rowIds);
}
return { rows, cols, cells, cellIds, path };
}
/**
* Insert a row of plain-text cells into a table. Returns `{ doc, inserted }`.
*
* The row is padded to the table's column count (`cells[i] ?? ""`); supplying
* MORE cells than columns throws. Each new cell copies `colwidth` for its
* column from the header row when present, gets a fresh-id paragraph, and a
* `colspan:1, rowspan:1` attrs. `index` (when an integer in `[0, rows]`) splices
* the row there; otherwise the row is appended at the end.
*/
export function insertTableRow(
doc: any,
tableRef: string,
cells: string[],
index?: number,
): { doc: any; inserted: boolean } {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null) return { doc: out, inserted: false };
const { table } = located;
if (!Array.isArray(table.content)) table.content = [];
const rows = table.content.length;
const headerRow = table.content[0];
const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : [];
// Column count is the WIDEST existing row, so the guard below stays
// meaningful for ragged tables and the new row matches the table's width.
// Fall back to the supplied cell count only when the table has no rows.
let colCount = 0;
for (const r of table.content) {
if (isObject(r) && Array.isArray(r.content)) colCount = Math.max(colCount, r.content.length);
}
if (colCount === 0) colCount = Array.isArray(cells) ? cells.length : 0;
if (Array.isArray(cells) && cells.length > colCount) {
throw new Error(
`table_insert_row: got ${cells.length} cell(s) but the table has ${colCount} column(s)`,
);
}
// Resolve the landing index up front so the cell-type decision and the splice
// below agree: a valid integer in [0, rows] splices there, else we append.
const landingIndex =
typeof index === "number" && Number.isInteger(index) && index >= 0 && index <= rows
? index
: rows;
// Seed the id generator with every id already in the doc so the new cell
// paragraph ids are unique within the whole document.
const used = new Set<string>();
collectIds(out, used);
const newCells: any[] = [];
for (let i = 0; i < colCount; i++) {
const text = (Array.isArray(cells) ? cells[i] : undefined) ?? "";
const attrs: Record<string, any> = { colspan: 1, rowspan: 1 };
// Copy this column's colwidth from the header row's cell when present.
const colwidth = headerCells[i]?.attrs?.colwidth;
if (colwidth !== undefined) attrs.colwidth = colwidth;
// A row landing at index 0 becomes the new header row, so inherit the
// current header cell's type per column (Docmost uses "tableHeader" there);
// every other position is a plain data cell.
const cellType = landingIndex === 0 ? headerCells[i]?.type ?? "tableCell" : "tableCell";
newCells.push({
type: cellType,
attrs,
content: [makeCellParagraph(makeFreshId(used), text)],
});
}
const newRow = { type: "tableRow", content: newCells };
// Splice at the resolved landing index (append when index was omitted/invalid).
table.content.splice(landingIndex, 0, newRow);
return { doc: out, inserted: true };
}
/**
* Delete the row at 0-based `index` from a table. Returns `{ doc, deleted }`.
* `deleted` is false only when the table cannot be located. Throws on an
* out-of-range index, and refuses to delete the table's only row.
*/
export function deleteTableRow(
doc: any,
tableRef: string,
index: number,
): { doc: any; deleted: boolean } {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null) return { doc: out, deleted: false };
const { table } = located;
if (!Array.isArray(table.content)) table.content = [];
const rows = table.content.length;
if (!Number.isInteger(index) || index < 0 || index >= rows) {
throw new Error(
`table_delete_row: row index ${index} out of range (table has ${rows} row(s))`,
);
}
if (rows <= 1) {
throw new Error(
"table_delete_row: refusing to delete the only row of the table",
);
}
table.content.splice(index, 1);
return { doc: out, deleted: true };
}
/**
* Set the plain-text content of cell `[row, col]` (0-based) to `text`. Returns
* `{ doc, updated }`; `updated` is false only when the table cannot be located.
* Throws when `row`/`col` is out of range. The cell's own attrs (colspan/
* rowspan/colwidth) are preserved; its content becomes a single text paragraph
* that reuses the cell's existing first-paragraph id when present, else a fresh
* one.
*/
export function updateTableCell(
doc: any,
tableRef: string,
row: number,
col: number,
text: string,
): { doc: any; updated: boolean } {
const out = clone(doc);
const located = locateTable(out, tableRef);
if (located == null) return { doc: out, updated: false };
const { table } = located;
const rowNodes = Array.isArray(table.content) ? table.content : [];
const rows = rowNodes.length;
const rowNode = rowNodes[row];
const cols = isObject(rowNode) && Array.isArray(rowNode.content)
? rowNode.content.length
: 0;
if (
!Number.isInteger(row) ||
row < 0 ||
row >= rows ||
!Number.isInteger(col) ||
col < 0 ||
col >= cols
) {
throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
}
const cellNode = rowNode.content[col];
// Reuse the cell's existing first-paragraph id, or mint a fresh unique one.
const existingPara = Array.isArray(cellNode?.content)
? cellNode.content[0]
: undefined;
let id =
isObject(existingPara) && isObject(existingPara.attrs)
? existingPara.attrs.id
: undefined;
if (typeof id !== "string" || id.length === 0) {
const used = new Set<string>();
collectIds(out, used);
id = makeFreshId(used);
}
cellNode.content = [makeCellParagraph(id, text)];
return { doc: out, updated: true };
}
@@ -18,25 +18,29 @@
* and rebuilt native. A file WITHOUT a `gitmost_id` frontmatter is an un-tracked
* (e.g. hand-written) file -> the caller ADOPTS it (creates a page, writes the id).
*/
/**
* The frontmatter key carrying the Docmost pageId. NAMESPACED (not a bare `id`)
* so it never collides with a user's own frontmatter fields.
*/
export const ID_KEY = "gitmost_id";
/** Leading YAML frontmatter block: `---\n…\n---` at the very start of the file. */
const FRONTMATTER_RE = /^?---\n([\s\S]*?)\n---\n?/;
/** The top-level `<ID_KEY>: <value>` line inside the frontmatter (quotes optional). */
function readIdFromYaml(yaml) {
const re = new RegExp(`^${ID_KEY}:\\s*(.+?)\\s*$`);
for (const line of yaml.split("\n")) {
const m = line.match(re);
if (m) {
const v = m[1].trim().replace(/^["']|["']$/g, "");
return v === "" ? null : v;
}
function readIdFromYaml(yaml: string): string | null {
const re = new RegExp(`^${ID_KEY}:\\s*(.+?)\\s*$`);
for (const line of yaml.split("\n")) {
const m = line.match(re);
if (m) {
const v = m[1].trim().replace(/^["']|["']$/g, "");
return v === "" ? null : v;
}
return null;
}
return null;
}
/**
* Parse a page file into its identity (`id`) and clean markdown `body`. Tolerant:
* a file with no frontmatter (a hand-written third-party file) returns `id: null`
@@ -52,21 +56,26 @@ function readIdFromYaml(yaml) {
* re-serialize) is deferred to the adoption phase; until then, do NOT roll the
* native format onto a real Obsidian vault whose notes carry properties.
*/
export function parsePageFile(full) {
const text = (full ?? "").replace(/\r\n/g, "\n");
// Native format: a `gitmost_id` YAML frontmatter. Anything else (no frontmatter,
// or frontmatter without the key) is an un-tracked file -> adopt.
const fm = text.match(FRONTMATTER_RE);
if (fm) {
return { id: readIdFromYaml(fm[1]), body: text.slice(fm[0].length).trim() };
}
return { id: null, body: text.trim() };
export function parsePageFile(full: string): {
id: string | null;
body: string;
} {
const text = (full ?? "").replace(/\r\n/g, "\n");
// Native format: a `gitmost_id` YAML frontmatter. Anything else (no frontmatter,
// or frontmatter without the key) is an un-tracked file -> adopt.
const fm = text.match(FRONTMATTER_RE);
if (fm) {
return { id: readIdFromYaml(fm[1]), body: text.slice(fm[0].length).trim() };
}
return { id: null, body: text.trim() };
}
/**
* Serialize a page into the thin format: `id` frontmatter + a blank line + the
* clean body + a trailing newline. Deterministic so an unchanged page re-syncs to
* byte-identical output (no churn the loop-guard relies on it).
*/
export function serializePageFile(id, body) {
return `---\n${ID_KEY}: ${id}\n---\n\n${body.trim()}\n`;
export function serializePageFile(id: string, body: string): string {
return `---\n${ID_KEY}: ${id}\n---\n\n${body.trim()}\n`;
}
@@ -0,0 +1,829 @@
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
import { applyPullActions } from '../src/engine/pull';
import type {
PullActions,
ApplyPullActionsDeps,
} from '../src/engine/pull';
import type { DeletionDecision } from '../src/engine/reconcile';
// R-Pull-2 (test-strategy report §5): `applyPullActions` is the THIN IO half of
// the pull cycle. These tests drive it with FAKES that record every call — no
// real git, fs, or network — so the ordering and the ⭐ move-on-success
// data-loss guard are verifiable. SPEC §8 (delete suppression) + SPEC §5 (commit
// subject reflects ACTUAL counts) are asserted here.
const VAULT = '/vault';
/** A getPageJson fake: returns a minimal page whose content stabilizes cheaply. */
function makeClient(opts?: { failFor?: Set<string> }) {
const calls: string[] = [];
const client = {
getPageJson: vi.fn(async (pageId: string) => {
calls.push(pageId);
if (opts?.failFor?.has(pageId)) {
throw new Error(`fetch failed for ${pageId}`);
}
return {
id: pageId,
slugId: `slug-${pageId}`,
title: `Title ${pageId}`,
spaceId: 'space',
parentPageId: null,
updatedAt: '2026-01-01T00:00:00.000Z',
// A trivial doc so stabilizePageFile (the real one) runs fast.
content: {
type: 'doc',
content: [
{ type: 'paragraph', content: [{ type: 'text', text: pageId }] },
],
},
};
}),
};
return { client, calls };
}
/** A git fake recording the order of ops; merge result is configurable. */
function makeGit(
merge: { ok: boolean; conflict: boolean; output?: string } = {
ok: true,
conflict: false,
},
conflictStages?: {
unmerged?: string[];
/** path -> { ours, theirs } blob content for showStage(2|3, path). */
stages?: Record<string, { ours: string | null; theirs: string | null }>;
},
) {
const order: string[] = [];
let committedSubject: string | undefined;
const unmerged = conflictStages?.unmerged ?? ['Conflicted.md'];
// Default stages: genuinely-different ours/theirs (a real same-block conflict).
const stages = conflictStages?.stages ?? {
'Conflicted.md': { ours: 'git side\n', theirs: 'docmost side\n' },
};
const git = {
stageAll: vi.fn(async () => {
order.push('stageAll');
}),
commit: vi.fn(async (subject: string) => {
order.push(`commit:${subject}`);
committedSubject = subject;
return true;
}),
checkout: vi.fn(async (branch: string) => {
order.push(`checkout:${branch}`);
}),
merge: vi.fn(async () => {
order.push('merge');
return { ok: merge.ok, conflict: merge.conflict, output: merge.output ?? '' };
}),
listUnmergedPaths: vi.fn(async () => unmerged),
showStage: vi.fn(async (stage: 1 | 2 | 3, path: string) => {
const s = stages[path];
if (!s) return null;
return stage === 2 ? s.ours : stage === 3 ? s.theirs : null;
}),
commitMerge: vi.fn(async (subject: string) => {
order.push(`commitMerge:${subject}`);
}),
};
return {
git,
order,
get committedSubject() {
return committedSubject;
},
};
}
/** A recording fs fake: writes/mkdirs/rms tracked in arrays. */
function makeFs(opts?: { failWriteFor?: Set<string> }) {
const writes: { abs: string; text: string }[] = [];
const mkdirs: string[] = [];
const rms: string[] = [];
const fs = {
writeFile: vi.fn(async (abs: string, text: string) => {
// Fail a specific destination path if asked (to simulate a write failure).
if (opts?.failWriteFor?.has(abs)) {
throw new Error(`write failed for ${abs}`);
}
writes.push({ abs, text });
}),
mkdir: vi.fn(async (abs: string) => {
mkdirs.push(abs);
}),
rm: vi.fn(async (abs: string) => {
rms.push(abs);
}),
};
return { fs, writes, mkdirs, rms };
}
// A single injected `log` spy mirrors the push side: applyPullActions now routes
// EVERY cycle diagnostic through `deps.log` (one channel), so tests inspect this
// spy instead of console.warn/console.error. `deps()` creates a fresh spy per
// call and stashes it on `lastLog` for the current test to assert against.
let lastLog: ReturnType<typeof vi.fn>;
function deps(
client: any,
git: any,
fs: ReturnType<typeof makeFs>,
): ApplyPullActionsDeps {
lastLog = vi.fn();
return {
client,
git,
writeFile: fs.fs.writeFile,
mkdir: fs.fs.mkdir,
rm: fs.fs.rm,
log: lastLog,
};
}
const APPLY: DeletionDecision = { apply: true };
function actions(partial: Partial<PullActions>): PullActions {
return {
toWrite: [],
moved: [],
toDelete: [],
deletionDecision: APPLY,
existingCount: 0,
plannedDeleteCount: 0,
...partial,
};
}
beforeEach(() => {
vi.spyOn(console, 'log').mockImplementation(() => {});
vi.spyOn(console, 'warn').mockImplementation(() => {});
vi.spyOn(console, 'error').mockImplementation(() => {});
});
afterEach(() => {
vi.restoreAllMocks();
});
describe('applyPullActions — happy path (write + commit + merge)', () => {
it('fetches, writes each page, stages, commits, checks out main, merges', async () => {
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [
{ pageId: 'p1', relPath: 'A.md' },
{ pageId: 'p2', relPath: 'Sub/B.md' },
],
}),
VAULT,
);
expect(res.written).toBe(2);
expect(res.failed).toBe(0);
expect(res.committed).toBe(true);
expect(res.merge).toEqual({ ok: true, conflict: false, output: '' });
// Both pages were fetched and written at their absolute paths.
expect(client.getPageJson).toHaveBeenCalledTimes(2);
const writtenPaths = fs.writes.map((w) => w.abs).sort();
expect(writtenPaths).toEqual(['/vault/A.md', '/vault/Sub/B.md']);
// Every written file is in the native-Obsidian format: a `gitmost_id`
// frontmatter at the very top and NO legacy `docmost:meta` envelope. Guards
// against a regression back to the heavy meta block.
for (const w of fs.writes) {
expect(w.text.startsWith('---\ngitmost_id: ')).toBe(true);
expect(w.text).not.toContain('docmost:meta');
}
// The git op order is: stageAll -> commit -> checkout main -> merge.
expect(g.order).toEqual([
'stageAll',
`commit:docmost: sync 2 page(s)`,
'checkout:main',
'merge',
]);
});
});
describe('applyPullActions — ordering (write before move/delete before commit)', () => {
it('does writes, then move-old-path removals, then deletes, then commit/merge', async () => {
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'Old/M.md',
toRelPath: 'New/M.md',
removeOldPath: true,
},
],
toDelete: ['Dead.md'],
plannedDeleteCount: 1,
existingCount: 3,
}),
VAULT,
);
// The write to the new path happened (the page was fetched first).
expect(fs.writes.map((w) => w.abs)).toEqual(['/vault/New/M.md']);
// The move old-path removal AND the absence delete both ran, old path first.
expect(fs.rms).toEqual(['/vault/Old/M.md', '/vault/Dead.md']);
// git ops happen AFTER all fs work.
expect(g.order).toEqual([
'stageAll',
'commit:docmost: sync 1 page(s), 1 deleted',
'checkout:main',
'merge',
]);
});
});
describe('applyPullActions — ⭐ data-loss guard (move-on-success)', () => {
it('does NOT remove the OLD path when the new-path write FAILS', async () => {
// The page "m" is being moved Old/M.md -> New/M.md, but its new-path write
// FAILS. Removing the old path now would erase the only copy of the page.
// The guard must KEEP the old path.
const { client } = makeClient();
const g = makeGit();
const fs = makeFs({ failWriteFor: new Set(['/vault/New/M.md']) });
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'Old/M.md',
toRelPath: 'New/M.md',
removeOldPath: true,
},
],
}),
VAULT,
);
// The write failed -> recorded as a failure, nothing written.
expect(res.failed).toBe(1);
expect(res.written).toBe(0);
expect(fs.writes).toEqual([]);
// ⭐ The OLD path was NOT removed: the data-loss guard kept it.
expect(fs.rms).not.toContain('/vault/Old/M.md');
expect(fs.rms).toEqual([]);
expect(res.movedApplied).toBe(0);
// The commit subject reflects ACTUAL counts: 0 written, 0 deleted.
expect(g.committedSubject).toBe('docmost: sync 0 page(s)');
});
it('DOES remove the old path when the new-path write SUCCEEDS', async () => {
// Same move, but the write succeeds -> the old path is safely removed. This
// is the positive control proving the guard is keyed on write success.
const { client } = makeClient();
const g = makeGit();
const fs = makeFs(); // no write failures
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'Old/M.md',
toRelPath: 'New/M.md',
removeOldPath: true,
},
],
}),
VAULT,
);
expect(res.written).toBe(1);
expect(res.movedApplied).toBe(1);
expect(fs.rms).toContain('/vault/Old/M.md');
expect(g.committedSubject).toBe('docmost: sync 1 page(s)');
});
it('honours removeOldPath:false (path reused by another live page is kept)', async () => {
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'X.md',
toRelPath: 'New/M.md',
removeOldPath: false, // X.md is a live target of another page
},
],
}),
VAULT,
);
// The reused old path is never removed.
expect(fs.rms).not.toContain('/vault/X.md');
expect(fs.rms).toEqual([]);
});
});
describe('applyPullActions — deletion suppression (SPEC §8)', () => {
it('skips deletions when the decision SUPPRESSES them (toDelete already empty)', async () => {
// computePullActions empties toDelete when suppressed, but assert the applier
// ALSO does no removals and the subject omits the deleted count.
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'p1', relPath: 'A.md' }],
// Suppressed: toDelete is empty even though 5 were planned.
toDelete: [],
deletionDecision: { apply: false, reason: 'incomplete-fetch' },
plannedDeleteCount: 5,
existingCount: 6,
}),
VAULT,
);
expect(res.deleted).toBe(0);
expect(fs.rms).toEqual([]);
// Subject reflects 0 deleted (no ", N deleted" suffix).
expect(g.committedSubject).toBe('docmost: sync 1 page(s)');
// The suppression warning was emitted.
expect(lastLog).toHaveBeenCalledWith(
expect.stringMatching(/tree fetch incomplete/),
);
});
it('applies deletions present in toDelete when the decision allows them', async () => {
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'p1', relPath: 'A.md' }],
toDelete: ['Dead1.md', 'Dead2.md'],
deletionDecision: APPLY,
plannedDeleteCount: 2,
existingCount: 5,
}),
VAULT,
);
expect(res.deleted).toBe(2);
expect(fs.rms).toEqual(['/vault/Dead1.md', '/vault/Dead2.md']);
// Subject reflects ACTUAL written + deleted counts.
expect(g.committedSubject).toBe('docmost: sync 1 page(s), 2 deleted');
});
});
describe('applyPullActions — commit subject reflects ACTUAL counts', () => {
it('counts only SUCCESSFUL writes when some page fetches fail', async () => {
// p2 fetch fails; the subject must say 1 page (only p1 was written), not 2.
const { client } = makeClient({ failFor: new Set(['p2']) });
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [
{ pageId: 'p1', relPath: 'A.md' },
{ pageId: 'p2', relPath: 'B.md' },
],
}),
VAULT,
);
expect(res.written).toBe(1);
expect(res.failed).toBe(1);
expect(g.committedSubject).toBe('docmost: sync 1 page(s)');
});
});
describe('applyPullActions — merge result is surfaced, not swallowed', () => {
it('GENUINE conflict: auto-resolves to OURS (git wins), no markers, surfaces conflictedPaths', async () => {
// QA #119 round-2: a genuine same-block docmost -> main conflict must NOT be
// committed with raw markers onto `main` (external clones would see them and
// the body re-conflicts forever). It is auto-resolved to the git/main side
// (git wins, SPEC §9), the conflicted page is surfaced in `conflictedPaths`,
// and the merge is committed CLEAN (no wedge).
const { client } = makeClient();
const g = makeGit(
{ ok: false, conflict: true, output: 'CONFLICT' },
{
unmerged: ['Conflicted.md'],
stages: {
'Conflicted.md': { ours: 'git wins body\n', theirs: 'docmost body\n' },
},
},
);
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }),
VAULT,
);
// A genuine conflict was detected and auto-resolved (git won): reported as a
// (now-clean) committed merge with the conflicting page surfaced.
expect(res.merge.conflict).toBe(true);
expect(res.merge.ok).toBe(true);
expect(res.conflictedPaths).toEqual(['Conflicted.md']);
// The conflicted file was rewritten with OURS (git side) — NO markers.
const resolved = fs.writes.find((w) => w.abs === '/vault/Conflicted.md');
expect(resolved?.text).toBe('git wins body\n');
expect(resolved?.text).not.toContain('<<<<<<<');
expect(resolved?.text).not.toContain('>>>>>>>');
// The merge was COMMITTED (vault no longer mid-merge).
expect(g.git.commitMerge).toHaveBeenCalledTimes(1);
expect(g.order.some((o) => o.startsWith('commitMerge:'))).toBe(true);
});
it('SPURIOUS conflict (trailing-blank only): normalizes clean, NOT reported as a conflict', async () => {
// Root-cause fix: when the two sides differ ONLY in trailing/empty lines (the
// normalize-on-write form vs a user's blank-line append), the conflict is
// spurious — both normalize to the same text. It is resolved to the normalized
// form (no markers) and NOT counted as a conflict (so /status does not cry wolf).
const { client } = makeClient();
const g = makeGit(
{ ok: false, conflict: true, output: 'CONFLICT' },
{
unmerged: ['Trailing.md'],
stages: {
// Same content; OURS has a double-blank-line append, THEIRS is normalized.
'Trailing.md': { ours: 'Hello world\n\n\n', theirs: 'Hello world\n' },
},
},
);
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }),
VAULT,
);
// No GENUINE conflict — reported clean.
expect(res.merge.conflict).toBe(false);
expect(res.merge.ok).toBe(true);
expect(res.conflictedPaths).toEqual([]);
// The file was rewritten to the canonical normalized form (single trailing \n).
const resolved = fs.writes.find((w) => w.abs === '/vault/Trailing.md');
expect(resolved?.text).toBe('Hello world\n');
// Still committed (clears the merge), but as a clean merge.
expect(g.git.commitMerge).toHaveBeenCalledTimes(1);
});
// NULL-EDGE coverage (round-2 review F1): the genuine-conflict branch resolves
// to `ours ?? theirs`. The two cases where a stage is ABSENT are the
// data-preservation core on the published `main` and were previously untested.
it('NULL-EDGE modify/delete (ours absent): keeps THEIRS so the surviving edit is not dropped', async () => {
// modify/delete conflict: OUR side (main) deleted the page (stage 2 absent),
// but THEIR side (docmost) still has a modified body. Losing the `?? theirs`
// fallback here would silently drop a surviving Docmost edit. The resolution
// must keep theirs — marker-free — on `main`.
const { client } = makeClient();
const g = makeGit(
{ ok: false, conflict: true, output: 'CONFLICT (modify/delete)' },
{
unmerged: ['Gone.md'],
stages: {
'Gone.md': { ours: null, theirs: 'surviving docmost body\n' },
},
},
);
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({ toWrite: [] }),
VAULT,
);
expect(res.merge.conflict).toBe(true);
expect(res.merge.ok).toBe(true);
expect(res.conflictedPaths).toEqual(['Gone.md']);
// `resolved = ours ?? theirs` fell through to THEIRS (content preserved).
const w = fs.writes.find((x) => x.abs === '/vault/Gone.md');
expect(w?.text).toBe('surviving docmost body\n');
expect(w?.text).not.toContain('<<<<<<<');
expect(w?.text).not.toContain('>>>>>>>');
// The merge was committed clean (no wedge).
expect(g.git.commitMerge).toHaveBeenCalledTimes(1);
});
it('NULL-EDGE delete/delete (both absent): writes NOTHING; commitMerge stages the deletion', async () => {
// delete/delete conflict: BOTH sides removed the path (stage 2 AND 3 absent),
// so `resolved = ours ?? theirs` is null. The file must NOT be re-created;
// commitMerge's `git add -A` stages the deletion. A regression that wrongly
// wrote on the both-null path would resurrect a page both sides deleted.
const { client } = makeClient();
const g = makeGit(
{ ok: false, conflict: true, output: 'CONFLICT' },
{
unmerged: ['Both.md'],
stages: {
'Both.md': { ours: null, theirs: null },
},
},
);
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({ toWrite: [] }),
VAULT,
);
// The path is surfaced as a resolved conflict, the merge committed clean...
expect(res.merge.conflict).toBe(true);
expect(res.merge.ok).toBe(true);
expect(res.conflictedPaths).toEqual(['Both.md']);
// ...but NOTHING was written for it (resolved === null): no re-creation.
expect(fs.writes.find((x) => x.abs === '/vault/Both.md')).toBeUndefined();
expect(fs.writes).toEqual([]);
expect(g.git.commitMerge).toHaveBeenCalledTimes(1);
});
it('returns ok:false conflict:false on a non-conflict merge failure', async () => {
const { client } = makeClient();
const g = makeGit({ ok: false, conflict: false, output: 'some error' });
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({ toWrite: [{ pageId: 'p1', relPath: 'A.md' }] }),
VAULT,
);
expect(res.merge.ok).toBe(false);
expect(res.merge.conflict).toBe(false);
});
});
// ===========================================================================
// R-Pull-2 coverage gaps (review-driven): the suppression warning FORKS for
// `empty-live` and `mass-delete` reasons (pull.ts 278-290), and the
// fault-tolerant `removePath` catch branch (pull.ts 354-364) where `deps.rm`
// REJECTS. The existing block above only exercises the `incomplete-fetch`
// reason and an rm that always succeeds.
//
// Helper: build a deps object whose `rm` rejects for a chosen set of absolute
// paths and resolves otherwise. We override the recording fs's `rm` (a vi.fn)
// in place so `fs.rms` still records the SUCCESSFUL calls only (a rejecting rm
// throws before pushing), matching the real `node:fs/promises` semantics where
// a thrown rm performed no removal.
function makeFsWithRejectingRm(rejectFor: Set<string>) {
const base = makeFs();
base.fs.rm = vi.fn(async (abs: string) => {
if (rejectFor.has(abs)) {
throw new Error(`rm failed for ${abs}`);
}
base.rms.push(abs);
});
return base;
}
describe('applyPullActions — suppression warning forks (empty-live / mass-delete)', () => {
it('emits the empty-live warning (with existingCount) and performs no removals', async () => {
// SPEC §8 empty-live fork: live fetch returned 0 pages but files are
// tracked. Mirrors the incomplete-fetch suppression test, but the message
// text + its `existingCount` interpolation are a DISTINCT branch.
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'p1', relPath: 'A.md' }],
toDelete: [], // suppressed -> already empty
deletionDecision: { apply: false, reason: 'empty-live' },
plannedDeleteCount: 3,
existingCount: 4,
}),
VAULT,
);
expect(res.deleted).toBe(0);
expect(fs.rms).toEqual([]);
// The empty-live message names the tracked-file count and "deletions
// suppressed".
expect(lastLog).toHaveBeenCalledWith(
expect.stringMatching(/live fetch returned 0 pages but 4 file\(s\) are tracked/),
);
expect(lastLog).toHaveBeenCalledWith(
expect.stringMatching(/deletions suppressed/),
);
});
it('emits the mass-delete guard warning (with planned AND existing counts) and performs no removals', async () => {
// SPEC §8 mass-delete fork (the final else branch): the message
// interpolates BOTH plannedDeleteCount and existingCount ("would delete N
// of M"), distinct from the other two suppression messages.
const { client } = makeClient();
const g = makeGit();
const fs = makeFs();
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'p1', relPath: 'A.md' }],
toDelete: [],
deletionDecision: { apply: false, reason: 'mass-delete' },
plannedDeleteCount: 5,
existingCount: 6,
}),
VAULT,
);
expect(res.deleted).toBe(0);
expect(fs.rms).toEqual([]);
expect(lastLog).toHaveBeenCalledWith(
expect.stringMatching(/plan would delete 5 of 6 tracked file\(s\) \(mass-delete guard\)/),
);
expect(lastLog).toHaveBeenCalledWith(
expect.stringMatching(/deletions suppressed/),
);
});
});
describe('applyPullActions — removePath fault tolerance (rm REJECTS)', () => {
it('does NOT reject, logs the failure, and does not count the failed removal', async () => {
// pull.ts removePath catch: when `deps.rm` throws, it logs via the injected
// `log` and returns false; the run continues. Existing delete tests use an rm
// that always succeeds, leaving this catch branch uncovered.
const { client } = makeClient();
const g = makeGit();
const fs = makeFsWithRejectingRm(new Set(['/vault/Dead.md']));
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [],
toDelete: ['Dead.md'],
deletionDecision: APPLY,
plannedDeleteCount: 1,
existingCount: 1,
}),
VAULT,
);
// Resolved (not rejected) — the pull is fault-tolerant.
expect(res.deleted).toBe(0);
// removePath's catch logs "pull: failed to delete Dead.md: ...".
expect(lastLog).toHaveBeenCalledWith(
expect.stringMatching(/failed to .* Dead\.md/),
);
// The (would-be) removal never succeeded, so nothing was recorded.
expect(fs.rms).toEqual([]);
});
it('counts ONLY successful removals on a partial-failure delete batch (1 reject of 3)', async () => {
// pull.ts 388-391 increments `deleted` only when removePath returns true.
// Here Dead1/Dead3 succeed and Dead2 rejects -> deleted === 2, and the
// deleted>0 subject branch (399-400) fires with written=0.
const { client } = makeClient();
const g = makeGit();
const fs = makeFsWithRejectingRm(new Set(['/vault/Dead2.md']));
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [],
moved: [],
toDelete: ['Dead1.md', 'Dead2.md', 'Dead3.md'],
deletionDecision: APPLY,
plannedDeleteCount: 3,
existingCount: 5,
}),
VAULT,
);
expect(res.deleted).toBe(2);
expect(fs.rms).toEqual(['/vault/Dead1.md', '/vault/Dead3.md']);
expect(g.committedSubject).toBe('docmost: sync 0 page(s), 2 deleted');
// Exactly one rejection was logged (Dead2.md). Other diagnostics share the
// `log` channel, so count ONLY the "failed to ..." failure lines.
const failLines = lastLog.mock.calls
.map((c: unknown[]) => String(c[0]))
.filter((m: string) => /failed to /.test(m));
expect(failLines.length).toBe(1);
expect(failLines[0]).toMatch(/failed to .* Dead2\.md/);
// The run still reached commit + checkout + merge.
expect(g.order).toEqual([
'stageAll',
'commit:docmost: sync 0 page(s), 2 deleted',
'checkout:main',
'merge',
]);
});
});
describe('applyPullActions — move old-path removal rejects vs move-write fails', () => {
it('a move old-path rm REJECTION does not increment movedApplied but an independent delete still succeeds', async () => {
// pull.ts 383 increments movedApplied only when removePath of the old path
// succeeds. Here the new-path write SUCCEEDS (so the page is not in
// failedPageIds and the move loop proceeds to rm) but the old-path rm
// REJECTS — distinct from the move-write-failure guard at 376. An absence
// delete in the same run must still succeed independently.
const { client } = makeClient();
const g = makeGit();
const fs = makeFsWithRejectingRm(new Set(['/vault/Old/M.md']));
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'Old/M.md',
toRelPath: 'New/M.md',
removeOldPath: true,
},
],
toDelete: ['Dead.md'],
deletionDecision: APPLY,
plannedDeleteCount: 1,
existingCount: 3,
}),
VAULT,
);
expect(res.written).toBe(1);
expect(res.movedApplied).toBe(0); // old-path rm failed -> not counted
expect(res.deleted).toBe(1); // independent absence delete still succeeded
expect(fs.rms).toEqual(['/vault/Dead.md']); // Old/M.md rm threw, not recorded
expect(g.committedSubject).toBe('docmost: sync 1 page(s), 1 deleted');
// The failure log named the moved old path.
expect(lastLog).toHaveBeenCalledWith(
expect.stringMatching(/failed to .* Old\/M\.md/),
);
});
it('a move-write FAILURE keeps the old path: rm is never attempted for it (data-loss guard, 374-383)', async () => {
// Distinct branch from the move-old-path rm rejection above: here the
// new-path WRITE itself fails, so `m` enters failedPageIds and the move
// loop short-circuits BEFORE calling rm — emitting a warning via the
// injected `log` and PRESERVING the old path (the only copy).
const { client } = makeClient();
const g = makeGit();
const fs = makeFs({ failWriteFor: new Set(['/vault/New/M.md']) });
const res = await applyPullActions(
deps(client, g.git, fs),
actions({
toWrite: [{ pageId: 'm', relPath: 'New/M.md' }],
moved: [
{
pageId: 'm',
fromRelPath: 'Old/M.md',
toRelPath: 'New/M.md',
removeOldPath: true,
},
],
toDelete: [],
deletionDecision: APPLY,
plannedDeleteCount: 0,
existingCount: 1,
}),
VAULT,
);
expect(res.written).toBe(0);
expect(res.movedApplied).toBe(0);
// The old path was NEVER removed (rm not even attempted for it).
expect(fs.fs.rm).not.toHaveBeenCalledWith('/vault/Old/M.md');
expect(fs.rms).toEqual([]);
// The "keeping old path" warning fired exactly once for `m`.
const warnCalls = lastLog.mock.calls
.map((c: unknown[]) => String(c[0]))
.filter((m: string) => m.includes('move write for m failed'));
expect(warnCalls.length).toBe(1);
expect(warnCalls[0]).toContain('keeping old path Old/M.md');
// deleted === 0 -> no ", N deleted" suffix.
expect(g.committedSubject).toBe('docmost: sync 0 page(s)');
});
});
@@ -0,0 +1,891 @@
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
import { applyPushActions, LAST_PUSHED_REF } from '../src/engine/push';
import { bodyHash } from '../src/engine/loop-guard';
import type { ApplyPushDeps, PushActions } from '../src/engine/push';
import { parsePageFile, serializePageFile } from '../src/lib/page-file';
// The Docmost space this vault mirrors (native files carry no spaceId; the run
// supplies it). A CREATE targets this space.
const SPACE_ID = 'sp-test';
// FS→Docmost push, FIRST increment (SPEC §6). `applyPushActions` is the THIN IO
// half: create/update/delete via FAKES that record every call — no real network,
// git, or fs. Asserts: update uses importPageMarkdown (collab path, SPEC
// §2/§15.6); create writes the assigned pageId BACK into the file meta; delete
// soft-deletes; rename/move is returned as `deferred` with NO client call; the
// last-pushed ref is advanced.
/** A recording client fake; createPage returns a configurable assigned id. */
function makeClient(opts?: { createId?: string }) {
const client = {
// Empty live tree by default -> creates take the normal createPage path; the
// retry-adopt lookup only fires when a (parentPageId, title) node matches.
listSpaceTree: vi.fn(async () => ({
pages: [] as { id: string; parentPageId?: string | null; title?: string }[],
complete: true,
})),
importPageMarkdown: vi.fn(async (_pageId: string, _md: string) => ({
success: true,
})),
createPage: vi.fn(
async (
title: string,
_content: string,
_spaceId: string,
_parentPageId?: string,
) => ({
// Mirrors the real `createPage` shape: `{ data: { id, ... }, success }`.
data: { id: opts?.createId ?? 'assigned-id', title },
success: true,
}),
),
deletePage: vi.fn(async (_pageId: string) => ({ success: true })),
movePage: vi.fn(
async (
_pageId: string,
_parentPageId: string | null,
_position?: string,
) => ({ success: true }),
),
renamePage: vi.fn(async (pageId: string, title: string) => ({
success: true,
pageId,
title,
})),
};
return client;
}
/**
* A recording git fake: `updateRef` (advance last-pushed) and `fastForwardBranch`
* (advance the `docmost` mirror, the loop-close). `ffResult` configures what the
* ff returns (default a successful advance).
*/
function makeGit(opts?: {
ffResult?: { ok: boolean; reason?: string };
/** Pre-image tree at `refs/docmost/last-pushed` (path -> text). */
prevTree?: Record<string, string>;
}) {
const updateRefCalls: { ref: string; target: string }[] = [];
const ffCalls: { branch: string; toCommit: string }[] = [];
const prevTree = opts?.prevTree ?? {};
const git = {
updateRef: vi.fn(async (ref: string, target: string) => {
updateRefCalls.push({ ref, target });
}),
fastForwardBranch: vi.fn(async (branch: string, toCommit: string) => {
ffCalls.push({ branch, toCommit });
return opts?.ffResult ?? { ok: true };
}),
// The move/rename classifier reads the PREVIOUS parent folder's `.md` at
// refs/docmost/last-pushed via this; `null` when absent there (SPEC §5).
showFileAtRef: vi.fn(async (_ref: string, path: string) =>
path in prevTree ? prevTree[path] : null,
),
};
return { git, updateRefCalls, ffCalls };
}
/** A recording fs fake over a path->text store. */
function makeFs(initial: Record<string, string> = {}) {
const store: Record<string, string> = { ...initial };
const writes: { path: string; text: string }[] = [];
const reads: string[] = [];
const fs = {
readFile: vi.fn(async (path: string) => {
reads.push(path);
if (!(path in store)) throw new Error(`no such file: ${path}`);
return store[path];
}),
writeFile: vi.fn(async (path: string, text: string) => {
store[path] = text;
writes.push({ path, text });
}),
};
return { fs, store, writes, reads };
}
function deps(client: any, git: any, fs: ReturnType<typeof makeFs>): ApplyPushDeps {
return {
client,
git,
readFile: fs.fs.readFile,
writeFile: fs.fs.writeFile,
spaceId: SPACE_ID,
};
}
/**
* A native page file: `gitmost_id` frontmatter + a clean body. The TITLE is NOT
* stored — it is derived from the filename — so this helper takes only a pageId.
* Used to seed both the working tree (fs) and the prev tree (showFileAtRef).
*/
function fileFor(pageId: string, body = 'body'): string {
return serializePageFile(pageId, body);
}
function actions(partial: Partial<PushActions>): PushActions {
return {
creates: [],
updates: [],
deletes: [],
renamesMoves: [],
skipped: [],
...partial,
};
}
beforeEach(() => {
vi.spyOn(console, 'log').mockImplementation(() => {});
vi.spyOn(console, 'warn').mockImplementation(() => {});
});
afterEach(() => {
vi.restoreAllMocks();
});
describe('applyPushActions — update (collab path, SPEC §2/§15.6)', () => {
it('reads the file and calls importPageMarkdown with the STRIPPED body', async () => {
const fileBody = fileFor('p-1', 'updated body');
const client = makeClient();
const { git } = makeGit();
const fs = makeFs({ 'Doc.md': fileBody });
const res = await applyPushActions(
deps(client, git, fs),
actions({ updates: [{ pageId: 'p-1', path: 'Doc.md' }] }),
);
expect(res.updated).toBe(1);
// The collab/Yjs write path is used — NOT a raw jsonb overwrite. The pushed
// content is the CLEAN body (no gitmost_id frontmatter leaks to Docmost).
expect(client.importPageMarkdown).toHaveBeenCalledTimes(1);
expect(client.importPageMarkdown).toHaveBeenCalledWith('p-1', 'updated body', null);
// No raw-overwrite path exists on the injected client surface at all.
expect((client as any).updatePageJson).toBeUndefined();
expect(client.createPage).not.toHaveBeenCalled();
expect(client.deletePage).not.toHaveBeenCalled();
});
it('forwards the last-pushed base body (3-way merge ancestor) when present', async () => {
const client = makeClient();
// The pre-image (refs/docmost/last-pushed) carries the base version; both
// sides are stripped to their clean body for a body-to-body 3-way merge.
const { git } = makeGit({ prevTree: { 'Doc.md': fileFor('p-1', 'base body') } });
const fs = makeFs({ 'Doc.md': fileFor('p-1', 'updated body') });
await applyPushActions(
deps(client, git, fs),
actions({ updates: [{ pageId: 'p-1', path: 'Doc.md' }] }),
);
// importPageMarkdown receives the stripped base so the server 3-way merges it.
expect(client.importPageMarkdown).toHaveBeenCalledWith(
'p-1',
'updated body',
'base body',
);
expect(git.showFileAtRef).toHaveBeenCalledWith(LAST_PUSHED_REF, 'Doc.md');
});
it('RENAME-derived update resolves the 3-way base from the OLD path (basePath), not the new path', async () => {
// The residual flaw after F4: a rename+edit emits an UPDATE whose `path` is the
// NEW path, but at refs/docmost/last-pushed the file lived at the OLD path. If
// the base were looked up at the NEW path it would return null and the merge
// would degrade to a 2-way (clobbering a concurrent Docmost-side edit). The fix
// threads `basePath = oldPath` so the base is the pre-rename file (honest 3-way).
const client = makeClient();
// The pre-image tree only has the OLD path; the NEW path does NOT exist there.
const { git } = makeGit({
prevTree: { 'Old/Path.md': fileFor('p-mv', 'base body') },
});
// The working tree has the file at its NEW path with the EDITED body.
const fs = makeFs({ 'New/Path.md': fileFor('p-mv', 'edited body') });
await applyPushActions(
deps(client, git, fs),
actions({
updates: [
{ pageId: 'p-mv', path: 'New/Path.md', basePath: 'Old/Path.md' },
],
}),
);
// The BODY is read from the NEW path (working tree) -> the EDITED body is pushed
// (not lost). The BASE is resolved from the OLD path -> a NON-NULL common
// ancestor ('base body'), so importPageMarkdown does an honest 3-way merge and
// a concurrent Docmost-side edit to a different block is preserved (not rolled
// back to git's body, which a null 2-way base would have done).
expect(client.importPageMarkdown).toHaveBeenCalledWith(
'p-mv',
'edited body',
'base body',
);
// The base lookup hit the OLD path, NOT the new path (the core of the fix).
expect(git.showFileAtRef).toHaveBeenCalledWith(LAST_PUSHED_REF, 'Old/Path.md');
expect(git.showFileAtRef).not.toHaveBeenCalledWith(
LAST_PUSHED_REF,
'New/Path.md',
);
});
it('PURE rename (no body edit) still 3-way merges against the old-path base (no 2-way clobber)', async () => {
// Even a rename with NO body change pushes a body update (F4). Before this fix
// its base would be null (new path absent at last-pushed) -> a 2-way merge that
// could roll a concurrent Docmost edit back to git's body. With basePath=oldPath
// the base equals the (unchanged) body, so the 3-way merge of
// (base=oldBody, incoming=oldBody) is a no-op and any live Docmost edit survives.
const client = makeClient();
const { git } = makeGit({
prevTree: { 'Old.md': fileFor('p-pure', 'same body') },
});
const fs = makeFs({ 'New.md': fileFor('p-pure', 'same body') });
await applyPushActions(
deps(client, git, fs),
actions({
updates: [{ pageId: 'p-pure', path: 'New.md', basePath: 'Old.md' }],
}),
);
// Incoming body == base body == 'same body' -> the 3-way merge is a no-op AND
// preserves any concurrent live edit (the base is the real ancestor, not null).
expect(client.importPageMarkdown).toHaveBeenCalledWith(
'p-pure',
'same body',
'same body',
);
expect(git.showFileAtRef).toHaveBeenCalledWith(LAST_PUSHED_REF, 'Old.md');
});
});
describe('applyPushActions — create (assigned pageId written back to meta)', () => {
it('createPage gets title/parent from the PATH and writes the pageId back', async () => {
// A brand-new local file with NO frontmatter (a hand-written Obsidian note)
// under a parent folder. title = filename, parent = the folder's folder-note,
// space = the run's space — all DERIVED, none stored in the file.
const client = makeClient({ createId: 'page-new-42' });
const { git } = makeGit();
const fs = makeFs({
'Parent/My New Page.md': '# My New Page\n\nbody text\n',
// The enclosing folder's folder-note identifies the parent page.
'Parent/Parent.md': fileFor('parent-9'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({ creates: [{ path: 'Parent/My New Page.md' }] }),
);
expect(res.created).toBe(1);
expect(client.createPage).toHaveBeenCalledTimes(1);
const [title, content, spaceId, parentPageId] =
client.createPage.mock.calls[0];
expect(title).toBe('My New Page'); // from the filename
expect(spaceId).toBe(SPACE_ID); // from the run
expect(parentPageId).toBe('parent-9'); // from the folder's folder-note
expect(content).toContain('body text');
// The file was rewritten with the assigned pageId as gitmost_id frontmatter,
// body preserved, NO docmost:meta.
expect(fs.writes.map((w) => w.path)).toEqual(['Parent/My New Page.md']);
const rewritten = fs.store['Parent/My New Page.md'];
expect(rewritten.startsWith('---\ngitmost_id: page-new-42\n---')).toBe(true);
expect(rewritten).not.toContain('docmost:meta');
const parsed = parsePageFile(rewritten);
expect(parsed.id).toBe('page-new-42');
expect(parsed.body).toContain('body text');
// The write-back is recorded so a follow-up commit can be made.
expect(res.writtenBack).toEqual([
{ path: 'Parent/My New Page.md', pageId: 'page-new-42' },
]);
});
});
describe('applyPushActions — create RETRY-ADOPT idempotency (#1)', () => {
// Create is NOT atomic with the pageId write-back: if a prior cycle created the
// page in Docmost but died before persisting the id back, the file is re-seen as
// a CREATE. The applier must ADOPT the existing page (write the id back + push the
// body as an idempotent UPDATE) instead of calling createPage again (which would
// duplicate the page). The live page is matched by (parentPageId, title).
it('ADOPTS an existing page (no createPage) when the live tree already has a match', async () => {
const client = makeClient({ createId: 'should-not-be-used' });
// The live Docmost tree already has the page this create targets:
// title "My New Page" under the parent folder's page `parent-9`.
client.listSpaceTree.mockResolvedValue({
pages: [
{ id: 'parent-9', parentPageId: null, title: 'Parent' },
{ id: 'already-created-7', parentPageId: 'parent-9', title: 'My New Page' },
],
complete: true,
});
const { git } = makeGit();
const fs = makeFs({
'Parent/My New Page.md': '# My New Page\n\nbody text\n',
'Parent/Parent.md': fileFor('parent-9'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({ creates: [{ path: 'Parent/My New Page.md' }] }),
);
expect(res.created).toBe(1);
// CRITICAL: createPage was NOT called — no duplicate page in Docmost.
expect(client.createPage).not.toHaveBeenCalled();
// The body was pushed as an UPDATE targeting the EXISTING id (idempotent).
expect(client.importPageMarkdown).toHaveBeenCalledTimes(1);
expect(client.importPageMarkdown).toHaveBeenCalledWith(
'already-created-7',
expect.stringContaining('body text'),
null,
);
// The file was rewritten with the EXISTING id as gitmost_id (now tracked).
expect(fs.writes.map((w) => w.path)).toEqual(['Parent/My New Page.md']);
const rewritten = fs.store['Parent/My New Page.md'];
expect(parsePageFile(rewritten).id).toBe('already-created-7');
expect(res.writtenBack).toEqual([
{ path: 'Parent/My New Page.md', pageId: 'already-created-7' },
]);
});
it('does NOT adopt from an INCOMPLETE tree even when a node matches (falls back to createPage)', async () => {
// Defensive guard: retry-adopt is only safe from a COMPLETE live tree. A
// TRUNCATED tree (complete:false) could miss an already-created page and let
// us duplicate it — the very thing adopt prevents. So on an incomplete tree
// the map is NOT built and we MUST fall back to the normal createPage path,
// even though this particular tree happens to carry a matching node.
const client = makeClient({ createId: 'page-new-55' });
// A node that WOULD match the create's (parentPageId 'parent-9', title
// 'My New Page') — but the tree is flagged incomplete, so it must be ignored.
client.listSpaceTree.mockResolvedValue({
pages: [
{ id: 'parent-9', parentPageId: null, title: 'Parent' },
{ id: 'already-created-7', parentPageId: 'parent-9', title: 'My New Page' },
],
complete: false,
});
const { git } = makeGit();
const fs = makeFs({
'Parent/My New Page.md': '# My New Page\n\nbody text\n',
'Parent/Parent.md': fileFor('parent-9'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({ creates: [{ path: 'Parent/My New Page.md' }] }),
);
expect(res.created).toBe(1);
// CRITICAL: createPage ran (normal path) — adopt was suppressed by complete:false.
expect(client.createPage).toHaveBeenCalledTimes(1);
// No adopt-UPDATE happened: the matching node was NOT trusted.
expect(client.importPageMarkdown).not.toHaveBeenCalled();
// The file carries the NEWLY assigned id, not the would-be adopted one.
expect(parsePageFile(fs.store['Parent/My New Page.md']).id).toBe('page-new-55');
expect(res.writtenBack).toEqual([
{ path: 'Parent/My New Page.md', pageId: 'page-new-55' },
]);
});
it('a NORMAL create (empty live tree) STILL calls createPage', async () => {
// No matching live node -> the happy path: createPage runs, no adopt.
const client = makeClient({ createId: 'page-new-99' });
// makeClient's listSpaceTree returns an empty tree by default.
const { git } = makeGit();
const fs = makeFs({
'Parent/My New Page.md': '# My New Page\n\nbody text\n',
'Parent/Parent.md': fileFor('parent-9'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({ creates: [{ path: 'Parent/My New Page.md' }] }),
);
expect(res.created).toBe(1);
expect(client.createPage).toHaveBeenCalledTimes(1);
// No adopt-UPDATE happened (importPageMarkdown is the update path).
expect(client.importPageMarkdown).not.toHaveBeenCalled();
expect(parsePageFile(fs.store['Parent/My New Page.md']).id).toBe('page-new-99');
});
it('a thrown adopt is isolated as a `create` failure (per-page isolation, SPEC §12)', async () => {
const client = makeClient({ createId: 'unused' });
client.listSpaceTree.mockResolvedValue({
pages: [{ id: 'existing-1', parentPageId: null, title: 'Doc' }],
complete: true,
});
// The adopt pushes the body as an UPDATE; make that throw.
client.importPageMarkdown.mockRejectedValue(new Error('adopt boom'));
const { git, updateRefCalls } = makeGit();
const fs = makeFs({ 'Doc.md': '# Doc\n\nbody\n' });
const res = await applyPushActions(
deps(client, git, fs),
actions({ creates: [{ path: 'Doc.md' }] }),
'sha-adopt-fail',
);
expect(res.created).toBe(0);
expect(client.createPage).not.toHaveBeenCalled();
expect(res.failures).toEqual([
{ kind: 'create', path: 'Doc.md', error: 'adopt boom' },
]);
// A failure means the refs are NOT advanced (re-run retries cleanly, §12).
expect(res.lastPushedAdvanced).toBe(false);
expect(updateRefCalls).toEqual([]);
});
});
describe('applyPushActions — delete (soft-delete to Trash, SPEC §8)', () => {
it('calls deletePage(pageId)', async () => {
const client = makeClient();
const { git } = makeGit();
const fs = makeFs();
const res = await applyPushActions(
deps(client, git, fs),
actions({ deletes: [{ pageId: 'p-del' }] }),
);
expect(res.deleted).toBe(1);
expect(client.deletePage).toHaveBeenCalledTimes(1);
expect(client.deletePage).toHaveBeenCalledWith('p-del');
// No body read needed for a delete.
expect(fs.reads).toEqual([]);
});
});
// FS→Docmost push #3 (SPEC §5/§6/§16): the move/rename APPLY. The classifier
// resolves the parent from the FILE PATH (the enclosing folder's `.md`), not
// stale `meta.parentPageId`, then `applyPushActions` calls move_page / rename_page
// (both for a reparent+retitle) or records a path-only NO-OP with NO client call.
describe('applyPushActions — move (parent changed, title same; SPEC §5/§16)', () => {
it('calls movePage(pageId, newParent) and NOT renamePage', async () => {
// The page moved from the space root (Doc.md) under a folder (Parent/Doc.md).
// The new parent page owns folder `Parent/`, so its file is the FOLDER-NOTE
// `Parent/Parent.md`, whose gitmost_id is the parent id.
const client = makeClient();
const { git } = makeGit({
// Prev pre-image: the file used to sit at the root (parent ROOT).
prevTree: { 'Doc.md': fileFor('p-mv') },
});
const fs = makeFs({
// Current tree: the moved file + its new parent folder's folder-note.
'Parent/Doc.md': fileFor('p-mv'),
'Parent/Parent.md': fileFor('parent-id'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({
renamesMoves: [
{ pageId: 'p-mv', oldPath: 'Doc.md', newPath: 'Parent/Doc.md' },
],
}),
);
expect(res.moved).toBe(1);
expect(res.renamed).toBe(0);
expect(client.movePage).toHaveBeenCalledTimes(1);
// Reparented under `parent-id`; position left UNDEFINED (client default).
expect(client.movePage).toHaveBeenCalledWith('p-mv', 'parent-id');
expect(client.renamePage).not.toHaveBeenCalled();
expect(res.noops).toEqual([]);
});
});
describe('applyPushActions — move-to-root (newParent null; SPEC §16)', () => {
it('calls movePage(pageId, null) when the file lands at the space root', async () => {
const client = makeClient();
const { git } = makeGit({
// Prev: the file used to live under `Parent/`, so its old parent is the
// page whose folder-note is `Parent/Parent.md` (parent-id).
prevTree: {
'Parent/Doc.md': fileFor('p-mv'),
'Parent/Parent.md': fileFor('parent-id'),
},
});
// Current: the file is now at the root -> no enclosing folder -> parent ROOT.
const fs = makeFs({ 'Doc.md': fileFor('p-mv') });
const res = await applyPushActions(
deps(client, git, fs),
actions({
renamesMoves: [
{ pageId: 'p-mv', oldPath: 'Parent/Doc.md', newPath: 'Doc.md' },
],
}),
);
expect(res.moved).toBe(1);
expect(client.movePage).toHaveBeenCalledWith('p-mv', null);
expect(client.renamePage).not.toHaveBeenCalled();
});
});
describe('applyPushActions — rename (same parent, title changed; SPEC §5/§6)', () => {
it('calls renamePage(pageId, title-from-filename) and NOT movePage', async () => {
// Same enclosing folder on both sides (parent unchanged), the FILENAME (=
// title) changed Old -> New -> a pure rename to the new filename's title.
const client = makeClient();
const { git } = makeGit({
prevTree: {
'Folder/Old.md': fileFor('p-rn'),
'Folder/Folder.md': fileFor('folder-id'),
},
});
const fs = makeFs({
'Folder/New.md': fileFor('p-rn'),
'Folder/Folder.md': fileFor('folder-id'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({
renamesMoves: [
{ pageId: 'p-rn', oldPath: 'Folder/Old.md', newPath: 'Folder/New.md' },
],
}),
);
expect(res.renamed).toBe(1);
expect(res.moved).toBe(0);
expect(client.renamePage).toHaveBeenCalledTimes(1);
// The title is the NEW filename (no extension), not a stored meta title.
expect(client.renamePage).toHaveBeenCalledWith('p-rn', 'New');
expect(client.movePage).not.toHaveBeenCalled();
});
});
describe('applyPushActions — both (reparent + retitle; move THEN rename)', () => {
it('calls movePage first, then renamePage', async () => {
const callOrder: string[] = [];
const client = makeClient();
client.movePage.mockImplementation(async () => {
callOrder.push('move');
return { success: true };
});
client.renamePage.mockImplementation(async (pageId: string, title: string) => {
callOrder.push('rename');
return { success: true, pageId, title };
});
const { git } = makeGit({
// Prev: at root (parent ROOT), filename `Old`.
prevTree: { 'Old.md': fileFor('p-x') },
});
const fs = makeFs({
// Current: under a new folder (folder-note np-id) AND renamed to `New`.
'NewParent/New.md': fileFor('p-x'),
'NewParent/NewParent.md': fileFor('np-id'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({
renamesMoves: [
{ pageId: 'p-x', oldPath: 'Old.md', newPath: 'NewParent/New.md' },
],
}),
);
expect(res.moved).toBe(1);
expect(res.renamed).toBe(1);
expect(client.movePage).toHaveBeenCalledWith('p-x', 'np-id');
expect(client.renamePage).toHaveBeenCalledWith('p-x', 'New');
// Order matters: reparent FIRST, then retitle.
expect(callOrder).toEqual(['move', 'rename']);
});
});
describe('applyPushActions — noop (parent folder renamed; NO Docmost call; SPEC §5)', () => {
it('calls NEITHER movePage NOR renamePage and records the noop', async () => {
// The PARENT folder was renamed Old/ -> New/ (a retitle of the parent page,
// whose folder-note kept the SAME gitmost_id). For this CHILD, neither its
// own title (`Child`) nor its parent PAGE (same id `parent-P`) changed — only
// an ancestor's name did. The page is its pageId; Docmost is untouched.
const client = makeClient();
const { git } = makeGit({
prevTree: {
'Old/Child.md': fileFor('p-noop'),
'Old/Old.md': fileFor('parent-P'),
},
});
const fs = makeFs({
'New/Child.md': fileFor('p-noop'),
'New/New.md': fileFor('parent-P'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({
renamesMoves: [
{ pageId: 'p-noop', oldPath: 'Old/Child.md', newPath: 'New/Child.md' },
],
}),
);
expect(res.moved).toBe(0);
expect(res.renamed).toBe(0);
// ZERO Docmost calls — only the ancestor folder name changed.
expect(client.movePage).not.toHaveBeenCalled();
expect(client.renamePage).not.toHaveBeenCalled();
expect(res.noops).toEqual([
{
pageId: 'p-noop',
oldPath: 'Old/Child.md',
newPath: 'New/Child.md',
reason: 'path-only-rename',
},
]);
});
});
describe('applyPushActions — move whose client call throws (SPEC §12 isolation)', () => {
it('isolates the failure into `failures` and does NOT advance the refs', async () => {
const client = makeClient();
client.movePage.mockImplementation(async () => {
throw new Error('move boom');
});
const { git, updateRefCalls, ffCalls } = makeGit({
prevTree: { 'Doc.md': fileFor('p-mv') },
});
const fs = makeFs({
'Parent/Doc.md': fileFor('p-mv'),
'Parent/Parent.md': fileFor('parent-id'),
});
const res = await applyPushActions(
deps(client, git, fs),
actions({
renamesMoves: [
{ pageId: 'p-mv', oldPath: 'Doc.md', newPath: 'Parent/Doc.md' },
],
}),
'sha-move-fail',
);
expect(res.moved).toBe(0);
expect(res.failures).toEqual([
{
kind: 'move',
pageId: 'p-mv',
path: 'Parent/Doc.md',
error: 'move boom',
},
]);
// A failure means the refs are NOT advanced — a re-run retries cleanly (§12).
expect(res.lastPushedAdvanced).toBe(false);
expect(updateRefCalls).toEqual([]);
expect(ffCalls).toEqual([]);
expect(git.updateRef).not.toHaveBeenCalled();
});
});
describe('applyPushActions — loop-close: ref advance + docmost ff (SPEC §6 step 3 / §10)', () => {
it('advances last-pushed AND fast-forwards the docmost mirror on a clean push', async () => {
const client = makeClient();
const { git, updateRefCalls, ffCalls } = makeGit();
const fs = makeFs();
const res = await applyPushActions(
deps(client, git, fs),
actions({ deletes: [{ pageId: 'p' }] }),
'commit-sha-abc',
);
expect(res.lastPushedAdvanced).toBe(true);
expect(updateRefCalls).toEqual([
{ ref: LAST_PUSHED_REF, target: 'commit-sha-abc' },
]);
// The loop-close: the docmost mirror is fast-forwarded to the pushed commit.
expect(ffCalls).toEqual([{ branch: 'docmost', toCommit: 'commit-sha-abc' }]);
expect(res.docmostFastForward).toEqual({ ok: true });
});
it('surfaces a REFUSED non-fast-forward (mirror NOT clobbered)', async () => {
const client = makeClient();
// The ff is refused because docmost is not an ancestor of the pushed commit.
const { git, updateRefCalls, ffCalls } = makeGit({
ffResult: { ok: false, reason: 'not-fast-forward' },
});
const fs = makeFs();
const res = await applyPushActions(
deps(client, git, fs),
actions({ deletes: [{ pageId: 'p' }] }),
'sha-div',
);
// last-pushed still advances (it is our own marker), but the ff result is
// surfaced so the caller can log the refusal.
expect(res.lastPushedAdvanced).toBe(true);
expect(updateRefCalls).toEqual([{ ref: LAST_PUSHED_REF, target: 'sha-div' }]);
expect(ffCalls).toEqual([{ branch: 'docmost', toCommit: 'sha-div' }]);
expect(res.docmostFastForward).toEqual({ ok: false, reason: 'not-fast-forward' });
});
it('does NOT advance either ref when no pushed commit is given', async () => {
const client = makeClient();
const { git, updateRefCalls } = makeGit();
const fs = makeFs();
const res = await applyPushActions(
deps(client, git, fs),
actions({ updates: [] }),
);
expect(res.lastPushedAdvanced).toBe(false);
expect(updateRefCalls).toEqual([]);
expect(res.docmostFastForward).toBeNull();
expect(git.updateRef).not.toHaveBeenCalled();
expect(git.fastForwardBranch).not.toHaveBeenCalled();
});
});
describe('applyPushActions — per-page error isolation + refs gated on success (SPEC §12)', () => {
it('continues the batch when an update throws; records the failure; refs NOT advanced', async () => {
// A client whose 2nd importPageMarkdown call throws — the 1st and 3rd must
// still be applied, the 2nd recorded as a failure, and NO ref advanced.
let call = 0;
const client = {
importPageMarkdown: vi.fn(async (_pageId: string, _md: string) => {
call++;
if (call === 2) throw new Error('boom on page 2');
return { success: true };
}),
createPage: vi.fn(),
deletePage: vi.fn(),
};
const { git, updateRefCalls, ffCalls } = makeGit();
const fs = makeFs({
'A.md': 'a body',
'B.md': 'b body',
'C.md': 'c body',
});
const res = await applyPushActions(
deps(client, git, fs),
actions({
updates: [
{ pageId: 'p-a', path: 'A.md' },
{ pageId: 'p-b', path: 'B.md' },
{ pageId: 'p-c', path: 'C.md' },
],
}),
'sha-partial',
);
// The 1st and 3rd were applied; the 2nd threw.
expect(res.updated).toBe(2);
expect(client.importPageMarkdown).toHaveBeenCalledTimes(3);
expect(client.importPageMarkdown).toHaveBeenNthCalledWith(1, 'p-a', 'a body', null);
expect(client.importPageMarkdown).toHaveBeenNthCalledWith(3, 'p-c', 'c body', null);
// The failure is recorded with kind/pageId/path/error.
expect(res.failures).toEqual([
{ kind: 'update', pageId: 'p-b', path: 'B.md', error: 'boom on page 2' },
]);
// Only the successful pages carry a loop-guard push record.
expect(res.pushed.map((p) => p.pageId)).toEqual(['p-a', 'p-c']);
// A PARTIAL push advances NEITHER ref, so a re-run retries cleanly (§12).
expect(res.lastPushedAdvanced).toBe(false);
expect(updateRefCalls).toEqual([]);
expect(ffCalls).toEqual([]);
expect(res.docmostFastForward).toBeNull();
expect(git.updateRef).not.toHaveBeenCalled();
expect(git.fastForwardBranch).not.toHaveBeenCalled();
});
});
describe('applyPushActions — loop-guard push record (SPEC §10)', () => {
it('records pageId + updatedAt + bodyHash per applied update', async () => {
const fileBody = fileFor('p-1', 'updated body');
const client = {
importPageMarkdown: vi.fn(async (_pageId: string, _md: string) => ({
// The write returns an updatedAt the loop-guard records.
data: { updatedAt: '2026-06-20T10:00:00.000Z' },
success: true,
})),
createPage: vi.fn(),
deletePage: vi.fn(),
};
const { git } = makeGit();
const fs = makeFs({ 'Doc.md': fileBody });
const res = await applyPushActions(
deps(client, git, fs),
actions({ updates: [{ pageId: 'p-1', path: 'Doc.md' }] }),
);
expect(res.pushed).toHaveLength(1);
expect(res.pushed[0].pageId).toBe('p-1');
expect(res.pushed[0].updatedAt).toBe('2026-06-20T10:00:00.000Z');
// The bodyHash is a stable sha256 hex of the pushed BODY (frontmatter stripped).
expect(res.pushed[0].bodyHash).toBe(bodyHash('updated body'));
expect(res.pushed[0].bodyHash).toMatch(/^[0-9a-f]{64}$/);
});
it('omits updatedAt when the client result does not expose one', async () => {
// A hand-written file with no frontmatter; its body is the whole text.
const newFile = '# N\n\nfresh body\n';
const client = makeClient({ createId: 'created-9' });
const { git } = makeGit();
const fs = makeFs({ 'N.md': newFile });
const res = await applyPushActions(
deps(client, git, fs),
actions({ creates: [{ path: 'N.md' }] }),
);
expect(res.pushed).toHaveLength(1);
expect(res.pushed[0].pageId).toBe('created-9');
expect(res.pushed[0].updatedAt).toBeUndefined();
// bodyHash of the pushed BODY (parsePageFile strips nothing here — no
// frontmatter — so it is the trimmed file text).
expect(res.pushed[0].bodyHash).toBe(bodyHash(parsePageFile(newFile).body));
});
});
describe('applyPushActions — mixed batch + skipped passthrough', () => {
it('applies update + create + delete and carries skipped rows through', async () => {
const updFile = fileFor('u-1', 'upd');
const newFile = '# N\n\nfresh body\n';
const client = makeClient({ createId: 'created-1' });
const { git, updateRefCalls } = makeGit();
const fs = makeFs({ 'U.md': updFile, 'N.md': newFile });
const skipped = [
{ path: 'Stray.md', status: 'D' as const, reason: 'no recoverable pageId' },
];
const res = await applyPushActions(
deps(client, git, fs),
actions({
updates: [{ pageId: 'u-1', path: 'U.md' }],
creates: [{ path: 'N.md' }],
deletes: [{ pageId: 'd-1' }],
skipped,
}),
'sha-9',
);
expect(res).toMatchObject({
created: 1,
updated: 1,
deleted: 1,
lastPushedAdvanced: true,
});
expect(res.writtenBack).toEqual([{ path: 'N.md', pageId: 'created-1' }]);
expect(res.skipped).toEqual(skipped);
expect(updateRefCalls).toEqual([{ ref: LAST_PUSHED_REF, target: 'sha-9' }]);
// The update pushes the STRIPPED body ('upd'), not the frontmatter file.
expect(client.importPageMarkdown).toHaveBeenCalledWith('u-1', 'upd', null);
expect(client.deletePage).toHaveBeenCalledWith('d-1');
});
});
@@ -0,0 +1,205 @@
import { describe, expect, it } from 'vitest';
import fc from 'fast-check';
// Barrel import (R-Infra alias resolves this to packages/docmost-client/src so
// coverage measures the real source, not stale dist).
import { canonicalizeContent, docsCanonicallyEqual } from 'docmost-client';
// ---------------------------------------------------------------------------
// Gaps NOT covered by canonicalize.test.ts (test-strategy report §2 diff):
// - the *.align family (drawio/excalidraw/video/youtube/embed): a "center"
// default is dropped, a non-default value is kept;
// - comment.resolved: TRUE is PRESERVED (only resolved:false is normalized);
// - link.target / link.rel NON-default values are kept;
// - property: canonicalizeContent is a fixpoint, docsCanonicallyEqual is
// reflexive and symmetric.
// The base file already covers id-stripping, null-drop, link/comment/orderedList
// default-drop, key-order insensitivity, and a real-diff negative — not re-added.
// ---------------------------------------------------------------------------
describe('canonicalizeContent — *.align default family', () => {
// Every diagram/media node whose schema `align` defaults to "center".
const alignTypes = ['drawio', 'excalidraw', 'video', 'youtube', 'embed'];
for (const type of alignTypes) {
it(`${type}: align "center" (the schema default) is dropped`, () => {
const out = canonicalizeContent({
type,
attrs: { id: 'n-1', src: '/x', align: 'center' },
});
// align==default removed; the meaningful src survives.
expect(out.attrs).toEqual({ src: '/x' });
});
it(`${type}: a NON-default align (e.g. "right") is kept`, () => {
const out = canonicalizeContent({
type,
attrs: { id: 'n-1', src: '/x', align: 'right' },
});
expect(out.attrs).toEqual({ src: '/x', align: 'right' });
});
}
it('image align is NOT in KNOWN_DEFAULTS: a non-null align survives, null is dropped', () => {
// image.align defaults to null, so it is handled by the null-drop rule and
// a real value ("left") must be kept (no spurious default match).
const kept = canonicalizeContent({
type: 'image',
attrs: { id: 'i-1', src: '/a.png', align: 'left' },
});
expect(kept.attrs).toEqual({ src: '/a.png', align: 'left' });
// An image with align:"center" must KEEP it (center is NOT a default for
// image, only for the diagram/media family) — guards against over-matching.
const center = canonicalizeContent({
type: 'image',
attrs: { id: 'i-2', src: '/b.png', align: 'center' },
});
expect(center.attrs).toEqual({ src: '/b.png', align: 'center' });
});
});
describe('canonicalizeContent — comment.resolved:true preserved (SPEC §11 L66)', () => {
it('keeps resolved:true (a legitimate change, not a default to normalize away)', () => {
const out = canonicalizeContent({
type: 'text',
text: 'anchored',
marks: [{ type: 'comment', attrs: { commentId: 'cmt-1', resolved: true } }],
});
// resolved:true is NON-default; it must survive alongside the commentId so a
// resolve-vs-unresolved divergence is not falsely reported as equal.
expect(out.marks).toEqual([
{ type: 'comment', attrs: { commentId: 'cmt-1', resolved: true } },
]);
});
it('a resolved:true comment is NOT canonically equal to an unresolved one', () => {
const resolved = {
type: 'text',
text: 'x',
marks: [{ type: 'comment', attrs: { commentId: 'c', resolved: true } }],
};
const open = {
type: 'text',
text: 'x',
marks: [{ type: 'comment', attrs: { commentId: 'c' } }],
};
expect(docsCanonicallyEqual(resolved, open)).toBe(false);
});
});
describe('canonicalizeContent — link non-default target/rel kept', () => {
it('keeps a NON-default link.target (e.g. "_self")', () => {
const out = canonicalizeContent({
type: 'text',
text: 'l',
marks: [{ type: 'link', attrs: { href: 'https://e.com', target: '_self' } }],
});
// _self != the "_blank" default, so target must survive.
expect(out.marks).toEqual([
{ type: 'link', attrs: { href: 'https://e.com', target: '_self' } },
]);
});
it('keeps a NON-default link.rel', () => {
const out = canonicalizeContent({
type: 'text',
text: 'l',
marks: [{ type: 'link', attrs: { href: 'https://e.com', rel: 'nofollow' } }],
});
expect(out.marks).toEqual([
{ type: 'link', attrs: { href: 'https://e.com', rel: 'nofollow' } },
]);
});
});
// ---------------------------------------------------------------------------
// Property-based oracle checks (SPEC §11). The generated trees mix node/mark
// types, ids, null attrs, known-default attrs and meaningful attrs, so the
// invariants are exercised across the whole canonicalization surface.
// ---------------------------------------------------------------------------
// An attribute value: a meaningful value, a null/undefined, a block id, or a
// known schema default — so pruning, id-drop, null-drop and default-drop all
// fire during shrinking.
const attrValueArb = fc.oneof(
fc.string({ minLength: 1, maxLength: 6 }),
fc.integer({ min: 0, max: 9 }),
fc.boolean(),
fc.constant(null),
);
// A recursive ProseMirror-ish node arbitrary (bounded depth) with type, attrs
// (incl. an id and possibly a known default), optional marks and content.
const nodeArb: fc.Arbitrary<any> = fc.letrec((tie) => ({
node: fc.record(
{
type: fc.constantFrom(
'paragraph',
'heading',
'orderedList',
'drawio',
'video',
'text',
),
text: fc.option(fc.string({ minLength: 0, maxLength: 5 }), { nil: undefined }),
attrs: fc.option(
fc.dictionary(
fc.constantFrom('id', 'level', 'start', 'align', 'src', 'indent', 'keep'),
attrValueArb,
{ maxKeys: 4 },
),
{ nil: undefined },
),
marks: fc.option(
fc.array(
fc.record({
type: fc.constantFrom('bold', 'link', 'comment'),
attrs: fc.option(
fc.dictionary(
fc.constantFrom('href', 'target', 'rel', 'commentId', 'resolved'),
fc.oneof(attrValueArb, fc.constant('_blank')),
{ maxKeys: 3 },
),
{ nil: undefined },
),
}),
{ maxLength: 2 },
),
{ nil: undefined },
),
content: fc.option(fc.array(tie('node'), { maxLength: 2 }), { nil: undefined }),
},
{ requiredKeys: ['type'] },
),
})).node;
describe('canonicalizeContent — property invariants (SPEC §11 oracle)', () => {
it('is a fixpoint: f(f(x)) === f(x)', () => {
fc.assert(
fc.property(nodeArb, (node) => {
const once = canonicalizeContent(node);
const twice = canonicalizeContent(once);
// The canonical form must already be stable under a second pass.
expect(twice).toEqual(once);
}),
{ numRuns: 300 },
);
});
it('docsCanonicallyEqual is reflexive: equal(x, x) is always true', () => {
fc.assert(
fc.property(nodeArb, (node) => {
expect(docsCanonicallyEqual(node, node)).toBe(true);
}),
{ numRuns: 300 },
);
});
it('docsCanonicallyEqual is symmetric: equal(a, b) === equal(b, a)', () => {
fc.assert(
fc.property(nodeArb, nodeArb, (a, b) => {
expect(docsCanonicallyEqual(a, b)).toBe(docsCanonicallyEqual(b, a));
}),
{ numRuns: 300 },
);
});
});
+302
View File
@@ -0,0 +1,302 @@
import { describe, expect, it } from 'vitest';
// Import via the package barrel to also assert the symbols are re-exported.
import { canonicalizeContent, docsCanonicallyEqual } from 'docmost-client';
describe('canonicalizeContent', () => {
it('strips node-level attrs.id, recursively', () => {
const input = {
type: 'doc',
content: [
{
type: 'heading',
attrs: { id: 'h-1', level: 2 },
content: [{ type: 'text', text: 'Title' }],
},
],
};
const out = canonicalizeContent(input);
expect(out.content[0].attrs).toEqual({ level: 2 });
// No `id` survives anywhere in the canonical tree.
expect(JSON.stringify(out)).not.toContain('"id"');
});
it('drops null/undefined attrs but keeps every non-null attr', () => {
const out = canonicalizeContent({
type: 'paragraph',
attrs: {
id: 'p-1',
indent: null,
textAlign: undefined,
level: 0,
keep: 'yes',
},
content: [],
});
// null/undefined gone; non-null values (incl. 0 and false) kept.
expect(out.attrs).toEqual({ keep: 'yes', level: 0 });
});
it('removes an attrs object that becomes empty after pruning', () => {
const out = canonicalizeContent({
type: 'paragraph',
attrs: { id: 'p-1', indent: null, textAlign: null },
content: [{ type: 'text', text: 'x' }],
});
// attrs had only an id + null defaults -> the whole attrs key is dropped.
expect('attrs' in out).toBe(false);
expect(out).toEqual({
type: 'paragraph',
content: [{ type: 'text', text: 'x' }],
});
});
it('treats {attrs:{}} as equivalent to no attrs', () => {
const withEmpty = canonicalizeContent({ type: 'paragraph', attrs: {} });
const without = canonicalizeContent({ type: 'paragraph' });
expect(withEmpty).toEqual(without);
});
it('keeps comment marks + commentId but normalizes resolved:false default (SPEC §3 anchor)', () => {
const out = canonicalizeContent({
type: 'text',
text: 'anchored',
marks: [
{ type: 'comment', attrs: { commentId: 'cmt-1', resolved: false } },
],
});
// The comment mark is preserved; commentId (a meaningful anchor) survives,
// but the `resolved: false` schema default is normalized away.
expect(out.marks).toEqual([
{ type: 'comment', attrs: { commentId: 'cmt-1' } },
]);
});
it('drops known non-null schema defaults (link target/rel, comment resolved)', () => {
const out = canonicalizeContent({
type: 'text',
text: 'a link',
marks: [
{
type: 'link',
attrs: {
href: 'https://example.com/page',
target: '_blank',
rel: 'noopener noreferrer nofollow',
},
},
],
});
// href (non-default) kept; target/rel (schema defaults) dropped.
expect(out.marks).toEqual([
{ type: 'link', attrs: { href: 'https://example.com/page' } },
]);
});
it('keeps a NON-default value that happens to share an attr name (orderedList start:5)', () => {
const out = canonicalizeContent({
type: 'orderedList',
attrs: { id: 'ol-1', start: 5 },
content: [],
});
// start:5 is NOT the default (1), so it must survive.
expect(out.attrs).toEqual({ start: 5 });
});
it('keeps meaningful node/mark attrs (level, language, href, src, width)', () => {
const out = canonicalizeContent({
type: 'doc',
content: [
{
type: 'codeBlock',
attrs: { id: 'c-1', language: 'js' },
content: [{ type: 'text', text: 'x' }],
},
{
type: 'image',
attrs: { id: 'i-1', src: '/a.png', width: 100, height: null },
},
{
type: 'paragraph',
content: [
{
type: 'text',
text: 'link',
marks: [{ type: 'link', attrs: { href: 'https://e.com' } }],
},
],
},
],
});
expect(out.content[0].attrs).toEqual({ language: 'js' });
expect(out.content[1].attrs).toEqual({ src: '/a.png', width: 100 });
expect(out.content[2].content[0].marks[0].attrs).toEqual({
href: 'https://e.com',
});
});
it('preserves text, type and content order exactly', () => {
const input = {
type: 'paragraph',
content: [
{ type: 'text', text: 'one' },
{ type: 'text', text: 'two', marks: [{ type: 'bold' }] },
{ type: 'text', text: 'three' },
],
};
const out = canonicalizeContent(input);
expect(out.content.map((n: any) => n.text)).toEqual([
'one',
'two',
'three',
]);
expect(out.content[1].marks).toEqual([{ type: 'bold' }]);
});
it('drops an empty marks array (marks:[] === no marks)', () => {
const out = canonicalizeContent({ type: 'text', text: 'x', marks: [] });
expect('marks' in out).toBe(false);
});
it('does not mutate its input (frozen tree passes through unchanged)', () => {
const input = Object.freeze({
type: 'doc',
content: Object.freeze([
Object.freeze({
type: 'paragraph',
attrs: Object.freeze({ id: 'p-1', indent: null }),
content: Object.freeze([Object.freeze({ type: 'text', text: 'x' })]),
}),
]),
});
const before = JSON.stringify(input);
const out = canonicalizeContent(input);
// Input is structurally identical after the call.
expect(JSON.stringify(input)).toBe(before);
// A fresh tree is returned.
expect(out).not.toBe(input);
expect('attrs' in out.content[0]).toBe(false);
});
});
describe('docsCanonicallyEqual', () => {
it('is true when docs differ only by block ids', () => {
const a = {
type: 'doc',
content: [
{ type: 'heading', attrs: { id: 'h-1', level: 1 }, content: [] },
],
};
const b = {
type: 'doc',
content: [
{ type: 'heading', attrs: { id: 'h-DIFFERENT', level: 1 }, content: [] },
],
};
expect(docsCanonicallyEqual(a, b)).toBe(true);
});
it('is true when one side omits an attr the other sets to default null', () => {
const a = {
type: 'paragraph',
attrs: { id: 'p-1' },
content: [{ type: 'text', text: 'x' }],
};
const b = {
type: 'paragraph',
attrs: { id: 'p-2', indent: null, textAlign: null },
content: [{ type: 'text', text: 'x' }],
};
expect(docsCanonicallyEqual(a, b)).toBe(true);
});
it('is key-order-insensitive for attrs', () => {
const a = { type: 'image', attrs: { src: '/a.png', width: 10 } };
const b = { type: 'image', attrs: { width: 10, src: '/a.png' } };
expect(docsCanonicallyEqual(a, b)).toBe(true);
});
it('is false for a real text difference', () => {
const a = { type: 'text', text: 'hello' };
const b = { type: 'text', text: 'world' };
expect(docsCanonicallyEqual(a, b)).toBe(false);
});
it('is false for a real attr difference (different level)', () => {
const a = { type: 'heading', attrs: { id: 'x', level: 1 } };
const b = { type: 'heading', attrs: { id: 'y', level: 2 } };
expect(docsCanonicallyEqual(a, b)).toBe(false);
});
it('is false when a meaningful mark attr differs (commentId)', () => {
const a = {
type: 'text',
text: 'x',
marks: [{ type: 'comment', attrs: { commentId: 'cmt-1' } }],
};
const b = {
type: 'text',
text: 'x',
marks: [{ type: 'comment', attrs: { commentId: 'cmt-2' } }],
};
expect(docsCanonicallyEqual(a, b)).toBe(false);
});
it('is true when a link has only href vs one with the schema-default target/rel', () => {
const a = {
type: 'text',
text: 'link',
marks: [{ type: 'link', attrs: { href: 'https://example.com' } }],
};
const b = {
type: 'text',
text: 'link',
marks: [
{
type: 'link',
attrs: {
href: 'https://example.com',
target: '_blank',
rel: 'noopener noreferrer nofollow',
},
},
],
};
expect(docsCanonicallyEqual(a, b)).toBe(true);
});
it('is true when an orderedList omits start vs one with the default start:1', () => {
const a = { type: 'orderedList', content: [] };
const b = { type: 'orderedList', attrs: { start: 1 }, content: [] };
expect(docsCanonicallyEqual(a, b)).toBe(true);
});
it('is false when an orderedList has a non-default start (5 vs absent)', () => {
const a = { type: 'orderedList', content: [] };
const b = { type: 'orderedList', attrs: { start: 5 }, content: [] };
expect(docsCanonicallyEqual(a, b)).toBe(false);
});
it('is true when a comment mark omits resolved vs one with the default false', () => {
const a = {
type: 'text',
text: 'x',
marks: [{ type: 'comment', attrs: { commentId: 'cmt-1' } }],
};
const b = {
type: 'text',
text: 'x',
marks: [{ type: 'comment', attrs: { commentId: 'cmt-1', resolved: false } }],
};
expect(docsCanonicallyEqual(a, b)).toBe(true);
});
it('is false when a comment mark is dropped entirely', () => {
const a = {
type: 'text',
text: 'x',
marks: [{ type: 'comment', attrs: { commentId: 'cmt-1' } }],
};
const b = { type: 'text', text: 'x' };
expect(docsCanonicallyEqual(a, b)).toBe(false);
});
});
@@ -0,0 +1,263 @@
import { describe, expect, it } from 'vitest';
import { classifyRenameMoves } from '../src/engine/push';
import type {
ClassifyRenameMovesDeps,
MetaSide,
RenameMoveAction,
} from '../src/engine/push';
import type { DocmostMdMeta } from '../src/lib/index';
// FS→Docmost push #3 (SPEC §5/§6/§16). `classifyRenameMoves` is the PURE half of
// the move/rename apply: it resolves each `{pageId, oldPath, newPath}` into the
// Docmost op(s) it needs, with NO IO (both resolvers are injected). The key
// design (SPEC §5) is that the file PATH is the source of truth for tree
// position — the NEW parent comes from the new path, the OLD parent from the old
// path — and the title comes from the meta. An op is emitted ONLY when something
// really changed; a path-only rename (same parent + same title) is a noop and
// NEVER calls Docmost.
/** Build `metaAt` from a `path|side -> meta` table. */
function metaTable(
table: Record<string, DocmostMdMeta | null>,
): (path: string, side: MetaSide) => DocmostMdMeta | null {
return (path, side) => {
const key = `${path}|${side}`;
return key in table ? table[key] : null;
};
}
/** Build `resolveParentPageId` from a `path|side -> parentPageId|null` table. */
function parentTable(
table: Record<string, string | null>,
): (path: string, side: MetaSide) => string | null {
return (path, side) => {
const key = `${path}|${side}`;
return key in table ? table[key] : null;
};
}
function deps(
metas: Record<string, DocmostMdMeta | null>,
parents: Record<string, string | null>,
): ClassifyRenameMovesDeps {
return {
metaAt: metaTable(metas),
resolveParentPageId: parentTable(parents),
};
}
function meta(partial: Partial<DocmostMdMeta>): DocmostMdMeta {
return { version: 1, ...partial };
}
describe('classifyRenameMoves — move-only (parent changed, title same)', () => {
it('emits move (new parent) and NO rename', () => {
const rms: RenameMoveAction[] = [
{ pageId: 'p1', oldPath: 'Doc.md', newPath: 'Parent/Doc.md' },
];
const out = classifyRenameMoves(
rms,
deps(
{
// Same title on both sides.
'Parent/Doc.md|current': meta({ title: 'Doc' }),
'Doc.md|prev': meta({ title: 'Doc' }),
},
{
// Parent changed: root (null) -> 'parent-id'.
'Parent/Doc.md|current': 'parent-id',
'Doc.md|prev': null,
},
),
);
expect(out).toEqual([
{
pageId: 'p1',
oldPath: 'Doc.md',
newPath: 'Parent/Doc.md',
move: { parentPageId: 'parent-id' },
},
]);
expect(out[0].rename).toBeUndefined();
expect(out[0].noop).toBeUndefined();
});
});
describe('classifyRenameMoves — rename-only (same parent, title changed)', () => {
it('emits rename (new title) and NO move', () => {
const rms: RenameMoveAction[] = [
{ pageId: 'p2', oldPath: 'Folder/Old.md', newPath: 'Folder/New.md' },
];
const out = classifyRenameMoves(
rms,
deps(
{
'Folder/New.md|current': meta({ title: 'New Title' }),
'Folder/Old.md|prev': meta({ title: 'Old Title' }),
},
{
// Same parent on both sides.
'Folder/New.md|current': 'folder-id',
'Folder/Old.md|prev': 'folder-id',
},
),
);
expect(out).toEqual([
{
pageId: 'p2',
oldPath: 'Folder/Old.md',
newPath: 'Folder/New.md',
rename: { title: 'New Title' },
},
]);
expect(out[0].move).toBeUndefined();
expect(out[0].noop).toBeUndefined();
});
});
describe('classifyRenameMoves — both (parent AND title changed)', () => {
it('emits BOTH move and rename', () => {
const rms: RenameMoveAction[] = [
{ pageId: 'p3', oldPath: 'Old.md', newPath: 'NewParent/New.md' },
];
const out = classifyRenameMoves(
rms,
deps(
{
'NewParent/New.md|current': meta({ title: 'New' }),
'Old.md|prev': meta({ title: 'Old' }),
},
{
'NewParent/New.md|current': 'np-id',
'Old.md|prev': null,
},
),
);
expect(out).toEqual([
{
pageId: 'p3',
oldPath: 'Old.md',
newPath: 'NewParent/New.md',
move: { parentPageId: 'np-id' },
rename: { title: 'New' },
},
]);
expect(out[0].noop).toBeUndefined();
});
});
describe('classifyRenameMoves — noop (path-only rename, same parent + title)', () => {
it('emits noop and NEITHER move NOR rename (SPEC §5: page is its pageId)', () => {
const rms: RenameMoveAction[] = [
{ pageId: 'p4', oldPath: 'Folder/A.md', newPath: 'Folder/B.md' },
];
const out = classifyRenameMoves(
rms,
deps(
{
'Folder/B.md|current': meta({ title: 'Same' }),
'Folder/A.md|prev': meta({ title: 'Same' }),
},
{
'Folder/B.md|current': 'folder-id',
'Folder/A.md|prev': 'folder-id',
},
),
);
expect(out).toEqual([
{
pageId: 'p4',
oldPath: 'Folder/A.md',
newPath: 'Folder/B.md',
noop: true,
},
]);
expect(out[0].move).toBeUndefined();
expect(out[0].rename).toBeUndefined();
});
});
describe('classifyRenameMoves — move-to-root (newParent null)', () => {
it('emits move with parentPageId null when the file lands at the space root', () => {
const rms: RenameMoveAction[] = [
{ pageId: 'p5', oldPath: 'Parent/Doc.md', newPath: 'Doc.md' },
];
const out = classifyRenameMoves(
rms,
deps(
{
'Doc.md|current': meta({ title: 'Doc' }),
'Parent/Doc.md|prev': meta({ title: 'Doc' }),
},
{
// New parent is ROOT (null), old parent was 'parent-id'.
'Doc.md|current': null,
'Parent/Doc.md|prev': 'parent-id',
},
),
);
expect(out).toEqual([
{
pageId: 'p5',
oldPath: 'Parent/Doc.md',
newPath: 'Doc.md',
move: { parentPageId: null },
},
]);
expect(out[0].rename).toBeUndefined();
expect(out[0].noop).toBeUndefined();
});
});
describe('classifyRenameMoves — title guards', () => {
it('an EMPTY new title is NOT a rename (even if it differs from old)', () => {
const rms: RenameMoveAction[] = [
{ pageId: 'p6', oldPath: 'Folder/A.md', newPath: 'Folder/B.md' },
];
const out = classifyRenameMoves(
rms,
deps(
{
// New title is empty -> never a rename; same parent -> overall noop.
'Folder/B.md|current': meta({ title: '' }),
'Folder/A.md|prev': meta({ title: 'Had A Title' }),
},
{
'Folder/B.md|current': 'folder-id',
'Folder/A.md|prev': 'folder-id',
},
),
);
expect(out[0].rename).toBeUndefined();
expect(out[0].move).toBeUndefined();
expect(out[0].noop).toBe(true);
});
it('a missing new meta is NOT a rename; a parent change still yields a move', () => {
const rms: RenameMoveAction[] = [
{ pageId: 'p7', oldPath: 'Doc.md', newPath: 'Parent/Doc.md' },
];
const out = classifyRenameMoves(
rms,
deps(
{
// No current meta entry at all (resolver returns null).
'Doc.md|prev': meta({ title: 'Doc' }),
},
{
'Parent/Doc.md|current': 'parent-id',
'Doc.md|prev': null,
},
),
);
expect(out[0].move).toEqual({ parentPageId: 'parent-id' });
expect(out[0].rename).toBeUndefined();
expect(out[0].noop).toBeUndefined();
});
});
describe('classifyRenameMoves — empty input', () => {
it('returns an empty array for no rename/move entries', () => {
expect(classifyRenameMoves([], deps({}, {}))).toEqual([]);
});
});
@@ -0,0 +1,195 @@
import { describe, expect, it } from 'vitest';
import { computePullActions } from '../src/engine/pull';
import type { PageNode } from '../src/engine/layout';
// R-Pull-2 (test-strategy report §5): `computePullActions` is the PURE half of
// the pull cycle — layout + planReconciliation + the SPEC §8 absence-deletion
// suppression decision, folded together, with NO IO. These tests exercise it
// without git/fs/network. The thin IO applier is covered in apply-pull-actions.
/** A live tree node (only the fields the layout / reconciliation read). */
function node(
id: string,
title: string,
parentPageId: string | null = null,
hasChildren = false,
): PageNode {
return { id, title, slugId: id, parentPageId, hasChildren };
}
describe('computePullActions — normal complete fetch', () => {
it('builds toWrite from the live layout and an empty existing set (all adds)', () => {
const pages = [
node('root', 'Root', null, true),
node('child', 'Child', 'root'),
];
const actions = computePullActions({
pages,
treeComplete: true,
existing: [],
});
// Each live page is (re)written at its deterministic layout path. `root`
// has a child, so it lives at the folder-note `Root/Root.md` (native-Obsidian
// layout), with the child alongside it in that folder.
expect(actions.toWrite).toEqual([
{ pageId: 'root', relPath: 'Root/Root.md' },
{ pageId: 'child', relPath: 'Root/Child.md' },
]);
expect(actions.moved).toEqual([]);
expect(actions.toDelete).toEqual([]);
expect(actions.deletionDecision).toEqual({ apply: true });
});
it('plans toWrite / moved / toDelete correctly for a mixed reconciliation', () => {
const pages = [
node('keep', 'Keep'),
node('mover', 'Mover'),
node('fresh', 'Fresh'),
];
// existing: keep (same path), mover (old path -> move), dead (absent -> delete).
const existing = [
{ pageId: 'keep', relPath: 'Keep.md' },
{ pageId: 'mover', relPath: 'Old/Mover.md' },
{ pageId: 'dead', relPath: 'Dead.md' },
];
const actions = computePullActions({ pages, treeComplete: true, existing });
expect(actions.toWrite).toEqual([
{ pageId: 'keep', relPath: 'Keep.md' },
{ pageId: 'mover', relPath: 'Mover.md' },
{ pageId: 'fresh', relPath: 'Fresh.md' },
]);
// mover moved from Old/Mover.md to the new layout path Mover.md.
expect(actions.moved).toEqual([
{
pageId: 'mover',
fromRelPath: 'Old/Mover.md',
toRelPath: 'Mover.md',
removeOldPath: true,
},
]);
// dead is absent from live -> an absence delete (decision applies it).
expect(actions.toDelete).toEqual(['Dead.md']);
expect(actions.deletionDecision).toEqual({ apply: true });
});
it('a live page moved to a NEW path is in `moved`, its old path NOT in toDelete', () => {
const pages = [node('p1', 'Doc', 'newparent'), node('newparent', 'NewParent', null, true)];
const existing = [{ pageId: 'p1', relPath: 'OldParent/Doc.md' }];
const actions = computePullActions({ pages, treeComplete: true, existing });
const moved = actions.moved.find((m) => m.pageId === 'p1');
expect(moved).toBeTruthy();
expect(moved!.fromRelPath).toBe('OldParent/Doc.md');
expect(moved!.toRelPath).toBe('NewParent/Doc.md');
// The old path is a MOVE removal, NEVER an absence delete.
expect(actions.toDelete).not.toContain('OldParent/Doc.md');
expect(actions.toDelete).toEqual([]);
});
});
describe('computePullActions — SPEC §8 suppression folded in', () => {
it('INCOMPLETE fetch (treeComplete:false) SUPPRESSES absence deletions', () => {
// dead is absent from the live tree, but the tree fetch was partial -> the
// missing pageId is NOT proof of deletion, so toDelete must be EMPTY and the
// decision must report apply:false / incomplete-fetch.
const pages = [node('keep', 'Keep')];
const existing = [
{ pageId: 'keep', relPath: 'Keep.md' },
{ pageId: 'dead', relPath: 'Dead.md' },
];
const actions = computePullActions({
pages,
treeComplete: false,
existing,
});
expect(actions.deletionDecision).toEqual({
apply: false,
reason: 'incomplete-fetch',
});
// Suppressed: nothing to delete this cycle...
expect(actions.toDelete).toEqual([]);
// ...but the planned count is still reported (for the suppression log).
expect(actions.plannedDeleteCount).toBe(1);
// Writes/updates still happen regardless of the suppression.
expect(actions.toWrite).toEqual([{ pageId: 'keep', relPath: 'Keep.md' }]);
});
it('MASS-DELETE guard (>50% of a non-trivial vault) SUPPRESSES deletions', () => {
// 1 live page, 10 existing tracked, 9 of them absent -> 9/10 > 50% on a
// non-trivial (>=4) vault -> mass-delete suppression.
const pages = [node('p0', 'P0')];
const existing = [
{ pageId: 'p0', relPath: 'P0.md' },
...Array.from({ length: 9 }, (_, i) => ({
pageId: `gone${i}`,
relPath: `Gone${i}.md`,
})),
];
const actions = computePullActions({ pages, treeComplete: true, existing });
expect(actions.deletionDecision).toEqual({
apply: false,
reason: 'mass-delete',
});
expect(actions.toDelete).toEqual([]);
expect(actions.plannedDeleteCount).toBe(9);
expect(actions.existingCount).toBe(10);
});
it('moves are NOT suppressed even on an incomplete fetch', () => {
// A moved page is PRESENT in live, so its move is real regardless of the
// suppression (which only governs ABSENCE deletes).
const pages = [node('m', 'Moved')];
const existing = [{ pageId: 'm', relPath: 'Old/Moved.md' }];
const actions = computePullActions({
pages,
treeComplete: false,
existing,
});
expect(actions.moved).toEqual([
{
pageId: 'm',
fromRelPath: 'Old/Moved.md',
toRelPath: 'Moved.md',
removeOldPath: true,
},
]);
// No absence deletes were planned here, so the decision trivially applies.
expect(actions.toDelete).toEqual([]);
});
it('empty-live with tracked files SUPPRESSES (failed fetch, not a real wipe)', () => {
const existing = [
{ pageId: 'a', relPath: 'A.md' },
{ pageId: 'b', relPath: 'B.md' },
];
const actions = computePullActions({
pages: [],
treeComplete: true,
existing,
});
expect(actions.deletionDecision).toEqual({
apply: false,
reason: 'empty-live',
});
expect(actions.toDelete).toEqual([]);
expect(actions.toWrite).toEqual([]);
});
});
describe('computePullActions — degenerate inputs', () => {
it('skips nodes without an id and nodes with no layout entry', () => {
const pages = [
node('p1', 'Valid'),
{ id: '', title: 'NoId' } as PageNode, // skipped (no id)
];
const actions = computePullActions({
pages,
treeComplete: true,
existing: [],
});
expect(actions.toWrite).toEqual([{ pageId: 'p1', relPath: 'Valid.md' }]);
});
});
@@ -0,0 +1,452 @@
import { describe, expect, it } from 'vitest';
import { computePushActions } from '../src/engine/push';
import type { DiffEntry, MetaSide } from '../src/engine/push';
import type { DocmostMdMeta } from '../src/lib/index';
// FS→Docmost push, FIRST increment (SPEC §6). `computePushActions` is the PURE
// half: it classifies each `git diff --name-status` row into a Docmost action by
// `pageId` identity (SPEC §4/§8), with NO IO — the `metaAt` resolver is injected.
// These tests cover every classification incl. edges.
/** Build a `metaAt` resolver from a `path|side -> meta` table. */
function metaTable(
table: Record<string, DocmostMdMeta | null>,
): (path: string, side: MetaSide) => DocmostMdMeta | null {
return (path, side) => {
const key = `${path}|${side}`;
return key in table ? table[key] : null;
};
}
function meta(partial: Partial<DocmostMdMeta>): DocmostMdMeta {
return { version: 1, ...partial };
}
describe('computePushActions — A (added)', () => {
it('added file with NO pageId -> create', () => {
const changes: DiffEntry[] = [{ status: 'A', path: 'New.md' }];
const metaAt = metaTable({
'New.md|current': meta({ title: 'New', spaceId: 'sp1' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.creates).toEqual([{ path: 'New.md' }]);
expect(actions.updates).toEqual([]);
expect(actions.deletes).toEqual([]);
expect(actions.renamesMoves).toEqual([]);
expect(actions.skipped).toEqual([]);
});
it('added file with NO meta at all -> skipped (a create needs a spaceId)', () => {
// No meta -> no spaceId -> cannot create (Docmost create_page requires it).
const changes: DiffEntry[] = [{ status: 'A', path: 'Plain.md' }];
const actions = computePushActions({ changes, metaAt: metaTable({}) });
expect(actions.creates).toEqual([]);
expect(actions.skipped).toEqual([
{ path: 'Plain.md', status: 'A', reason: 'create-without-spaceId' },
]);
});
it('added file with meta but NO spaceId -> skipped (create-without-spaceId)', () => {
// Partial human meta (title only, no spaceId) -> refuse to create.
const changes: DiffEntry[] = [{ status: 'A', path: 'Partial.md' }];
const metaAt = metaTable({
'Partial.md|current': meta({ title: 'Partial' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.creates).toEqual([]);
expect(actions.skipped).toEqual([
{ path: 'Partial.md', status: 'A', reason: 'create-without-spaceId' },
]);
});
it('added file with an EMPTY-string spaceId -> skipped (create-without-spaceId)', () => {
// An empty spaceId is not a usable target either.
const changes: DiffEntry[] = [{ status: 'A', path: 'Empty.md' }];
const metaAt = metaTable({
'Empty.md|current': meta({ title: 'E', spaceId: '' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.creates).toEqual([]);
expect(actions.skipped).toEqual([
{ path: 'Empty.md', status: 'A', reason: 'create-without-spaceId' },
]);
});
it('added file WITH a pageId (restored/copied) -> update (page exists)', () => {
const changes: DiffEntry[] = [{ status: 'A', path: 'Restored.md' }];
const metaAt = metaTable({
'Restored.md|current': meta({ pageId: 'p-restored', title: 'R' }),
});
const actions = computePushActions({ changes, metaAt });
// The page already exists -> push content as an UPDATE, never a duplicate.
expect(actions.updates).toEqual([
{ pageId: 'p-restored', path: 'Restored.md' },
]);
expect(actions.creates).toEqual([]);
});
});
describe('computePushActions — M (modified)', () => {
it('modified file with a pageId -> update content', () => {
const changes: DiffEntry[] = [{ status: 'M', path: 'Doc.md' }];
const metaAt = metaTable({
'Doc.md|current': meta({ pageId: 'p-doc' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.updates).toEqual([{ pageId: 'p-doc', path: 'Doc.md' }]);
expect(actions.skipped).toEqual([]);
});
it('modified file with NO pageId in current OR pre-image -> skipped', () => {
const changes: DiffEntry[] = [{ status: 'M', path: 'Untracked.md' }];
const actions = computePushActions({ changes, metaAt: metaTable({}) });
expect(actions.updates).toEqual([]);
expect(actions.skipped).toEqual([
{
path: 'Untracked.md',
status: 'M',
reason: 'modified file has no pageId in meta (nor in pre-image)',
},
]);
});
it('modified file that DROPPED its pageId recovers it from the pre-image -> update (bug C10-D1)', () => {
// The current file lost its `gitmost_id` frontmatter (a tool rewrote the whole
// file), but the last-pushed version at this path still had it. Recover the id
// and apply the body edit instead of silently skipping+reverting it.
const changes: DiffEntry[] = [{ status: 'M', path: 'Doc.md' }];
const metaAt = metaTable({
// current side has no pageId; prev (pre-image) side does.
'Doc.md|prev': meta({ pageId: 'p-doc' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.updates).toEqual([{ pageId: 'p-doc', path: 'Doc.md' }]);
expect(actions.skipped).toEqual([]);
});
});
describe('computePushActions — D (deleted)', () => {
it('deleted file recovers pageId from the PRE-IMAGE meta -> delete', () => {
const changes: DiffEntry[] = [{ status: 'D', path: 'Gone.md' }];
// The file is gone from `current`; its pageId lives in the `prev` pre-image.
const metaAt = metaTable({
'Gone.md|prev': meta({ pageId: 'p-gone' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.deletes).toEqual([{ pageId: 'p-gone' }]);
expect(actions.skipped).toEqual([]);
});
it('deleted file with NO recoverable pageId -> skipped (untracked guard §8)', () => {
const changes: DiffEntry[] = [{ status: 'D', path: 'Stray.md' }];
// No pre-image pageId -> the untracked-file guard skips it (never deletes a
// page that was never tracked, SPEC §8).
const actions = computePushActions({ changes, metaAt: metaTable({}) });
expect(actions.deletes).toEqual([]);
expect(actions.skipped).toEqual([
{
path: 'Stray.md',
status: 'D',
reason: 'deleted file has no recoverable pageId (pre-image meta)',
},
]);
});
it('uses the PREV side, not current, to recover the deleted pageId', () => {
const changes: DiffEntry[] = [{ status: 'D', path: 'Gone.md' }];
// A stale `current` meta must NOT be used; only the pre-image counts.
const metaAt = metaTable({
'Gone.md|current': meta({ pageId: 'WRONG' }),
'Gone.md|prev': meta({ pageId: 'p-correct' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.deletes).toEqual([{ pageId: 'p-correct' }]);
});
});
describe('computePushActions — R/C (renamed/moved)', () => {
it('renamed file -> renamesMoves (record only; resolution deferred)', () => {
const changes: DiffEntry[] = [
{ status: 'R', path: 'New/Path.md', oldPath: 'Old/Path.md', score: 100 },
];
const metaAt = metaTable({
'New/Path.md|current': meta({ pageId: 'p-moved' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.renamesMoves).toEqual([
{ pageId: 'p-moved', oldPath: 'Old/Path.md', newPath: 'New/Path.md' },
]);
// It is ALSO recorded as an UPDATE for the new path (F4) so a body edit
// riding along the rename in the same diff is pushed, not lost. The update
// carries `basePath = oldPath` so the 3-way merge base is resolved from where
// the file lived at last-pushed (the OLD path), not the new path (which would
// return null and degrade to a 2-way clobber). Never a create/delete though.
expect(actions.creates).toEqual([]);
expect(actions.updates).toEqual([
{ pageId: 'p-moved', path: 'New/Path.md', basePath: 'Old/Path.md' },
]);
expect(actions.deletes).toEqual([]);
});
it('rename + body edit in one diff -> emits BOTH a rename/move action AND a body update (F4)', () => {
// git `-M` reports a rename WITH a body edit as a single `R` row. The
// move/rename ops never carry page content, so the body edit must ALSO be
// emitted as an UPDATE targeting the NEW path + same pageId — otherwise it is
// silently and permanently lost (F4, the critical data-loss bug).
const changes: DiffEntry[] = [
{ status: 'R', path: 'New/Path.md', oldPath: 'Old/Path.md', score: 75 },
];
const metaAt = metaTable({
'New/Path.md|current': meta({ pageId: 'p-edited' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.renamesMoves).toEqual([
{ pageId: 'p-edited', oldPath: 'Old/Path.md', newPath: 'New/Path.md' },
]);
// The body update carries the NEW path so importPageMarkdown reads the moved
// file and targets the moved page by its (stable) pageId; `basePath` is the OLD
// path so the merge base is the pre-rename file (honest 3-way merge).
expect(actions.updates).toEqual([
{ pageId: 'p-edited', path: 'New/Path.md', basePath: 'Old/Path.md' },
]);
expect(actions.creates).toEqual([]);
expect(actions.deletes).toEqual([]);
expect(actions.skipped).toEqual([]);
});
it('copy (C) is recorded like a rename for the deferred apply', () => {
const changes: DiffEntry[] = [
{ status: 'C', path: 'Copy.md', oldPath: 'Src.md', score: 90 },
];
const metaAt = metaTable({
'Copy.md|current': meta({ pageId: 'p-copy' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.renamesMoves).toEqual([
{ pageId: 'p-copy', oldPath: 'Src.md', newPath: 'Copy.md' },
]);
// The body also rides along as an UPDATE for the new path (F4). For a COPY the
// `basePath` is the SOURCE path (`Src.md`): the source still exists in the
// last-pushed tree, so the copy's body 3-way-merges against its source's
// last-synced text — a real common ancestor, not a null 2-way base.
expect(actions.updates).toEqual([
{ pageId: 'p-copy', path: 'Copy.md', basePath: 'Src.md' },
]);
});
it('renamed file with NO pageId -> skipped', () => {
const changes: DiffEntry[] = [
{ status: 'R', path: 'New.md', oldPath: 'Old.md', score: 100 },
];
const actions = computePushActions({ changes, metaAt: metaTable({}) });
expect(actions.renamesMoves).toEqual([]);
expect(actions.skipped).toEqual([
{ path: 'New.md', status: 'R', reason: 'renamed/moved file has no pageId in meta' },
]);
});
});
describe('computePushActions — mixed batch', () => {
it('classifies a realistic mixed diff in one pass', () => {
const changes: DiffEntry[] = [
{ status: 'A', path: 'Fresh.md' }, // create
{ status: 'A', path: 'Restored.md' }, // update (has pageId)
{ status: 'M', path: 'Edited.md' }, // update
{ status: 'D', path: 'Removed.md' }, // delete
{ status: 'R', path: 'Dst.md', oldPath: 'Srcc.md', score: 100 }, // move
];
const metaAt = metaTable({
'Fresh.md|current': meta({ title: 'Fresh', spaceId: 'sp' }),
'Restored.md|current': meta({ pageId: 'p-rest' }),
'Edited.md|current': meta({ pageId: 'p-edit' }),
'Removed.md|prev': meta({ pageId: 'p-rm' }),
'Dst.md|current': meta({ pageId: 'p-mv' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.creates).toEqual([{ path: 'Fresh.md' }]);
// The R row contributes BOTH a rename/move AND a body update for the new path
// (F4), so `p-mv` appears in updates too — in diff-row order, last.
expect(actions.updates).toEqual([
{ pageId: 'p-rest', path: 'Restored.md' },
{ pageId: 'p-edit', path: 'Edited.md' },
// The rename-derived body update carries basePath = the OLD path (`Srcc.md`)
// for an honest 3-way merge; the plain A/M updates carry no basePath.
{ pageId: 'p-mv', path: 'Dst.md', basePath: 'Srcc.md' },
]);
expect(actions.deletes).toEqual([{ pageId: 'p-rm' }]);
expect(actions.renamesMoves).toEqual([
{ pageId: 'p-mv', oldPath: 'Srcc.md', newPath: 'Dst.md' },
]);
expect(actions.skipped).toEqual([]);
});
});
describe('computePushActions — ghost-move coalescing (data-loss guard)', () => {
// git's `-M` rename detection misses a move when the files are too dissimilar
// (tiny meta-only files after a layout reshuffle of `_`-fallback names). git
// then reports the move as a DELETE of the old path + an ADD of the new one.
// Taken literally this soft-deletes a page that merely MOVED. The classifier
// must recognize the shared pageId and emit a rename/move, never a delete.
it('D(old)+A(new) of the SAME pageId -> rename/move, NOT a delete', () => {
const changes: DiffEntry[] = [
{ status: 'D', path: '_ ~slug.md' },
{ status: 'A', path: '_.md' },
];
const metaAt = metaTable({
'_ ~slug.md|prev': meta({ pageId: 'p1', title: '', spaceId: 'sp1' }),
'_.md|current': meta({ pageId: 'p1', title: '', spaceId: 'sp1' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.deletes).toEqual([]); // the page is NEVER trashed
// The coalesced move ALSO carries a body update for the new path (F4): a body
// edit accompanying the relocation must be pushed, not lost. `basePath` is the
// OLD (deleted) path so the 3-way merge base is the pre-move file, not null.
expect(actions.updates).toEqual([
{ pageId: 'p1', path: '_.md', basePath: '_ ~slug.md' },
]);
expect(actions.renamesMoves).toEqual([
{ pageId: 'p1', oldPath: '_ ~slug.md', newPath: '_.md' },
]);
// The suppressed delete is recorded as a skip with a clear reason.
expect(actions.skipped).toEqual([
{
path: '_ ~slug.md',
status: 'D',
reason: 'ghost-move (re-added at a new path) — not a deletion',
},
]);
});
it('D(old)+M(new) of the SAME pageId -> rename/move, NOT a delete (M-side reshuffle)', () => {
// A reshuffle: an ALREADY-existing path (`New.md`) takes on a new pageId while
// the old path (`Old.md`) is deleted — git reports the surviving side as `M`
// (the path was occupied), not `A`. Same pageId on both sides, so it is one
// page that relocated: the M-side ghost-move coalescing path.
const changes: DiffEntry[] = [
{ status: 'D', path: 'Old.md' },
{ status: 'M', path: 'New.md' },
];
const metaAt = metaTable({
'Old.md|prev': meta({ pageId: 'p1', title: 'Old', spaceId: 'sp1' }),
'New.md|current': meta({ pageId: 'p1', title: 'New', spaceId: 'sp1' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.deletes).toEqual([]); // the page is NEVER trashed
// The coalesced move ALSO carries a body update for the new path (F4); the
// merge base resolves from the OLD path, not null and not the new path.
expect(actions.updates).toEqual([
{ pageId: 'p1', path: 'New.md', basePath: 'Old.md' },
]);
expect(actions.renamesMoves).toEqual([
{ pageId: 'p1', oldPath: 'Old.md', newPath: 'New.md' },
]);
});
it('a real delete (no matching add) is STILL a delete', () => {
const changes: DiffEntry[] = [{ status: 'D', path: 'Gone.md' }];
const metaAt = metaTable({
'Gone.md|prev': meta({ pageId: 'p9', title: 'Gone', spaceId: 'sp1' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.deletes).toEqual([{ pageId: 'p9' }]);
expect(actions.renamesMoves).toEqual([]);
});
it('an unrelated D + A (different pageIds) are a real delete + a real update', () => {
const changes: DiffEntry[] = [
{ status: 'D', path: 'A.md' },
{ status: 'A', path: 'B.md' },
];
const metaAt = metaTable({
'A.md|prev': meta({ pageId: 'pa', title: 'A', spaceId: 'sp1' }),
'B.md|current': meta({ pageId: 'pb', title: 'B', spaceId: 'sp1' }),
});
const actions = computePushActions({ changes, metaAt });
expect(actions.deletes).toEqual([{ pageId: 'pa' }]);
expect(actions.updates).toEqual([{ pageId: 'pb', path: 'B.md' }]);
expect(actions.renamesMoves).toEqual([]);
});
});
describe('computePushActions — currentPageIds guard (cross-cycle move)', () => {
it('a D whose pageId still exists in the tree (no matching A in THIS diff) is NOT deleted', () => {
// The move happened across cycles: the new file landed earlier, so this diff
// only has the old path D. The pageId still lives in the tree -> not a delete.
const changes: DiffEntry[] = [{ status: 'D', path: '_ ~old.md' }];
const metaAt = metaTable({
'_ ~old.md|prev': meta({ pageId: 'pX', title: '', spaceId: 'sp1' }),
});
const actions = computePushActions({
changes,
metaAt,
currentPageIds: new Set(['pX']), // pX is still tracked somewhere on main
});
expect(actions.deletes).toEqual([]);
expect(actions.skipped).toEqual([
{
path: '_ ~old.md',
status: 'D',
reason: 'pageId still present in the tree (moved) — not a deletion',
},
]);
});
it('a D whose pageId is GONE from the tree is a real delete', () => {
const changes: DiffEntry[] = [{ status: 'D', path: 'Removed.md' }];
const metaAt = metaTable({
'Removed.md|prev': meta({ pageId: 'pY', title: 'Removed', spaceId: 'sp1' }),
});
const actions = computePushActions({
changes,
metaAt,
currentPageIds: new Set(['pOther']), // pY is NOT present -> genuinely deleted
});
expect(actions.deletes).toEqual([{ pageId: 'pY' }]);
expect(actions.skipped).toEqual([]);
});
});
describe('computePushActions — page-file filter (non-page files ignored)', () => {
it('IGNORES added/modified/deleted non-page files (.obsidian, dotfiles, non-.md)', () => {
// A vault commits `.obsidian/*`, attachments, dotfiles (no .gitignore), so
// they show up in the diff — but they are NEVER Docmost pages. Even though a
// synthetic metaAt would hand back a spaceId (the vault's), none of these may
// become a CREATE/UPDATE/DELETE. This pins the data-corruption guard: an
// added `.obsidian/workspace.json` must NOT create a page nor get a gitmost_id.
const changes: DiffEntry[] = [
{ status: 'A', path: '.obsidian/workspace.json' },
{ status: 'M', path: '.obsidian/app.json' },
{ status: 'A', path: 'attachments/diagram.png' },
{ status: 'A', path: '.hidden.md' }, // dotfile, even with .md
{ status: 'A', path: 'Notes/.config/x.md' }, // dot-segment mid-path
{ status: 'D', path: '.obsidian/old.json' },
];
// Every path resolves to a spaceId-bearing meta (the vault's space) — proving
// the filter, not a missing spaceId, is what screens them out.
const metaAt = (path: string): DocmostMdMeta =>
({ version: 1, title: 'x', spaceId: 'sp-vault' }) as DocmostMdMeta;
const actions = computePushActions({ changes, metaAt });
expect(actions.creates).toEqual([]);
expect(actions.updates).toEqual([]);
expect(actions.deletes).toEqual([]);
expect(actions.renamesMoves).toEqual([]);
expect(actions.skipped).toEqual([]); // not even recorded as skipped — ignored
});
it('still processes a normal .md page alongside ignored non-page files', () => {
const changes: DiffEntry[] = [
{ status: 'A', path: '.obsidian/workspace.json' },
{ status: 'A', path: 'Real Page.md' },
{ status: 'A', path: 'Folder/Note.md' },
];
const metaAt = (path: string): DocmostMdMeta =>
({ version: 1, title: 'x', spaceId: 'sp-vault' }) as DocmostMdMeta;
const actions = computePushActions({ changes, metaAt });
// Only the two real .md pages become creates; the .obsidian file is ignored.
expect(actions.creates).toEqual([
{ path: 'Real Page.md' },
{ path: 'Folder/Note.md' },
]);
});
});

Some files were not shown because too many files have changed in this diff Show More