feat(git-sync): native-Obsidian format — phase 1 = page-file (frontmatter gitmost_id)

Pivot the thin-meta design to "the vault IS a native Obsidian vault": clean
markdown + a minimal YAML frontmatter `gitmost_id:` (the durable pageId, travels
with the file so identity survives any move); folders mirror the page tree with
the parent's body as a folder-note `<Folder>/<Folder>.md` (LostPaul Folder Notes
convention); links as `[[wikilinks]]` (basename-resolved → reparent never breaks a
link, only retitle does); collisions disambiguated Obsidian-style; `.obsidian/`
and non-page files left untouched (no .gitignore). Verified the conventions
against the Obsidian/Folder-Notes docs.

Replaces the abandoned `.gitmost/index.json` sidecar (path-keyed → fragile to
git-undetected renames; the in-file id is self-sufficient): removes vault-index.ts.
Adds lib/page-file.ts — parsePageFile/serializePageFile (frontmatter id + clean
body) with a LEGACY `docmost:meta` fallback for migration. 6 unit tests; engine
suite green. Not yet wired into pull/push — no behavior change. Design doc
rewritten to the native-Obsidian format.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-24 04:25:04 +03:00
parent c7d4b6f820
commit c4af91391f
7 changed files with 226 additions and 389 deletions

View File

@@ -1,154 +0,0 @@
/**
* The vault SIDECAR index — `.gitmost/index.json`. It holds the ONLY service
* metadata that is not derivable from the vault itself: a page's stable identity
* (`pageId`) and its collision-disambiguation token (`slugId`), keyed by the
* file's vault-relative (forward-slash) path. Everything else is derived:
* - title -> the file/folder name (stem),
* - parentPageId-> the enclosing folder's `index.md` (path-as-truth),
* - spaceId -> the vault is the space,
* - updatedAt -> git history.
*
* Keeping identity here (not in a `docmost:meta` block inside every file) lets
* the `.md` files stay CLEAN markdown that any third-party editor (Obsidian, …)
* reads and writes directly. This module is PURE (parse/serialize/lookup); all
* file IO is the caller's (injected), matching the rest of the engine.
*/
/**
 * Location of the sidecar index inside a space vault, written as a
 * vault-relative path with forward slashes.
 */
export const VAULT_INDEX_PATH = ".gitmost/index.json";
/**
 * Per-file identity record stored in the sidecar index.
 *
 * `slugId` is optional: a freshly adopted file has none until Docmost assigns
 * one on create.
 */
export interface VaultIndexEntry {
/** The page's stable identity. */
pageId: string;
/** Collision-disambiguation token; absent until one has been assigned. */
slugId?: string;
}
/** In-memory form of the `.gitmost/index.json` sidecar. */
export interface VaultIndex {
/** Schema version number carried by the index file. */
version: number;
/** The space this vault mirrors (one repo per space). Informational. */
spaceId?: string;
/** File path (forward-slash, vault-relative) -> identity of the page stored there. */
pages: Map<string, VaultIndexEntry>;
}
/** Schema version stamped onto every freshly created index. */
const CURRENT_VERSION = 1;

/**
 * Create an index that tracks no pages, at the current schema version.
 *
 * @param spaceId - Optional id of the space the vault mirrors.
 * @returns A new index owning its own empty `pages` map.
 */
export function emptyVaultIndex(spaceId?: string): VaultIndex {
  return { version: CURRENT_VERSION, spaceId, pages: new Map() };
}

/** True for a non-null, non-array object — the only JSON shape that has named keys. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse the text of `.gitmost/index.json`. TOLERANT by construction — a sync
 * cycle must never crash on a bad sidecar, so every malformed shape degrades
 * instead of throwing:
 *  - missing (`null`/`undefined`), blank, or invalid-JSON text -> empty index;
 *  - a root that is not a JSON object                          -> empty index;
 *  - a `pages` value that is not a JSON object (including an array, whose
 *    numeric indices must not be mistaken for vault paths)     -> no pages;
 *  - an entry that is not an object, or whose `pageId` is not a non-empty
 *    string                                                    -> skipped;
 *  - a `slugId` that is not a string                           -> dropped.
 *
 * @param text - Raw file content, or `null`/`undefined` when the file is absent.
 * @returns The parsed index; `version` falls back to the current schema version
 *   when the file carries no numeric `version`.
 */
export function parseVaultIndex(text: string | null | undefined): VaultIndex {
  if (text == null || text.trim() === "") return emptyVaultIndex();

  let raw: unknown;
  try {
    raw = JSON.parse(text);
  } catch {
    // Deliberate best-effort: unreadable JSON is treated like a missing file.
    return emptyVaultIndex();
  }
  if (!isPlainRecord(raw)) return emptyVaultIndex();

  const index = emptyVaultIndex(
    typeof raw.spaceId === "string" ? raw.spaceId : undefined,
  );
  if (typeof raw.version === "number") index.version = raw.version;

  // An array also has `typeof === "object"`; reject it so list positions
  // ("0", "1", …) are never registered as file paths.
  if (!isPlainRecord(raw.pages)) return index;

  for (const [path, value] of Object.entries(raw.pages)) {
    if (!isPlainRecord(value)) continue;
    const { pageId, slugId } = value;
    if (typeof pageId !== "string" || pageId === "") continue;
    index.pages.set(
      path,
      typeof slugId === "string" ? { pageId, slugId } : { pageId },
    );
  }
  return index;
}
/**
 * Render the index as STABLE, pretty-printed JSON. Paths are emitted in sorted
 * order and each entry's keys in a fixed order, so re-serializing an unchanged
 * index gives byte-identical text (minimal, deterministic git diffs — the
 * loop-guard relies on that). The output ends with a newline.
 *
 * @param index - The index to write out.
 * @returns JSON text with 2-space indentation. `spaceId` and per-entry `slugId`
 *   are left out when they are empty or unset.
 */
export function serializeVaultIndex(index: VaultIndex): string {
  const sortedPaths = Array.from(index.pages.keys()).sort();

  const pages: Record<string, VaultIndexEntry> = {};
  for (const path of sortedPaths) {
    const entry = index.pages.get(path);
    // Cannot happen: `path` was just read from the map's own keys.
    if (entry === undefined) continue;
    const { pageId, slugId } = entry;
    pages[path] = slugId ? { pageId, slugId } : { pageId };
  }

  // Key order is part of the format: version, then spaceId (if any), then pages.
  const doc: Record<string, unknown> = index.spaceId
    ? { version: index.version, spaceId: index.spaceId, pages }
    : { version: index.version, pages };

  return `${JSON.stringify(doc, null, 2)}\n`;
}
// --- lookups (pure) --------------------------------------------------------
/**
 * Look up which page is tracked at a given file path.
 *
 * @param index - The index to query.
 * @param path - Path used as the key in `index.pages`.
 * @returns The entry's `pageId`, or `undefined` when the path is not tracked.
 */
export function pageIdAt(index: VaultIndex, path: string): string | undefined {
  const entry = index.pages.get(path);
  return entry == null ? undefined : entry.pageId;
}
/**
 * Look up the slugId recorded for a given file path.
 *
 * @param index - The index to query.
 * @param path - Path used as the key in `index.pages`.
 * @returns The entry's `slugId`, or `undefined` when the path is not tracked or
 *   the entry carries no slugId.
 */
export function slugIdAt(index: VaultIndex, path: string): string | undefined {
  const entry = index.pages.get(path);
  return entry == null ? undefined : entry.slugId;
}
/**
 * Reverse lookup: find the path at which a pageId is currently tracked. Push
 * uses this to decide identity — a vanished file whose pageId still resolves to
 * a (different) tracked path is a MOVE, not a delete.
 *
 * @param index - The index to search.
 * @param pageId - The page identity to look for.
 * @returns The first path (in the map's iteration order) whose entry has this
 *   `pageId`, or `undefined` when no entry matches.
 */
export function pathForPageId(
  index: VaultIndex,
  pageId: string,
): string | undefined {
  const hit = [...index.pages].find(([, entry]) => entry.pageId === pageId);
  return hit?.[0];
}
/**
 * Collect every pageId the index currently tracks.
 *
 * @param index - The index to read.
 * @returns A new `Set` of pageIds; a pageId tracked under several paths appears once.
 */
export function trackedPageIds(index: VaultIndex): Set<string> {
  return new Set(Array.from(index.pages.values(), (entry) => entry.pageId));
}
// --- mutations (in place; the index is a builder during a cycle) -----------
/**
 * Record (or overwrite) the identity tracked at `path`. Mutates `index` in place.
 *
 * @param index - The index to update.
 * @param path - Key under which the entry is stored.
 * @param entry - The identity record to store; the same object is kept, not copied.
 */
export function setEntry(
  index: VaultIndex,
  path: string,
  entry: VaultIndexEntry,
): void {
  const { pages } = index;
  pages.set(path, entry);
}
/**
 * Stop tracking whatever entry is stored at `path`. Mutates `index` in place;
 * a path that is not tracked is left as a no-op.
 *
 * @param index - The index to update.
 * @param path - Key of the entry to drop.
 */
export function removeAt(index: VaultIndex, path: string): void {
  const { pages } = index;
  pages.delete(path);
}
/**
 * Re-key a tracked entry from one path to another (a rename/reparent) while
 * keeping its identity record. Mutates `index` in place.
 *
 * Does nothing when `fromPath` is not tracked. An entry already stored at
 * `toPath` is overwritten.
 *
 * @param index - The index to update.
 * @param fromPath - Path the entry is currently stored under.
 * @param toPath - Path the entry should be stored under afterwards.
 */
export function moveEntry(
  index: VaultIndex,
  fromPath: string,
  toPath: string,
): void {
  const { pages } = index;
  const moved = pages.get(fromPath);
  if (moved) {
    // Delete first, then insert — in that order, so a move onto the same path
    // still ends with the entry present.
    pages.delete(fromPath);
    pages.set(toPath, moved);
  }
}

View File

@@ -120,17 +120,4 @@ export type {
CycleFs,
} from "./engine/cycle";
export {
VAULT_INDEX_PATH,
emptyVaultIndex,
parseVaultIndex,
serializeVaultIndex,
pageIdAt,
slugIdAt,
pathForPageId,
trackedPageIds,
setEntry,
removeAt,
moveEntry,
} from "./engine/vault-index";
export type { VaultIndex, VaultIndexEntry } from "./engine/vault-index";
export { parsePageFile, serializePageFile } from "./lib/page-file";

View File

@@ -25,3 +25,4 @@ export {
canonicalizeContent,
docsCanonicallyEqual,
} from "./canonicalize";
export { parsePageFile, serializePageFile } from "./page-file";

View File

@@ -0,0 +1,86 @@
import { parseDocmostMarkdown } from "./markdown-document";
/**
* The THIN page-file format (design: docs/backlog/git-sync-thin-meta.md, option
* C). A page file is CLEAN markdown with a minimal YAML frontmatter carrying ONLY
* the page's durable identity:
*
* ---
* gitmost_id: 019ef6fc-2638-7ce1-9ce3-2756ce038480
* ---
* <clean markdown body>
*
* Everything else is derived (title = filename, parentPageId = enclosing folder,
* spaceId = the vault, updatedAt = git). The `gitmost_id` (a Docmost pageId) is
* the only non-derivable bit and travels WITH the file so identity survives any
* move, even one git's rename detection misses. Third-party editors (Obsidian, …)
* see clean markdown; the frontmatter is hidden in their preview.
*
* MIGRATION: a file may still carry the LEGACY `<!-- docmost:meta {…} -->` block
* (the pre-thin format). `parsePageFile` reads the id from the frontmatter first,
* then falls back to the legacy meta — so old vaults keep working and a re-sync
* rewrites them into the thin format.
*/
/**
 * The frontmatter key carrying the Docmost pageId. NAMESPACED (not a bare `id`)
 * so it never collides with a user's own frontmatter fields.
 *
 * `readIdFromYaml` interpolates this value into a RegExp without escaping and
 * `serializePageFile` writes it verbatim, so it must stay free of regex
 * metacharacters.
 */
export const ID_KEY = "gitmost_id";
/**
 * Leading YAML frontmatter block: `---\n…\n---` at the very start of the file,
 * optionally preceded by a UTF-8 byte-order mark (U+FEFF) that some editors
 * prepend. The BOM is written as the `\uFEFF` escape, never as a literal
 * character: it is invisible and easily stripped, and losing it leaves `^?`,
 * which is a RegExp syntax error ("nothing to repeat").
 *
 * Capture group 1 is the YAML between the fences; the whole match also consumes
 * the newline after the closing fence when there is one. Expects `\n` line
 * endings.
 */
const FRONTMATTER_RE = /^\uFEFF?---\n([\s\S]*?)\n---\n?/;
/**
 * Extract the page id from raw frontmatter YAML by scanning it line by line for
 * a top-level `<ID_KEY>: <value>` entry (a line that starts with the key, with
 * no indentation). The first matching line wins. One surrounding quote
 * character on each end of the value, single or double, is stripped.
 *
 * @param yaml - Text between the frontmatter fences, using `\n` line endings.
 * @returns The id, or `null` when the first matching line has an empty value or
 *   no line matches.
 */
function readIdFromYaml(yaml: string): string | null {
  const idLine = new RegExp(`^${ID_KEY}:\\s*(.+?)\\s*$`);
  const hit = yaml
    .split("\n")
    .map((line) => line.match(idLine))
    .find((match) => match !== null);
  if (!hit) return null;

  const value = hit[1].trim().replace(/^["']|["']$/g, "");
  return value === "" ? null : value;
}
/**
 * Split a page file into its identity (`id`) and its clean markdown `body`.
 * CRLF line endings are normalized to LF first, then three shapes are tried in
 * order:
 *
 *  1. Thin format — a leading YAML frontmatter block. The id comes from its
 *     `ID_KEY` line (`null` when that line is absent) and the body is everything
 *     after the block, trimmed. The frontmatter block itself, including any
 *     other keys it holds, is not part of the returned body.
 *  2. Legacy format — the text starts with a `<!-- docmost:meta` comment. The
 *     id is the legacy meta's `pageId` and the body is whatever
 *     `parseDocmostMarkdown` returns. If that parser throws, the file falls
 *     through to case 3.
 *  3. Plain markdown — a hand-written third-party file: `id: null` and the
 *     whole trimmed text as the body, so the caller can ADOPT it (create a page
 *     and write the id back).
 *
 * @param full - Complete file content.
 * @returns The page id (or `null` when the file is un-tracked) and the body.
 */
export function parsePageFile(full: string): {
  id: string | null;
  body: string;
} {
  const normalized = (full ?? "").replace(/\r\n/g, "\n");

  // 1. Thin format: YAML frontmatter.
  const frontmatter = normalized.match(FRONTMATTER_RE);
  if (frontmatter) {
    const [fence, yaml] = frontmatter;
    const id = readIdFromYaml(yaml);
    const body = normalized.slice(fence.length).trim();
    return { id, body };
  }

  // 2. Legacy format: `<!-- docmost:meta -->` block (migration fallback).
  const looksLegacy = /^\s*<!--\s*docmost:meta/.test(normalized);
  if (looksLegacy) {
    try {
      const { meta, body } = parseDocmostMarkdown(normalized);
      const id = meta?.pageId ?? null;
      return { id, body };
    } catch {
      // Deliberate: a corrupt legacy block is handled as an un-tracked plain
      // file (adopted below) rather than failing the parse.
    }
  }

  // 3. Plain markdown — un-tracked (no identity yet).
  return { id: null, body: normalized.trim() };
}
/**
 * Render a page in the thin format: a frontmatter block holding only the
 * `ID_KEY` line, one blank line, the trimmed body, and a single trailing
 * newline. The output depends only on the two arguments, so an unchanged page
 * re-syncs to byte-identical text (no churn — the loop-guard relies on it).
 *
 * @param id - The page id to store in the frontmatter; written verbatim.
 * @param body - Markdown body; leading and trailing whitespace is trimmed.
 * @returns The complete file content.
 */
export function serializePageFile(id: string, body: string): string {
  const header = `---\n${ID_KEY}: ${id}\n---\n`;
  const cleanBody = body.trim();
  return `${header}\n${cleanBody}\n`;
}