feat(git-sync): native-Obsidian format — phase 1 = page-file (frontmatter gitmost_id)
Pivot the thin-meta design to "the vault IS a native Obsidian vault": clean markdown + a minimal YAML frontmatter `gitmost_id:` (the durable pageId, travels with the file so identity survives any move); folders mirror the page tree with the parent's body as a folder-note `<Folder>/<Folder>.md` (LostPaul Folder Notes convention); links as `[[wikilinks]]` (basename-resolved → reparent never breaks a link, only retitle does); collisions disambiguated Obsidian-style; `.obsidian/` and non-page files left untouched (no .gitignore). Verified the conventions against the Obsidian/Folder-Notes docs. Replaces the abandoned `.gitmost/index.json` sidecar (path-keyed → fragile to git-undetected renames; the in-file id is self-sufficient): removes vault-index.ts. Adds lib/page-file.ts — parsePageFile/serializePageFile (frontmatter id + clean body) with a LEGACY `docmost:meta` fallback for migration. 6 unit tests; engine suite green. Not yet wired into pull/push — no behavior change. Design doc rewritten to the native-Obsidian format. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
@@ -1,154 +0,0 @@
|
||||
/**
|
||||
* The vault SIDECAR index — `.gitmost/index.json`. It holds the ONLY service
|
||||
* metadata that is not derivable from the vault itself: a page's stable identity
|
||||
* (`pageId`) and its collision-disambiguation token (`slugId`), keyed by the
|
||||
* file's vault-relative (forward-slash) path. Everything else is derived:
|
||||
* - title -> the file/folder name (stem),
|
||||
* - parentPageId-> the enclosing folder's `index.md` (path-as-truth),
|
||||
* - spaceId -> the vault is the space,
|
||||
* - updatedAt -> git history.
|
||||
*
|
||||
* Keeping identity here (not in a `docmost:meta` block inside every file) lets
|
||||
* the `.md` files stay CLEAN markdown that any third-party editor (Obsidian, …)
|
||||
* reads and writes directly. This module is PURE (parse/serialize/lookup); all
|
||||
* file IO is the caller's (injected), matching the rest of the engine.
|
||||
*/
|
||||
|
||||
/**
 * Location of the sidecar index inside a space vault: vault-relative and
 * forward-slash separated.
 */
export const VAULT_INDEX_PATH = ".gitmost/index.json";
|
||||
|
||||
/**
 * Identity record stored for one tracked file.
 *
 * `slugId` is optional: a freshly adopted file has none until Docmost assigns
 * one on create.
 */
export interface VaultIndexEntry {
  /** Stable page identity. */
  pageId: string;
  /** Collision-disambiguation token; absent until one has been assigned. */
  slugId?: string;
}
|
||||
|
||||
/** In-memory form of the sidecar index. */
export interface VaultIndex {
  /** Schema version of the index. */
  version: number;
  /** The space this vault mirrors (one repo per space). Informational. */
  spaceId?: string;
  /** File path (forward-slash, vault-relative) -> identity of the page stored there. */
  pages: Map<string, VaultIndexEntry>;
}
|
||||
|
||||
/** Schema version stamped on every newly created index. */
const CURRENT_VERSION = 1;

/**
 * Create an index that tracks no pages.
 *
 * @param spaceId - Optional id of the space the vault mirrors; stored as given.
 * @returns A fresh index at the current schema version with its own empty map.
 */
export function emptyVaultIndex(spaceId?: string): VaultIndex {
  return { version: CURRENT_VERSION, spaceId, pages: new Map() };
}
|
||||
|
||||
/** True for a non-null, non-array object, i.e. something a JSON `{…}` literal produces. */
function isJsonObject(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/**
 * Parse the text of `.gitmost/index.json`.
 *
 * TOLERANT by construction: a missing file (`null`/`undefined`), blank text,
 * invalid JSON, or a non-object document all yield an empty index, and a
 * malformed page entry is skipped. A sync cycle must never crash on a bad
 * sidecar; the affected files are simply treated as un-tracked.
 *
 * Arrays are rejected wherever an object is expected. `typeof [] === "object"`,
 * so without the explicit check an array-valued `pages` would be read as a
 * record and produce entries keyed by "0", "1", … instead of by path.
 *
 * @param text - Raw file contents, or `null`/`undefined` when the file is absent.
 * @returns The parsed index; never throws.
 */
export function parseVaultIndex(text: string | null | undefined): VaultIndex {
  if (text == null || text.trim() === "") return emptyVaultIndex();

  let raw: unknown;
  try {
    raw = JSON.parse(text);
  } catch {
    // Unreadable sidecar: degrade to "nothing tracked" rather than fail the cycle.
    return emptyVaultIndex();
  }
  if (!isJsonObject(raw)) return emptyVaultIndex();

  const index = emptyVaultIndex(
    typeof raw.spaceId === "string" ? raw.spaceId : undefined,
  );
  if (typeof raw.version === "number") index.version = raw.version;

  const pages = raw.pages;
  if (!isJsonObject(pages)) return index;

  for (const [path, value] of Object.entries(pages)) {
    if (!isJsonObject(value)) continue;
    const { pageId, slugId } = value;
    // An entry without a usable pageId carries no identity: skip it.
    if (typeof pageId !== "string" || pageId === "") continue;
    index.pages.set(
      path,
      typeof slugId === "string" ? { pageId, slugId } : { pageId },
    );
  }
  return index;
}
|
||||
|
||||
/**
 * Serialize an index to STABLE JSON.
 *
 * Page paths are emitted in sorted order and each entry's keys in a fixed
 * order, so a re-sync that changes nothing yields byte-identical output:
 * minimal, deterministic git diffs and no churn (which the loop-guard relies
 * on). The output is indented by two spaces and ends with a newline.
 *
 * @param index - The index to write.
 * @returns The JSON text for the sidecar file.
 */
export function serializeVaultIndex(index: VaultIndex): string {
  // Default string ordering (UTF-16 code units); paths are unique map keys.
  const sorted = [...index.pages.entries()].sort(([a], [b]) =>
    a < b ? -1 : a > b ? 1 : 0,
  );

  const pages: Record<string, VaultIndexEntry> = {};
  for (const [path, entry] of sorted) {
    // Rebuild each entry so only known fields are written; a missing or empty
    // slugId is omitted.
    pages[path] = entry.slugId
      ? { pageId: entry.pageId, slugId: entry.slugId }
      : { pageId: entry.pageId };
  }

  const out: Record<string, unknown> = { version: index.version };
  if (index.spaceId) out.spaceId = index.spaceId;
  out.pages = pages;
  return JSON.stringify(out, null, 2) + "\n";
}
|
||||
|
||||
// --- lookups (pure) --------------------------------------------------------
|
||||
|
||||
/**
 * Look up the pageId tracked at a path.
 *
 * @param index - The index to query.
 * @param path - Vault-relative, forward-slash file path.
 * @returns The pageId, or `undefined` when the path is not tracked.
 */
export function pageIdAt(index: VaultIndex, path: string): string | undefined {
  const entry = index.pages.get(path);
  return entry?.pageId;
}
|
||||
|
||||
/**
 * Look up the slugId tracked at a path.
 *
 * @param index - The index to query.
 * @param path - Vault-relative, forward-slash file path.
 * @returns The slugId, or `undefined` when the path is not tracked or its
 *   entry has no slugId.
 */
export function slugIdAt(index: VaultIndex, path: string): string | undefined {
  const entry = index.pages.get(path);
  return entry?.slugId;
}
|
||||
|
||||
/**
 * Reverse lookup: the CURRENT path of a pageId.
 *
 * Used by push to decide identity: a vanished file whose pageId still resolves
 * to a (different) tracked path is a MOVE, not a delete.
 *
 * This is a linear scan in map insertion order; the first matching path wins.
 *
 * @param index - The index to query.
 * @param pageId - The page identity to find.
 * @returns The tracked path, or `undefined` when no entry carries that pageId.
 */
export function pathForPageId(
  index: VaultIndex,
  pageId: string,
): string | undefined {
  for (const [path, { pageId: tracked }] of index.pages) {
    if (tracked === pageId) return path;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Collect every pageId currently tracked in the index.
 *
 * @param index - The index to query.
 * @returns A new set of pageIds; duplicates across paths collapse to one.
 */
export function trackedPageIds(index: VaultIndex): Set<string> {
  return new Set(Array.from(index.pages.values(), (entry) => entry.pageId));
}
|
||||
|
||||
// --- mutations (in place; the index is a builder during a cycle) -----------
|
||||
|
||||
/**
 * Track `entry` at `path`, replacing whatever was tracked there before.
 * Mutates `index` in place.
 *
 * @param index - The index to update.
 * @param path - Vault-relative, forward-slash file path.
 * @param entry - Identity to store; the object is stored as given (not copied).
 */
export function setEntry(
  index: VaultIndex,
  path: string,
  entry: VaultIndexEntry,
): void {
  index.pages.set(path, entry);
}
|
||||
|
||||
/**
 * Stop tracking the file at `path`. Mutates `index` in place; a path that is
 * not tracked is a no-op.
 *
 * @param index - The index to update.
 * @param path - Vault-relative, forward-slash file path.
 */
export function removeAt(index: VaultIndex, path: string): void {
  index.pages.delete(path);
}
|
||||
|
||||
/**
 * Move a tracked entry from one path to another (a rename/reparent), keeping
 * its identity. Mutates `index` in place.
 *
 * No-op if `fromPath` is not tracked. Anything already tracked at `toPath` is
 * replaced.
 *
 * @param index - The index to update.
 * @param fromPath - Path the entry is currently tracked at.
 * @param toPath - Path the entry should be tracked at afterwards.
 */
export function moveEntry(
  index: VaultIndex,
  fromPath: string,
  toPath: string,
): void {
  const moving = index.pages.get(fromPath);
  if (moving === undefined) return;
  // Delete first so a move onto the same path still ends with the entry present.
  index.pages.delete(fromPath);
  index.pages.set(toPath, moving);
}
|
||||
@@ -120,17 +120,4 @@ export type {
|
||||
CycleFs,
|
||||
} from "./engine/cycle";
|
||||
|
||||
export {
|
||||
VAULT_INDEX_PATH,
|
||||
emptyVaultIndex,
|
||||
parseVaultIndex,
|
||||
serializeVaultIndex,
|
||||
pageIdAt,
|
||||
slugIdAt,
|
||||
pathForPageId,
|
||||
trackedPageIds,
|
||||
setEntry,
|
||||
removeAt,
|
||||
moveEntry,
|
||||
} from "./engine/vault-index";
|
||||
export type { VaultIndex, VaultIndexEntry } from "./engine/vault-index";
|
||||
export { parsePageFile, serializePageFile } from "./lib/page-file";
|
||||
|
||||
@@ -25,3 +25,4 @@ export {
|
||||
canonicalizeContent,
|
||||
docsCanonicallyEqual,
|
||||
} from "./canonicalize";
|
||||
export { parsePageFile, serializePageFile } from "./page-file";
|
||||
|
||||
86
packages/git-sync/src/lib/page-file.ts
Normal file
86
packages/git-sync/src/lib/page-file.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
import { parseDocmostMarkdown } from "./markdown-document";
|
||||
|
||||
/**
 * The THIN page-file format (design: docs/backlog/git-sync-thin-meta.md, option
 * C). A page file is CLEAN markdown with a minimal YAML frontmatter carrying ONLY
 * the page's durable identity:
 *
 *   ---
 *   gitmost_id: 019ef6fc-2638-7ce1-9ce3-2756ce038480
 *   ---
 *   <clean markdown body>
 *
 * Everything else is derived (title = filename, parentPageId = enclosing folder,
 * spaceId = the vault, updatedAt = git). The `gitmost_id` (a Docmost pageId) is
 * the only non-derivable bit and travels WITH the file so identity survives any
 * move, even one git's rename detection misses. Third-party editors (Obsidian, …)
 * see clean markdown; the frontmatter is hidden in their preview.
 *
 * MIGRATION: a file may still carry the LEGACY `<!-- docmost:meta {…} -->` block
 * (the pre-thin format). `parsePageFile` reads the id from the frontmatter first,
 * then falls back to the legacy meta — so old vaults keep working and a re-sync
 * rewrites them into the thin format.
 */
|
||||
|
||||
/**
 * Frontmatter key under which the Docmost pageId is stored. It is NAMESPACED
 * rather than a bare `id` so it cannot collide with a frontmatter field the
 * user defines for their own purposes.
 */
export const ID_KEY = "gitmost_id";
|
||||
|
||||
/**
 * Leading YAML frontmatter block at the very start of the file:
 *
 *   ---
 *   <yaml>            (capture group 1; at least one line)
 *   ---
 *
 * - An optional byte-order mark is tolerated before the opening fence. It is
 *   written as an explicit `\uFEFF` escape: an invisible literal character is
 *   easy to lose, and a bare `^?` is a regex SyntaxError ("nothing to repeat").
 * - A fence line is exactly `---` plus optional trailing spaces/tabs. The
 *   closing fence must be followed by a newline or the end of the file, so
 *   `----` or `---text` inside the block does not terminate it.
 * - The match consumes the newline after the closing fence, so
 *   `text.slice(match[0].length)` is the body.
 */
const FRONTMATTER_RE = /^\uFEFF?---[ \t]*\n([\s\S]*?)\n---[ \t]*(?:\n|$)/;
|
||||
|
||||
/**
 * Extract the page id from the text between the frontmatter fences.
 *
 * Only a top-level `<ID_KEY>: <value>` line is recognised (the key must start
 * at column 0); the first such line wins. One surrounding quote character
 * (`"` or `'`) is stripped from each end of the value.
 *
 * @param yaml - Frontmatter text, newline-separated.
 * @returns The id, or `null` when the key is absent or its value is empty.
 */
function readIdFromYaml(yaml: string): string | null {
  const keyLine = new RegExp(`^${ID_KEY}:\\s*(.+?)\\s*$`);
  for (const line of yaml.split("\n")) {
    const match = keyLine.exec(line);
    if (match === null) continue;
    const value = match[1].trim().replace(/^["']|["']$/g, "");
    // `gitmost_id: ""` carries no identity.
    return value === "" ? null : value;
  }
  return null;
}
|
||||
|
||||
/**
 * Parse a page file into its identity (`id`) and clean markdown `body`.
 *
 * Line endings are normalised to `\n` first. Formats are tried in order:
 *
 * 1. Thin format: a leading YAML frontmatter block. The id is read from it and
 *    the body is everything after the block, trimmed. If the block has no id
 *    key, `id` is `null`.
 * 2. Legacy format: a leading `<!-- docmost:meta … -->` block (migration
 *    fallback), parsed by `parseDocmostMarkdown`.
 * 3. Plain markdown: a hand-written third-party file. Returns `id: null` and
 *    the whole trimmed text as the body; the caller then ADOPTS it (creates a
 *    page, writes the id back).
 *
 * NOTE(review): frontmatter keys other than the id are neither returned nor
 * kept in `body`, so a parse → `serializePageFile` round trip drops them.
 * Confirm this is intended for user-authored Obsidian properties.
 *
 * @param full - Complete file contents; a nullish value is treated as empty.
 * @returns The id (or `null` when the file is un-tracked) and the body.
 */
export function parsePageFile(full: string): {
  id: string | null;
  body: string;
} {
  const text = (full ?? "").replace(/\r\n/g, "\n");

  // 1. Thin format: YAML frontmatter.
  const frontmatter = FRONTMATTER_RE.exec(text);
  if (frontmatter !== null) {
    const body = text.slice(frontmatter[0].length).trim();
    return { id: readIdFromYaml(frontmatter[1]), body };
  }

  // 2. Legacy format: `<!-- docmost:meta -->` block (migration fallback).
  if (/^\s*<!--\s*docmost:meta/.test(text)) {
    try {
      const { meta, body } = parseDocmostMarkdown(text);
      return { id: meta?.pageId ?? null, body };
    } catch {
      // Deliberate best-effort: a corrupt legacy block is treated as an
      // un-tracked plain file (adopt) instead of failing the parse.
    }
  }

  // 3. Plain markdown: un-tracked (no identity yet).
  return { id: null, body: text.trim() };
}
|
||||
|
||||
/**
 * Serialize a page into the thin format: the id frontmatter, a blank line, the
 * trimmed body and a single trailing newline.
 *
 * The output is a pure function of its inputs, so an unchanged page re-syncs
 * to byte-identical text (no churn — the loop-guard relies on it).
 *
 * @param id - The Docmost pageId; written verbatim (no quoting or escaping).
 * @param body - Markdown body; leading/trailing whitespace is trimmed.
 * @returns The complete file contents.
 */
export function serializePageFile(id: string, body: string): string {
  const frontmatter = `---\n${ID_KEY}: ${id}\n---\n`;
  return `${frontmatter}\n${body.trim()}\n`;
}
|
||||
43
packages/git-sync/test/page-file.test.ts
Normal file
43
packages/git-sync/test/page-file.test.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { parsePageFile, serializePageFile } from "../src/lib/page-file";
|
||||
import { serializeDocmostMarkdownBody } from "../src/lib/index";
|
||||
|
||||
// Unit tests for the thin page-file format: frontmatter id + clean body,
// the legacy `docmost:meta` migration fallback, and adoption of plain files.
describe("page-file thin format", () => {
  it("round-trips id frontmatter + clean body", () => {
    const text = serializePageFile("019ef6fc-2638", "# Hello\n\nbody text");
    expect(text.startsWith("---\ngitmost_id: 019ef6fc-2638\n---\n")).toBe(true);
    const { id, body } = parsePageFile(text);
    expect(id).toBe("019ef6fc-2638");
    expect(body).toBe("# Hello\n\nbody text");
  });

  it("serialization is deterministic (byte-identical for the same input)", () => {
    expect(serializePageFile("p", "x")).toBe(serializePageFile("p", "x"));
  });

  it("reads id from frontmatter with quotes / extra fields", () => {
    expect(parsePageFile('---\ngitmost_id: "abc"\ntitle: ignored\n---\nbody').id).toBe("abc");
    expect(parsePageFile("---\ngitmost_id: 'xyz'\n---\nbody").id).toBe("xyz");
  });

  it("tolerates a leading BOM before the frontmatter", () => {
    const { id, body } = parsePageFile("\uFEFF---\ngitmost_id: abc\n---\nbody");
    expect(id).toBe("abc");
    expect(body).toBe("body");
  });

  it("MIGRATION: falls back to a legacy docmost:meta block for the id", () => {
    const legacy = serializeDocmostMarkdownBody(
      { version: 1, pageId: "legacy-1", title: "T", spaceId: "sp" },
      "old body",
    );
    const { id, body } = parsePageFile(legacy);
    expect(id).toBe("legacy-1");
    expect(body).toContain("old body");
  });

  it("ADOPT: a plain hand-written file has no id and keeps its whole body", () => {
    const { id, body } = parsePageFile("# Just a note\n\nwritten in Obsidian");
    expect(id).toBeNull();
    expect(body).toBe("# Just a note\n\nwritten in Obsidian");
  });

  it("tolerates empty / whitespace input", () => {
    expect(parsePageFile("").id).toBeNull();
    expect(parsePageFile(" \n ").body).toBe("");
  });
});
|
||||
@@ -1,78 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
emptyVaultIndex,
|
||||
parseVaultIndex,
|
||||
serializeVaultIndex,
|
||||
pageIdAt,
|
||||
slugIdAt,
|
||||
pathForPageId,
|
||||
trackedPageIds,
|
||||
setEntry,
|
||||
removeAt,
|
||||
moveEntry,
|
||||
} from "../src/engine/vault-index";
|
||||
|
||||
// Parse/serialize contract of the sidecar index: lossless round trip,
// order-independent output, and tolerance of missing or malformed input.
describe("vault-index parse/serialize", () => {
  it("round-trips a populated index", () => {
    const idx = emptyVaultIndex("sp1");
    setEntry(idx, "Проект/index.md", { pageId: "p1", slugId: "Ab12" });
    setEntry(idx, "Заметка.md", { pageId: "p2" });
    const text = serializeVaultIndex(idx);
    const back = parseVaultIndex(text);
    expect(back.spaceId).toBe("sp1");
    expect(pageIdAt(back, "Проект/index.md")).toBe("p1");
    expect(slugIdAt(back, "Проект/index.md")).toBe("Ab12");
    expect(pageIdAt(back, "Заметка.md")).toBe("p2");
    expect(slugIdAt(back, "Заметка.md")).toBeUndefined();
  });

  it("serializes deterministically (sorted keys -> stable diffs)", () => {
    const a = emptyVaultIndex("s");
    setEntry(a, "b.md", { pageId: "2" });
    setEntry(a, "a.md", { pageId: "1" });
    const b = emptyVaultIndex("s");
    setEntry(b, "a.md", { pageId: "1" });
    setEntry(b, "b.md", { pageId: "2" });
    // insertion order differs; serialized output must be identical.
    expect(serializeVaultIndex(a)).toBe(serializeVaultIndex(b));
    // keys are sorted in the output
    expect(serializeVaultIndex(a).indexOf('"a.md"')).toBeLessThan(
      serializeVaultIndex(a).indexOf('"b.md"'),
    );
  });

  it("is tolerant: null / garbage / bad entries -> empty or skipped", () => {
    expect(parseVaultIndex(null).pages.size).toBe(0);
    expect(parseVaultIndex("").pages.size).toBe(0);
    expect(parseVaultIndex("not json{").pages.size).toBe(0);
    expect(parseVaultIndex("[1,2,3]").pages.size).toBe(0);
    // a page entry missing pageId is skipped, valid ones kept
    const idx = parseVaultIndex(
      JSON.stringify({ version: 1, pages: { "ok.md": { pageId: "p" }, "bad.md": { slugId: "x" } } }),
    );
    expect(idx.pages.size).toBe(1);
    expect(pageIdAt(idx, "ok.md")).toBe("p");
  });
});
|
||||
|
||||
// Lookup helpers and in-place mutations of the sidecar index.
describe("vault-index lookups + mutations", () => {
  it("reverse lookup + tracked set", () => {
    const idx = emptyVaultIndex();
    setEntry(idx, "x.md", { pageId: "px" });
    setEntry(idx, "y/index.md", { pageId: "py" });
    expect(pathForPageId(idx, "py")).toBe("y/index.md");
    expect(pathForPageId(idx, "missing")).toBeUndefined();
    expect([...trackedPageIds(idx)].sort()).toEqual(["px", "py"]);
  });

  it("moveEntry relocates identity; removeAt drops it", () => {
    const idx = emptyVaultIndex();
    setEntry(idx, "Old.md", { pageId: "p", slugId: "s" });
    moveEntry(idx, "Old.md", "New/index.md");
    expect(pageIdAt(idx, "Old.md")).toBeUndefined();
    expect(pageIdAt(idx, "New/index.md")).toBe("p");
    expect(slugIdAt(idx, "New/index.md")).toBe("s"); // identity preserved
    removeAt(idx, "New/index.md");
    expect(pageIdAt(idx, "New/index.md")).toBeUndefined();
  });
});
|
||||
Reference in New Issue
Block a user