feat(git-sync): phase 2b — PULL writes native gitmost_id frontmatter

PULL now serializes each page in the native-Obsidian format (serializePageFile:
a minimal gitmost_id frontmatter + the fixpoint markdown body) instead of the
heavy docmost:meta envelope. title/parent/space are derived (filename / folder /
repo), so only the pageId is persisted. readExisting recovers identity from the
gitmost_id frontmatter (parsePageFile) instead of docmost:meta.

Extracted stabilizePageBody() (the export->import->export fixpoint, no meta) so
the native writer and the legacy serializer share the same deterministic body —
re-pulls of an unchanged page stay byte-identical (loop-guard).

Tests: read-existing fixtures rewritten to gitmost_id; apply-pull asserts the
written text is native frontmatter and carries NO docmost:meta (regression
guard). 611 engine tests green.

NOTE: PUSH still reads docmost:meta — the end-to-end cycle is intentionally NOT
runnable until phase 3 (PUSH reads frontmatter + derives title/parent from path)
lands; no vault is wiped/deployed until then.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-24 04:42:42 +03:00
parent 8c42c4f0d6
commit 73c5c44301
4 changed files with 73 additions and 53 deletions

View File

@@ -8,7 +8,7 @@
* 2. checkout docmost * 2. checkout docmost
* 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the * 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the
* desired `live` files (relPath via the pure sanitize/disambiguation layout) * desired `live` files (relPath via the pure sanitize/disambiguation layout)
* 4. parse `existing` tracked .md files (pageId + relPath from docmost:meta) * 4. parse `existing` tracked .md files (pageId + relPath from gitmost_id frontmatter)
* 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete * 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete
* is absence-only, moves are separate * is absence-only, moves are separate
* 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree * 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree
@@ -32,7 +32,7 @@
*/ */
import { dirname } from "node:path"; import { dirname } from "node:path";
import { sep } from "node:path"; import { sep } from "node:path";
import { parseDocmostMarkdown } from "../lib/index"; import { parsePageFile, serializePageFile } from "../lib/page-file";
import type { GitSyncClient } from "./client.types"; import type { GitSyncClient } from "./client.types";
import { buildVaultLayout, type PageNode } from "./layout"; import { buildVaultLayout, type PageNode } from "./layout";
import { import {
@@ -48,7 +48,7 @@ import {
type MovedEntry, type MovedEntry,
type DeletionDecision, type DeletionDecision,
} from "./reconcile"; } from "./reconcile";
import { stabilizePageFile, type PageMeta } from "./stabilize"; import { stabilizePageBody } from "./stabilize";
// Engine-only mirror branch (SPEC §5): the engine writes here, humans never do. // Engine-only mirror branch (SPEC §5): the engine writes here, humans never do.
const DOCMOST_BRANCH = "docmost"; const DOCMOST_BRANCH = "docmost";
@@ -85,15 +85,15 @@ export interface ReadExistingDeps {
} }
/** /**
* Read every tracked .md file in the vault and parse its `docmost:meta` to * Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
* recover `{ pageId, relPath }`. Files without a parseable pageId in meta are * its `gitmost_id` frontmatter (native-Obsidian format). Files without a
* skipped (they are not engine-tracked pages — e.g. a stray hand-written file). * `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
* hand-written Obsidian file; PUSH adopts those separately).
* *
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules: * The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race) * - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
* -> skipped, NOT thrown; the next pull converges; * -> skipped, NOT thrown; the next pull converges;
* - unparseable meta (`parseDocmostMarkdown` throws) -> skipped; * - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
* - parseable but no `pageId` in meta -> skipped.
*/ */
export async function readExisting( export async function readExisting(
deps: ReadExistingDeps, deps: ReadExistingDeps,
@@ -111,15 +111,8 @@ export async function readExisting(
// converges. // converges.
continue; continue;
} }
let pageId: string | undefined; const { id } = parsePageFile(text);
try { if (id) existing.push({ pageId: id, relPath: rel });
const { meta } = parseDocmostMarkdown(text);
pageId = meta?.pageId;
} catch {
// Unparseable meta — not engine-tracked; leave it alone.
pageId = undefined;
}
if (pageId) existing.push({ pageId, relPath: rel });
} }
return existing; return existing;
} }
@@ -305,15 +298,13 @@ export async function applyPullActions(
}): Promise<void> => { }): Promise<void> => {
try { try {
const page = await client.getPageJson(w.pageId); const page = await client.getPageJson(w.pageId);
const meta: PageMeta = { // Native-Obsidian format: a minimal `gitmost_id` frontmatter + the fixpoint
version: 1, // markdown body. title/parent/space are DERIVED (filename / folder / repo),
pageId: page.id, // so nothing but the pageId is persisted as meta.
slugId: page.slugId, const text = serializePageFile(
title: page.title, page.id,
spaceId: page.spaceId, await stabilizePageBody(page.content),
parentPageId: page.parentPageId ?? null, );
};
const text = await stabilizePageFile(page.content, meta);
const abs = relToAbs(vaultRoot, w.relPath); const abs = relToAbs(vaultRoot, w.relPath);
await deps.mkdir(dirname(abs)); await deps.mkdir(dirname(abs));
await deps.writeFile(abs, text); await deps.writeFile(abs, text);

View File

@@ -49,10 +49,30 @@ export async function stabilizePageFile(
content: unknown, content: unknown,
meta: PageMeta, meta: PageMeta,
): Promise<string> { ): Promise<string> {
const md1 = convertProseMirrorToMarkdown(content);
const doc2 = await markdownToProseMirror(md1);
const stableBody = convertProseMirrorToMarkdown(doc2);
// The meta shape is exactly what `exportPageBody` writes; cast to the lib's // The meta shape is exactly what `exportPageBody` writes; cast to the lib's
// DocmostMdMeta (a superset with optional fields) for the serializer. // DocmostMdMeta (a superset with optional fields) for the serializer.
return serializeDocmostMarkdownBody(meta as DocmostMdMeta, stableBody); return serializeDocmostMarkdownBody(
meta as DocmostMdMeta,
await stabilizePageBody(content),
);
}
/**
* The fixpoint markdown BODY for a page's ProseMirror `content`, WITHOUT any meta
* envelope:
*
* md1 = convertProseMirrorToMarkdown(content) // export...
* doc2 = markdownToProseMirror(md1) // ...import...
* stableBody = convertProseMirrorToMarkdown(doc2) // ...re-export
*
* The single export->import->export pass is the verified fixpoint (SPEC §11):
* idempotent for already-stable content, and the convergence point for the known
* converter asymmetries. The native-Obsidian writer (`serializePageFile`) wraps
* this body with a minimal `gitmost_id` frontmatter; determinism here is what
* keeps re-pulls of an unchanged page byte-identical (no churn, loop-guard).
*/
export async function stabilizePageBody(content: unknown): Promise<string> {
const md1 = convertProseMirrorToMarkdown(content);
const doc2 = await markdownToProseMirror(md1);
return convertProseMirrorToMarkdown(doc2);
} }

View File

@@ -164,6 +164,14 @@ describe('applyPullActions — happy path (write + commit + merge)', () => {
const writtenPaths = fs.writes.map((w) => w.abs).sort(); const writtenPaths = fs.writes.map((w) => w.abs).sort();
expect(writtenPaths).toEqual(['/vault/A.md', '/vault/Sub/B.md']); expect(writtenPaths).toEqual(['/vault/A.md', '/vault/Sub/B.md']);
// Every written file is in the native-Obsidian format: a `gitmost_id`
// frontmatter at the very top and NO legacy `docmost:meta` envelope. Guards
// against a regression back to the heavy meta block.
for (const w of fs.writes) {
expect(w.text.startsWith('---\ngitmost_id: ')).toBe(true);
expect(w.text).not.toContain('docmost:meta');
}
// The git op order is: stageAll -> commit -> checkout main -> merge. // The git op order is: stageAll -> commit -> checkout main -> merge.
expect(g.order).toEqual([ expect(g.order).toEqual([
'stageAll', 'stageAll',

View File

@@ -1,14 +1,16 @@
import { describe, expect, it } from 'vitest'; import { describe, expect, it } from 'vitest';
import { readExisting } from '../src/engine/pull'; import { readExisting } from '../src/engine/pull';
import { serializePageFile } from '../src/lib/page-file';
// R-Pull-1 (test-strategy report §5): `readExisting` now takes injectable IO // R-Pull-1 (test-strategy report §5): `readExisting` now takes injectable IO
// (`listTracked` / `readFile`), so its parsing + skip rules are unit-testable // (`listTracked` / `readFile`), so its parsing + skip rules are unit-testable
// without a real git repo or filesystem. These tests pass fakes only — no git, // without a real git repo or filesystem. These tests pass fakes only — no git,
// no fs, no network. // no fs, no network. Identity is recovered from the native `gitmost_id`
// frontmatter (no more `docmost:meta`).
/** Build a valid self-contained file with a `docmost:meta` block. */ /** Build a valid native page file with a `gitmost_id` frontmatter. */
function withMeta(meta: Record<string, unknown>, body = '# Title\nbody\n'): string { function withId(id: string, body = '# Title\nbody\n'): string {
return `<!-- docmost:meta\n${JSON.stringify(meta)}\n-->\n\n${body}`; return serializePageFile(id, body);
} }
/** A fake `readFile` backed by an in-memory map (rejects on a missing key). */ /** A fake `readFile` backed by an in-memory map (rejects on a missing key). */
@@ -24,8 +26,8 @@ function fakeReadFile(files: Record<string, string>) {
describe('readExisting (R-Pull-1, injected IO)', () => { describe('readExisting (R-Pull-1, injected IO)', () => {
it('recovers { pageId, relPath } for valid tracked files', async () => { it('recovers { pageId, relPath } for valid tracked files', async () => {
const files = { const files = {
'Space/A.md': withMeta({ version: 1, pageId: 'p1', title: 'A' }), 'Space/A.md': withId('p1'),
'Space/Sub/B.md': withMeta({ version: 1, pageId: 'p2', title: 'B' }), 'Space/Sub/B.md': withId('p2'),
}; };
const result = await readExisting({ const result = await readExisting({
listTracked: async () => Object.keys(files), listTracked: async () => Object.keys(files),
@@ -37,23 +39,24 @@ describe('readExisting (R-Pull-1, injected IO)', () => {
]); ]);
}); });
it('SKIPS a file with no docmost:meta block (plain hand-written markdown)', async () => { it('SKIPS a file with no frontmatter (plain hand-written markdown)', async () => {
const files = { const files = {
'tracked.md': withMeta({ version: 1, pageId: 'p1' }), 'tracked.md': withId('p1'),
'stray.md': '# Just a hand-written note\n\nNo meta here.\n', 'stray.md': '# Just a hand-written note\n\nNo frontmatter here.\n',
}; };
const result = await readExisting({ const result = await readExisting({
listTracked: async () => Object.keys(files), listTracked: async () => Object.keys(files),
readFile: fakeReadFile(files), readFile: fakeReadFile(files),
}); });
// Only the engine-tracked file (with a pageId) survives. // Only the engine-tracked file (with a gitmost_id) survives.
expect(result).toEqual([{ pageId: 'p1', relPath: 'tracked.md' }]); expect(result).toEqual([{ pageId: 'p1', relPath: 'tracked.md' }]);
}); });
it('SKIPS a file whose meta has no pageId', async () => { it('SKIPS a file whose frontmatter has no gitmost_id key', async () => {
const files = { const files = {
'has-id.md': withMeta({ version: 1, pageId: 'keep' }), 'has-id.md': withId('keep'),
'no-id.md': withMeta({ version: 1, title: 'untitled', slugId: 's' }), // A user's own frontmatter, but no gitmost_id -> not engine-tracked.
'no-id.md': '---\ntags: [note]\ntitle: untitled\n---\n\nbody\n',
}; };
const result = await readExisting({ const result = await readExisting({
listTracked: async () => Object.keys(files), listTracked: async () => Object.keys(files),
@@ -62,12 +65,10 @@ describe('readExisting (R-Pull-1, injected IO)', () => {
expect(result).toEqual([{ pageId: 'keep', relPath: 'has-id.md' }]); expect(result).toEqual([{ pageId: 'keep', relPath: 'has-id.md' }]);
}); });
it('SKIPS a file with an unparseable (invalid-JSON) meta block, does not throw', async () => { it('SKIPS a file with an EMPTY gitmost_id value, does not throw', async () => {
// Invalid JSON inside the meta block makes parseDocmostMarkdown throw; the
// skip-rule must swallow it and treat the file as not-engine-tracked.
const files = { const files = {
'good.md': withMeta({ version: 1, pageId: 'good' }), 'good.md': withId('good'),
'broken.md': '<!-- docmost:meta\n{ this is : not, json }\n-->\n\nbody\n', 'blank.md': '---\ngitmost_id:\n---\n\nbody\n',
}; };
const result = await readExisting({ const result = await readExisting({
listTracked: async () => Object.keys(files), listTracked: async () => Object.keys(files),
@@ -78,7 +79,7 @@ describe('readExisting (R-Pull-1, injected IO)', () => {
it('does NOT throw when readFile REJECTS (tracked but missing) — treats it as skipped', async () => { it('does NOT throw when readFile REJECTS (tracked but missing) — treats it as skipped', async () => {
const files = { const files = {
'present.md': withMeta({ version: 1, pageId: 'present' }), 'present.md': withId('present'),
// "ghost.md" is listed as tracked but absent from the file map -> reject. // "ghost.md" is listed as tracked but absent from the file map -> reject.
}; };
const result = await readExisting({ const result = await readExisting({
@@ -101,11 +102,11 @@ describe('readExisting (R-Pull-1, injected IO)', () => {
it('combines all skip rules in one listing (only the valid files survive)', async () => { it('combines all skip rules in one listing (only the valid files survive)', async () => {
const files = { const files = {
'ok1.md': withMeta({ version: 1, pageId: 'a' }), 'ok1.md': withId('a'),
'no-meta.md': 'plain\n', 'no-meta.md': 'plain\n',
'no-id.md': withMeta({ version: 1, title: 'x' }), 'no-id.md': '---\ntags: [x]\n---\n\nbody\n',
'broken.md': '<!-- docmost:meta\n{bad\n-->\nbody\n', 'blank.md': '---\ngitmost_id:\n---\n\nbody\n',
'ok2.md': withMeta({ version: 1, pageId: 'b' }), 'ok2.md': withId('b'),
// missing.md rejects on read. // missing.md rejects on read.
}; };
const result = await readExisting({ const result = await readExisting({