feat(git-sync): phase 2b — PULL writes native gitmost_id frontmatter

PULL now serializes each page in the native-Obsidian format (serializePageFile:
a minimal gitmost_id frontmatter + the fixpoint markdown body) instead of the
heavy docmost:meta envelope. Title, parent and space are derived from the
filename, folder and repo respectively, so only the pageId is persisted.
readExisting now recovers identity from the gitmost_id frontmatter
(parsePageFile) instead of docmost:meta.

Extracted stabilizePageBody() (the export->import->export fixpoint, no meta) so
the native writer and the legacy serializer share the same deterministic body —
re-pulls of an unchanged page stay byte-identical (loop-guard).

Tests: read-existing fixtures rewritten to gitmost_id; apply-pull asserts the
written text is native frontmatter and carries NO docmost:meta (regression
guard). 611 engine tests green.

NOTE: PUSH still reads docmost:meta — the end-to-end cycle is intentionally NOT
runnable until phase 3 (PUSH reads frontmatter + derives title/parent from path)
lands; no vault is wiped/deployed until then.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-24 04:42:42 +03:00
parent de1c23baa6
commit 081e8e948d
4 changed files with 73 additions and 53 deletions

View File

@@ -8,7 +8,7 @@
* 2. checkout docmost
* 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the
* desired `live` files (relPath via the pure sanitize/disambiguation layout)
* 4. parse `existing` tracked .md files (pageId + relPath from docmost:meta)
* 4. parse `existing` tracked .md files (pageId + relPath from gitmost_id frontmatter)
* 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete
* is absence-only, moves are separate
* 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree
@@ -32,7 +32,7 @@
*/
import { dirname } from "node:path";
import { sep } from "node:path";
import { parseDocmostMarkdown } from "../lib/index";
import { parsePageFile, serializePageFile } from "../lib/page-file";
import type { GitSyncClient } from "./client.types";
import { buildVaultLayout, type PageNode } from "./layout";
import {
@@ -48,7 +48,7 @@ import {
type MovedEntry,
type DeletionDecision,
} from "./reconcile";
import { stabilizePageFile, type PageMeta } from "./stabilize";
import { stabilizePageBody } from "./stabilize";
// Engine-only mirror branch (SPEC §5): the engine writes here, humans never do.
const DOCMOST_BRANCH = "docmost";
@@ -85,15 +85,15 @@ export interface ReadExistingDeps {
}
/**
* Read every tracked .md file in the vault and parse its `docmost:meta` to
* recover `{ pageId, relPath }`. Files without a parseable pageId in meta are
* skipped (they are not engine-tracked pages — e.g. a stray hand-written file).
* Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
* its `gitmost_id` frontmatter (native-Obsidian format). Files without a
* `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
* hand-written Obsidian file; PUSH adopts those separately).
*
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
* -> skipped, NOT thrown; the next pull converges;
* - unparseable meta (`parseDocmostMarkdown` throws) -> skipped;
* - parseable but no `pageId` in meta -> skipped.
* - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
*/
export async function readExisting(
deps: ReadExistingDeps,
@@ -111,15 +111,8 @@ export async function readExisting(
// converges.
continue;
}
let pageId: string | undefined;
try {
const { meta } = parseDocmostMarkdown(text);
pageId = meta?.pageId;
} catch {
// Unparseable meta — not engine-tracked; leave it alone.
pageId = undefined;
}
if (pageId) existing.push({ pageId, relPath: rel });
const { id } = parsePageFile(text);
if (id) existing.push({ pageId: id, relPath: rel });
}
return existing;
}
@@ -305,15 +298,13 @@ export async function applyPullActions(
}): Promise<void> => {
try {
const page = await client.getPageJson(w.pageId);
const meta: PageMeta = {
version: 1,
pageId: page.id,
slugId: page.slugId,
title: page.title,
spaceId: page.spaceId,
parentPageId: page.parentPageId ?? null,
};
const text = await stabilizePageFile(page.content, meta);
// Native-Obsidian format: a minimal `gitmost_id` frontmatter + the fixpoint
// markdown body. title/parent/space are DERIVED (filename / folder / repo),
// so nothing but the pageId is persisted as meta.
const text = serializePageFile(
page.id,
await stabilizePageBody(page.content),
);
const abs = relToAbs(vaultRoot, w.relPath);
await deps.mkdir(dirname(abs));
await deps.writeFile(abs, text);

View File

@@ -49,10 +49,30 @@ export async function stabilizePageFile(
content: unknown,
meta: PageMeta,
): Promise<string> {
const md1 = convertProseMirrorToMarkdown(content);
const doc2 = await markdownToProseMirror(md1);
const stableBody = convertProseMirrorToMarkdown(doc2);
// The meta shape is exactly what `exportPageBody` writes; cast to the lib's
// DocmostMdMeta (a superset with optional fields) for the serializer.
return serializeDocmostMarkdownBody(meta as DocmostMdMeta, stableBody);
return serializeDocmostMarkdownBody(
meta as DocmostMdMeta,
await stabilizePageBody(content),
);
}
/**
 * Produce the stable ("fixpoint") markdown BODY for a page's ProseMirror
 * `content`. No meta envelope of any kind is attached here.
 *
 * The body is obtained by one export -> import -> re-export round trip:
 *
 *   1. export `content` to markdown,
 *   2. import that markdown back into a ProseMirror document,
 *   3. export the re-imported document to markdown again.
 *
 * Per SPEC §11 this single round trip is the verified fixpoint: content that is
 * already stable passes through unchanged, and the known converter asymmetries
 * converge to it. The native-Obsidian writer (`serializePageFile`) prepends a
 * minimal `gitmost_id` frontmatter to this body; because the body is
 * deterministic, re-pulling an unchanged page yields byte-identical output
 * (no churn, loop-guard).
 *
 * @param content - the page's ProseMirror content, passed to the exporter as-is.
 * @returns the re-exported markdown body.
 */
export async function stabilizePageBody(content: unknown): Promise<string> {
  const firstExport = convertProseMirrorToMarkdown(content);
  const reimportedDoc = await markdownToProseMirror(firstExport);
  const secondExport = convertProseMirrorToMarkdown(reimportedDoc);
  return secondExport;
}

View File

@@ -164,6 +164,14 @@ describe('applyPullActions — happy path (write + commit + merge)', () => {
const writtenPaths = fs.writes.map((w) => w.abs).sort();
expect(writtenPaths).toEqual(['/vault/A.md', '/vault/Sub/B.md']);
// Every written file is in the native-Obsidian format: a `gitmost_id`
// frontmatter at the very top and NO legacy `docmost:meta` envelope. Guards
// against a regression back to the heavy meta block.
for (const w of fs.writes) {
expect(w.text.startsWith('---\ngitmost_id: ')).toBe(true);
expect(w.text).not.toContain('docmost:meta');
}
// The git op order is: stageAll -> commit -> checkout main -> merge.
expect(g.order).toEqual([
'stageAll',

View File

@@ -1,14 +1,16 @@
import { describe, expect, it } from 'vitest';
import { readExisting } from '../src/engine/pull';
import { serializePageFile } from '../src/lib/page-file';
// R-Pull-1 (test-strategy report §5): `readExisting` now takes injectable IO
// (`listTracked` / `readFile`), so its parsing + skip rules are unit-testable
// without a real git repo or filesystem. These tests pass fakes only — no git,
// no fs, no network.
// no fs, no network. Identity is recovered from the native `gitmost_id`
// frontmatter (no more `docmost:meta`).
/** Build a valid self-contained file with a `docmost:meta` block. */
function withMeta(meta: Record<string, unknown>, body = '# Title\nbody\n'): string {
return `<!-- docmost:meta\n${JSON.stringify(meta)}\n-->\n\n${body}`;
/**
 * Build a valid native page file with a `gitmost_id` frontmatter.
 *
 * Thin test helper: it delegates straight to `serializePageFile(id, body)` so
 * the fixtures always match whatever the production serializer emits.
 *
 * @param id - page id handed to `serializePageFile` as its first argument.
 * @param body - markdown body for the file; defaults to a short heading plus
 *   one line of text.
 * @returns the string produced by `serializePageFile` for `(id, body)`.
 */
function withId(id: string, body = '# Title\nbody\n'): string {
return serializePageFile(id, body);
}
/** A fake `readFile` backed by an in-memory map (rejects on a missing key). */
@@ -24,8 +26,8 @@ function fakeReadFile(files: Record<string, string>) {
describe('readExisting (R-Pull-1, injected IO)', () => {
it('recovers { pageId, relPath } for valid tracked files', async () => {
const files = {
'Space/A.md': withMeta({ version: 1, pageId: 'p1', title: 'A' }),
'Space/Sub/B.md': withMeta({ version: 1, pageId: 'p2', title: 'B' }),
'Space/A.md': withId('p1'),
'Space/Sub/B.md': withId('p2'),
};
const result = await readExisting({
listTracked: async () => Object.keys(files),
@@ -37,23 +39,24 @@ describe('readExisting (R-Pull-1, injected IO)', () => {
]);
});
it('SKIPS a file with no docmost:meta block (plain hand-written markdown)', async () => {
it('SKIPS a file with no frontmatter (plain hand-written markdown)', async () => {
const files = {
'tracked.md': withMeta({ version: 1, pageId: 'p1' }),
'stray.md': '# Just a hand-written note\n\nNo meta here.\n',
'tracked.md': withId('p1'),
'stray.md': '# Just a hand-written note\n\nNo frontmatter here.\n',
};
const result = await readExisting({
listTracked: async () => Object.keys(files),
readFile: fakeReadFile(files),
});
// Only the engine-tracked file (with a pageId) survives.
// Only the engine-tracked file (with a gitmost_id) survives.
expect(result).toEqual([{ pageId: 'p1', relPath: 'tracked.md' }]);
});
it('SKIPS a file whose meta has no pageId', async () => {
it('SKIPS a file whose frontmatter has no gitmost_id key', async () => {
const files = {
'has-id.md': withMeta({ version: 1, pageId: 'keep' }),
'no-id.md': withMeta({ version: 1, title: 'untitled', slugId: 's' }),
'has-id.md': withId('keep'),
// A user's own frontmatter, but no gitmost_id -> not engine-tracked.
'no-id.md': '---\ntags: [note]\ntitle: untitled\n---\n\nbody\n',
};
const result = await readExisting({
listTracked: async () => Object.keys(files),
@@ -62,12 +65,10 @@ describe('readExisting (R-Pull-1, injected IO)', () => {
expect(result).toEqual([{ pageId: 'keep', relPath: 'has-id.md' }]);
});
it('SKIPS a file with an unparseable (invalid-JSON) meta block, does not throw', async () => {
// Invalid JSON inside the meta block makes parseDocmostMarkdown throw; the
// skip-rule must swallow it and treat the file as not-engine-tracked.
it('SKIPS a file with an EMPTY gitmost_id value, does not throw', async () => {
const files = {
'good.md': withMeta({ version: 1, pageId: 'good' }),
'broken.md': '<!-- docmost:meta\n{ this is : not, json }\n-->\n\nbody\n',
'good.md': withId('good'),
'blank.md': '---\ngitmost_id:\n---\n\nbody\n',
};
const result = await readExisting({
listTracked: async () => Object.keys(files),
@@ -78,7 +79,7 @@ describe('readExisting (R-Pull-1, injected IO)', () => {
it('does NOT throw when readFile REJECTS (tracked but missing) — treats it as skipped', async () => {
const files = {
'present.md': withMeta({ version: 1, pageId: 'present' }),
'present.md': withId('present'),
// "ghost.md" is listed as tracked but absent from the file map -> reject.
};
const result = await readExisting({
@@ -101,11 +102,11 @@ describe('readExisting (R-Pull-1, injected IO)', () => {
it('combines all skip rules in one listing (only the valid files survive)', async () => {
const files = {
'ok1.md': withMeta({ version: 1, pageId: 'a' }),
'ok1.md': withId('a'),
'no-meta.md': 'plain\n',
'no-id.md': withMeta({ version: 1, title: 'x' }),
'broken.md': '<!-- docmost:meta\n{bad\n-->\nbody\n',
'ok2.md': withMeta({ version: 1, pageId: 'b' }),
'no-id.md': '---\ntags: [x]\n---\n\nbody\n',
'blank.md': '---\ngitmost_id:\n---\n\nbody\n',
'ok2.md': withId('b'),
// missing.md rejects on read.
};
const result = await readExisting({