Coder↔reviewer design loop (9 rounds, reviewer verdict: exhaustive) produced 92 specs; implemented +123 tests (465 -> 588 passing). The new round-trip coverage exposed three genuine data-loss bugs in the Markdown<->ProseMirror converter, all now FIXED (round-trip is lossless for these): 1. pageBreak was lost on export (no converter case -> rendered to "" and the node vanished). Now emits <div data-type="pageBreak"></div>, which the schema parses back -> round-trips. 2. A block image between blocks left an empty <p> artifact after import-hoisting, producing a phantom blank-gap diff on every sync. markdownToProseMirror now strips content-less paragraphs after generateJSON — with a schema-validity guard that keeps the obligatory single empty paragraph in `content: "block+"` containers (tableCell/tableHeader/blockquote/column/callout/doc), so empty cells/quotes never become an invalid `content: []`. 3. The `code` mark combined with another mark was not byte-stable (emitted nested HTML that the schema's `code` `excludes:"_"` collapsed on import). The converter now emits code-only when `code` co-occurs, matching the editor. New coverage spans media/diagram/details/columns/math/mention attribute round-trips, converter emission branches, git error paths, and engine decision branches. A dedicated test pins the empty-container schema validity (the review catch on the bug-2 fix). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
326 lines
13 KiB
TypeScript
326 lines
13 KiB
TypeScript
import { execFile } from 'node:child_process';
|
|
import { copyFile, mkdtemp, rm, writeFile } from 'node:fs/promises';
|
|
import { tmpdir } from 'node:os';
|
|
import { join } from 'node:path';
|
|
import { promisify } from 'node:util';
|
|
import { afterEach, beforeAll, describe, expect, it } from 'vitest';
|
|
import {
|
|
VaultGit,
|
|
BOT_AUTHOR_NAME,
|
|
BOT_AUTHOR_EMAIL,
|
|
} from '../src/engine/git';
|
|
|
|
// Integration coverage gaps for `git.ts` flagged by the PR #119 reviewers
|
|
// (test-strategy report, Module 2). These create REAL temp git repos (mirroring
|
|
// test/git.test.ts's setup/teardown) to exercise the actual `git` binary, since
|
|
// the behaviors under test (the `-z` NUL-token alignment, copy detection, and
|
|
// per-invocation committer identity) only manifest against real git.
|
|
|
|
const execFileAsync = promisify(execFile);
|
|
|
|
/** True if a usable `git` binary is on PATH (skip gracefully otherwise). */
|
|
async function gitAvailable(): Promise<boolean> {
|
|
try {
|
|
await execFileAsync('git', ['--version']);
|
|
return true;
|
|
} catch {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/** Read the author "Name <email>" of HEAD in a repo dir. */
|
|
async function headAuthor(dir: string): Promise<string> {
|
|
const { stdout } = await execFileAsync(
|
|
'git',
|
|
['--no-pager', 'log', '-1', '--pretty=%an <%ae>'],
|
|
{ cwd: dir },
|
|
);
|
|
return stdout.trim();
|
|
}
|
|
|
|
/** Read the committer "Name <email>" of HEAD in a repo dir. */
|
|
async function headCommitter(dir: string): Promise<string> {
|
|
const { stdout } = await execFileAsync(
|
|
'git',
|
|
['--no-pager', 'log', '-1', '--pretty=%cn <%ce>'],
|
|
{ cwd: dir },
|
|
);
|
|
return stdout.trim();
|
|
}
|
|
|
|
/** Read a LOCAL git config value (or '' if unset) in a repo dir. */
|
|
async function localConfig(dir: string, key: string): Promise<string> {
|
|
const r = await execFileAsync('git', ['config', '--local', '--get', key], {
|
|
cwd: dir,
|
|
}).catch(() => ({ stdout: '' }) as { stdout: string });
|
|
return r.stdout.trim();
|
|
}
|
|
|
|
describe('VaultGit integration gaps (temp repo)', () => {
|
|
let available = false;
|
|
let dir: string;
|
|
|
|
beforeAll(async () => {
|
|
available = await gitAvailable();
|
|
});
|
|
|
|
afterEach(async () => {
|
|
if (dir) {
|
|
await rm(dir, { recursive: true, force: true });
|
|
}
|
|
});
|
|
|
|
async function freshDir(): Promise<string> {
|
|
dir = await mkdtemp(join(tmpdir(), 'docmost-vault-gap-'));
|
|
return dir;
|
|
}
|
|
|
|
// --- 7. diffNameStatus: rename mixed with add + modify in ONE diff ----------
|
|
//
|
|
// The `-z` parser walks NUL-delimited tokens pulling 1 or 2 path tokens per
|
|
// status (R/C take TWO: old + new; A/M/D take ONE). A misalignment — pulling
|
|
// the wrong number of tokens for any row — would SHIFT every subsequent path
|
|
// and misclassify a move as a delete (or vice versa). This test mixes an R
|
|
// (rename) with an A (add) and an M (modify) in a SINGLE diff so the walk MUST
|
|
// stay aligned across the 2-token R row and the 1-token A/M rows.
|
|
it('diffNameStatus keeps -z token alignment with R + A + M in one diff', async (ctx) => {
|
|
// Truly SKIP (not silently pass) when git is unavailable — a green result on
|
|
// a git-less machine would falsely claim this integration ran.
|
|
if (!available) ctx.skip();
|
|
const vault = await freshDir();
|
|
const git = new VaultGit(vault);
|
|
await git.ensureRepo();
|
|
|
|
// Base commit: `keep.md` (to be modified) and `old-name.md` (to be renamed).
|
|
const renameBody = 'line a\nline b\nline c\nline d\n';
|
|
await writeFile(join(vault, 'keep.md'), 'v1\n', 'utf8');
|
|
await writeFile(join(vault, 'old-name.md'), renameBody, 'utf8');
|
|
await git.stageAll();
|
|
await git.commit('base', {
|
|
authorName: BOT_AUTHOR_NAME,
|
|
authorEmail: BOT_AUTHOR_EMAIL,
|
|
});
|
|
const base = await git.revParse('HEAD');
|
|
expect(base).toBeTruthy();
|
|
|
|
// Second commit: MODIFY keep.md, ADD fresh.md, RENAME old-name.md ->
|
|
// new-name.md (identical content so -M detects a rename, not delete+add).
|
|
await writeFile(join(vault, 'keep.md'), 'v2\n', 'utf8');
|
|
await writeFile(join(vault, 'fresh.md'), 'brand new\n', 'utf8');
|
|
await rm(join(vault, 'old-name.md'));
|
|
await writeFile(join(vault, 'new-name.md'), renameBody, 'utf8');
|
|
await git.stageAll();
|
|
await git.commit('mixed change', {
|
|
authorName: BOT_AUTHOR_NAME,
|
|
authorEmail: BOT_AUTHOR_EMAIL,
|
|
});
|
|
|
|
const entries = await git.diffNameStatus(base!, 'HEAD');
|
|
const byPath = new Map(entries.map((e) => [e.path, e]));
|
|
|
|
// The modify and the add are each classified correctly (1 path token each).
|
|
expect(byPath.get('keep.md')).toEqual({ status: 'M', path: 'keep.md' });
|
|
expect(byPath.get('fresh.md')).toEqual({ status: 'A', path: 'fresh.md' });
|
|
|
|
// The rename is a SINGLE R row carrying BOTH old + new paths (2 path tokens)
|
|
// — proof the walk consumed exactly two tokens here and stayed aligned. If
|
|
// alignment were off, the rename would surface as a D (delete) of
|
|
// old-name.md and/or an A of new-name.md instead.
|
|
const r = byPath.get('new-name.md');
|
|
expect(r?.status).toBe('R');
|
|
expect(r?.oldPath).toBe('old-name.md');
|
|
expect(r?.score).toBe(100);
|
|
|
|
// Exactly three rows, and crucially NO stray D/A for the renamed file (which
|
|
// is the tell-tale of a -z misalignment).
|
|
expect(entries.length).toBe(3);
|
|
expect(entries.some((e) => e.status === 'D')).toBe(false);
|
|
expect(byPath.has('old-name.md')).toBe(false);
|
|
});
|
|
|
|
// --- 8. diffNameStatus: copy (C) status lines -------------------------------
|
|
//
|
|
// DOCUMENTED OUTCOME (reported as such): `C` (copy) rows are NOT reachable
|
|
// through the engine's actual git invocation. `diffNameStatus` invokes
|
|
// `git diff --name-status -M -z` — `-M` enables rename detection ONLY; copy
|
|
// detection requires `-C`/`--find-copies`, which the engine does NOT pass. So a
|
|
// file that is a verbatim COPY of another (the original is KEPT) is reported as
|
|
// a plain ADD (`A`), never `C`. This test pins that real behavior so a future
|
|
// change that turns on `-C` (and would start emitting `C` rows) is caught.
|
|
it('diffNameStatus reports a pure copy as A, not C (engine uses -M only)', async (ctx) => {
|
|
if (!available) ctx.skip();
|
|
const vault = await freshDir();
|
|
const git = new VaultGit(vault);
|
|
await git.ensureRepo();
|
|
|
|
// Base: a single source file with enough content to be copy-detectable.
|
|
const body = 'aaa\nbbb\nccc\nddd\neee\nfff\n';
|
|
await writeFile(join(vault, 'src.md'), body, 'utf8');
|
|
await git.stageAll();
|
|
await git.commit('add src', {
|
|
authorName: BOT_AUTHOR_NAME,
|
|
authorEmail: BOT_AUTHOR_EMAIL,
|
|
});
|
|
const base = await git.revParse('HEAD');
|
|
|
|
// KEEP src.md and add an identical copy dup.md (a pure copy, not a rename).
|
|
await copyFile(join(vault, 'src.md'), join(vault, 'dup.md'));
|
|
await git.stageAll();
|
|
await git.commit('add copy of src', {
|
|
authorName: BOT_AUTHOR_NAME,
|
|
authorEmail: BOT_AUTHOR_EMAIL,
|
|
});
|
|
|
|
const entries = await git.diffNameStatus(base!, 'HEAD');
|
|
|
|
// With -M only (no -C), git does NOT emit a C row: the copy is a plain add.
|
|
expect(entries).toEqual([{ status: 'A', path: 'dup.md' }]);
|
|
expect(entries.some((e) => e.status === 'C')).toBe(false);
|
|
});
|
|
|
|
// --- 9. commit: per-invocation committer/author does NOT leak into config ----
|
|
//
|
|
// The engine sets author + committer identity via GIT_AUTHOR_*/GIT_COMMITTER_*
|
|
// env vars per `git commit` invocation (commitRaw). This underpins the §10
|
|
// provenance/loop-guard: the identity must travel WITH the commit, not be
|
|
// written into the repo config (which would make it global to every later
|
|
// hand-run commit). We commit with the distinct "Local" identity (different
|
|
// from the repo's default `user.name`/`user.email`, which ensureRepo seeds as
|
|
// the bot identity) and assert the commit carries the passed identity while the
|
|
// repo config is UNCHANGED (still the bot default).
|
|
it('commit passes committer/author per-invocation without mutating repo config', async (ctx) => {
|
|
if (!available) ctx.skip();
|
|
const vault = await freshDir();
|
|
const git = new VaultGit(vault);
|
|
await git.ensureRepo();
|
|
|
|
// ensureRepo seeds the repo's LOCAL user.* with the bot identity. Capture it
|
|
// so we can prove the per-commit identity does NOT overwrite it.
|
|
expect(await localConfig(vault, 'user.name')).toBe(BOT_AUTHOR_NAME);
|
|
expect(await localConfig(vault, 'user.email')).toBe(BOT_AUTHOR_EMAIL);
|
|
|
|
// Commit with a DIFFERENT identity, passed per-invocation only.
|
|
const LOCAL_NAME = 'Local';
|
|
const LOCAL_EMAIL = 'local@local';
|
|
await writeFile(join(vault, 'page.md'), 'hello\n', 'utf8');
|
|
await git.stageAll();
|
|
const made = await git.commit('docmost: sync 1 page(s)', {
|
|
authorName: LOCAL_NAME,
|
|
authorEmail: LOCAL_EMAIL,
|
|
});
|
|
expect(made).toBe(true);
|
|
|
|
// The commit's author AND committer are the passed per-invocation identity
|
|
// (committer matches author via GIT_COMMITTER_* — not the repo default).
|
|
expect(await headAuthor(vault)).toBe(`${LOCAL_NAME} <${LOCAL_EMAIL}>`);
|
|
expect(await headCommitter(vault)).toBe(`${LOCAL_NAME} <${LOCAL_EMAIL}>`);
|
|
|
|
// CRITICAL: the per-commit identity did NOT leak into the repo config — the
|
|
// LOCAL user.* is still the bot default ensureRepo seeded.
|
|
expect(await localConfig(vault, 'user.name')).toBe(BOT_AUTHOR_NAME);
|
|
expect(await localConfig(vault, 'user.email')).toBe(BOT_AUTHOR_EMAIL);
|
|
|
|
// And the identity never reached the GLOBAL config either (the env-var path
|
|
// writes no config at all). `--global --get` exits non-zero / empty when the
|
|
// value differs or is unset; assert it is NOT the per-commit identity.
|
|
const globalName = await execFileAsync('git', [
|
|
'config',
|
|
'--global',
|
|
'--get',
|
|
'user.name',
|
|
])
|
|
.then((r) => r.stdout.trim())
|
|
.catch(() => '');
|
|
expect(globalName).not.toBe(LOCAL_NAME);
|
|
});
|
|
});
|
|
|
|
// Parser/error-fallback gaps for `git.ts` exercised WITHOUT a real git binary by
|
|
// monkey-patching the private `runRaw` primitive (every git invocation funnels
|
|
// through it, per the module header). These pin defensive arms the accepted
|
|
// integration specs above could not reach: the unknown-status consume in the
|
|
// `-z` walk, and the `|| r.stdout` empty-stderr error-detail fallbacks.
|
|
describe('VaultGit parser/error-fallback gaps (runRaw stubbed)', () => {
|
|
// --- 1. diffNameStatus: unknown status (T) sandwiched between A and M --------
|
|
//
|
|
// Protects the default arm of the status switch (git.ts ~lines 497-502): an
|
|
// unknown status like `T` (type-change) consumes ONE path token defensively
|
|
// but emits nothing. If the walk pulled the wrong count here it would desync
|
|
// and misclassify the trailing M row.
|
|
it('diffNameStatus swallows an unknown T status mid-stream and stays aligned', async () => {
|
|
const git = new VaultGit('/tmp/any');
|
|
(git as any).runRaw = async () => ({
|
|
code: 0,
|
|
// A\0a.md T\0t.md M\0m.md — T is the unknown status mid-stream.
|
|
stdout: 'A\0a.md\0T\0t.md\0M\0m.md\0',
|
|
stderr: '',
|
|
});
|
|
|
|
const entries = await git.diffNameStatus('X', 'Y');
|
|
|
|
// The T row's path token 't.md' is consumed but NOT emitted; the walk stays
|
|
// aligned so the trailing M/m.md parses cleanly (no off-by-one).
|
|
expect(entries).toEqual([
|
|
{ status: 'A', path: 'a.md' },
|
|
{ status: 'M', path: 'm.md' },
|
|
]);
|
|
expect(entries.length).toBe(2);
|
|
expect(entries.some((e) => e.status === ('T' as any))).toBe(false);
|
|
expect(entries.some((e) => e.path === 't.md')).toBe(false);
|
|
});
|
|
|
|
// --- 2. diffNameStatus: unknown status (T) FIRST in the stream --------------
|
|
//
|
|
// Leading-position variant: a `T` at the head must consume its own path token
|
|
// without swallowing the following real A entry.
|
|
it('diffNameStatus swallows a leading unknown T status and parses the next A', async () => {
|
|
const git = new VaultGit('/tmp/any');
|
|
(git as any).runRaw = async () => ({
|
|
code: 0,
|
|
stdout: 'T\0t.md\0A\0a.md\0',
|
|
stderr: '',
|
|
});
|
|
|
|
const entries = await git.diffNameStatus('X', 'Y');
|
|
|
|
expect(entries.length).toBe(1);
|
|
expect(entries[0]).toEqual({ status: 'A', path: 'a.md' });
|
|
});
|
|
|
|
// --- 3. listTrackedFiles: non-zero exit, EMPTY stderr, stdout carries detail -
|
|
//
|
|
// The thrown message is built from `(r.stderr || r.stdout || '')`. This pins
|
|
// the `|| r.stdout` arm (empty stderr, non-empty stdout) — distinct from the
|
|
// non-empty-stderr and spawn-ENOENT paths the accepted specs cover.
|
|
it('listTrackedFiles uses stdout in the error message when stderr is empty', async () => {
|
|
const git = new VaultGit('/tmp/any');
|
|
(git as any).runRaw = async () => ({
|
|
code: 1,
|
|
stderr: '',
|
|
stdout: 'some detail',
|
|
});
|
|
|
|
await expect(git.listTrackedFiles()).rejects.toThrow(
|
|
'git ls-files failed: some detail',
|
|
);
|
|
});
|
|
|
|
// --- 4. diffNameStatus: non-zero exit, EMPTY stderr, stdout carries detail ---
|
|
//
|
|
// diffNameStatus has its OWN independent `(r.stderr || r.stdout || '').trim()`
|
|
// fallback (git.ts ~line 469), separate from listTrackedFiles. Pin the
|
|
// empty-stderr/non-empty-stdout arm of THIS branch.
|
|
it('diffNameStatus uses stdout in the error message when stderr is empty', async () => {
|
|
const git = new VaultGit('/tmp/any');
|
|
(git as any).runRaw = async () => ({
|
|
code: 1,
|
|
stderr: '',
|
|
stdout: 'diff detail',
|
|
});
|
|
|
|
await expect(git.diffNameStatus('X', 'Y')).rejects.toThrow(
|
|
'git diff --name-status failed: diff detail',
|
|
);
|
|
});
|
|
});
|