fix(git-sync,converter): 3 HIGH round-trip/data-loss fixes found by QA (#359)
Implements the 3 HIGH fixes agent_qa found via the RALPH test cycle (analysis theirs, implementation here), fixtures-first. - T2E-1 [converter]: a multi-paragraph callout collapsed to one paragraph on round-trip. markdown-converter `case "callout"` now joins block children with "\n>\n" (a blank ">" separator), byte-identical to the blockquote serializer, so two paragraphs survive re-import. Fixture 12-callout-multiblock; two existing tests updated (their old expectations encoded the collapse bug). - T6-listnest [converter]: a callout/blockquote nested in a list item corrupted on round-trip (callout type lost, `[!type]` leaked into text — confirmed DB corruption). Add bridgeNestedCallouts: a post-marked, nesting-agnostic JSDOM pass that reconstructs callouts from `[!type]`-opening blockquotes at ANY depth (the indented ` > [!type]` form the column-0-anchored preprocessor misses), emitting the same callout div as the top-level path (disjoint inputs, no double-processing). Strict `^[!type]` guard, so a real blockquote — or one with `[!` mid-line — is NOT converted. Fixture 13-callout-in-list + a no-lead control + false-positive controls (proven non-vacuous). - D-P3-1 [git-sync]: a "ghost" file (a tracked id that was never a page) was silently absence-deleted on pull-reconcile -> data loss. Add GitSyncClient.pageIdsExist (datasource: workspace-scoped SELECT over any space incl. trashed/moved, non-UUID ids filtered first); cycle.ts computes the candidate-delete set and passes existing ids as `deletableIds` into planReconciliation, which now absence-deletes ONLY real page rows — a ghost is preserved. A cycle-level e2e test proves the ghost survives runCycle end-to-end. Suites green: prosemirror-markdown 687, git-sync 272 (no type errors), datasource 36, orchestrator 26; both packages tsc clean. Scope: the 3 fixes + tests/fixtures only, no develop re-merge. T4E-esc (LOW escaping) left for the maintainer, per agent_qa. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -114,6 +114,10 @@ function build(opts: BuildOptions = {}): Built {
|
||||
// The read-side / write-side client the datasource hands back.
|
||||
const client: Record<string, AnyMock> = {
|
||||
listSpaceTree: jest.fn(async () => ({ pages: [], complete: true })),
|
||||
// Historical behavior: every candidate id is a real page row. Present so the
|
||||
// fake does not rely on `listTrackedFiles -> []` to avoid the ghost-guard
|
||||
// probe, and won't break if a future test seeds tracked files.
|
||||
pageIdsExist: jest.fn(async (ids: string[]) => ids),
|
||||
deletePage: jest.fn(async () => undefined),
|
||||
createPage: jest.fn(async () => undefined),
|
||||
updatePageBody: jest.fn(async () => undefined),
|
||||
|
||||
@@ -200,6 +200,51 @@ describe('GitmostDataSourceService', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// D-P3-1 ghost guard: the pull reconciler asks which candidate-delete ids are
|
||||
// REAL page rows before deleting a tracked vault file. Only rows returned here
|
||||
// may be absence-deleted; a ghost id (never a page) is preserved.
|
||||
describe('pageIdsExist (D-P3-1 ghost guard)', () => {
|
||||
const UUID_A = '11111111-1111-4111-8111-111111111111';
|
||||
const UUID_B = '22222222-2222-4222-8222-222222222222';
|
||||
|
||||
it('returns the subset that are real page rows, workspace-scoped (any space/state)', async () => {
|
||||
// The DB reports only UUID_A exists as a row.
|
||||
const { service, mocks } = build([{ id: UUID_A }]);
|
||||
const res = await service.bind(CTX).pageIdsExist([UUID_A, UUID_B]);
|
||||
expect(res).toEqual([UUID_A]);
|
||||
// The query is workspace-scoped and restricted to the candidate ids.
|
||||
// Crucially there is NO deletedAt / spaceId filter: a trashed page or a
|
||||
// page moved to another space still counts as "exists" (its file is
|
||||
// cleaned up), while a ghost has no row (its file is preserved).
|
||||
expect(mocks.db.selectFrom).toHaveBeenCalledWith('pages');
|
||||
const qb = mocks.db.selectFrom.mock.results[0].value;
|
||||
expect(qb.where).toHaveBeenCalledWith('workspaceId', '=', 'ws-1');
|
||||
expect(qb.where).toHaveBeenCalledWith('id', 'in', [UUID_A, UUID_B]);
|
||||
const filterCols = qb.where.mock.calls.map((c: any[]) => c[0]);
|
||||
expect(filterCols).not.toContain('deletedAt');
|
||||
expect(filterCols).not.toContain('spaceId');
|
||||
});
|
||||
|
||||
it('filters malformed non-UUID ids BEFORE the predicate (and skips the query when none remain)', async () => {
|
||||
// A mix of one valid uuid + malformed tokens: only the valid uuid reaches
|
||||
// the predicate, so a bad `gitmost_id` cannot trigger a 22P02 that would
|
||||
// reject the whole existence probe.
|
||||
const { service, mocks } = build([]);
|
||||
const res = await service
|
||||
.bind(CTX)
|
||||
.pageIdsExist([UUID_A, 'not-a-uuid', '[unclosed']);
|
||||
const qb = mocks.db.selectFrom.mock.results[0].value;
|
||||
expect(qb.where).toHaveBeenCalledWith('id', 'in', [UUID_A]);
|
||||
expect(res).toEqual([]);
|
||||
|
||||
// When EVERY candidate is malformed, the query is skipped entirely.
|
||||
const { service: s2, mocks: m2 } = build([]);
|
||||
const res2 = await s2.bind(CTX).pageIdsExist(['not-a-uuid', '[unclosed']);
|
||||
expect(res2).toEqual([]);
|
||||
expect(m2.db.selectFrom).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('getPageJson', () => {
|
||||
it('returns the engine page shape with ISO updatedAt + content', async () => {
|
||||
const { service, mocks } = build();
|
||||
|
||||
@@ -78,6 +78,7 @@ export class GitmostDataSourceService {
|
||||
return {
|
||||
listSpaceTree: (spaceId, rootPageId) =>
|
||||
this.listSpaceTree(ctx, spaceId, rootPageId),
|
||||
pageIdsExist: (pageIds) => this.pageIdsExist(ctx, pageIds),
|
||||
getPageJson: (pageId) => this.getPageJson(ctx, pageId),
|
||||
// The id-scoped WRITE ops are wrapped so a malformed (non-UUID) `pageId`
|
||||
// from a broken vault `gitmost_id` frontmatter cannot wedge the space's sync
|
||||
@@ -191,6 +192,36 @@ export class GitmostDataSourceService {
|
||||
return { pages, complete: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Existence probe for the pull-side ghost guard (D-P3-1). Return the subset of
|
||||
* `pageIds` that correspond to a REAL page ROW in this workspace — INCLUDING
|
||||
* soft-deleted (trashed) rows and pages in ANY OTHER space (no `deletedAt` /
|
||||
* `spaceId` filter), i.e. any id that has ever been a page. The engine only
|
||||
* absence-deletes a tracked vault file whose id is returned here; a GHOST id
|
||||
* (a git file whose id was never a page) has no row and is preserved.
|
||||
*
|
||||
* Malformed (non-UUID) ids are filtered out BEFORE the `id = ANY(...)`
|
||||
* predicate: an invalid uuid literal would make Postgres reject the whole
|
||||
* query (error 22P02), and a ghost id is often exactly such a hand-edited
|
||||
* token. A filtered-out id is never returned, so its file is preserved.
|
||||
*/
|
||||
private async pageIdsExist(
|
||||
ctx: GitSyncBindContext,
|
||||
pageIds: string[],
|
||||
): Promise<string[]> {
|
||||
const validIds = pageIds.filter((id) => isValidUUID(id));
|
||||
if (validIds.length === 0) return [];
|
||||
|
||||
const rows = await this.db
|
||||
.selectFrom('pages')
|
||||
.select(['id'])
|
||||
.where('workspaceId', '=', ctx.workspaceId)
|
||||
.where('id', 'in', validIds)
|
||||
.execute();
|
||||
|
||||
return rows.map((row) => row.id);
|
||||
}
|
||||
|
||||
/**
|
||||
* One page WITH its ProseMirror body content (editor-ext schema). `updatedAt`
|
||||
* is serialized to an ISO string for the loop-guard.
|
||||
|
||||
@@ -51,6 +51,23 @@ export interface GitSyncClient {
|
||||
rootPageId?: string,
|
||||
): Promise<{ pages: GitSyncPageNodeLite[]; complete: boolean }>;
|
||||
|
||||
/**
|
||||
* Existence probe for the pull-side ghost guard (D-P3-1). Given a set of
|
||||
* candidate pageIds, return the subset that corresponds to a REAL page ROW —
|
||||
* INCLUDING soft-deleted (trashed) and pages in ANY OTHER space, i.e. any
|
||||
* `id` that has ever been a page (`SELECT id FROM pages WHERE id = ANY(...)`,
|
||||
* workspace-scoped). Malformed (non-UUID) ids are filtered out before the
|
||||
* query and are never returned.
|
||||
*
|
||||
* The pull reconciler only absence-deletes a tracked file whose pageId is
|
||||
* returned here: a deleted/moved/trashed page HAS a row -> its vault file is
|
||||
* cleaned up; a GHOST id (a git file whose id was NEVER a page) has NO row ->
|
||||
* it is preserved rather than silently deleted (the data-loss bug). Called on
|
||||
* only the small candidate-delete set (tracked ids absent from the live tree),
|
||||
* so the query is bounded and usually empty.
|
||||
*/
|
||||
pageIdsExist(pageIds: string[]): Promise<string[]>;
|
||||
|
||||
/**
|
||||
* One page WITH its ProseMirror body content. `applyPullActions` reads
|
||||
* `id`, `slugId`, `title`, `parentPageId`, `spaceId` (for the file meta) and
|
||||
|
||||
@@ -170,10 +170,38 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
});
|
||||
|
||||
const tree = await client.listSpaceTree(spaceId);
|
||||
|
||||
// D-P3-1 ghost guard: an absence-delete must not silently remove a git file
|
||||
// whose pageId was NEVER a page (a hand-authored file with an unknown id).
|
||||
// Compute the candidate-delete set (tracked ids absent from the live tree)
|
||||
// and ask the datasource which of them are REAL page rows (incl. trashed /
|
||||
// other spaces). Only those may be absence-deleted; a ghost id is preserved
|
||||
// (adopted/skipped by the push side).
|
||||
//
|
||||
// Size note: on a COMPLETE fetch this set is usually small/empty (only
|
||||
// genuinely removed pages look absent), so the `id IN (...)` probe is cheap.
|
||||
// On an INCOMPLETE fetch (`tree.complete === false`) MANY live pages look
|
||||
// absent, so the set — and the probe — can be large. That is only a perf
|
||||
// consideration, not a correctness one: `decideAbsenceDeletions` (inside
|
||||
// `computePullActions`) still SUPPRESSES every absence delete on an
|
||||
// incomplete fetch, so no ghost-guarded deletion is applied that cycle
|
||||
// regardless of what the probe returns.
|
||||
const livePageIds = new Set(
|
||||
tree.pages.filter((p) => p && p.id).map((p) => p.id),
|
||||
);
|
||||
const candidateDeleteIds = existing
|
||||
.map((e) => e.pageId)
|
||||
.filter((id) => !livePageIds.has(id));
|
||||
const deletableIds =
|
||||
candidateDeleteIds.length > 0
|
||||
? await client.pageIdsExist(candidateDeleteIds)
|
||||
: [];
|
||||
|
||||
const pullActions = computePullActions({
|
||||
pages: tree.pages,
|
||||
treeComplete: tree.complete,
|
||||
existing,
|
||||
deletableIds,
|
||||
});
|
||||
|
||||
// Bail before the first destructive write phase if the lock was lost.
|
||||
|
||||
@@ -148,6 +148,14 @@ export interface PullActionsInput {
|
||||
treeComplete: boolean;
|
||||
/** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */
|
||||
existing: { pageId: string; relPath: string }[];
|
||||
/**
|
||||
* The subset of tracked pageIds that correspond to a REAL page row (D-P3-1
|
||||
* ghost guard, from `client.pageIdsExist`). Only ids in this set may be
|
||||
* absence-deleted; a ghost id (never a page) is preserved. When omitted, all
|
||||
* absent ids are deletable (the historical behavior; pure unit callers that
|
||||
* do not model ghosts).
|
||||
*/
|
||||
deletableIds?: string[];
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -191,7 +199,7 @@ export interface PullActions {
|
||||
* thin `applyPullActions`.
|
||||
*/
|
||||
export function computePullActions(input: PullActionsInput): PullActions {
|
||||
const { pages, treeComplete, existing } = input;
|
||||
const { pages, treeComplete, existing, deletableIds } = input;
|
||||
const layout = buildVaultLayout(pages);
|
||||
|
||||
const live: LiveEntry[] = [];
|
||||
@@ -206,8 +214,14 @@ export function computePullActions(input: PullActionsInput): PullActions {
|
||||
}
|
||||
|
||||
// Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
|
||||
// `plan.moved` carries move old-path removals separately.
|
||||
const plan = planReconciliation(live, existing);
|
||||
// `plan.moved` carries move old-path removals separately. The ghost guard
|
||||
// (D-P3-1) gates absence-deletes to ids that are a real page row; when
|
||||
// `deletableIds` is omitted, all absent ids are deletable (historical).
|
||||
const plan = planReconciliation(
|
||||
live,
|
||||
existing,
|
||||
deletableIds === undefined ? undefined : new Set(deletableIds),
|
||||
);
|
||||
|
||||
// Decide whether the ABSENCE-based deletions may be applied this cycle
|
||||
// (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard.
|
||||
|
||||
@@ -96,6 +96,16 @@ export interface ReconciliationPlan {
|
||||
export function planReconciliation(
|
||||
live: LiveEntry[],
|
||||
existing: ExistingEntry[],
|
||||
/**
|
||||
* The subset of tracked pageIds that correspond to a REAL page row (D-P3-1
|
||||
* ghost guard). When provided, a tracked file whose pageId is ABSENT from
|
||||
* `live` is absence-deleted ONLY if its id is in this set — a deleted/moved/
|
||||
* trashed page has a row (delete its stale vault file), while a GHOST id (a
|
||||
* git file whose id was never a page) has NO row and is PRESERVED. When
|
||||
* `undefined` (pure unit callers that do not model ghosts), every absent id is
|
||||
* treated as deletable — the historical behavior.
|
||||
*/
|
||||
deletableIds?: ReadonlySet<string>,
|
||||
): ReconciliationPlan {
|
||||
// Desired path for each live pageId.
|
||||
const liveByPageId = new Map<string, string>();
|
||||
@@ -119,9 +129,17 @@ export function planReconciliation(
|
||||
for (const ex of existing) {
|
||||
const liveRel = liveByPageId.get(ex.pageId);
|
||||
if (liveRel === undefined) {
|
||||
// Tracked page is gone from the live tree -> absence delete.
|
||||
// Tracked page is gone from the live tree -> candidate absence delete.
|
||||
// D-P3-1: a candidate is only deleted when its id corresponds to a real
|
||||
// page row (deleted/moved/trashed). A GHOST id (never a page) is NOT in
|
||||
// `deletableIds` and is preserved rather than silently deleted. When the
|
||||
// gate is not supplied (pure unit callers), fall back to the historical
|
||||
// "all absent ids deletable" behavior.
|
||||
const deletable = deletableIds === undefined || deletableIds.has(ex.pageId);
|
||||
// Never queue a path a live page will (re)write (path reuse -> no loss).
|
||||
if (!liveTargetPaths.has(ex.relPath)) toDeleteSet.add(ex.relPath);
|
||||
if (deletable && !liveTargetPaths.has(ex.relPath)) {
|
||||
toDeleteSet.add(ex.relPath);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (liveRel !== ex.relPath) {
|
||||
|
||||
@@ -77,6 +77,8 @@ const nodeFs: CycleFs = {
|
||||
function makeEmptyClientFake() {
|
||||
return {
|
||||
listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })),
|
||||
// Default: every candidate id is a real page row (historical behavior).
|
||||
pageIdsExist: vi.fn(async (ids: string[]) => ids),
|
||||
getPageJson: vi.fn(),
|
||||
importPageMarkdown: vi.fn(async () => ({ updatedAt: "2026-06-20T00:00:00.000Z" })),
|
||||
createPage: vi.fn(async (title: string) => ({
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import { describe, it, expect, vi } from "vitest";
|
||||
import { runCycle, type RunCycleDeps } from "../src/engine/cycle";
|
||||
import { serializePageFile } from "@docmost/prosemirror-markdown";
|
||||
|
||||
// A fake VaultGit recording the staging calls. An EMPTY vault/tree lets the real
|
||||
// readExisting/computePullActions/applyPullActions/runPush run trivially (no
|
||||
@@ -46,6 +47,8 @@ function baseDeps(vault: any, over: Partial<RunCycleDeps> = {}): RunCycleDeps {
|
||||
spaceId: "space-1",
|
||||
client: {
|
||||
listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })),
|
||||
// Default: every candidate id is a real page row (historical behavior).
|
||||
pageIdsExist: vi.fn(async (ids: string[]) => ids),
|
||||
getPageJson: vi.fn(),
|
||||
importPageMarkdown: vi.fn(),
|
||||
createPage: vi.fn(),
|
||||
@@ -235,4 +238,88 @@ describe("runCycle (composition)", () => {
|
||||
expect(deps.client.listSpaceTree).toHaveBeenCalledTimes(1);
|
||||
expect(vault.diffNameStatus).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// D-P3-1 ghost guard, end-to-end through runCycle: a tracked file whose id is
|
||||
// absent from live AND is NOT returned by `pageIdsExist` (a ghost — never a
|
||||
// page) must SURVIVE the whole cycle (no rm), while a sibling id that IS
|
||||
// returned (a genuinely deleted page row) is absence-deleted. This proves the
|
||||
// guard flows from pageIdsExist -> computePullActions -> applyPullActions,
|
||||
// not just at the pure `planReconciliation` unit.
|
||||
it("GHOST GUARD (e2e): a ghost tracked file SURVIVES runCycle; a real deleted-page file is removed", async () => {
|
||||
const ghostId = "019f2500-dead-7000-8000-000000000009"; // never a page
|
||||
const deletedId = "019f2500-0000-7000-8000-000000000001"; // a real (deleted) row
|
||||
const liveId = "019f2500-0000-7000-8000-0000000000aa"; // keeps empty-live from firing
|
||||
|
||||
// Two tracked files, both ABSENT from the live tree (deletion candidates).
|
||||
const vault = fakeVault({
|
||||
listTrackedFiles: vi.fn(async () => ["Ghost.md", "Deleted.md"]),
|
||||
});
|
||||
|
||||
// The datasource reports ONLY the deleted page as a real row; the ghost has
|
||||
// no row (a PROPER SUBSET of the probed ids, not the identity shim).
|
||||
const pageIdsExist = vi.fn(async (ids: string[]) =>
|
||||
ids.filter((id) => id === deletedId),
|
||||
);
|
||||
const rm = vi.fn(async () => undefined);
|
||||
|
||||
const deps = baseDeps(vault, {
|
||||
fs: {
|
||||
readFile: vi.fn(async (absPath: string) => {
|
||||
if (absPath.includes("Ghost.md"))
|
||||
return serializePageFile(ghostId, "a body authored in git");
|
||||
if (absPath.includes("Deleted.md"))
|
||||
return serializePageFile(deletedId, "a deleted page body");
|
||||
return "";
|
||||
}),
|
||||
writeFile: vi.fn(async () => undefined),
|
||||
mkdir: vi.fn(async () => undefined),
|
||||
rm,
|
||||
lstat: vi.fn(async () => ({ isSymbolicLink: false })),
|
||||
realpath: vi.fn(async (p: string) => p),
|
||||
},
|
||||
client: {
|
||||
...baseDeps(vault).client,
|
||||
// A single live page so the empty-live suppression does NOT fire (which
|
||||
// would mask the guard by suppressing every delete this cycle).
|
||||
listSpaceTree: vi.fn(async () => ({
|
||||
pages: [
|
||||
{
|
||||
id: liveId,
|
||||
slugId: "live",
|
||||
title: "Live",
|
||||
parentPageId: null,
|
||||
position: "a0",
|
||||
hasChildren: false,
|
||||
},
|
||||
],
|
||||
complete: true,
|
||||
})),
|
||||
pageIdsExist,
|
||||
getPageJson: vi.fn(async (pageId: string) => ({
|
||||
id: pageId,
|
||||
slugId: "live",
|
||||
title: "Live",
|
||||
parentPageId: null,
|
||||
spaceId: "space-1",
|
||||
updatedAt: "2026-06-20T00:00:00.000Z",
|
||||
content: { type: "doc", content: [] },
|
||||
})),
|
||||
} as any,
|
||||
});
|
||||
|
||||
const res = await runCycle(deps);
|
||||
expect(res.ran).toBe(true);
|
||||
|
||||
// The guard probed the datasource for EXACTLY the absent candidate ids.
|
||||
expect(pageIdsExist).toHaveBeenCalledTimes(1);
|
||||
expect(new Set(pageIdsExist.mock.calls[0][0])).toEqual(
|
||||
new Set([ghostId, deletedId]),
|
||||
);
|
||||
|
||||
// The real deleted-page file is removed; the ghost file is PRESERVED.
|
||||
const rmPaths = rm.mock.calls.map((c) => c[0] as string);
|
||||
expect(rmPaths.some((p) => p.includes("Deleted.md"))).toBe(true);
|
||||
expect(rmPaths.some((p) => p.includes("Ghost.md"))).toBe(false);
|
||||
expect(res.pull?.deleted).toBe(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -65,6 +65,16 @@ describe('GitSyncClient contract (type-level)', () => {
|
||||
expect(true).toBe(true);
|
||||
});
|
||||
|
||||
it('pageIdsExist(ids) -> ids subset (D-P3-1 ghost guard seam)', () => {
|
||||
expectTypeOf<GitSyncClient['pageIdsExist']>().parameters.toEqualTypeOf<
|
||||
[string[]]
|
||||
>();
|
||||
expectTypeOf<
|
||||
Awaited<ReturnType<GitSyncClient['pageIdsExist']>>
|
||||
>().toEqualTypeOf<string[]>();
|
||||
expect(true).toBe(true);
|
||||
});
|
||||
|
||||
it('a structurally-correct adapter satisfies GitSyncClient (drift => compile error)', () => {
|
||||
// A minimal dummy adapter mirroring the EXACT result shapes the engine reads.
|
||||
// The `satisfies GitSyncClient` clause is the contract guard: any drift in a
|
||||
@@ -74,6 +84,7 @@ describe('GitSyncClient contract (type-level)', () => {
|
||||
pages: [] as GitSyncPageNodeLite[],
|
||||
complete: true,
|
||||
}),
|
||||
pageIdsExist: async (_pageIds: string[]) => [] as string[],
|
||||
getPageJson: async (pageId: string) => ({
|
||||
id: pageId,
|
||||
slugId: 'slug',
|
||||
@@ -130,6 +141,7 @@ describe('GitSyncClient contract (type-level)', () => {
|
||||
// in BOTH directions).
|
||||
const bad = {
|
||||
listSpaceTree: async () => ({ pages: [] as GitSyncPageNodeLite[], complete: true }),
|
||||
pageIdsExist: async () => [] as string[],
|
||||
getPageJson: async (pageId: string) => ({
|
||||
id: pageId,
|
||||
slugId: 's',
|
||||
|
||||
@@ -59,6 +59,43 @@ describe('planReconciliation', () => {
|
||||
expect(plan.moved).toEqual([]);
|
||||
});
|
||||
|
||||
// D-P3-1 ghost guard: when `deletableIds` is supplied, an absence-delete fires
|
||||
// ONLY for an id that is a REAL page row. A tracked file whose id was NEVER a
|
||||
// page (a hand-authored git file with an unknown id) is absent from `live` AND
|
||||
// absent from `deletableIds` -> it MUST be preserved, not silently deleted.
|
||||
it('GHOST GUARD: an absent id NOT in deletableIds is PRESERVED (not deleted)', () => {
|
||||
const live: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/Keep.md' }];
|
||||
const existing: ExistingEntry[] = [
|
||||
{ pageId: 'p1', relPath: 'Space/Keep.md' },
|
||||
// A ghost: its id is not live and not a real page row.
|
||||
{ pageId: 'ghost', relPath: 'Space/Ghost.md' },
|
||||
// A genuinely deleted page: absent from live but IS a real row.
|
||||
{ pageId: 'gone', relPath: 'Space/Gone.md' },
|
||||
];
|
||||
// Only the real page row ('gone') is deletable; 'ghost' has no row.
|
||||
const deletableIds = new Set(['gone']);
|
||||
const plan = planReconciliation(live, existing, deletableIds);
|
||||
expect(plan.toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/Keep.md' }]);
|
||||
// The genuine delete is applied; the ghost file is preserved.
|
||||
expect(plan.toDelete).toEqual(['Space/Gone.md']);
|
||||
expect(plan.toDelete).not.toContain('Space/Ghost.md');
|
||||
expect(plan.moved).toEqual([]);
|
||||
});
|
||||
|
||||
// The empty gate is the maximally-safe case: no id is a real row, so NOTHING
|
||||
// is absence-deleted (every absent tracked file is treated as a ghost). This
|
||||
// is what a cycle sees when `pageIdsExist` returns nothing for the candidates.
|
||||
it('GHOST GUARD: an EMPTY deletableIds set suppresses every absence delete', () => {
|
||||
const live: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/Keep.md' }];
|
||||
const existing: ExistingEntry[] = [
|
||||
{ pageId: 'p1', relPath: 'Space/Keep.md' },
|
||||
{ pageId: 'gone', relPath: 'Space/Gone.md' },
|
||||
];
|
||||
const plan = planReconciliation(live, existing, new Set<string>());
|
||||
expect(plan.toDelete).toEqual([]);
|
||||
expect(plan.moved).toEqual([]);
|
||||
});
|
||||
|
||||
it('NO-OP: live and existing identical -> writes (re-emit) but no deletes/moves', () => {
|
||||
const live: LiveEntry[] = [
|
||||
{ pageId: 'p1', relPath: 'A.md' },
|
||||
|
||||
@@ -787,12 +787,18 @@ export function convertProseMirrorToMarkdown(
|
||||
// blockquote-prefixed; a blank line becomes a bare `>` so the callout is
|
||||
// not split.
|
||||
const calloutType = (node.attrs?.type || "info").toLowerCase();
|
||||
// Prefix EVERY line of EVERY child with "> " and separate block-level
|
||||
// children with a blank ">" line (mirrors the blockquote serializer),
|
||||
// so a multi-paragraph callout does not collapse into one paragraph on
|
||||
// re-import.
|
||||
const calloutBody = nodeContent
|
||||
.map(processNode)
|
||||
.join("\n")
|
||||
.split("\n")
|
||||
.map((l: string) => (l.length ? `> ${l}` : ">"))
|
||||
.join("\n");
|
||||
.map((n: any) =>
|
||||
processNode(n)
|
||||
.split("\n")
|
||||
.map((line: string) => (line.length ? `> ${line}` : ">"))
|
||||
.join("\n"),
|
||||
)
|
||||
.join("\n>\n");
|
||||
return `> [!${calloutType}]\n${calloutBody}`;
|
||||
}
|
||||
|
||||
|
||||
@@ -548,6 +548,77 @@ function bridgeTaskLists(html: string): string {
|
||||
return document.body.innerHTML;
|
||||
}
|
||||
|
||||
/**
|
||||
* Matches a callout marker at the START of a blockquote's first paragraph:
|
||||
* `[!type]` plus an optional Obsidian title that runs to the end of that line.
|
||||
* The whole match (marker + title + trailing newline) is stripped; the Docmost
|
||||
* callout schema has no title slot, so — exactly like the line-based
|
||||
* `preprocessCallouts` opener — the title is dropped. The `\w+` requires a bare
|
||||
* `[!word]`, so a real blockquote whose text merely contains `[!` does not match.
|
||||
*/
|
||||
const CALLOUT_MARKER_HTML_RE = /^(\s*)\[!(\w+)\][^\n]*(?:\n|$)/;
|
||||
|
||||
/**
|
||||
* Reconstruct callouts that survived `marked` as `[!type]`-prefixed blockquotes.
|
||||
*
|
||||
* `preprocessCallouts` recognizes a callout only when its `> [!type]` opener sits
|
||||
* at column 0 (the regex is `^`-anchored). A callout NESTED inside a list item is
|
||||
* exported INDENTED (` > [!info]`), so the line-based preprocessor cannot see it
|
||||
* and `marked` renders it as an ordinary `<blockquote>` with the `[!type]` marker
|
||||
* leaking into the body text — losing the callout type and, for a lead-less list
|
||||
* item, breaking the list marker.
|
||||
*
|
||||
* This post-`marked` DOM pass is NESTING-AGNOSTIC: it walks EVERY `<blockquote>`
|
||||
* (at any depth — inside `<li>`, inside another callout, …) whose first `<p>`
|
||||
* opens with a `[!type]` marker and rewrites it into the same
|
||||
* `<div data-type="callout" data-callout-type="TYPE">` element the top-level
|
||||
* path emits, so a nested callout is parsed identically to a top-level one.
|
||||
* Top-level callouts are already `<div>`s (converted before `marked`), so they
|
||||
* are not `<blockquote>`s here and are left untouched.
|
||||
*/
|
||||
function bridgeNestedCallouts(html: string): string {
|
||||
// Cheap early-out: no callout marker anywhere -> nothing to bridge.
|
||||
if (!html.includes("[!")) {
|
||||
return html;
|
||||
}
|
||||
// Defensive cap (consistent with the other post-marked passes): skip the
|
||||
// bridge for pathologically large inputs rather than running a JSDOM parse.
|
||||
if (html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
|
||||
return html;
|
||||
}
|
||||
const dom = new JSDOM(html);
|
||||
const document = dom.window.document;
|
||||
// Process deepest-first (document order reversed) so a callout nested inside
|
||||
// another callout's blockquote is converted before its parent.
|
||||
const blockquotes = Array.from(
|
||||
document.querySelectorAll("blockquote"),
|
||||
).reverse();
|
||||
for (const bq of blockquotes) {
|
||||
// marked wraps each blockquote text line in a <p>; the marker lives in the
|
||||
// first direct <p> child.
|
||||
const firstP = bq.querySelector(":scope > p");
|
||||
if (!firstP) continue;
|
||||
const match = firstP.innerHTML.match(CALLOUT_MARKER_HTML_RE);
|
||||
if (!match) continue;
|
||||
const type = match[2].toLowerCase();
|
||||
// Strip the marker (and dropped title) from the first paragraph. If that
|
||||
// leaves the paragraph empty (opener on its own line, body in later <p>s),
|
||||
// remove the now-empty paragraph so the callout has no phantom lead block.
|
||||
firstP.innerHTML = firstP.innerHTML.slice(match[0].length);
|
||||
if (firstP.innerHTML.trim() === "") {
|
||||
firstP.remove();
|
||||
}
|
||||
const div = document.createElement("div");
|
||||
div.setAttribute("data-type", "callout");
|
||||
div.setAttribute("data-callout-type", type);
|
||||
while (bq.firstChild) {
|
||||
div.appendChild(bq.firstChild);
|
||||
}
|
||||
bq.replaceWith(div);
|
||||
}
|
||||
return document.body.innerHTML;
|
||||
}
|
||||
|
||||
/**
|
||||
* Re-apply ATTACHED HTML comments (#293 canon) before the DOM/generateJSON
|
||||
* stage drops them.
|
||||
@@ -999,10 +1070,15 @@ export async function markdownToProseMirror(
|
||||
): Promise<any> {
|
||||
const withCallouts = await preprocessCallouts(markdownContent);
|
||||
const html = await markedInstance.parse(withCallouts);
|
||||
// Reconstruct callouts that survived as `[!type]`-prefixed blockquotes because
|
||||
// they were nested (e.g. inside a list item) and the column-0-anchored line
|
||||
// preprocessor could not see them. Runs first so the rest of the pipeline
|
||||
// treats a nested callout identically to a top-level one.
|
||||
const withNestedCallouts = bridgeNestedCallouts(html);
|
||||
// Materialize comment directives (#293 #9 attached textAlign; #5 standalone
|
||||
// subpages/pageBreak) while the comment nodes still exist, before generateJSON
|
||||
// drops them.
|
||||
const withAttrs = applyCommentDirectives(html);
|
||||
const withAttrs = applyCommentDirectives(withNestedCallouts);
|
||||
// #293 canon #2: assemble the doc-level footnote list from the `<sup
|
||||
// data-fn-text>` markers (from `^[…]` or the raw-HTML column form) before
|
||||
// generateJSON, so references + definitions materialize into the schema model.
|
||||
|
||||
@@ -0,0 +1,173 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import {
|
||||
convertProseMirrorToMarkdown,
|
||||
markdownToProseMirror,
|
||||
} from 'docmost-client';
|
||||
|
||||
// T6-listnest regression: a callout nested inside a list item is exported
|
||||
// INDENTED (` > [!info]`), which the column-0-anchored line preprocessor cannot
|
||||
// see, so on re-import it degraded to a plain blockquote with the `[!type]`
|
||||
// marker leaking into the body text (confirmed DB corruption). The post-marked
|
||||
// `bridgeNestedCallouts` pass reconstructs the callout regardless of nesting.
|
||||
|
||||
/** Collect every node of a given type anywhere in the doc tree. */
|
||||
function collect(node: any, type: string, acc: any[] = []): any[] {
|
||||
if (!node || typeof node !== 'object') return acc;
|
||||
if (node.type === type) acc.push(node);
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) collect(child, type, acc);
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
|
||||
/** Concatenate all text in a doc tree. */
|
||||
function allText(node: any): string {
|
||||
if (!node || typeof node !== 'object') return '';
|
||||
let s = node.type === 'text' ? node.text ?? '' : '';
|
||||
if (Array.isArray(node.content)) {
|
||||
for (const child of node.content) s += allText(child);
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
// The primary case (a callout WITH a lead paragraph inside the list item) is
|
||||
// covered by the round-trip corpus fixture `13-callout-in-list.json`, which
|
||||
// asserts both byte-stability and canonical equality (doc === doc2). This file
|
||||
// adds the no-lead CONTROL, which the corpus cannot cover: with no lead
|
||||
// paragraph the StarterKit listItem schema hoists the callout out of the item,
|
||||
// so the doc is not canonically stable and cannot live in the corpus loop.
|
||||
describe('T6-listnest: callout nested in a list item', () => {
|
||||
it('no-lead control: a callout as the first list-item child keeps its type and does not leak the list marker', async () => {
|
||||
// With no lead paragraph the StarterKit listItem schema (`paragraph block*`)
|
||||
// cannot hold a callout-first child, so generateJSON hoists the callout out
|
||||
// of the item — that is correct schema normalization. What MUST hold is that
|
||||
// the callout type survives, the `[!danger]` marker does not leak into text,
|
||||
// and the literal `-` list marker does not leak as text (both were the
|
||||
// confirmed pre-fix corruption).
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'bulletList',
|
||||
content: [
|
||||
{
|
||||
type: 'listItem',
|
||||
content: [
|
||||
{
|
||||
type: 'callout',
|
||||
attrs: { type: 'danger' },
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'DangerText' }] },
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const md1 = convertProseMirrorToMarkdown(doc);
|
||||
const doc2 = await markdownToProseMirror(md1);
|
||||
|
||||
// The callout survives with its type; it did NOT degrade to a blockquote.
|
||||
const callouts = collect(doc2, 'callout');
|
||||
expect(callouts).toHaveLength(1);
|
||||
expect(callouts[0].attrs.type).toBe('danger');
|
||||
expect(allText(callouts[0])).toBe('DangerText');
|
||||
expect(collect(doc2, 'blockquote')).toHaveLength(0);
|
||||
|
||||
// No marker leak and no literal list-marker leak.
|
||||
const text = allText(doc2);
|
||||
expect(text).not.toContain('[!');
|
||||
expect(text).not.toContain('-');
|
||||
});
|
||||
});
|
||||
|
||||
// False-positive controls for `bridgeNestedCallouts`: it must reconstruct a
|
||||
// callout ONLY from a blockquote whose FIRST paragraph OPENS with a `[!type]`
|
||||
// marker (mirroring the column-0 `preprocessCallouts` rule), and must NOT
|
||||
// over-convert an ordinary nested blockquote. If the detection were loosened to
|
||||
// fire on any blockquote merely CONTAINING `[!`, both tests below would fail.
|
||||
describe('T6-listnest: bridgeNestedCallouts does NOT over-convert', () => {
|
||||
it('a REAL blockquote nested in a list item round-trips STILL as a blockquote', async () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'bulletList',
|
||||
content: [
|
||||
{
|
||||
type: 'listItem',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'item lead' }] },
|
||||
{
|
||||
type: 'blockquote',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'text', text: 'just a quote' }],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const md1 = convertProseMirrorToMarkdown(doc);
|
||||
const doc2 = await markdownToProseMirror(md1);
|
||||
|
||||
// It stays a blockquote nested in the list item; NO callout is invented.
|
||||
const listItems = collect(doc2, 'listItem');
|
||||
expect(listItems).toHaveLength(1);
|
||||
expect(listItems[0].content.map((c: any) => c.type)).toEqual([
|
||||
'paragraph',
|
||||
'blockquote',
|
||||
]);
|
||||
expect(collect(doc2, 'callout')).toHaveLength(0);
|
||||
const blockquotes = collect(doc2, 'blockquote');
|
||||
expect(blockquotes).toHaveLength(1);
|
||||
expect(allText(blockquotes[0])).toBe('just a quote');
|
||||
// Byte-stable on re-export (no callout div injected on the way in).
|
||||
expect(convertProseMirrorToMarkdown(doc2)).toBe(md1);
|
||||
});
|
||||
|
||||
it('a nested blockquote with `[!` MID-line (not at the opener) stays a blockquote', async () => {
|
||||
const doc = {
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'bulletList',
|
||||
content: [
|
||||
{
|
||||
type: 'listItem',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'item lead' }] },
|
||||
{
|
||||
type: 'blockquote',
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'text', text: 'see [!note] inline' }],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
};
|
||||
const md1 = convertProseMirrorToMarkdown(doc);
|
||||
const doc2 = await markdownToProseMirror(md1);
|
||||
|
||||
// The `[!note]` is mid-paragraph, not an opener -> NO callout is created.
|
||||
expect(collect(doc2, 'callout')).toHaveLength(0);
|
||||
const blockquotes = collect(doc2, 'blockquote');
|
||||
expect(blockquotes).toHaveLength(1);
|
||||
// The literal `[!note]` text is preserved inside the surviving blockquote.
|
||||
expect(allText(blockquotes[0])).toContain('[!note]');
|
||||
expect(convertProseMirrorToMarkdown(doc2)).toBe(md1);
|
||||
});
|
||||
});
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "callout",
|
||||
"attrs": {
|
||||
"type": "info"
|
||||
},
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "line1"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "line2"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,50 @@
|
||||
{
|
||||
"type": "doc",
|
||||
"content": [
|
||||
{
|
||||
"type": "bulletList",
|
||||
"content": [
|
||||
{
|
||||
"type": "listItem",
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "item lead"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "callout",
|
||||
"attrs": {
|
||||
"type": "info"
|
||||
},
|
||||
"content": [
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "ca"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "paragraph",
|
||||
"content": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "cb"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -581,16 +581,18 @@ describe('convertProseMirrorToMarkdown', () => {
|
||||
}),
|
||||
);
|
||||
// NOTE(review): the spec predicted ':::warning\nline1\n\nline2\n:::' (a
|
||||
// The converter joins the callout's rendered children with a single '\n'
|
||||
// and emits an Obsidian-native callout: a `> [!type]` opener plus one
|
||||
// `>`-prefixed body line per content line. We pin the lowercasing
|
||||
// (WARNING -> warning) and the multi-child join.
|
||||
expect(out).toBe('> [!warning]\n> line1\n> line2');
|
||||
// The converter emits an Obsidian-native callout: a `> [!type]` opener plus
|
||||
// one `>`-prefixed body line per content line, with block-level children
|
||||
// separated by a blank `>` line (mirrors the blockquote serializer) so a
|
||||
// multi-paragraph callout does not collapse into one paragraph on re-import.
|
||||
// We pin the lowercasing (WARNING -> warning) and the multi-child join.
|
||||
expect(out).toBe('> [!warning]\n> line1\n>\n> line2');
|
||||
// The type is lowercased (an uppercase `[!WARNING]` would not re-import).
|
||||
expect(out.startsWith('> [!warning]\n')).toBe(true);
|
||||
expect(out).not.toContain('[!WARNING]');
|
||||
// Both paragraph children are present, each blockquote-prefixed.
|
||||
expect(out).toContain('> line1\n> line2');
|
||||
// Both paragraph children are present, each blockquote-prefixed and
|
||||
// separated by a blank `>` line.
|
||||
expect(out).toContain('> line1\n>\n> line2');
|
||||
});
|
||||
|
||||
// Spec 4 — blockquote per-line prefixer over a multi-line nested callout.
|
||||
@@ -609,11 +611,12 @@ describe('convertProseMirrorToMarkdown', () => {
|
||||
);
|
||||
// NOTE(review): the spec predicted '> :::info\n> a\n>\n> b\n> :::',
|
||||
// assuming the nested callout body contains a blank line between 'a' and
|
||||
// The nested callout renders as an Obsidian callout '> [!info]\n> a\n> b'
|
||||
// (single-'\n' join, no blank line). The outer blockquote prefixer then
|
||||
// prefixes each of those lines with '> ' again, yielding a doubly-nested
|
||||
// blockquote — the realistic per-line-prefix loop over a multi-line child.
|
||||
expect(out).toBe('> > [!info]\n> > a\n> > b');
|
||||
// The nested callout renders as an Obsidian callout '> [!info]\n> a\n>\n> b'
|
||||
// (block-level children separated by a blank `>` line). The outer blockquote
|
||||
// prefixer then prefixes each of those lines with '> ' again, yielding a
|
||||
// doubly-nested blockquote — the realistic per-line-prefix loop over a
|
||||
// multi-block child.
|
||||
expect(out).toBe('> > [!info]\n> > a\n> >\n> > b');
|
||||
// Every produced line carries the '> ' prefix (no line escapes to col 0).
|
||||
for (const line of out.split('\n')) {
|
||||
expect(line.startsWith('>')).toBe(true);
|
||||
|
||||
Reference in New Issue
Block a user