fix(git-sync,converter): 3 HIGH round-trip/data-loss fixes found by QA (#359)

Implements the 3 HIGH fixes agent_qa found via the RALPH test cycle (analysis theirs, implementation here), fixtures-first. - T2E-1 [converter]: a multi-paragraph callout collapsed to one paragraph on round-trip. markdown-converter `case "callout"` now joins block children with "\n>\n" (a blank ">" separator), byte-identical to the blockquote serializer, so two paragraphs survive re-import. Fixture 12-callout-multiblock; two existing tests updated (their old expectations encoded the collapse bug). - T6-listnest [converter]: a callout/blockquote nested in a list item corrupted on round-trip (callout type lost, `[!type]` leaked into text — confirmed DB corruption). Add bridgeNestedCallouts: a post-marked, nesting-agnostic JSDOM pass that reconstructs callouts from `[!type]`-opening blockquotes at ANY depth (the indented ` > [!type]` form the column-0-anchored preprocessor misses), emitting the same callout div as the top-level path (disjoint inputs, no double-processing). Strict `^[!type]` guard, so a real blockquote — or one with `[!` mid-line — is NOT converted. Fixture 13-callout-in-list + a no-lead control + false-positive controls (proven non-vacuous). - D-P3-1 [git-sync]: a "ghost" file (a tracked id that was never a page) was silently absence-deleted on pull-reconcile -> data loss. Add GitSyncClient.pageIdsExist (datasource: workspace-scoped SELECT over any space incl. trashed/moved, non-UUID ids filtered first); cycle.ts computes the candidate-delete set and passes existing ids as `deletableIds` into planReconciliation, which now absence-deletes ONLY real page rows — a ghost is preserved. A cycle-level e2e test proves the ghost survives runCycle end-to-end. Suites green: prosemirror-markdown 687, git-sync 272 (no type errors), datasource 36, orchestrator 26; both packages tsc clean. Scope: the 3 fixes + tests/fixtures only, no develop re-merge. T4E-esc (LOW escaping) left for the maintainer, per agent_qa. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-07-05 21:27:21 +03:00
parent 6956563961
commit 5ac08b8f4b
17 changed files with 657 additions and 23 deletions
@@ -114,6 +114,10 @@ function build(opts: BuildOptions = {}): Built {
  // The read-side / write-side client the datasource hands back.
  const client: Record<string, AnyMock> = {
    listSpaceTree: jest.fn(async () => ({ pages: [], complete: true })),
+    // Historical behavior: every candidate id is a real page row. Present so the
+    // fake does not rely on `listTrackedFiles -> []` to avoid the ghost-guard
+    // probe, and won't break if a future test seeds tracked files.
+    pageIdsExist: jest.fn(async (ids: string[]) => ids),
    deletePage: jest.fn(async () => undefined),
    createPage: jest.fn(async () => undefined),
    updatePageBody: jest.fn(async () => undefined),
@@ -200,6 +200,51 @@ describe('GitmostDataSourceService', () => {
    });
  });

+  // D-P3-1 ghost guard: the pull reconciler asks which candidate-delete ids are
+  // REAL page rows before deleting a tracked vault file. Only rows returned here
+  // may be absence-deleted; a ghost id (never a page) is preserved.
+  describe('pageIdsExist (D-P3-1 ghost guard)', () => {
+    const UUID_A = '11111111-1111-4111-8111-111111111111';
+    const UUID_B = '22222222-2222-4222-8222-222222222222';
+
+    it('returns the subset that are real page rows, workspace-scoped (any space/state)', async () => {
+      // The DB reports only UUID_A exists as a row.
+      const { service, mocks } = build([{ id: UUID_A }]);
+      const res = await service.bind(CTX).pageIdsExist([UUID_A, UUID_B]);
+      expect(res).toEqual([UUID_A]);
+      // The query is workspace-scoped and restricted to the candidate ids.
+      // Crucially there is NO deletedAt / spaceId filter: a trashed page or a
+      // page moved to another space still counts as "exists" (its file is
+      // cleaned up), while a ghost has no row (its file is preserved).
+      expect(mocks.db.selectFrom).toHaveBeenCalledWith('pages');
+      const qb = mocks.db.selectFrom.mock.results[0].value;
+      expect(qb.where).toHaveBeenCalledWith('workspaceId', '=', 'ws-1');
+      expect(qb.where).toHaveBeenCalledWith('id', 'in', [UUID_A, UUID_B]);
+      const filterCols = qb.where.mock.calls.map((c: any[]) => c[0]);
+      expect(filterCols).not.toContain('deletedAt');
+      expect(filterCols).not.toContain('spaceId');
+    });
+
+    it('filters malformed non-UUID ids BEFORE the predicate (and skips the query when none remain)', async () => {
+      // A mix of one valid uuid + malformed tokens: only the valid uuid reaches
+      // the predicate, so a bad `gitmost_id` cannot trigger a 22P02 that would
+      // reject the whole existence probe.
+      const { service, mocks } = build([]);
+      const res = await service
+        .bind(CTX)
+        .pageIdsExist([UUID_A, 'not-a-uuid', '[unclosed']);
+      const qb = mocks.db.selectFrom.mock.results[0].value;
+      expect(qb.where).toHaveBeenCalledWith('id', 'in', [UUID_A]);
+      expect(res).toEqual([]);
+
+      // When EVERY candidate is malformed, the query is skipped entirely.
+      const { service: s2, mocks: m2 } = build([]);
+      const res2 = await s2.bind(CTX).pageIdsExist(['not-a-uuid', '[unclosed']);
+      expect(res2).toEqual([]);
+      expect(m2.db.selectFrom).not.toHaveBeenCalled();
+    });
+  });
+
  describe('getPageJson', () => {
    it('returns the engine page shape with ISO updatedAt + content', async () => {
      const { service, mocks } = build();
@@ -78,6 +78,7 @@ export class GitmostDataSourceService {
    return {
      listSpaceTree: (spaceId, rootPageId) =>
        this.listSpaceTree(ctx, spaceId, rootPageId),
+      pageIdsExist: (pageIds) => this.pageIdsExist(ctx, pageIds),
      getPageJson: (pageId) => this.getPageJson(ctx, pageId),
      // The id-scoped WRITE ops are wrapped so a malformed (non-UUID) `pageId`
      // from a broken vault `gitmost_id` frontmatter cannot wedge the space's sync
@@ -191,6 +192,36 @@ export class GitmostDataSourceService {
    return { pages, complete: true };
  }

+  /**
+   * Existence probe for the pull-side ghost guard (D-P3-1). Return the subset of
+   * `pageIds` that correspond to a REAL page ROW in this workspace — INCLUDING
+   * soft-deleted (trashed) rows and pages in ANY OTHER space (no `deletedAt` /
+   * `spaceId` filter), i.e. any id that has ever been a page. The engine only
+   * absence-deletes a tracked vault file whose id is returned here; a GHOST id
+   * (a git file whose id was never a page) has no row and is preserved.
+   *
+   * Malformed (non-UUID) ids are filtered out BEFORE the `id = ANY(...)`
+   * predicate: an invalid uuid literal would make Postgres reject the whole
+   * query (error 22P02), and a ghost id is often exactly such a hand-edited
+   * token. A filtered-out id is never returned, so its file is preserved.
+   */
+  private async pageIdsExist(
+    ctx: GitSyncBindContext,
+    pageIds: string[],
+  ): Promise<string[]> {
+    const validIds = pageIds.filter((id) => isValidUUID(id));
+    if (validIds.length === 0) return [];
+
+    const rows = await this.db
+      .selectFrom('pages')
+      .select(['id'])
+      .where('workspaceId', '=', ctx.workspaceId)
+      .where('id', 'in', validIds)
+      .execute();
+
+    return rows.map((row) => row.id);
+  }
+
  /**
   * One page WITH its ProseMirror body content (editor-ext schema). `updatedAt`
   * is serialized to an ISO string for the loop-guard.
@@ -51,6 +51,23 @@ export interface GitSyncClient {
    rootPageId?: string,
  ): Promise<{ pages: GitSyncPageNodeLite[]; complete: boolean }>;

+  /**
+   * Existence probe for the pull-side ghost guard (D-P3-1). Given a set of
+   * candidate pageIds, return the subset that corresponds to a REAL page ROW —
+   * INCLUDING soft-deleted (trashed) and pages in ANY OTHER space, i.e. any
+   * `id` that has ever been a page (`SELECT id FROM pages WHERE id = ANY(...)`,
+   * workspace-scoped). Malformed (non-UUID) ids are filtered out before the
+   * query and are never returned.
+   *
+   * The pull reconciler only absence-deletes a tracked file whose pageId is
+   * returned here: a deleted/moved/trashed page HAS a row -> its vault file is
+   * cleaned up; a GHOST id (a git file whose id was NEVER a page) has NO row ->
+   * it is preserved rather than silently deleted (the data-loss bug). Called on
+   * only the small candidate-delete set (tracked ids absent from the live tree),
+   * so the query is bounded and usually empty.
+   */
+  pageIdsExist(pageIds: string[]): Promise<string[]>;
+
  /**
   * One page WITH its ProseMirror body content. `applyPullActions` reads
   * `id`, `slugId`, `title`, `parentPageId`, `spaceId` (for the file meta) and
@@ -170,10 +170,38 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
    });

    const tree = await client.listSpaceTree(spaceId);
+
+    // D-P3-1 ghost guard: an absence-delete must not silently remove a git file
+    // whose pageId was NEVER a page (a hand-authored file with an unknown id).
+    // Compute the candidate-delete set (tracked ids absent from the live tree)
+    // and ask the datasource which of them are REAL page rows (incl. trashed /
+    // other spaces). Only those may be absence-deleted; a ghost id is preserved
+    // (adopted/skipped by the push side).
+    //
+    // Size note: on a COMPLETE fetch this set is usually small/empty (only
+    // genuinely removed pages look absent), so the `id IN (...)` probe is cheap.
+    // On an INCOMPLETE fetch (`tree.complete === false`) MANY live pages look
+    // absent, so the set — and the probe — can be large. That is only a perf
+    // consideration, not a correctness one: `decideAbsenceDeletions` (inside
+    // `computePullActions`) still SUPPRESSES every absence delete on an
+    // incomplete fetch, so no ghost-guarded deletion is applied that cycle
+    // regardless of what the probe returns.
+    const livePageIds = new Set(
+      tree.pages.filter((p) => p && p.id).map((p) => p.id),
+    );
+    const candidateDeleteIds = existing
+      .map((e) => e.pageId)
+      .filter((id) => !livePageIds.has(id));
+    const deletableIds =
+      candidateDeleteIds.length > 0
+        ? await client.pageIdsExist(candidateDeleteIds)
+        : [];
+
    const pullActions = computePullActions({
      pages: tree.pages,
      treeComplete: tree.complete,
      existing,
+      deletableIds,
    });

    // Bail before the first destructive write phase if the lock was lost.
@@ -148,6 +148,14 @@ export interface PullActionsInput {
  treeComplete: boolean;
  /** Parsed tracked files: `{ pageId, relPath }` (from `readExisting`). */
  existing: { pageId: string; relPath: string }[];
+  /**
+   * The subset of tracked pageIds that correspond to a REAL page row (D-P3-1
+   * ghost guard, from `client.pageIdsExist`). Only ids in this set may be
+   * absence-deleted; a ghost id (never a page) is preserved. When omitted, all
+   * absent ids are deletable (the historical behavior; pure unit callers that
+   * do not model ghosts).
+   */
+  deletableIds?: string[];
 }

 /**
@@ -191,7 +199,7 @@ export interface PullActions {
 * thin `applyPullActions`.
 */
 export function computePullActions(input: PullActionsInput): PullActions {
-  const { pages, treeComplete, existing } = input;
+  const { pages, treeComplete, existing, deletableIds } = input;
  const layout = buildVaultLayout(pages);

  const live: LiveEntry[] = [];
@@ -206,8 +214,14 @@ export function computePullActions(input: PullActionsInput): PullActions {
  }

  // Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
-  // `plan.moved` carries move old-path removals separately.
-  const plan = planReconciliation(live, existing);
+  // `plan.moved` carries move old-path removals separately. The ghost guard
+  // (D-P3-1) gates absence-deletes to ids that are a real page row; when
+  // `deletableIds` is omitted, all absent ids are deletable (historical).
+  const plan = planReconciliation(
+    live,
+    existing,
+    deletableIds === undefined ? undefined : new Set(deletableIds),
+  );

  // Decide whether the ABSENCE-based deletions may be applied this cycle
  // (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard.
@@ -96,6 +96,16 @@ export interface ReconciliationPlan {
 export function planReconciliation(
  live: LiveEntry[],
  existing: ExistingEntry[],
+  /**
+   * The subset of tracked pageIds that correspond to a REAL page row (D-P3-1
+   * ghost guard). When provided, a tracked file whose pageId is ABSENT from
+   * `live` is absence-deleted ONLY if its id is in this set — a deleted/moved/
+   * trashed page has a row (delete its stale vault file), while a GHOST id (a
+   * git file whose id was never a page) has NO row and is PRESERVED. When
+   * `undefined` (pure unit callers that do not model ghosts), every absent id is
+   * treated as deletable — the historical behavior.
+   */
+  deletableIds?: ReadonlySet<string>,
 ): ReconciliationPlan {
  // Desired path for each live pageId.
  const liveByPageId = new Map<string, string>();
@@ -119,9 +129,17 @@ export function planReconciliation(
  for (const ex of existing) {
    const liveRel = liveByPageId.get(ex.pageId);
    if (liveRel === undefined) {
-      // Tracked page is gone from the live tree -> absence delete.
+      // Tracked page is gone from the live tree -> candidate absence delete.
+      // D-P3-1: a candidate is only deleted when its id corresponds to a real
+      // page row (deleted/moved/trashed). A GHOST id (never a page) is NOT in
+      // `deletableIds` and is preserved rather than silently deleted. When the
+      // gate is not supplied (pure unit callers), fall back to the historical
+      // "all absent ids deletable" behavior.
+      const deletable = deletableIds === undefined || deletableIds.has(ex.pageId);
      // Never queue a path a live page will (re)write (path reuse -> no loss).
-      if (!liveTargetPaths.has(ex.relPath)) toDeleteSet.add(ex.relPath);
+      if (deletable && !liveTargetPaths.has(ex.relPath)) {
+        toDeleteSet.add(ex.relPath);
+      }
      continue;
    }
    if (liveRel !== ex.relPath) {
@@ -77,6 +77,8 @@ const nodeFs: CycleFs = {
 function makeEmptyClientFake() {
  return {
    listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })),
+    // Default: every candidate id is a real page row (historical behavior).
+    pageIdsExist: vi.fn(async (ids: string[]) => ids),
    getPageJson: vi.fn(),
    importPageMarkdown: vi.fn(async () => ({ updatedAt: "2026-06-20T00:00:00.000Z" })),
    createPage: vi.fn(async (title: string) => ({
@@ -1,5 +1,6 @@
 import { describe, it, expect, vi } from "vitest";
 import { runCycle, type RunCycleDeps } from "../src/engine/cycle";
+import { serializePageFile } from "@docmost/prosemirror-markdown";

 // A fake VaultGit recording the staging calls. An EMPTY vault/tree lets the real
 // readExisting/computePullActions/applyPullActions/runPush run trivially (no
@@ -46,6 +47,8 @@ function baseDeps(vault: any, over: Partial<RunCycleDeps> = {}): RunCycleDeps {
    spaceId: "space-1",
    client: {
      listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })),
+      // Default: every candidate id is a real page row (historical behavior).
+      pageIdsExist: vi.fn(async (ids: string[]) => ids),
      getPageJson: vi.fn(),
      importPageMarkdown: vi.fn(),
      createPage: vi.fn(),
@@ -235,4 +238,88 @@ describe("runCycle (composition)", () => {
    expect(deps.client.listSpaceTree).toHaveBeenCalledTimes(1);
    expect(vault.diffNameStatus).not.toHaveBeenCalled();
  });
+
+  // D-P3-1 ghost guard, end-to-end through runCycle: a tracked file whose id is
+  // absent from live AND is NOT returned by `pageIdsExist` (a ghost — never a
+  // page) must SURVIVE the whole cycle (no rm), while a sibling id that IS
+  // returned (a genuinely deleted page row) is absence-deleted. This proves the
+  // guard flows from pageIdsExist -> computePullActions -> applyPullActions,
+  // not just at the pure `planReconciliation` unit.
+  it("GHOST GUARD (e2e): a ghost tracked file SURVIVES runCycle; a real deleted-page file is removed", async () => {
+    const ghostId = "019f2500-dead-7000-8000-000000000009"; // never a page
+    const deletedId = "019f2500-0000-7000-8000-000000000001"; // a real (deleted) row
+    const liveId = "019f2500-0000-7000-8000-0000000000aa"; // keeps empty-live from firing
+
+    // Two tracked files, both ABSENT from the live tree (deletion candidates).
+    const vault = fakeVault({
+      listTrackedFiles: vi.fn(async () => ["Ghost.md", "Deleted.md"]),
+    });
+
+    // The datasource reports ONLY the deleted page as a real row; the ghost has
+    // no row (a PROPER SUBSET of the probed ids, not the identity shim).
+    const pageIdsExist = vi.fn(async (ids: string[]) =>
+      ids.filter((id) => id === deletedId),
+    );
+    const rm = vi.fn(async () => undefined);
+
+    const deps = baseDeps(vault, {
+      fs: {
+        readFile: vi.fn(async (absPath: string) => {
+          if (absPath.includes("Ghost.md"))
+            return serializePageFile(ghostId, "a body authored in git");
+          if (absPath.includes("Deleted.md"))
+            return serializePageFile(deletedId, "a deleted page body");
+          return "";
+        }),
+        writeFile: vi.fn(async () => undefined),
+        mkdir: vi.fn(async () => undefined),
+        rm,
+        lstat: vi.fn(async () => ({ isSymbolicLink: false })),
+        realpath: vi.fn(async (p: string) => p),
+      },
+      client: {
+        ...baseDeps(vault).client,
+        // A single live page so the empty-live suppression does NOT fire (which
+        // would mask the guard by suppressing every delete this cycle).
+        listSpaceTree: vi.fn(async () => ({
+          pages: [
+            {
+              id: liveId,
+              slugId: "live",
+              title: "Live",
+              parentPageId: null,
+              position: "a0",
+              hasChildren: false,
+            },
+          ],
+          complete: true,
+        })),
+        pageIdsExist,
+        getPageJson: vi.fn(async (pageId: string) => ({
+          id: pageId,
+          slugId: "live",
+          title: "Live",
+          parentPageId: null,
+          spaceId: "space-1",
+          updatedAt: "2026-06-20T00:00:00.000Z",
+          content: { type: "doc", content: [] },
+        })),
+      } as any,
+    });
+
+    const res = await runCycle(deps);
+    expect(res.ran).toBe(true);
+
+    // The guard probed the datasource for EXACTLY the absent candidate ids.
+    expect(pageIdsExist).toHaveBeenCalledTimes(1);
+    expect(new Set(pageIdsExist.mock.calls[0][0])).toEqual(
+      new Set([ghostId, deletedId]),
+    );
+
+    // The real deleted-page file is removed; the ghost file is PRESERVED.
+    const rmPaths = rm.mock.calls.map((c) => c[0] as string);
+    expect(rmPaths.some((p) => p.includes("Deleted.md"))).toBe(true);
+    expect(rmPaths.some((p) => p.includes("Ghost.md"))).toBe(false);
+    expect(res.pull?.deleted).toBe(1);
+  });
 });
@@ -65,6 +65,16 @@ describe('GitSyncClient contract (type-level)', () => {
    expect(true).toBe(true);
  });

+  it('pageIdsExist(ids) -> ids subset (D-P3-1 ghost guard seam)', () => {
+    expectTypeOf<GitSyncClient['pageIdsExist']>().parameters.toEqualTypeOf<
+      [string[]]
+    >();
+    expectTypeOf<
+      Awaited<ReturnType<GitSyncClient['pageIdsExist']>>
+    >().toEqualTypeOf<string[]>();
+    expect(true).toBe(true);
+  });
+
  it('a structurally-correct adapter satisfies GitSyncClient (drift => compile error)', () => {
    // A minimal dummy adapter mirroring the EXACT result shapes the engine reads.
    // The `satisfies GitSyncClient` clause is the contract guard: any drift in a
@@ -74,6 +84,7 @@ describe('GitSyncClient contract (type-level)', () => {
        pages: [] as GitSyncPageNodeLite[],
        complete: true,
      }),
+      pageIdsExist: async (_pageIds: string[]) => [] as string[],
      getPageJson: async (pageId: string) => ({
        id: pageId,
        slugId: 'slug',
@@ -130,6 +141,7 @@ describe('GitSyncClient contract (type-level)', () => {
    // in BOTH directions).
    const bad = {
      listSpaceTree: async () => ({ pages: [] as GitSyncPageNodeLite[], complete: true }),
+      pageIdsExist: async () => [] as string[],
      getPageJson: async (pageId: string) => ({
        id: pageId,
        slugId: 's',
@@ -59,6 +59,43 @@ describe('planReconciliation', () => {
    expect(plan.moved).toEqual([]);
  });

+  // D-P3-1 ghost guard: when `deletableIds` is supplied, an absence-delete fires
+  // ONLY for an id that is a REAL page row. A tracked file whose id was NEVER a
+  // page (a hand-authored git file with an unknown id) is absent from `live` AND
+  // absent from `deletableIds` -> it MUST be preserved, not silently deleted.
+  it('GHOST GUARD: an absent id NOT in deletableIds is PRESERVED (not deleted)', () => {
+    const live: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/Keep.md' }];
+    const existing: ExistingEntry[] = [
+      { pageId: 'p1', relPath: 'Space/Keep.md' },
+      // A ghost: its id is not live and not a real page row.
+      { pageId: 'ghost', relPath: 'Space/Ghost.md' },
+      // A genuinely deleted page: absent from live but IS a real row.
+      { pageId: 'gone', relPath: 'Space/Gone.md' },
+    ];
+    // Only the real page row ('gone') is deletable; 'ghost' has no row.
+    const deletableIds = new Set(['gone']);
+    const plan = planReconciliation(live, existing, deletableIds);
+    expect(plan.toWrite).toEqual([{ pageId: 'p1', relPath: 'Space/Keep.md' }]);
+    // The genuine delete is applied; the ghost file is preserved.
+    expect(plan.toDelete).toEqual(['Space/Gone.md']);
+    expect(plan.toDelete).not.toContain('Space/Ghost.md');
+    expect(plan.moved).toEqual([]);
+  });
+
+  // The empty gate is the maximally-safe case: no id is a real row, so NOTHING
+  // is absence-deleted (every absent tracked file is treated as a ghost). This
+  // is what a cycle sees when `pageIdsExist` returns nothing for the candidates.
+  it('GHOST GUARD: an EMPTY deletableIds set suppresses every absence delete', () => {
+    const live: LiveEntry[] = [{ pageId: 'p1', relPath: 'Space/Keep.md' }];
+    const existing: ExistingEntry[] = [
+      { pageId: 'p1', relPath: 'Space/Keep.md' },
+      { pageId: 'gone', relPath: 'Space/Gone.md' },
+    ];
+    const plan = planReconciliation(live, existing, new Set<string>());
+    expect(plan.toDelete).toEqual([]);
+    expect(plan.moved).toEqual([]);
+  });
+
  it('NO-OP: live and existing identical -> writes (re-emit) but no deletes/moves', () => {
    const live: LiveEntry[] = [
      { pageId: 'p1', relPath: 'A.md' },
@@ -787,12 +787,18 @@ export function convertProseMirrorToMarkdown(
        // blockquote-prefixed; a blank line becomes a bare `>` so the callout is
        // not split.
        const calloutType = (node.attrs?.type || "info").toLowerCase();
+        // Prefix EVERY line of EVERY child with "> " and separate block-level
+        // children with a blank ">" line (mirrors the blockquote serializer),
+        // so a multi-paragraph callout does not collapse into one paragraph on
+        // re-import.
        const calloutBody = nodeContent
-          .map(processNode)
-          .join("\n")
-          .split("\n")
-          .map((l: string) => (l.length ? `> ${l}` : ">"))
-          .join("\n");
+          .map((n: any) =>
+            processNode(n)
+              .split("\n")
+              .map((line: string) => (line.length ? `> ${line}` : ">"))
+              .join("\n"),
+          )
+          .join("\n>\n");
        return `> [!${calloutType}]\n${calloutBody}`;
      }

@@ -548,6 +548,77 @@ function bridgeTaskLists(html: string): string {
  return document.body.innerHTML;
 }

+/**
+ * Matches a callout marker at the START of a blockquote's first paragraph:
+ * `[!type]` plus an optional Obsidian title that runs to the end of that line.
+ * The whole match (marker + title + trailing newline) is stripped; the Docmost
+ * callout schema has no title slot, so — exactly like the line-based
+ * `preprocessCallouts` opener — the title is dropped. The `\w+` requires a bare
+ * `[!word]`, so a real blockquote whose text merely contains `[!` does not match.
+ */
+const CALLOUT_MARKER_HTML_RE = /^(\s*)\[!(\w+)\][^\n]*(?:\n|$)/;
+
+/**
+ * Reconstruct callouts that survived `marked` as `[!type]`-prefixed blockquotes.
+ *
+ * `preprocessCallouts` recognizes a callout only when its `> [!type]` opener sits
+ * at column 0 (the regex is `^`-anchored). A callout NESTED inside a list item is
+ * exported INDENTED (`  > [!info]`), so the line-based preprocessor cannot see it
+ * and `marked` renders it as an ordinary `<blockquote>` with the `[!type]` marker
+ * leaking into the body text — losing the callout type and, for a lead-less list
+ * item, breaking the list marker.
+ *
+ * This post-`marked` DOM pass is NESTING-AGNOSTIC: it walks EVERY `<blockquote>`
+ * (at any depth — inside `<li>`, inside another callout, …) whose first `<p>`
+ * opens with a `[!type]` marker and rewrites it into the same
+ * `<div data-type="callout" data-callout-type="TYPE">` element the top-level
+ * path emits, so a nested callout is parsed identically to a top-level one.
+ * Top-level callouts are already `<div>`s (converted before `marked`), so they
+ * are not `<blockquote>`s here and are left untouched.
+ */
+function bridgeNestedCallouts(html: string): string {
+  // Cheap early-out: no callout marker anywhere -> nothing to bridge.
+  if (!html.includes("[!")) {
+    return html;
+  }
+  // Defensive cap (consistent with the other post-marked passes): skip the
+  // bridge for pathologically large inputs rather than running a JSDOM parse.
+  if (html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
+    return html;
+  }
+  const dom = new JSDOM(html);
+  const document = dom.window.document;
+  // Process deepest-first (document order reversed) so a callout nested inside
+  // another callout's blockquote is converted before its parent.
+  const blockquotes = Array.from(
+    document.querySelectorAll("blockquote"),
+  ).reverse();
+  for (const bq of blockquotes) {
+    // marked wraps each blockquote text line in a <p>; the marker lives in the
+    // first direct <p> child.
+    const firstP = bq.querySelector(":scope > p");
+    if (!firstP) continue;
+    const match = firstP.innerHTML.match(CALLOUT_MARKER_HTML_RE);
+    if (!match) continue;
+    const type = match[2].toLowerCase();
+    // Strip the marker (and dropped title) from the first paragraph. If that
+    // leaves the paragraph empty (opener on its own line, body in later <p>s),
+    // remove the now-empty paragraph so the callout has no phantom lead block.
+    firstP.innerHTML = firstP.innerHTML.slice(match[0].length);
+    if (firstP.innerHTML.trim() === "") {
+      firstP.remove();
+    }
+    const div = document.createElement("div");
+    div.setAttribute("data-type", "callout");
+    div.setAttribute("data-callout-type", type);
+    while (bq.firstChild) {
+      div.appendChild(bq.firstChild);
+    }
+    bq.replaceWith(div);
+  }
+  return document.body.innerHTML;
+}
+
 /**
 * Re-apply ATTACHED HTML comments (#293 canon) before the DOM/generateJSON
 * stage drops them.
@@ -999,10 +1070,15 @@ export async function markdownToProseMirror(
 ): Promise<any> {
  const withCallouts = await preprocessCallouts(markdownContent);
  const html = await markedInstance.parse(withCallouts);
+  // Reconstruct callouts that survived as `[!type]`-prefixed blockquotes because
+  // they were nested (e.g. inside a list item) and the column-0-anchored line
+  // preprocessor could not see them. Runs first so the rest of the pipeline
+  // treats a nested callout identically to a top-level one.
+  const withNestedCallouts = bridgeNestedCallouts(html);
  // Materialize comment directives (#293 #9 attached textAlign; #5 standalone
  // subpages/pageBreak) while the comment nodes still exist, before generateJSON
  // drops them.
-  const withAttrs = applyCommentDirectives(html);
+  const withAttrs = applyCommentDirectives(withNestedCallouts);
  // #293 canon #2: assemble the doc-level footnote list from the `<sup
  // data-fn-text>` markers (from `^[…]` or the raw-HTML column form) before
  // generateJSON, so references + definitions materialize into the schema model.
@@ -0,0 +1,173 @@
+import { describe, expect, it } from 'vitest';
+import {
+  convertProseMirrorToMarkdown,
+  markdownToProseMirror,
+} from 'docmost-client';
+
+// T6-listnest regression: a callout nested inside a list item is exported
+// INDENTED (`  > [!info]`), which the column-0-anchored line preprocessor cannot
+// see, so on re-import it degraded to a plain blockquote with the `[!type]`
+// marker leaking into the body text (confirmed DB corruption). The post-marked
+// `bridgeNestedCallouts` pass reconstructs the callout regardless of nesting.
+
+/** Collect every node of a given type anywhere in the doc tree. */
+function collect(node: any, type: string, acc: any[] = []): any[] {
+  if (!node || typeof node !== 'object') return acc;
+  if (node.type === type) acc.push(node);
+  if (Array.isArray(node.content)) {
+    for (const child of node.content) collect(child, type, acc);
+  }
+  return acc;
+}
+
+/** Concatenate all text in a doc tree. */
+function allText(node: any): string {
+  if (!node || typeof node !== 'object') return '';
+  let s = node.type === 'text' ? node.text ?? '' : '';
+  if (Array.isArray(node.content)) {
+    for (const child of node.content) s += allText(child);
+  }
+  return s;
+}
+
+// The primary case (a callout WITH a lead paragraph inside the list item) is
+// covered by the round-trip corpus fixture `13-callout-in-list.json`, which
+// asserts both byte-stability and canonical equality (doc === doc2). This file
+// adds the no-lead CONTROL, which the corpus cannot cover: with no lead
+// paragraph the StarterKit listItem schema hoists the callout out of the item,
+// so the doc is not canonically stable and cannot live in the corpus loop.
+describe('T6-listnest: callout nested in a list item', () => {
+  it('no-lead control: a callout as the first list-item child keeps its type and does not leak the list marker', async () => {
+    // With no lead paragraph the StarterKit listItem schema (`paragraph block*`)
+    // cannot hold a callout-first child, so generateJSON hoists the callout out
+    // of the item — that is correct schema normalization. What MUST hold is that
+    // the callout type survives, the `[!danger]` marker does not leak into text,
+    // and the literal `-` list marker does not leak as text (both were the
+    // confirmed pre-fix corruption).
+    const doc = {
+      type: 'doc',
+      content: [
+        {
+          type: 'bulletList',
+          content: [
+            {
+              type: 'listItem',
+              content: [
+                {
+                  type: 'callout',
+                  attrs: { type: 'danger' },
+                  content: [
+                    { type: 'paragraph', content: [{ type: 'text', text: 'DangerText' }] },
+                  ],
+                },
+              ],
+            },
+          ],
+        },
+      ],
+    };
+    const md1 = convertProseMirrorToMarkdown(doc);
+    const doc2 = await markdownToProseMirror(md1);
+
+    // The callout survives with its type; it did NOT degrade to a blockquote.
+    const callouts = collect(doc2, 'callout');
+    expect(callouts).toHaveLength(1);
+    expect(callouts[0].attrs.type).toBe('danger');
+    expect(allText(callouts[0])).toBe('DangerText');
+    expect(collect(doc2, 'blockquote')).toHaveLength(0);
+
+    // No marker leak and no literal list-marker leak.
+    const text = allText(doc2);
+    expect(text).not.toContain('[!');
+    expect(text).not.toContain('-');
+  });
+});
+
+// False-positive controls for `bridgeNestedCallouts`: it must reconstruct a
+// callout ONLY from a blockquote whose FIRST paragraph OPENS with a `[!type]`
+// marker (mirroring the column-0 `preprocessCallouts` rule), and must NOT
+// over-convert an ordinary nested blockquote. If the detection were loosened to
+// fire on any blockquote merely CONTAINING `[!`, both tests below would fail.
+describe('T6-listnest: bridgeNestedCallouts does NOT over-convert', () => {
+  it('a REAL blockquote nested in a list item round-trips STILL as a blockquote', async () => {
+    const doc = {
+      type: 'doc',
+      content: [
+        {
+          type: 'bulletList',
+          content: [
+            {
+              type: 'listItem',
+              content: [
+                { type: 'paragraph', content: [{ type: 'text', text: 'item lead' }] },
+                {
+                  type: 'blockquote',
+                  content: [
+                    {
+                      type: 'paragraph',
+                      content: [{ type: 'text', text: 'just a quote' }],
+                    },
+                  ],
+                },
+              ],
+            },
+          ],
+        },
+      ],
+    };
+    const md1 = convertProseMirrorToMarkdown(doc);
+    const doc2 = await markdownToProseMirror(md1);
+
+    // It stays a blockquote nested in the list item; NO callout is invented.
+    const listItems = collect(doc2, 'listItem');
+    expect(listItems).toHaveLength(1);
+    expect(listItems[0].content.map((c: any) => c.type)).toEqual([
+      'paragraph',
+      'blockquote',
+    ]);
+    expect(collect(doc2, 'callout')).toHaveLength(0);
+    const blockquotes = collect(doc2, 'blockquote');
+    expect(blockquotes).toHaveLength(1);
+    expect(allText(blockquotes[0])).toBe('just a quote');
+    // Byte-stable on re-export (no callout div injected on the way in).
+    expect(convertProseMirrorToMarkdown(doc2)).toBe(md1);
+  });
+
+  it('a nested blockquote with `[!` MID-line (not at the opener) stays a blockquote', async () => {
+    const doc = {
+      type: 'doc',
+      content: [
+        {
+          type: 'bulletList',
+          content: [
+            {
+              type: 'listItem',
+              content: [
+                { type: 'paragraph', content: [{ type: 'text', text: 'item lead' }] },
+                {
+                  type: 'blockquote',
+                  content: [
+                    {
+                      type: 'paragraph',
+                      content: [{ type: 'text', text: 'see [!note] inline' }],
+                    },
+                  ],
+                },
+              ],
+            },
+          ],
+        },
+      ],
+    };
+    const md1 = convertProseMirrorToMarkdown(doc);
+    const doc2 = await markdownToProseMirror(md1);
+
+    // The `[!note]` is mid-paragraph, not an opener -> NO callout is created.
+    expect(collect(doc2, 'callout')).toHaveLength(0);
+    const blockquotes = collect(doc2, 'blockquote');
+    expect(blockquotes).toHaveLength(1);
+    // The literal `[!note]` text is preserved inside the surviving blockquote.
+    expect(allText(blockquotes[0])).toContain('[!note]');
+    expect(convertProseMirrorToMarkdown(doc2)).toBe(md1);
+  });
+});
@@ -0,0 +1,31 @@
+{
+  "type": "doc",
+  "content": [
+    {
+      "type": "callout",
+      "attrs": {
+        "type": "info"
+      },
+      "content": [
+        {
+          "type": "paragraph",
+          "content": [
+            {
+              "type": "text",
+              "text": "line1"
+            }
+          ]
+        },
+        {
+          "type": "paragraph",
+          "content": [
+            {
+              "type": "text",
+              "text": "line2"
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
@@ -0,0 +1,50 @@
+{
+  "type": "doc",
+  "content": [
+    {
+      "type": "bulletList",
+      "content": [
+        {
+          "type": "listItem",
+          "content": [
+            {
+              "type": "paragraph",
+              "content": [
+                {
+                  "type": "text",
+                  "text": "item lead"
+                }
+              ]
+            },
+            {
+              "type": "callout",
+              "attrs": {
+                "type": "info"
+              },
+              "content": [
+                {
+                  "type": "paragraph",
+                  "content": [
+                    {
+                      "type": "text",
+                      "text": "ca"
+                    }
+                  ]
+                },
+                {
+                  "type": "paragraph",
+                  "content": [
+                    {
+                      "type": "text",
+                      "text": "cb"
+                    }
+                  ]
+                }
+              ]
+            }
+          ]
+        }
+      ]
+    }
+  ]
+}
@@ -581,16 +581,18 @@ describe('convertProseMirrorToMarkdown', () => {
        }),
      );
      // NOTE(review): the spec predicted ':::warning\nline1\n\nline2\n:::' (a
-      // The converter joins the callout's rendered children with a single '\n'
-      // and emits an Obsidian-native callout: a `> [!type]` opener plus one
-      // `>`-prefixed body line per content line. We pin the lowercasing
-      // (WARNING -> warning) and the multi-child join.
-      expect(out).toBe('> [!warning]\n> line1\n> line2');
+      // The converter emits an Obsidian-native callout: a `> [!type]` opener plus
+      // one `>`-prefixed body line per content line, with block-level children
+      // separated by a blank `>` line (mirrors the blockquote serializer) so a
+      // multi-paragraph callout does not collapse into one paragraph on re-import.
+      // We pin the lowercasing (WARNING -> warning) and the multi-child join.
+      expect(out).toBe('> [!warning]\n> line1\n>\n> line2');
      // The type is lowercased (an uppercase `[!WARNING]` would not re-import).
      expect(out.startsWith('> [!warning]\n')).toBe(true);
      expect(out).not.toContain('[!WARNING]');
-      // Both paragraph children are present, each blockquote-prefixed.
-      expect(out).toContain('> line1\n> line2');
+      // Both paragraph children are present, each blockquote-prefixed and
+      // separated by a blank `>` line.
+      expect(out).toContain('> line1\n>\n> line2');
    });

    // Spec 4 — blockquote per-line prefixer over a multi-line nested callout.
@@ -609,11 +611,12 @@ describe('convertProseMirrorToMarkdown', () => {
      );
      // NOTE(review): the spec predicted '> :::info\n> a\n>\n> b\n> :::',
      // assuming the nested callout body contains a blank line between 'a' and
-      // The nested callout renders as an Obsidian callout '> [!info]\n> a\n> b'
-      // (single-'\n' join, no blank line). The outer blockquote prefixer then
-      // prefixes each of those lines with '> ' again, yielding a doubly-nested
-      // blockquote — the realistic per-line-prefix loop over a multi-line child.
-      expect(out).toBe('> > [!info]\n> > a\n> > b');
+      // The nested callout renders as an Obsidian callout '> [!info]\n> a\n>\n> b'
+      // (block-level children separated by a blank `>` line). The outer blockquote
+      // prefixer then prefixes each of those lines with '> ' again, yielding a
+      // doubly-nested blockquote — the realistic per-line-prefix loop over a
+      // multi-block child.
+      expect(out).toBe('> > [!info]\n> > a\n> >\n> > b');
      // Every produced line carries the '> ' prefix (no line escapes to col 0).
      for (const line of out.split('\n')) {
        expect(line.startsWith('>')).toBe(true);