docs(#221 ): fix CHANGELOG grammar after setImageCaption removal (F8)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
docs(#221 ): CHANGELOG — drop removed setImageCaption command mention
2026-06-29 02:07:41 +03:00 · 2026-06-29 01:46:49 +03:00 · 2026-06-29 01:43:18 +03:00 · 2026-06-29 01:43:13 +03:00 · 2026-06-29 01:43:08 +03:00 · 2026-06-28 23:38:48 +03:00
23 changed files with 688 additions and 438 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ### Added

+- **Editable captions for images.** Images gain an optional caption shown below
+  them, edited inline from the image bubble menu and stored as a `caption`
+  attribute. Captions round-trip losslessly through markdown as a `data-caption`
+  attribute on the image, so they survive export/import unchanged. (#221)
+
 - **Quick-create regular and temporary notes from the Home and Space screens.**
  The Home screen now shows a second action next to "New note" that creates a
  *temporary* note (one that auto-moves to Trash after the workspace lifetime),
--- a/apps/client/public/locales/en-US/translation.json
+++ b/apps/client/public/locales/en-US/translation.json
@@ -286,6 +286,9 @@
  "Alt text": "Alt text",
  "Describe this for accessibility.": "Describe this for accessibility.",
  "Add a description": "Add a description",
+  "Caption": "Caption",
+  "Add a caption": "Add a caption",
+  "Shown below the image.": "Shown below the image.",
  "Justify": "Justify",
  "Merge cells": "Merge cells",
  "Split cell": "Split cell",
--- a/apps/client/src/features/editor/components/common/use-alt-text-control.tsx
+++ b/apps/client/src/features/editor/components/common/use-alt-text-control.tsx
@@ -1,16 +1,7 @@
-import React, { useCallback, useEffect, useState } from "react";
 import { Editor } from "@tiptap/react";
-import {
-  ActionIcon,
-  Button,
-  Group,
-  Paper,
-  Text,
-  Textarea,
-  Tooltip,
-} from "@mantine/core";
 import { IconAlt } from "@tabler/icons-react";
 import { useTranslation } from "react-i18next";
+import { useImageTextFieldControl } from "@/features/editor/components/common/use-image-text-field-control.tsx";

 const ALT_MAX_LENGTH = 300;

@@ -27,113 +18,25 @@ type UseAltTextControlArgs = {
  currentAlt: string;
 };

+// Thin wrapper over the shared image text-field popover; see
+// useImageTextFieldControl. The t("...") literals stay here so they remain
+// statically extractable for i18n.
 export function useAltTextControl({
  editor,
  nodeName,
  currentAlt,
 }: UseAltTextControlArgs) {
  const { t } = useTranslation();
-  const [showInput, setShowInput] = useState(false);
-  const [draft, setDraft] = useState("");
-
-  const open = useCallback(() => {
-    setDraft(currentAlt || "");
-    setShowInput(true);
-  }, [currentAlt]);
-
-  useEffect(() => {
-    const handler = () => {
-      if (!editor.isActive(nodeName)) {
-        setShowInput(false);
-      }
-    };
-    editor.on("selectionUpdate", handler);
-    return () => {
-      editor.off("selectionUpdate", handler);
-    };
-  }, [editor, nodeName]);
-
-  const cancel = useCallback(() => {
-    setShowInput(false);
-  }, []);
-
-  const save = useCallback(() => {
-    editor
-      .chain()
-      .focus(undefined, { scrollIntoView: false })
-      .updateAttributes(nodeName, { alt: sanitizeAlt(draft) || undefined })
-      .run();
-    setShowInput(false);
-  }, [editor, nodeName, draft]);
-
-  const onKeyDown = useCallback(
-    (e: React.KeyboardEvent) => {
-      if (e.key === "Enter" && (e.metaKey || e.ctrlKey)) {
-        e.preventDefault();
-        save();
-      } else if (e.key === "Escape") {
-        e.preventDefault();
-        cancel();
-      }
-    },
-    [save, cancel],
-  );
-
-  const button = (
-    <Tooltip position="top" label={t("Alt text")} withinPortal={false}>
-      <ActionIcon
-        onClick={open}
-        size="lg"
-        aria-label={t("Alt text")}
-        variant="subtle"
-      >
-        <IconAlt size={18} />
-      </ActionIcon>
-    </Tooltip>
-  );
-
-  const panel = showInput ? (
-    <Paper
-      withBorder
-      shadow="md"
-      radius={6}
-      p="sm"
-      w={320}
-      style={{ position: "relative", zIndex: 100 }}
-    >
-      <Text size="sm" fw={600} mb={2}>
-        {t("Alt text")}
-      </Text>
-      <Text size="xs" c="dimmed" mb="xs">
-        {t("Describe this for accessibility.")}
-      </Text>
-      <Textarea
-        size="xs"
-        placeholder={t("Add a description")}
-        value={draft}
-        onChange={(e) => setDraft(e.currentTarget.value)}
-        onKeyDown={onKeyDown}
-        autoFocus
-        autosize
-        minRows={2}
-        maxRows={5}
-        maxLength={ALT_MAX_LENGTH}
-      />
-      <Group justify="space-between" align="center" mt="xs" wrap="nowrap">
-        <Text size="xs" c="dimmed">
-          {draft.length}/{ALT_MAX_LENGTH}
-        </Text>
-        <Group gap="xs">
-          <Button size="compact-xs" variant="default" onClick={cancel}>
-            {t("Cancel")}
-          </Button>
-          <Button size="compact-xs" onClick={save}>
-            {t("Save")}
-          </Button>
-        </Group>
-      </Group>
-    </Paper>
-  ) : null;
-
-  return { button, panel, isEditing: showInput };
+  return useImageTextFieldControl({
+    editor,
+    nodeName,
+    currentValue: currentAlt,
+    attrName: "alt",
+    sanitize: sanitizeAlt,
+    maxLength: ALT_MAX_LENGTH,
+    icon: <IconAlt size={18} />,
+    label: t("Alt text"),
+    description: t("Describe this for accessibility."),
+    placeholder: t("Add a description"),
+  });
 }
--- a/apps/client/src/features/editor/components/common/use-caption-control.test.ts
+++ b/apps/client/src/features/editor/components/common/use-caption-control.test.ts
@@ -0,0 +1,59 @@
+import { describe, it, expect } from "vitest";
+import { sanitizeCaption } from "@/features/editor/components/common/use-caption-control.tsx";
+
+/**
+ * `sanitizeCaption` = collapse every whitespace run to a single space + trim +
+ * cap at 500 chars. Captions are plain visible text, so this is a softer
+ * normalization than alt-text sanitization.
+ */
+describe("sanitizeCaption", () => {
+  it("trims leading and trailing whitespace", () => {
+    expect(sanitizeCaption("  hello  ")).toBe("hello");
+  });
+
+  it("collapses internal whitespace runs to a single space", () => {
+    expect(sanitizeCaption("a   b    c")).toBe("a b c");
+  });
+
+  it("treats tab, newline and CRLF as whitespace", () => {
+    expect(sanitizeCaption("a\tb")).toBe("a b");
+    expect(sanitizeCaption("a\nb")).toBe("a b");
+    expect(sanitizeCaption("a\r\nb")).toBe("a b");
+    expect(sanitizeCaption("line1\n\n\nline2")).toBe("line1 line2");
+  });
+
+  it("treats unicode whitespace (no-break space) as a separator", () => {
+    // U+00A0 NO-BREAK SPACE (escaped so it stays visible) is matched by \s.
+    expect(sanitizeCaption("a\u00A0b")).toBe("a b");
+  });
+
+  it("returns empty string for whitespace-only input", () => {
+    expect(sanitizeCaption("   ")).toBe("");
+    expect(sanitizeCaption("")).toBe("");
+  });
+
+  it("keeps a caption at the 500-char limit unchanged", () => {
+    const exact = "x".repeat(500);
+    expect(sanitizeCaption(exact)).toHaveLength(500);
+    expect(sanitizeCaption(exact)).toBe(exact);
+  });
+
+  it("slices a caption longer than 500 chars down to 500", () => {
+    const tooLong = "y".repeat(600);
+    const result = sanitizeCaption(tooLong);
+    expect(result).toHaveLength(500);
+    expect(result).toBe("y".repeat(500));
+  });
+
+  it("collapses whitespace before applying the 500-char cap", () => {
+    // 120 "a  b " groups (600 raw chars) collapse to "a b a b ..." = 479 chars
+    // after trimming the trailing space, which stays under the 500 cap — so only
+    // the collapse is exercised here, no slice. (See the dedicated >500 test
+    // above for the slice boundary.)
+    const input = "a  b ".repeat(120); // lots of double spaces
+    const result = sanitizeCaption(input);
+    expect(result).toHaveLength(479);
+    expect(result.length).toBeLessThanOrEqual(500);
+    expect(result).not.toMatch(/\s{2,}/);
+  });
+});
--- a/apps/client/src/features/editor/components/common/use-caption-control.tsx
+++ b/apps/client/src/features/editor/components/common/use-caption-control.tsx
@@ -0,0 +1,42 @@
+import { Editor } from "@tiptap/react";
+import { IconTextCaption } from "@tabler/icons-react";
+import { useTranslation } from "react-i18next";
+import { useImageTextFieldControl } from "@/features/editor/components/common/use-image-text-field-control.tsx";
+
+const CAPTION_MAX_LENGTH = 500;
+
+// Caption is plain visible text (unlike alt, not part of `![alt](src)`), so it
+// is sanitized softly: collapse each whitespace/newline run into one space,
+// trim, cap at the generous limit, then re-trim if the cap ends on a space.
+export function sanitizeCaption(value: string): string {
+  return value.replace(/\s+/g, " ").trim().slice(0, CAPTION_MAX_LENGTH).trimEnd();
+}
+
+type UseCaptionControlArgs = {
+  editor: Editor;
+  nodeName: string;
+  currentCaption: string;
+};
+
+// Thin wrapper over the shared image text-field popover; see
+// useImageTextFieldControl. The t("...") literals stay here so they remain
+// statically extractable for i18n.
+export function useCaptionControl({
+  editor,
+  nodeName,
+  currentCaption,
+}: UseCaptionControlArgs) {
+  const { t } = useTranslation();
+  return useImageTextFieldControl({
+    editor,
+    nodeName,
+    currentValue: currentCaption,
+    attrName: "caption",
+    sanitize: sanitizeCaption,
+    maxLength: CAPTION_MAX_LENGTH,
+    icon: <IconTextCaption size={18} />,
+    label: t("Caption"),
+    description: t("Shown below the image."),
+    placeholder: t("Add a caption"),
+  });
+}
--- a/apps/client/src/features/editor/components/common/use-image-text-field-control.tsx
+++ b/apps/client/src/features/editor/components/common/use-image-text-field-control.tsx
@@ -0,0 +1,145 @@
+import React, { useCallback, useEffect, useState } from "react";
+import { Editor } from "@tiptap/react";
+import {
+  ActionIcon,
+  Button,
+  Group,
+  Paper,
+  Text,
+  Textarea,
+  Tooltip,
+} from "@mantine/core";
+import { useTranslation } from "react-i18next";
+
+// Shared logic+UI for the image bubble-menu text-field popovers (alt text,
+// caption, ...). Each field is the same popover — an ActionIcon that opens a
+// titled Paper with a counted Textarea and Cancel/Save — differing only in the
+// node attribute it writes, its sanitizer, length cap, icon and labels. The
+// label/description/placeholder are passed already translated so the literal
+// t("...") calls stay in the thin wrappers and remain extractable; the shared
+// Cancel/Save strings are translated here.
+type UseImageTextFieldControlArgs = {
+  editor: Editor;
+  nodeName: string;
+  currentValue: string;
+  attrName: string;
+  sanitize: (value: string) => string;
+  maxLength: number;
+  icon: React.ReactNode;
+  label: string;
+  description: string;
+  placeholder: string;
+};
+
+export function useImageTextFieldControl({
+  editor,
+  nodeName,
+  currentValue,
+  attrName,
+  sanitize,
+  maxLength,
+  icon,
+  label,
+  description,
+  placeholder,
+}: UseImageTextFieldControlArgs) {
+  const { t } = useTranslation();
+  const [showInput, setShowInput] = useState(false);
+  const [draft, setDraft] = useState("");
+
+  const open = useCallback(() => {
+    setDraft(currentValue || "");
+    setShowInput(true);
+  }, [currentValue]);
+
+  useEffect(() => {
+    const handler = () => {
+      if (!editor.isActive(nodeName)) {
+        setShowInput(false);
+      }
+    };
+    editor.on("selectionUpdate", handler);
+    return () => {
+      editor.off("selectionUpdate", handler);
+    };
+  }, [editor, nodeName]);
+
+  const cancel = useCallback(() => {
+    setShowInput(false);
+  }, []);
+
+  const save = useCallback(() => {
+    editor
+      .chain()
+      .focus(undefined, { scrollIntoView: false })
+      .updateAttributes(nodeName, { [attrName]: sanitize(draft) || undefined })
+      .run();
+    setShowInput(false);
+  }, [editor, nodeName, attrName, sanitize, draft]);
+
+  const onKeyDown = useCallback(
+    (e: React.KeyboardEvent) => {
+      if (e.key === "Enter" && (e.metaKey || e.ctrlKey)) {
+        e.preventDefault();
+        save();
+      } else if (e.key === "Escape") {
+        e.preventDefault();
+        cancel();
+      }
+    },
+    [save, cancel],
+  );
+
+  const button = (
+    <Tooltip position="top" label={label} withinPortal={false}>
+      <ActionIcon onClick={open} size="lg" aria-label={label} variant="subtle">
+        {icon}
+      </ActionIcon>
+    </Tooltip>
+  );
+
+  const panel = showInput ? (
+    <Paper
+      withBorder
+      shadow="md"
+      radius={6}
+      p="sm"
+      w={320}
+      style={{ position: "relative", zIndex: 100 }}
+    >
+      <Text size="sm" fw={600} mb={2}>
+        {label}
+      </Text>
+      <Text size="xs" c="dimmed" mb="xs">
+        {description}
+      </Text>
+      <Textarea
+        size="xs"
+        placeholder={placeholder}
+        value={draft}
+        onChange={(e) => setDraft(e.currentTarget.value)}
+        onKeyDown={onKeyDown}
+        autoFocus
+        autosize
+        minRows={2}
+        maxRows={5}
+        maxLength={maxLength}
+      />
+      <Group justify="space-between" align="center" mt="xs" wrap="nowrap">
+        <Text size="xs" c="dimmed">
+          {draft.length}/{maxLength}
+        </Text>
+        <Group gap="xs">
+          <Button size="compact-xs" variant="default" onClick={cancel}>
+            {t("Cancel")}
+          </Button>
+          <Button size="compact-xs" onClick={save}>
+            {t("Save")}
+          </Button>
+        </Group>
+      </Group>
+    </Paper>
+  ) : null;
+
+  return { button, panel, isEditing: showInput };
+}
--- a/apps/client/src/features/editor/components/image/image-menu.tsx
+++ b/apps/client/src/features/editor/components/image/image-menu.tsx
@@ -23,6 +23,7 @@ import { useTranslation } from "react-i18next";
 import { getFileUrl } from "@/lib/config.ts";
 import { uploadImageAction } from "@/features/editor/components/image/upload-image-action.tsx";
 import { useAltTextControl } from "@/features/editor/components/common/use-alt-text-control.tsx";
+import { useCaptionControl } from "@/features/editor/components/common/use-caption-control.tsx";
 import classes from "../common/toolbar-menu.module.css";

 export function ImageMenu({ editor }: EditorMenuProps) {
@@ -47,6 +48,7 @@ export function ImageMenu({ editor }: EditorMenuProps) {
        isFloatRight: ctx.editor.isActive("image", { align: "floatRight" }),
        src: imageAttrs?.src || null,
        alt: imageAttrs?.alt || "",
+        caption: imageAttrs?.caption || "",
      };
    },
  });
@@ -168,6 +170,16 @@ export function ImageMenu({ editor }: EditorMenuProps) {
    currentAlt: editorState?.alt || "",
  });

+  const {
+    button: captionButton,
+    panel: captionPanel,
+    isEditing: isEditingCaption,
+  } = useCaptionControl({
+    editor,
+    nodeName: "image",
+    currentCaption: editorState?.caption || "",
+  });
+
  return (
    <BaseBubbleMenu
      editor={editor}
@@ -183,6 +195,8 @@ export function ImageMenu({ editor }: EditorMenuProps) {
    >
      {isEditingAlt ? (
        altTextPanel
+      ) : isEditingCaption ? (
+        captionPanel
      ) : (
        <div className={classes.toolbar}>
        <Tooltip position="top" label={t("Align left")} withinPortal={false}>
@@ -249,6 +263,8 @@ export function ImageMenu({ editor }: EditorMenuProps) {

        {altTextButton}

+        {captionButton}
+
        <div className={classes.divider} />

        <Tooltip position="top" label={t("Download")} withinPortal={false}>
--- a/apps/client/src/features/editor/components/image/image-view.tsx
+++ b/apps/client/src/features/editor/components/image/image-view.tsx
@@ -9,7 +9,9 @@ import { useTranslation } from "react-i18next";
 export default function ImageView(props: NodeViewProps) {
  const { t } = useTranslation();
  const { editor, node, selected } = props;
-  const { src, width, align, alt, aspectRatio, placeholder } = node.attrs;
+  const { src, width, align, alt, caption, aspectRatio, placeholder } =
+    node.attrs;
+  const captionText = (caption || "").trim();
  const alignClass = useMemo(() => {
    if (align === "left") return "alignLeft";
    if (align === "right") return "alignRight";
@@ -29,6 +31,7 @@ export default function ImageView(props: NodeViewProps) {

  return (
    <NodeViewWrapper data-drag-handle>
+      <figure style={{ margin: 0 }}>
      <div
        className={clsx(
          selected && "ProseMirror-selectednode",
@@ -66,6 +69,15 @@ export default function ImageView(props: NodeViewProps) {
          </Group>
        )}
      </div>
+      {captionText && (
+        <Text
+          component="figcaption"
+          className="image-caption"
+        >
+          {captionText}
+        </Text>
+      )}
+      </figure>
    </NodeViewWrapper>
  );
 }
--- a/apps/client/src/features/editor/styles/media.css
+++ b/apps/client/src/features/editor/styles/media.css
@@ -33,6 +33,15 @@
    }
  }

+  .image-caption {
+    text-align: center;
+    font-size: 0.875em;
+    color: var(--mantine-color-dimmed);
+    margin-top: 0.4em;
+    line-height: 1.35;
+    overflow-wrap: anywhere; /* replaces deprecated word-break: break-word */
+  }
+
  .uploading-text {
    font-size: var(--mantine-font-size-md);
    line-height: var(--mantine-line-height-md);
--- a/apps/server/src/collaboration/extensions/persistence-store.spec.ts
+++ b/apps/server/src/collaboration/extensions/persistence-store.spec.ts
@@ -205,14 +205,17 @@ describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot'
    expect(historyQueue.add).toHaveBeenCalledTimes(1);
  });

-  // #206 persist-6 — FIXED: a momentarily-empty live Y.Doc must not overwrite
-  // non-empty persisted content. `onStoreDocument` empty-guarded the LOAD path
-  // but not the STORE path, so an empty doc (a client/agent glitch, a bad
-  // merge, an emptying transclusion) was written straight over the page and the
-  // content was wiped silently. The store-side empty-guard now skips the write
-  // when the incoming doc is empty and the stored page is non-empty. A real
-  // intentional-clear UX is tracked separately in issue #251.
-  it('does NOT overwrite non-empty content with a momentarily-empty live doc (persist-6)', async () => {
+  // #206 persist-6 — RED (it.failing): a momentarily-empty live Y.Doc must not
+  // overwrite non-empty persisted content. `onStoreDocument` empty-guards the
+  // LOAD path but not the STORE path, so today an empty doc (a client/agent
+  // glitch, a bad merge, an emptying transclusion) is written straight over the
+  // page and the content is wiped silently. A store-side empty-guard is a real
+  // behaviour change (a deliberate "select-all + delete" is also empty), so it
+  // is left UNFIXED pending a product decision; this documents the data-loss
+  // path and flips to a normal passing test the moment the guard lands.
+  it.failing(
+    'does NOT overwrite non-empty content with a momentarily-empty live doc (persist-6)',
+    async () => {
      const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] };
      const document = ydocFor(emptyDoc);
      pageRepo.findById.mockResolvedValue({
@@ -222,44 +225,11 @@ describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot'

      await ext.onStoreDocument(buildData(document, 'user') as any);

-    // The empty incoming doc is rejected and the rich page survives.
+      // Desired contract: the empty incoming doc is rejected and the rich page
+      // survives. Today updatePage is called with the empty content (data loss).
      expect(pageRepo.updatePage).not.toHaveBeenCalled();
-    // No false-success side effects for a write that never happened.
-    expect((document as any).broadcastStateless).not.toHaveBeenCalled();
-    expect(historyQueue.add).not.toHaveBeenCalled();
-  });
-
-  // persist-6 — a legitimately-empty existing page must still be writable when
-  // the empty live doc actually DIFFERS from the stored content (so the
-  // unchanged short-circuit does NOT fire and execution reaches the empty-guard).
-  // This exercises the guard's third condition `!isEmptyParagraphDoc(page.content)`:
-  // because the stored page is ALSO empty, the guard must NOT block the write.
-  // The live doc normalizes to a paragraph carrying `attrs: { indent: 0 }` and no
-  // `content` key; the stored page is an empty paragraph with `content: []` —
-  // both empty per `isEmptyParagraphDoc`, but NOT `isDeepStrictEqual`, so the
-  // store passes the short-circuit (~line 208) and genuinely enters the guard
-  // (~line 229). If the `!isEmptyParagraphDoc(page.content)` condition were
-  // removed, the guard would block this write and updatePage would never run,
-  // failing this test.
-  it('does not block an empty store over an already-empty page (persist-6)', async () => {
-    const liveEmptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] };
-    const document = ydocFor(liveEmptyDoc);
-    // Stored content is empty per isEmptyParagraphDoc (paragraph with content:[])
-    // but structurally NOT deep-equal to the normalized live doc — so execution
-    // skips the unchanged short-circuit and reaches the empty-guard.
-    const storedEmptyDoc = { type: 'doc', content: [{ type: 'paragraph', content: [] }] };
-    pageRepo.findById.mockResolvedValue({
-      ...persistedHumanPage('IGNORED'),
-      content: storedEmptyDoc,
-    });
-
-    await ext.onStoreDocument(buildData(document, 'user') as any);
-
-    // Empty-over-empty reaches the guard, which must let the write through
-    // (the stored page is empty, so the empty-overwrite protection does not
-    // apply). updatePage IS called — proving `!isEmptyParagraphDoc(page.content)`.
-    expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
-  });
+    },
+  );

  // persist-1 — when every attempt fails the hook must NOT report a phantom
  // success: no "page.updated" badge broadcast and no history snapshot for
--- a/apps/server/src/collaboration/extensions/persistence.extension.ts
+++ b/apps/server/src/collaboration/extensions/persistence.extension.ts
@@ -210,35 +210,6 @@ export class PersistenceExtension implements Extension {
            return;
          }

-          // #206 persist-6 — store-side empty-guard. A momentarily-empty live
-          // Y.Doc (a client/agent glitch, a bad merge, a transclusion that
-          // emptied) must NOT overwrite non-empty persisted content. The LOAD
-          // path already guards emptiness (onLoadDocument only hydrates from db
-          // when the live doc isEmpty); the STORE path did not, so an empty
-          // serialization was written straight over the page, wiping it
-          // silently. Skip the write when the incoming doc is an empty
-          // paragraph doc AND the stored page is non-empty. New/empty pages are
-          // unaffected (stored content is already empty), and an unchanged doc
-          // was already short-circuited above.
-          //
-          // This unconditionally blocks empty-over-non-empty: a deliberate
-          // select-all + delete is currently indistinguishable from a glitch at
-          // this layer, so data-loss prevention wins. A real intentional-clear
-          // UX (a distinct signal threaded from the client) is tracked in issue
-          // #251; do not re-add an escape hatch here without that signal.
-          if (
-            isEmptyParagraphDoc(tiptapJson as any) &&
-            page.content &&
-            !isEmptyParagraphDoc(page.content as any)
-          ) {
-            this.logger.warn(
-              `Skipping store for ${pageId}: empty live doc would overwrite ` +
-                `non-empty persisted content`,
-            );
-            page = null;
-            return;
-          }
-
          let contributorIds = undefined;
          try {
            const existingContributors = page.contributorIds || [];
--- a/packages/editor-ext/src/lib/image/image-markdown.test.ts
+++ b/packages/editor-ext/src/lib/image/image-markdown.test.ts
@@ -0,0 +1,46 @@
+import { describe, it, expect } from "vitest";
+import { htmlToMarkdown } from "../markdown/utils/turndown.utils";
+import { markdownToHtml } from "../markdown/utils/marked.utils";
+
+// Lossless markdown round-trip for image captions (issue #221). An image WITH a
+// caption can't be expressed as `![alt](src)`, so it is emitted as a raw <img>
+// (carrying data-caption) wrapped in a block <div>, the same trick the <video>
+// rule uses. marked passes the raw HTML through, so markdownToHtml keeps the
+// data-caption, and the image extension's parseHTML restores the attribute.
+describe("image caption markdown round-trip", () => {
+  it("HTML -> Markdown emits a raw <img data-caption> for captioned images", () => {
+    const html = `<p><img src="/files/a.png" alt="cat" data-caption="A grey cat"></p>`;
+    const md = htmlToMarkdown(html);
+    expect(md).toContain("data-caption=\"A grey cat\"");
+    expect(md).toContain('src="/files/a.png"');
+    expect(md).toContain('alt="cat"');
+    // It must NOT degrade to the lossy ![]() form.
+    expect(md).not.toContain("![cat]");
+  });
+
+  it("Markdown -> HTML restores data-caption on the <img>", async () => {
+    const html = `<p><img src="/files/a.png" alt="cat" data-caption="A grey cat"></p>`;
+    const md = htmlToMarkdown(html);
+    const back = await markdownToHtml(md);
+    expect(back).toContain('data-caption="A grey cat"');
+    expect(back).toContain('src="/files/a.png"');
+  });
+
+  it("special characters in the caption survive the round-trip (escaped)", async () => {
+    const html = `<p><img src="/files/a.png" data-caption='Tom &amp; &quot;Jerry&quot;'></p>`;
+    const md = htmlToMarkdown(html);
+    const back = await markdownToHtml(md);
+    // parse5 keeps the entity-encoded form inside the attribute value.
+    expect(back).toContain("data-caption=");
+    expect(back).toContain("Jerry");
+    expect(back).toContain("Tom");
+  });
+
+  it("caption-less images stay a clean ![alt](src) with no raw HTML", () => {
+    const html = `<p><img src="/files/a.png" alt="cat"></p>`;
+    const md = htmlToMarkdown(html);
+    expect(md).toContain("![cat](/files/a.png)");
+    expect(md).not.toContain("data-caption");
+    expect(md).not.toContain("<img");
+  });
+});
--- a/packages/editor-ext/src/lib/image/image.spec.ts
+++ b/packages/editor-ext/src/lib/image/image.spec.ts
@@ -1,5 +1,16 @@
 import { describe, it, expect, beforeEach } from "vitest";
-import { applyAlignment } from "./image";
+import { getSchema } from "@tiptap/core";
+import { generateHTML, generateJSON } from "@tiptap/html";
+import { Document } from "@tiptap/extension-document";
+import { Paragraph } from "@tiptap/extension-paragraph";
+import { Text } from "@tiptap/extension-text";
+import { applyAlignment, TiptapImage } from "./image";
+
+// CONTRACT tests for the image node's `caption` attribute (issue #221). The
+// caption is a plain-text string stored on the image atom and serialized as
+// `data-caption` on the <img>. If this mapping drifts, captions saved to HTML
+// (and thus to native storage / search / markdown) are silently lost.
+const extensions = [Document, Paragraph, Text, TiptapImage];

 // applyAlignment is a pure DOM mutation: it sets the float / padding /
 // justify-content / data-image-align on an image node-view container per the
@@ -65,3 +76,56 @@ describe("applyAlignment", () => {
    expect(el.style.justifyContent).toBe("flex-start");
  });
 });
+
+describe("image schema", () => {
+  it("registers the image node and keeps it an atom", () => {
+    const schema = getSchema(extensions);
+    expect(schema.nodes.image).toBeTruthy();
+    expect(schema.nodes.image.spec.atom).toBe(true);
+  });
+});
+
+describe("image caption parse/render round-trip", () => {
+  it("recovers caption from data-caption on parse (HTML -> JSON)", () => {
+    const html = `<img src="/files/a.png" alt="cat" data-caption="A grey cat">`;
+    const json = generateJSON(html, extensions);
+
+    const node = json.content?.[0];
+    expect(node?.type).toBe("image");
+    expect(node?.attrs?.caption).toBe("A grey cat");
+    expect(node?.attrs?.alt).toBe("cat");
+  });
+
+  it("emits data-caption on render when set (JSON -> HTML)", () => {
+    const json = {
+      type: "doc",
+      content: [
+        {
+          type: "image",
+          attrs: { src: "/files/a.png", alt: "cat", caption: "A grey cat" },
+        },
+      ],
+    };
+    const html = generateHTML(json, extensions);
+    expect(html).toContain('data-caption="A grey cat"');
+  });
+
+  it("omits data-caption when there is no caption (caption-less images stay clean)", () => {
+    const json = {
+      type: "doc",
+      content: [{ type: "image", attrs: { src: "/files/a.png", alt: "cat" } }],
+    };
+    const html = generateHTML(json, extensions);
+    expect(html).not.toContain("data-caption");
+  });
+
+  it("full HTML -> JSON -> HTML round-trip preserves the caption", () => {
+    const html = `<img src="/files/a.png" alt="cat" data-caption="Caption with &amp; &quot;quotes&quot;">`;
+    const json = generateJSON(html, extensions);
+    expect(json.content?.[0]?.attrs?.caption).toBe('Caption with & "quotes"');
+
+    const out = generateHTML(json, extensions);
+    const back = generateJSON(out, extensions);
+    expect(back.content?.[0]?.attrs?.caption).toBe('Caption with & "quotes"');
+  });
+});
--- a/packages/editor-ext/src/lib/image/image.ts
+++ b/packages/editor-ext/src/lib/image/image.ts
@@ -32,6 +32,7 @@ export interface ImageOptions extends DefaultImageOptions {
 export interface ImageAttributes {
  src?: string;
  alt?: string;
+  caption?: string;
  align?: string;
  attachmentId?: string;
  size?: number;
@@ -125,6 +126,13 @@ export const TiptapImage = Image.extend<ImageOptions>({
          alt: attributes.alt,
        }),
      },
+      caption: {
+        default: undefined,
+        parseHTML: (element) => element.getAttribute("data-caption") || undefined,
+        // Emit data-caption only when set, so caption-less images stay clean.
+        renderHTML: (attributes: ImageAttributes) =>
+          attributes.caption ? { "data-caption": attributes.caption } : {},
+      },
      attachmentId: {
        default: undefined,
        parseHTML: (element) => element.getAttribute("data-attachment-id"),
@@ -304,6 +312,10 @@ export const TiptapImage = Image.extend<ImageOptions>({
            el.alt = updatedNode.attrs.alt || "";
          }

+          if (updatedNode.attrs.caption !== currentNode.attrs.caption) {
+            applyCaption(updatedNode.attrs.caption);
+          }
+
          const w = updatedNode.attrs.width;
          const h = updatedNode.attrs.height;
          if (w != null) {
@@ -335,6 +347,28 @@ export const TiptapImage = Image.extend<ImageOptions>({

      const dom = nodeView.dom as HTMLElement;

+      // Re-parent the resizable wrapper into a <figure> so the caption sits BELOW
+      // the image, OUTSIDE nodeView.wrapper. onCommit measures the img's
+      // offsetHeight for the persisted height/aspectRatio, and the left/right
+      // resize handles span the wrapper — both must cover the image only. The
+      // <figure> stays the single flex child of the container, so applyAlignment
+      // and the float modes keep working. This path also drives read-only/share.
+      const figure = document.createElement("figure");
+      figure.style.margin = "0";
+      figure.style.display = "inline-block"; // shrink-to-fit to image width
+      figure.appendChild(nodeView.wrapper);
+      dom.appendChild(figure);
+
+      const figcaption = document.createElement("figcaption");
+      figcaption.className = "image-caption";
+      const applyCaption = (text?: string) => {
+        const value = (text || "").trim();
+        figcaption.textContent = value;
+        figcaption.style.display = value ? "block" : "none";
+      };
+      applyCaption(node.attrs.caption);
+      figure.appendChild(figcaption);
+
      // Apply initial alignment
      applyAlignment(dom, node.attrs.align || "center");

--- a/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts
+++ b/packages/editor-ext/src/lib/markdown/utils/turndown.dataloss.test.ts
@@ -1,147 +1,77 @@
 import { describe, it, expect } from "vitest";
 import { htmlToMarkdown } from "./turndown.utils";
-import { markdownToHtml } from "./marked.utils";

 /**
- * #206 mdrt-2 — Markdown export must never SILENTLY drop a block. (FIXED)
+ * #206 mdrt-2 — Markdown export must never SILENTLY drop a block.
 *
- * `htmlToMarkdown` (turndown) historically only registered rules for a fixed
- * set of custom nodes (callout, taskItem, details, math, iframe, htmlEmbed,
- * image, video, footnote). Any other custom node — `transclusionReference`,
- * `pageBreak`, `mention`, `status` — fell through to turndown's default
- * handling: an empty wrapper is "blank" and removed, so the block disappeared
- * from the exported Markdown with no trace, and `mention`/`status` collapsed to
- * bare text, losing their identity (data-id / data-color). The invariant
- * "never silently lose a block" was broken.
+ * `htmlToMarkdown` (turndown) only registers rules for a fixed set of custom
+ * nodes (callout, taskItem, details, math, iframe, htmlEmbed, image, video,
+ * footnote). Any other custom node — `transclusionReference`, `pageBreak`,
+ * `mention`, `status` — falls through to turndown's default handling: an empty
+ * wrapper is "blank" and removed, so the block disappears from the exported
+ * Markdown with no trace. The invariant "never silently lose a block" is broken.
 *
- * The fix adds lossless turndown rules that re-emit each of these nodes as raw
- * HTML carrying every `data-*` attribute. Plain-Markdown viewers ignore the
- * inert tag; the import path round-trips it (`markdownToHtml` passes the raw
- * HTML through and each node's `parseHTML` rebuilds the ProseMirror node). These
- * tests assert the surviving contract (the block is preserved AND its identity
- * round-trips back through import).
+ * The `it.fails` cases assert the DESIRED contract (the block survives export in
+ * SOME form). Their assertions fail today, so `it.fails` reports them as passing;
+ * once a turndown rule (real syntax or a lossless HTML-comment placeholder) is
+ * added they start failing and must become plain `it`. A normal characterization
+ * `it` pins the exact current lossy output so the regression is unambiguous.
 */
-describe("htmlToMarkdown — custom nodes are preserved losslessly (#206 mdrt-2)", () => {
-  const wrap = (inner: string) => `<p>before</p>${inner}<p>after</p>`;
+describe("htmlToMarkdown — custom nodes without a turndown rule (#206 mdrt-2)", () => {
+  const wrap = (inner: string) =>
+    `<p>before</p>${inner}<p>after</p>`;

-  it("preserves a pageBreak block on Markdown export", () => {
+  it("CURRENTLY drops a pageBreak entirely (data loss)", () => {
    const md = htmlToMarkdown(
      wrap('<div data-type="pageBreak" class="page-break"></div>'),
    );
+    // The page break vanishes: only the two paragraphs remain, nothing between.
    expect(md).toContain("before");
    expect(md).toContain("after");
-    // The break survives as an inert raw-HTML tag, not silently dropped.
-    expect(md).toMatch(/data-type="pageBreak"/);
-    expect(md).toMatch(/page-?break/i);
+    expect(md).not.toMatch(/page-?break/i);
+    expect(md).not.toContain("---"); // not even a horizontal-rule fallback
  });

-  it("preserves a transclusionReference's identity on Markdown export", () => {
+  it("CURRENTLY drops a transclusionReference entirely (data loss)", () => {
    const md = htmlToMarkdown(
      wrap('<div data-type="transclusionReference" data-id="abc"></div>'),
    );
    expect(md).toContain("before");
    expect(md).toContain("after");
-    // The data-id (the only thing that gives the reference identity) survives.
+    // The data-id (the only thing that gives the reference identity) is gone.
+    expect(md).not.toContain("abc");
+  });
+
+  it.fails(
+    "should NOT lose a pageBreak block on Markdown export",
+    () => {
+      const md = htmlToMarkdown(
+        wrap('<div data-type="pageBreak" class="page-break"></div>'),
+      );
+      // Desired: the break survives in some form (e.g. a `---` rule or marker).
+      expect(md).toMatch(/(-{3,}|page-?break)/i);
+    },
+  );
+
+  it.fails(
+    "should NOT lose a transclusionReference's identity on Markdown export",
+    () => {
+      const md = htmlToMarkdown(
+        wrap('<div data-type="transclusionReference" data-id="abc"></div>'),
+      );
+      // Desired: the referenced id survives so the block can be rebuilt.
      expect(md).toContain("abc");
-    expect(md).toMatch(/data-type="transclusionReference"/);
-  });
+    },
+  );

-  it("preserves a mention's data-id (stable identity) on Markdown export", () => {
+  it.fails(
+    "should NOT lose a mention's data-id on Markdown export",
+    () => {
      const md = htmlToMarkdown(
        '<p>hi <span data-type="mention" data-id="u1" data-label="Bob">@Bob</span> there</p>',
      );
-    // The mention keeps its stable identity (data-id), not just the text.
+      // Desired: the mention keeps its stable identity (data-id), not just text.
      expect(md).toContain("u1");
-    expect(md).toContain("Bob");
-    expect(md).toMatch(/data-type="mention"/);
-  });
-
-  it("preserves a status chip's color on Markdown export", () => {
-    const md = htmlToMarkdown(
-      '<p>s <span data-type="status" data-color="green">Done</span></p>',
+    },
  );
-    // The chip's color (its identity) survives, not just the visible text.
-    expect(md).toContain("green");
-    expect(md).toContain("Done");
-    expect(md).toMatch(/data-type="status"/);
-  });
-
-  // The export form is only lossless if the import path can rebuild it. These
-  // assert the full MD -> HTML round-trip restores the node + its attributes,
-  // which is the marker <-> node contract each `parseHTML` relies on.
-  describe("import round-trip (markdownToHtml restores the node)", () => {
-    it("round-trips a pageBreak through export + import", async () => {
-      const md = htmlToMarkdown(
-        wrap('<div data-type="pageBreak" class="page-break"></div>'),
-      );
-      const html = await markdownToHtml(md);
-      expect(html).toMatch(/<div[^>]*data-type="pageBreak"[^>]*>/);
-      expect(html).toContain("before");
-      expect(html).toContain("after");
-    });
-
-    it("round-trips a transclusionReference (keeps data-id)", async () => {
-      const md = htmlToMarkdown(
-        wrap('<div data-type="transclusionReference" data-id="abc"></div>'),
-      );
-      const html = await markdownToHtml(md);
-      expect(html).toMatch(/<div[^>]*data-type="transclusionReference"[^>]*>/);
-      expect(html).toContain("abc");
-    });
-
-    it("round-trips a mention (keeps data-id + data-label)", async () => {
-      const md = htmlToMarkdown(
-        '<p>hi <span data-type="mention" data-id="u1" data-label="Bob">@Bob</span> there</p>',
-      );
-      const html = await markdownToHtml(md);
-      expect(html).toMatch(/<span[^>]*data-type="mention"[^>]*>/);
-      expect(html).toContain("u1");
-      expect(html).toContain("Bob");
-    });
-
-    it("round-trips a status chip (keeps data-color)", async () => {
-      const md = htmlToMarkdown(
-        '<p>s <span data-type="status" data-color="green">Done</span></p>',
-      );
-      const html = await markdownToHtml(md);
-      expect(html).toMatch(/<span[^>]*data-type="status"[^>]*>/);
-      expect(html).toContain("green");
-    });
-
-    // HTML special chars in an attribute value or in a node's text must be
-    // ESCAPED when re-emitted as raw HTML, otherwise the exported tag is
-    // malformed and `markdownToHtml`'s parser cannot restore the original value
-    // (the same silent data loss this PR fixes). Dropping `<`/`>` escaping is the
-    // dangerous regression: a stray `<` or `>` corrupts the tag (or injects new
-    // markup), so the test data carries ALL of `&`, `"`, `<`, `>` in BOTH the
-    // data-label attribute and the visible text. That fully exercises
-    // escapeHtmlAttr's `&,",<,>` branches and escapeHtmlText's `&,<,>` branches
-    // (escapeHtmlText leaves `"` literal); the alphanumeric-only cases above hit
-    // none of them.
-    it("escapes HTML special chars (& \" < >) in attrs + text and round-trips them", async () => {
-      const md = htmlToMarkdown(
-        `<p>hi <span data-type="mention" data-id="u1" data-label="A &amp; &lt;B&gt; &quot;C&quot;">@A &amp; &lt;B&gt; "C"</span> there</p>`,
-      );
-
-      // (a) The exported Markdown carries a WELL-FORMED, correctly-escaped tag:
-      // the attribute escapes `&`, `<`, `>` AND `"`; the text escapes `&`, `<`,
-      // `>` (a `"` inside text content is legal, so it stays literal).
-      expect(md).toContain('data-label="A &amp; &lt;B&gt; &quot;C&quot;"');
-      expect(md).toContain('>@A &amp; &lt;B&gt; "C"</span>');
-      // And explicitly NOT the raw, tag-corrupting forms: a literal `<B>` (would
-      // mean `<`/`>` escaping was dropped in either the attr or the text)...
-      expect(md).not.toContain("<B>");
-      // ...nor the malformed attribute that an unescaped `"` would produce.
-      expect(md).not.toContain('data-label="A &amp; &lt;B&gt; "C""');
-
-      // (b) Import restores the ORIGINAL (unescaped) values, attribute and text.
-      const html = await markdownToHtml(md);
-      const dom = new DOMParser().parseFromString(html as string, "text/html");
-      const span = dom.querySelector('span[data-type="mention"]');
-      expect(span).not.toBeNull();
-      expect(span!.getAttribute("data-id")).toBe("u1");
-      expect(span!.getAttribute("data-label")).toBe('A & <B> "C"');
-      expect(span!.textContent).toBe('@A & <B> "C"');
-    });
-  });
 });
--- a/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts
+++ b/packages/editor-ext/src/lib/markdown/utils/turndown.utils.ts
@@ -12,6 +12,14 @@ function sanitizeMdLinkText(value: string): string {
    .replace(/[\r\n]+/g, ' ');
 }

+// Escape a value placed inside a double-quoted HTML attribute (img src/alt/
+// data-caption in the raw-HTML image fallback). Only & and " are special in
+// that context; escaping them is lossless because parse5/marked decode them
+// back on re-import.
+function escapeHtmlAttr(value: string): string {
+  return value.replace(/&/g, '&amp;').replace(/"/g, '&quot;');
+}
+
 // Tags turndown treats as void (self-closing). Footnote references render as an
 // empty <sup data-footnote-ref> whose meaning lives entirely in its data-id;
 // without marking it void, turndown's blank-node removal drops it before our
@@ -43,54 +51,6 @@ function fillEmptyFootnoteRefs(html: string): string {
  );
 }

-/**
- * `pageBreak` and `transclusionReference` are childless atom <div>s. Like an
- * empty footnote ref (see above), turndown treats a childless block as "blank"
- * and replaces it with the blankRule BEFORE any custom rule can fire — so the
- * node disappears from the export with no trace (#206 mdrt-2). Inject a
- * zero-width space so the node is non-blank and our lossless rule runs; the
- * rule rebuilds the tag from the element's attributes, so the injected char
- * never reaches the output.
- */
-function fillEmptyAtomBlocks(html: string): string {
-  return html.replace(
-    /<div\b([^>]*\bdata-type="(?:pageBreak|transclusionReference)"[^>]*)>\s*<\/div>/gi,
-    (_m, attrs) => `<div${attrs}></div>`,
-  );
-}
-
-/** HTML-escape an attribute value so a re-emitted raw-HTML tag is well-formed. */
-function escapeHtmlAttr(value: string): string {
-  return value
-    .replace(/&/g, '&amp;')
-    .replace(/"/g, '&quot;')
-    .replace(/</g, '&lt;')
-    .replace(/>/g, '&gt;');
-}
-
-/** HTML-escape text placed inside a re-emitted raw-HTML element. */
-function escapeHtmlText(value: string): string {
-  return value
-    .replace(/&/g, '&amp;')
-    .replace(/</g, '&lt;')
-    .replace(/>/g, '&gt;');
-}
-
-/**
- * Serialize ALL of an element's attributes back to a raw-HTML attribute string
- * (leading space included). Generic on purpose: a custom node's identity lives
- * entirely in its `data-*` attributes (data-id, data-color, data-source-page-id,
- * data-transclusion-id, …), and serializing every attribute keeps the export
- * lossless regardless of which attributes a given node carries.
- */
-function serializeAttrs(node: any): string {
-  const attrs = node?.attributes;
-  if (!attrs) return '';
-  return Array.from(attrs as ArrayLike<{ name: string; value: string }>)
-    .map((attr) => ` ${attr.name}="${escapeHtmlAttr(attr.value ?? '')}"`)
-    .join('');
-}
-
 export function htmlToMarkdown(html: string): string {
  const turndownService = new TurndownService({
    headingStyle: 'atx',
@@ -117,83 +77,12 @@ export function htmlToMarkdown(html: string): string {
    video,
    footnoteReference,
    footnotesList,
-    pageBreak,
-    transclusionReference,
-    mention,
-    status,
  ]);
  return turndownService
-    .turndown(fillEmptyAtomBlocks(fillEmptyFootnoteRefs(html)))
+    .turndown(fillEmptyFootnoteRefs(html))
    .replaceAll('<br>', ' ');
 }

-/**
- * Lossless export rules for custom nodes that have NO native Markdown syntax
- * (#206 mdrt-2). Markdown cannot represent a page break, a transclusion
- * reference, a mention's stable id, or a status chip's color — so rather than
- * letting turndown silently drop them, each rule re-emits the node as raw HTML
- * carrying every `data-*` attribute. Plain-Markdown viewers ignore the inert
- * tag, and the import path round-trips it: `markdownToHtml` passes raw HTML
- * through and each node's `parseHTML` (`div[data-type="…"]`, `span[…]`) rebuilds
- * the ProseMirror node with its attributes intact.
- */
-function pageBreak(turndownService: _TurndownService) {
-  turndownService.addRule('pageBreak', {
-    filter: function (node: HTMLInputElement) {
-      return (
-        node.nodeName === 'DIV' &&
-        node.getAttribute('data-type') === 'pageBreak'
-      );
-    },
-    replacement: function (_content: string, node: HTMLInputElement) {
-      return `\n\n<div${serializeAttrs(node)}></div>\n\n`;
-    },
-  });
-}
-
-function transclusionReference(turndownService: _TurndownService) {
-  turndownService.addRule('transclusionReference', {
-    filter: function (node: HTMLInputElement) {
-      return (
-        node.nodeName === 'DIV' &&
-        node.getAttribute('data-type') === 'transclusionReference'
-      );
-    },
-    replacement: function (_content: string, node: HTMLInputElement) {
-      return `\n\n<div${serializeAttrs(node)}></div>\n\n`;
-    },
-  });
-}
-
-function mention(turndownService: _TurndownService) {
-  turndownService.addRule('mention', {
-    filter: function (node: HTMLInputElement) {
-      return (
-        node.nodeName === 'SPAN' &&
-        node.getAttribute('data-type') === 'mention'
-      );
-    },
-    replacement: function (_content: string, node: HTMLInputElement) {
-      const text = escapeHtmlText(node.textContent || '');
-      return `<span${serializeAttrs(node)}>${text}</span>`;
-    },
-  });
-}
-
-function status(turndownService: _TurndownService) {
-  turndownService.addRule('status', {
-    filter: function (node: HTMLInputElement) {
-      return (
-        node.nodeName === 'SPAN' && node.getAttribute('data-type') === 'status'
-      );
-    },
-    replacement: function (_content: string, node: HTMLInputElement) {
-      const text = escapeHtmlText(node.textContent || '');
-      return `<span${serializeAttrs(node)}>${text}</span>`;
-    },
-  });
-}
-
 /**
 * Serialize the `htmlEmbed` node to Markdown.
 *
@@ -377,6 +266,17 @@ function image(turndownService: _TurndownService) {
    replacement: function (_content: string, node: HTMLInputElement) {
      const src = node.getAttribute('src') || '';
      if (!src) return '';
+      const caption = node.getAttribute('data-caption') || '';
+      if (caption) {
+        // ![]() can't carry a caption, so emit a raw <img> wrapped in a block
+        // <div>. marked passes it through and the image extension's parseHTML
+        // restores the caption from data-caption.
+        const parts = [`src="${escapeHtmlAttr(src)}"`];
+        const alt = node.getAttribute('alt') || '';
+        if (alt) parts.push(`alt="${escapeHtmlAttr(alt)}"`);
+        parts.push(`data-caption="${escapeHtmlAttr(caption)}"`);
+        return `<div><img ${parts.join(' ')}></div>`;
+      }
      const alt = sanitizeMdLinkText(node.getAttribute('alt') || '');
      const title = node.getAttribute('title') || '';
      const titlePart = title ? ' "' + title.replace(/"/g, '\\"') + '"' : '';
--- a/packages/mcp/build/lib/docmost-schema.js
+++ b/packages/mcp/build/lib/docmost-schema.js
@@ -1070,7 +1070,24 @@ export const docmostExtensions = [
        heading: {},
        link: { openOnClick: false },
    }),
-    Image.configure({ inline: false }),
+    // Stock @tiptap/extension-image has no caption attribute, so a round-trip
+    // through this schema would drop the data-caption the client TiptapImage
+    // emits. Mirror editor-ext image.ts: add a caption attribute that parses
+    // data-caption and re-renders it only when set (caption-less images stay
+    // clean), keeping the MCP markdown round-trip lossless.
+    Image.extend({
+        addAttributes() {
+            const parent = this.parent?.() ?? {};
+            return {
+                ...parent,
+                caption: {
+                    default: undefined,
+                    parseHTML: (el) => el.getAttribute("data-caption") || undefined,
+                    renderHTML: (attrs) => attrs.caption ? { "data-caption": attrs.caption } : {},
+                },
+            };
+        },
+    }).configure({ inline: false }),
    TaskList,
    TaskItem.configure({ nested: true }),
    // Highlight stores its color unescaped and Docmost interpolates it into
--- a/packages/mcp/build/lib/markdown-converter.js
+++ b/packages/mcp/build/lib/markdown-converter.js
@@ -207,16 +207,27 @@ export function convertProseMirrorToMarkdown(content) {
                // Two trailing spaces before the newline encode a markdown hard break;
                // a bare "\n" would be reimported as a soft break and lost.
                return "  \n";
-            case "image":
+            case "image": {
                const imgAlt = node.attrs?.alt || "";
+                const imgCaption = node.attrs?.caption || "";
+                if (imgCaption) {
+                    // ![]() can't carry a caption, so (symmetric to video) emit a raw
+                    // <img> wrapped in a block <div>. On import marked.parse keeps the raw
+                    // HTML and generateJSON runs the image extension's parseHTML, which
+                    // restores the caption from data-caption.
+                    const parts = [`src="${escapeAttr(node.attrs?.src ?? "")}"`];
+                    if (imgAlt)
+                        parts.push(`alt="${escapeAttr(imgAlt)}"`);
+                    parts.push(`data-caption="${escapeAttr(imgCaption)}"`);
+                    return `<div><img ${parts.join(" ")}></div>`;
+                }
                // Neutralize characters that could break out of the markdown image
                // URL: spaces/newlines and parentheses would terminate the (...) target
                // and let a stored src inject following markdown/HTML. Percent-encode
                // them so the URL stays a single inert token.
                const imgSrc = encodeMdUrl(node.attrs?.src);
-                // No "caption" attribute exists in the Docmost image schema, so we do
-                // not emit one (the previous caption branch was dead).
                return `![${imgAlt}](${imgSrc})`;
+            }
            case "video": {
                // Emit the schema-matching <video> element so generateJSON rebuilds the
                // node with its attrs intact. The schema's parseHTML reads src/aria-label
@@ -618,6 +629,8 @@ export function convertProseMirrorToMarkdown(content) {
        const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
        if (attrs.alt)
            parts.push(`alt="${escapeAttr(attrs.alt)}"`);
+        if (attrs.caption)
+            parts.push(`data-caption="${escapeAttr(attrs.caption)}"`);
        if (attrs.title)
            parts.push(`title="${escapeAttr(attrs.title)}"`);
        if (attrs.width != null)
--- a/packages/mcp/src/lib/docmost-schema.ts
+++ b/packages/mcp/src/lib/docmost-schema.ts
@@ -1164,7 +1164,26 @@ export const docmostExtensions = [
    heading: {},
    link: { openOnClick: false },
  }),
-  Image.configure({ inline: false }),
+  // Stock @tiptap/extension-image has no caption attribute, so a round-trip
+  // through this schema would drop the data-caption the client TiptapImage
+  // emits. Mirror editor-ext image.ts: add a caption attribute that parses
+  // data-caption and re-renders it only when set (caption-less images stay
+  // clean), keeping the MCP markdown round-trip lossless.
+  Image.extend({
+    addAttributes() {
+      const parent = this.parent?.() ?? {};
+      return {
+        ...parent,
+        caption: {
+          default: undefined,
+          parseHTML: (el: HTMLElement) =>
+            el.getAttribute("data-caption") || undefined,
+          renderHTML: (attrs: Record<string, any>) =>
+            attrs.caption ? { "data-caption": attrs.caption } : {},
+        },
+      };
+    },
+  }).configure({ inline: false }),
  TaskList,
  TaskItem.configure({ nested: true }),
  // Highlight stores its color unescaped and Docmost interpolates it into
--- a/packages/mcp/src/lib/markdown-converter.ts
+++ b/packages/mcp/src/lib/markdown-converter.ts
@@ -228,16 +228,26 @@ export function convertProseMirrorToMarkdown(content: any): string {
        // a bare "\n" would be reimported as a soft break and lost.
        return "  \n";

-      case "image":
+      case "image": {
        const imgAlt = node.attrs?.alt || "";
+        const imgCaption = node.attrs?.caption || "";
+        if (imgCaption) {
+          // ![]() can't carry a caption, so (symmetric to video) emit a raw
+          // <img> wrapped in a block <div>. On import marked.parse keeps the raw
+          // HTML and generateJSON runs the image extension's parseHTML, which
+          // restores the caption from data-caption.
+          const parts: string[] = [`src="${escapeAttr(node.attrs?.src ?? "")}"`];
+          if (imgAlt) parts.push(`alt="${escapeAttr(imgAlt)}"`);
+          parts.push(`data-caption="${escapeAttr(imgCaption)}"`);
+          return `<div><img ${parts.join(" ")}></div>`;
+        }
        // Neutralize characters that could break out of the markdown image
        // URL: spaces/newlines and parentheses would terminate the (...) target
        // and let a stored src inject following markdown/HTML. Percent-encode
        // them so the URL stays a single inert token.
        const imgSrc = encodeMdUrl(node.attrs?.src);
-        // No "caption" attribute exists in the Docmost image schema, so we do
-        // not emit one (the previous caption branch was dead).
        return `![${imgAlt}](${imgSrc})`;
+      }

      case "video": {
        // Emit the schema-matching <video> element so generateJSON rebuilds the
@@ -678,6 +688,8 @@ export function convertProseMirrorToMarkdown(content: any): string {
    const attrs = node.attrs || {};
    const parts: string[] = [`src="${escapeAttr(attrs.src ?? "")}"`];
    if (attrs.alt) parts.push(`alt="${escapeAttr(attrs.alt)}"`);
+    if (attrs.caption)
+      parts.push(`data-caption="${escapeAttr(attrs.caption)}"`);
    if (attrs.title) parts.push(`title="${escapeAttr(attrs.title)}"`);
    if (attrs.width != null) parts.push(`width="${escapeAttr(attrs.width)}"`);
    if (attrs.height != null) parts.push(`height="${escapeAttr(attrs.height)}"`);
--- a/packages/mcp/test/unit/markdown-converter.test.mjs
+++ b/packages/mcp/test/unit/markdown-converter.test.mjs
@@ -149,3 +149,37 @@ test("empty task item still emits its marker", () => {

  assert.equal(convertProseMirrorToMarkdown(input), "- [ ]\n- [x]");
 });
+
+// Image captions (issue #221). An image WITHOUT a caption stays the plain
+// `![alt](src)`; WITH a caption it is emitted as a raw <img data-caption>
+// wrapped in a block <div> (symmetric to video) so the round-trip md -> html ->
+// json restores the caption via the image extension's parseHTML.
+test("image without a caption emits plain ![alt](src)", () => {
+  const input = doc({
+    type: "image",
+    attrs: { src: "/files/a.png", alt: "cat" },
+  });
+  assert.equal(convertProseMirrorToMarkdown(input), "![cat](/files/a.png)");
+});
+
+test("image with a caption emits a raw <img data-caption> in a block div", () => {
+  const input = doc({
+    type: "image",
+    attrs: { src: "/files/a.png", alt: "cat", caption: "A grey cat" },
+  });
+  assert.equal(
+    convertProseMirrorToMarkdown(input),
+    '<div><img src="/files/a.png" alt="cat" data-caption="A grey cat"></div>',
+  );
+});
+
+test("image caption escapes & and \" in the data-caption attribute", () => {
+  const input = doc({
+    type: "image",
+    attrs: { src: "/files/a.png", caption: 'Tom & "Jerry"' },
+  });
+  assert.equal(
+    convertProseMirrorToMarkdown(input),
+    '<div><img src="/files/a.png" data-caption="Tom &amp; &quot;Jerry&quot;"></div>',
+  );
+});
--- a/packages/mcp/test/unit/media-roundtrip.test.mjs
+++ b/packages/mcp/test/unit/media-roundtrip.test.mjs
@@ -142,3 +142,31 @@ test("round-trip: pdf node survives markdown export with src + name + attachment
  assert.equal(found[0].attrs?.name, "x.pdf");
  assert.equal(found[0].attrs?.attachmentId, "a4");
 });
+
+// The converter emits captioned images as a raw <img data-caption="...">; for
+// the caption to survive the PM -> markdown -> PM round-trip the docmost-schema
+// Image node must parse data-caption back into the `caption` attr. Without that
+// (stock @tiptap/extension-image), the caption is silently lost — these guard
+// the "lossless" claim.
+test("round-trip: image caption survives markdown export (data-caption restored)", async () => {
+  const found = await roundtrip(
+    { type: "image", attrs: { src: "/api/files/cat.png", alt: "cat", caption: "A grey cat" } },
+    "image",
+  );
+  assert.equal(found.length, 1, "image node should survive");
+  assert.equal(found[0].attrs?.src, "/api/files/cat.png");
+  assert.equal(found[0].attrs?.caption, "A grey cat", "caption must round-trip");
+});
+
+test("round-trip: image caption with special chars survives markdown export", async () => {
+  const found = await roundtrip(
+    { type: "image", attrs: { src: "/api/files/cat.png", caption: 'Tom & "Jerry"' } },
+    "image",
+  );
+  assert.equal(found.length, 1, "image node should survive");
+  assert.equal(
+    found[0].attrs?.caption,
+    'Tom & "Jerry"',
+    "special-char caption must round-trip unescaped",
+  );
+});
--- a/packages/mcp/test/unit/roundtrip.test.mjs
+++ b/packages/mcp/test/unit/roundtrip.test.mjs
@@ -82,6 +82,24 @@ test("round-trip: image inside a column survives as an image node (not literal m
  assert.ok(!JSON.stringify(out).includes("![pic]"), "image must not become literal markdown text");
 });

+test("round-trip: captioned image inside a column preserves its caption (imageToHtml branch)", async () => {
+  // A captioned image in a column is emitted via the imageToHtml helper (raw
+  // HTML container), a different path from the top-level image case. Special
+  // chars in the caption exercise attribute escaping on the way out and in.
+  const caption = 'Tom & "Jerry"';
+  const input = doc({
+    type: "columns",
+    content: [
+      { type: "column", content: [{ type: "image", attrs: { src: "/api/files/a/p.png", alt: "pic", caption } }] },
+      { type: "column", content: [para(text("right"))] },
+    ],
+  });
+  const out = await roundtrip(input);
+  const imgs = findNodes(out, "image");
+  assert.equal(imgs.length, 1, "captioned image inside a column must survive");
+  assert.equal(imgs[0].attrs?.caption, caption, "caption (incl. special chars) must be preserved");
+});
+
 test("round-trip: blockquote inside a column survives as a blockquote node", async () => {
  const input = doc({
    type: "columns",
Author	SHA1	Message	Date
claude code agent 227	57308bc3f3	docs(#221 ): fix CHANGELOG grammar after setImageCaption removal (F8) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-29 02:07:41 +03:00
claude code agent 227	1ddb386214	docs(#221 ): CHANGELOG — drop removed setImageCaption command mention Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-29 01:46:49 +03:00
claude code agent 227	43af3dd5f1	test(mcp): cover captioned image inside a column round-trip (F5) A captioned image in a column is emitted via the imageToHtml helper, a separate path from the top-level image case whose data-caption branch was untested. Add a round-trip test with special chars (Tom & "Jerry") that fails if the imageToHtml caption branch breaks. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-29 01:43:18 +03:00
claude code agent 227	b02101b58a	docs(mcp): correct captioned-image import comment (F6) The comment referenced markdownToHtml, which does not exist in the mcp package; the import path is marked.parse + generateJSON (which runs the image extension's parseHTML). Describe the actual step and regenerate the build artifact in sync. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-29 01:43:13 +03:00
claude code agent 227	932bfce1d9	refactor(editor-ext): remove unused setImageCaption command (F7) The setImageCaption command and its Commands<> declaration were dead: captions are written via the generic updateAttributes in useImageTextFieldControl, and a repo-wide grep finds zero callers. Remove the speculative implementation (image.ts) and its type declaration. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-29 01:43:08 +03:00
claude code agent 227	d39b7ae67c	refactor(editor): dedupe alt/caption controls via shared hook (F4) Extract the ~110 duplicated lines into one parameterized useImageTextFieldControl and make useAltTextControl/useCaptionControl thin wrappers. Behavior identical; t("...") literals stay in the wrappers so i18n extraction keeps working. sanitizeCaption still exported for its unit test. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-28 23:38:48 +03:00
claude code agent 227	c124fb1f2c	test(editor): fix wrong sanitizeCaption collapse-cap comment (F3) The comment claimed 250 groups -> 499 chars -> slice past 500; the input is 120 "a b " groups collapsing to 479 chars, under the cap with no slice. Correct the comment and assert the 479 length. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-28 23:38:41 +03:00
claude code agent 227	d3ebae48cf	test(mcp): cover image caption markdown round-trip (F2) Add PM -> markdown -> PM round-trip assertions for image caption (plain and special-char), which fail without F1 and pass with it. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-28 23:38:36 +03:00
claude code agent 227	607aed5997	fix(mcp): restore image caption on markdown round-trip (F1) Stock @tiptap/extension-image carries no caption attribute, so markdownToProseMirror through docmostExtensions dropped the data-caption the client emits, breaking the lossless claim. Extend the Image node (mirroring editor-ext image.ts and the nearby Highlight extend) to parse/render data-caption. Rebuilt build/. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-28 23:38:28 +03:00
a	dc14a9a540	chore(editor): address image-caption review (#221 ) - docs: add CHANGELOG Unreleased/Added entry for editable image captions - test: export sanitizeCaption and add vitest unit coverage (whitespace collapse, trim, 500-char boundary) - refactor: drop duplicate .imageCaption CSS module class, keep the global .image-caption as the single source - docs: fix turndown image-caption comment (video rule emits a markdown link, not a <div>) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-28 04:36:30 +03:00
claude code agent 227	2aa482f62d	feat(editor): add editable image captions (#221 ) Add a visible caption (<figcaption>) under images, editable from the image bubble-menu and persisted across all formats: native Yjs/JSON, HTML export, and Markdown. - image node: new plain-text `caption` attribute (parse/render `data-caption` on <img>, emitted only when set) + `setImageCaption` command. The node stays an atom; the schema shape is unchanged, so the server's generateHTML/generateJSON path round-trips it for free. - resize node-view: re-parent the resizable wrapper into a <figure> and render the caption in a <figcaption> BELOW it, outside nodeView.wrapper (so onCommit's offsetHeight measurement and the left/right resize handles still cover the image only). This path also drives read-only / share rendering. React placeholder view renders the caption too. - bubble-menu: new useCaptionControl panel modeled on useAltTextControl (own icon, Caption strings, softer sanitizer, ~500 char limit). - markdown lossless round-trip: a captioned image is emitted as a raw <img data-caption> wrapped in a block <div> (same trick as <video>) in both the editor-ext turndown rule and the MCP converter; caption-less images stay clean ![alt](src). Import restores the caption via the shared markdownToHtml + parseHTML. - styles + i18n keys; tests for the schema attr round-trip, markdown round-trip (editor-ext) and the MCP converter. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>	2026-06-28 04:33:00 +03:00