Merge remote-tracking branch 'gitea/develop' into batch/issues-2026-06-25
# Conflicts: # apps/server/src/core/ai-chat/ai-chat.service.spec.ts # apps/server/src/core/ai-chat/ai-chat.service.ts
This commit is contained in:
11
CHANGELOG.md
11
CHANGELOG.md
@@ -12,6 +12,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
### Added
|
||||
|
||||
- **Persistent AI-chat history as the source of truth + server-side export.**
|
||||
An assistant turn is now persisted to the database step by step: the row is
|
||||
inserted upfront as `streaming` and updated as each agent step finishes, then
|
||||
finalized once to `completed`/`error`/`aborted`. A process that dies mid-turn
|
||||
keeps every finished step, and a startup sweep flips any dangling `streaming`
|
||||
row (untouched for 10 minutes) to `aborted`. Chat "Copy" now exports
|
||||
server-side from these rows (`POST /ai-chat/export`) rather than from live
|
||||
client state, so the export is identical whether a chat is freshly streaming,
|
||||
just switched to, or reloaded — and is available from the first turn of a new
|
||||
chat. (#183, #174)
|
||||
|
||||
- **AI-agent attribution for MCP writes.** Comments (and pages) created through
|
||||
the MCP endpoint by a dedicated agent account are now badged as "AI", with
|
||||
unspoofable provenance derived from a per-user `is_agent` flag (not from the
|
||||
|
||||
@@ -258,6 +258,7 @@
|
||||
"Copy to space": "Copy to space",
|
||||
"Copy chat": "Copy chat",
|
||||
"Copied": "Copied",
|
||||
"Failed to export chat": "Failed to export chat",
|
||||
"Duplicate": "Duplicate",
|
||||
"Select a user": "Select a user",
|
||||
"Select a group": "Select a group",
|
||||
|
||||
@@ -257,6 +257,7 @@
|
||||
"Copy": "Копировать",
|
||||
"Copy to space": "Копировать в пространство",
|
||||
"Copied": "Скопировано",
|
||||
"Failed to export chat": "Не удалось экспортировать чат",
|
||||
"Duplicate": "Дублировать",
|
||||
"Select a user": "Выберите пользователя",
|
||||
"Select a group": "Выберите группу",
|
||||
|
||||
@@ -6,7 +6,6 @@ import {
|
||||
useRef,
|
||||
useState,
|
||||
} from "react";
|
||||
import { type UIMessage } from "@ai-sdk/react";
|
||||
import { Group, Loader, Tooltip } from "@mantine/core";
|
||||
import {
|
||||
IconArrowsDiagonal,
|
||||
@@ -40,7 +39,7 @@ import {
|
||||
} from "@/features/ai-chat/queries/ai-chat-query.ts";
|
||||
import ConversationList from "@/features/ai-chat/components/conversation-list.tsx";
|
||||
import ChatThread from "@/features/ai-chat/components/chat-thread.tsx";
|
||||
import { buildChatMarkdown } from "@/features/ai-chat/utils/chat-markdown.ts";
|
||||
import { exportAiChat } from "@/features/ai-chat/services/ai-chat-service.ts";
|
||||
import { useChatSession } from "@/features/ai-chat/hooks/use-chat-session.ts";
|
||||
import {
|
||||
shouldCollapseOnOutsidePointer,
|
||||
@@ -121,7 +120,7 @@ function clampGeom(g: {
|
||||
* ported from the GitmostAgent.jsx design.
|
||||
*/
|
||||
export default function AiChatWindow() {
|
||||
const { t } = useTranslation();
|
||||
const { t, i18n } = useTranslation();
|
||||
const clipboard = useClipboard({ timeout: 500 });
|
||||
const queryClient = useQueryClient();
|
||||
const [windowOpen, setWindowOpen] = useAtom(aiChatWindowOpenAtom);
|
||||
@@ -162,30 +161,11 @@ export default function AiChatWindow() {
|
||||
const { data: messageRows, isLoading: messagesLoading } =
|
||||
useAiChatMessagesQuery(activeChatId ?? undefined);
|
||||
|
||||
// Live snapshot of the active thread's useChat state, kept up to date by
|
||||
// ChatThread. Lets the export include the in-progress (not-yet-persisted)
|
||||
// streaming turn. A ref avoids re-rendering this window on every token.
|
||||
const liveThreadRef = useRef<{
|
||||
messages: UIMessage[];
|
||||
isStreaming: boolean;
|
||||
banner: string | null;
|
||||
}>({
|
||||
messages: [],
|
||||
isStreaming: false,
|
||||
banner: null,
|
||||
});
|
||||
|
||||
// Live turn-token total (reasoning + output) for the in-flight turn, pushed up
|
||||
// (THROTTLED to ~8 Hz inside ChatThread) so the header badge ticks mid-stream.
|
||||
// `null` means no turn is in flight -> the badge falls back to the persisted
|
||||
// context size below.
|
||||
const [liveTurnTokens, setLiveTurnTokens] = useState<number | null>(null);
|
||||
// Whether the on-screen thread currently holds at least one message. Reported
|
||||
// reactively by ChatThread (the live snapshot lives in a non-reactive ref). This
|
||||
// lets the "Copy chat" button stay available for a brand-new, not-yet-persisted
|
||||
// chat whose first turn is in flight or was interrupted — that case has no
|
||||
// persisted rows yet, so a persisted-rows-only gate would hide the button (#174).
|
||||
const [hasLiveContent, setHasLiveContent] = useState(false);
|
||||
|
||||
// The page the user is currently viewing. AiChatWindow lives in a pathless
|
||||
// parent layout route, so useParams() can't see :pageSlug. Match the full
|
||||
@@ -214,6 +194,7 @@ export default function AiChatWindow() {
|
||||
threadKey,
|
||||
waitingForHistory,
|
||||
onTurnFinished,
|
||||
onServerChatId,
|
||||
cancelPendingAdoption,
|
||||
} = useChatSession({
|
||||
activeChatId,
|
||||
@@ -254,20 +235,19 @@ export default function AiChatWindow() {
|
||||
[cancelPendingAdoption, setActiveChatId, setDraft, setSelectedRoleId],
|
||||
);
|
||||
|
||||
// The active chat object (for its title) and an export gate: only enable the
|
||||
// export button when an existing chat with loaded persisted rows is active.
|
||||
// The active chat object (for its title) and an export gate. The export is now
|
||||
// SERVER-sourced (the DB is the single source of truth — #183): the assistant
|
||||
// row is persisted upfront + per step, so even a brand-new chat whose first
|
||||
// turn is streaming/interrupted has a server row to render. Enable the button
|
||||
// whenever a persisted chat is active (`activeChatId` is set). For a BRAND-NEW
|
||||
// chat that id is adopted EARLY — at the stream's `start` chunk via
|
||||
// onServerChatId (#174) — so the Copy button is available during the first
|
||||
// turn's stream, not only after it terminates.
|
||||
const activeChat = useMemo(
|
||||
() => chats?.items?.find((c) => c.id === activeChatId) ?? null,
|
||||
[chats, activeChatId],
|
||||
);
|
||||
// Export is available when there is anything to export: either persisted rows
|
||||
// for the active chat, OR a live on-screen thread with at least one message.
|
||||
// The live arm covers a brand-new chat whose first turn is streaming or was
|
||||
// interrupted before the server persisted any row (#174); the persisted arm is
|
||||
// the steady-state path for an already-saved chat (#160).
|
||||
const canExport =
|
||||
hasLiveContent ||
|
||||
(!!activeChatId && !!messageRows && messageRows.length > 0);
|
||||
const canExport = !!activeChatId;
|
||||
|
||||
// The role to display in the header and as the assistant's name. Prefer the
|
||||
// persisted role of an existing chat (chat-list JOIN); fall back to the role
|
||||
@@ -284,53 +264,21 @@ export default function AiChatWindow() {
|
||||
return picked ? { name: picked.name, emoji: picked.emoji } : null;
|
||||
}, [activeChat, enabledRoles, selectedRoleId]);
|
||||
|
||||
// Build a Markdown export from the already-loaded persisted rows (no network
|
||||
// call) and copy it to the clipboard. The "Copied" notification is the
|
||||
// feedback.
|
||||
const handleCopy = useCallback(() => {
|
||||
// Export gate. There must be SOMETHING to export — either a live on-screen
|
||||
// message or a persisted row. A brand-new chat whose first turn is streaming
|
||||
// or was interrupted has live messages but no persisted rows yet; it still
|
||||
// exports the on-screen thread WYSIWYG (#174). Only a truly empty chat (no
|
||||
// live messages and no rows) is non-exportable (the button is hidden too —
|
||||
// see `canExport`).
|
||||
const live = liveThreadRef.current;
|
||||
const hasRows = !!messageRows && messageRows.length > 0;
|
||||
if (live.messages.length === 0 && !hasRows) return;
|
||||
// WYSIWYG export: the live on-screen messages ARE the document (so a partial
|
||||
// reply from an interrupted turn — which never reached the persisted rows —
|
||||
// is exported just as it appears). The persisted rows enrich each live
|
||||
// message (token usage / error / timestamp) by id and serve as the fallback
|
||||
// when the live mirror is empty. The on-screen banner is appended too. See
|
||||
// issues #160 and #174. `chatId` may be null for a not-yet-saved chat — use a
|
||||
// placeholder so the header line still renders.
|
||||
const markdown = buildChatMarkdown({
|
||||
title: activeChat?.title ?? null,
|
||||
chatId: activeChatId ?? "unsaved",
|
||||
live: live.messages.map((m) => ({
|
||||
id: m.id,
|
||||
role: m.role,
|
||||
parts: (m.parts ?? []) as { type: string; text?: string }[],
|
||||
metadata: m.metadata as
|
||||
| {
|
||||
usage?: {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
reasoningTokens?: number;
|
||||
};
|
||||
error?: string;
|
||||
}
|
||||
| undefined,
|
||||
})),
|
||||
rows: messageRows,
|
||||
isStreaming: live.isStreaming,
|
||||
banner: live.banner,
|
||||
t,
|
||||
});
|
||||
clipboard.copy(markdown);
|
||||
notifications.show({ message: t("Copied") });
|
||||
}, [activeChatId, messageRows, activeChat, clipboard, t]);
|
||||
// "Copy chat" handler. Asks the server for the Markdown export of the active
// chat and hands the returned string to the clipboard hook. Per #183 the server
// renders the transcript from the persisted rows, so nothing is assembled from
// live client state here. The active i18n language is forwarded as `lang`.
//
// - No active chat id: nothing happens.
// - `exportAiChat` (or anything after it inside the `try`) throws: a red
//   "Failed to export chat" notification is shown.
//
// NOTE(review): the "Copied" notification is shown right after `clipboard.copy`
// is called, without inspecting the outcome of the clipboard write — confirm
// that a failed write is surfaced somewhere else.
const handleCopy = useCallback(async () => {
  if (!activeChatId) return;
  try {
    const exported = await exportAiChat(activeChatId, i18n.language);
    clipboard.copy(exported);
    notifications.show({ message: t("Copied") });
  } catch {
    notifications.show({
      message: t("Failed to export chat"),
      color: "red",
    });
  }
}, [activeChatId, clipboard, t, i18n.language]);
|
||||
|
||||
// Current context size for the active chat: how much the conversation now
|
||||
// occupies in the model's context window — NOT the cumulative tokens spent.
|
||||
@@ -685,9 +633,8 @@ export default function AiChatWindow() {
|
||||
onRolePicked={(role) => setSelectedRoleId(role.id)}
|
||||
assistantName={currentRole?.name}
|
||||
onTurnFinished={onTurnFinished}
|
||||
liveStateRef={liveThreadRef}
|
||||
onServerChatId={onServerChatId}
|
||||
onLiveTurnTokens={setLiveTurnTokens}
|
||||
onLiveContentChange={setHasLiveContent}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
|
||||
@@ -1,11 +1,4 @@
|
||||
import {
|
||||
useCallback,
|
||||
useEffect,
|
||||
useMemo,
|
||||
useRef,
|
||||
useState,
|
||||
type MutableRefObject,
|
||||
} from "react";
|
||||
import { useCallback, useEffect, useMemo, useRef, useState } from "react";
|
||||
import { generateId } from "ai";
|
||||
import { ActionIcon, Box, Group, Stack, Text } from "@mantine/core";
|
||||
import { IconClockHour4, IconX } from "@tabler/icons-react";
|
||||
@@ -68,30 +61,18 @@ interface ChatThreadProps {
|
||||
* authoritative id the server streamed on the assistant message metadata, or
|
||||
* undefined on a failed turn — see adopt-chat-id.ts for the full #137 design. */
|
||||
onTurnFinished: (serverChatId?: string) => void;
|
||||
/** Parent-owned ref that this thread keeps updated with its live useChat
|
||||
* snapshot (full message list + streaming flag), so the header's
|
||||
* "Copy chat" export can include the in-progress, not-yet-persisted
|
||||
* assistant message. A ref (not state) avoids re-rendering the parent on
|
||||
* every streamed delta. */
|
||||
liveStateRef?: MutableRefObject<{
|
||||
messages: UIMessage[];
|
||||
isStreaming: boolean;
|
||||
banner: string | null;
|
||||
}>;
|
||||
/** Called EARLY (at the stream's `start` chunk) with the authoritative server
|
||||
* chat id streamed on the assistant message metadata, so a brand-new chat
|
||||
* adopts its real id WHILE the first turn is still streaming (#174 — makes the
|
||||
* Copy/export button available mid-stream). Distinct from onTurnFinished,
|
||||
* which fires only at the terminal outcome. */
|
||||
onServerChatId?: (serverChatId?: string) => void;
|
||||
/** Reports the live turn-token total (reasoning + output) for the in-flight
|
||||
* turn so the parent can show a header badge that ticks mid-stream. THROTTLED
|
||||
* here (~8 Hz) so the parent re-renders a handful of times a second, not on
|
||||
* every streamed delta. Called with `null` when no turn is in flight (the
|
||||
* parent then reverts the badge to the persisted context size). */
|
||||
onLiveTurnTokens?: (tokens: number | null) => void;
|
||||
/** Reports whether the live thread currently holds at least one message, so the
|
||||
* parent can gate the "Copy chat" button on the on-screen thread rather than on
|
||||
* the persisted rows alone. This stays truthy for a brand-new, not-yet-saved
|
||||
* chat the moment its first user message appears — so an interrupted very first
|
||||
* turn (no persisted rows yet) is still exportable (#174). Called with `false`
|
||||
* on unmount so a thread torn down by `key` on chat switch can't leave the
|
||||
* button enabled for the next, possibly empty, chat. */
|
||||
onLiveContentChange?: (hasContent: boolean) => void;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -135,9 +116,8 @@ export default function ChatThread({
|
||||
onRolePicked,
|
||||
assistantName,
|
||||
onTurnFinished,
|
||||
liveStateRef,
|
||||
onServerChatId,
|
||||
onLiveTurnTokens,
|
||||
onLiveContentChange,
|
||||
}: ChatThreadProps) {
|
||||
const { t } = useTranslation();
|
||||
|
||||
@@ -306,6 +286,26 @@ export default function ChatThread({
|
||||
// Keep the flush helper pointed at the latest sendMessage instance.
|
||||
sendMessageRef.current = sendMessage;
|
||||
|
||||
// Early chat-id adoption (#174). Whenever `messages` changes, look at the LAST
// message only: if it is an assistant message and `extractServerChatId` yields
// an id for it, hand that id to the parent through `onServerChatId`. This lets
// the parent learn the id while the turn is still streaming instead of waiting
// for the terminal onTurnFinished callback.
//
// `lastForwardedChatIdRef` remembers the most recently forwarded id so the same
// id is not forwarded again on every subsequent `messages` update; a different
// id is forwarded once when it first appears.
const lastForwardedChatIdRef = useRef<string | undefined>(undefined);
useEffect(() => {
  // The callback is optional — nothing to do when the parent did not pass one.
  if (!onServerChatId) return;

  const lastMessage = messages[messages.length - 1];
  // Covers both "no messages yet" and "tail is not an assistant message".
  if (lastMessage?.role !== "assistant") return;

  const streamedId = extractServerChatId(lastMessage);
  if (streamedId && streamedId !== lastForwardedChatIdRef.current) {
    lastForwardedChatIdRef.current = streamedId;
    onServerChatId(streamedId);
  }
}, [messages, onServerChatId]);
|
||||
|
||||
// Live "turn was interrupted" marker for the CURRENT session. The red error
|
||||
// banner (driven by `error`) covers the error case; this covers an aborted
|
||||
// turn, distinguishing a manual Stop (`isAbort`) from a dropped connection
|
||||
@@ -328,44 +328,6 @@ export default function ChatThread({
|
||||
// the SAME on-screen banner text can be mirrored into the export (issue #160).
|
||||
const errorView = error ? describeChatError(error.message ?? "", t) : null;
|
||||
|
||||
// The exact banner the user sees under the message list, flattened to a single
|
||||
// string for the "Copy chat" export so the artifact records the interruption
|
||||
// WYSIWYG. Mirrors the JSX precedence below: error first, else the stop notice.
|
||||
const banner = errorView
|
||||
? errorView.detail
|
||||
? `${errorView.title} — ${errorView.detail}`
|
||||
: errorView.title
|
||||
: stopNotice === "manual"
|
||||
? t("Response stopped.")
|
||||
: stopNotice === "disconnect"
|
||||
? t("Connection lost — the answer was interrupted.")
|
||||
: null;
|
||||
|
||||
// Mirror the live useChat snapshot into the parent-owned ref so the export
|
||||
// (handled in AiChatWindow) can include the in-progress streaming turn AND the
|
||||
// on-screen banner. The cleanup clears the ref on unmount so a thread torn down
|
||||
// by `key` on chat switch can't leak its (possibly still-streaming) tail into
|
||||
// the next chat's export before the new thread's effect repopulates the ref.
|
||||
useEffect(() => {
|
||||
if (!liveStateRef) return;
|
||||
liveStateRef.current = { messages, isStreaming, banner };
|
||||
return () => {
|
||||
liveStateRef.current = { messages: [], isStreaming: false, banner: null };
|
||||
};
|
||||
}, [liveStateRef, messages, isStreaming, banner]);
|
||||
|
||||
// Reactively report "the live thread has content" to the parent. `liveStateRef`
|
||||
// above is a ref (deliberately non-reactive so streaming deltas don't re-render
|
||||
// the parent), so the export button needs a SEPARATE reactive signal to flip on
|
||||
// for a not-yet-persisted chat. Keyed on the boolean only — identical values are
|
||||
// a no-op setState in the parent, so this does not add per-delta re-renders.
|
||||
const hasLiveContent = messages.length > 0;
|
||||
useEffect(() => {
|
||||
if (!onLiveContentChange) return;
|
||||
onLiveContentChange(hasLiveContent);
|
||||
return () => onLiveContentChange(false);
|
||||
}, [onLiveContentChange, hasLiveContent]);
|
||||
|
||||
// Report the live turn-token total to the parent header badge, THROTTLED to
|
||||
// ~8 Hz so the parent re-renders a few times a second instead of on every
|
||||
// streamed delta. The tail assistant message's reasoning+output (estimate while
|
||||
|
||||
@@ -64,7 +64,10 @@ describe("useChatSession", () => {
|
||||
result.current.onTurnFinished(undefined);
|
||||
expect(setActiveChatId).not.toHaveBeenCalled();
|
||||
// The refetch lands with the new row => adopt it.
|
||||
rerender({ activeChatId: null, chats: { items: [{ id: "x" }, { id: "new" }] } });
|
||||
rerender({
|
||||
activeChatId: null,
|
||||
chats: { items: [{ id: "x" }, { id: "new" }] },
|
||||
});
|
||||
expect(setActiveChatId).toHaveBeenCalledWith("new");
|
||||
});
|
||||
|
||||
@@ -88,7 +91,10 @@ describe("useChatSession", () => {
|
||||
});
|
||||
result.current.onTurnFinished(undefined);
|
||||
// a was deleted, new was added — same length, but membership changed.
|
||||
rerender({ activeChatId: null, chats: { items: [{ id: "b" }, { id: "new" }] } });
|
||||
rerender({
|
||||
activeChatId: null,
|
||||
chats: { items: [{ id: "b" }, { id: "new" }] },
|
||||
});
|
||||
expect(setActiveChatId).toHaveBeenCalledWith("new");
|
||||
});
|
||||
|
||||
@@ -171,6 +177,40 @@ describe("useChatSession", () => {
|
||||
expect(setActiveChatId).not.toHaveBeenCalledWith("late");
|
||||
});
|
||||
|
||||
it("#174 early adopt: onServerChatId adopts the streamed id mid-stream (Copy button available during the first turn)", () => {
|
||||
// Brand-new chat: no id yet. The server streams the real chat id "A" on the
|
||||
// `start` chunk WHILE the first turn is still streaming (before onTurnFinished
|
||||
// fires at the terminal outcome). The hook must adopt it immediately so the
|
||||
// window's activeChatId-gated Copy/export button lights up during the stream.
|
||||
const { result, setActiveChatId } = setup({
|
||||
activeChatId: null,
|
||||
chats: { items: [] },
|
||||
});
|
||||
result.current.onServerChatId("A");
|
||||
expect(setActiveChatId).toHaveBeenCalledWith("A");
|
||||
});
|
||||
|
||||
it("#174 early adopt is in-place: threadKey stays stable (live stream not torn down)", () => {
|
||||
const chats = { items: [] };
|
||||
const { result, rerender } = setup({ activeChatId: null, chats });
|
||||
const keyBefore = result.current.threadKey;
|
||||
result.current.onServerChatId("A");
|
||||
// Parent reflects the adopted id back in; the SAME mount key is kept so the
|
||||
// in-flight useChat store (the streaming turn) is preserved.
|
||||
rerender({ activeChatId: "A", chats });
|
||||
expect(result.current.threadKey).toBe(keyBefore);
|
||||
});
|
||||
|
||||
it("#174 early adopt: no-op for an existing chat and for a missing id", () => {
|
||||
const { result, setActiveChatId } = setup({
|
||||
activeChatId: "chat-1",
|
||||
chats: { items: [{ id: "chat-1" }] },
|
||||
});
|
||||
result.current.onServerChatId("chat-1"); // already has an id
|
||||
result.current.onServerChatId(undefined); // no streamed id
|
||||
expect(setActiveChatId).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("in-place adopt keeps threadKey stable; an external switch remounts", () => {
|
||||
const chats = { items: [{ id: "B" }] };
|
||||
const { result, rerender } = setup({ activeChatId: null, chats });
|
||||
|
||||
@@ -34,6 +34,13 @@ export interface UseChatSessionResult {
|
||||
/** Call when a turn finishes; `serverChatId` is the authoritative streamed id
|
||||
* (undefined on a failed turn). Handles new-chat id adoption + invalidations. */
|
||||
onTurnFinished: (serverChatId?: string) => void;
|
||||
/** Call EARLY (at the stream's `start` chunk) with the authoritative streamed
|
||||
* chat id so a brand-new chat adopts its real id WHILE its first turn is still
|
||||
* streaming — making `activeChatId`-gated affordances (e.g. the Copy/export
|
||||
* button, #174) available immediately. In-place adoption only (same mount key,
|
||||
* no list/messages invalidation — that is left to onTurnFinished at the end).
|
||||
* Idempotent and a no-op once the chat already has an id. */
|
||||
onServerChatId: (serverChatId?: string) => void;
|
||||
/** Disarm any pending error-path new-chat fallback. The window calls this from
|
||||
* startNewChat/selectChat so a late refetch can't yank the user back into a
|
||||
* just-failed chat after they explicitly moved on. */
|
||||
@@ -85,13 +92,10 @@ export function useChatSession(
|
||||
// `newThread`/`switchThread` to (re)mount, `adoptThread` for in-place adoption.
|
||||
// Initial: a non-null activeChatId switches to it; a null one gets a fresh
|
||||
// session key with no chat id yet.
|
||||
const [thread, dispatch] = useReducer(
|
||||
threadSessionReducer,
|
||||
undefined,
|
||||
() =>
|
||||
activeChatId === null
|
||||
? newThread(`new-${generateId()}`)
|
||||
: switchThread(activeChatId),
|
||||
const [thread, dispatch] = useReducer(threadSessionReducer, undefined, () =>
|
||||
activeChatId === null
|
||||
? newThread(`new-${generateId()}`)
|
||||
: switchThread(activeChatId),
|
||||
);
|
||||
|
||||
// Error-path fallback for new-chat id adoption. When a brand-new chat's first
|
||||
@@ -150,6 +154,31 @@ export function useChatSession(
|
||||
[chats, setActiveChatId, onInvalidateChatList, onInvalidateChatMessages],
|
||||
);
|
||||
|
||||
// Early adoption (#174). Called with the chat id the server streamed for the
// current turn. `resolveAdoptedChatId` decides, from the id currently held in
// `activeChatIdRef` and the streamed id, whether there is an id to adopt; a
// falsy result makes this a no-op (e.g. a missing streamed id, or a chat that
// already has one).
//
// When an id IS adopted this performs an in-place adoption only:
//   1. the ref is updated first, so a repeat call reads the adopted id,
//   2. the parent is told via `setActiveChatId`,
//   3. the thread reducer receives an `adopt` action,
//   4. the pending error-path new-chat fallback is disarmed.
// No list/messages invalidation happens here; per the hook's contract that is
// left to onTurnFinished at the end of the turn.
const onServerChatId = useCallback(
  (serverChatId?: string) => {
    const nextChatId = resolveAdoptedChatId(
      activeChatIdRef.current,
      serverChatId,
    );
    if (nextChatId) {
      activeChatIdRef.current = nextChatId;
      setActiveChatId(nextChatId);
      dispatch({ type: "adopt", chatId: nextChatId });
      // Early adoption got there before the error-path fallback — disarm it.
      pendingNewChatRef.current = null;
    }
  },
  [setActiveChatId],
);
|
||||
|
||||
// FALLBACK resolver. Armed only by onTurnFinished when a brand-new chat's first
|
||||
// turn errored before the `start` chunk (no authoritative id streamed). Once
|
||||
// the per-user list refetch lands with the just-created row, adopt the SINGLE
|
||||
@@ -233,6 +262,7 @@ export function useChatSession(
|
||||
threadKey: thread.key,
|
||||
waitingForHistory,
|
||||
onTurnFinished,
|
||||
onServerChatId,
|
||||
cancelPendingAdoption,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -50,6 +50,24 @@ export async function deleteAiChat(chatId: string): Promise<void> {
|
||||
await api.post("/ai-chat/delete", { chatId });
|
||||
}
|
||||
|
||||
/**
 * Export a chat as Markdown (#183).
 *
 * POSTs `{ chatId, lang }` to `/ai-chat/export` and returns the `markdown`
 * field of the response body. The transcript is rendered by the server from
 * the persisted rows (the DB is the single source of truth, including an
 * interrupted turn's in-progress row), so the caller only has to copy the
 * returned string.
 *
 * @param chatId Id of the chat to export.
 * @param lang Optional language used for the few fixed role/tool labels; the
 *   server falls back to English when it is omitted.
 * @returns The Markdown document produced by the server.
 */
export async function exportAiChat(
  chatId: string,
  lang?: string,
): Promise<string> {
  const payload = { chatId, lang };
  const { data } = await api.post<{ markdown: string }>(
    "/ai-chat/export",
    payload,
  );
  return data.markdown;
}
|
||||
|
||||
/**
|
||||
* Agent roles API (`/ai-chat/roles`). `list` is available to any workspace
|
||||
* member (for the chat-creation picker); create/update/delete are admin-only
|
||||
@@ -76,6 +94,8 @@ export async function updateAiRole(data: IAiRoleUpdate): Promise<IAiRole> {
|
||||
|
||||
/**
 * Soft-delete a role (admin).
 *
 * POSTs `{ id }` to `/ai-chat/roles/delete` and returns the response body.
 *
 * @param id Id of the role to delete.
 * @returns The response payload, typed as `{ success: true }`.
 */
export async function deleteAiRole(id: string): Promise<{ success: true }> {
  // Exactly one request/declaration: the span previously carried the same
  // `const req = …` statement twice (single-line and wrapped form).
  const req = await api.post<{ success: true }>("/ai-chat/roles/delete", {
    id,
  });
  return req.data;
}
|
||||
|
||||
@@ -1,747 +0,0 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { buildChatMarkdown } from "@/features/ai-chat/utils/chat-markdown.ts";
|
||||
import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
|
||||
|
||||
/**
|
||||
* Tests for the client-only Markdown export builder. The output embeds a live
|
||||
* `new Date().toISOString()` export timestamp; we never assert that value, only
|
||||
* the deterministic structure (headings, numbering, fenced blocks, totals).
|
||||
*
|
||||
* A pass-through translator keeps role/tool labels predictable so the
|
||||
* structural assertions are stable without an i18n runtime.
|
||||
*/
|
||||
// Pass-through translator for the tests: returns the key itself, with every
// `{{name}}` placeholder replaced by `values.name` when that value is a string.
// Any other `values` shape (missing, or a non-string `name`) returns the key
// unchanged.
const t = (key: string, values?: Record<string, unknown>): string => {
  const name = values?.name;
  if (typeof name !== "string") return key;
  // split/join substitutes the name LITERALLY. `String.prototype.replace` with
  // a string replacement would interpret `$&`, `$1`, `$$` … inside the name as
  // replacement patterns and would only touch the first placeholder.
  return key.split("{{name}}").join(name);
};
|
||||
|
||||
/**
 * Fixture factory: builds a complete message row from a partial one.
 *
 * Fields that are `undefined` or `null` in `partial` fall back to a fixed
 * default (`id: "id"`, `role: "user"`, `content: null`, `metadata: null`,
 * `createdAt: "2026-06-21T00:00:00.000Z"`). Only these five fields are copied;
 * anything else on `partial` is ignored.
 */
function row(partial: Partial<IAiChatMessageRow>): IAiChatMessageRow {
  const { id, role, content, metadata, createdAt } = partial;
  return {
    id: id ?? "id",
    role: role ?? "user",
    content: content ?? null,
    metadata: metadata ?? null,
    createdAt: createdAt ?? "2026-06-21T00:00:00.000Z",
  };
}
|
||||
|
||||
describe("buildChatMarkdown — structure", () => {
|
||||
it("emits the title heading, chat id and message count", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "My chat",
|
||||
chatId: "chat-123",
|
||||
rows: [],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("# My chat");
|
||||
expect(md).toContain("- Chat ID: `chat-123`");
|
||||
expect(md).toContain("- Messages: 0");
|
||||
expect(md).toContain("- Exported:"); // timestamp present, value not asserted
|
||||
});
|
||||
|
||||
it("falls back to the translated 'Untitled chat' for empty/blank titles", () => {
|
||||
expect(
|
||||
buildChatMarkdown({ title: null, chatId: "c", rows: [], t }),
|
||||
).toContain("# Untitled chat");
|
||||
expect(
|
||||
buildChatMarkdown({ title: " ", chatId: "c", rows: [], t }),
|
||||
).toContain("# Untitled chat");
|
||||
});
|
||||
|
||||
it("numbers rows sequentially with role headings", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({ role: "user", content: "hi" }),
|
||||
row({ role: "assistant", content: "hello" }),
|
||||
row({ role: "user", content: "again" }),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("## 1. You");
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("## 3. You");
|
||||
// Heading numbering is strictly index+1, not e.g. role-relative.
|
||||
expect(md).not.toContain("## 0.");
|
||||
});
|
||||
|
||||
it("renders the per-row text content from `content` when no metadata.parts", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "plain body" })],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("plain body");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — text parts", () => {
|
||||
it("skips empty / whitespace-only text parts", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "ignored-content",
|
||||
metadata: {
|
||||
parts: [
|
||||
{ type: "text", text: " " },
|
||||
{ type: "text", text: "" },
|
||||
{ type: "text", text: "kept line" },
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
] as any,
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("kept line");
|
||||
// Whitespace-only part contributed no block of its own.
|
||||
expect(md).not.toContain(" \n\n");
|
||||
// When metadata.parts exists, the plain `content` fallback is NOT used.
|
||||
expect(md).not.toContain("ignored-content");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — tool parts", () => {
|
||||
it("renders a tool label, name, state and fenced Input/Output blocks", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "",
|
||||
metadata: {
|
||||
parts: [
|
||||
{
|
||||
type: "tool-getPage",
|
||||
state: "output-available",
|
||||
input: { pageId: "p1" },
|
||||
output: { id: "p1", title: "Home" },
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
} as any,
|
||||
],
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
// Known tool name maps to its label key; raw name in backticks; done state.
|
||||
expect(md).toContain("**Tool: Read page** (`getPage`) — done");
|
||||
expect(md).toContain("Input:");
|
||||
expect(md).toContain("Output:");
|
||||
// Fenced JSON blocks contain the stringified payloads.
|
||||
expect(md).toContain('"pageId": "p1"');
|
||||
expect(md).toContain('"title": "Home"');
|
||||
expect(md).toContain("```json");
|
||||
});
|
||||
|
||||
it("renders the generic label for an unknown tool and surfaces errorText", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "",
|
||||
metadata: {
|
||||
parts: [
|
||||
{
|
||||
type: "tool-mysteryTool",
|
||||
state: "output-error",
|
||||
input: { a: 1 },
|
||||
errorText: "boom",
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
} as any,
|
||||
],
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain(
|
||||
"**Tool: Ran tool mysteryTool** (`mysteryTool`) — error",
|
||||
);
|
||||
expect(md).toContain("**Error:** boom");
|
||||
});
|
||||
|
||||
it("does not throw on a circular tool input (falls back to String)", () => {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const circular: any = {};
|
||||
circular.self = circular;
|
||||
expect(() =>
|
||||
buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "",
|
||||
metadata: {
|
||||
parts: [
|
||||
{
|
||||
type: "tool-getPage",
|
||||
state: "input-available",
|
||||
input: circular,
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
} as any,
|
||||
],
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
}),
|
||||
).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — fence anti-breakout", () => {
|
||||
it("lengthens the delimiter so embedded ``` cannot break out of the block", () => {
|
||||
// Tool input whose stringified string form contains a literal ``` run.
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "",
|
||||
metadata: {
|
||||
parts: [
|
||||
{
|
||||
type: "tool-getPage",
|
||||
state: "output-available",
|
||||
// A bare string passes through stringify() verbatim.
|
||||
input: "before ``` after",
|
||||
output: "x",
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
} as any,
|
||||
],
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
// The fence around the 3-backtick content must use at least 4 backticks so
|
||||
// the embedded ``` run cannot terminate the block.
|
||||
expect(md).toContain("````json\nbefore ``` after\n````");
|
||||
// Robust anti-breakout check: the opening fence delimiter is strictly
|
||||
// longer than the longest backtick run inside the wrapped content. (A naive
|
||||
// `not.toContain("```json...")` is a false negative — a 4-backtick fence
|
||||
// textually contains the 3-backtick substring.)
|
||||
const open = md.match(/(`{3,})json\nbefore/);
|
||||
expect(open).not.toBeNull();
|
||||
expect(open![1].length).toBeGreaterThan(3); // > the 3-backtick run in content
|
||||
});
|
||||
|
||||
it("uses a 5-backtick fence when the content has a 4-backtick run", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "",
|
||||
metadata: {
|
||||
parts: [
|
||||
{
|
||||
type: "tool-getPage",
|
||||
state: "output-available",
|
||||
input: "a ```` b",
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
} as any,
|
||||
],
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("`````json\na ```` b\n`````");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — token totals", () => {
|
||||
it("prints the total-tokens line only when the summed usage is > 0", () => {
|
||||
const withTokens = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: { usage: { inputTokens: 10, outputTokens: 5 } },
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(withTokens).toContain("- Total tokens: 15");
|
||||
// Per-row usage footer too.
|
||||
expect(withTokens).toContain("_Tokens — in: 10, out: 5, total: 15_");
|
||||
});
|
||||
|
||||
it("omits the total-tokens line when the sum is 0 / usage absent", () => {
|
||||
const noTokens = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({ role: "user", content: "hi" }),
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: { usage: { inputTokens: 0, outputTokens: 0 } },
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(noTokens).not.toContain("- Total tokens:");
|
||||
});
|
||||
|
||||
it("uses totalTokens when present rather than summing in/out", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: {
|
||||
usage: { inputTokens: 3, outputTokens: 4, totalTokens: 99 },
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("- Total tokens: 99");
|
||||
});
|
||||
|
||||
it("appends the reasoning figure to the row footer when reasoningTokens > 0", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: {
|
||||
usage: { inputTokens: 10, outputTokens: 8, reasoningTokens: 3 },
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("_Tokens — in: 10, out: 8, reasoning: 3, total: 18_");
|
||||
});
|
||||
|
||||
it("omits the reasoning figure when reasoningTokens is 0 / absent", () => {
|
||||
const zero = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: {
|
||||
usage: { inputTokens: 10, outputTokens: 5, reasoningTokens: 0 },
|
||||
},
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(zero).toContain("_Tokens — in: 10, out: 5, total: 15_");
|
||||
expect(zero).not.toContain("reasoning:");
|
||||
|
||||
const absent = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: { usage: { inputTokens: 10, outputTokens: 5 } },
|
||||
}),
|
||||
],
|
||||
t,
|
||||
});
|
||||
expect(absent).not.toContain("reasoning:");
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Test helper: builds a minimal on-screen ("live") message carrying only the
 * fields these specs pass to `buildChatMarkdown`.
 *
 * Any omitted field gets a default: id "live-id", role "assistant", no parts.
 * `metadata` is passed through untouched (it stays `undefined` when omitted).
 */
function live(partial: {
  id?: string;
  role?: string;
  parts?: { type: string; text?: string }[];
  metadata?: { usage?: Record<string, number>; error?: string };
}) {
  const id = partial.id ?? "live-id";
  const role = partial.role ?? "assistant";
  const parts = partial.parts ?? [];
  return { id, role, parts, metadata: partial.metadata };
}
|
||||
|
||||
describe("buildChatMarkdown — live (WYSIWYG) source", () => {
|
||||
it("uses the live messages as the document (what's on screen), numbered from 1", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
// Persisted rows hold only the user turn; the assistant reply is live-only.
|
||||
rows: [row({ id: "u1", role: "user", content: "persisted user" })],
|
||||
live: [
|
||||
live({
|
||||
id: "u1",
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "on-screen user" }],
|
||||
}),
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "on-screen reply" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("## 1. You");
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("on-screen user");
|
||||
expect(md).toContain("on-screen reply");
|
||||
// Message count reflects the LIVE document, not rows + live.
|
||||
expect(md).toContain("- Messages: 2");
|
||||
});
|
||||
|
||||
it("captures a partial reply from an interrupted (non-streaming) turn — no 'generating' note", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ id: "u1", role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
live({
|
||||
id: "a-live",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "partial plan before the drop" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false, // the stream dropped — not streaming anymore
|
||||
banner: "Connection lost — the answer was interrupted.",
|
||||
t,
|
||||
});
|
||||
// The partial assistant answer that was on screen IS in the export.
|
||||
expect(md).toContain("partial plan before the drop");
|
||||
// It is NOT flagged still-generating (the turn is over, just interrupted).
|
||||
expect(md).not.toContain("still being generated");
|
||||
// The on-screen banner is recorded at the end.
|
||||
expect(md).toContain("Connection lost — the answer was interrupted.");
|
||||
});
|
||||
|
||||
it("flags ONLY the tail assistant as still generating, and only while streaming", () => {
|
||||
const streaming = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [],
|
||||
live: [
|
||||
live({
|
||||
id: "a",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "done earlier" }],
|
||||
}),
|
||||
live({
|
||||
id: "u",
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "next q" }],
|
||||
}),
|
||||
live({
|
||||
id: "b",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "streaming now" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: true,
|
||||
t,
|
||||
});
|
||||
// Exactly one "still being generated" note (the tail assistant).
|
||||
expect(streaming.match(/still being generated/g)?.length).toBe(1);
|
||||
|
||||
const idle = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [],
|
||||
live: [
|
||||
live({
|
||||
id: "b",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "final" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(idle).not.toContain("still being generated");
|
||||
});
|
||||
|
||||
it("does NOT flag a completed assistant as generating when the streaming tail is a user message", () => {
|
||||
// The `status === "submitted"` window: the user just sent, isStreaming is
|
||||
// already true, but the new assistant turn has no message yet so the tail is
|
||||
// the USER message. The previous assistant answer is complete on screen and
|
||||
// must not be marked still-generating (WYSIWYG; regression for #160 review).
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [],
|
||||
live: [
|
||||
live({
|
||||
id: "a",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "completed answer" }],
|
||||
}),
|
||||
live({
|
||||
id: "u",
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "the new question" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: true,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("completed answer");
|
||||
expect(md).not.toContain("still being generated");
|
||||
});
|
||||
|
||||
it("emits the heading + note for a streaming tail assistant with empty parts", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ id: "u1", role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
live({ id: "a-live", role: "assistant", parts: [] }),
|
||||
],
|
||||
isStreaming: true,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("still being generated");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — live enrichment from persisted rows", () => {
|
||||
it("pulls usage / error / timestamp from the persisted row matched by id", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
createdAt: "2026-06-22T10:00:00.000Z",
|
||||
metadata: {
|
||||
usage: { inputTokens: 10, outputTokens: 5 },
|
||||
error: "rate limited",
|
||||
},
|
||||
}),
|
||||
],
|
||||
live: [
|
||||
// Same id as the persisted row, but no usage/error/timestamp on the live msg.
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "reply" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("reply");
|
||||
// Token footer + total come from the enriched row.
|
||||
expect(md).toContain("_Tokens — in: 10, out: 5, total: 15_");
|
||||
expect(md).toContain("- Total tokens: 15");
|
||||
expect(md).toContain("**⚠️ Error:** rate limited");
|
||||
// The persisted timestamp is carried into the export.
|
||||
expect(md).toContain("<!-- 2026-06-22T10:00:00.000Z -->");
|
||||
});
|
||||
|
||||
it("prefers authoritative usage already on the live message over the row's", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
content: "x",
|
||||
metadata: {
|
||||
usage: { inputTokens: 1, outputTokens: 1, totalTokens: 2 },
|
||||
},
|
||||
}),
|
||||
],
|
||||
live: [
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "reply" }],
|
||||
metadata: {
|
||||
usage: { inputTokens: 100, outputTokens: 50, totalTokens: 150 },
|
||||
},
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
// The live (authoritative, freshest) usage wins, not the stale row usage.
|
||||
expect(md).toContain("- Total tokens: 150");
|
||||
expect(md).not.toContain("- Total tokens: 2");
|
||||
});
|
||||
|
||||
it("a current-turn live message with no matching row renders without a footer", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ id: "u1", role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
live({
|
||||
id: "a-live",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "fresh reply" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("fresh reply");
|
||||
// No persisted row for the live assistant -> no token footer, no timestamp.
|
||||
expect(md).not.toContain("_Tokens —");
|
||||
expect(md).not.toContain("<!-- undefined -->");
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildChatMarkdown — fallback + banner", () => {
|
||||
it("falls back to the persisted rows when there are no live messages", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [
|
||||
row({ role: "user", content: "from rows" }),
|
||||
row({
|
||||
role: "assistant",
|
||||
content: "answer",
|
||||
metadata: { usage: { inputTokens: 4, outputTokens: 6 } },
|
||||
}),
|
||||
],
|
||||
live: [], // empty live mirror -> fallback path
|
||||
isStreaming: false,
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("## 1. You");
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("from rows");
|
||||
expect(md).toContain("- Messages: 2");
|
||||
expect(md).toContain("- Total tokens: 10");
|
||||
});
|
||||
|
||||
it("appends the on-screen banner once, after the messages", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
],
|
||||
isStreaming: false,
|
||||
banner: "Rate limit reached — try again shortly.",
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("_⚠️ Rate limit reached — try again shortly._");
|
||||
// Banner comes after the (only) message block.
|
||||
expect(md.indexOf("Rate limit reached")).toBeGreaterThan(
|
||||
md.indexOf("## 1."),
|
||||
);
|
||||
});
|
||||
|
||||
it("omits the banner block when there is no banner", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: "t",
|
||||
chatId: "c",
|
||||
rows: [row({ role: "user", content: "q" })],
|
||||
live: [
|
||||
live({ id: "u", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
],
|
||||
isStreaming: false,
|
||||
banner: null,
|
||||
t,
|
||||
});
|
||||
expect(md).not.toContain("_⚠️");
|
||||
});
|
||||
});
|
||||
|
||||
// #174: a brand-new, not-yet-persisted chat whose first turn is streaming (or was
|
||||
// interrupted) has live messages but NO persisted rows yet, and its chat id is not
|
||||
// known (the caller passes a placeholder). The export must still capture the
|
||||
// on-screen thread WYSIWYG from the live messages alone.
|
||||
describe("buildChatMarkdown — first-turn export with no persisted base (#174)", () => {
|
||||
it("builds the document from live messages alone when rows are empty", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: null,
|
||||
chatId: "unsaved",
|
||||
rows: [],
|
||||
live: [
|
||||
live({
|
||||
id: "u1",
|
||||
role: "user",
|
||||
parts: [{ type: "text", text: "hello" }],
|
||||
}),
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "partial reply" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: true,
|
||||
t,
|
||||
});
|
||||
// Both on-screen messages are serialized, numbered from 1.
|
||||
expect(md).toContain("## 1. You");
|
||||
expect(md).toContain("hello");
|
||||
expect(md).toContain("## 2. AI agent");
|
||||
expect(md).toContain("partial reply");
|
||||
// The streaming tail assistant is flagged as in-progress.
|
||||
expect(md).toContain("still being generated");
|
||||
// The placeholder chat id and the live message count are recorded.
|
||||
expect(md).toContain("- Chat ID: `unsaved`");
|
||||
expect(md).toContain("- Messages: 2");
|
||||
// No persisted timestamp exists for a current-turn live message.
|
||||
expect(md).not.toContain("<!--");
|
||||
});
|
||||
|
||||
it("captures an interrupted first turn (no rows, not streaming) without a generating note", () => {
|
||||
const md = buildChatMarkdown({
|
||||
title: null,
|
||||
chatId: "unsaved",
|
||||
rows: [],
|
||||
live: [
|
||||
live({ id: "u1", role: "user", parts: [{ type: "text", text: "q" }] }),
|
||||
live({
|
||||
id: "a1",
|
||||
role: "assistant",
|
||||
parts: [{ type: "text", text: "half an answer" }],
|
||||
}),
|
||||
],
|
||||
isStreaming: false,
|
||||
banner: "Connection dropped — the response was cut off.",
|
||||
t,
|
||||
});
|
||||
expect(md).toContain("half an answer");
|
||||
// An interrupted (non-streaming) partial is exported as-is, no generating note.
|
||||
expect(md).not.toContain("still being generated");
|
||||
// The on-screen banner records the interruption.
|
||||
expect(md).toContain("_⚠️ Connection dropped — the response was cut off._");
|
||||
});
|
||||
});
|
||||
@@ -1,308 +0,0 @@
|
||||
/**
|
||||
* Client-only Markdown builder for an AI agent chat. Serializes the already
|
||||
* persisted message rows (loaded via `useAiChatMessagesQuery`) into a single
|
||||
* Markdown string suitable for copying to the clipboard. NO network call is
|
||||
* made and NO server/DB code is touched — this reuses the rich "request
|
||||
* internals" (tool calls with input/output, per-message token usage,
|
||||
* finish/error info) that the chat already holds client-side.
|
||||
*
|
||||
* Only role labels and tool action labels are localized via the passed-in `t`
|
||||
* translator; the structural document words (Input/Output/Error/Tokens/...) are
|
||||
* plain English constants because the output is a technical artifact.
|
||||
*/
|
||||
|
||||
import type { IAiChatMessageRow } from "@/features/ai-chat/types/ai-chat.types.ts";
|
||||
import {
|
||||
ToolUiPart,
|
||||
getToolName,
|
||||
toolRunState,
|
||||
toolLabelKey,
|
||||
} from "@/features/ai-chat/utils/tool-parts.tsx";
|
||||
|
||||
// Minimal translator signature compatible with react-i18next's `t`.
|
||||
type Translate = (key: string, values?: Record<string, unknown>) => string;
|
||||
|
||||
interface BuildChatMarkdownArgs {
|
||||
title: string | null;
|
||||
chatId: string;
|
||||
/** The live, on-screen messages — the WYSIWYG source of the export. When
|
||||
* present and non-empty these DRIVE the document (so it mirrors exactly what
|
||||
* the user sees, including a partial reply from an interrupted turn). Each is
|
||||
* matched to a persisted row by `id` to enrich it with token usage / error /
|
||||
* timestamp. When absent or empty the builder falls back to `rows`. */
|
||||
live?: LiveMessage[];
|
||||
/** Persisted message rows. Enrichment source (matched to `live` by id) AND the
|
||||
* fallback document source when `live` is empty. */
|
||||
rows: IAiChatMessageRow[];
|
||||
/** Whether the live thread is still streaming. Only then is the tail assistant
|
||||
* message flagged "still generating"; an interrupted (non-streaming) partial
|
||||
* reply is exported as-is and the `banner` explains the interruption. */
|
||||
isStreaming?: boolean;
|
||||
/** The on-screen banner text (error / dropped connection / manual stop),
|
||||
* appended at the end of the export so the artifact records the interruption
|
||||
* the user saw. */
|
||||
banner?: string | null;
|
||||
t: Translate;
|
||||
}
|
||||
|
||||
/** A single AI SDK UIMessage part (text part or other). */
|
||||
interface TextLikePart {
|
||||
type: string;
|
||||
text?: string;
|
||||
}
|
||||
|
||||
/** Authoritative per-turn usage the server attaches to a message / row. */
|
||||
interface UsageLike {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
reasoningTokens?: number;
|
||||
}
|
||||
|
||||
/** A live, on-screen message (subset of the AI SDK UIMessage we consume). */
|
||||
interface LiveMessage {
|
||||
id: string;
|
||||
role: "user" | "assistant" | string;
|
||||
parts: TextLikePart[];
|
||||
metadata?: { usage?: UsageLike; error?: string };
|
||||
}
|
||||
|
||||
/** One message normalized for rendering, regardless of live/persisted origin. */
|
||||
interface ExportItem {
|
||||
role: string;
|
||||
parts: TextLikePart[];
|
||||
usage?: UsageLike;
|
||||
error?: string;
|
||||
/** ISO timestamp from the persisted row, when one is known. */
|
||||
createdAt?: string;
|
||||
/** True only for the tail assistant message while the thread is streaming. */
|
||||
generating: boolean;
|
||||
}
|
||||
|
||||
/**
 * Convert an arbitrary tool input/output value into text for a fenced block.
 *
 * - A string is returned verbatim (no surrounding quotes are added).
 * - Any other value is pretty-printed as JSON with a 2-space indent.
 * - When JSON serialization throws (e.g. a circular structure or a BigInt) or
 *   produces no text at all (`JSON.stringify` returns `undefined` for
 *   `undefined`, functions and symbols), the result falls back to
 *   `String(value)`, so the declared `string` return type always holds.
 *
 * @param value - The value to serialize.
 * @returns The textual form of `value`; never `undefined`.
 */
function stringify(value: unknown): string {
  if (typeof value === "string") return value;
  try {
    // The lib typings claim `string`, but the runtime result can be undefined.
    const json: string | undefined = JSON.stringify(value, null, 2);
    return json ?? String(value);
  } catch {
    return String(value);
  }
}
|
||||
|
||||
/**
 * Wrap `code` in a fenced code block that the content cannot escape.
 *
 * The delimiter is one backtick longer than the longest run of backticks found
 * in `code`, and never shorter than three, so an embedded ``` sequence cannot
 * terminate the block early.
 *
 * @param code - The text to place inside the fence.
 * @param lang - Optional info string appended to the opening delimiter.
 * @returns Opening fence, content and closing fence joined by newlines.
 */
function fence(code: string, lang = ""): string {
  let width = 3;
  for (const run of code.match(/`+/g) ?? []) {
    // Only a run at least as long as the current delimiter forces it to grow.
    if (run.length >= width) width = run.length + 1;
  }
  const marker = "`".repeat(width);
  return [marker + lang, code, marker].join("\n");
}
|
||||
|
||||
/**
 * Token count for a single message's usage record.
 *
 * A reported `totalTokens` is used as-is (including 0); otherwise the count is
 * input + output, with a missing side counted as 0. `reasoningTokens` is
 * accepted in the parameter type but is not part of the result.
 */
function rowTokens(usage: {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
  reasoningTokens?: number;
}): number {
  const { inputTokens, outputTokens, totalTokens } = usage;
  if (totalTokens != null) return totalTokens;
  return (inputTokens ?? 0) + (outputTokens ?? 0);
}
|
||||
|
||||
/**
 * Render one tool part as a single Markdown block: a bold heading carrying the
 * translated label, the raw tool name in backticks and the run state, followed
 * by optional fenced Input / Output sections and an optional error line.
 * Sections are separated by blank lines.
 */
function renderToolBlock(tool: ToolUiPart, t: Translate): string {
  const toolName = getToolName(tool);
  const { key, values } = toolLabelKey(toolName);
  const label = t(key, values);
  const runState = toolRunState(tool.state);

  const sections: string[] = [
    `**Tool: ${label}** (\`${toolName}\`) — ${runState}`,
  ];
  // `undefined` means "not present"; null and other falsy payloads still print.
  if (tool.input !== undefined) {
    sections.push("Input:", fence(stringify(tool.input), "json"));
  }
  if (tool.output !== undefined) {
    sections.push("Output:", fence(stringify(tool.output), "json"));
  }
  if (tool.errorText) {
    sections.push(`**Error:** ${tool.errorText}`);
  }
  return sections.join("\n\n");
}

/**
 * Turn a message's parts into an ordered list of Markdown blocks.
 *
 * - `text` parts are trimmed; empty or whitespace-only ones contribute nothing.
 * - Tool parts (`tool-*` or `dynamic-tool`) become one block each.
 * - Every other part type is ignored.
 *
 * @param parts - The message parts, in display order.
 * @param t - Translator used for the tool action label.
 * @returns One Markdown string per rendered part.
 */
function renderMessageParts(parts: TextLikePart[], t: Translate): string[] {
  const blocks: string[] = [];

  parts.forEach((part) => {
    if (part.type === "text") {
      const trimmed = (part.text ?? "").trim();
      if (trimmed.length > 0) blocks.push(trimmed);
      return;
    }

    if (part.type === "dynamic-tool" || part.type.startsWith("tool-")) {
      blocks.push(renderToolBlock(part as unknown as ToolUiPart, t));
    }
  });

  return blocks;
}
|
||||
|
||||
/**
 * Resolve the parts to render for a persisted row.
 *
 * A non-empty `metadata.parts` array is used as-is; otherwise the row's plain
 * `content` (or "" when it is missing) is wrapped in a single text part.
 */
function rowParts(row: IAiChatMessageRow): TextLikePart[] {
  const persisted = row.metadata?.parts;
  if (Array.isArray(persisted) && persisted.length > 0) {
    return persisted as TextLikePart[];
  }
  return [{ type: "text", text: row.content ?? "" }];
}
|
||||
|
||||
/**
 * Normalize the export source into one ordered list of {@link ExportItem}.
 *
 * - No live messages (undefined or empty): the persisted `rows` are the
 *   document. Each row supplies its own parts, usage, error and timestamp, and
 *   nothing is flagged as generating.
 * - Otherwise the live messages are the document, in their own order. Each is
 *   looked up in `rows` by `id`; usage and error on the live message take
 *   precedence over the row's, and the timestamp comes from the row alone. A
 *   live message without a matching row therefore has no timestamp and only
 *   the usage/error it carries itself.
 *
 * `generating` is set on at most one item: the LAST live message, and only when
 * `isStreaming` is true and that message's role is "assistant". If the tail is
 * a user message, no item is flagged, even while streaming.
 */
function resolveItems(
  live: LiveMessage[] | undefined,
  rows: IAiChatMessageRow[],
  isStreaming: boolean,
): ExportItem[] {
  // Fallback path: nothing on screen, so serialize what was persisted.
  if (!live || live.length === 0) {
    return rows.map((row) => ({
      role: row.role,
      parts: rowParts(row),
      usage: row.metadata?.usage,
      error: row.metadata?.error ?? undefined,
      createdAt: row.createdAt,
      generating: false,
    }));
  }

  // Later rows overwrite earlier ones that share an id.
  const persistedById = new Map<IAiChatMessageRow["id"], IAiChatMessageRow>();
  for (const row of rows) {
    persistedById.set(row.id, row);
  }

  // Index of the one message that may be flagged, or -1 when none qualifies.
  const tailIndex = live.length - 1;
  const generatingIndex =
    isStreaming && live[tailIndex]?.role === "assistant" ? tailIndex : -1;

  return live.map((message, position) => {
    const persisted = persistedById.get(message.id);
    return {
      role: message.role,
      parts: message.parts ?? [],
      usage: message.metadata?.usage ?? persisted?.metadata?.usage,
      error:
        message.metadata?.error ?? persisted?.metadata?.error ?? undefined,
      createdAt: persisted?.createdAt,
      generating: position === generatingIndex,
    };
  });
}
|
||||
|
||||
/**
 * Serialize a chat to one Markdown string.
 *
 * Layout, with a blank line between blocks:
 *   1. `# <title>`, or the translated "Untitled chat" when the title is
 *      null or blank.
 *   2. A bullet list: chat id, export time (`new Date()` as ISO), message
 *      count, and a total-tokens line only when the summed usage is above 0.
 *   3. Per message: a `---` rule, a numbered `## N. <role>` heading, the
 *      created-at timestamp as an HTML comment when known, the rendered parts,
 *      an in-progress note when the item is flagged as generating, the
 *      message error if any, and a token footer when usage is known.
 *   4. A final `---` plus the banner in italics when a non-blank banner is given.
 *
 * The only impure input is the export timestamp.
 *
 * @param args - Title, chat id, live and persisted messages, streaming flag,
 *   optional banner and the translator.
 * @returns The Markdown document.
 */
export function buildChatMarkdown(args: BuildChatMarkdownArgs): string {
  const { title, chatId, live, rows, isStreaming, banner, t } = args;

  const items = resolveItems(live, rows, isStreaming === true);

  const trimmedTitle = (title ?? "").trim();
  const heading = trimmedTitle.length > 0 ? trimmedTitle : t("Untitled chat");

  // Messages without usage contribute nothing to the grand total.
  let totalTokens = 0;
  for (const item of items) {
    if (item.usage) totalTokens += rowTokens(item.usage);
  }

  const meta = [
    `- Chat ID: \`${chatId}\``,
    `- Exported: ${new Date().toISOString()}`,
    `- Messages: ${items.length}`,
  ];
  if (totalTokens > 0) meta.push(`- Total tokens: ${totalTokens}`);

  const blocks: string[] = [`# ${heading}`, meta.join("\n")];

  for (let index = 0; index < items.length; index += 1) {
    const item = items[index];
    const roleLabel = item.role === "assistant" ? t("AI agent") : t("You");

    blocks.push("---", `## ${index + 1}. ${roleLabel}`);

    // The timestamp lives in an HTML comment so it stays out of rendered prose;
    // items without a known timestamp simply omit it.
    if (item.createdAt) blocks.push(`<!-- ${item.createdAt} -->`);

    blocks.push(...renderMessageParts(item.parts, t));

    // Emitted even when the message has no parts yet, so the in-progress turn
    // is still recorded under its heading.
    if (item.generating) {
      blocks.push(
        "_⏳ This message is still being generated — the export captured a partial, in-progress response._",
      );
    }

    // May appear alongside the trailing banner; both are kept on purpose.
    if (item.error) {
      blocks.push(`**⚠️ Error:** ${item.error}`);
    }

    const usage = item.usage;
    if (usage) {
      const total = usage.totalTokens ?? rowTokens(usage);
      // The reasoning figure is printed only for a positive reported count.
      const reasoningCount = usage.reasoningTokens ?? 0;
      const reasoning =
        reasoningCount > 0 ? `, reasoning: ${reasoningCount}` : "";
      blocks.push(
        `_Tokens — in: ${usage.inputTokens ?? "?"}, out: ${usage.outputTokens ?? "?"}${reasoning}, total: ${total}_`,
      );
    }
  }

  // Append the banner once, after all messages, when it has visible text.
  const bannerText = banner?.trim() ?? "";
  if (bannerText.length > 0) {
    blocks.push("---", `_⚠️ ${bannerText}_`);
  }

  return blocks.join("\n\n");
}
|
||||
159
apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts
Normal file
159
apps/server/src/core/ai-chat/ai-chat.controller.export.spec.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
import { ForbiddenException } from '@nestjs/common';
|
||||
import { AiChatController } from './ai-chat.controller';
|
||||
import {
|
||||
planFinalizeAssistant,
|
||||
applyFinalize,
|
||||
flushAssistant,
|
||||
type AssistantFlush,
|
||||
} from './ai-chat.service';
|
||||
import type { User, Workspace } from '@docmost/db/types/entity.types';
|
||||
|
||||
/**
|
||||
* Wiring spec for the #183 `POST /ai-chat/export` endpoint. It must: owner-gate via
|
||||
* the chat lookup (workspace-scoped + creator-owned), load the FULL transcript
|
||||
* via findAllByChat, render server-side, and return `{ markdown }`. Exercised by
|
||||
* instantiating the controller with hand-rolled mocks — no Nest graph, no DB.
|
||||
*/
|
||||
describe('AiChatController.export', () => {
|
||||
const user = { id: 'u1' } as User;
|
||||
const workspace = { id: 'ws1' } as Workspace;
|
||||
|
||||
/**
 * Build an AiChatController wired to hand-rolled jest mocks.
 *
 * @param over.chat  Value `aiChatRepo.findById` resolves to. Defaults to a
 *                   chat created by user 'u1' and titled 'My chat'.
 * @param over.rows  Value `aiChatMessageRepo.findAllByChat` resolves to.
 *                   Defaults to one user row plus one completed assistant row.
 * @returns the controller together with both mock repos, so a test can
 *          assert on the repo calls.
 */
function makeController(
|
||||
over: {
|
||||
chat?: unknown;
|
||||
rows?: unknown[];
|
||||
} = {},
|
||||
) {
|
||||
// Key-presence check (`in`) rather than `??`, so a caller passing an explicit
// `chat: null` gets null from findById instead of the default chat.
const chat =
|
||||
'chat' in over
|
||||
? over.chat
|
||||
: { id: 'c1', creatorId: 'u1', title: 'My chat' };
|
||||
const aiChatRepo = {
|
||||
findById: jest.fn().mockResolvedValue(chat),
|
||||
};
|
||||
const aiChatMessageRepo = {
|
||||
findAllByChat: jest.fn().mockResolvedValue(
|
||||
over.rows ?? [
|
||||
{
|
||||
id: 'm1',
|
||||
role: 'user',
|
||||
content: 'hi',
|
||||
metadata: null,
|
||||
status: null,
|
||||
},
|
||||
{
|
||||
id: 'm2',
|
||||
role: 'assistant',
|
||||
content: 'hello',
|
||||
metadata: null,
|
||||
status: 'completed',
|
||||
},
|
||||
],
|
||||
),
|
||||
};
|
||||
// Only the 2nd and 3rd constructor arguments are real mocks; the 1st and 4th
// are empty stubs. NOTE(review): presumably those are the chat and
// transcription services, which export never touches — confirm against the
// controller's constructor order.
const controller = new AiChatController(
|
||||
{} as never,
|
||||
aiChatRepo as never,
|
||||
aiChatMessageRepo as never,
|
||||
{} as never,
|
||||
);
|
||||
return { controller, aiChatRepo, aiChatMessageRepo };
|
||||
}
|
||||
|
||||
it('renders the full transcript and returns { markdown }', async () => {
|
||||
const { controller, aiChatMessageRepo } = makeController();
|
||||
const res = await controller.export({ chatId: 'c1' }, user, workspace);
|
||||
expect(aiChatMessageRepo.findAllByChat).toHaveBeenCalledWith('c1', 'ws1');
|
||||
expect(res.markdown).toContain('# My chat');
|
||||
expect(res.markdown).toContain('## 1. You');
|
||||
expect(res.markdown).toContain('## 2. AI agent');
|
||||
});
|
||||
|
||||
it('forbids a chat the user does not own', async () => {
|
||||
const { controller } = makeController({
|
||||
chat: { id: 'c1', creatorId: 'someone-else', title: 'X' },
|
||||
});
|
||||
await expect(
|
||||
controller.export({ chatId: 'c1' }, user, workspace),
|
||||
).rejects.toBeInstanceOf(ForbiddenException);
|
||||
});
|
||||
|
||||
it('forbids a missing / foreign-workspace chat', async () => {
|
||||
const { controller } = makeController({ chat: null });
|
||||
await expect(
|
||||
controller.export({ chatId: 'c1' }, user, workspace),
|
||||
).rejects.toBeInstanceOf(ForbiddenException);
|
||||
});
|
||||
|
||||
it('localizes labels when lang=ru is passed', async () => {
|
||||
const { controller } = makeController();
|
||||
const res = await controller.export(
|
||||
{ chatId: 'c1', lang: 'ru' },
|
||||
user,
|
||||
workspace,
|
||||
);
|
||||
expect(res.markdown).toContain('## 1. Вы');
|
||||
expect(res.markdown).toContain('## 2. ИИ-агент');
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* The terminal-finalize dispatch (#183): the assistant row is INSERTed upfront
|
||||
* as 'streaming' and finalized once on the terminal callback. When the upfront
|
||||
* insert SUCCEEDED (we hold an id) finalize UPDATEs that row; when it FAILED
|
||||
* (assistantId is undefined) finalize falls back to INSERTing the terminal row
|
||||
* so the turn is not lost — the only safety against losing the turn entirely.
|
||||
*
|
||||
* `planFinalizeAssistant` is the pure decision; `applyFinalize` is the REAL
|
||||
* dispatch the service uses, exercised here over a mock repo (not a copy of the
|
||||
* logic) so a production drift would fail the test (#186 review).
|
||||
*/
|
||||
describe('finalizeAssistant dispatch (planFinalizeAssistant + applyFinalize)', () => {
|
||||
const workspaceId = 'ws1';
|
||||
|
||||
// Drive the SAME applyFinalize the service calls (no duplicated logic).
|
||||
/**
 * Run the production `applyFinalize` against a mock repo.
 *
 * @param repo         Mock repo exposing the `insert` / `update` jest mocks.
 * @param assistantId  Id of the upfront row, or undefined when there is none;
 *                     fed to `planFinalizeAssistant` to pick the plan.
 * @param flushed      Terminal payload to write.
 */
async function dispatchFinalize(
|
||||
repo: { insert: jest.Mock; update: jest.Mock },
|
||||
assistantId: string | undefined,
|
||||
flushed: AssistantFlush,
|
||||
): Promise<void> {
|
||||
await applyFinalize(
|
||||
repo,
|
||||
planFinalizeAssistant(assistantId),
|
||||
// Fixed identity for every case; `workspaceId` comes from the enclosing describe.
{ chatId: 'c1', workspaceId, userId: 'u1' },
|
||||
flushed,
|
||||
);
|
||||
}
|
||||
|
||||
it('plan: update when the upfront insert returned an id', () => {
|
||||
expect(planFinalizeAssistant('a1')).toEqual({ kind: 'update', id: 'a1' });
|
||||
});
|
||||
|
||||
it('plan: insert (fallback) when there is no upfront id', () => {
|
||||
expect(planFinalizeAssistant(undefined)).toEqual({ kind: 'insert' });
|
||||
});
|
||||
|
||||
it('(a) upfront insert succeeded -> finalize UPDATEs the row by id', async () => {
|
||||
const repo = { insert: jest.fn(), update: jest.fn() };
|
||||
const flushed = flushAssistant([], 'final answer', 'completed', {
|
||||
finishReason: 'stop',
|
||||
});
|
||||
await dispatchFinalize(repo, 'a1', flushed);
|
||||
expect(repo.update).toHaveBeenCalledWith('a1', workspaceId, flushed);
|
||||
expect(repo.insert).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('(b) upfront insert failed -> finalize INSERTs the terminal payload', async () => {
|
||||
const repo = { insert: jest.fn(), update: jest.fn() };
|
||||
const flushed = flushAssistant([], 'partial', 'error', { error: 'boom' });
|
||||
await dispatchFinalize(repo, undefined, flushed);
|
||||
expect(repo.update).not.toHaveBeenCalled();
|
||||
expect(repo.insert).toHaveBeenCalledTimes(1);
|
||||
const arg = repo.insert.mock.calls[0][0];
|
||||
// The fallback insert carries the terminal content/status/metadata.
|
||||
expect(arg.role).toBe('assistant');
|
||||
expect(arg.content).toBe('partial');
|
||||
expect(arg.status).toBe('error');
|
||||
expect((arg.metadata as { error?: string }).error).toBe('boom');
|
||||
});
|
||||
});
|
||||
@@ -20,7 +20,7 @@ import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
|
||||
import { AuthUser } from '../../common/decorators/auth-user.decorator';
|
||||
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
|
||||
import { SkipTransform } from '../../common/decorators/skip-transform.decorator';
|
||||
import { User, Workspace } from '@docmost/db/types/entity.types';
|
||||
import { AiChat, User, Workspace } from '@docmost/db/types/entity.types';
|
||||
import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
|
||||
import { AiChatRepo } from '@docmost/db/repos/ai-chat/ai-chat.repo';
|
||||
import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
|
||||
@@ -31,10 +31,12 @@ import { AiChatService, AiChatStreamBody } from './ai-chat.service';
|
||||
import { AiTranscriptionService } from './ai-transcription.service';
|
||||
import {
|
||||
ChatIdDto,
|
||||
ExportChatDto,
|
||||
GetChatMessagesDto,
|
||||
RenameChatDto,
|
||||
} from './dto/ai-chat.dto';
|
||||
import { describeProviderError } from '../../integrations/ai/ai-error.util';
|
||||
import { buildChatMarkdown } from './chat-markdown.util';
|
||||
|
||||
/**
|
||||
* Per-user AI chat API (§6.1). Routes are POST to match this codebase's
|
||||
@@ -81,6 +83,36 @@ export class AiChatController {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Export a chat to Markdown (#183). The DB is the single source of truth: the
|
||||
* whole transcript is loaded (oldest -> newest) and rendered server-side. Now
|
||||
* that the assistant row is persisted upfront and per step, an interrupted
|
||||
* turn is included up to its last finished step. Workspace-scoped and owner-
|
||||
* gated via assertOwnedChat (same as the other read endpoints). Returns
|
||||
* `{ markdown }`. `lang` localizes the few fixed labels (default English).
|
||||
*/
|
||||
@HttpCode(HttpStatus.OK)
|
||||
@Post('export')
|
||||
async export(
|
||||
@Body() dto: ExportChatDto,
|
||||
@AuthUser() user: User,
|
||||
@AuthWorkspace() workspace: Workspace,
|
||||
): Promise<{ markdown: string }> {
|
||||
// Ownership gate first: nothing is loaded unless this resolves. The returned
// chat row supplies the title used below.
const chat = await this.assertOwnedChat(dto.chatId, user, workspace);
|
||||
// Load the chat's message rows, scoped to the caller's workspace.
const rows = await this.aiChatMessageRepo.findAllByChat(
|
||||
dto.chatId,
|
||||
workspace.id,
|
||||
);
|
||||
const markdown = buildChatMarkdown({
|
||||
// An absent title is normalized to null before rendering.
title: chat.title ?? null,
|
||||
chatId: dto.chatId,
|
||||
rows,
|
||||
// normalizeLang(undefined) already yields 'en', so no `?? 'en'` is needed.
|
||||
lang: dto.lang,
|
||||
});
|
||||
return { markdown };
|
||||
}
|
||||
|
||||
/** Rename a chat. */
|
||||
@HttpCode(HttpStatus.OK)
|
||||
@Post('rename')
|
||||
@@ -90,7 +122,11 @@ export class AiChatController {
|
||||
@AuthWorkspace() workspace: Workspace,
|
||||
) {
|
||||
await this.assertOwnedChat(dto.chatId, user, workspace);
|
||||
await this.aiChatRepo.update(dto.chatId, { title: dto.title }, workspace.id);
|
||||
await this.aiChatRepo.update(
|
||||
dto.chatId,
|
||||
{ title: dto.title },
|
||||
workspace.id,
|
||||
);
|
||||
return { success: true };
|
||||
}
|
||||
|
||||
@@ -145,7 +181,10 @@ export class AiChatController {
|
||||
// Resolve the agent role for this turn BEFORE hijack: existing chats read it
|
||||
// from ai_chats.role_id (authoritative), a new chat from body.roleId. The
|
||||
// role drives both the persona and the optional model override below.
|
||||
const role = await this.aiChatService.resolveRoleForRequest(workspace, body);
|
||||
const role = await this.aiChatService.resolveRoleForRequest(
|
||||
workspace,
|
||||
body,
|
||||
);
|
||||
|
||||
// Resolve the model (applying the role's optional override) BEFORE hijack so
|
||||
// an unconfigured provider — including a role pointing at an unconfigured
|
||||
@@ -232,7 +271,9 @@ export class AiChatController {
|
||||
let file = null;
|
||||
try {
|
||||
// Whisper hard-caps uploads at 25MB; allow a single file.
|
||||
file = await req.file({ limits: { fileSize: 25 * 1024 * 1024, files: 1 } });
|
||||
file = await req.file({
|
||||
limits: { fileSize: 25 * 1024 * 1024, files: 1 },
|
||||
});
|
||||
} catch (err: any) {
|
||||
if (err?.statusCode === 413) {
|
||||
throw new BadRequestException('Audio file too large (max 25MB)');
|
||||
@@ -283,11 +324,12 @@ export class AiChatController {
|
||||
chatId: string,
|
||||
user: User,
|
||||
workspace: Workspace,
|
||||
): Promise<void> {
|
||||
): Promise<AiChat> {
|
||||
const chat = await this.aiChatRepo.findById(chatId, workspace.id);
|
||||
if (!chat || chat.creatorId !== user.id) {
|
||||
throw new ForbiddenException();
|
||||
}
|
||||
return chat;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { AiChatService } from './ai-chat.service';
|
||||
|
||||
/**
|
||||
* Lifecycle unit tests for AiChatService.onModuleInit (#183 crash-recovery
|
||||
* sweep). The sweep is BEST-EFFORT: a failure must be logged (warn) but must
|
||||
* NEVER throw out of onModuleInit and block server startup. Exercised with a
|
||||
* hand-rolled mock repo — no Nest graph, no DB. Only `aiChatMessageRepo` is
|
||||
* touched by onModuleInit, so the other constructor deps are stubbed as never.
|
||||
*/
|
||||
describe('AiChatService.onModuleInit (startup sweep)', () => {
|
||||
/**
 * Build an AiChatService whose only real dependency is a message repo
 * exposing the supplied `sweepStreaming` mock; every other constructor
 * argument is an empty stub.
 *
 * @param sweepStreaming  jest mock standing in for the repo's sweep call.
 * @returns the service and the mock repo.
 */
function makeService(sweepStreaming: jest.Mock) {
|
||||
const aiChatMessageRepo = { sweepStreaming };
|
||||
// Positional stubs: the trailing comments name the dependency each slot
// stands in for; the third slot is the mocked message repo.
const service = new AiChatService(
|
||||
{} as never, // ai
|
||||
{} as never, // aiChatRepo
|
||||
aiChatMessageRepo as never,
|
||||
{} as never, // aiSettings
|
||||
{} as never, // tools
|
||||
{} as never, // mcpClients
|
||||
{} as never, // aiAgentRoleRepo
|
||||
{} as never, // pageRepo
|
||||
{} as never, // pageAccess
|
||||
);
|
||||
return { service, aiChatMessageRepo };
|
||||
}
|
||||
|
||||
afterEach(() => jest.restoreAllMocks());
|
||||
|
||||
it('happy path: calls sweepStreaming and resolves', async () => {
|
||||
const sweepStreaming = jest.fn().mockResolvedValue(0);
|
||||
const { service } = makeService(sweepStreaming);
|
||||
await expect(service.onModuleInit()).resolves.toBeUndefined();
|
||||
expect(sweepStreaming).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('logs how many rows were swept when > 0', async () => {
|
||||
const sweepStreaming = jest.fn().mockResolvedValue(3);
|
||||
const logSpy = jest
|
||||
.spyOn(Logger.prototype, 'log')
|
||||
.mockImplementation(() => undefined);
|
||||
const { service } = makeService(sweepStreaming);
|
||||
await service.onModuleInit();
|
||||
expect(logSpy).toHaveBeenCalledTimes(1);
|
||||
expect(String(logSpy.mock.calls[0][0])).toContain('3');
|
||||
});
|
||||
|
||||
it('sweepStreaming throws -> onModuleInit resolves (does NOT throw) and warns', async () => {
|
||||
const sweepStreaming = jest
|
||||
.fn()
|
||||
.mockRejectedValue(new Error('db unavailable'));
|
||||
const warnSpy = jest
|
||||
.spyOn(Logger.prototype, 'warn')
|
||||
.mockImplementation(() => undefined);
|
||||
const { service } = makeService(sweepStreaming);
|
||||
// Must not throw — a sweep failure may never block startup.
|
||||
await expect(service.onModuleInit()).resolves.toBeUndefined();
|
||||
expect(warnSpy).toHaveBeenCalledTimes(1);
|
||||
expect(String(warnSpy.mock.calls[0][0])).toContain('db unavailable');
|
||||
});
|
||||
});
|
||||
@@ -6,7 +6,7 @@ import {
|
||||
serializeSteps,
|
||||
rowToUiMessage,
|
||||
prepareAgentStep,
|
||||
buildPartialAssistantRecord,
|
||||
flushAssistant,
|
||||
chatStreamMetadata,
|
||||
accumulateStepUsage,
|
||||
MAX_AGENT_STEPS,
|
||||
@@ -233,101 +233,108 @@ describe('prepareAgentStep', () => {
|
||||
// The synthesis instruction is appended.
|
||||
expect(result?.system).toContain(FINAL_STEP_INSTRUCTION);
|
||||
});
|
||||
|
||||
it('pins the off-by-one boundary (MAX-2 is not final, MAX-1 is)', () => {
|
||||
// Boundary expressed via the constant, not a hardcoded 18/19, so the test
|
||||
// tracks MAX_AGENT_STEPS if the cap ever changes.
|
||||
expect(prepareAgentStep(MAX_AGENT_STEPS - 2, 'SYS')).toBeUndefined();
|
||||
const atBoundary = prepareAgentStep(MAX_AGENT_STEPS - 1, 'SYS');
|
||||
expect(atBoundary).toBeDefined();
|
||||
expect(atBoundary?.toolChoice).toBe('none');
|
||||
});
|
||||
});
|
||||
|
||||
/**
|
||||
* Unit test for buildPartialAssistantRecord: the pure helper that shapes the
|
||||
* assistant-message record persisted on a partial/failed turn (the streamText
|
||||
* onError / onAbort paths). It captures the PARTIAL answer the user already saw
|
||||
* (finished steps' text + tool parts, plus the in-progress step's text) so a
|
||||
* provider error / disconnect no longer throws the streamed answer away. Pinning
|
||||
* the record shape here covers the persist-partial logic without seaming
|
||||
* streamText itself.
|
||||
* flushAssistant (#183): the PURE row builder behind the step-granular durable
|
||||
* write path. It runs identically for the upfront insert (empty steps,
|
||||
* 'streaming'), every per-step update, and the terminal finalize — so a future
|
||||
* background worker can call the same function. These tests pin the four status
|
||||
* shapes and the `metadata.parts` shape that rowToUiMessage/findRecent depend on
|
||||
* (per-step text + tool parts via assistantParts, in-progress text appended).
|
||||
*/
|
||||
describe('buildPartialAssistantRecord', () => {
|
||||
describe('flushAssistant', () => {
|
||||
type AnyPart = Record<string, unknown>;
|
||||
|
||||
it('records an empty turn with the error text (preserves old behavior)', () => {
|
||||
const rec = buildPartialAssistantRecord(
|
||||
[],
|
||||
'',
|
||||
'error',
|
||||
'401: Unauthorized',
|
||||
);
|
||||
expect(rec).toEqual({
|
||||
text: '',
|
||||
toolCalls: null,
|
||||
metadata: {
|
||||
finishReason: 'error',
|
||||
parts: [],
|
||||
error: '401: Unauthorized',
|
||||
},
|
||||
});
|
||||
const toolStep = {
|
||||
text: 'looked it up',
|
||||
toolCalls: [{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } }],
|
||||
toolResults: [
|
||||
{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
|
||||
],
|
||||
};
|
||||
|
||||
it('upfront seed: empty streaming row (no content, no toolCalls, empty parts)', () => {
|
||||
const f = flushAssistant([], '', 'streaming');
|
||||
expect(f.status).toBe('streaming');
|
||||
expect(f.content).toBe('');
|
||||
expect(f.toolCalls).toBeNull();
|
||||
expect(f.metadata.parts).toEqual([]);
|
||||
// No finishReason while streaming (it is not a terminal state).
|
||||
expect('finishReason' in f.metadata).toBe(false);
|
||||
});
|
||||
|
||||
it('persists in-progress text (no finished steps) as the partial answer', () => {
|
||||
const rec = buildPartialAssistantRecord(
|
||||
[],
|
||||
'partial answer',
|
||||
'error',
|
||||
'boom',
|
||||
);
|
||||
expect(rec.text).toBe('partial answer');
|
||||
expect(rec.metadata.parts).toEqual([
|
||||
it('streaming update folds in finished steps but keeps status streaming', () => {
|
||||
const f = flushAssistant([toolStep], '', 'streaming');
|
||||
expect(f.status).toBe('streaming');
|
||||
expect(f.content).toBe('looked it up');
|
||||
const parts = f.metadata.parts as AnyPart[];
|
||||
expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
|
||||
const toolPart = parts.find((p) => p.type === 'tool-getPage');
|
||||
expect(toolPart!.state).toBe('output-available');
|
||||
expect(f.toolCalls).not.toBeNull();
|
||||
});
|
||||
|
||||
it('completed: attaches finishReason + normalized usage + contextTokens', () => {
|
||||
const f = flushAssistant([toolStep], '', 'completed', {
|
||||
finishReason: 'stop',
|
||||
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
|
||||
contextTokens: 15,
|
||||
});
|
||||
expect(f.status).toBe('completed');
|
||||
expect(f.metadata.finishReason).toBe('stop');
|
||||
expect(f.metadata.usage).toEqual({
|
||||
inputTokens: 10,
|
||||
outputTokens: 5,
|
||||
totalTokens: 15,
|
||||
reasoningTokens: undefined,
|
||||
});
|
||||
expect(f.metadata.contextTokens).toBe(15);
|
||||
});
|
||||
|
||||
it('error: records the error and a derived finishReason', () => {
|
||||
const f = flushAssistant([], 'partial answer', 'error', { error: 'boom' });
|
||||
expect(f.status).toBe('error');
|
||||
expect(f.content).toBe('partial answer');
|
||||
expect(f.metadata.error).toBe('boom');
|
||||
// Derives finishReason from the terminal status when none is supplied.
|
||||
expect(f.metadata.finishReason).toBe('error');
|
||||
expect(f.metadata.parts).toEqual([
|
||||
{ type: 'text', text: 'partial answer' },
|
||||
]);
|
||||
expect(rec.metadata.error).toBe('boom');
|
||||
});
|
||||
|
||||
it('combines a finished tool step with trailing in-progress text', () => {
|
||||
const steps = [
|
||||
{
|
||||
text: 'looked it up',
|
||||
toolCalls: [
|
||||
{ toolCallId: 'c1', toolName: 'getPage', input: { id: 'p1' } },
|
||||
],
|
||||
toolResults: [
|
||||
{ toolCallId: 'c1', toolName: 'getPage', output: { title: 'T' } },
|
||||
],
|
||||
},
|
||||
];
|
||||
const rec = buildPartialAssistantRecord(
|
||||
steps,
|
||||
' and then',
|
||||
'error',
|
||||
'boom',
|
||||
);
|
||||
const parts = rec.metadata.parts as AnyPart[];
|
||||
// The finished step's text part is present.
|
||||
expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
|
||||
// The paired tool call+result becomes an output-available part.
|
||||
const toolPart = parts.find((p) => p.type === 'tool-getPage');
|
||||
expect(toolPart).toBeDefined();
|
||||
expect(toolPart!.state).toBe('output-available');
|
||||
// The in-progress text is appended LAST so the parts match the stream order.
|
||||
it('aborted: in-progress text appended last, no error key', () => {
|
||||
const f = flushAssistant([toolStep], ' and then', 'aborted');
|
||||
expect(f.status).toBe('aborted');
|
||||
expect(f.metadata.finishReason).toBe('aborted');
|
||||
expect('error' in f.metadata).toBe(false);
|
||||
expect(f.content).toBe('looked it up and then');
|
||||
const parts = f.metadata.parts as AnyPart[];
|
||||
expect(parts[parts.length - 1]).toEqual({
|
||||
type: 'text',
|
||||
text: ' and then',
|
||||
});
|
||||
expect(rec.text).toBe('looked it up and then');
|
||||
expect(rec.toolCalls).not.toBeNull();
|
||||
expect(rec.metadata.error).toBe('boom');
|
||||
});
|
||||
|
||||
it('omits the error key on the abort path (no errorText)', () => {
|
||||
const rec = buildPartialAssistantRecord([], 'half', 'aborted');
|
||||
expect(rec.metadata.finishReason).toBe('aborted');
|
||||
expect('error' in rec.metadata).toBe(false);
|
||||
expect(rec.text).toBe('half');
|
||||
it('combines a finished tool step with trailing in-progress text (error path)', () => {
|
||||
// The error path captures the PARTIAL answer the user already saw: each
|
||||
// finished step's text + tool parts, then the in-progress step's text last.
|
||||
const flushed = flushAssistant([toolStep], ' and then', 'error', {
|
||||
error: 'boom',
|
||||
});
|
||||
const parts = flushed.metadata.parts as AnyPart[];
|
||||
expect(parts).toContainEqual({ type: 'text', text: 'looked it up' });
|
||||
const toolPart = parts.find((p) => p.type === 'tool-getPage');
|
||||
expect(toolPart!.state).toBe('output-available');
|
||||
// In-progress text appended LAST so the parts match the stream order.
|
||||
expect(parts[parts.length - 1]).toEqual({
|
||||
type: 'text',
|
||||
text: ' and then',
|
||||
});
|
||||
expect(flushed.content).toBe('looked it up and then');
|
||||
expect(flushed.toolCalls).not.toBeNull();
|
||||
expect(flushed.metadata.error).toBe('boom');
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
import { ForbiddenException, Injectable, Logger } from '@nestjs/common';
|
||||
import {
|
||||
ForbiddenException,
|
||||
Injectable,
|
||||
Logger,
|
||||
OnModuleInit,
|
||||
} from '@nestjs/common';
|
||||
import { FastifyReply } from 'fastify';
|
||||
import {
|
||||
streamText,
|
||||
@@ -124,7 +129,7 @@ export interface AiChatStreamArgs {
|
||||
* can be rebuilt for `convertToModelMessages`.
|
||||
*/
|
||||
@Injectable()
|
||||
export class AiChatService {
|
||||
export class AiChatService implements OnModuleInit {
|
||||
private readonly logger = new Logger(AiChatService.name);
|
||||
|
||||
constructor(
|
||||
@@ -139,6 +144,32 @@ export class AiChatService {
|
||||
private readonly pageAccess: PageAccessService,
|
||||
) {}
|
||||
|
||||
/**
|
||||
* Crash-recovery sweep on server start (#183): any assistant row left in the
|
||||
* 'streaming' state is the relic of a turn whose process died before it
|
||||
* reached a terminal status. Flip those to 'aborted' so history/export show
|
||||
* them settled (with whatever finished steps were already persisted) instead
|
||||
* of perpetually "streaming". Best-effort: a sweep failure is logged but must
|
||||
* never block server startup.
|
||||
*/
|
||||
async onModuleInit(): Promise<void> {
|
||||
try {
|
||||
// `swept` is the count reported by the repo's sweep.
const swept = await this.aiChatMessageRepo.sweepStreaming();
|
||||
// Stay quiet when nothing was swept; log the count otherwise.
if (swept > 0) {
|
||||
this.logger.log(
|
||||
`Startup sweep: marked ${swept} dangling 'streaming' assistant ` +
|
||||
`message(s) as 'aborted'.`,
|
||||
);
|
||||
}
|
||||
} catch (err) {
|
||||
// Deliberately swallowed: the failure is only warned about and never
// rethrown. A non-Error throwable is reported as 'unknown error'.
this.logger.warn(
|
||||
`Startup sweep of dangling 'streaming' messages failed: ${
|
||||
err instanceof Error ? err.message : 'unknown error'
|
||||
}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the agent role that applies to this stream request, scoped to the
|
||||
* workspace and soft-delete aware. For an EXISTING chat the role is read from
|
||||
@@ -395,31 +426,6 @@ export class AiChatService {
|
||||
|
||||
const tools = { ...external.tools, ...docmostTools };
|
||||
|
||||
// Persist the assistant message. Used by onFinish (full result) and the
|
||||
// abort/error paths (partial result). Guarded so we persist at most once.
|
||||
let persisted = false;
|
||||
const persistAssistant = async (data: {
|
||||
text: string;
|
||||
toolCalls: unknown;
|
||||
metadata: Record<string, unknown>;
|
||||
}): Promise<void> => {
|
||||
if (persisted) return;
|
||||
persisted = true;
|
||||
try {
|
||||
await this.aiChatMessageRepo.insert({
|
||||
chatId,
|
||||
workspaceId: workspace.id,
|
||||
userId: user.id,
|
||||
role: 'assistant',
|
||||
content: data.text ?? '',
|
||||
toolCalls: (data.toolCalls ?? null) as never,
|
||||
metadata: data.metadata as never,
|
||||
});
|
||||
} catch (err) {
|
||||
this.logger.error('Failed to persist assistant message', err as Error);
|
||||
}
|
||||
};
|
||||
|
||||
// Accumulate the turn's streamed output so a provider error / disconnect can
|
||||
// persist the PARTIAL answer the user already saw — the SDK's onError/onAbort
|
||||
// callbacks don't hand us the in-progress text. `capturedSteps` holds finished
|
||||
@@ -428,6 +434,101 @@ export class AiChatService {
|
||||
const capturedSteps: StepLike[] = [];
|
||||
let inProgressText = '';
|
||||
|
||||
// Step-granular durability (#183): create the assistant row UPFRONT in the
|
||||
// 'streaming' state (before any token), then UPDATE it as each step finishes
|
||||
// and finalize it once on the terminal callback. If the process dies
|
||||
// mid-turn the row survives with every finished step already persisted; the
|
||||
// startup sweep (sweepStreaming) later flips a dangling 'streaming' row to
|
||||
// 'aborted'. The DB is now the single source of truth for the turn — the
|
||||
// socket is never required for the write path. A failed upfront insert is
|
||||
// logged and leaves assistantId undefined; per-step updates then no-op (guarded
|
||||
// below) and finalize falls back to an INSERT, so the turn still streams to the user.
|
||||
let assistantId: string | undefined;
|
||||
try {
|
||||
const seed = flushAssistant([], '', 'streaming');
|
||||
const seeded = await this.aiChatMessageRepo.insert({
|
||||
chatId,
|
||||
workspaceId: workspace.id,
|
||||
userId: user.id,
|
||||
role: 'assistant',
|
||||
content: seed.content,
|
||||
// jsonb columns: cast through never (same as the user insert above).
|
||||
toolCalls: (seed.toolCalls ?? null) as never,
|
||||
metadata: seed.metadata as never,
|
||||
status: seed.status,
|
||||
});
|
||||
assistantId = seeded?.id;
|
||||
} catch (err) {
|
||||
this.logger.error(
|
||||
`Failed to insert upfront assistant row (chat ${chatId}, workspace ${workspace.id})`,
|
||||
err as Error,
|
||||
);
|
||||
}
|
||||
|
||||
// Per-step (non-terminal) update: persist the finished steps the moment a
|
||||
// step ends. Tolerant — a failed update is logged and swallowed so it never
|
||||
// throws into the stream. Keeps status 'streaming'.
|
||||
const updateStreaming = async (): Promise<void> => {
|
||||
// No row id to target (assistantId was never set), so there is nothing to update.
if (!assistantId) return;
|
||||
// Cheap short-circuit once the turn is finalized (see `finalized` below).
|
||||
// The AUTHORITATIVE guard is `onlyIfStreaming` on the UPDATE: a late
|
||||
// fire-and-forget step update could still be in flight on another pool
|
||||
// connection when finalize runs, so the SQL `WHERE status='streaming'`
|
||||
// (not this flag) is what prevents it clobbering the terminal row.
|
||||
if (finalized) return;
|
||||
try {
|
||||
await this.aiChatMessageRepo.update(
|
||||
assistantId,
|
||||
workspace.id,
|
||||
// Rebuild the row from the finished steps only: the in-progress text
// argument is '' and the status stays 'streaming'.
flushAssistant(capturedSteps, '', 'streaming'),
|
||||
{ onlyIfStreaming: true },
|
||||
);
|
||||
} catch (err) {
|
||||
// Logged at warn level and swallowed; the error never propagates to the caller.
this.logger.warn(
|
||||
`Failed to update streaming assistant row: ${
|
||||
err instanceof Error ? err.message : 'unknown error'
|
||||
}`,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// Serialize the per-step updates (#183 review): onStepFinish fires them
|
||||
// without await, so two could otherwise commit out of order on different pool
|
||||
// connections (step N landing after N+1). Chaining each onto the previous
|
||||
// keeps the persisted row monotonic with step order; each link short-circuits
|
||||
// on `finalized`, so a tail of late updates is cheap.
|
||||
let stepUpdateChain: Promise<void> = Promise.resolve();
|
||||
|
||||
// Terminal finalize: write the completed/error/aborted row exactly once
|
||||
// across the (mutually-exclusive, at-most-once) onFinish/onError/onAbort
|
||||
// callbacks — mirroring the pre-#183 persist-at-most-once guard for the
|
||||
// TERMINAL status (the row may be updated many times with 'streaming' before
|
||||
// this fires once).
|
||||
let finalized = false;
|
||||
const finalizeAssistant = async (
|
||||
flushed: AssistantFlush,
|
||||
): Promise<void> => {
|
||||
// At-most-once: the flag is set before the first await, so a repeat call
// returns immediately.
if (finalized) return;
|
||||
finalized = true;
|
||||
// Decide UPDATE vs fallback INSERT from whether an upfront row id exists.
const plan = planFinalizeAssistant(assistantId);
|
||||
try {
|
||||
// Shared dispatch (see applyFinalize): UPDATE the upfront row, or — when
|
||||
// the upfront insert failed (kind 'insert') — INSERT the terminal row as
|
||||
// the only safety against losing the turn entirely.
|
||||
await applyFinalize(
|
||||
this.aiChatMessageRepo,
|
||||
plan,
|
||||
{ chatId, workspaceId: workspace.id, userId: user.id },
|
||||
flushed,
|
||||
);
|
||||
} catch (err) {
|
||||
// A failed finalize is logged with the plan kind and not rethrown; the flag
// stays set, so no retry happens through this function.
this.logger.error(
|
||||
`Failed to finalize assistant message (kind=${plan.kind})`,
|
||||
err as Error,
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary. Measure
|
||||
// first-chunk latency, the model-silent gap right before a disconnect, and
|
||||
// how many SSE heartbeats were written, so a Safari drop can be classified
|
||||
@@ -476,6 +577,12 @@ export class AiChatService {
|
||||
// the in-progress accumulator for the next step.
|
||||
capturedSteps.push(step as StepLike);
|
||||
inProgressText = '';
|
||||
// Step-granular durability (#183): persist this finished step (its text +
|
||||
// tool calls + tool RESULTS) the moment it ends, so a process death after
|
||||
// this point still recovers the step. Not awaited here (never block the
|
||||
// stream), but SERIALIZED via stepUpdateChain so the writes commit in
|
||||
// step order; updateStreaming is error-tolerant (logs + swallows).
|
||||
stepUpdateChain = stepUpdateChain.then(() => updateStreaming());
|
||||
},
|
||||
onFinish: async ({ text, finishReason, totalUsage, usage, steps }) => {
|
||||
// DIAGNOSTIC (Safari stream-drop investigation) — temporary: success
|
||||
@@ -486,30 +593,31 @@ export class AiChatService {
|
||||
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||
`heartbeatsSent=${heartbeatsSent} steps=${steps.length}`,
|
||||
);
|
||||
await persistAssistant({
|
||||
text,
|
||||
toolCalls: serializeSteps(steps),
|
||||
metadata: {
|
||||
finishReason,
|
||||
// Persist the turn's cumulative usage WITH reasoning tokens resolved
|
||||
// from either the new `outputTokenDetails` or the deprecated top-level
|
||||
// field, so reopened history / the Markdown export show the thinking
|
||||
// token cost too.
|
||||
usage:
|
||||
normalizeStreamUsage(totalUsage as StreamUsage) ?? totalUsage,
|
||||
// Final-step usage = the context actually fed to the model on the last LLM
|
||||
// call (full history + tool results) plus the answer it just generated.
|
||||
// input+output of the FINAL step ≈ the conversation's CURRENT context size,
|
||||
// distinct from totalUsage which sums every step (cumulative tokens spent).
|
||||
// Finalize the assistant row (#183): the upfront 'streaming' row is
|
||||
// UPDATEd to 'completed' with the turn's final text, cumulative usage and
|
||||
// full UIMessage parts. We pass the SDK `steps` (which carry the final
|
||||
// step's text) as the captured steps so metadata.parts matches the
|
||||
// pre-#183 onFinish record exactly; `inProgressText` is '' here (the last
|
||||
// step already finished). Final-step usage (usage.input+output) ≈ the
|
||||
// conversation's CURRENT context size, distinct from totalUsage.
|
||||
//
|
||||
// COLUMN-SEMANTICS NOTE (#183): `content` is built by flushAssistant as
|
||||
// the CONCATENATION of every step's text (stepsText), whereas pre-#183
|
||||
// it stored only the FINAL step's text. This is a deliberate, harmless
|
||||
// change: the UI and the Markdown export render from `metadata.parts`
|
||||
// (per-step text + tool parts), not from `content`; `content` is the
|
||||
// plain-text projection (full-text search / fallback). A multi-step
|
||||
// turn's `content` therefore now holds all steps' prose, not just the
|
||||
// last block.
|
||||
await finalizeAssistant(
|
||||
flushAssistant(steps as StepLike[], '', 'completed', {
|
||||
finishReason: finishReason as string,
|
||||
usage: totalUsage as StreamUsage,
|
||||
contextTokens:
|
||||
(usage?.inputTokens ?? 0) + (usage?.outputTokens ?? 0) ||
|
||||
undefined,
|
||||
// Persist the FULL set of UIMessage parts for the turn (text +
|
||||
// tool-call/result), so the rebuilt history replays prior tool
|
||||
// context to the model on later turns.
|
||||
parts: assistantParts(steps, text),
|
||||
},
|
||||
});
|
||||
}),
|
||||
);
|
||||
// Lifecycle: release the external MCP clients leased for this turn.
|
||||
await closeExternalClients();
|
||||
|
||||
@@ -545,16 +653,14 @@ export class AiChatService {
|
||||
`firstChunkLatency=${firstModelChunkAt ? firstModelChunkAt - streamStartedAt : 'none'}ms ` +
|
||||
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent}`,
|
||||
);
|
||||
// Persist the PARTIAL answer streamed before the failure (text + any
|
||||
// Finalize the PARTIAL answer streamed before the failure (text + any
|
||||
// finished tool steps) WITH the error in metadata, so the turn shows what
|
||||
// the user already saw plus the cause — not just a bare error.
|
||||
await persistAssistant(
|
||||
buildPartialAssistantRecord(
|
||||
capturedSteps,
|
||||
inProgressText,
|
||||
'error',
|
||||
errorText,
|
||||
),
|
||||
// the user already saw plus the cause — not just a bare error. Status
|
||||
// 'error' (#183).
|
||||
await finalizeAssistant(
|
||||
flushAssistant(capturedSteps, inProgressText, 'error', {
|
||||
error: errorText,
|
||||
}),
|
||||
);
|
||||
await closeExternalClients();
|
||||
},
|
||||
@@ -578,12 +684,8 @@ export class AiChatService {
|
||||
`silentGapBeforeDrop=${diagNow - lastModelChunkAt}ms heartbeatsSent=${heartbeatsSent} ` +
|
||||
`steps=${steps.length}`,
|
||||
);
|
||||
await persistAssistant(
|
||||
buildPartialAssistantRecord(
|
||||
capturedSteps,
|
||||
inProgressText,
|
||||
'aborted',
|
||||
),
|
||||
await finalizeAssistant(
|
||||
flushAssistant(capturedSteps, inProgressText, 'aborted'),
|
||||
);
|
||||
await closeExternalClients();
|
||||
},
|
||||
@@ -1032,38 +1134,132 @@ export function rowToUiMessage(row: AiChatMessage): Omit<UIMessage, 'id'> & {
|
||||
}
|
||||
|
||||
/**
 * The persisted-row patch shape produced by {@link flushAssistant}. It is the
 * SAME shape the assistant repo insert/update consume (content + toolCalls +
 * metadata) plus the lifecycle `status` column added in #183.
 */
export interface AssistantFlush {
  /** Plain-text projection of the turn: every finished step's text followed by
   * any in-progress text. */
  content: string;
  /** Serialized tool calls of the finished steps; opaque at this layer. */
  toolCalls: unknown;
  /** Row metadata: always `parts`, plus `finishReason` / `usage` /
   * `contextTokens` / `error` when they apply. */
  metadata: Record<string, unknown>;
  /** Lifecycle state of the assistant row. */
  status: 'streaming' | 'completed' | 'error' | 'aborted';
}
|
||||
|
||||
/**
 * Decide how the terminal payload of an assistant turn is written (#183).
 *
 * When the upfront assistant row exists (a truthy `assistantId`) the terminal
 * payload UPDATEs that row. When the upfront insert failed and no id is known,
 * the payload is INSERTed as a fresh terminal row so the turn is not lost.
 * Kept pure so the fallback-insert decision is unit-testable without seaming
 * streamText.
 *
 * @param assistantId id of the upfront assistant row, if it was created.
 * @returns `{ kind: 'update', id }` when an id is present, else `{ kind: 'insert' }`.
 */
export function planFinalizeAssistant(
  assistantId: string | undefined,
): { kind: 'update'; id: string } | { kind: 'insert' } {
  if (!assistantId) {
    return { kind: 'insert' };
  }
  return { kind: 'update', id: assistantId };
}
|
||||
|
||||
/**
 * Minimal repository surface required by the terminal finalize. It is
 * structural on purpose: the real repo and a test mock both satisfy it.
 */
export interface FinalizeRepo {
  /** Insert a fresh assistant row (the fallback when no upfront row exists). */
  insert(insertable: Record<string, unknown>): Promise<unknown>;

  /** Update the existing assistant row `id` in `workspaceId` with `patch`. */
  update(
    id: string,
    workspaceId: string,
    patch: AssistantFlush,
  ): Promise<unknown>;
}
|
||||
|
||||
/**
 * Execute a finalize `plan` against `repo` using the terminal `flushed`
 * payload (#183).
 *
 * - `update` plan: the upfront row `plan.id` is updated with `flushed`.
 * - any other plan (the `insert` fallback, used when the upfront insert
 *   failed): a fresh assistant row is inserted from `base` + `flushed`, with a
 *   nullish `toolCalls` stored as `null`.
 *
 * This is the SINGLE dispatch shared by the service's finalizeAssistant and its
 * test, so the test exercises the real path instead of a copy (#186 review).
 * It performs no error handling of its own — the caller wraps it.
 *
 * @param repo    repository used for the write.
 * @param plan    decision produced by {@link planFinalizeAssistant}.
 * @param base    identifiers stamped onto a fallback-inserted row.
 * @param flushed terminal row payload.
 */
export async function applyFinalize(
  repo: FinalizeRepo,
  plan: { kind: 'update'; id: string } | { kind: 'insert' },
  base: { chatId: string; workspaceId: string; userId: string },
  flushed: AssistantFlush,
): Promise<void> {
  if (plan.kind !== 'update') {
    const { chatId, workspaceId, userId } = base;
    const { content, metadata, status } = flushed;
    await repo.insert({
      chatId,
      workspaceId,
      userId,
      role: 'assistant',
      content,
      toolCalls: flushed.toolCalls ?? null,
      metadata,
      status,
    });
    return;
  }

  await repo.update(plan.id, base.workspaceId, flushed);
}
|
||||
|
||||
/**
 * PURE assistant-row builder (#183 step-granular durability). Given the turn's
 * accumulated steps + the in-progress (not-yet-finished) text + the lifecycle
 * status, it returns the row patch to persist. The SAME path runs for the
 * upfront insert (empty steps, status 'streaming'), every per-step update, and
 * the terminal finalize (completed/error/aborted) — and a future background
 * worker can call it identically, so it must stay a pure function of its inputs
 * (NO `this`, no IO).
 *
 * `metadata.parts` is built by assistantParts over the finished steps, then the
 * in-progress text appended as a trailing text part, so rowToUiMessage /
 * findRecent keep replaying the turn unchanged. `metadata.finishReason`,
 * `metadata.error`, `metadata.usage` and `metadata.contextTokens` are attached
 * only when provided/relevant, matching the pre-#183 onFinish/onError records.
 *
 * @param capturedSteps  steps that have finished so far (`undefined` = none).
 * @param inProgressText text of the step that has not finished yet ('' if none).
 * @param status         lifecycle status to persist on the row.
 * @param extra          optional terminal details (finish reason, usage,
 *                       context size, error text).
 * @returns the row patch: plain-text `content`, serialized `toolCalls`,
 *          `metadata` and `status`.
 */
export function flushAssistant(
  capturedSteps: ReadonlyArray<StepLike> | undefined,
  inProgressText: string,
  status: 'streaming' | 'completed' | 'error' | 'aborted',
  extra?: {
    finishReason?: string;
    usage?: ChatStreamUsage | StreamUsage | undefined;
    contextTokens?: number;
    error?: string;
  },
): AssistantFlush {
  const finished = capturedSteps ?? [];
  const stepsText = finished.map((s) => s.text ?? '').join('');
  const trailing = inProgressText ?? '';

  // assistantParts emits text parts only for FINISHED steps; append the
  // in-progress step's text (the partial answer cut off by an error/abort, or
  // simply not yet flushed mid-stream) as the last text part so the persisted
  // parts match what streamed to the client.
  const parts = assistantParts(finished, '') as unknown as Array<
    Record<string, unknown>
  >;
  if (trailing) parts.push({ type: 'text', text: trailing });

  const metadata: Record<string, unknown> = {
    parts: parts as unknown as UIMessage['parts'],
  };

  // finishReason: prefer an explicit one; else derive a sensible value from the
  // terminal status (so onError/onAbort records keep their historical reason).
  if (extra?.finishReason) {
    metadata.finishReason = extra.finishReason;
  } else if (status === 'error' || status === 'aborted') {
    metadata.finishReason = status;
  }

  // Store the normalized usage when normalization yields a value, otherwise the
  // raw usage object as given.
  if (extra?.usage !== undefined) {
    metadata.usage =
      normalizeStreamUsage(extra.usage as StreamUsage) ?? extra.usage;
  }
  // A zero/absent context size and an empty error string are simply omitted.
  if (extra?.contextTokens) metadata.contextTokens = extra.contextTokens;
  if (extra?.error) metadata.error = extra.error;

  return {
    content: stepsText + trailing,
    toolCalls: serializeSteps(finished),
    metadata,
    status,
  };
}
|
||||
|
||||
|
||||
295
apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
Normal file
295
apps/server/src/core/ai-chat/chat-markdown.util.spec.ts
Normal file
@@ -0,0 +1,295 @@
|
||||
import { buildChatMarkdown, normalizeLang } from './chat-markdown.util';
|
||||
import type { AiChatMessage } from '@docmost/db/types/entity.types';
|
||||
|
||||
/**
 * normalizeLang regression guard. The client sends `i18n.language`, which is a
 * FULL locale tag such as 'en-US' / 'ru-RU' rather than a bare 'en' / 'ru'. An
 * earlier `@IsIn(['en','ru'])` DTO rejected those with a 400 (found in
 * real-browser testing); the export now accepts any string and normalizes it
 * here.
 */
describe('normalizeLang', () => {
  it("maps any 'ru…' locale tag to ru", () => {
    const russianTags = ['ru', 'ru-RU', 'RU-ru'];
    for (const tag of russianTags) {
      expect(normalizeLang(tag)).toBe('ru');
    }
  });

  it('maps everything else (incl. region-qualified English) to en', () => {
    const otherTags = ['en', 'en-US', 'fr-FR', undefined, ''];
    for (const tag of otherTags) {
      expect(normalizeLang(tag)).toBe('en');
    }
  });
});
|
||||
|
||||
/**
|
||||
* Unit tests for the SERVER Markdown export (#183). Mirrors the coverage of the
|
||||
* (now-removed) client chat-markdown tests: heading/metadata, role labels, text
|
||||
* + tool blocks, token footers, the interrupted-turn note, and NULL-status
|
||||
* (legacy) rows. The export embeds a live `new Date().toISOString()` timestamp;
|
||||
* we never assert it, only the deterministic structure.
|
||||
*/
|
||||
|
||||
/**
 * Test fixture: build an `AiChatMessage` row from `partial`, filling every
 * field that is missing (or nullish) with a fixed default so each test only
 * spells out the fields it cares about.
 */
function row(partial: Partial<AiChatMessage>): AiChatMessage {
  const fixedTimestamp = '2026-06-21T00:00:00.000Z' as never;
  const {
    id,
    chatId,
    workspaceId,
    userId,
    role,
    content,
    toolCalls,
    metadata,
    status,
    createdAt,
    updatedAt,
    deletedAt,
  } = partial;

  return {
    id: id ?? 'id',
    chatId: chatId ?? 'chat-1',
    workspaceId: workspaceId ?? 'ws-1',
    userId: userId ?? null,
    role: role ?? 'user',
    content: content ?? null,
    toolCalls: toolCalls ?? null,
    metadata: metadata ?? null,
    status: status ?? null,
    createdAt: createdAt ?? fixedTimestamp,
    updatedAt: updatedAt ?? fixedTimestamp,
    deletedAt: deletedAt ?? null,
  } as AiChatMessage;
}
|
||||
|
||||
describe('buildChatMarkdown (server) — structure', () => {
  /** An assistant-role fixture row with the given overrides. */
  const assistant = (overrides: Partial<AiChatMessage> = {}): AiChatMessage =>
    row({ role: 'assistant', ...overrides });

  /** Export `rows` under the title 'T' / chat id 'c' shared by most cases. */
  const exportRows = (rows: AiChatMessage[], lang?: string): string =>
    buildChatMarkdown({
      title: 'T',
      chatId: 'c',
      ...(lang === undefined ? {} : { lang }),
      rows,
    });

  /** Assert that `markdown` contains every fragment in `fragments`. */
  const expectContainsAll = (markdown: string, fragments: string[]): void => {
    for (const fragment of fragments) {
      expect(markdown).toContain(fragment);
    }
  };

  it('emits the title heading, chat id and message count', () => {
    const markdown = buildChatMarkdown({
      title: 'My chat',
      chatId: 'chat-123',
      rows: [],
    });
    expectContainsAll(markdown, [
      '# My chat',
      '- Chat ID: `chat-123`',
      '- Messages: 0',
    ]);
  });

  it('falls back to "Untitled chat" with no title (en)', () => {
    const markdown = buildChatMarkdown({ title: null, chatId: 'c', rows: [] });
    expect(markdown).toContain('# Untitled chat');
  });

  it('localizes fixed labels with lang=ru (structure stays English)', () => {
    const markdown = buildChatMarkdown({
      title: null,
      chatId: 'c',
      lang: 'ru',
      rows: [assistant({ content: 'hi' })],
    });
    expectContainsAll(markdown, [
      '# Без названия',
      '## 1. ИИ-агент',
      // Structural words remain English.
      '- Chat ID:',
    ]);
  });

  it('numbers messages and labels roles (You / AI agent)', () => {
    const markdown = exportRows([
      row({ role: 'user', content: 'question' }),
      assistant({ content: 'answer' }),
    ]);
    expectContainsAll(markdown, [
      '## 1. You',
      'question',
      '## 2. AI agent',
      'answer',
    ]);
  });

  it('renders a tool part with fenced input/output and the friendly label', () => {
    const toolPart = {
      type: 'tool-getPage',
      state: 'output-available',
      input: { id: 'p1' },
      output: { title: 'Hello' },
    };
    const markdown = exportRows([
      assistant({
        content: 'done',
        metadata: {
          parts: [toolPart, { type: 'text', text: 'done' }],
        } as never,
      }),
    ]);
    expectContainsAll(markdown, [
      '**Tool: Read page** (`getPage`) — done',
      'Input:',
      '"id": "p1"',
      'Output:',
      '"title": "Hello"',
    ]);
  });

  // #186 re-review pt 1: parity coverage carried over from the removed client
  // spec — error state, unknown-tool fallback (en + ru), circular-stringify.
  it('renders a tool part in the error state with its errorText', () => {
    const failedPart = {
      type: 'tool-getPage',
      state: 'output-error',
      input: { id: 'p1' },
      errorText: 'page not found',
    };
    const markdown = exportRows([
      assistant({ metadata: { parts: [failedPart] } as never }),
    ]);
    expectContainsAll(markdown, [
      '**Tool: Read page** (`getPage`) — error',
      '**Error:** page not found',
    ]);
  });

  it('falls back to "Ran tool <name>" for an unknown tool (en) and the ru variant', () => {
    const parts = [
      {
        type: 'tool-mysteryTool',
        state: 'output-available',
        output: { ok: 1 },
      },
    ];
    const english = exportRows([assistant({ metadata: { parts } as never })]);
    expect(english).toContain('**Tool: Ran tool mysteryTool** (`mysteryTool`)');

    const russian = exportRows(
      [assistant({ metadata: { parts } as never })],
      'ru',
    );
    expect(russian).toContain('Выполнил инструмент mysteryTool');
  });

  it('does not throw on a circular tool output (falls back to String)', () => {
    const circular: Record<string, unknown> = {};
    circular.self = circular;
    const circularPart = {
      type: 'tool-getPage',
      state: 'output-available',
      output: circular,
    };
    const render = (): string =>
      exportRows([assistant({ metadata: { parts: [circularPart] } as never })]);
    expect(render).not.toThrow();
  });

  it('emits a token footer + total when usage is present', () => {
    const usage = {
      inputTokens: 100,
      outputTokens: 20,
      totalTokens: 120,
      reasoningTokens: 8,
    };
    const markdown = exportRows([
      assistant({ content: 'a', metadata: { usage } as never }),
    ]);
    expectContainsAll(markdown, [
      '- Total tokens: 120',
      '_Tokens — in: 100, out: 20, reasoning: 8, total: 120_',
    ]);
  });

  it('flags a still-streaming (interrupted) row', () => {
    const markdown = exportRows([
      assistant({ content: 'partial', status: 'streaming' }),
    ]);
    expect(markdown).toContain('still being generated');
  });

  it('does NOT flag a completed row', () => {
    const markdown = exportRows([
      assistant({ content: 'final', status: 'completed' }),
    ]);
    expect(markdown).not.toContain('still being generated');
  });

  it('renders a legacy NULL-status row (no parts) from plain content', () => {
    const markdown = exportRows([
      assistant({ content: 'legacy answer', status: null }),
    ]);
    expect(markdown).toContain('legacy answer');
    expect(markdown).not.toContain('still being generated');
  });

  it('renders a persisted error', () => {
    const markdown = exportRows([
      assistant({
        content: '',
        status: 'error',
        metadata: { error: '401: Unauthorized' } as never,
      }),
    ]);
    expect(markdown).toContain('**⚠️ Error:** 401: Unauthorized');
  });

  it('escapes embedded triple-backtick fences with a longer delimiter', () => {
    const fencedOutputPart = {
      type: 'tool-getPage',
      state: 'output-available',
      output: '```inner```',
    };
    const markdown = exportRows([
      assistant({
        content: 'x',
        metadata: { parts: [fencedOutputPart] } as never,
      }),
    ]);
    // A 4-backtick fence wraps content that itself contains a 3-backtick run.
    expect(markdown).toContain('````');
  });
});
|
||||
299
apps/server/src/core/ai-chat/chat-markdown.util.ts
Normal file
299
apps/server/src/core/ai-chat/chat-markdown.util.ts
Normal file
@@ -0,0 +1,299 @@
|
||||
/**
|
||||
* Server-side Markdown export for an AI agent chat (#183). The DB is the single
|
||||
* source of truth: this renders a chat purely from its persisted message rows
|
||||
* (`AiChatMessage[]` — role / content / metadata.parts / toolCalls / usage).
|
||||
* Because the assistant row is now persisted UPFRONT and updated per step, an
|
||||
* interrupted turn is included up to its last finished step.
|
||||
*
|
||||
* Ported from the client `utils/chat-markdown.ts`. It is a PURE function (apart
|
||||
* from `new Date()` for the export timestamp), so it is straightforward to
|
||||
* unit-test and a future background worker can reuse it.
|
||||
*
|
||||
* Only a few fixed role/tool labels are localized via the `lang` param; the
|
||||
* structural document words (Input/Output/Error/Tokens/...) stay English because
|
||||
* the output is a technical artifact.
|
||||
*/
|
||||
|
||||
import type { AiChatMessage } from '@docmost/db/types/entity.types';
|
||||
|
||||
/** Supported export label languages. Defaults to English. */
export type ExportLang = 'en' | 'ru';

/**
 * Normalize an arbitrary client locale code to a supported export language.
 *
 * The client sends `i18n.language`, which is a FULL locale tag (e.g. `en-US`,
 * `ru-RU`), not a bare `en`/`ru`. Only the PRIMARY language subtag (the part
 * before the first `-` or `_`) is compared, case-insensitively, so a different
 * language whose code merely starts with "ru" (e.g. `rue`, `rup`) is not
 * mistaken for Russian.
 *
 * @param lang locale tag from the client; may be missing or empty.
 * @returns `'ru'` for Russian (`ru` / `rus` primary subtag), otherwise `'en'`.
 */
export function normalizeLang(lang?: string): ExportLang {
  const primary = (lang ?? '').toLowerCase().split(/[-_]/, 1)[0];
  return primary === 'ru' || primary === 'rus' ? 'ru' : 'en';
}
|
||||
|
||||
/** One AI SDK UIMessage part as read by the exporter: a text part or a tool part. */
interface ExportPart {
  /** Part discriminator: `'text'`, a static tool `tool-<name>`, or `'dynamic-tool'`. */
  type: string;
  /** Prose of a text part. */
  text?: string;
  /** Tool-part state, e.g. `'output-available'`, `'output-error'`, `'output-denied'`. */
  state?: string;
  /** Tool name carried by a `'dynamic-tool'` part. */
  toolName?: string;
  /** Tool input; rendered in a fenced block when defined. */
  input?: unknown;
  /** Tool output; rendered in a fenced block when defined. */
  output?: unknown;
  /** Error message of a tool part; rendered when non-empty. */
  errorText?: string;
}
|
||||
|
||||
/** Per-turn token usage the server attaches to a message row; every field is optional. */
interface UsageLike {
  /** Input token count. */
  inputTokens?: number;
  /** Output token count. */
  outputTokens?: number;
  /** Total token count; preferred over input + output when present. */
  totalTokens?: number;
  /** Reasoning token count; shown in the footer only when greater than zero. */
  reasoningTokens?: number;
}
|
||||
|
||||
/** The set of localized strings the exporter needs for one language. */
interface ExportLabels {
  /** Heading used when the chat has no title. */
  untitled: string;
  /** Role label for assistant messages. */
  aiAgent: string;
  /** Role label for every non-assistant message. */
  you: string;
  /** Friendly action labels keyed by tool name. */
  tools: Record<string, string>;
  /** Fallback label for a tool that has no entry in `tools`. */
  ranTool: (name: string) => string;
  /** Note appended to a row that is still streaming. */
  stillGenerating: string;
}

/**
 * Localized label table. The client-side Markdown builder was removed by #183
 * (the export is server-side only), so this table does not mirror a second
 * exporter. Instead the tool-action labels are kept in parity with the
 * on-screen action-log labels in the client's `tool-parts.tsx`
 * (`toolLabelKey`) so the export reads the same as the UI. Only role and
 * tool-action labels are localized; everything structural is an English
 * constant in the renderer.
 */
const LABELS: Record<ExportLang, ExportLabels> = {
  en: {
    untitled: 'Untitled chat',
    aiAgent: 'AI agent',
    you: 'You',
    tools: {
      searchPages: 'Searched pages',
      getPage: 'Read page',
      createPage: 'Created page',
      updatePageContent: 'Updated page',
      renamePage: 'Renamed page',
      movePage: 'Moved page',
      deletePage: 'Deleted page (to trash)',
      createComment: 'Commented',
      resolveComment: 'Resolved comment',
    },
    ranTool(name) {
      return `Ran tool ${name}`;
    },
    stillGenerating:
      'This message is still being generated — the export captured a partial, in-progress response.',
  },
  ru: {
    untitled: 'Без названия',
    aiAgent: 'ИИ-агент',
    you: 'Вы',
    tools: {
      searchPages: 'Искал по страницам',
      getPage: 'Прочитал страницу',
      createPage: 'Создал страницу',
      updatePageContent: 'Обновил страницу',
      renamePage: 'Переименовал страницу',
      movePage: 'Переместил страницу',
      deletePage: 'Удалил страницу (в корзину)',
      createComment: 'Прокомментировал',
      resolveComment: 'Закрыл комментарий',
    },
    ranTool(name) {
      return `Выполнил инструмент ${name}`;
    },
    stillGenerating:
      'Это сообщение всё ещё генерируется — экспорт захватил частичный, незавершённый ответ.',
  },
};
|
||||
|
||||
/**
 * Whether a part `type` denotes an AI SDK tool part: either the generic
 * `dynamic-tool` type or a static `tool-<name>` type.
 */
function isToolPart(type: string): boolean {
  if (type === 'dynamic-tool') return true;
  return type.startsWith('tool-');
}
|
||||
|
||||
/**
 * Resolve the tool name of a part.
 *
 * - `dynamic-tool` parts carry the name in `toolName` ('' when absent);
 * - static parts encode it in the type as `tool-<name>`;
 * - any other type is returned unchanged.
 */
function getToolName(part: ExportPart): string {
  const staticPrefix = 'tool-';
  const { type } = part;

  if (type === 'dynamic-tool') {
    return part.toolName ?? '';
  }
  if (type.startsWith(staticPrefix)) {
    return type.slice(staticPrefix.length);
  }
  return type;
}
|
||||
|
||||
/**
 * Collapse an AI SDK tool-part state into one of the three states the
 * action-log renders: failed/denied output is `error`, available output is
 * `done`, and anything else (including a missing state) is `running`.
 */
function toolRunState(state: string | undefined): 'running' | 'done' | 'error' {
  switch (state) {
    case 'output-error':
    case 'output-denied':
      return 'error';
    case 'output-available':
      return 'done';
    default:
      return 'running';
  }
}
|
||||
|
||||
/**
 * Resolve a tool's friendly (localized) action-log label from its name.
 *
 * Only OWN entries of the label table count as known tools. A plain property
 * read would also find members inherited from `Object.prototype`, so a tool
 * named e.g. `constructor` or `toString` would be labelled with a function's
 * source text instead of the "ran tool" fallback.
 *
 * @param name tool name as extracted from the part.
 * @param lang export language.
 * @returns the table label for a known tool, else the localized fallback.
 */
function toolLabel(name: string, lang: ExportLang): string {
  const labels = LABELS[lang];
  const known = Object.prototype.hasOwnProperty.call(labels.tools, name)
    ? labels.tools[name]
    : undefined;
  return known ?? labels.ranTool(name);
}
|
||||
|
||||
/**
 * Stringify an arbitrary tool input/output value for a fenced block.
 *
 * - Strings pass through unchanged.
 * - Everything else is pretty-printed JSON (2-space indent).
 * - If serialization throws (e.g. a circular structure) or produces no text
 *   (`JSON.stringify` returns `undefined` for `undefined`, functions and
 *   symbols), the result falls back to `String(value)`, so the function always
 *   returns a string as its signature promises.
 */
function stringify(value: unknown): string {
  if (typeof value === 'string') return value;
  try {
    // The lib typings claim `string`, but at runtime this can be `undefined`.
    const json: string | undefined = JSON.stringify(value, null, 2);
    return json ?? String(value);
  } catch {
    return String(value);
  }
}
|
||||
|
||||
/**
 * Wrap `code` in a fenced code block. The delimiter is one backtick LONGER
 * than the longest backtick run found in `code` (and never shorter than 3), so
 * embedded backticks — including a literal ``` fence — cannot close the block
 * early. `lang` is appended to the opening delimiter as the info string.
 */
function fence(code: string, lang = ''): string {
  let longestRun = 0;
  for (const run of code.match(/`+/g) ?? []) {
    longestRun = Math.max(longestRun, run.length);
  }
  const delimiter = '`'.repeat(Math.max(3, longestRun + 1));
  return [delimiter + lang, code, delimiter].join('\n');
}
|
||||
|
||||
/**
 * Token count for one row: the recorded `totalTokens` when present, otherwise
 * input + output with each missing side counted as 0.
 */
function rowTokens(usage: UsageLike): number {
  if (usage.totalTokens != null) {
    return usage.totalTokens;
  }
  const input = usage.inputTokens ?? 0;
  const output = usage.outputTokens ?? 0;
  return input + output;
}
|
||||
|
||||
/**
 * Render one message's UIMessage parts into Markdown blocks, in part order.
 *
 * - A text part becomes its trimmed text (whitespace-only text is dropped).
 * - A tool part becomes a header line (friendly label, raw name, run state)
 *   followed, when present, by fenced `Input:` / `Output:` sections and an
 *   `**Error:**` line, joined by blank lines.
 * - Any other part type is skipped.
 */
function renderMessageParts(parts: ExportPart[], lang: ExportLang): string[] {
  const blocks: string[] = [];

  for (const part of parts) {
    if (part.type === 'text') {
      const trimmed = (part.text ?? '').trim();
      if (trimmed.length > 0) blocks.push(trimmed);
    } else if (isToolPart(part.type)) {
      const toolName = getToolName(part);
      const friendly = toolLabel(toolName, lang);
      const runState = toolRunState(part.state);

      const sections: string[] = [
        `**Tool: ${friendly}** (\`${toolName}\`) — ${runState}`,
      ];
      if (part.input !== undefined) {
        sections.push('Input:', fence(stringify(part.input), 'json'));
      }
      if (part.output !== undefined) {
        sections.push('Output:', fence(stringify(part.output), 'json'));
      }
      if (part.errorText) {
        sections.push(`**Error:** ${part.errorText}`);
      }
      blocks.push(sections.join('\n\n'));
    }
  }

  return blocks;
}
|
||||
|
||||
/**
 * Resolve the parts to render for a persisted row: the rich parts stored in
 * `metadata.parts` when that is a non-empty array, otherwise a single text
 * part built from the row's plain-text `content` ('' when null).
 */
function rowParts(row: AiChatMessage): ExportPart[] {
  const persisted = ((row.metadata ?? {}) as { parts?: ExportPart[] }).parts;
  if (Array.isArray(persisted) && persisted.length > 0) {
    return persisted;
  }
  return [{ type: 'text', text: row.content ?? '' }];
}
|
||||
|
||||
/**
 * Serialize a chat to a Markdown string from its persisted rows. The DB rows
 * are the ONLY input (no live client state).
 *
 * Layout, with blocks separated by a blank line:
 *   1. `# <title>` (the localized "untitled" label when the title is blank);
 *   2. a bullet list: chat id, export timestamp, message count and — only when
 *      the sum is positive — total tokens;
 *   3. per row: `---`, a numbered role heading, the created-at timestamp as an
 *      HTML comment, the rendered parts, a "still generating" note when the
 *      row's `status` is 'streaming', the persisted error (if any) and a token
 *      footer (if usage is recorded).
 *
 * @param args.title  chat title, or null.
 * @param args.chatId chat id shown in the metadata list.
 * @param args.rows   persisted message rows, in output order.
 * @param args.lang   client locale tag; normalized to a supported language.
 */
export function buildChatMarkdown(args: {
  title: string | null;
  chatId: string;
  rows: AiChatMessage[];
  // Accepts a full client locale tag (e.g. 'en-US'/'ru-RU'); normalized below.
  lang?: string;
}): string {
  const lang: ExportLang = normalizeLang(args.lang);
  const labels = LABELS[lang];

  // The two metadata fields the exporter reads from a row.
  const metaOf = (row: AiChatMessage): { usage?: UsageLike; error?: string } =>
    (row.metadata ?? {}) as { usage?: UsageLike; error?: string };

  // Sum of per-row tokens over the rows that recorded usage.
  let tokenSum = 0;
  for (const row of args.rows) {
    const rowUsage = metaOf(row).usage;
    if (rowUsage) tokenSum += rowTokens(rowUsage);
  }

  const summary = [
    `- Chat ID: \`${args.chatId}\``,
    `- Exported: ${new Date().toISOString()}`,
    `- Messages: ${args.rows.length}`,
  ];
  // Total tokens is only shown when there is a sum.
  if (tokenSum > 0) summary.push(`- Total tokens: ${tokenSum}`);

  const doc: string[] = [
    `# ${(args.title ?? '').trim() || labels.untitled}`,
    summary.join('\n'),
  ];

  for (const [position, row] of args.rows.entries()) {
    const speaker = row.role === 'assistant' ? labels.aiAgent : labels.you;
    doc.push('---', `## ${position + 1}. ${speaker}`);

    // Created-at kept in source as an HTML comment (out of the rendered prose).
    if (row.createdAt) {
      const stamp =
        row.createdAt instanceof Date
          ? row.createdAt.toISOString()
          : String(row.createdAt);
      doc.push(`<!-- ${stamp} -->`);
    }

    for (const block of renderMessageParts(rowParts(row), lang)) {
      doc.push(block);
    }

    // A still-'streaming' row is an interrupted/in-progress turn captured by the
    // export; record that so the partial answer is not mistaken for complete.
    if (row.status === 'streaming') {
      doc.push(`_⏳ ${labels.stillGenerating}_`);
    }

    const { error, usage } = metaOf(row);
    if (error) {
      doc.push(`**⚠️ Error:** ${error}`);
    }

    if (usage) {
      const total = usage.totalTokens ?? rowTokens(usage);
      const showReasoning =
        usage.reasoningTokens && usage.reasoningTokens > 0;
      const reasoning = showReasoning
        ? `, reasoning: ${usage.reasoningTokens}`
        : '';
      const tokensIn = usage.inputTokens ?? '?';
      const tokensOut = usage.outputTokens ?? '?';
      doc.push(
        `_Tokens — in: ${tokensIn}, out: ${tokensOut}${reasoning}, total: ${total}_`,
      );
    }
  }

  // Blank line between blocks so the Markdown renders cleanly.
  return doc.join('\n\n');
}
|
||||
@@ -26,3 +26,17 @@ export class GetChatMessagesDto {
|
||||
@IsString()
|
||||
cursor?: string;
|
||||
}
|
||||
|
||||
/**
 * Request body for exporting a chat to Markdown (#183). `lang` localizes the
 * few fixed role/tool-action labels; the server defaults to English.
 */
export class ExportChatDto {
  /** Id of the chat to export. */
  @IsString()
  chatId: string;

  /**
   * Full client locale tag (e.g. 'en-US', 'ru-RU'). It is normalized
   * server-side to a supported export language (see normalizeLang), so any
   * string is accepted here — rejecting region-qualified locales is what
   * produced the 400 that broke the real client.
   */
  @IsOptional()
  @IsString()
  lang?: string;
}
|
||||
|
||||
@@ -0,0 +1,18 @@
|
||||
import { type Kysely } from 'kysely';
|
||||
|
||||
/**
 * Add the nullable `status` text column to `ai_chat_messages`.
 *
 * Step-granular durability for the assistant turn (#183): the assistant row is
 * created UPFRONT (status 'streaming') and UPDATEd as each step completes, so a
 * process death mid-turn no longer loses the whole answer. The column is
 * NULLABLE on purpose — rows written before this migration carry NULL, which
 * the app treats as 'completed' (a settled, pre-status message). Values written
 * by the app: 'streaming' | 'completed' | 'error' | 'aborted'.
 */
export async function up(db: Kysely<any>): Promise<void> {
  const addStatusColumn = db.schema
    .alterTable('ai_chat_messages')
    .addColumn('status', 'text', (col) => col);

  await addStatusColumn.execute();
}
|
||||
|
||||
/** Revert the migration: drop the `status` column from `ai_chat_messages`. */
export async function down(db: Kysely<any>): Promise<void> {
  const messagesTable = db.schema.alterTable('ai_chat_messages');
  await messagesTable.dropColumn('status').execute();
}
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB, KyselyTransaction } from '../../types/kysely.types';
|
||||
import { dbOrTx } from '../../utils';
|
||||
@@ -9,8 +9,24 @@ import {
|
||||
import { PaginationOptions } from '@docmost/db/pagination/pagination-options';
|
||||
import { executeWithCursorPagination } from '@docmost/db/pagination/cursor-pagination';
|
||||
|
||||
/**
 * Recency threshold of the crash-recovery sweep (#183 review), in
 * milliseconds: a 'streaming' row is only swept to 'aborted' once it has been
 * UNTOUCHED for this long.
 *
 * A live turn bumps `updatedAt` on every step (well under this window), so its
 * row never matches; only a turn whose process truly died (no step update for
 * longer than the threshold) is swept. Chosen safely ABOVE the longest
 * realistic turn so a fresh replica's boot-sweep can never abort a turn
 * another replica is actively streaming (multi-instance deploy).
 */
const SWEEP_STREAMING_STALE_MS = 10 * 60 * 1000; // 10 minutes

/**
 * Hard upper bound on the rows materialized by `findAllByChat` (export path).
 * A generous cap, far above any realistic transcript length, so a
 * pathologically huge chat cannot load an unbounded result into memory.
 */
const FIND_ALL_BY_CHAT_LIMIT = 5000;
|
||||
|
||||
@Injectable()
|
||||
export class AiChatMessageRepo {
|
||||
private readonly logger = new Logger(AiChatMessageRepo.name);
|
||||
|
||||
constructor(@InjectKysely() private readonly db: KyselyDB) {}
|
||||
|
||||
// The `tsv` column is a trigger-maintained tsvector used only for
|
||||
@@ -25,6 +41,7 @@ export class AiChatMessageRepo {
|
||||
'content',
|
||||
'toolCalls',
|
||||
'metadata',
|
||||
'status',
|
||||
'createdAt',
|
||||
'updatedAt',
|
||||
'deletedAt',
|
||||
@@ -60,6 +77,46 @@ export class AiChatMessageRepo {
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Load the (non-deleted) messages of a chat in ascending chronological order
 * (oldest -> newest), unpaginated. Used by the server-side Markdown export
 * (#183), where the DB is the single source of truth and the whole transcript
 * must be rendered in one pass (findByChat is cursor-paginated and would only
 * return the first page).
 *
 * The result is hard-capped so that exporting a pathologically huge chat
 * cannot materialize an unbounded result set in memory. On overflow the
 * NEWEST `limit` messages are kept and a warning is logged.
 *
 * @param chatId      Chat whose messages are loaded.
 * @param workspaceId Workspace the chat must belong to.
 * @param limit       Maximum number of rows returned; defaults to
 *                    FIND_ALL_BY_CHAT_LIMIT. Injectable for tests so
 *                    truncation can be exercised on a modest volume.
 * @returns The kept messages, oldest first.
 */
async findAllByChat(
  chatId: string,
  workspaceId: string,
  limit: number = FIND_ALL_BY_CHAT_LIMIT,
): Promise<AiChatMessage[]> {
  // Query newest-first and ask for ONE row beyond the cap: the extra row is
  // only there to detect that the chat overflowed the cap.
  const newestFirst = await this.db
    .selectFrom('aiChatMessages')
    .select(this.baseFields)
    .where('chatId', '=', chatId)
    .where('workspaceId', '=', workspaceId)
    .where('deletedAt', 'is', null)
    .orderBy('createdAt', 'desc')
    .orderBy('id', 'desc')
    .limit(limit + 1)
    .execute();

  const overflowed = newestFirst.length > limit;
  if (overflowed) {
    // The array is newest-first, so cutting its tail drops the OLDEST rows and
    // keeps the recent conversation, which matters most for an export.
    newestFirst.length = limit;
    const notice = `Chat ${chatId} export truncated to the newest ${limit} messages (older messages omitted).`;
    this.logger.warn(notice);
  }

  // Flip back to chronological order for rendering.
  return newestFirst.reverse();
}
|
||||
|
||||
// Load the most RECENT `limit` messages for a chat and return them in
|
||||
// ascending chronological order (oldest -> newest), as the model expects.
|
||||
// `findByChat` returns the FIRST page ASC (the OLDEST messages), which loses
|
||||
@@ -96,4 +153,68 @@ export class AiChatMessageRepo {
|
||||
.returning(this.baseFields)
|
||||
.executeTakeFirst();
|
||||
}
|
||||
|
||||
/**
 * Patch a single message in place, addressed by id + workspace (#183
 * step-granular durability). The assistant row is created UPFRONT (status
 * 'streaming'), patched as each step completes, then finalized once with its
 * terminal status.
 *
 * `updatedAt` is bumped on every call, whatever the patch contains. The query
 * filters on `id` and `workspaceId` only (plus the optional status guard);
 * it applies no `deletedAt` filter.
 *
 * @param id          Message id.
 * @param workspaceId Workspace the message must belong to.
 * @param patch       Columns to overwrite (any subset of content, toolCalls,
 *                    metadata, status).
 * @param opts        `onlyIfStreaming` restricts the update to a row whose
 *                    status is still 'streaming'; `trx` runs it inside the
 *                    given transaction.
 * @returns The updated row (baseFields), or undefined when no row matched
 *          (e.g. a foreign workspace, a row that no longer exists, or the
 *          guard rejecting an already-finalized row).
 */
async update(
  id: string,
  workspaceId: string,
  patch: Partial<{
    content: string | null;
    toolCalls: unknown;
    metadata: unknown;
    status: string | null;
  }>,
  opts?: { onlyIfStreaming?: boolean; trx?: KyselyTransaction },
): Promise<AiChatMessage | undefined> {
  const executor = dbOrTx(this.db, opts?.trx);
  const changes = {
    ...(patch as Record<string, unknown>),
    updatedAt: new Date(),
  };

  const byIdAndWorkspace = executor
    .updateTable('aiChatMessages')
    .set(changes)
    .where('id', '=', id)
    .where('workspaceId', '=', workspaceId);

  // Concurrency guard (#183 review): a per-step 'streaming' update must NEVER
  // overwrite a row the terminal callback already finalized. onStepFinish
  // fires its update fire-and-forget, so that UPDATE can land AFTER finalize
  // on a DIFFERENT pool connection (commit order is not guaranteed). Limiting
  // it to rows STILL in 'streaming' turns a late update into a no-op once the
  // row is completed/error/aborted. The terminal finalize runs WITHOUT the
  // guard so it always wins.
  const guarded = opts?.onlyIfStreaming
    ? byIdAndWorkspace.where('status', '=', 'streaming')
    : byIdAndWorkspace;

  return guarded.returning(this.baseFields).executeTakeFirst();
}
|
||||
|
||||
/**
 * Crash-recovery sweep (#183): flip assistant rows still left in the
 * 'streaming' state (a turn that died mid-write before reaching a terminal
 * status) to 'aborted'. Meant to run once on server start. Workspace-wide on
 * purpose: a crash can leave dangling streaming rows in any workspace.
 *
 * Bounded by recency (#183 review): only rows UNTOUCHED for
 * SWEEP_STREAMING_STALE_MS are swept. A live turn bumps `updatedAt` on every
 * step, so an actively-streaming row never matches; this keeps a fresh
 * replica's boot-sweep from aborting a turn another replica is still
 * streaming in a multi-instance deploy.
 *
 * @param trx Optional transaction to run the sweep in.
 * @returns The number of rows swept, so the caller can log it.
 */
async sweepStreaming(trx?: KyselyTransaction): Promise<number> {
  // Anything last touched before this instant counts as abandoned.
  const cutoff = new Date(Date.now() - SWEEP_STREAMING_STALE_MS);

  const sweptIds = await dbOrTx(this.db, trx)
    .updateTable('aiChatMessages')
    .set({ status: 'aborted', updatedAt: new Date() })
    .where('status', '=', 'streaming')
    .where('updatedAt', '<', cutoff)
    .returning('id')
    .execute();

  return sweptIds.length;
}
|
||||
}
|
||||
|
||||
4
apps/server/src/database/types/db.d.ts
vendored
4
apps/server/src/database/types/db.d.ts
vendored
@@ -620,6 +620,10 @@ export interface AiChatMessages {
|
||||
content: string | null;
|
||||
toolCalls: Json | null;
|
||||
metadata: Json | null;
|
||||
// Turn lifecycle status (#183): 'streaming' | 'completed' | 'error' |
|
||||
// 'aborted'. NULL on rows written before the status column existed; the app
|
||||
// treats NULL as 'completed' (a settled, pre-status message).
|
||||
status: string | null;
|
||||
tsv: string | null;
|
||||
createdAt: Generated<Timestamp>;
|
||||
updatedAt: Generated<Timestamp>;
|
||||
|
||||
270
apps/server/test/integration/ai-chat-message-status.int-spec.ts
Normal file
270
apps/server/test/integration/ai-chat-message-status.int-spec.ts
Normal file
@@ -0,0 +1,270 @@
|
||||
import { Kysely } from 'kysely';
|
||||
import { AiChatMessageRepo } from '@docmost/db/repos/ai-chat/ai-chat-message.repo';
|
||||
import {
|
||||
getTestDb,
|
||||
destroyTestDb,
|
||||
createWorkspace,
|
||||
createUser,
|
||||
createChat,
|
||||
createMessage,
|
||||
} from './db';
|
||||
|
||||
/**
|
||||
* Integration coverage for the #183 step-granular durability primitives on
|
||||
* AiChatMessageRepo: `update` (in-place patch by id+workspace, bumps updatedAt,
|
||||
* returns the row) and `sweepStreaming` (crash recovery: flip dangling
|
||||
* 'streaming' rows to 'aborted'). Real SQL against docmost_test, not a mock.
|
||||
*/
|
||||
describe('AiChatMessageRepo.update + sweepStreaming [integration]', () => {
|
||||
let db: Kysely<any>;
|
||||
let repo: AiChatMessageRepo;
|
||||
let workspaceId: string;
|
||||
let otherWorkspaceId: string;
|
||||
let userId: string;
|
||||
let chatId: string;
|
||||
let otherChatId: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
db = getTestDb();
|
||||
repo = new AiChatMessageRepo(db as any);
|
||||
workspaceId = (await createWorkspace(db)).id;
|
||||
otherWorkspaceId = (await createWorkspace(db)).id;
|
||||
userId = (await createUser(db, workspaceId)).id;
|
||||
chatId = (await createChat(db, { workspaceId, creatorId: userId })).id;
|
||||
const otherUser = await createUser(db, otherWorkspaceId);
|
||||
otherChatId = (
|
||||
await createChat(db, {
|
||||
workspaceId: otherWorkspaceId,
|
||||
creatorId: otherUser.id,
|
||||
})
|
||||
).id;
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await destroyTestDb();
|
||||
});
|
||||
|
||||
it('update patches content/status/metadata and bumps updatedAt', async () => {
|
||||
const seeded = await repo.insert({
|
||||
chatId,
|
||||
workspaceId,
|
||||
userId,
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
status: 'streaming',
|
||||
metadata: { parts: [] } as never,
|
||||
});
|
||||
const before = seeded.updatedAt;
|
||||
// Ensure a measurable timestamp delta.
|
||||
await new Promise((r) => setTimeout(r, 5));
|
||||
|
||||
const updated = await repo.update(seeded.id, workspaceId, {
|
||||
content: 'final answer',
|
||||
status: 'completed',
|
||||
metadata: { parts: [{ type: 'text', text: 'final answer' }] },
|
||||
});
|
||||
|
||||
expect(updated).toBeDefined();
|
||||
expect(updated!.content).toBe('final answer');
|
||||
expect(updated!.status).toBe('completed');
|
||||
expect((updated!.metadata as any).parts).toHaveLength(1);
|
||||
// The 5ms sleep above guarantees a strictly-later timestamp.
|
||||
expect(new Date(updated!.updatedAt).getTime()).toBeGreaterThan(
|
||||
new Date(before).getTime(),
|
||||
);
|
||||
});
|
||||
|
||||
it('onlyIfStreaming update is a NO-OP once the row is finalized (race guard)', async () => {
|
||||
// Reproduce the step-update-vs-finalize race (#183 review): the row is
|
||||
// finalized to 'completed', then a LATE per-step 'streaming' update lands.
|
||||
// With `onlyIfStreaming` it must match nothing and leave the finalized row
|
||||
// untouched (no clobber back to 'streaming', no lost usage).
|
||||
const seeded = await repo.insert({
|
||||
chatId,
|
||||
workspaceId,
|
||||
userId,
|
||||
role: 'assistant',
|
||||
content: 'partial',
|
||||
status: 'streaming',
|
||||
});
|
||||
// Terminal finalize (unguarded) wins.
|
||||
await repo.update(seeded.id, workspaceId, {
|
||||
content: 'final answer',
|
||||
status: 'completed',
|
||||
metadata: { usage: { totalTokens: 42 } } as never,
|
||||
});
|
||||
// A straggler per-step update arrives AFTER finalize.
|
||||
const late = await repo.update(
|
||||
seeded.id,
|
||||
workspaceId,
|
||||
{ content: 'partial', status: 'streaming', metadata: {} as never },
|
||||
{ onlyIfStreaming: true },
|
||||
);
|
||||
expect(late).toBeUndefined(); // matched no 'streaming' row -> no-op
|
||||
const rows = await repo.findAllByChat(chatId, workspaceId);
|
||||
const row = rows.find((r) => r.id === seeded.id)!;
|
||||
expect(row.status).toBe('completed'); // NOT clobbered back to streaming
|
||||
expect(row.content).toBe('final answer');
|
||||
expect((row.metadata as any).usage.totalTokens).toBe(42); // usage preserved
|
||||
});
|
||||
|
||||
it('update is workspace-scoped: a foreign workspace id matches nothing', async () => {
|
||||
const seeded = await repo.insert({
|
||||
chatId,
|
||||
workspaceId,
|
||||
userId,
|
||||
role: 'assistant',
|
||||
content: 'orig',
|
||||
status: 'streaming',
|
||||
});
|
||||
const res = await repo.update(seeded.id, otherWorkspaceId, {
|
||||
status: 'completed',
|
||||
});
|
||||
expect(res).toBeUndefined();
|
||||
// The row in the real workspace is untouched.
|
||||
const rows = await repo.findAllByChat(chatId, workspaceId);
|
||||
const stillThere = rows.find((r) => r.id === seeded.id);
|
||||
expect(stillThere!.status).toBe('streaming');
|
||||
// Clean up so it does not pollute the sweep test below.
|
||||
await repo.update(seeded.id, workspaceId, { status: 'completed' });
|
||||
});
|
||||
|
||||
// Push a row's updatedAt `minutesAgo` minutes into the past so it qualifies as
// a STALE streaming row. The sweep only flips rows untouched for >10 minutes;
// a live turn bumps updatedAt every step, so a fresh row would never match.
async function backdateUpdatedAt(
  id: string,
  minutesAgo: number,
): Promise<void> {
  const ageMs = minutesAgo * 60 * 1000;
  const backdated = new Date(Date.now() - ageMs);

  await db
    .updateTable('aiChatMessages')
    .set({ updatedAt: backdated })
    .where('id', '=', id)
    .execute();
}
|
||||
|
||||
it('sweepStreaming flips STALE dangling streaming rows to aborted and counts them', async () => {
  // Two dangling streaming rows in our workspace + one in another workspace —
  // all backdated past the staleness threshold so the sweep picks them up.
  const a = await createMessage(db, {
    workspaceId,
    chatId,
    role: 'assistant',
    status: 'streaming',
  });
  const b = await createMessage(db, {
    workspaceId,
    chatId,
    role: 'assistant',
    status: 'streaming',
  });
  const other = await createMessage(db, {
    workspaceId: otherWorkspaceId,
    chatId: otherChatId,
    role: 'assistant',
    status: 'streaming',
  });
  await backdateUpdatedAt(a.id, 20);
  await backdateUpdatedAt(b.id, 20);
  await backdateUpdatedAt(other.id, 20);

  // A settled row must NOT be touched.
  const done = await createMessage(db, {
    workspaceId,
    chatId,
    role: 'assistant',
    status: 'completed',
  });
  // A legacy NULL-status row must NOT be touched.
  const legacy = await createMessage(db, {
    workspaceId,
    chatId,
    role: 'assistant',
    status: null,
  });

  const swept = await repo.sweepStreaming();
  // The count is database-wide, so only a lower bound can be asserted: at
  // least the 3 stale streaming rows created here (2 + 1 in the other ws).
  expect(swept).toBeGreaterThanOrEqual(3);

  const rows = await repo.findAllByChat(chatId, workspaceId);
  const byId = new Map(rows.map((r) => [r.id, r]));
  expect(byId.get(a.id)!.status).toBe('aborted');
  expect(byId.get(b.id)!.status).toBe('aborted');
  expect(byId.get(done.id)!.status).toBe('completed');
  expect(byId.get(legacy.id)!.status).toBeNull();

  // The sweep is workspace-wide: the stale row seeded in the OTHER workspace
  // must have been aborted as well (previously seeded but never checked).
  const otherRows = await repo.findAllByChat(otherChatId, otherWorkspaceId);
  const otherRow = otherRows.find((r) => r.id === other.id);
  expect(otherRow).toBeDefined();
  expect(otherRow!.status).toBe('aborted');

  // Idempotent: a second sweep must leave the already-aborted rows alone. Its
  // return value is a global count and cannot be pinned, so verify instead
  // that neither the status nor updatedAt of our rows moved.
  await repo.sweepStreaming();
  const rows2 = await repo.findAllByChat(chatId, workspaceId);
  const byId2 = new Map(rows2.map((r) => [r.id, r]));
  for (const id of [a.id, b.id]) {
    const first = byId.get(id)!;
    const second = byId2.get(id)!;
    expect(second.status).toBe('aborted');
    expect(new Date(second.updatedAt).getTime()).toBe(
      new Date(first.updatedAt).getTime(),
    );
  }
});
|
||||
|
||||
it('sweepStreaming does NOT sweep a FRESH streaming row (recency bound, #183 review)', async () => {
|
||||
// A row that is actively streaming (recent updatedAt) must survive the sweep:
|
||||
// a fresh replica's boot-sweep must never abort a turn another replica is
|
||||
// still streaming in a multi-instance deploy.
|
||||
const fresh = await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId,
|
||||
role: 'assistant',
|
||||
status: 'streaming',
|
||||
});
|
||||
// A STALE streaming row created alongside it IS swept — proving the sweep
|
||||
// ran and the only difference is recency.
|
||||
const stale = await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId,
|
||||
role: 'assistant',
|
||||
status: 'streaming',
|
||||
});
|
||||
await backdateUpdatedAt(stale.id, 20);
|
||||
|
||||
await repo.sweepStreaming();
|
||||
|
||||
const rows = await repo.findAllByChat(chatId, workspaceId);
|
||||
const byId = new Map(rows.map((r) => [r.id, r]));
|
||||
// Fresh (recently-updated) streaming row is left untouched...
|
||||
expect(byId.get(fresh.id)!.status).toBe('streaming');
|
||||
// ...while the stale one alongside it was swept to 'aborted'.
|
||||
expect(byId.get(stale.id)!.status).toBe('aborted');
|
||||
});
|
||||
|
||||
it('findAllByChat caps the result, keeping the NEWEST messages in order (#183 review)', async () => {
|
||||
// A dedicated chat so the cap test is independent of the rows above.
|
||||
const cappedChat = (
|
||||
await createChat(db, { workspaceId, creatorId: userId })
|
||||
).id;
|
||||
const base = Date.now();
|
||||
// Three messages at strictly increasing timestamps.
|
||||
await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId: cappedChat,
|
||||
content: 'm1-oldest',
|
||||
createdAt: new Date(base),
|
||||
});
|
||||
await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId: cappedChat,
|
||||
content: 'm2',
|
||||
createdAt: new Date(base + 1000),
|
||||
});
|
||||
await createMessage(db, {
|
||||
workspaceId,
|
||||
chatId: cappedChat,
|
||||
content: 'm3-newest',
|
||||
createdAt: new Date(base + 2000),
|
||||
});
|
||||
|
||||
// Cap of 2 -> the OLDEST message is dropped; the newest two stay, in
|
||||
// chronological order (oldest -> newest).
|
||||
const capped = await repo.findAllByChat(cappedChat, workspaceId, 2);
|
||||
expect(capped.map((r) => r.content)).toEqual(['m2', 'm3-newest']);
|
||||
|
||||
// Without a cap (well above the row count) all three come back in order.
|
||||
const all = await repo.findAllByChat(cappedChat, workspaceId, 100);
|
||||
expect(all.map((r) => r.content)).toEqual(['m1-oldest', 'm2', 'm3-newest']);
|
||||
});
|
||||
});
|
||||
@@ -104,7 +104,8 @@ export async function createWorkspace(
|
||||
name: overrides.name ?? `ws-${suffix}`,
|
||||
// hostname is uniquely constrained; keep it unique per workspace.
|
||||
hostname: `host-${suffix}`,
|
||||
settings: overrides.settings === undefined ? null : (overrides.settings as any),
|
||||
settings:
|
||||
overrides.settings === undefined ? null : (overrides.settings as any),
|
||||
})
|
||||
.returning(['id', 'settings'])
|
||||
.executeTakeFirstOrThrow();
|
||||
@@ -226,3 +227,37 @@ export async function createChat(
|
||||
.executeTakeFirstOrThrow();
|
||||
return { id: row.id as string };
|
||||
}
|
||||
|
||||
/**
 * Test helper: insert one row into `aiChatMessages` and return its id.
 *
 * Omitted fields default to: `userId` null, `role` 'assistant', `content`
 * null, `status` null, `metadata` null. `createdAt` is only sent when given,
 * so the database default applies otherwise.
 */
export async function createMessage(
  db: Kysely<any>,
  args: {
    workspaceId: string;
    chatId: string;
    userId?: string | null;
    role?: string;
    content?: string | null;
    status?: string | null;
    metadata?: unknown;
    // Explicit timestamp so a test can control message ORDER (the default DB
    // now() can tie within a millisecond, and the v4 id is not time-ordered).
    createdAt?: Date;
  },
): Promise<{ id: string }> {
  const { workspaceId, chatId, createdAt } = args;

  const values = {
    id: randomUUID(),
    workspaceId,
    chatId,
    userId: args.userId ?? null,
    role: args.role ?? 'assistant',
    content: args.content ?? null,
    status: args.status ?? null,
    metadata: (args.metadata ?? null) as any,
    // Leave the column out entirely when no explicit timestamp was requested.
    ...(createdAt ? { createdAt } : {}),
  };

  const inserted = await db
    .insertInto('aiChatMessages')
    .values(values)
    .returning(['id'])
    .executeTakeFirstOrThrow();

  return { id: inserted.id as string };
}
|
||||
|
||||
109
packages/git-sync/build/engine/client.types.d.ts
vendored
Normal file
109
packages/git-sync/build/engine/client.types.d.ts
vendored
Normal file
@@ -0,0 +1,109 @@
|
||||
/**
|
||||
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
|
||||
* rather than any concrete client, because the gitmost server writes NATIVELY —
|
||||
* through repositories + collab `openDirectConnection`.
|
||||
*
|
||||
* `GitSyncClient` is that interface: the native datasource (server side)
|
||||
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
|
||||
* subsets of it. The signatures below MIRROR exactly the methods the engine's
|
||||
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
|
||||
* off each result), so a REST-style client is still structurally assignable and
|
||||
* the native adapter has a precise contract.
|
||||
*/
|
||||
/**
 * A page node as returned by `listSpaceTree` (the sidebar/tree walk, no body).
 *
 * The engine layout (`buildVaultLayout`) consumes `PageNode` from `./layout`,
 * which only requires `id` (+ optional `title`/`slugId`/`parentPageId`); this
 * lite shape documents the fields the tree walk surfaces. Real tree nodes also
 * carry `position`, `icon`, `hasChildren` — kept open via the index signature.
 */
export interface GitSyncPageNodeLite {
    /** The only field that is always present. */
    id: string;
    slugId?: string;
    title?: string;
    parentPageId?: string | null;
    hasChildren?: boolean;
    /** `listSpaceTree` nodes carry extra fields (position, icon, …). */
    [key: string]: unknown;
}
|
||||
/**
 * The structural client the engine depends on. Only `Pick<GitSyncClient, ...>`
 * subsets are ever used:
 *  - pull reads: `getPageJson` (+ the tree walk's `listSpaceTree`),
 *  - push writes: `importPageMarkdown` / `createPage` / `deletePage` /
 *    `movePage` / `renamePage`,
 *  - continuous (phase B+): `listRecentSince` / `listTrash` / `restorePage`.
 */
export interface GitSyncClient {
    /**
     * Full tree of page nodes for the space (or the subtree rooted at
     * `rootPageId`), each WITHOUT body content.
     *
     * @returns `complete` is `false` when the walk was truncated / a fetch
     * failed — the pull side suppresses absence deletions on an incomplete
     * tree (SPEC §8). The native impl always returns `complete: true` (it
     * reads the DB, not a paginated REST endpoint).
     */
    listSpaceTree(
        spaceId: string,
        rootPageId?: string,
    ): Promise<{
        pages: GitSyncPageNodeLite[];
        complete: boolean;
    }>;
    /**
     * One page WITH its ProseMirror body content.
     *
     * @returns `applyPullActions` reads `id`, `slugId`, `title`,
     * `parentPageId`, `spaceId` (for the file meta) and `content` (to
     * stabilize/serialize). `updatedAt` is carried for the poll-suppression
     * loop-guard.
     */
    getPageJson(pageId: string): Promise<{
        id: string;
        slugId: string;
        title: string;
        parentPageId: string | null;
        spaceId: string;
        updatedAt: string;
        content: unknown;
    }>;
    /**
     * Merge a page's body from a self-contained markdown file (meta + body).
     * This is the collab/Yjs write path (SPEC §2/§15.6) — never a raw jsonb
     * overwrite.
     *
     * `baseMarkdown` is the last-synced version of the file
     * (`refs/docmost/last-pushed`), the common ancestor for a THREE-WAY merge
     * against the live doc so concurrent human edits survive (review #5).
     * Optional/null -> 2-way.
     *
     * @returns `applyPushActions` reads only an optional `updatedAt` off the
     * result (via `extractUpdatedAt`, tolerant of extra fields).
     */
    importPageMarkdown(
        pageId: string,
        fullMarkdown: string,
        baseMarkdown?: string | null,
    ): Promise<{
        updatedAt?: string;
        [key: string]: unknown;
    }>;
    /**
     * Create a new page.
     *
     * @returns The assigned id at `data.id` (`applyPushActions` reads
     * `result.data.id`, then writes it back into the file's meta). An optional
     * top-level/`data.updatedAt` feeds the loop-guard.
     */
    createPage(
        title: string,
        content: string,
        spaceId: string,
        parentPageId?: string,
    ): Promise<{
        data: {
            id: string;
        };
        updatedAt?: string;
        [key: string]: unknown;
    }>;
    /** Soft-delete a page to Trash (SPEC §8). Result is not inspected. */
    deletePage(pageId: string): Promise<unknown>;
    /**
     * Reparent a page (and optionally set its fractional-index `position`).
     * The engine passes `position` UNDEFINED for now; the native impl computes
     * a default between siblings. Result is not inspected.
     */
    movePage(
        pageId: string,
        parentPageId: string | null,
        position?: string,
    ): Promise<unknown>;
    /** Change a page's title only (no body touch). Result is not inspected. */
    renamePage(pageId: string, title: string): Promise<unknown>;
    /**
     * Pages updated since `sinceIso` (the poll-safety reconciliation, SPEC §8).
     * `spaceId` may be undefined (all spaces); `hardPageCap` bounds the walk.
     */
    listRecentSince(
        spaceId: string | undefined,
        sinceIso: string | null,
        hardPageCap?: number,
    ): Promise<unknown[]>;
    /** List soft-deleted (trashed) pages for the space (deletion detection). */
    listTrash(spaceId: string): Promise<unknown[]>;
    /** Restore a soft-deleted page from Trash. Result is not inspected. */
    restorePage(pageId: string): Promise<unknown>;
}
|
||||
13
packages/git-sync/build/engine/client.types.js
Normal file
13
packages/git-sync/build/engine/client.types.js
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
|
||||
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
|
||||
* rather than any concrete client, because the gitmost server writes NATIVELY —
|
||||
* through repositories + collab `openDirectConnection`.
|
||||
*
|
||||
* `GitSyncClient` is that interface: the native datasource (server side)
|
||||
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
|
||||
* subsets of it. The signatures below MIRROR exactly the methods the engine's
|
||||
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
|
||||
* off each result), so a REST-style client is still structurally assignable and
|
||||
* the native adapter has a precise contract.
|
||||
*/
|
||||
export {};
|
||||
1
packages/git-sync/build/engine/config-errors.d.ts
vendored
Normal file
1
packages/git-sync/build/engine/config-errors.d.ts
vendored
Normal file
@@ -0,0 +1 @@
|
||||
/**
 * Run `factory` and return its result.
 *
 * Per the implementation in config-errors.js: when `factory` throws a
 * `ZodError`, a summary of the missing / invalid variables is written to
 * stderr and the process exits with code 1. Any other error propagates
 * unchanged.
 */
export declare function loadSettingsOrExit<T>(factory: () => T): T;
|
||||
50
packages/git-sync/build/engine/config-errors.js
Normal file
50
packages/git-sync/build/engine/config-errors.js
Normal file
@@ -0,0 +1,50 @@
|
||||
import { ZodError } from 'zod';
|
||||
// Turn a ZodError from settings validation into a clear, actionable startup
|
||||
// message that names the offending env var(s), then exit(1) — no raw stack
|
||||
// trace. Mirrors the Python new-project skeleton's load_settings_or_exit.
|
||||
// A non-ZodError is left to propagate unchanged.
|
||||
export function loadSettingsOrExit(factory) {
|
||||
try {
|
||||
return factory();
|
||||
}
|
||||
catch (err) {
|
||||
if (!(err instanceof ZodError))
|
||||
throw err;
|
||||
const missing = [];
|
||||
const invalid = [];
|
||||
for (const issue of err.issues) {
|
||||
const name = issue.path.length ? String(issue.path[0]) : '?';
|
||||
// A missing required variable surfaces as an `invalid_type` issue whose
|
||||
// received value was `undefined`. zod 3 exposed `issue.received` directly;
|
||||
// zod 4 dropped that field and instead folds it into the message
|
||||
// ("expected string, received undefined"). Detect both shapes so the
|
||||
// missing-vs-invalid split holds across zod majors. NOTE: an invalid (but
|
||||
// present) value uses a different code (invalid_format / invalid_value) or
|
||||
// an `invalid_type` message that reports a non-undefined received (e.g.
|
||||
// "received NaN" from a coerced number), so neither is misread as missing.
|
||||
const i = issue;
|
||||
const isMissing = issue.code === 'invalid_type' &&
|
||||
(i.received === 'undefined' ||
|
||||
/received undefined/i.test(i.message ?? ''));
|
||||
if (isMissing)
|
||||
missing.push(name);
|
||||
else
|
||||
invalid.push(`${name}: ${issue.message}`);
|
||||
}
|
||||
const lines = ['Configuration error in environment / .env:'];
|
||||
if (missing.length) {
|
||||
lines.push(' Missing required variable(s):');
|
||||
for (const n of [...new Set(missing)])
|
||||
lines.push(` - ${n}`);
|
||||
}
|
||||
if (invalid.length) {
|
||||
lines.push(' Invalid value(s):');
|
||||
for (const item of invalid)
|
||||
lines.push(` - ${item}`);
|
||||
}
|
||||
lines.push('');
|
||||
lines.push('Set them in .env (see .env.example) and try again.');
|
||||
process.stderr.write(lines.join('\n') + '\n');
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
70
packages/git-sync/build/engine/cycle.d.ts
vendored
Normal file
70
packages/git-sync/build/engine/cycle.d.ts
vendored
Normal file
@@ -0,0 +1,70 @@
|
||||
import { VaultGit } from "./git.js";
|
||||
import { GitSyncClient } from "./client.types.js";
|
||||
import { Settings } from "./settings.js";
|
||||
/**
 * Absolute-path filesystem primitives the cycle needs. They are injected (not
 * imported) so the engine stays IO-free and unit-testable.
 */
export interface CycleFs {
    readFile: (absPath: string) => Promise<string>;
    writeFile: (absPath: string, text: string) => Promise<void>;
    /** Recursive. */
    mkdir: (absDir: string) => Promise<void>;
    /** Force: a missing file is a no-op. */
    rm: (absPath: string) => Promise<void>;
}
|
||||
/** Everything `runCycle` needs, supplied by the caller. */
export interface RunCycleDeps {
    spaceId: string;
    /** The Docmost seam (reads for pull, writes for push). */
    client: GitSyncClient;
    /** The per-space git vault (a real working repo). */
    vault: VaultGit;
    /** Engine settings; `vaultPath` roots the relPath -> absolute-path mapping. */
    settings: Settings;
    fs: CycleFs;
    log: (line: string) => void;
    /**
     * Delete-cap hook (the ONLY caller-specific policy).
     *
     * Called with the push dry-run's planned delete count
     * (`Number.POSITIVE_INFINITY` when the dry-run itself failed, so the hook
     * can fail safe) and the live client; returns the client to use for the
     * REAL apply. gitmost uses it to neutralize deletes when over its cap.
     *
     * When omitted, every op is applied unmodified and NO dry-run is performed
     * (one fewer push planning pass).
     */
    resolveApplyClient?: (
        plannedDeletes: number,
        client: GitSyncClient,
    ) => GitSyncClient;
}
|
||||
/** Summary returned by `runCycle`. */
export interface RunCycleResult {
  /** Whether the pull/push phases actually ran. */
  ran: boolean;
  /** Present when the cycle short-circuited without running pull/push. */
  skipped?: "merge-in-progress";
  /** Pull-phase counters. */
  pull?: {
    written: number;
    deleted: number;
    conflict: boolean;
  };
  /** Push-phase summary. */
  push?: {
    mode: string;
    failures: number;
  };
}
|
||||
/**
 * Execute ONE complete reconcile cycle for a space: PULL (Docmost -> vault)
 * followed by PUSH (vault -> Docmost), using the branch choreography the
 * engine requires. The app drives only this entry point, so the staging order
 * can never drift from the engine version it ships with.
 *
 * Staging order (data-loss-critical, SPEC §6/§9):
 *   1. assertGitAvailable + ensureRepo — the git state store must exist.
 *   2. Refuse to run while a merge is unresolved (left by an earlier
 *      conflicting pull); the next checkout would fail otherwise.
 *   3. ensureBranch('docmost','main') + checkout('docmost'). Pull writes MUST
 *      land on `docmost`, never on `main`: applyPullActions commits on
 *      `docmost`, then checks out `main` and merges docmost -> main. Writing
 *      Docmost content directly onto `main` would clobber local file edits
 *      before push has a chance to diff them.
 *   4. PULL: readExisting -> listSpaceTree -> computePullActions -> apply.
 *   5. PUSH: an optional dry-run feeding the delete-cap hook, then the real
 *      apply.
 *
 * Locking and cap POLICY belong to the caller; this function owns only the
 * mechanics.
 */
export declare function runCycle(deps: RunCycleDeps): Promise<RunCycleResult>;
|
||||
97
packages/git-sync/build/engine/cycle.js
Normal file
97
packages/git-sync/build/engine/cycle.js
Normal file
@@ -0,0 +1,97 @@
|
||||
import { readExisting, computePullActions, applyPullActions } from "./pull.js";
|
||||
import { runPush } from "./push.js";
|
||||
/**
 * Execute ONE complete reconcile cycle for a space: PULL (Docmost -> vault)
 * followed by PUSH (vault -> Docmost), using the branch choreography the
 * engine requires. The app drives only this entry point, so the staging order
 * can never drift from the engine version it ships with.
 *
 * Staging order (data-loss-critical, SPEC §6/§9):
 *   1. assertGitAvailable + ensureRepo — the git state store must exist.
 *   2. Refuse to run while a merge is unresolved (left by an earlier
 *      conflicting pull); the next checkout would fail otherwise.
 *   3. ensureBranch('docmost','main') + checkout('docmost'). Pull writes MUST
 *      land on `docmost`, never on `main`: applyPullActions commits on
 *      `docmost`, then checks out `main` and merges docmost -> main. Writing
 *      Docmost content directly onto `main` would clobber local file edits
 *      before push has a chance to diff them.
 *   4. PULL: readExisting -> listSpaceTree -> computePullActions -> apply.
 *   5. PUSH: an optional dry-run feeding the delete-cap hook, then the real
 *      apply.
 *
 * Locking and cap POLICY belong to the caller; this function owns only the
 * mechanics.
 */
export async function runCycle(deps) {
    const { spaceId, client, vault, settings, fs, log, resolveApplyClient } = deps;
    const vaultRoot = settings.vaultPath;
    // Vault-relative path -> absolute path under the vault root.
    const toAbsolute = (relPath) => `${vaultRoot}/${relPath}`;
    const readVaultFile = (relPath) => fs.readFile(toAbsolute(relPath));
    // Push dependencies are identical for the dry-run and the real apply except
    // for the client they hand out.
    const pushDepsFor = (pushClient) => ({
        settings,
        git: vault,
        makeClient: () => pushClient,
        readFile: readVaultFile,
        writeFile: (relPath, text) => fs.writeFile(toAbsolute(relPath), text),
        log,
    });

    // Step 1 — git is the engine's state store: the repo and its branches must
    // exist before any tracked-file listing or diff.
    await vault.assertGitAvailable();
    await vault.ensureRepo();

    // Step 2 — never run on top of an unresolved merge (SPEC §9): an earlier
    // conflicting pull leaves the vault mid-merge and the next checkout would fail.
    const midMerge = await vault.isMergeInProgress();
    if (midMerge) {
        log("vault has an unresolved merge — resolve it (or 'git merge --abort') and re-run (SPEC §9); skipping cycle.");
        return { ran: false, skipped: "merge-in-progress" };
    }

    // Step 3 — pull writes land on `docmost`, so switch to it BEFORE applying.
    await vault.ensureBranch("docmost", "main");
    await vault.checkout("docmost");

    // Step 4 — PULL.
    const existing = await readExisting({
        listTracked: () => vault.listTrackedFiles("*.md"),
        readFile: readVaultFile,
    });
    const { pages, complete } = await client.listSpaceTree(spaceId);
    const pullActions = computePullActions({
        pages,
        treeComplete: complete,
        existing,
    });
    const pullOutcome = await applyPullActions({
        client,
        git: vault,
        writeFile: (absPath, text) => fs.writeFile(absPath, text),
        mkdir: (absDir) => fs.mkdir(absDir),
        rm: (absPath) => fs.rm(absPath),
    }, pullActions, vaultRoot);

    // Step 5 — PUSH.
    let applyClient = client;
    if (resolveApplyClient) {
        // Plan the push as a DRY-RUN first to learn the delete count, then let
        // the caller pick the apply client (e.g. neutralize deletes over a cap).
        // Infinity is the fail-safe value reported when planning itself fails.
        let plannedDeletes = Number.POSITIVE_INFINITY;
        try {
            const dryRun = await runPush(pushDepsFor(client), { dryRun: true });
            plannedDeletes = dryRun.planned?.deletes ?? 0;
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            log(`push dry-run planning failed (${reason}); deferring deletion policy to the cap hook (fail-safe).`);
        }
        applyClient = resolveApplyClient(plannedDeletes, client);
    }
    const pushOutcome = await runPush(pushDepsFor(applyClient), { dryRun: false });

    const pull = {
        written: pullOutcome.written,
        deleted: pullOutcome.deleted,
        conflict: pullOutcome.merge.conflict,
    };
    const push = {
        mode: pushOutcome.mode,
        failures: pushOutcome.failures?.length ?? 0,
    };
    return { ran: true, pull, push };
}
|
||||
259
packages/git-sync/build/engine/git.d.ts
vendored
Normal file
259
packages/git-sync/build/engine/git.d.ts
vendored
Normal file
@@ -0,0 +1,259 @@
|
||||
/** Author name stamped on vault commits the engine itself creates (SPEC §7.3). */
export declare const BOT_AUTHOR_NAME = "Docmost Sync";
/** Author e-mail that accompanies the bot author name on engine commits. */
export declare const BOT_AUTHOR_EMAIL = "docmost-sync@local";
/** Branch the vault repository is initialized on. */
export declare const DEFAULT_BRANCH = "main";
|
||||
/**
 * A single row of `git diff --name-status` (SPEC §6, "FS → Docmost").
 * Rename detection (`-M`) is on, so a rename/copy (`R`/`C`) row carries both
 * the source (`oldPath`) and the destination (`path`) plus git's similarity
 * index in `score` (0–100).
 */
export interface DiffEntry {
  /** Single-letter change code. */
  status: "A" | "M" | "D" | "R" | "C";
  /** New (destination) path; for A/M/D it is the only path. */
  path: string;
  /** Source path — set only for R/C. */
  oldPath?: string;
  /** Rename/copy similarity score (0–100) — set only for R/C. */
  score?: number;
}
|
||||
/** Outcome of a `merge`: either it applied cleanly or it stopped on conflicts. */
export interface MergeResult {
  /** True when the merge applied cleanly (fast-forward or clean 3-way). */
  ok: boolean;
  /** True when the merge stopped on conflicts, leaving markers in the worktree. */
  conflict: boolean;
  /** Raw combined stdout+stderr, kept for logging and diagnostics. */
  output: string;
}
|
||||
/** Options for a commit authored by the engine (provenance, SPEC §7.3). */
export interface CommitOptions {
  /** Author name recorded on the commit. */
  authorName: string;
  /** Author e-mail recorded on the commit. */
  authorEmail: string;
  /**
   * Trailer lines appended to the commit message body, e.g.
   * `Docmost-Sync-Source: docmost`. They are the machine-readable provenance
   * the loop-guard keys on (SPEC §12, "commit-attribution").
   */
  trailers?: string[];
}
|
||||
/**
 * Git wrapper tied to one vault path. Create a single instance per vault;
 * each method runs git with `cwd = vaultPath`.
 */
export declare class VaultGit {
  private readonly vaultPath;
  constructor(vaultPath: string);
  /**
   * Preflight check that a runnable `git` binary is on PATH. Every vault
   * operation shells out to system `git`, so a missing binary (e.g. a slim
   * container image) must fail fast with an actionable message instead of a
   * cryptic ENOENT inside the first real git call. This is a presence check
   * only — no specific version is required. `git --version` is run with NO
   * `cwd`, because the vault directory may not exist yet at preflight time.
   */
  assertGitAvailable(): Promise<void>;
  /**
   * Run a git command in the vault and return its trimmed stdout. A thin
   * wrapper over `runRaw` that throws one unified Error (carrying
   * stderr/stdout) when git exits non-zero.
   */
  private run;
  /**
   * The single primitive through which every git invocation of this module
   * flows. It assembles argv, env, cwd and maxBuffer, runs git, and NEVER
   * throws: the exit info is returned so a caller can treat a non-zero exit
   * either as an error (`run`) or as a meaningful state (e.g. a merge
   * conflict).
   *
   *  - argv: `--no-pager -c core.quotepath=false` is ALWAYS prepended, so git
   *    never blocks on a pager and always prints verbatim UTF-8 paths (no
   *    octal escaping/quoting) for every path-printing command.
   *  - cwd: `opts.cwd === null` means "do not set a cwd" (used by the
   *    preflight); otherwise `opts.cwd ?? this.vaultPath`.
   *  - env: `vaultGitEnv(opts?.env)` (cwd-isolation plus caller extras).
   *  - On a spawn/exec error the error `message` is captured as well, so a
   *    failure that happens before git can write to stderr (e.g. ENOENT) is
   *    not lost.
   */
  private runRaw;
  /**
   * Make sure the vault directory exists and is an initialized git repo on
   * `main` with an initial (empty) commit, so that branches exist.
   * Idempotent — safe to call on every run. Also sets a LOCAL bot identity
   * for the vault repo when none is configured, so engine commits never fall
   * back to a global or unset identity.
   */
  ensureRepo(): Promise<void>;
  /** True if `cwd` is inside a git work-tree (the vault is initialized). */
  private isRepo;
  /** True if the given LOCAL git config key is set in the vault repo. */
  private hasLocalConfig;
  /** True if the repo has at least one commit (HEAD resolves). */
  private hasAnyCommit;
  /** True if a branch called `name` exists. */
  branchExists(name: string): Promise<boolean>;
  /**
   * Create branch `name` from `fromBranch` unless it already exists. When the
   * branch is present this is a no-op; it never checks the branch out.
   */
  ensureBranch(name: string, fromBranch: string): Promise<void>;
  /** Name of the branch that is currently checked out. */
  currentBranch(): Promise<string>;
  /** Check out an existing branch. */
  checkout(name: string): Promise<void>;
  /** Stage everything: additions, modifications and deletions. */
  stageAll(): Promise<void>;
  /**
   * True while the vault is mid-merge, i.e. an earlier run left a merge
   * unresolved (SPEC §9 / §12). Detection uses a `MERGE_HEAD` ref OR any
   * unmerged (conflicted) index entries (`git ls-files -u`). The pull cycle
   * checks this BEFORE any checkout, so a left-over merge yields a clear,
   * actionable message rather than a raw "you need to resolve your current
   * index first" failure inside `checkout`. This is what lets re-runs
   * converge (resumability, SPEC §12).
   */
  isMergeInProgress(): Promise<boolean>;
  /**
   * Commit whatever is currently STAGED, with an explicit author/committer
   * identity and the given trailers appended to the message body (SPEC §7.3
   * provenance). Resolves `true` when a commit was created and `false` when
   * there was nothing to commit (a graceful no-op). The caller is expected to
   * stage its changes first, e.g. with `stageAll`.
   */
  commit(message: string, opts: CommitOptions): Promise<boolean>;
  /**
   * Low-level commit shared by `commit` and the initial commit made by
   * `ensureRepo`. Builds the full message with trailers appended and sets the
   * author AND committer identity through env vars, so the committer matches
   * the author rather than the repo default.
   */
  private commitRaw;
  /**
   * Merge `fromBranch` into the current branch (`git merge --no-edit`).
   * Fast-forwards when possible and otherwise performs a real 3-way merge.
   * A conflict is SURFACED through the returned result, NOT auto-resolved
   * (SPEC §9): the conflict markers stay in the worktree for manual
   * resolution.
   */
  merge(fromBranch: string): Promise<MergeResult>;
  /** True if the index contains any unmerged (conflicted) paths. */
  private hasUnmergedPaths;
  /**
   * List the files tracked on the current branch, as forward-slash paths
   * relative to the vault root. An optional glob (a git pathspec such as
   * `"*.md"`) narrows the listing.
   *
   * The target wiki is Russian, so file names routinely contain Cyrillic
   * (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`,
   * `ls-files` would print such paths octal-escaped and double-quoted
   * (`"\320\232..."`), which `readExisting` in `src/pull.ts` would then parse
   * as garbage and break move/duplicate detection. Two measures prevent that:
   *  - `core.quotepath=false` turns the escaping/quoting off. It is part of
   *    the `runRaw` argv baseline for EVERY invocation, so it is not passed
   *    inline here.
   *  - `-z` makes git emit NUL-delimited RAW UTF-8 paths (no quoting, no
   *    newline ambiguity).
   * The RAW stdout is read (not the trimming `run()` helper, which would
   * mangle the NUL-delimited bytes), split on `\0`, and empty entries are
   * dropped. Paths are returned verbatim — git already uses forward slashes.
   */
  listTrackedFiles(glob?: string): Promise<string[]>;
  /**
   * Diff two refs with `--name-status -M -z` and parse the NUL-delimited
   * output (SPEC §6: the FS→Docmost push direction diffs `main` against
   * `refs/docmost/last-pushed`). Because rename detection is ON (`-M`), a
   * moved/renamed file is reported as ONE `R` row holding both the old and
   * the new path rather than as a delete+add pair — the distinction the push
   * planner needs to tell a move from a delete+create (SPEC §8 "Move vs
   * delete").
   *
   * `-z` yields NUL-delimited RAW UTF-8 records with NO quoting/escaping
   * (file names may be Cyrillic). The record shape depends on the status:
   *  - A/M/D: `status\0path\0`
   *  - R/C:   `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
   * The RAW stdout is read (not the trimming `run()` helper), split on `\0`,
   * the trailing empty entry is dropped, and the tokens are walked taking 1
   * or 2 path tokens per status. Paths are returned verbatim.
   */
  diffNameStatus(fromRef: string, toRef: string): Promise<DiffEntry[]>;
  /**
   * Resolve a ref/commit-ish to its full SHA, or `null` when it does not
   * exist. `rev-parse --verify --quiet` exits non-zero (printing nothing) for
   * an unknown ref, which maps cleanly to `null`. Used to read
   * `refs/docmost/last-pushed` (SPEC §5), which is absent before the first
   * push.
   */
  revParse(ref: string): Promise<string | null>;
  /**
   * Read a ref to its SHA, or `null` when unset. A thin alias of `revParse`,
   * named after the push direction's marker `refs/docmost/last-pushed`
   * (SPEC §5: "what from `main` is already reflected in Docmost").
   */
  readRef(ref: string): Promise<string | null>;
  /**
   * Point `ref` at `target` (`git update-ref <ref> <target>`). Used to move
   * `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
   * (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git
   * accepts.
   */
  updateRef(ref: string, target: string): Promise<void>;
  /**
   * Advance `branch` to `toCommit`, but ONLY as a TRUE fast-forward — that
   * is, when the current `branch` tip is an ancestor of `toCommit` (checked
   * with `git merge-base --is-ancestor <branch> <toCommit>`). Used to move
   * the `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10):
   * once a push succeeds Docmost already holds the pushed `main` content, so
   * the mirror must reflect it, otherwise the NEXT pull would diff our own
   * write back and re-pull it (loop-guard).
   *
   * SAFETY — never force, never clobber divergent history:
   *  - If `branch` IS an ancestor of `toCommit`, it is advanced with
   *    `git update-ref refs/heads/<branch> <toCommit>`. `docmost` is NOT
   *    checked out during a push (push works on `main`), so updating the ref
   *    directly is safe and leaves the working tree untouched.
   *  - If `branch` is NOT an ancestor (divergent / non-fast-forward), it is
   *    NOT moved: `{ ok: false, reason: 'not-fast-forward' }` is returned for
   *    the caller to log. A `docmost` history containing commits the push
   *    base lacks must never be overwritten.
   *
   * Resolves `{ ok: true }` when the branch was advanced (or was already at
   * `toCommit`, a degenerate fast-forward) and `{ ok: false, reason }`
   * otherwise. A missing `branch` or `toCommit` also yields `{ ok: false }`
   * with a reason.
   */
  fastForwardBranch(branch: string, toCommit: string): Promise<{
    ok: boolean;
    reason?: string;
  }>;
  /**
   * Read a file's content at a given ref (`git show <ref>:<path>`), or
   * `null` when the path does not exist there. The push direction uses it to
   * read the PRE-IMAGE of a DELETED file (e.g. at
   * `refs/docmost/last-pushed`) so that its `docmost:meta` — and with it the
   * `pageId` — can be recovered and the deletion translated into a
   * `delete_page` (SPEC §6/§8: only TRACKED files, i.e. those that had a
   * pageId, are deleted in Docmost). A non-zero exit (path absent at that
   * ref) maps to `null` instead of throwing.
   */
  showFileAtRef(ref: string, path: string): Promise<string | null>;
}
|
||||
/**
 * Assemble the environment for a vault git invocation (SPEC §12
 * cwd-isolation). The single `runRaw` primitive that every git command flows
 * through uses it, so the pins apply uniformly — including to the
 * `git --version` preflight.
 *
 * cwd-isolation is the module's central safety guarantee: each git command
 * MUST operate on the vault repo at `cwd: vaultPath` and on nothing else. A
 * `GIT_DIR` / `GIT_WORK_TREE` inherited from `process.env` would silently
 * redirect the operation away from `cwd` (e.g. to the source repo or another
 * checkout) and defeat that guarantee, so both are always stripped no matter
 * what else the caller adds (author/committer identity, etc.).
 *
 * Exported for unit testing.
 */
export declare function vaultGitEnv(extra?: Record<string, string>): NodeJS.ProcessEnv;
|
||||
/**
 * Compose a commit message whose body ends with the given trailer lines
 * (SPEC §7.3). A blank line separates the trailers from the subject so that
 * `git interpret-trailers` / `git log --format=%(trailers)` recognise them as
 * trailers.
 *
 * Exported for unit testing.
 */
export declare function buildCommitMessage(subject: string, trailers?: string[]): string;
|
||||
570
packages/git-sync/build/engine/git.js
Normal file
570
packages/git-sync/build/engine/git.js
Normal file
@@ -0,0 +1,570 @@
|
||||
/**
|
||||
* Thin async wrapper over the system `git` binary (SPEC §5: state store = git).
|
||||
*
|
||||
* IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`,
|
||||
* which is the vault's OWN git repository (default `data/vault`), SEPARATE from
|
||||
* the gitmost application repo. This module MUST NEVER run git against the
|
||||
* application repo. `data/` is gitignored, so a nested repo under `data/vault`
|
||||
* is safe. The pull cycle is READ-ONLY toward Docmost; this module only touches
|
||||
* the local vault git, never a git remote (push is deferred, see SPEC §7).
|
||||
*
|
||||
* Implementation notes:
|
||||
* - We shell out via `node:child_process` `execFile` (promisified), passing
|
||||
* ARGS AS AN ARRAY — no shell, so there is no command injection surface even
|
||||
* if a page title / branch name contains shell metacharacters.
|
||||
* - EVERY git invocation funnels through the single `runRaw` primitive, which
|
||||
* ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git
|
||||
* never blocks on a pager and always prints verbatim UTF-8 paths). There is
|
||||
* no exception — even the `git --version` preflight goes through `runRaw`.
|
||||
* - "nothing to commit" is treated as a graceful no-op, not an error.
|
||||
*/
|
||||
import { execFile } from "node:child_process";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import { promisify } from "node:util";
|
||||
// Promise-returning `execFile`; used to spawn git without a shell.
const execFileAsync = promisify(execFile);

/** Author name stamped on vault commits the engine itself creates (SPEC §7.3). */
export const BOT_AUTHOR_NAME = "Docmost Sync";
/** Author e-mail that accompanies the bot author name on engine commits. */
export const BOT_AUTHOR_EMAIL = "docmost-sync@local";

/** Branch the vault repository is initialized on. */
export const DEFAULT_BRANCH = "main";
|
||||
/**
|
||||
* A git wrapper bound to a single vault path. Construct once per vault; every
|
||||
* method runs git with `cwd = vaultPath`.
|
||||
*/
|
||||
export class VaultGit {
|
||||
vaultPath;
|
||||
constructor(vaultPath) {
|
||||
this.vaultPath = vaultPath;
|
||||
}
|
||||
/**
|
||||
* Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
|
||||
* to system `git` for every vault operation, so a missing binary (e.g. a slim
|
||||
* container image without git) must fail fast with an actionable message
|
||||
* rather than a cryptic ENOENT deep inside the first real git call. Presence
|
||||
* check only — we do NOT gate on a specific version. Runs `git --version`
|
||||
* with NO `cwd` (the vault dir may not exist yet at preflight time).
|
||||
*/
|
||||
async assertGitAvailable() {
|
||||
// Goes through the single `runRaw` primitive like every other invocation.
|
||||
// `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at
|
||||
// preflight time, so we must not point git at a missing directory.
|
||||
const r = await this.runRaw(["--version"], { cwd: null });
|
||||
if (r.code !== 0) {
|
||||
const detail = (r.stderr || r.stdout || "").trim();
|
||||
throw new Error("git binary not found or not runnable — install git (the vault state " +
|
||||
`store requires it). Underlying error: ${detail}`);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Run a git command in the vault and return trimmed stdout. THIN wrapper over
|
||||
* the single `runRaw` primitive: throws a clear, unified Error (including
|
||||
* stderr/stdout) on a non-zero exit.
|
||||
*/
|
||||
async run(args, opts) {
|
||||
const r = await this.runRaw(args, opts);
|
||||
if (r.code !== 0) {
|
||||
const detail = (r.stderr || r.stdout || "").trim();
|
||||
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
|
||||
}
|
||||
return r.stdout.trim();
|
||||
}
|
||||
/**
|
||||
* The ONE primitive every git invocation in this module flows through. Builds
|
||||
* the full argv (`--no-pager -c core.quotepath=false <args>`), env, cwd, and
|
||||
* maxBuffer, runs git, and NEVER throws — it returns the exit info so callers
|
||||
* can treat a non-zero exit as either an error (`run`) or a meaningful state
|
||||
* (e.g. a merge conflict, a porcelain diff that "fails" deliberately).
|
||||
*
|
||||
* - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
|
||||
* blocks on a pager and always prints verbatim UTF-8 paths (no octal
|
||||
* escaping/quoting). `quotepath=false` is the baseline for ALL path-
|
||||
* printing commands (ls-files, diff --name-only, …).
|
||||
* - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
|
||||
* vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
|
||||
* - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
|
||||
* - On a spawn/exec error we capture the error `message` too, so a failure
|
||||
* before git could write to stderr (e.g. ENOENT) is NOT lost.
|
||||
*/
|
||||
async runRaw(args, opts) {
|
||||
const cwd = opts?.cwd === null ? undefined : (opts?.cwd ?? this.vaultPath);
|
||||
try {
|
||||
const { stdout, stderr } = await execFileAsync("git", ["--no-pager", "-c", "core.quotepath=false", ...args], {
|
||||
// Generous buffer: file listings / porcelain output on a large vault
|
||||
// can be sizable.
|
||||
...(cwd !== undefined ? { cwd } : {}),
|
||||
maxBuffer: 64 * 1024 * 1024,
|
||||
env: vaultGitEnv(opts?.env),
|
||||
});
|
||||
return { code: 0, stdout, stderr };
|
||||
}
|
||||
catch (err) {
|
||||
const e = err;
|
||||
return {
|
||||
code: typeof e.code === "number" ? e.code : 1,
|
||||
stdout: e.stdout ?? "",
|
||||
// Preserve the error message when there is no stderr (e.g. a spawn
|
||||
// failure like ENOENT, where promisified execFile sets stderr to an
|
||||
// EMPTY STRING — so `||`, not `??`, to fall through to `message`).
|
||||
stderr: e.stderr || e.message || "",
|
||||
};
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Ensure the vault directory exists and is an initialized git repo on `main`
|
||||
* with an initial (empty) commit so branches exist. Idempotent: safe to call
|
||||
* on every run. Sets a LOCAL bot identity for the vault repo if none is set
|
||||
* (so engine commits never fall back to a global/unset identity).
|
||||
*/
|
||||
async ensureRepo() {
|
||||
await mkdir(this.vaultPath, { recursive: true });
|
||||
if (!(await this.isRepo())) {
|
||||
// `git init -b main` sets the initial branch on modern git; we still
|
||||
// guard the branch name below for safety on older binaries.
|
||||
await this.run(["init", "-b", DEFAULT_BRANCH]);
|
||||
}
|
||||
// Set a local identity for the vault repo if unset, so engine commits have
|
||||
// a deterministic committer even on a machine with no global git config.
|
||||
if (!(await this.hasLocalConfig("user.name"))) {
|
||||
await this.run(["config", "user.name", BOT_AUTHOR_NAME]);
|
||||
}
|
||||
if (!(await this.hasLocalConfig("user.email"))) {
|
||||
await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
|
||||
}
|
||||
// Neutralize correctness-affecting git config in the vault's LOCAL config so
|
||||
// a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just
|
||||
// output) and corrupt the vault. The vault is OUR dedicated repo, so LOCAL
|
||||
// values (which override global/system) are the right scope. Set
|
||||
// UNCONDITIONALLY every run — idempotent and cheap; `git config <key>`
|
||||
// writes to `--local` by default inside the repo. These MUST be in place
|
||||
// before any add/commit/checkout that could be affected, hence they run
|
||||
// before the initial-commit block below.
|
||||
// - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true
|
||||
// would rewrite LF<->CRLF on add/checkout, making our deterministic,
|
||||
// byte-stable markdown churn and breaking the round-trip invariant.
|
||||
// `false` guarantees git stores/checks out verbatim bytes.
|
||||
// - core.safecrlf=false — avoid CRLF-related warnings/aborts on add.
|
||||
// - commit.gpgsign=false — the headless daemon must never try to GPG-sign
|
||||
// a commit (would fail/hang; we already set GIT_TERMINAL_PROMPT=0).
|
||||
// - core.attributesFile=/dev/null — neutralize the user's GLOBAL
|
||||
// gitattributes so a global clean/smudge filter (filter.<name>.clean)
|
||||
// cannot rewrite the STORED blob and break §11 byte-stability (a config
|
||||
// that core.autocrlf=false does not cover). POSIX-only path, which is
|
||||
// fine: the daemon runs on Linux (Docker) / macOS. A system
|
||||
// /etc/gitattributes remains the host admin's domain (out of scope).
|
||||
// NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
|
||||
// human running git by hand in the vault must inherit the same neutralized
|
||||
// behavior; a transient `-c` would not persist. (core.quotepath, by
|
||||
// contrast, only affects OUR parsing of output and so is baked into the
|
||||
// `runRaw` argv baseline instead.)
|
||||
try {
|
||||
await this.run(["config", "core.autocrlf", "false"]);
|
||||
await this.run(["config", "core.safecrlf", "false"]);
|
||||
await this.run(["config", "commit.gpgsign", "false"]);
|
||||
await this.run(["config", "core.attributesFile", "/dev/null"]);
|
||||
}
|
||||
catch (err) {
|
||||
const detail = err instanceof Error ? err.message : String(err);
|
||||
throw new Error(`failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` +
|
||||
"/.git/config is writable and not locked (e.g. stale config.lock): " +
|
||||
detail);
|
||||
}
|
||||
// Create the initial empty commit on `main` if the repo has no commits yet,
|
||||
// so both `main` and (later) `docmost` branches have a common base.
|
||||
if (!(await this.hasAnyCommit())) {
|
||||
// Make sure we are on the default branch before the first commit (covers
|
||||
// the older-git case where `init -b` was not honored).
|
||||
await this.run(["checkout", "-B", DEFAULT_BRANCH]);
|
||||
await this.commitRaw("init vault", {
|
||||
authorName: BOT_AUTHOR_NAME,
|
||||
authorEmail: BOT_AUTHOR_EMAIL,
|
||||
allowEmpty: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
/** True if `cwd` is inside a git work-tree (the vault is initialized). */
|
||||
async isRepo() {
|
||||
const r = await this.runRaw(["rev-parse", "--is-inside-work-tree"]);
|
||||
return r.code === 0 && r.stdout.trim() === "true";
|
||||
}
|
||||
/** True if a LOCAL git config key is set in the vault repo. */
|
||||
async hasLocalConfig(key) {
|
||||
const r = await this.runRaw(["config", "--local", "--get", key]);
|
||||
return r.code === 0 && r.stdout.trim().length > 0;
|
||||
}
|
||||
/** True if the repo has at least one commit (HEAD resolves). */
|
||||
async hasAnyCommit() {
|
||||
const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]);
|
||||
return r.code === 0;
|
||||
}
|
||||
/** True if a branch with the given name exists. */
|
||||
async branchExists(name) {
|
||||
const r = await this.runRaw([
|
||||
"rev-parse",
|
||||
"--verify",
|
||||
`refs/heads/${name}`,
|
||||
]);
|
||||
return r.code === 0;
|
||||
}
|
||||
/**
|
||||
* Create `name` from `fromBranch` if it does not already exist. No-op (and no
|
||||
* checkout) when the branch is already present.
|
||||
*/
|
||||
async ensureBranch(name, fromBranch) {
|
||||
if (await this.branchExists(name))
|
||||
return;
|
||||
await this.run(["branch", name, fromBranch]);
|
||||
}
|
||||
/** Name of the currently checked-out branch. */
|
||||
async currentBranch() {
|
||||
return this.run(["rev-parse", "--abbrev-ref", "HEAD"]);
|
||||
}
|
||||
/** Check out an existing branch. */
|
||||
async checkout(name) {
|
||||
await this.run(["checkout", name]);
|
||||
}
|
||||
/** Stage everything (adds, modifications, deletions). */
|
||||
async stageAll() {
|
||||
await this.run(["add", "-A"]);
|
||||
}
|
||||
/**
|
||||
* True if the vault is mid-merge (an unresolved merge from a previous run,
|
||||
* SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged
|
||||
* (conflicted) index entries (`git ls-files -u`). The pull cycle checks this
|
||||
* BEFORE any checkout so a left-over merge produces a clear, actionable
|
||||
* message instead of a raw "you need to resolve your current index first"
|
||||
* failure deep inside `checkout`. This is what makes re-runs converge
|
||||
* (resumability, SPEC §12).
|
||||
*/
|
||||
async isMergeInProgress() {
|
||||
// MERGE_HEAD exists exactly while a merge is in progress.
|
||||
const mergeHead = await this.runRaw([
|
||||
"rev-parse",
|
||||
"--verify",
|
||||
"--quiet",
|
||||
"MERGE_HEAD",
|
||||
]);
|
||||
if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0)
|
||||
return true;
|
||||
// Fallback / belt-and-suspenders: any unmerged index entries also mean the
|
||||
// working tree is mid-conflict and a checkout would refuse.
|
||||
const unmerged = await this.runRaw(["ls-files", "-u"]);
|
||||
return unmerged.code === 0 && unmerged.stdout.trim().length > 0;
|
||||
}
|
||||
/**
|
||||
* Commit the currently STAGED changes with an explicit author/committer
|
||||
* identity and the given trailers appended to the message body (SPEC §7.3
|
||||
* provenance). Returns `true` if a commit was made, `false` if there was
|
||||
* nothing to commit (graceful no-op). The caller is expected to have staged
|
||||
* its changes first (e.g. via `stageAll`).
|
||||
*/
|
||||
async commit(message, opts) {
|
||||
// Nothing staged -> nothing to commit. Treat as a no-op (SPEC §11: a
|
||||
// deterministic re-pull of unchanged pages produces identical bytes, so
|
||||
// git sees no diff and we must not error).
|
||||
const staged = await this.runRaw([
|
||||
"diff",
|
||||
"--cached",
|
||||
"--quiet",
|
||||
]);
|
||||
// `diff --cached --quiet` exits 0 when the index matches HEAD (nothing
|
||||
// staged), 1 when there are staged changes.
|
||||
if (staged.code === 0)
|
||||
return false;
|
||||
await this.commitRaw(message, opts);
|
||||
return true;
|
||||
}
|
||||
/**
|
||||
* Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
|
||||
* Builds the full message with appended trailers and sets author + committer
|
||||
* identity via env vars (so the committer matches the author, not the repo
|
||||
* default).
|
||||
*/
|
||||
async commitRaw(message, opts) {
|
||||
const fullMessage = buildCommitMessage(message, opts.trailers);
|
||||
// `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath
|
||||
// (or any injected hook) must never interfere with engine commits in our
|
||||
// dedicated vault repo.
|
||||
const args = ["commit", "--no-verify", "-m", fullMessage];
|
||||
if (opts.allowEmpty)
|
||||
args.push("--allow-empty");
|
||||
// Route through the single `runRaw` primitive; set author + committer
|
||||
// identity via env vars (so the committer matches the author, not the repo
|
||||
// default). Throw via the same unified message on a non-zero exit.
|
||||
const r = await this.runRaw(args, {
|
||||
env: {
|
||||
GIT_AUTHOR_NAME: opts.authorName,
|
||||
GIT_AUTHOR_EMAIL: opts.authorEmail,
|
||||
GIT_COMMITTER_NAME: opts.authorName,
|
||||
GIT_COMMITTER_EMAIL: opts.authorEmail,
|
||||
},
|
||||
});
|
||||
if (r.code !== 0) {
|
||||
const detail = (r.stderr || r.stdout || "").trim();
|
||||
throw new Error(`git ${args.join(" ")} failed: ${detail}`);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Merge `fromBranch` into the current branch (`git merge --no-edit`).
|
||||
* Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict
|
||||
* state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict
|
||||
* markers are left in the worktree for manual resolution by a later increment,
|
||||
* and — critically — nothing is pushed to Docmost (we never write to Docmost
|
||||
* anyway).
|
||||
*/
|
||||
async merge(fromBranch) {
|
||||
const r = await this.runRaw(["merge", "--no-edit", fromBranch]);
|
||||
const output = `${r.stdout}\n${r.stderr}`.trim();
|
||||
if (r.code === 0) {
|
||||
return { ok: true, conflict: false, output };
|
||||
}
|
||||
// A non-zero exit on merge most commonly means a conflict. Confirm by
|
||||
// checking for unmerged paths (porcelain "U" status) so we don't mislabel
|
||||
// an unrelated failure as a conflict.
|
||||
const conflict = await this.hasUnmergedPaths();
|
||||
return { ok: false, conflict, output };
|
||||
}
|
||||
/** True if the index has any unmerged (conflicted) paths. */
|
||||
async hasUnmergedPaths() {
|
||||
const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]);
|
||||
return r.code === 0 && r.stdout.trim().length > 0;
|
||||
}
|
||||
/**
|
||||
* List tracked files on the current branch (paths relative to the vault
|
||||
* root, forward-slash separated). An optional glob (a git pathspec) narrows
|
||||
* the listing, e.g. `"*.md"`.
|
||||
*
|
||||
* The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
|
||||
* (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
|
||||
* returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
|
||||
* which `src/pull.ts` `readExisting` would then parse as garbage paths,
|
||||
* breaking move/duplicate detection. We defeat that two ways at once:
|
||||
* - `core.quotepath=false` disables the octal-escape/quoting. It is now the
|
||||
* `runRaw` argv baseline (prepended to EVERY invocation), so we no longer
|
||||
* pass it inline here.
|
||||
* - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
|
||||
* ambiguity), which we split on `\0`.
|
||||
* We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
|
||||
* the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
|
||||
* are returned verbatim — git already emits forward slashes.
|
||||
*/
|
||||
async listTrackedFiles(glob) {
|
||||
const r = await this.runRaw(["ls-files", "-z", ...(glob ? [glob] : [])]);
|
||||
if (r.code !== 0) {
|
||||
const detail = (r.stderr || r.stdout || "").trim();
|
||||
throw new Error(`git ls-files failed: ${detail}`);
|
||||
}
|
||||
return r.stdout.split("\0").filter((p) => p.length > 0);
|
||||
}
|
||||
/**
|
||||
* Diff two refs with `--name-status -M -z` and parse the NUL-delimited output
|
||||
* (SPEC §6: the FS→Docmost push direction diffs `main` against
|
||||
* `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed
|
||||
* file is reported as a single `R` row with both its old and new path instead
|
||||
* of a delete+add pair — that distinction is what lets the push planner tell a
|
||||
* move from a delete+create (SPEC §8 "Move vs delete").
|
||||
*
|
||||
* `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has
|
||||
* Cyrillic file names) with NO quoting/escaping. The record shape differs by
|
||||
* status:
|
||||
* - A/M/D: `status\0path\0`
|
||||
* - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
|
||||
* We read the RAW stdout (not the trimming `run()` helper, which would mangle
|
||||
* the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the
|
||||
* tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim.
|
||||
*/
|
||||
async diffNameStatus(fromRef, toRef) {
|
||||
const r = await this.runRaw([
|
||||
"diff",
|
||||
"--name-status",
|
||||
"-M",
|
||||
"-z",
|
||||
fromRef,
|
||||
toRef,
|
||||
]);
|
||||
if (r.code !== 0) {
|
||||
const detail = (r.stderr || r.stdout || "").trim();
|
||||
throw new Error(`git diff --name-status failed: ${detail}`);
|
||||
}
|
||||
// Tokens alternate: <status> <path...> <status> <path...> ... With `-z`,
|
||||
// each token (status code AND each path) is its own NUL-delimited field.
|
||||
const tokens = r.stdout.split("\0").filter((t) => t.length > 0);
|
||||
const entries = [];
|
||||
let i = 0;
|
||||
while (i < tokens.length) {
|
||||
const raw = tokens[i++];
|
||||
// The status token is e.g. `A`, `M`, `D`, or `R100` / `C075`. The leading
|
||||
// letter is the change kind; any trailing digits are the similarity score.
|
||||
const letter = raw[0];
|
||||
if (letter === "R" || letter === "C") {
|
||||
const score = Number.parseInt(raw.slice(1), 10);
|
||||
const oldPath = tokens[i++];
|
||||
const path = tokens[i++];
|
||||
if (oldPath === undefined || path === undefined)
|
||||
break; // malformed tail
|
||||
entries.push({
|
||||
status: letter,
|
||||
path,
|
||||
oldPath,
|
||||
...(Number.isFinite(score) ? { score } : {}),
|
||||
});
|
||||
}
|
||||
else if (letter === "A" || letter === "M" || letter === "D") {
|
||||
const path = tokens[i++];
|
||||
if (path === undefined)
|
||||
break; // malformed tail
|
||||
entries.push({ status: letter, path });
|
||||
}
|
||||
else {
|
||||
// Unknown/other status (e.g. T type-change, U unmerged) — consume one
|
||||
// path token defensively so the walk stays aligned, but do not emit it
|
||||
// (the push planner only handles A/M/D/R/C).
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return entries;
|
||||
}
|
||||
/**
|
||||
* Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist.
|
||||
* `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an
|
||||
* unknown ref, so a non-zero exit maps cleanly to `null`. Used to read
|
||||
* `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push.
|
||||
*/
|
||||
async revParse(ref) {
|
||||
const r = await this.runRaw(["rev-parse", "--verify", "--quiet", ref]);
|
||||
if (r.code !== 0)
|
||||
return null;
|
||||
const sha = r.stdout.trim();
|
||||
return sha.length > 0 ? sha : null;
|
||||
}
|
||||
/**
|
||||
* Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`,
|
||||
* named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5:
|
||||
* "что из `main` уже отражено в Docmost").
|
||||
*/
|
||||
async readRef(ref) {
|
||||
return this.revParse(ref);
|
||||
}
|
||||
/**
|
||||
* Point `ref` at `target` (`git update-ref <ref> <target>`). Used to advance
|
||||
* `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
|
||||
* (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts.
|
||||
*/
|
||||
async updateRef(ref, target) {
|
||||
await this.run(["update-ref", ref, target]);
|
||||
}
|
||||
/**
|
||||
* Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward,
|
||||
* i.e. the current `branch` tip is an ancestor of `toCommit` (verified via
|
||||
* `git merge-base --is-ancestor <branch> <toCommit>`). Used to advance the
|
||||
* `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a
|
||||
* push succeeds, Docmost already contains the pushed `main` content, so the
|
||||
* mirror must reflect it — otherwise the NEXT pull would diff our own write
|
||||
* back and re-pull it (loop-guard).
|
||||
*
|
||||
* SAFETY — never force, never clobber divergent history:
|
||||
* - If `branch` IS an ancestor of `toCommit`, advance it with
|
||||
* `git update-ref refs/heads/<branch> <toCommit>`. The `docmost` branch is
|
||||
* NOT checked out during a push (push works on `main`), so updating the ref
|
||||
* directly is safe and avoids any working-tree touch.
|
||||
* - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward),
|
||||
* do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and
|
||||
* let the caller log it. We must never overwrite a `docmost` history that
|
||||
* has commits the push base does not contain.
|
||||
*
|
||||
* Returns `{ ok: true }` when the branch was advanced (or already at
|
||||
* `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise.
|
||||
* A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason.
|
||||
*/
|
||||
async fastForwardBranch(branch, toCommit) {
|
||||
const branchRef = `refs/heads/${branch}`;
|
||||
// Resolve both endpoints first so a missing ref is a clean refusal, not a
|
||||
// confusing `merge-base` failure.
|
||||
const branchSha = await this.revParse(branchRef);
|
||||
if (branchSha === null) {
|
||||
return { ok: false, reason: `branch ${branch} does not exist` };
|
||||
}
|
||||
const targetSha = await this.revParse(toCommit);
|
||||
if (targetSha === null) {
|
||||
return { ok: false, reason: `target ${toCommit} does not resolve` };
|
||||
}
|
||||
// Already at the target -> a no-op fast-forward (still ok).
|
||||
if (branchSha === targetSha)
|
||||
return { ok: true };
|
||||
// `merge-base --is-ancestor A B` exits 0 iff A is an ancestor of B. Only a
|
||||
// true ancestor is a fast-forward; anything else is divergent and refused.
|
||||
const ancestor = await this.runRaw([
|
||||
"merge-base",
|
||||
"--is-ancestor",
|
||||
branchSha,
|
||||
targetSha,
|
||||
]);
|
||||
if (ancestor.code !== 0) {
|
||||
return { ok: false, reason: "not-fast-forward" };
|
||||
}
|
||||
// Safe to advance: the branch is not checked out during push, so a direct
|
||||
// ref update avoids a checkout/working-tree touch.
|
||||
await this.updateRef(branchRef, targetSha);
|
||||
return { ok: true };
|
||||
}
|
||||
/**
|
||||
* Read a file's content at a specific ref (`git show <ref>:<path>`), or `null`
|
||||
* if the path does not exist there. Used by the push direction to read the
|
||||
* PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its
|
||||
* `docmost:meta` — and therefore its `pageId` — can be recovered to translate
|
||||
* the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones
|
||||
* that had a pageId, are deleted in Docmost). A non-zero exit (path absent at
|
||||
* that ref) maps to `null` rather than throwing.
|
||||
*/
|
||||
async showFileAtRef(ref, path) {
|
||||
// `git show <ref>:<path>` requires the path relative to the repo root; pass
|
||||
// it verbatim (forward-slash, matching `listTrackedFiles` / diff output).
|
||||
const r = await this.runRaw(["show", `${ref}:${path}`]);
|
||||
if (r.code !== 0)
|
||||
return null;
|
||||
return r.stdout;
|
||||
}
|
||||
}
|
||||
/**
 * Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
 * Used by the single `runRaw` primitive every git command flows through, so
 * these pins apply uniformly (including the `git --version` preflight).
 *
 * cwd-isolation is this module's central safety guarantee: every git command
 * MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An
 * inherited `GIT_DIR` / `GIT_WORK_TREE` in `process.env` would silently
 * redirect the operation away from `cwd` (e.g. to the source repo or another
 * checkout), defeating that guarantee. So we always strip them, regardless of
 * whatever else the caller adds (author/committer identity, etc.).
 *
 * The same hazard exists for the other repository-location variables git
 * honors (`GIT_COMMON_DIR`, `GIT_INDEX_FILE`, `GIT_OBJECT_DIRECTORY`,
 * `GIT_ALTERNATE_OBJECT_DIRECTORIES`) — git itself exports several of them
 * while running hooks. Those are stripped from the INHERITED environment only;
 * a value passed explicitly via `extra` is kept.
 *
 * Exported for unit testing.
 *
 * @param extra optional variables layered on top of the pinned baseline
 *   (they override the pins, except `GIT_DIR` / `GIT_WORK_TREE`, which are
 *   always removed).
 * @returns a fresh environment object; `process.env` itself is not mutated.
 */
export function vaultGitEnv(extra) {
    // Copy first so the deletions below never touch the real process.env.
    const inherited = { ...process.env };
    for (const key of [
        "GIT_COMMON_DIR",
        "GIT_INDEX_FILE",
        "GIT_OBJECT_DIRECTORY",
        "GIT_ALTERNATE_OBJECT_DIRECTORIES",
    ]) {
        delete inherited[key];
    }
    const env = {
        ...inherited,
        // Locale-independent output (defense in depth). We never parse localized
        // prose, but pinning the locale prevents a future regression where some
        // git message we DO key on is translated by an inherited LC_ALL/LANG.
        LC_ALL: "C",
        LANG: "C",
        // Never page (we already pass --no-pager, but a stray GIT_PAGER could still
        // bite) and never block on an interactive prompt (e.g. credentials) — the
        // daemon runs unattended and must not hang.
        GIT_PAGER: "cat",
        GIT_TERMINAL_PROMPT: "0",
        ...extra,
    };
    // Removed AFTER merging `extra`, so not even the caller can re-introduce them.
    delete env.GIT_DIR;
    delete env.GIT_WORK_TREE;
    return env;
}
|
||||
/**
 * Build a commit message body with trailer lines appended (SPEC §7.3). The
 * trailers are separated from the subject by a blank line so `git interpret-
 * trailers` / `git log --format=%(trailers)` parse them as trailers.
 *
 * Empty or whitespace-only trailer entries are dropped: a blank line in the
 * middle of the trailer block would split it, and git would then only treat
 * the lines after the last blank line as trailers.
 *
 * Exported for unit testing.
 *
 * @param subject commit subject, used verbatim.
 * @param trailers optional trailer lines (e.g. `Key: value`).
 * @returns `subject` alone when no usable trailer remains, otherwise
 *   `subject`, a blank line, then the trailers one per line.
 */
export function buildCommitMessage(subject, trailers) {
    if (!trailers || trailers.length === 0) {
        return subject;
    }
    const usable = trailers.filter((t) => t != null && String(t).trim().length > 0);
    if (usable.length === 0) {
        return subject;
    }
    return `${subject}\n\n${usable.join("\n")}`;
}
|
||||
44
packages/git-sync/build/engine/layout.d.ts
vendored
Normal file
44
packages/git-sync/build/engine/layout.d.ts
vendored
Normal file
@@ -0,0 +1,44 @@
|
||||
/**
|
||||
* Pure page-tree -> vault path mapping (SPEC §12).
|
||||
*
|
||||
* Given the flat list of page nodes for a space (as returned by
|
||||
* `listAllSpacePages`), compute for every page a deterministic, collision-free
|
||||
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
|
||||
* page's own name, no extension). This module is intentionally PURE and
|
||||
* dependency-free apart from the sanitization helpers, so the whole tree ->
|
||||
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
|
||||
* lives in each file's meta block (pageId / slugId).
|
||||
*/
|
||||
/** Flat page node as returned by `listAllSpacePages` (no content). */
export interface PageNode {
    /** Page id; the layout map is keyed by it. */
    id: string;
    /** Page title, sanitized to form the file/folder name. */
    title?: string;
    /** Stable slug id, preferred as the disambiguation suffix. */
    slugId?: string;
    /** Parent page id; absent or `null` for a root page. */
    parentPageId?: string | null;
    /** Whether the page has child pages (such a page becomes a folder note). */
    hasChildren?: boolean;
}
|
||||
/** A page's resolved vault destination: folder path + file stem. */
export interface VaultEntry {
    /** Folder path, root -> leaf (the page's ancestors). Empty for a root page. */
    segments: string[];
    /** The page's own file name without extension. */
    stem: string;
}
|
||||
/**
 * Build the full vault layout for a space.
 *
 * Returns a Map keyed by pageId -> `{ segments, stem }`. The result is
 * deterministic for a given input and guarantees every full destination path
 * (`[...segments, stem].join("/")`) is unique, so no page can silently overwrite
 * another.
 *
 * Disambiguation is layered:
 *   1. Sibling collisions (same sanitized title under the same parent) are
 *      resolved with a stable ` ~<slugId>` suffix (the suffix is itself
 *      sanitized, since slugId/id is untrusted data that must never inject a
 *      path separator).
 *   2. A final full-path pass catches residual collisions that sibling-scoping
 *      cannot see — e.g. two pages whose parents are BOTH outside the input set
 *      both bucket at the root with `segments: []`.
 *
 * @param pages flat list of page nodes for one space.
 * @returns map from page id to its resolved vault destination.
 */
export declare function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry>;
|
||||
170
packages/git-sync/build/engine/layout.js
Normal file
170
packages/git-sync/build/engine/layout.js
Normal file
@@ -0,0 +1,170 @@
|
||||
/**
|
||||
* Pure page-tree -> vault path mapping (SPEC §12).
|
||||
*
|
||||
* Given the flat list of page nodes for a space (as returned by
|
||||
* `listAllSpacePages`), compute for every page a deterministic, collision-free
|
||||
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
|
||||
* page's own name, no extension). This module is intentionally PURE and
|
||||
* dependency-free apart from the sanitization helpers, so the whole tree ->
|
||||
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
|
||||
* lives in each file's meta block (pageId / slugId).
|
||||
*/
|
||||
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||
/**
 * Build the full vault layout for a space.
 *
 * Returns a Map keyed by pageId -> `{ segments, stem }`. The result is
 * deterministic for a given input and guarantees every full destination path
 * (`[...segments, stem].join("/")`) is unique, so no page can silently overwrite
 * another.
 *
 * Disambiguation is layered:
 *   1. Sibling collisions (same sanitized title under the same parent) are
 *      resolved with a stable ` ~<slugId>` suffix (the suffix is itself
 *      sanitized, since slugId/id is untrusted data that must never inject a
 *      path separator).
 *   2. A final full-path pass catches residual collisions that sibling-scoping
 *      cannot see — e.g. two pages whose parents are BOTH outside the input set
 *      both bucket at the root with `segments: []`.
 *
 * @param pages flat list of page nodes; falsy entries, entries without an
 *   `id`, and repeated ids (first one wins) are ignored.
 * @returns map from page id to `{ segments, stem }`.
 * @throws Error if an ancestor has no resolved name (an internal invariant).
 */
export function buildVaultLayout(pages) {
    // Index pages by id so the parent chain can be walked. Guard against
    // duplicate ids in the input (first one wins).
    const byId = new Map();
    for (const p of pages) {
        if (p && p.id && !byId.has(p.id)) {
            byId.set(p.id, p);
        }
    }
    // Resolve each node's display name once, deterministically, tracking sibling
    // collisions per parent. `usedBySibling` maps a parent key -> set of names
    // already taken under that parent. The bucket key is the node's parent ONLY
    // when that parent is actually present in `byId`; otherwise (null parent, or
    // an orphan whose parent is outside the input set) the node buckets at
    // `"__root__"`. This is critical: orphans land at the vault root (see
    // `folderSegmentsFor`), so they MUST share the root bucket with real root
    // pages to be disambiguated against each other here — making `nameById` final
    // before any `segments` are computed, so no ancestor name can drift later.
    const usedBySibling = new Map();
    const nameById = new Map();
    for (const p of pages) {
        if (p && p.id && !nameById.has(p.id)) {
            const parentKey = p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
            nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
        }
    }
    // Every id we index above MUST get a resolved name; this helper returns it
    // and THROWS if it is somehow absent, rather than silently recomputing a
    // DIFFERENT, non-disambiguated name (which would desync a folder segment from
    // its target file).
    const nameOf = (id) => {
        const name = nameById.get(id);
        if (name === undefined) {
            throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
        }
        return name;
    };
    // Build the folder path for a page by walking parentPageId to the root. The
    // page's OWN name is the file stem; its ancestors become folders. A `visited`
    // guard prevents an infinite loop on a malformed parent cycle.
    const folderSegmentsFor = (node) => {
        const ancestors = [];
        const visited = new Set();
        let current = node.parentPageId
            ? byId.get(node.parentPageId)
            : undefined;
        while (current && current.id && !visited.has(current.id)) {
            visited.add(current.id);
            ancestors.unshift(nameOf(current.id));
            current = current.parentPageId
                ? byId.get(current.parentPageId)
                : undefined;
        }
        return ancestors;
    };
    // First pass: compute the provisional { segments, stem } for every node.
    const layout = new Map();
    for (const p of pages) {
        if (!p || !p.id || layout.has(p.id)) {
            continue;
        }
        layout.set(p.id, {
            segments: folderSegmentsFor(p),
            stem: nameOf(p.id),
        });
    }
    // FOLDER-NOTE transform (native-Obsidian layout): a page WITH CHILDREN lives at
    // `<…>/<stem>/<stem>.md` — its body is the folder-note INSIDE its own folder
    // (LostPaul Folder Notes convention), and its children sit alongside it in that
    // folder. A leaf stays `<…>/<stem>.md`. Children's segments already point into
    // the parent's folder (folderSegmentsFor walks ancestor NAMES), so only the
    // parent's own file relocates here; the sibling name pass above already made
    // the parent name unique, so folder == file name stays consistent.
    for (const p of pages) {
        if (!p || !p.id) {
            continue;
        }
        const entry = layout.get(p.id);
        if (entry && p.hasChildren) {
            entry.segments = [...entry.segments, entry.stem];
        }
    }
    // Final full-path uniqueness pass — a belt-and-suspenders safety net. Note
    // that cross-bucket (orphan/root) collisions are now resolved in the name pass
    // above (orphans share the "__root__" bucket), so ancestor names are final
    // before `segments` are built and this pass should rarely/never re-stem an
    // ancestor. It only re-stems the colliding LATER leaf via the sanitized
    // slugId/id, then (if still colliding) appends the id.
    //
    // Process FOLDER-NOTES (pages with children) FIRST so a parent claims its
    // canonical `<name>/<name>.md` before a same-named CHILD — the child (a leaf)
    // is the one that disambiguates, never the folder-note.
    const usedPaths = new Set();
    const seenIds = new Set();
    const pathKey = (e) => [...e.segments, e.stem].join("/");
    // `filter` returns a new array, so the sort below never reorders the
    // caller's `pages`.
    const ordered = pages
        .filter((p) => Boolean(p && p.id))
        .sort((a, b) => Number(Boolean(b.hasChildren)) - Number(Boolean(a.hasChildren)));
    for (const p of ordered) {
        if (seenIds.has(p.id)) {
            continue;
        }
        seenIds.add(p.id);
        const entry = layout.get(p.id);
        if (!entry) {
            continue;
        }
        if (usedPaths.has(pathKey(entry))) {
            // First attempt: disambiguate the stem with the sanitized slugId (or id).
            entry.stem = disambiguate(entry.stem, sanitizeTitle(p.slugId ?? p.id));
            if (usedPaths.has(pathKey(entry))) {
                // Still colliding: append the (sanitized) id as a last resort. The id
                // is globally unique, so this always resolves the collision.
                entry.stem = disambiguate(entry.stem, sanitizeTitle(p.id));
            }
        }
        usedPaths.add(pathKey(entry));
    }
    return layout;
}
|
||||
/**
 * Compute a deterministic, collision-free name for a node among its SIBLINGS.
 * `usedBySibling` maps a parent key -> set of names already taken, so two
 * siblings that sanitize to the same name get a stable ` ~slugId` suffix
 * (SPEC §12). The suffix is itself passed through `sanitizeTitle`, because the
 * slugId/id is a second untrusted-data channel that must never leak a path
 * separator into the name. `parentKey` is supplied by the caller (it resolves
 * to `"__root__"` for root pages AND for orphans whose parent is outside the
 * input set, so they share one bucket). The name is COSMETIC; identity lives in
 * the meta block.
 *
 * If the slugId-suffixed name is ITSELF already taken (e.g. a sibling whose
 * literal title equals the suffixed form), the sanitized page id is appended
 * as a second suffix — the same last-resort step `buildVaultLayout` applies in
 * its full-path pass — so the returned name really is unique in its bucket.
 *
 * @param node page node being named (`title`, `slugId`, `id` are read).
 * @param parentKey bucket key shared by this node's siblings.
 * @param usedBySibling per-bucket registry of taken names; MUTATED: the
 *   returned name is added to the bucket (the bucket is created on first use).
 * @returns the resolved name for `node`.
 */
function nameForNode(node, parentKey, usedBySibling) {
    let used = usedBySibling.get(parentKey);
    if (!used) {
        used = new Set();
        usedBySibling.set(parentKey, used);
    }
    let name = sanitizeTitle(node.title ?? "");
    if (used.has(name)) {
        // Sibling collision: disambiguate with the stable, sanitized slugId (fall
        // back to the sanitized pageId if no slugId is present).
        name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id));
        if (used.has(name)) {
            // The suffixed form is taken too: fall back to the page id so two
            // siblings can never resolve to the same name (and the same folder).
            name = disambiguate(name, sanitizeTitle(node.id));
        }
    }
    used.add(name);
    return name;
}
|
||||
13
packages/git-sync/build/engine/loop-guard.d.ts
vendored
Normal file
13
packages/git-sync/build/engine/loop-guard.d.ts
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
/**
 * Stable hash of a page's markdown BODY (SPEC §10 "body hash"). Deterministic:
 * the same input string always yields the same digest, a different input a
 * different one. Used to recognize our own write later (loop suppression).
 *
 * We hash the body STRING as-is (UTF-8) with SHA-256 and return lowercase hex.
 * SPEC §10 keys on the body hash rather than file bytes; callers decide WHAT
 * counts as "the body" (here it is the exact string passed in — typically the
 * self-contained markdown that was pushed). No normalization is applied: the
 * caller is responsible for passing a canonical/stable representation if it
 * wants hash equality across cosmetic-only differences.
 *
 * @param markdownBody the exact string to hash.
 * @returns the SHA-256 digest as lowercase hex.
 */
export declare function bodyHash(markdownBody: string): string;
|
||||
28
packages/git-sync/build/engine/loop-guard.js
Normal file
28
packages/git-sync/build/engine/loop-guard.js
Normal file
@@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Loop-guard primitives (SPEC §10). The sync engine must never re-pull its OWN
|
||||
* write as if it were a remote edit: after a push, the next poll will see the
|
||||
* page it just wrote with a fresh `updatedAt`. To suppress that, we key on two
|
||||
* signals — the body HASH of what we pushed (this module) and the `updatedAt`
|
||||
* returned by the write — recorded per page at push time.
|
||||
*
|
||||
* This module owns the PURE, deterministic body-hash. The CONSUMPTION on the
|
||||
* pull side (comparing an incoming page's body hash against the last pushed hash
|
||||
* to decide "this is our own write, ignore it") is a future increment — here we
|
||||
* only PRODUCE the hash and the per-page push record (see `src/push.ts`).
|
||||
*/
|
||||
import { createHash } from "node:crypto";
|
||||
/**
 * Stable hash of a page's markdown BODY (SPEC §10 "body hash"). Deterministic:
 * the same input string always yields the same digest, a different input a
 * different one. Used to recognize our own write later (loop suppression).
 *
 * We hash the body STRING as-is (UTF-8) with SHA-256 and return lowercase hex.
 * SPEC §10 keys on the body hash rather than file bytes; callers decide WHAT
 * counts as "the body" (here it is the exact string passed in — typically the
 * self-contained markdown that was pushed). No normalization is applied: the
 * caller is responsible for passing a canonical/stable representation if it
 * wants hash equality across cosmetic-only differences.
 *
 * @param markdownBody the exact string to hash.
 * @returns the SHA-256 digest as lowercase hex.
 */
export function bodyHash(markdownBody) {
    return createHash("sha256").update(markdownBody, "utf8").digest("hex");
}
|
||||
136
packages/git-sync/build/engine/pull.d.ts
vendored
Normal file
136
packages/git-sync/build/engine/pull.d.ts
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
import type { GitSyncClient } from "./client.types.js";
|
||||
import { type PageNode } from "./layout.js";
|
||||
import { VaultGit } from "./git.js";
|
||||
import { type MovedEntry, type DeletionDecision } from "./reconcile.js";
|
||||
/**
 * Injectable IO for `readExisting` (R-Pull-1, test-strategy report §5). The real
 * `main` wires these to `git.listTrackedFiles("*.md")` and an `fs.readFile`
 * rooted at the vault; tests pass fakes so the parsing/skip rules are unit-
 * testable without a real git repo or filesystem.
 */
export interface ReadExistingDeps {
    /** List tracked .md paths (forward-slash, vault-relative). */
    listTracked: () => Promise<string[]>;
    /** Read a tracked file's text by its (forward-slash) vault-relative path. */
    readFile: (relPath: string) => Promise<string>;
}
|
||||
/**
|
||||
* Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
|
||||
* its `gitmost_id` frontmatter (native-Obsidian format). Files without a
|
||||
* `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
|
||||
* hand-written Obsidian file; PUSH adopts those separately).
|
||||
*
|
||||
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
|
||||
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
|
||||
* -> skipped, NOT thrown; the next pull converges;
|
||||
* - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
|
||||
*/
|
||||
export declare function readExisting(deps: ReadExistingDeps): Promise<{
|
||||
pageId: string;
|
||||
relPath: string;
|
||||
}[]>;
|
||||
/**
 * Everything the PURE `computePullActions` needs (R-Pull-2) — plain data, no
 * IO: the live tree nodes and their completeness flag (both produced by
 * `listSpaceTree`) plus the tracked files already parsed by `readExisting`.
 */
export interface PullActionsInput {
  /** Live page nodes of the space, as returned by `listSpaceTree`. */
  pages: Array<PageNode>;
  /** True when the live tree fetch was COMPLETE (drives the SPEC §8 suppression). */
  treeComplete: boolean;
  /** Tracked files recovered by `readExisting`, one `{ pageId, relPath }` per file. */
  existing: Array<{
    pageId: string;
    relPath: string;
  }>;
}
|
||||
/**
 * Result of the PURE `computePullActions` (no IO): the reconciliation plan
 * together with the SPEC §8 absence-deletion decision. The suppression is
 * already folded in, so `toDelete` is the POST-suppression set — it is empty
 * whenever `deletionDecision.apply` is false.
 */
export interface PullActions {
  /** Pages to (re)write at their relPath: adds, updates and move targets. */
  toWrite: Array<{
    pageId: string;
    relPath: string;
  }>;
  /** Moves: the new path is written first; the old path is removed only if that write succeeded. */
  moved: Array<MovedEntry>;
  /**
   * Absence-based paths to delete, AFTER suppression. Because a suppressed
   * cycle leaves this empty, the caller can apply it unconditionally.
   */
  toDelete: Array<string>;
  /** Why absence deletions were, or were not, applied (used for logging and tests). */
  deletionDecision: DeletionDecision;
  /** Number of tracked files (used in the suppression log messages). */
  existingCount: number;
  /** Number of absence deletes planned BEFORE suppression (used in the log message). */
  plannedDeleteCount: number;
}
|
||||
/**
|
||||
* PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live
|
||||
* tree nodes + completeness + existing tracked files and returns the full set of
|
||||
* decisions with NO IO:
|
||||
*
|
||||
* - builds the vault layout (deterministic relPath per live page),
|
||||
* - `planReconciliation` -> toWrite / moved / absence-toDelete,
|
||||
* - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch +
|
||||
* empty-live + mass-delete guard), folded IN here so `toDelete` is the
|
||||
* POST-suppression set (empty when suppressed).
|
||||
*
|
||||
* Moves are NOT governed by the suppression: a moved page is present in `live`,
|
||||
* so its old-path removal is real (the caller still gates it on the write
|
||||
* succeeding). The expensive content fetch / file write / git ops happen in the
|
||||
* thin `applyPullActions`.
|
||||
*/
|
||||
export declare function computePullActions(input: PullActionsInput): PullActions;
|
||||
/**
 * IO seam consumed by `applyPullActions` (R-Pull-2).
 *
 * In production `main` wires these to the live client, the vault git wrapper
 * and `node:fs/promises`. Tests pass fakes that RECORD their calls, so the
 * ordering and the move-on-success data-loss guard can be verified without
 * real git, filesystem or network access.
 */
export interface ApplyPullActionsDeps {
  /** Read-only page fetch used to obtain each page's content. */
  client: Pick<GitSyncClient, "getPageJson">;
  /** The git operations the applier performs on the vault repo. */
  git: Pick<VaultGit, "stageAll" | "commit" | "checkout" | "merge">;
  /** Write a file by ABSOLUTE path (mkdir of the parent is done internally). */
  writeFile: (absPath: string, text: string) => Promise<void>;
  /** Recursively create the directory at an ABSOLUTE path. */
  mkdir: (absDir: string) => Promise<void>;
  /** Remove the file at an ABSOLUTE path (force: a missing file is a no-op). */
  rm: (absPath: string) => Promise<void>;
}
|
||||
/** Outcome of one `applyPullActions` run: counters plus git results (for the summary and tests). */
export interface ApplyResult {
  /** Pages written successfully. */
  written: number;
  /** Old paths of moved pages that were removed. */
  movedApplied: number;
  /** Absence-based deletions that were carried out. */
  deleted: number;
  /** Pages whose fetch/stabilize/write failed. */
  failed: number;
  /** Value returned by the `commit` git operation. */
  committed: boolean;
  /** Result of merging the mirror branch back. */
  merge: {
    ok: boolean;
    conflict: boolean;
    output: string;
  };
}
|
||||
/**
|
||||
* THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current
|
||||
* order, with all the original safety guards preserved bit-for-bit:
|
||||
*
|
||||
* 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
|
||||
* (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
|
||||
* never aborts the pull (bounded-concurrency pool, fault-tolerant).
|
||||
* 2. apply MOVE old-path removals — ONLY when the planner marked the old path
|
||||
* removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a
|
||||
* failed move-write keeps the old path so the page never vanishes).
|
||||
* 3. apply (post-suppression) absence deletes.
|
||||
* 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
|
||||
* counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
|
||||
*
|
||||
* `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps.
|
||||
*/
|
||||
export declare function applyPullActions(deps: ApplyPullActionsDeps, actions: PullActions, vaultRoot: string): Promise<ApplyResult>;
|
||||
284
packages/git-sync/build/engine/pull.js
Normal file
284
packages/git-sync/build/engine/pull.js
Normal file
@@ -0,0 +1,284 @@
|
||||
/**
|
||||
* Pull cycle — Docmost -> vault (SPEC §6 "Docmost -> ФС").
|
||||
*
|
||||
* This increment turns the read-only mirror into the git-backed pull cycle:
|
||||
*
|
||||
* 1. ensureRepo(vault); refuse if a merge is in progress (SPEC §9/§12);
|
||||
* ensureBranch("docmost", "main") (SPEC §5 branches)
|
||||
* 2. checkout docmost
|
||||
* 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the
|
||||
* desired `live` files (relPath via the pure sanitize/disambiguation layout)
|
||||
* 4. parse `existing` tracked .md files (pageId + relPath from gitmost_id frontmatter)
|
||||
* 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete
|
||||
* is absence-only, moves are separate
|
||||
* 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree
|
||||
* fetch (SPEC §8) and behind the mass-delete guard (defense in depth)
|
||||
* 7. write each live page in its fixpoint form (normalize-on-write, SPEC §11);
|
||||
* apply moved-old-path removals (only when the move write SUCCEEDED) and
|
||||
* absence-delete removals (only when the decision allowed them)
|
||||
* 8. stageAll + commit on `docmost` with the provenance trailer (SPEC §7.3)
|
||||
* 9. checkout main + merge docmost (conflicts are surfaced, NOT auto-resolved,
|
||||
* SPEC §9); push is deferred (SPEC §7)
|
||||
* 10. one-line summary
|
||||
*
|
||||
* DIRECTION IS Docmost -> vault ONLY. Nothing here ever writes to Docmost
|
||||
* (read-only: listSpaceTree + getPageJson). All git operations run against
|
||||
* the vault repo (`cwd = vaultPath`), never the source repo (see ./git.ts).
|
||||
*
|
||||
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
|
||||
* the gitmost server drives the engine in-process (there is no standalone CLI
|
||||
* entry point).
|
||||
*/
|
||||
import { dirname } from "node:path";
|
||||
import { sep } from "node:path";
|
||||
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
|
||||
import { buildVaultLayout } from "./layout.js";
|
||||
import { BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./git.js";
|
||||
import { planReconciliation, decideAbsenceDeletions, } from "./reconcile.js";
|
||||
import { stabilizePageBody } from "./stabilize.js";
|
||||
// --- Tuning knobs -----------------------------------------------------------
// Upper bound on pages fetched/stabilized at the same time, so a large space
// never opens thousands of simultaneous requests/conversions.
const CONCURRENCY = 6;
// Incremental progress is logged once per this many completed pages.
const PROGRESS_EVERY = 25;

// --- Git identifiers --------------------------------------------------------
// The engine-only mirror branch (SPEC §5): written by the engine, never by humans.
const DOCMOST_BRANCH = "docmost";
// Machine-readable provenance trailer that the loop-guard keys on (SPEC §7.3 / §12).
const SOURCE_TRAILER = "Docmost-Sync-Source: docmost";
|
||||
/**
 * Root a forward-slash vault-relative path at `vaultRoot`.
 *
 * @param vaultRoot Path of the vault root.
 * @param relPath Vault-relative path using "/" separators.
 * @returns `vaultRoot` followed by every segment of `relPath`, joined with "/".
 */
function relToAbs(vaultRoot, relPath) {
  const parts = relPath.split("/");
  parts.unshift(vaultRoot);
  return parts.join("/");
}
|
||||
/**
 * Build a forward-slash relPath from folder segments and a file stem.
 *
 * @param segments Folder segments leading to the file (may be empty).
 * @param stem File name without extension; ".md" is appended.
 * @returns The segments followed by `<stem>.md`, joined with "/".
 */
function segmentsToRelPath(segments, stem) {
  const parts = Array.from(segments);
  parts.push(`${stem}.md`);
  return parts.join("/");
}
|
||||
/**
 * Recover `{ pageId, relPath }` for every engine-tracked page in the vault by
 * reading each tracked .md file and parsing its `gitmost_id` frontmatter
 * (native-Obsidian format).
 *
 * All IO goes through `deps` (R-Pull-1), so fakes are enough to test this.
 * Files are read one at a time, in the order `listTracked` returns them.
 *
 * A file is SKIPPED (never thrown on) when:
 *   - `deps.readFile` fails — tracked but missing on disk, a mid-operation
 *     race; the next pull converges;
 *   - `parsePageFile` yields no id — the file is not an engine-tracked page
 *     yet (e.g. a stray hand-written Obsidian file; PUSH adopts those
 *     separately).
 *
 * @param deps Injected `listTracked` / `readFile` implementations.
 * @returns The tracked pages found, as `{ pageId, relPath }` records.
 */
export async function readExisting(deps) {
  // Wraps the read so that any failure becomes a "skip this file" signal.
  const readOrNull = async (rel) => {
    try {
      return { text: await deps.readFile(rel) };
    } catch {
      return null;
    }
  };

  const found = [];
  const trackedPaths = await deps.listTracked();
  for (const trackedPath of trackedPaths) {
    // git ls-files always emits forward-slash paths; normalize just in case.
    const rel = trackedPath.split(sep).join("/");
    const loaded = await readOrNull(rel);
    if (loaded === null) continue;
    const { id } = parsePageFile(loaded.text);
    if (!id) continue;
    found.push({ pageId: id, relPath: rel });
  }
  return found;
}
|
||||
/**
 * PURE pull-action planner (R-Pull-2, test-strategy report §5): turns the live
 * tree nodes, the completeness flag and the tracked files into the full set of
 * pull decisions, performing NO IO.
 *
 * Steps:
 *   1. build the vault layout and derive one relPath per live page (pages
 *      without an id, or without a layout entry, are left out);
 *   2. `planReconciliation` -> toWrite / moved / absence-based toDelete;
 *   3. `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch,
 *      empty-live, mass-delete guard). Its verdict is folded in here, so the
 *      returned `toDelete` is the POST-suppression set (empty if suppressed).
 *
 * Moves are NOT subject to the suppression: a moved page is present in `live`,
 * so removing its old path is legitimate (the applier still makes that removal
 * conditional on the write succeeding). Content fetches, file writes and git
 * operations all belong to `applyPullActions`.
 *
 * @param input Live pages, tree-completeness flag and tracked files.
 * @returns The decisions for `applyPullActions` to carry out.
 */
export function computePullActions(input) {
  const { pages, treeComplete, existing } = input;
  const layout = buildVaultLayout(pages);

  // One `{ pageId, relPath }` per live page that has both an id and a layout entry.
  const live = pages.flatMap((page) => {
    if (!page || !page.id) return [];
    const placement = layout.get(page.id);
    if (!placement) return [];
    return [
      {
        pageId: page.id,
        relPath: segmentsToRelPath(placement.segments, placement.stem),
      },
    ];
  });

  // `plan.toDelete` is ABSENCE-based only; move old-path removals travel
  // separately in `plan.moved`.
  const plan = planReconciliation(live, existing);
  const trackedCount = existing.length;
  const plannedDeleteCount = plan.toDelete.length;

  // SPEC §8: may the ABSENCE-based deletions run this cycle? Moves are not
  // governed by this decision.
  const deletionDecision = decideAbsenceDeletions({
    treeComplete,
    liveCount: live.length,
    existingCount: trackedCount,
    deleteCount: plannedDeleteCount,
  });

  // A suppressed cycle deletes nothing.
  const toDelete = deletionDecision.apply ? plan.toDelete : [];

  return {
    toWrite: plan.toWrite,
    moved: plan.moved,
    toDelete,
    deletionDecision,
    existingCount: trackedCount,
    plannedDeleteCount,
  };
}
|
||||
/**
 * THIN IO applier (R-Pull-2): carries out the decisions of `computePullActions`
 * in a fixed order, keeping every safety guard.
 *
 *   1. For each `toWrite` entry: fetch the page (`client.getPageJson`),
 *      stabilize its body (normalize-on-write fixpoint, SPEC §11), then mkdir +
 *      write. Writes run through a bounded-concurrency pool and a failing page
 *      is logged and counted, never allowed to abort the pull.
 *   2. Remove the OLD path of each move — only when the planner marked it
 *      removable AND the page's new-path write succeeded (data-loss guard: a
 *      failed move-write keeps the old path so the page never vanishes).
 *   3. Apply the (post-suppression) absence deletes.
 *   4. stageAll + commit on `docmost` (subject built from the ACTUAL
 *      written/deleted counts), checkout main, merge docmost. Conflicts are
 *      reported and left for manual resolution (SPEC §9).
 *
 * @param deps Injected client, git wrapper and filesystem functions.
 * @param actions The plan produced by `computePullActions`.
 * @param vaultRoot Root used to turn each relPath into the path handed to the fs deps.
 * @returns Counters for the run plus the commit and merge results.
 */
export async function applyPullActions(deps, actions, vaultRoot) {
  const { client, git } = deps;
  const describeError = (err) => (err instanceof Error ? err.message : String(err));

  // Report why absence deletions were withheld this cycle (SPEC §8).
  const { deletionDecision } = actions;
  if (!deletionDecision.apply) {
    switch (deletionDecision.reason) {
      case "incomplete-fetch":
        console.warn("pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)");
        break;
      case "empty-live":
        console.warn(
          `pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` +
            `tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
            `Docmost is reachable.`,
        );
        break;
      default:
        console.warn(
          `pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` +
            `tracked file(s) (mass-delete guard) — deletions suppressed this ` +
            `cycle (SPEC §8). Verify the live Docmost tree, then re-run.`,
        );
    }
  }

  // 1. Write every live page in its fixpoint form (normalize-on-write, SPEC §11).
  let written = 0;
  let failed = 0;
  let finished = 0;
  // Pages whose write FAILED. A moved page in this set must keep its old path,
  // otherwise the page would vanish entirely.
  const failedPageIds = new Set();

  const writeOne = async (entry) => {
    try {
      const page = await client.getPageJson(entry.pageId);
      // Native-Obsidian format: a minimal `gitmost_id` frontmatter followed by
      // the fixpoint markdown body. title/parent/space are DERIVED (filename /
      // folder / repo), so only the pageId is persisted as meta.
      const body = await stabilizePageBody(page.content);
      const text = serializePageFile(page.id, body);
      const target = relToAbs(vaultRoot, entry.relPath);
      await deps.mkdir(dirname(target));
      await deps.writeFile(target, text);
      written += 1;
    } catch (err) {
      failed += 1;
      failedPageIds.add(entry.pageId);
      console.error(`pull: failed page ${entry.pageId}:`, describeError(err));
    } finally {
      finished += 1;
      if (finished % PROGRESS_EVERY === 0) {
        console.log(`pulled ${finished}/${actions.toWrite.length}`);
      }
    }
  };

  // Dependency-free bounded pool: each worker claims the next unclaimed index
  // until the write list runs out. `writeOne` never rejects, so one bad page
  // cannot abort the others.
  let cursor = 0;
  const worker = async () => {
    for (let i = cursor++; i < actions.toWrite.length; i = cursor++) {
      await writeOne(actions.toWrite[i]);
    }
  };
  const poolSize = Math.min(CONCURRENCY, actions.toWrite.length) || 1;
  await Promise.all(Array.from({ length: poolSize }, () => worker()));

  // Removes one vault-relative path; reports failure instead of throwing.
  // (`rm` with force:true is a no-op when the file is already gone.)
  const removePath = async (rel, what) => {
    try {
      await deps.rm(relToAbs(vaultRoot, rel));
    } catch (err) {
      console.error(`pull: failed to ${what} ${rel}:`, describeError(err));
      return false;
    }
    return true;
  };

  // 2. MOVE old-path removals. A moved page IS present in `live`, so its old
  //    path is genuinely stale and the incomplete-fetch suppression does not
  //    apply. The old path is removed only when (a) the planner marked it
  //    removable (no other live page reuses it) AND (b) the new-path write
  //    SUCCEEDED — otherwise the only copy of the page would be deleted.
  let movedApplied = 0;
  for (const move of actions.moved) {
    if (!move.removeOldPath) continue;
    if (failedPageIds.has(move.pageId)) {
      console.warn(
        `pull: move write for ${move.pageId} failed — keeping old path ` +
          `${move.fromRelPath} (SPEC §8)`,
      );
      continue;
    }
    const removed = await removePath(move.fromRelPath, "remove moved old path");
    if (removed) movedApplied += 1;
  }

  // 3. ABSENCE-based deletions. `actions.toDelete` is ALREADY the
  //    post-suppression set (empty when the decision suppressed them, SPEC §8).
  let deleted = 0;
  for (const rel of actions.toDelete) {
    const removed = await removePath(rel, "delete");
    if (removed) deleted += 1;
  }

  // 4. Stage + commit on `docmost`. Stabilized output is deterministic, so an
  //    unchanged page yields identical bytes and git sees no diff (SPEC §11).
  //    The subject reports the ACTUAL work applied (pages written + files
  //    deleted), not the planned size, so failures are not over-reported.
  let subject = `docmost: sync ${written} page(s)`;
  if (deleted > 0) {
    subject += `, ${deleted} deleted`;
  }
  await git.stageAll();
  const committed = await git.commit(subject, {
    authorName: BOT_AUTHOR_NAME,
    authorEmail: BOT_AUTHOR_EMAIL,
    trailers: [SOURCE_TRAILER],
  });

  // Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9);
  // nothing is ever pushed to Docmost, and pushing to a git remote is
  // deferred (SPEC §7).
  await git.checkout(DEFAULT_BRANCH);
  const merge = await git.merge(DOCMOST_BRANCH);
  if (merge.conflict) {
    console.error(
      "pull: merge of docmost -> main CONFLICTED. Conflict markers were left " +
        "in the vault for manual resolution (SPEC §9). Nothing is pushed to " +
        "Docmost (read-only). Resolve locally, then re-run.",
    );
  } else if (!merge.ok) {
    console.error(`pull: merge of docmost -> main failed: ${merge.output}`);
  }
  console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");

  return { written, movedApplied, deleted, failed, committed, merge };
}
|
||||
504
packages/git-sync/build/engine/push.d.ts
vendored
Normal file
504
packages/git-sync/build/engine/push.d.ts
vendored
Normal file
@@ -0,0 +1,504 @@
|
||||
/**
|
||||
* Push cycle — vault -> Docmost (SPEC §6 "ФС → Docmost"), FIRST increment.
|
||||
*
|
||||
* This module mirrors the structure of `./pull.ts`: a set of VaultGit diff/ref
|
||||
* primitives (in `./git.ts`), a PURE planner (`computePushActions`) that turns
|
||||
* a git diff into a classified action set with NO IO, and a THIN injectable
|
||||
* applier (`applyPushActions`) exercised in tests via fakes only.
|
||||
*
|
||||
* Direction is vault -> Docmost. The diff is `main` against
|
||||
* `refs/docmost/last-pushed` (SPEC §6 step 2); each `A`/`M`/`D`/`R` row is
|
||||
* translated into a Docmost mutation by `pageId` identity (SPEC §4):
|
||||
* - A without pageId -> create_page (then write the assigned pageId back).
|
||||
* - A with pageId -> update (restored/copied file; the page already exists).
|
||||
* - M -> update content (collab/Yjs path, SPEC §2/§15.6).
|
||||
* - D -> delete_page (pageId recovered from the PRE-IMAGE meta).
|
||||
* - R -> rename/move (CLASSIFIED here, APPLIED in push #3).
|
||||
*
|
||||
* MOVE/RENAME APPLY (push #3) — DONE here. `classifyRenameMoves` (PURE) resolves
|
||||
* each `renamesMoves` entry into the Docmost op(s) it needs, comparing the PATH-
|
||||
* derived parent (SPEC §5: the file path is the source of truth for tree
|
||||
* position, NOT stale `meta.parentPageId`) and the meta title; `applyPushActions`
|
||||
* then calls `move_page` / `rename_page` (both for a reparent+retitle), or
|
||||
* records a NO-OP for a cosmetic local-only file-path rename.
|
||||
*
|
||||
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
|
||||
* the gitmost server drives the engine in-process (there is no standalone CLI
|
||||
* entry point).
|
||||
*/
|
||||
import { type DocmostMdMeta } from "../lib/index.js";
|
||||
import type { GitSyncClient } from "./client.types.js";
|
||||
import type { DiffEntry } from "./git.js";
|
||||
import { VaultGit } from "./git.js";
|
||||
import { type Settings } from "./settings.js";
|
||||
export type { DiffEntry } from "./git.js";
|
||||
/**
 * A page that must be CREATED in Docmost: a new local file whose meta carries
 * no pageId yet.
 */
export interface CreateAction {
  /** Where the new file lives, relative to the vault root. */
  path: string;
}
|
||||
/**
 * A page whose CONTENT changed. The file's meta already carries the pageId of
 * the existing page.
 */
export interface UpdateAction {
  /** Id of the existing page to update. */
  pageId: string;
  /** Where the changed file lives, relative to the vault root. */
  path: string;
}
|
||||
/** A page to soft-delete in Docmost, i.e. move to Trash (SPEC §8). */
export interface DeleteAction {
  /** Id of the page to trash. */
  pageId: string;
}
|
||||
/**
 * A renamed/moved page as recorded by the planner: same pageId, new path.
 * Which Docmost operation(s) it needs is not decided here; that is the job of
 * `classifyRenameMoves`.
 */
export interface RenameMoveAction {
  /** Id of the page whose file changed path. */
  pageId: string;
  /** Path of the file before the rename/move. */
  oldPath: string;
  /** Path of the file after the rename/move. */
  newPath: string;
}
|
||||
/**
 * A CLASSIFIED rename/move (push #3): a `RenameMoveAction` resolved into the
 * Docmost operation(s) it actually needs.
 *
 * Two notions are kept apart (SPEC §5):
 *   - IDENTITY is the pageId — "the source of truth for the link is the
 *     pageId, not the path". The file path itself is cosmetic and local.
 *   - TREE POSITION is read from the path: the RESOLVED parent of the new path
 *     is compared with the resolved parent of the old path.
 * Titles are compared between the current meta and the previous meta.
 *
 * A sub-operation is present ONLY when something real changed:
 *   - `move`   — the resolved parent page changed (reparent in Docmost);
 *   - `rename` — the page title changed (a pure title edit in Docmost);
 *   - `noop`   — neither changed: a purely LOCAL file-path rename, so Docmost
 *                is NOT called.
 * `move` and `rename` are independent and may BOTH be present (reparent +
 * retitle).
 */
export interface RenameMoveActionClassified {
  pageId: string;
  oldPath: string;
  newPath: string;
  /**
   * Present iff the resolved parent changed -> `move_page` (reparent). A `null`
   * `parentPageId` means the new parent is ROOT: the file sits at the space
   * root with no enclosing folder.
   */
  move?: {
    parentPageId: string | null;
  };
  /** Present iff the title changed -> `rename_page` (title-only). */
  rename?: {
    title: string;
  };
  /** True iff neither parent nor title changed (cosmetic local-only rename). */
  noop?: true;
}
|
||||
/**
 * Resolvers injected into the PURE `classifyRenameMoves` (push #3). Each is a
 * pure function of a path and a side. The real `main` (a follow-up) wires them
 * to the file tree — `readFile` for `current`, `git.showFileAtRef` for `prev` —
 * while tests pass plain lookups. Both follow SPEC §5 path-as-truth.
 */
export interface ClassifyRenameMovesDeps {
  /**
   * The file's synthetic native meta at the given side: title taken from the
   * filename, pageId from the `gitmost_id` frontmatter.
   */
  metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
  /**
   * The pageId of the page whose FILE is the parent FOLDER's `.md` (one level
   * up from the given path), or `null` when the parent is ROOT.
   */
  resolveParentPageId: (path: string, side: MetaSide) => string | null;
}
|
||||
/**
|
||||
* PURE classifier for the `renamesMoves` produced by `computePushActions`
|
||||
* (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
|
||||
* Docmost op(s) it needs, with NO IO (both resolvers are injected).
|
||||
*
|
||||
* SPEC §5 — the file PATH is the source of truth for tree position, NOT the
|
||||
* (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
|
||||
* `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing
|
||||
* folder, via `deps.resolveParentPageId`. The title comes from the meta.
|
||||
*
|
||||
* For each entry:
|
||||
* - `newParent = resolveParentPageId(newPath, 'current')`,
|
||||
* `oldParent = resolveParentPageId(oldPath, 'prev')`.
|
||||
* - `newTitle = metaAt(newPath,'current')?.title`,
|
||||
* `oldTitle = metaAt(oldPath,'prev')?.title`.
|
||||
* - include `move` iff `newParent !== oldParent` (a real reparent),
|
||||
* - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from
|
||||
* `oldTitle` (a real title edit; an empty/absent new title is never a rename),
|
||||
* - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename;
|
||||
* the page is its pageId, so Docmost is not touched).
|
||||
*/
|
||||
export declare function classifyRenameMoves(renamesMoves: RenameMoveAction[], deps: ClassifyRenameMovesDeps): RenameMoveActionClassified[];
|
||||
/** The classified set of push actions — the PURE output of `computePushActions`. */
export interface PushActions {
  /** Pages to create in Docmost. */
  creates: Array<CreateAction>;
  /** Pages whose content must be updated. */
  updates: Array<UpdateAction>;
  /** Pages to soft-delete. */
  deletes: Array<DeleteAction>;
  /** Renamed/moved pages, not yet classified into Docmost operations. */
  renamesMoves: Array<RenameMoveAction>;
  /**
   * Diff rows that could NOT be turned into an action, each with the reason.
   * Example: a deleted file whose PRE-IMAGE meta carried no recoverable pageId
   * (the untracked-file guard, SPEC §8 — only files that were tracked with a
   * pageId are deleted in Docmost). Kept so the caller can log them.
   */
  skipped: Array<{
    path: string;
    status: DiffEntry["status"];
    reason: string;
  }>;
}
|
||||
/**
 * Selects the tree a `metaAt` lookup reads a file's native meta from:
 *   - `prev`: the last-pushed PRE-IMAGE (e.g.
 *     `refs/docmost/last-pushed:<path>`). Used for D, where the file is gone
 *     from `main` and its pageId has to be recovered from the version Docmost
 *     last knew (SPEC §6/§8).
 *   - `current`: the current `main` tree, i.e. the live file content. Used for
 *     A/M/R, where the file still exists.
 */
export type MetaSide = "prev" | "current";
|
||||
/**
 * Input to the PURE push planner. The `metaAt` resolver is injected, so the
 * planner itself performs no IO.
 */
export interface PushActionsInput {
  /** Diff rows of `main` against `refs/docmost/last-pushed` (SPEC §6 step 2). */
  changes: Array<DiffEntry>;
  /**
   * Returns a file's synthetic native meta at the given side, or `null` when
   * the file is absent there or has no parseable meta. The real `main` reads
   * the working tree (`current`) or `git show <last-pushed>:<path>` (`prev`);
   * tests pass a plain lookup.
   */
  metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
  /**
   * Optional: the pageIds present at ANY path in the current `main` tree.
   *
   * When supplied, a deleted file whose pageId still lives somewhere in the
   * tree is treated as a MOVE rather than a deletion. This guards against
   * trashing a live page when a layout reshuffle relocated its file —
   * possibly across two cycles, in which case the matching add is not in THIS
   * diff. When omitted, only the in-diff D+A/M coalescing applies.
   */
  currentPageIds?: Set<string>;
}
|
||||
/**
|
||||
* PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
|
||||
* action by `pageId` identity, with NO IO (the `metaAt` resolver is injected).
|
||||
*
|
||||
* Classification rules:
|
||||
* - `A` (added):
|
||||
* - current meta HAS a pageId -> UPDATE (a restored/copied file whose
|
||||
* page already exists; we push its content rather than create a dup).
|
||||
* - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a
|
||||
* brand-new local file; the page does not exist in Docmost yet).
|
||||
* - current meta has NO pageId and NO usable spaceId -> SKIP with reason
|
||||
* `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
|
||||
* (§16), and a new local file may carry only partial human meta. We
|
||||
* refuse to create rather than guess a space (SPEC §8 guard spirit).
|
||||
* - `M` (modified): current meta has a pageId -> UPDATE content. (If a modified
|
||||
* file somehow lost its pageId it is skipped — there is nothing to target.)
|
||||
* - `D` (deleted): recover the pageId from the PRE-IMAGE meta (`metaAt(path,
|
||||
* 'prev')`) -> DELETE. If no pageId can be recovered, SKIP with a reason
|
||||
* (untracked-file guard, SPEC §8: never delete an untracked page).
|
||||
* - `R` (renamed/moved): same pageId (from current meta), path changed ->
|
||||
* RENAME/MOVE. Resolution of move-vs-rename + the new parentPageId is
|
||||
* DEFERRED to the next increment; here we only record oldPath/newPath/
|
||||
* pageId. If the renamed file has no recoverable pageId it is SKIPPED.
|
||||
* (`C` copy is treated the same as `R` for recording purposes.)
|
||||
*/
|
||||
export declare function computePushActions(input: PushActionsInput): PushActions;
|
||||
/** Ref that the push direction advances after a successful push (SPEC §5/§6). */
export declare const LAST_PUSHED_REF: "refs/docmost/last-pushed";
|
||||
/**
 * Name of the mirror branch, which is fast-forwarded after a clean push
 * (SPEC §5/§6 step 3). The branch stands for "what Docmost currently
 * contains": moving it to the pushed `main` commit closes the loop, so the
 * next pull diffs empty for the pages that were just pushed.
 */
export declare const DOCMOST_BRANCH: "docmost";
|
||||
/**
 * Injectable IO surface consumed by `applyPushActions`. Real wiring is expected
 * to supply the live client, `node:fs/promises` and the vault git wrapper;
 * tests supply FAKES so no destructive call ever runs live.
 *
 *  - `client`: the create/update/delete/move/rename subset of `GitSyncClient`.
 *  - `readFile` / `writeFile`: read a changed file's text / write a file back,
 *    addressed by VAULT-RELATIVE path (the applier does not resolve absolute
 *    paths, which keeps fakes trivial).
 *  - `spaceId`: the single Docmost space this vault mirrors.
 *  - `git`: `updateRef` (advance `refs/docmost/last-pushed`),
 *    `fastForwardBranch` (advance the `docmost` mirror after a clean push, the
 *    loop-close — SPEC §6 step 3 / §10) and `showFileAtRef`.
 */
export interface ApplyPushDeps {
    client: Pick<GitSyncClient, "importPageMarkdown" | "createPage" | "deletePage" | "movePage" | "renamePage">;
    /** Read a changed file's full text by its vault-relative path. */
    readFile: (path: string) => Promise<string>;
    /** Write a file's full text by its vault-relative path. */
    writeFile: (path: string, text: string) => Promise<void>;
    /**
     * The Docmost spaceId this vault mirrors. A CREATE targets this space (the
     * native file carries no spaceId — every file in the vault belongs to it),
     * and it backs the synthetic native meta the classifier reads.
     */
    spaceId: string;
    /**
     * `updateRef` advances `refs/docmost/last-pushed`; `fastForwardBranch`
     * advances the `docmost` mirror after a clean push. `showFileAtRef` reads a
     * file's text at a ref (used by the move/rename classifier to resolve the
     * PREVIOUS parent folder's `.md` at `refs/docmost/last-pushed`, SPEC §5
     * path-as-truth).
     */
    git: Pick<VaultGit, "updateRef" | "fastForwardBranch" | "showFileAtRef">;
}
|
||||
/**
 * A file whose meta was rewritten with a freshly-assigned pageId after its page
 * was created.
 */
export interface WrittenBackPage {
    /** Path of the file that received the pageId. */
    path: string;
    /** The pageId that was written into the file. */
    pageId: string;
}
|
||||
/**
 * Per-page push record intended for a FUTURE poll-suppression (SPEC §10): a
 * pulled page whose body hash + `updatedAt` match a record here is OUR OWN
 * write and must not be re-pulled. PRODUCED on the push side; CONSUMED on the
 * pull side later.
 */
export interface PushedPageRecord {
    /** The Docmost pageId that was updated/created. */
    pageId: string;
    /**
     * The `updatedAt` from the create/update client result, when the result
     * exposed one. Absent when the (fake) client did not return it.
     */
    updatedAt?: string;
    /** Stable hash of the markdown BODY that was pushed (SPEC §10 "body hash"). */
    bodyHash: string;
}
|
||||
/**
 * One page whose operation FAILED during apply (SPEC §12 resumability). The bad
 * page is isolated — recorded here — while the rest of the batch still runs.
 * The refs are NOT advanced when there is any failure, so a re-run retries
 * cleanly.
 */
export interface PushFailure {
    /** Which operation failed. */
    kind: "update" | "create" | "delete" | "move" | "rename";
    /** The pageId for update/delete/move/rename; absent for a never-id'd create. */
    pageId?: string;
    /** The vault-relative path for create/update/move/rename; absent for delete. */
    path?: string;
    /** The error message captured from the thrown error. */
    error: string;
}
|
||||
/**
 * A rename/move action that resolved to a NO-OP (push #3, SPEC §5): a purely
 * LOCAL file-path rename whose resolved parent AND title are both unchanged.
 * Page identity is the pageId and the path is COSMETIC/local-only, so Docmost
 * is NOT called — the skip is recorded here (with the reason) for logging.
 */
export interface PushNoop {
    /** The page whose file was renamed locally. */
    pageId: string;
    /** File path before the local rename. */
    oldPath: string;
    /** File path after the local rename. */
    newPath: string;
    /** Why no Docmost op was emitted (currently always a path-only rename). */
    reason: "path-only-rename";
}
|
||||
/** Structured outcome of `applyPushActions` (counts + write-backs + noops). */
export interface ApplyPushResult {
    /** Number of pages created. */
    created: number;
    /** Number of pages updated. */
    updated: number;
    /** Number of pages deleted. */
    deleted: number;
    /** Pages reparented in Docmost via `move_page` (push #3, SPEC §5/§16). */
    moved: number;
    /** Pages retitled in Docmost via `rename_page` (push #3, SPEC §5/§6). */
    renamed: number;
    /**
     * Files whose `gitmost_id` frontmatter was written with the pageId Docmost
     * assigned on create — these now need a FOLLOW-UP commit (the meta on disk
     * changed). The commit itself is the caller's job; it is recorded here so it
     * is not lost.
     */
    writtenBack: WrittenBackPage[];
    /**
     * Per-page push records (pageId + optional `updatedAt` + body hash) for
     * every page successfully updated/created — the §10 loop-guard data a future
     * pull-side poll-suppression will consult so it does not re-pull our own
     * write. Deletes are not included (no body was pushed).
     */
    pushed: PushedPageRecord[];
    /**
     * Pages whose operation threw — isolated and recorded while the batch
     * continued (SPEC §12). Non-empty here means the refs were NOT advanced.
     */
    failures: PushFailure[];
    /**
     * Rename/move actions that resolved to a NO-OP — a purely LOCAL file-path
     * rename (same parent, same title). NO Docmost call was made for these
     * (SPEC §5: the page is its pageId, the path is local-only). Recorded for
     * logging.
     */
    noops: PushNoop[];
    /** Diff rows the planner could not classify (carried through for logging). */
    skipped: PushActions["skipped"];
    /** Whether `refs/docmost/last-pushed` was advanced (only on a CLEAN push). */
    lastPushedAdvanced: boolean;
    /**
     * Result of fast-forwarding the `docmost` mirror branch after a CLEAN push
     * (the loop-close, SPEC §6 step 3 / §10). `null` when no advance was
     * attempted (no `pushedCommit`, or there were failures).
     * `{ ok: false, reason }` when a non-fast-forward was REFUSED (divergent
     * `docmost` history is never clobbered).
     */
    docmostFastForward: {
        ok: boolean;
        reason?: string;
    } | null;
}
|
||||
/**
 * IO applier for a planned push: updates, creates, deletes and renames/moves.
 * Everything that touches the outside world goes through `deps`, so tests can
 * drive it with FAKES.
 *
 *  - UPDATE: read the file, strip the `gitmost_id` frontmatter and send only the
 *    markdown BODY via `client.importPageMarkdown(pageId, body, base)`. `base`
 *    is the body of the same file at `refs/docmost/last-pushed` (`null` when
 *    the file did not exist there) and serves as the common ancestor for a
 *    3-way merge against the live page, so concurrent human edits are not
 *    clobbered. This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw
 *    jsonb overwrite.
 *  - CREATE: derive title/spaceId/parentPageId from the file's current meta,
 *    call `client.createPage(...)`, take the assigned pageId from the result,
 *    and write it BACK as the file's `gitmost_id` frontmatter (re-serialized
 *    via `serializePageFile`, body preserved) so the file becomes tracked. The
 *    write-back is recorded in `writtenBack` (a follow-up commit is needed).
 *  - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
 *  - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry
 *    with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for
 *    the parent pageId — path-as-truth — and the meta for the title), then:
 *      - `move`   -> `client.movePage(pageId, parentPageId, position?)`
 *                    (reparent; `position` is UNDEFINED for now — the client
 *                    supplies a default),
 *      - `rename` -> `client.renamePage(pageId, title)` (title-only),
 *      - BOTH     -> move (reparent) THEN rename (title), in that order,
 *      - `noop`   -> NO client call; recorded in `noops` (a cosmetic local-only
 *                    file-path rename: the page is its pageId, the path is
 *                    local, SPEC §5).
 *
 * FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation
 * is wrapped in its own try/catch: a single failing page is recorded in
 * `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES —
 * one bad page must never block the rest. Crucially, the refs are advanced ONLY
 * when `failures.length === 0`: a PARTIAL push must NOT advance
 * `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the
 * whole batch cleanly (the already-applied pages are idempotent re-applies).
 *
 * LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a
 * `pushedCommit` is supplied:
 *  - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND
 *  - fast-forward the `docmost` mirror branch to it via
 *    `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects
 *    what Docmost now contains and the NEXT pull diffs EMPTY for these pages
 *    (it does not re-pull our own write). The ff is REFUSED (not forced) if
 *    `docmost` is not an ancestor of the pushed commit; the result is surfaced
 *    in `docmostFastForward`. On ANY failure, NEITHER ref is advanced.
 *
 * LOOP-GUARD DATA (SPEC §10). For every page successfully updated/created the
 * result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the
 * body hash of what was pushed plus the write's `updatedAt` (when the client
 * returned one). A future pull-side poll-suppression consults this so it does
 * not re-pull our own write; producing it is in scope here, consuming it is
 * deferred.
 *
 * @param deps Injected client, file IO, spaceId and git plumbing.
 * @param actions The plan produced by `computePushActions`.
 * @param pushedCommit The `main` commit just reflected into Docmost (SHA or
 *   commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
 * @returns Counts, write-backs, push records, failures, noops and ref outcome.
 */
export declare function applyPushActions(deps: ApplyPushDeps, actions: PushActions, pushedCommit?: string): Promise<ApplyPushResult>;
|
||||
/**
 * SPEC §5 path-as-truth: returns the parent FOLDER's `.md` file for a
 * vault-relative (forward-slash) path. `buildVaultLayout` puts a page with
 * children at `<...>/Title.md` and nests its children under `<...>/Title/`, so
 * for `newPath = <dir>/Child.md` the parent page's file is `<dir>.md` (the
 * enclosing folder, one level up).
 *
 * @param path Vault-relative, forward-slash path of a page file.
 * @returns The parent folder's `.md` path, or `null` when the path has NO
 *   enclosing folder (`Child.md`, at the space root — the parent is ROOT).
 */
export declare function parentFolderFile(path: string): string | null;
|
||||
/**
 * Whether a vault path is a Docmost PAGE file (design section "Adoption"): a
 * `.md` file with NO dot-segment anywhere in its path. This excludes
 * `.obsidian/` config, `.trash/`, dotfiles (`.foo.md`), and every non-`.md`
 * file (attachments, JSON, …) — Obsidian owns those; they live in the vault but
 * are never pages. Used to screen the PUSH diff so non-page files are never
 * created/updated/deleted in Docmost (and never get a `gitmost_id` frontmatter
 * written into them).
 *
 * @param path Vault path to test.
 * @returns `true` when the path denotes a page file.
 */
export declare function isPageFile(path: string): boolean;
|
||||
/**
 * The human ("local") git identity used for engine-made commits on `main` in
 * the push direction (SPEC §7.3). Provenance is carried by the source trailer,
 * which the loop-guard keys on; the identity is for history readability only.
 * When the vault repo already has a configured `user.name`/`user.email`, git
 * uses that for the working-tree commit; this is the fallback the daemon stamps.
 */
export declare const LOCAL_AUTHOR_NAME = "Local";
/** Email half of the fallback local identity (see `LOCAL_AUTHOR_NAME`). */
export declare const LOCAL_AUTHOR_EMAIL = "local@local";
|
||||
/**
 * The provenance trailer marking a `main`-side (human/local) commit
 * (SPEC §7.3).
 */
export declare const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
|
||||
/**
 * Injectable deps for `runPush` (mirrors `pull.ts`'s wiring; everything that
 * touches the outside world is here so tests pass fakes). `makeClient` is a
 * FACTORY, not a client — a dry-run must build NO client at all (the factory is
 * never called), and only `--apply` invokes it.
 */
export interface PushDeps {
    /** Settings for this run; also handed to `makeClient`. */
    settings: Settings;
    /** The subset of the vault git wrapper that a push cycle uses. */
    git: Pick<VaultGit, "assertGitAvailable" | "ensureRepo" | "isMergeInProgress" | "checkout" | "stageAll" | "commit" | "readRef" | "revParse" | "diffNameStatus" | "showFileAtRef" | "updateRef" | "fastForwardBranch" | "listTrackedFiles">;
    /** Build a real client — called ONLY on `--apply`, never on dry-run. */
    makeClient: (settings: Settings) => ApplyPushDeps["client"];
    /** Read a file's full text by its vault-relative (forward-slash) path. */
    readFile: (path: string) => Promise<string>;
    /** Write a file's full text by its vault-relative path. */
    writeFile: (path: string, text: string) => Promise<void>;
    /** Structured logger (defaults to console in `main`; a recorder in tests). */
    log: (line: string) => void;
}
|
||||
/** The structured outcome of a `runPush` cycle (returned + summarized). */
export interface PushRunResult {
    /** Which path ran: `dry-run` (plan only) or `apply` (Docmost mutated). */
    mode: "dry-run" | "apply";
    /** Why the cycle stopped before planning, if it did (e.g. a left-over merge). */
    aborted?: "merge-in-progress";
    /** The diff base the plan was computed against (`last-pushed` else `docmost`). */
    base?: {
        ref: string;
        source: "last-pushed" | "docmost";
        sha: string | null;
    };
    /** The `main` commit the plan targets (the would-be pushed commit). */
    pushedCommit?: string;
    /** Planned action counts from the PURE planner (present once a plan was built). */
    planned?: {
        creates: number;
        updates: number;
        deletes: number;
        renamesMoves: number;
        skipped: number;
    };
    /** The applier's structured result — ONLY present on the `--apply` path. */
    applied?: ApplyPushResult;
    /**
     * True when `applyPushActions` REFUSED to fast-forward a divergent `docmost`
     * mirror (SPEC §5 invariant broken). Escalated (logged prominently) and
     * folded into the CLI's non-zero exit.
     */
    divergentDocmost?: boolean;
    /** Per-page failures from the applier (empty/absent on a clean run). */
    failures?: PushFailure[];
}
|
||||
/**
 * Run one FS->Docmost push cycle (SPEC §6 "FS → Docmost"), DRY-RUN BY DEFAULT.
 *
 * Steps (mirrors `pull.ts`):
 *  1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
 *     non-zero-ish result) if a merge is in progress — never push on top of an
 *     unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
 *     Docmost (SPEC §9).
 *  2. Checkout `main` (the human-facing branch the push reads from).
 *  3. Commit the human's pending working-tree changes on `main` with the
 *     `local` provenance trailer (SPEC §7.3). A no-op when nothing changed.
 *  4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
 *     `docmost` mirror branch (what Docmost currently has). Resolve `main`.
 *  5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)`
 *     resolver (current = working tree, prev = `git show <base>:<path>`); run
 *     the PURE `computePushActions`.
 *  6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
 *     calls, NO ref advance.
 *  7. `--apply`: build the client, run
 *     `applyPushActions(..., pushedCommit=main)`, then
 *     (a) if any pageIds were written back (creates), commit them on `main`
 *         with the `local` trailer and RE-advance `refs/docmost/last-pushed` to
 *         the new commit so the recorded pageIds are persisted in what Docmost
 *         mirrors;
 *     (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
 *         WARNING and a non-zero-ish flag.
 *     Then log a one-line summary.
 *
 * @param deps Injected settings, git wrapper, client factory, file IO, logger.
 * @param opts `dryRun: true` plans only; `dryRun: false` is the `--apply` path.
 * @returns The structured outcome of the cycle.
 */
export declare function runPush(deps: PushDeps, opts: {
    dryRun: boolean;
}): Promise<PushRunResult>;
|
||||
/** Parsed `push` CLI flags. DRY-RUN is the default; `--apply` opts into writes. */
export interface PushParsedArgs {
    /** True when `--apply` was passed (the ONLY path that writes to Docmost). */
    apply: boolean;
}
|
||||
/**
 * Parse the `push` CLI flags. SAFE BY DEFAULT: without `--apply` the run is a
 * DRY-RUN (plan only). Exported so the flag handling is unit-testable.
 *
 * @param argv The CLI arguments to inspect.
 * @returns The parsed flags.
 */
export declare function parseArgs(argv: string[]): PushParsedArgs;
|
||||
971
packages/git-sync/build/engine/push.js
Normal file
971
packages/git-sync/build/engine/push.js
Normal file
@@ -0,0 +1,971 @@
|
||||
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
|
||||
import { DEFAULT_BRANCH } from "./git.js";
|
||||
import { bodyHash } from "./loop-guard.js";
|
||||
/**
 * PURE classifier for the `renamesMoves` produced by `computePushActions`
 * (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
 * Docmost op(s) it needs, with NO IO of its own (both resolvers are injected).
 *
 * SPEC §5 — the file PATH is the source of truth for tree position, NOT the
 * (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
 * `newPath` and the OLD parent from `oldPath`, both via
 * `deps.resolveParentPageId`. The title comes from the meta.
 *
 * For each entry:
 *  - `newParent = resolveParentPageId(newPath, 'current')`,
 *    `oldParent = resolveParentPageId(oldPath, 'prev')`;
 *  - `newTitle = metaAt(newPath, 'current')?.title`,
 *    `oldTitle = metaAt(oldPath, 'prev')?.title`;
 *  - `move` is included iff `newParent !== oldParent` (a real reparent);
 *  - `rename` is included iff `newTitle` is a NON-EMPTY string AND differs from
 *    `oldTitle` (an empty/absent new title is never a rename);
 *  - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path
 *    rename; the page is its pageId, so Docmost is not touched).
 *
 * @param renamesMoves Entries of shape `{ pageId, oldPath, newPath }`.
 * @param deps `{ resolveParentPageId(path, side), metaAt(path, side) }`.
 * @returns One object per entry: `{ pageId, oldPath, newPath }` plus whichever
 *   of `move: { parentPageId }`, `rename: { title }`, `noop: true` apply.
 */
export function classifyRenameMoves(renamesMoves, deps) {
    return renamesMoves.map((entry) => {
        // Resolver call order is kept stable: parents first, then titles.
        const newParent = deps.resolveParentPageId(entry.newPath, "current");
        const oldParent = deps.resolveParentPageId(entry.oldPath, "prev");
        const newTitle = deps.metaAt(entry.newPath, "current")?.title;
        const oldTitle = deps.metaAt(entry.oldPath, "prev")?.title;
        // A reparent: the new path's resolved parent page differs from the old's.
        const reparented = newParent !== oldParent;
        // A title edit: only a real, non-empty new title that actually changed.
        const retitled = typeof newTitle === "string" &&
            newTitle.length > 0 &&
            newTitle !== oldTitle;
        return {
            pageId: entry.pageId,
            oldPath: entry.oldPath,
            newPath: entry.newPath,
            ...(reparented ? { move: { parentPageId: newParent } } : {}),
            ...(retitled ? { rename: { title: newTitle } } : {}),
            // Neither changed -> a purely LOCAL file-path rename; no Docmost call.
            ...(!reparented && !retitled ? { noop: true } : {}),
        };
    });
}
|
||||
/**
 * PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
 * action by `pageId` identity, with NO IO of its own (the `metaAt` resolver is
 * injected).
 *
 * Pipeline:
 *  1. PAGE-FILE FILTER — rows whose path fails `isPageFile` are dropped before
 *     any classification, so non-page A/M/D/R rows are ignored entirely.
 *  2. GHOST-MOVE detection — a pageId found in a `D` row (pre-image meta) AND in
 *     an `A`/`M` row (current meta) at a different path is ONE page that
 *     relocated; it is planned as a rename/move and never as a delete.
 *  3. Per-row classification:
 *     - `A` (added):
 *         ghost-move pageId        -> RENAME/MOVE (old path from the `D` row);
 *         has a pageId             -> UPDATE (restored/copied file whose page
 *                                     already exists — never a duplicate);
 *         no pageId, truthy spaceId-> CREATE;
 *         otherwise                -> SKIP `create-without-spaceId` (Docmost
 *                                     `create_page` REQUIRES a spaceId, §16; we
 *                                     refuse to guess a space, SPEC §8).
 *     - `M` (modified):
 *         ghost-move pageId        -> RENAME/MOVE;
 *         has a pageId             -> UPDATE;
 *         otherwise                -> SKIP (nothing to target).
 *     - `D` (deleted), pageId recovered from `metaAt(path, 'prev')`:
 *         ghost-move pageId        -> SKIP (handled by the surviving row);
 *         pageId in `currentPageIds` -> SKIP (page still lives in the tree);
 *         has a pageId             -> DELETE;
 *         otherwise                -> SKIP (untracked-file guard, SPEC §8).
 *     - `R` / `C` (renamed / copied): pageId from the CURRENT meta ->
 *         RENAME/MOVE recording `{ pageId, oldPath, newPath }` only (move vs
 *         rename is resolved later by `classifyRenameMoves`); no pageId -> SKIP.
 *     - any other status -> SKIP `unhandled diff status …`.
 *
 * @param input `{ changes, metaAt, currentPageIds? }` — `changes` rows carry
 *   `status`, `path` and (for renames) `oldPath`; `metaAt(path, side)` returns
 *   the file's meta for side `'current'` or `'prev'`; `currentPageIds`, when
 *   provided, is queried with `.has(pageId)`.
 * @returns `{ creates, updates, deletes, renamesMoves, skipped }`.
 */
export function computePushActions(input) {
    const { metaAt, currentPageIds } = input;
    // PAGE-FILE FILTER (design section "Adoption"): only `.md` files OUTSIDE any
    // dot-folder are Docmost pages. `.obsidian/*`, attachments and other
    // non-page files are committed to the vault (no `.gitignore`) and so appear
    // in the diff, but they are NEVER pages — Obsidian owns them. Without this
    // filter every ADDED such file would be mis-classified as a CREATE
    // (nativeMeta always supplies a spaceId, so the `create-without-spaceId`
    // skip no longer screens them), creating junk pages in Docmost and
    // corrupting the file with a `gitmost_id` frontmatter.
    const pageChanges = input.changes.filter((row) => isPageFile(row.path));
    const plan = {
        creates: [],
        updates: [],
        deletes: [],
        renamesMoves: [],
        skipped: [],
    };
    const skip = (row, reason) => {
        plan.skipped.push({ path: row.path, status: row.status, reason });
    };
    const relocate = (pageId, oldPath, newPath) => {
        plan.renamesMoves.push({ pageId, oldPath, newPath });
    };
    // GHOST-MOVE coalescing (data-loss guard). git's rename detection (`-M`) can
    // miss a move when the two files are too dissimilar — exactly the case for
    // the tiny meta-only files a layout RESHUFFLE produces (e.g. several
    // untitled pages sharing the `_` fallback name; retitling one frees the bare
    // `_` and another page's file relocates `_ ~slug.md` -> `_.md`). git then
    // reports the move as a DELETE of the old path + an ADD of the new one.
    // Taken literally that soft-deletes a page that merely MOVED. Identity is
    // the pageId, not git's heuristic.
    // A pageId can land at its new path as an ADD (the path was free) or as a
    // MODIFY (the path was occupied by ANOTHER page that left), so the surviving
    // side is the CURRENT-meta pageId of both A and M rows.
    const removedAt = new Map();
    const landedAt = new Map();
    for (const row of pageChanges) {
        if (row.status === "D") {
            const id = metaAt(row.path, "prev")?.pageId;
            if (id) {
                removedAt.set(id, row.path);
            }
        }
        else if (row.status === "A" || row.status === "M") {
            const id = metaAt(row.path, "current")?.pageId;
            if (id) {
                landedAt.set(id, row.path);
            }
        }
    }
    // pageId -> the path it was deleted from, for pages that survive elsewhere.
    const ghostOrigin = new Map();
    for (const [id, oldPath] of removedAt) {
        const newPath = landedAt.get(id);
        if (newPath && newPath !== oldPath) {
            ghostOrigin.set(id, oldPath);
        }
    }
    for (const row of pageChanges) {
        const { status, path } = row;
        if (status === "A" || status === "M") {
            const meta = metaAt(path, "current");
            const pageId = meta?.pageId;
            if (pageId && ghostOrigin.has(pageId)) {
                // The surviving half of a git-undetected move (a matching DELETE
                // exists): record a rename/move, NOT an update — the `D` side is
                // suppressed so the page is never soft-deleted.
                relocate(pageId, ghostOrigin.get(pageId), path);
            }
            else if (pageId) {
                // The page already exists in Docmost: push content as an UPDATE.
                plan.updates.push({ pageId, path });
            }
            else if (status === "A" && meta?.spaceId) {
                // Brand-new local file with a target space -> create the page; the
                // assigned pageId is written back later (in `applyPushActions`).
                // Truthiness also rejects an empty-string spaceId.
                plan.creates.push({ path });
            }
            else if (status === "A") {
                // A create needs a spaceId; refuse rather than guess a space.
                skip(row, "create-without-spaceId");
            }
            else {
                // A modified file with no pageId has no Docmost target to update.
                skip(row, "modified file has no pageId in meta");
            }
            continue;
        }
        if (status === "D") {
            // The file is gone from `main`; recover its pageId from the PRE-IMAGE
            // so the RIGHT page is deleted.
            const pageId = metaAt(path, "prev")?.pageId;
            if (!pageId) {
                // Untracked-file guard (SPEC §8): never a Docmost page — no delete.
                skip(row, "deleted file has no recoverable pageId (pre-image meta)");
            }
            else if (ghostOrigin.has(pageId)) {
                // Same pageId survives at a new path: handled by its A/M row.
                skip(row, "ghost-move (re-added at a new path) — not a deletion");
            }
            else if (currentPageIds?.has(pageId)) {
                // The pageId still EXISTS elsewhere in the current tree (its
                // matching add was in an earlier cycle, so it is not in this diff).
                // A live page must never be trashed because its FILENAME changed.
                skip(row, "pageId still present in the tree (moved) — not a deletion");
            }
            else {
                plan.deletes.push({ pageId });
            }
            continue;
        }
        if (status === "R" || status === "C") {
            // Same page, new path. Identity comes from the CURRENT (post-rename)
            // meta since the file still exists; only oldPath/newPath/pageId are
            // recorded here.
            const pageId = metaAt(path, "current")?.pageId;
            if (pageId) {
                relocate(pageId, row.oldPath ?? path, path);
            }
            else {
                skip(row, "renamed/moved file has no pageId in meta");
            }
            continue;
        }
        // Defensive: any status other than A/M/D/R/C.
        skip(row, `unhandled diff status ${status}`);
    }
    return plan;
}
|
||||
// --- thin apply (create/update/delete), fakes-only in this increment ---------
|
||||
/**
 * Name of the marker ref that the push direction advances after a successful
 * push (SPEC §5/§6).
 */
export const LAST_PUSHED_REF = "refs/docmost/last-pushed";
|
||||
/**
 * Name of the mirror branch that is fast-forwarded after a clean push (SPEC
 * §5/§6 step 3). The branch reflects "what Docmost currently contains";
 * advancing it to the pushed `main` commit closes the loop, so the next pull
 * diffs empty for the pages that were just pushed.
 */
export const DOCMOST_BRANCH = "docmost";
|
||||
/**
|
||||
* THIN IO applier for the COMMON push cases (create/update/delete). Exercised
|
||||
* via FAKES only in this increment — there is no live wiring.
|
||||
*
|
||||
* - UPDATE: read the file body, then `client.importPageMarkdown(pageId, body)`.
|
||||
* This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw jsonb
|
||||
* overwrite. The full self-contained markdown (meta + body) is sent as-is;
|
||||
* `importPageMarkdown` parses the meta/body itself.
|
||||
* - CREATE: derive title/spaceId/parentPageId from the file's current meta,
|
||||
* `client.createPage(...)`, take the assigned pageId from the result, and
|
||||
* write it BACK as the file's `gitmost_id` frontmatter (re-serialized via
|
||||
* `serializePageFile`, body preserved) so the file becomes
|
||||
* tracked. The write-back is recorded in `writtenBack` (a follow-up commit
|
||||
* is needed — NEXT increment).
|
||||
* - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
|
||||
* - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry
|
||||
* with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for
|
||||
* the parent pageId — path-as-truth — and the meta for the title), then:
|
||||
* - `move` -> `client.movePage(pageId, parentPageId, position?)` (reparent;
|
||||
* `position` is UNDEFINED for now — the client supplies a default),
|
||||
* - `rename` -> `client.renamePage(pageId, title)` (title-only),
|
||||
* - BOTH -> move (reparent) THEN rename (title), in that order,
|
||||
* - `noop` -> NO client call; recorded in `noops` (a cosmetic local-only
|
||||
* file-path rename: the page is its pageId, the path is local, SPEC §5).
|
||||
*
|
||||
* FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation
|
||||
* is wrapped in its own try/catch: a single failing page is recorded in
|
||||
* `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES —
|
||||
* one bad page must never block the rest. Crucially, the refs are advanced ONLY
|
||||
* when `failures.length === 0`: a PARTIAL push must NOT advance
|
||||
* `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the
|
||||
* whole batch cleanly (the already-applied pages are idempotent re-applies).
|
||||
*
|
||||
* LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a
|
||||
* `pushedCommit` is supplied:
|
||||
* - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND
|
||||
* - fast-forward the `docmost` mirror branch to it via
|
||||
* `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects
|
||||
* what Docmost now contains and the NEXT pull diffs EMPTY for these pages
|
||||
* (it does not re-pull our own write). The ff is REFUSED (not forced) if
|
||||
* `docmost` is not an ancestor of the pushed commit; the result is surfaced
|
||||
* in `docmostFastForward`. On ANY failure, NEITHER ref is advanced.
|
||||
*
|
||||
* LOOP-GUARD DATA (SPEC §10). For every page successfully updated/created the
|
||||
* result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the body
|
||||
* hash of what was pushed plus the write's `updatedAt` (when the client returned
|
||||
* one). A future pull-side poll-suppression consults this so it does not re-pull
|
||||
* our own write; producing it is in scope here, consuming it is deferred.
|
||||
*
|
||||
* @param pushedCommit The `main` commit just reflected into Docmost (SHA or
|
||||
* commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
|
||||
*/
|
||||
export async function applyPushActions(deps, actions, pushedCommit) {
  const { client, git } = deps;
  // Success counters per operation kind; spread into the result at the end.
  const tally = { created: 0, updated: 0, deleted: 0, moved: 0, renamed: 0 };
  const writtenBack = [];
  const pushed = [];
  const failures = [];
  const noops = [];

  // Phase 1 — UPDATES. Every page has its own try/catch, so one throwing page
  // lands in `failures` and the loop carries on with the next one.
  for (const update of actions.updates) {
    try {
      // Only the body is sent: the `gitmost_id` frontmatter is stripped by
      // `parsePageFile` and never becomes page content.
      const { body } = parsePageFile(await deps.readFile(update.path));
      // The file as of the last push is the merge ancestor handed to the
      // client; `null` means the file did not exist at that ref. It is
      // stripped the same way so both sides are plain bodies.
      const ancestorText = await deps.git.showFileAtRef(LAST_PUSHED_REF, update.path);
      const ancestorBody = ancestorText === null ? null : parsePageFile(ancestorText).body;
      const response = await client.importPageMarkdown(update.pageId, body, ancestorBody);
      tally.updated += 1;
      // Loop-guard record: which page, when (if the client said), and a hash
      // of exactly the body that was pushed.
      pushed.push({
        pageId: update.pageId,
        ...extractUpdatedAt(response),
        bodyHash: bodyHash(body),
      });
    } catch (err) {
      failures.push({
        kind: "update",
        pageId: update.pageId,
        path: update.path,
        error: errMessage(err),
      });
    }
  }

  // Phase 2 — CREATES. Create the page, then write the id the server assigned
  // back into the file's frontmatter so the file becomes tracked.
  // NOTE(review): the parent id is read from the parent's folder-note in the
  // working tree, so a child processed BEFORE its (also new) parent in this
  // same batch would resolve to no parent — confirm the planner orders creates
  // parent-first.
  for (const create of actions.creates) {
    try {
      const { body } = parsePageFile(await deps.readFile(create.path));
      // Title comes from the file name, parent from the enclosing folder's
      // folder-note, space from the run. A `null` parent is passed on as
      // `undefined`.
      const title = titleFromPath(create.path);
      const parentId = await resolveParentPageIdViaTree(deps, create.path, "current");
      const response = await client.createPage(title, body, deps.spaceId, parentId ?? undefined);
      // The assigned id is read from `response.data.id`.
      const newPageId = response?.data?.id;
      if (newPageId) {
        await deps.writeFile(create.path, serializePageFile(newPageId, body));
        writtenBack.push({ path: create.path, pageId: newPageId });
        pushed.push({
          pageId: newPageId,
          ...extractUpdatedAt(response),
          bodyHash: bodyHash(body),
        });
      }
      // Counted even when no id came back (the create call itself succeeded).
      tally.created += 1;
    } catch (err) {
      failures.push({ kind: "create", path: create.path, error: errMessage(err) });
    }
  }

  // Phase 3 — DELETES, one isolated client call per page.
  for (const removal of actions.deletes) {
    try {
      await client.deletePage(removal.pageId);
      tally.deleted += 1;
    } catch (err) {
      failures.push({
        kind: "delete",
        pageId: removal.pageId,
        error: errMessage(err),
      });
    }
  }

  // Phase 4 — RENAME / MOVE. The classifier is synchronous, so every
  // (path, side) lookup it may ask for is read up front into plain tables.
  if (actions.renamesMoves.length > 0) {
    const parentByKey = new Map();
    const metaByKey = new Map();
    // pageIds whose prefetch threw: reported as failures and kept away from
    // the classifier.
    const unreadable = new Set();
    for (const entry of actions.renamesMoves) {
      try {
        // New side from the working tree, old side from the last-pushed ref.
        // Keys are `path|side`, so repeated paths overwrite the same slot.
        parentByKey.set(
          `${entry.newPath}|current`,
          await resolveParentPageIdViaTree(deps, entry.newPath, "current"),
        );
        parentByKey.set(
          `${entry.oldPath}|prev`,
          await resolveParentPageIdViaTree(deps, entry.oldPath, "prev"),
        );
        metaByKey.set(
          `${entry.newPath}|current`,
          await metaAtViaTree(deps, entry.newPath, "current", deps.spaceId),
        );
        metaByKey.set(
          `${entry.oldPath}|prev`,
          await metaAtViaTree(deps, entry.oldPath, "prev", deps.spaceId),
        );
      } catch (err) {
        unreadable.add(entry.pageId);
        failures.push({
          kind: "move",
          pageId: entry.pageId,
          path: entry.newPath,
          error: errMessage(err),
        });
      }
    }

    const readable = actions.renamesMoves.filter((entry) => !unreadable.has(entry.pageId));
    const classified = classifyRenameMoves(readable, {
      metaAt: (path, side) => metaByKey.get(`${path}|${side}`) ?? null,
      resolveParentPageId: (path, side) => parentByKey.get(`${path}|${side}`) ?? null,
    });

    for (const item of classified) {
      if (item.noop) {
        // Path changed locally but nothing changes in Docmost: record only.
        noops.push({
          pageId: item.pageId,
          oldPath: item.oldPath,
          newPath: item.newPath,
          reason: "path-only-rename",
        });
        continue;
      }
      // Remember which call is running so the failure names the operation
      // that actually threw (a successful move followed by a throwing rename
      // is reported as `rename`).
      let failingKind = item.move ? "move" : "rename";
      try {
        // Reparent first, retitle second.
        if (item.move) {
          failingKind = "move";
          // No position argument is passed; only the page and its new parent.
          await client.movePage(item.pageId, item.move.parentPageId);
          tally.moved += 1;
        }
        if (item.rename) {
          failingKind = "rename";
          await client.renamePage(item.pageId, item.rename.title);
          tally.renamed += 1;
        }
      } catch (err) {
        failures.push({
          kind: failingKind,
          pageId: item.pageId,
          path: item.newPath,
          error: errMessage(err),
        });
      }
    }
  }

  // Phase 5 — refs move only when a commit was supplied AND nothing failed:
  // first `last-pushed`, then the fast-forward of the mirror branch, whose
  // outcome is handed back to the caller untouched.
  let lastPushedAdvanced = false;
  let docmostFastForward = null;
  if (pushedCommit && failures.length === 0) {
    await git.updateRef(LAST_PUSHED_REF, pushedCommit);
    lastPushedAdvanced = true;
    docmostFastForward = await git.fastForwardBranch(DOCMOST_BRANCH, pushedCommit);
  }

  return {
    ...tally,
    writtenBack,
    pushed,
    failures,
    noops,
    skipped: actions.skipped,
    lastPushedAdvanced,
    docmostFastForward,
  };
}
|
||||
/**
 * Turn any thrown value into a stable, readable error message.
 *
 * - An `Error` yields its `message` (unchanged behavior).
 * - Any other object carrying a string `message` (an error-like payload)
 *   yields that message.
 * - Everything else goes through `String(...)`. When that produces the
 *   uninformative default `"[object Object]"`, a JSON rendering is used
 *   instead if one is available.
 *
 * This function is called from `catch` handlers, so it never throws itself:
 * stringification failures (e.g. an object with no `toString`) and JSON
 * failures (e.g. a circular structure) are absorbed.
 *
 * @param err The caught value; may be anything.
 * @returns A string describing `err`.
 */
function errMessage(err) {
  if (err instanceof Error) {
    return err.message;
  }
  const isObject = typeof err === "object" && err !== null;
  if (isObject && typeof err.message === "string") {
    return err.message;
  }
  let text;
  try {
    text = String(err);
  } catch {
    // No usable `toString`/`valueOf` (e.g. `Object.create(null)`).
    text = Object.prototype.toString.call(err);
  }
  if (isObject && text === "[object Object]") {
    try {
      const json = JSON.stringify(err);
      if (typeof json === "string") {
        return json;
      }
    } catch {
      // Not serializable (circular reference, BigInt, ...): keep `text`.
    }
  }
  return text;
}
|
||||
/**
 * SPEC §5 path-as-truth: the vault-relative (forward-slash) path of the file
 * that represents the PARENT page of `path`.
 *
 * The page owning a folder `<dir>` is that folder's folder-note
 * `<dir>/<base>.md`, where `<base>` is the folder's own name. Hence:
 *   - `Child.md` (no enclosing folder)    -> `null` (parent is ROOT);
 *   - `<dir>/Child.md`                    -> `<dir>/<base>.md`;
 *   - `<dir>/<base>.md` (a folder-note)   -> the folder-note of the folder
 *     one level up, or `null` when `<dir>` is a top-level folder.
 *
 * @param path Vault-relative path using `/` separators.
 * @returns The parent page's file path, or `null` when the parent is ROOT.
 */
export function parentFolderFile(path) {
  // Folder-note path of a folder: `<folder>/<folder's last segment>.md`.
  const folderNoteOf = (folder) => `${folder}/${baseSegment(folder)}.md`;

  const cut = path.lastIndexOf("/");
  if (cut < 0) {
    return null; // root-level file: parent is ROOT.
  }
  const folder = path.slice(0, cut);
  const ownNote = folderNoteOf(folder);
  if (path !== ownNote) {
    // An ordinary entry inside `folder`: the folder's note is its parent.
    return ownNote;
  }
  // `path` is itself the folder-note, so its parent lives one folder higher.
  const outerCut = folder.lastIndexOf("/");
  return outerCut < 0 ? null : folderNoteOf(folder.slice(0, outerCut));
}
|
||||
/**
 * Tell whether a vault path denotes a Docmost PAGE file (design section
 * "Adoption"): the path must end in `.md` and none of its `/`-separated
 * segments may start with a dot. That rules out `.obsidian/` config,
 * `.trash/`, dotfiles such as `.foo.md`, and every non-`.md` file. The push
 * diff is screened with this so non-page files are never sent to Docmost.
 *
 * @param path Vault-relative path using `/` separators.
 * @returns `true` for a page file, `false` otherwise.
 */
export function isPageFile(path) {
  return (
    path.endsWith(".md") &&
    path.split("/").every((segment) => !segment.startsWith("."))
  );
}
|
||||
/**
 * Last segment of a forward-slash path: everything after the final `/`, or
 * the whole input when it contains no `/`.
 */
function baseSegment(path) {
  // `lastIndexOf` gives -1 when there is no slash, so `+ 1` makes the slice
  // start at 0 and return the full string in that case.
  return path.slice(path.lastIndexOf("/") + 1);
}
|
||||
/**
 * Derive a page TITLE from a vault path: the last path segment with a
 * trailing `.md` removed (a segment without that extension is returned
 * as-is). For a folder-note `<dir>/<base>.md` this yields the folder name.
 *
 * @param path Vault-relative path using `/` separators.
 * @returns The title implied by the file name.
 */
function titleFromPath(path) {
  const extension = ".md";
  const fileName = baseSegment(path);
  if (!fileName.endsWith(extension)) {
    return fileName;
  }
  return fileName.slice(0, fileName.length - extension.length);
}
|
||||
/**
 * Assemble the synthetic meta object for a page file in the NATIVE format:
 * `version` is always 1, `title` is derived from the path, `spaceId` is the
 * value passed in, and `pageId` is the `id` parsed from the file text — the
 * key is present only when that id is truthy. No `parentPageId` is ever set
 * here.
 *
 * @param text Full file contents (frontmatter + body).
 * @param path Vault-relative path of the file.
 * @param spaceId Space the file belongs to.
 * @returns `{ version, title, spaceId }`, plus `pageId` when one was parsed.
 */
function nativeMeta(text, path, spaceId) {
  const pageId = parsePageFile(text).id;
  const base = { version: 1, title: titleFromPath(path), spaceId };
  return pageId ? { ...base, pageId } : base;
}
|
||||
/**
 * Resolve the pageId of the PARENT page of `path` by reading the parent's
 * file (as located by `parentFolderFile`, SPEC §5 path-as-truth):
 *   - `current` -> `deps.readFile(parentFile)` (the live working tree),
 *   - any other side (`prev`) -> `deps.git.showFileAtRef(LAST_PUSHED_REF,
 *     parentFile)` (the last-pushed pre-image).
 * The result is the `id` that `parsePageFile` extracts from that file.
 *
 * `null` is returned when `path` has no parent file (root level), when the
 * read throws, or when the read yields `null` (file absent at the ref) — all
 * of which mean "parent is ROOT / unknown" (SPEC §16: `parentPageId: null`
 * places the page at the root).
 *
 * The IO is asynchronous, so callers prefetch these values into a table for
 * the synchronous classifier.
 *
 * @param deps Provides `readFile` and `git.showFileAtRef`.
 * @param path Vault-relative path of the CHILD page file.
 * @param side `"current"` or `"prev"`.
 * @returns The parent's parsed id, or `null` as described above.
 */
async function resolveParentPageIdViaTree(deps, path, side) {
  const parentFile = parentFolderFile(path);
  if (parentFile === null) {
    return null; // root-level: parent is ROOT.
  }

  let contents;
  try {
    if (side === "current") {
      contents = await deps.readFile(parentFile);
    } else {
      contents = await deps.git.showFileAtRef(LAST_PUSHED_REF, parentFile);
    }
  } catch {
    // Parent file missing/unreadable on that side: fall back to ROOT.
    return null;
  }

  if (contents === null) {
    return null; // absent at the ref.
  }
  // Only the parent's identity matters here, not where it sits in the tree.
  return parsePageFile(contents).id;
}
|
||||
/**
 * Load the synthetic native meta of `path` on one side, for the rename/move
 * classifier. `current` reads the working tree via `deps.readFile`; any other
 * side (`prev`) reads `LAST_PUSHED_REF` via `deps.git.showFileAtRef`.
 *
 * @param deps Provides `readFile` and `git.showFileAtRef`.
 * @param path Vault-relative path of the page file.
 * @param side `"current"` or `"prev"`.
 * @param spaceId Space id copied into the resulting meta.
 * @returns The meta built by `nativeMeta`, or `null` when the read throws or
 *   yields `null` (the file is missing/unreadable on that side).
 */
async function metaAtViaTree(deps, path, side, spaceId) {
  let contents;
  try {
    if (side === "current") {
      contents = await deps.readFile(path);
    } else {
      contents = await deps.git.showFileAtRef(LAST_PUSHED_REF, path);
    }
  } catch {
    return null;
  }
  return contents === null ? null : nativeMeta(contents, path, spaceId);
}
|
||||
/**
 * Pick the `updatedAt` timestamp out of a create/update client result.
 * `result.data.updatedAt` is consulted first; `result.updatedAt` is used only
 * when the nested value is `null`/`undefined`. The chosen value is kept only
 * if it is a string.
 *
 * @param result Client result; may be `null`/`undefined`.
 * @returns `{ updatedAt }` when a string was found, otherwise `{}` — so
 *   spreading the result never adds an `updatedAt: undefined` key.
 */
function extractUpdatedAt(result) {
  const candidate = result?.data?.updatedAt ?? result?.updatedAt;
  if (typeof candidate !== "string") {
    return {};
  }
  return { updatedAt: candidate };
}
|
||||
// --- runnable push orchestration (`runPush`) ---------------------------------
|
||||
//
|
||||
// `runPush` is the FS->Docmost twin of `pull.ts`'s `main`: it wires the VaultGit
|
||||
// diff/ref primitives + the PURE `computePushActions` planner + the THIN
|
||||
// `applyPushActions` applier into one runnable cycle. SAFE BY DEFAULT — the
|
||||
// engine's FIRST write path to Docmost defaults to DRY-RUN (plan only, NO
|
||||
// Docmost writes, NO ref advance); an explicit `--apply` is the ONLY path that
|
||||
// builds a client and mutates Docmost.
|
||||
//
|
||||
// Every external effect is injected (`PushDeps`) so the whole orchestration is
|
||||
// driven by FAKES in tests — no live Docmost, git, fs, or network.
|
||||
/**
 * Author NAME stamped on engine-made commits on `main` in the push direction
 * (SPEC §7.3). It is passed as `authorName` to `git.commit` by `runPush`.
 * Provenance is carried by the trailer below, not by this identity.
 */
export const LOCAL_AUTHOR_NAME = "Local";

/** Author EMAIL paired with `LOCAL_AUTHOR_NAME` (passed as `authorEmail`). */
export const LOCAL_AUTHOR_EMAIL = "local@local";

/**
 * Commit trailer that marks a `main`-side (human/local) commit (SPEC §7.3).
 * `runPush` attaches it to every commit it makes.
 */
export const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
|
||||
/**
 * Run one FS->Docmost push cycle (SPEC §6 "FS -> Docmost"), DRY-RUN BY DEFAULT.
 *
 * Steps:
 *   1. Preflight git (`assertGitAvailable`, `ensureRepo`). If a merge is in
 *      progress, log why and return `{ mode, aborted: "merge-in-progress" }`
 *      without touching anything else — conflict markers must never reach
 *      Docmost (SPEC §9/§12).
 *   2. Check out `main`.
 *   3. Stage and commit pending working-tree changes on `main` with the
 *      `local` trailer (SPEC §7.3). This LOCAL commit also happens on a
 *      dry-run; it is logged so it is never silent.
 *   4. Pick the diff base: `refs/docmost/last-pushed` when it resolves, else
 *      the `docmost` mirror branch. Resolve `main`; with no commit there,
 *      return `{ mode, base }`.
 *   5. Diff base..main, prefetch the meta of every changed path on both
 *      sides, and run the synchronous `computePushActions` planner.
 *   6. Log the plan. On a dry-run return `{ mode: "dry-run", base,
 *      pushedCommit, planned }` — no client is built and no ref moves.
 *   7. Otherwise build the client and run `applyPushActions`. If pageIds were
 *      written back, commit them and (only after a clean push) re-advance
 *      `last-pushed` and fast-forward `docmost` to that commit. A refused
 *      fast-forward at either site sets `divergentDocmost` and logs a WARNING.
 *
 * @param deps Injected effects: `git`, `settings` (`vaultPath`,
 *   `docmostSpaceId`), `log`, `readFile`, `writeFile`, `makeClient`.
 * @param opts Run options. APPLY mode is entered ONLY when `opts.dryRun` is
 *   exactly `false`; a missing `opts`, a missing flag, or any other value is
 *   a dry-run, so a forgotten option can never write to Docmost.
 * @returns A result object whose `mode` is `"dry-run"` or `"apply"`; in apply
 *   mode it also carries `applied`, `divergentDocmost` and `failures`.
 */
export async function runPush(deps, opts) {
  const { git, settings, log } = deps;
  // SAFE BY DEFAULT: require an explicit `false` to leave dry-run.
  const dryRun = opts?.dryRun !== false;
  const mode = dryRun ? "dry-run" : "apply";

  // 1. Preflight git: fail fast when the binary or the repo is unusable.
  await git.assertGitAvailable();
  await git.ensureRepo();

  // 1b. Never push on top of an unresolved merge (SPEC §9/§12). Checked
  //     before any checkout/diff so a re-run converges once it is resolved.
  if (await git.isMergeInProgress()) {
    log(`push: vault has an unresolved merge at ${settings.vaultPath} — resolve ` +
      `it (or 'git merge --abort') and re-run. Nothing was pushed to Docmost ` +
      `(conflict markers must never reach Docmost, SPEC §9).`);
    return { mode, aborted: "merge-in-progress" };
  }

  // 2. Work on `main`, the branch the push diffs FROM.
  await git.checkout(DEFAULT_BRANCH);

  // 3. Commit pending working-tree changes with the `local` trailer. `commit`
  //    returns a falsy value when there was nothing to commit. This runs on a
  //    dry-run too (the diff is base..main), hence the explicit log lines.
  await git.stageAll();
  const committedWorkingTree = await git.commit("local: working-tree changes", {
    authorName: LOCAL_AUTHOR_NAME,
    authorEmail: LOCAL_AUTHOR_EMAIL,
    trailers: [LOCAL_SOURCE_TRAILER],
  });
  if (committedWorkingTree) {
    const sha = await git.revParse(DEFAULT_BRANCH);
    log(`push: committed local working-tree changes on main` +
      (sha ? ` as ${sha.slice(0, 8)}` : "") +
      ` (local git only — nothing sent to Docmost).`);
  } else {
    log("push: working tree clean (no local changes to push).");
  }

  // 4. Diff base: `last-pushed` when it resolves, else the `docmost` mirror
  //    (the state before any push has ever advanced `last-pushed`).
  let base;
  const lastPushedSha = await git.readRef(LAST_PUSHED_REF);
  if (lastPushedSha) {
    base = { ref: LAST_PUSHED_REF, source: "last-pushed", sha: lastPushedSha };
  } else {
    base = {
      ref: DOCMOST_BRANCH,
      source: "docmost",
      sha: await git.revParse(DOCMOST_BRANCH),
    };
  }
  const pushedCommit = await git.revParse(DEFAULT_BRANCH);
  if (!pushedCommit) {
    // Defensive: `main` has no commit, so there is nothing to diff.
    log("push: `main` has no commit to push — nothing to do.");
    return { mode, base };
  }

  // 5. Diff the base against `main`, then prefetch meta for both sides of
  //    every change: the planner is synchronous but the reads are async.
  const changes = await git.diffNameStatus(base.ref, DEFAULT_BRANCH);
  const metaTable = new Map();
  for (const change of changes) {
    // `current` is the path on `main`; `prev` is the pre-image path (the old
    // path of a rename/copy, otherwise the same path).
    const currentPath = change.path;
    const prevPath = change.oldPath ?? change.path;
    if (!metaTable.has(`${currentPath}|current`)) {
      metaTable.set(`${currentPath}|current`, await readMetaCurrent(deps, currentPath, settings.docmostSpaceId));
    }
    if (!metaTable.has(`${prevPath}|prev`)) {
      metaTable.set(`${prevPath}|prev`, await readMetaPrev(deps, base.ref, prevPath, settings.docmostSpaceId));
    }
  }
  const metaAt = (path, side) => metaTable.get(`${path}|${side}`) ?? null;

  // pageIds still present anywhere in the tracked `*.md` files. Collected
  // only when the diff has a delete row: it lets the planner tell a real
  // deletion from a page whose id still lives at another path.
  let currentPageIds;
  if (changes.some((c) => c.status === "D")) {
    currentPageIds = new Set();
    for (const relPath of await git.listTrackedFiles("*.md")) {
      const pid = (await readMetaCurrent(deps, relPath, settings.docmostSpaceId))
        ?.pageId;
      if (pid)
        currentPageIds.add(pid);
    }
  }

  const actions = computePushActions({ changes, metaAt, currentPageIds });
  const planned = {
    creates: actions.creates.length,
    updates: actions.updates.length,
    deletes: actions.deletes.length,
    renamesMoves: actions.renamesMoves.length,
    skipped: actions.skipped.length,
  };

  // 6. Always log the plan; a dry-run stops here with no client, no Docmost
  //    call and no ref change.
  logPlan(log, base, pushedCommit, actions, planned, dryRun);
  if (dryRun) {
    return { mode: "dry-run", base, pushedCommit, planned };
  }

  // 7. APPLY — the only write path. The WHOLE `git` object is handed over so
  //    its methods keep their receiver (bare method references would lose
  //    `this`).
  const client = deps.makeClient(settings);
  const applied = await applyPushActions({
    client,
    git,
    readFile: deps.readFile,
    writeFile: deps.writeFile,
    spaceId: settings.docmostSpaceId,
  }, actions, pushedCommit);

  // 7a. Persist freshly assigned pageIds: the applier rewrote those files on
  //     disk, so commit them on `main` with the `local` trailer.
  //     `divergentDocmost` is shared by both fast-forward sites (here and 7b).
  let divergentDocmost = false;
  if (applied.writtenBack.length > 0) {
    await git.stageAll();
    const recorded = await git.commit("local: record created pageIds", {
      authorName: LOCAL_AUTHOR_NAME,
      authorEmail: LOCAL_AUTHOR_EMAIL,
      trailers: [LOCAL_SOURCE_TRAILER],
    });
    if (recorded) {
      const newCommit = await git.revParse(DEFAULT_BRANCH);
      // Re-advance only after a CLEAN push (the applier already moved
      // `last-pushed`); after a partial push the refs must stay put so the
      // re-run retries the whole batch.
      if (newCommit && applied.lastPushedAdvanced) {
        await git.updateRef(LAST_PUSHED_REF, newCommit);
        const ff = await git.fastForwardBranch(DOCMOST_BRANCH, newCommit);
        if (!ff.ok) {
          // Same §5 breach as in 7b, escalated the same way.
          divergentDocmost = true;
          log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
            `fast-forwarded to the pageId write-back commit ` +
            `(${ff.reason ?? "not-fast-forward"}). The §5 invariant ('docmost' ` +
            `mirrors what Docmost contains) is broken: reconcile 'docmost' ` +
            `against the live Docmost tree before the next cycle.`);
        }
      }
    }
  }

  // 7b. Escalate a fast-forward the applier refused: loud log + flag.
  if (applied.docmostFastForward && !applied.docmostFastForward.ok) {
    divergentDocmost = true;
    log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
      `fast-forwarded (${applied.docmostFastForward.reason ?? "not-fast-forward"}). ` +
      `The §5 invariant ('docmost' mirrors what Docmost contains) is broken: ` +
      `reconcile 'docmost' against the live Docmost tree before the next cycle.`);
  }

  // 7c. One-line summary.
  log(`push complete: ${applied.created} created, ${applied.updated} updated, ` +
    `${applied.deleted} deleted, ${applied.moved} moved, ${applied.renamed} ` +
    `renamed, ${applied.noops.length} no-op(s), ${applied.skipped.length} ` +
    `skipped, ${applied.failures.length} failure(s)` +
    (divergentDocmost ? " [DIVERGENT docmost mirror]" : ""));

  return {
    mode: "apply",
    base,
    pushedCommit,
    planned,
    applied,
    divergentDocmost,
    failures: applied.failures,
  };
}
|
||||
/**
 * Synthetic native meta of `path` as it is in the live working tree (the
 * `current` side).
 *
 * @returns The meta built by `nativeMeta`, or `null` when `deps.readFile`
 *   throws (e.g. the path of a deleted file is no longer on disk).
 */
async function readMetaCurrent(deps, path, spaceId) {
  let contents;
  try {
    contents = await deps.readFile(path);
  } catch {
    // Not readable on disk -> there is no current meta.
    return null;
  }
  return nativeMeta(contents, path, spaceId);
}
|
||||
/**
 * Synthetic native meta of `path` as recorded at `baseRef` (the `prev` side),
 * read through `deps.git.showFileAtRef`.
 *
 * @returns The meta built by `nativeMeta`, or `null` when the read throws or
 *   yields `null` (the path does not exist at the base ref).
 */
async function readMetaPrev(deps, baseRef, path, spaceId) {
  let contents;
  try {
    contents = await deps.git.showFileAtRef(baseRef, path);
  } catch {
    return null;
  }
  // `null` contents: path absent at the base ref.
  return contents === null ? null : nativeMeta(contents, path, spaceId);
}
|
||||
/**
 * Write the whole push plan to the injected logger: a headline (mode, diff
 * base, target commit), a counts line, then one line per planned item in the
 * order creates, updates, deletes, renames/moves, skipped.
 *
 * @param log Logger called once per line.
 * @param base Diff base `{ ref, source, sha }`; `sha` may be falsy.
 * @param pushedCommit Commit id of `main`; its first 8 characters are shown.
 * @param actions Planned actions (the per-item lists).
 * @param planned Pre-computed counts for each list.
 * @param dryRun Selects the mode label in the headline.
 */
function logPlan(log, base, pushedCommit, actions, planned, dryRun) {
  const modeLabel = dryRun ? "DRY-RUN — no Docmost writes" : "APPLY";
  // The abbreviated base sha is shown only when a sha is known.
  const baseSha = base.sha ? ` ${base.sha.slice(0, 8)}` : "";
  log(`push plan (${modeLabel}): base=${base.ref} (${base.source}${baseSha}) -> main ${pushedCommit.slice(0, 8)}`);

  const counts = [
    `${planned.creates} create`,
    `${planned.updates} update`,
    `${planned.deletes} delete`,
    `${planned.renamesMoves} rename/move`,
    `${planned.skipped} skipped`,
  ];
  log(`push plan counts: ${counts.join(", ")}`);

  // Log one formatted line for every item of a list, in list order.
  const emitEach = (items, describe) => {
    for (const item of items) {
      log(describe(item));
    }
  };
  emitEach(actions.creates, (item) => ` create: ${item.path}`);
  emitEach(actions.updates, (item) => ` update: ${item.pageId} (${item.path})`);
  emitEach(actions.deletes, (item) => ` delete: ${item.pageId}`);
  emitEach(actions.renamesMoves, (item) => ` rename/move: ${item.oldPath} -> ${item.newPath} (${item.pageId})`);
  emitEach(actions.skipped, (item) => ` skipped [${item.status}] ${item.path}: ${item.reason}`);
}
|
||||
/**
 * Read the `push` CLI flags. The only flag recognised is `--apply`, matched
 * as a whole argument; when it is absent `apply` is `false`, i.e. the run is
 * a plan-only DRY-RUN. Exported so the flag handling can be unit-tested.
 *
 * @param argv Command-line arguments.
 * @returns `{ apply }`.
 */
export function parseArgs(argv) {
  const apply = argv.some((flag) => flag === "--apply");
  return { apply };
}
|
||||
126
packages/git-sync/build/engine/reconcile.d.ts
vendored
Normal file
126
packages/git-sync/build/engine/reconcile.d.ts
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
/**
|
||||
* Pure reconciliation planner (SPEC §5/§6/§8).
|
||||
*
|
||||
* Given the desired live set of files (computed from the current Docmost tree)
|
||||
* and the set of files currently tracked in the vault, compute what to write,
|
||||
* what to move (old path to remove), and what to delete. Identity is `pageId`
|
||||
* (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
|
||||
* changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
|
||||
* the live tree is a DELETE.
|
||||
*
|
||||
* This module is intentionally PURE (no IO, no git) so the whole plan is
|
||||
* unit-testable. The actual file writing / git operations happen in pull.ts.
|
||||
*/
|
||||
/** Desired state: a page that must be present in the vault at `relPath`. */
export interface LiveEntry {
    /** Identity of the page (the stable file<->page anchor). */
    pageId: string;
    /** Forward-slash path relative to the vault root, e.g. `Space/Parent/Child.md`. */
    relPath: string;
}
|
||||
/** Current state: a file tracked in the vault, with the pageId parsed from its meta. */
export interface ExistingEntry {
    /** Page identity read from the tracked file's meta. */
    pageId: string;
    /** Forward-slash path of the tracked file, relative to the vault root. */
    relPath: string;
}
|
||||
/** Plan item: write (or rewrite) the page at its destination path. */
export interface WriteEntry {
    /** Page to write. */
    pageId: string;
    /** Destination path the page is written to. */
    relPath: string;
}
|
||||
/** Plan item: a page whose path changed — written at the NEW path, OLD path removed. */
export interface MovedEntry {
    /** Page that moved. */
    pageId: string;
    /** Path the page was tracked at before the move. */
    fromRelPath: string;
    /** Path the page is written to now. */
    toRelPath: string;
    /**
     * True when `fromRelPath` may be removed. It is false when some other live
     * page will (re)write exactly that path (path reuse): deleting it would
     * destroy real data, so the caller must skip the removal. The move is
     * recorded either way, because the new path is always written.
     */
    removeOldPath: boolean;
}
|
||||
/** Result of reconciliation: what to write, what to delete, what moved. */
export interface ReconciliationPlan {
    /**
     * One entry per page in `live`, to be (re)written at its relPath. Because
     * every live page is written whether or not it already existed, this list
     * covers adds, in-place content updates and moves (same pageId, new path).
     */
    toWrite: WriteEntry[];
    /**
     * Vault-relative paths to delete because their tracked pageId is ABSENT
     * from `live` (page removed/trashed). Only absence-based deletions are
     * listed: the old paths of moved pages are reported through `moved` and
     * applied separately by the caller. The split lets pull.ts put absence
     * deletions behind the incomplete-fetch suppression and the mass-delete
     * guard (SPEC §8) while real moves are still applied.
     */
    toDelete: string[];
    /**
     * Tracked pages whose relPath changed. The caller writes the page at
     * `toRelPath` and removes `fromRelPath` only after that write succeeded.
     * The old path never appears in `toDelete`.
     */
    moved: MovedEntry[];
}
|
||||
/**
 * Build the reconciliation plan from the desired (`live`) and the tracked
 * (`existing`) file sets.
 *
 * Rules:
 *  - Each `live` page is written at its relPath (add, update and move alike).
 *  - A tracked pageId that is in `live` under a different relPath is a move:
 *    its OLD relPath is reported in `moved` only (the caller removes it once
 *    the new path is written) and never in `toDelete`.
 *  - A tracked pageId that is NOT in `live` is an absence delete: its relPath
 *    goes into `toDelete`.
 *
 * Notes:
 *  - Safety filter (no data loss): a path that is the live TARGET of any page
 *    is never deleted/removed, since a write owns it. This holds for the
 *    absence `toDelete` set and for a moved page's old path alike — when the
 *    old path is reused by ANOTHER live page the move carries no removal,
 *    because that path will be (re)written.
 *  - `existing` may legitimately hold duplicate pageIds (two stray files with
 *    the same meta pageId); every such file that is not the live target path
 *    is removed (as an absence or a move) so the vault converges to exactly
 *    the live set.
 */
export declare function planReconciliation(live: LiveEntry[], existing: ExistingEntry[]): ReconciliationPlan;
|
||||
/**
 * Minimum number of tracked files for the mass-delete fraction guard to apply.
 * Smaller vaults are exempt, since deleting "most" files there is normal
 * (e.g. 1 of 2).
 */
export declare const MASS_DELETE_MIN_EXISTING = 4;
/** Share of tracked files above which a delete plan is treated as a suspected wipe. */
export declare const MASS_DELETE_FRACTION = 0.5;
|
||||
/**
 * Outcome of the absence-deletion check for one cycle: either the deletions are
 * applied, or they are suppressed for the stated `reason`.
 */
export type DeletionDecision = {
    apply: true;
} | {
    apply: false;
    reason: "incomplete-fetch" | "empty-live" | "mass-delete";
};
|
||||
/**
 * Pure decision: may the ABSENCE-based deletions (`plan.toDelete`) be applied in
 * this cycle? It packages the SPEC §8 safety invariants so they can be unit-
 * tested without live credentials or git:
 *
 *  - `treeComplete === false` (partial Docmost tree fetch) -> SUPPRESS. A page
 *    missing from a partial tree is NOT proof of deletion (SPEC §8), so files
 *    that are merely absent must not be deleted this cycle. Writes, updates
 *    and moves still happen.
 *  - The live fetch returned 0 pages while files are tracked -> SUPPRESS
 *    (almost always a failed fetch, never a real "delete everything").
 *  - The plan would delete more than `MASS_DELETE_FRACTION` of a non-trivial
 *    vault -> SUPPRESS as a mass-deletion guard (defense in depth).
 *
 * Moves are outside this decision: a moved page IS present in `live`, so the
 * removal of its old path is real and handled separately by the caller.
 */
export declare function decideAbsenceDeletions(args: {
    /** False when the Docmost tree fetch was partial. */
    treeComplete: boolean;
    /** Number of pages returned by the live fetch. */
    liveCount: number;
    /** Number of files currently tracked in the vault. */
    existingCount: number;
    /** Number of absence-based deletions in the plan. */
    deleteCount: number;
}): DeletionDecision;
|
||||
117
packages/git-sync/build/engine/reconcile.js
Normal file
117
packages/git-sync/build/engine/reconcile.js
Normal file
@@ -0,0 +1,117 @@
|
||||
/**
|
||||
* Pure reconciliation planner (SPEC §5/§6/§8).
|
||||
*
|
||||
* Given the desired live set of files (computed from the current Docmost tree)
|
||||
* and the set of files currently tracked in the vault, compute what to write,
|
||||
* what to move (old path to remove), and what to delete. Identity is `pageId`
|
||||
* (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
|
||||
* changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
|
||||
* the live tree is a DELETE.
|
||||
*
|
||||
* This module is intentionally PURE (no IO, no git) so the whole plan is
|
||||
* unit-testable. The actual file writing / git operations happen in pull.ts.
|
||||
*/
|
||||
/**
 * Build the reconciliation plan from the desired (`live`) and the tracked
 * (`existing`) file sets.
 *
 * Rules:
 *  - Each `live` page is written at its relPath (add, update and move alike).
 *  - A tracked pageId that is in `live` under a different relPath is a move:
 *    its OLD relPath is reported in `moved` only (the caller removes it once
 *    the new path is written) and never in `toDelete`.
 *  - A tracked pageId that is NOT in `live` is an absence delete: its relPath
 *    goes into `toDelete`.
 *
 * Notes:
 *  - Safety filter (no data loss): a path that is the live TARGET of any page
 *    is never deleted/removed, since a write owns it. This holds for the
 *    absence `toDelete` set and for a moved page's old path alike — when the
 *    old path is reused by ANOTHER live page the move is still recorded, but
 *    with `removeOldPath: false`.
 *  - `existing` may legitimately hold duplicate pageIds (two stray files with
 *    the same meta pageId); every such file that is not the live target path
 *    is removed (as an absence or a move) so the vault converges to exactly
 *    the live set.
 *
 * @param live     pages that should exist, each with `pageId` and `relPath`.
 * @param existing files currently tracked, each with `pageId` and `relPath`.
 * @returns `{ toWrite, toDelete, moved }`.
 */
export function planReconciliation(live, existing) {
    // pageId -> path the page should live at (the last entry wins on duplicates).
    const targetPathByPageId = new Map();
    // Every path some write will own; such a path is never deleted or removed.
    const ownedPaths = new Set();
    const toWrite = [];
    for (const { pageId, relPath } of live) {
        targetPathByPageId.set(pageId, relPath);
        ownedPaths.add(relPath);
        toWrite.push({ pageId, relPath });
    }
    const moved = [];
    // A Set, so a path reported by several tracked rows is queued only once.
    const absentPaths = new Set();
    for (const tracked of existing) {
        const desiredPath = targetPathByPageId.get(tracked.pageId);
        const pathIsOwned = ownedPaths.has(tracked.relPath);
        if (desiredPath === undefined) {
            // Page is gone from the live tree: absence delete, unless a live
            // page is about to (re)write this very path.
            if (!pathIsOwned) {
                absentPaths.add(tracked.relPath);
            }
        }
        else if (desiredPath !== tracked.relPath) {
            // Same page at a new path: a move. The old path may be removed
            // only when no write owns it.
            moved.push({
                pageId: tracked.pageId,
                fromRelPath: tracked.relPath,
                toRelPath: desiredPath,
                removeOldPath: !pathIsOwned,
            });
        }
        // Otherwise the page stays where it is; the write already queued above
        // re-emits the file in place.
    }
    return { toWrite, toDelete: Array.from(absentPaths), moved };
}
|
||||
/**
 * Minimum number of tracked files for the mass-delete fraction guard to apply.
 * Smaller vaults are exempt, since deleting "most" files there is normal
 * (e.g. 1 of 2).
 */
export const MASS_DELETE_MIN_EXISTING = 4;
/** Share of tracked files above which a delete plan is treated as a suspected wipe. */
export const MASS_DELETE_FRACTION = 0.5;
/**
 * Pure decision: may the ABSENCE-based deletions (`plan.toDelete`) be applied in
 * this cycle? It packages the SPEC §8 safety invariants so they can be unit-
 * tested without live credentials or git:
 *
 *  - `treeComplete === false` (partial Docmost tree fetch) -> SUPPRESS. A page
 *    missing from a partial tree is NOT proof of deletion (SPEC §8), so files
 *    that are merely absent must not be deleted this cycle. Writes, updates
 *    and moves still happen.
 *  - The live fetch returned 0 pages while files are tracked -> SUPPRESS
 *    (almost always a failed fetch, never a real "delete everything").
 *  - The plan would delete more than `MASS_DELETE_FRACTION` of a non-trivial
 *    vault -> SUPPRESS as a mass-deletion guard (defense in depth).
 *
 * Moves are outside this decision: a moved page IS present in `live`, so the
 * removal of its old path is real and handled separately by the caller.
 *
 * @param args `{ treeComplete, liveCount, existingCount, deleteCount }`.
 * @returns `{ apply: true }`, or `{ apply: false, reason }` when suppressed.
 */
export function decideAbsenceDeletions(args) {
    const { treeComplete, liveCount, existingCount, deleteCount } = args;
    const suppress = (reason) => ({ apply: false, reason });
    // With no tracked files, or nothing queued for deletion, applying is a no-op.
    const nothingAtStake = existingCount === 0 || deleteCount === 0;
    if (nothingAtStake) {
        return { apply: true };
    }
    if (!treeComplete) {
        return suppress("incomplete-fetch");
    }
    if (liveCount === 0) {
        return suppress("empty-live");
    }
    const vaultIsNonTrivial = existingCount >= MASS_DELETE_MIN_EXISTING;
    const exceedsFraction = deleteCount > existingCount * MASS_DELETE_FRACTION;
    return vaultIsNonTrivial && exceedsFraction
        ? suppress("mass-delete")
        : { apply: true };
}
|
||||
21
packages/git-sync/build/engine/roundtrip-helpers.d.ts
vendored
Normal file
21
packages/git-sync/build/engine/roundtrip-helpers.d.ts
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Pure, IO-free comparison helpers for the idempotency round-trip checks. The
|
||||
* round-trip harness that drives these lives in the package's tests, not in the
|
||||
* engine.
|
||||
*/
|
||||
/**
 * Return a copy of a ProseMirror node tree with every `attrs.id` removed, at
 * any depth. Block ids are regenerated by `markdownToProseMirror` (SPEC §11),
 * so they have to be ignored when two documents are compared for semantic
 * shape. The input tree is left unmodified.
 */
export declare function stripBlockIds(node: any): any;
|
||||
/**
 * Deep-compare two values recursively and report the first place they differ.
 *
 * @returns `null` when the values are equal, otherwise a short `path` to the
 *          divergence together with the two differing values.
 */
export declare function firstDivergence(a: any, b: any, path?: string): {
    path: string;
    a: any;
    b: any;
} | null;
|
||||
70
packages/git-sync/build/engine/roundtrip-helpers.js
Normal file
70
packages/git-sync/build/engine/roundtrip-helpers.js
Normal file
@@ -0,0 +1,70 @@
|
||||
/**
|
||||
* Pure, IO-free comparison helpers for the idempotency round-trip checks. The
|
||||
* round-trip harness that drives these lives in the package's tests, not in the
|
||||
* engine.
|
||||
*/
|
||||
/**
 * Return a copy of a ProseMirror node tree with every `attrs.id` removed, at
 * any depth. Block ids are regenerated by `markdownToProseMirror` (SPEC §11),
 * so they have to be ignored when two documents are compared for semantic
 * shape. The input tree is left unmodified.
 *
 * @param node an array, a plain object, or a primitive / `null`.
 * @returns a new array/object for containers; primitives and `null` are
 *          returned as they are.
 */
export function stripBlockIds(node) {
    if (Array.isArray(node)) {
        return node.map((child) => stripBlockIds(child));
    }
    if (node === null || typeof node !== "object") {
        return node;
    }
    const stripped = {};
    for (const [key, value] of Object.entries(node)) {
        if (key === "attrs" && value && typeof value === "object") {
            // Copy the attributes, then drop only `id`; the rest is kept and
            // processed recursively.
            const attrsWithoutId = { ...value };
            delete attrsWithoutId.id;
            stripped[key] = stripBlockIds(attrsWithoutId);
        }
        else {
            stripped[key] = stripBlockIds(value);
        }
    }
    return stripped;
}
|
||||
/**
 * Deep-compare two values recursively and report the first place they differ.
 *
 * Arrays are compared by length first (reported at `<path>.length`) and then
 * element by element; objects are compared over the union of their own
 * enumerable keys, so a key missing on one side equals an `undefined` value on
 * the other.
 *
 * @param a    left value.
 * @param b    right value.
 * @param path label of the current position; defaults to `"$"` (the root).
 * @returns `null` when the values are equal, otherwise `{ path, a, b }` for the
 *          first divergence found.
 */
export function firstDivergence(a, b, path = "$") {
    if (a === b) {
        return null;
    }
    const mismatchHere = { path, a, b };
    // Only two non-null objects can be compared structurally; every other
    // non-identical pair diverges right here.
    const bothObjects = a !== null && b !== null && typeof a === "object" && typeof b === "object";
    if (!bothObjects) {
        return mismatchHere;
    }
    const leftIsArray = Array.isArray(a);
    if (leftIsArray !== Array.isArray(b)) {
        return mismatchHere;
    }
    if (leftIsArray) {
        if (a.length !== b.length) {
            return { path: `${path}.length`, a: a.length, b: b.length };
        }
        for (const [index, item] of a.entries()) {
            const found = firstDivergence(item, b[index], `${path}[${index}]`);
            if (found) {
                return found;
            }
        }
        return null;
    }
    // Keys of `a` first, then the keys only `b` has.
    const allKeys = new Set(Object.keys(a).concat(Object.keys(b)));
    for (const key of allKeys) {
        const found = firstDivergence(a[key], b[key], `${path}.${key}`);
        if (found) {
            return found;
        }
    }
    return null;
}
|
||||
23
packages/git-sync/build/engine/sanitize.d.ts
vendored
Normal file
23
packages/git-sync/build/engine/sanitize.d.ts
vendored
Normal file
@@ -0,0 +1,23 @@
|
||||
/**
|
||||
* Deterministic filename strategy (SPEC §12).
|
||||
*
|
||||
* The file name is COSMETIC — the source of truth for the file<->page link is
|
||||
* `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
|
||||
* functions are intentionally dependency-free and pure, so they are trivially
|
||||
* unit-testable.
|
||||
*/
|
||||
/**
 * Turn a page title into a safe file-name component (no extension).
 *
 * Pipeline: forbidden and control characters become "-", runs of whitespace
 * collapse to one space, the result is trimmed and capped in length; finally
 * an empty result, an all-dots result or a reserved Windows device name is
 * made usable by prefixing it with "_".
 */
export declare function sanitizeTitle(title: string): string;
|
||||
/**
 * Make a sanitized name unique when two siblings in one folder collapse to the
 * same name, by appending a suffix derived from the page's `slugId`. The suffix
 * is stable, so the result is deterministic across runs
 * (SPEC §12: `Title ~slugId`).
 */
export declare function disambiguate(name: string, slugId: string): string;
|
||||
97
packages/git-sync/build/engine/sanitize.js
Normal file
97
packages/git-sync/build/engine/sanitize.js
Normal file
@@ -0,0 +1,97 @@
|
||||
/**
|
||||
* Deterministic filename strategy (SPEC §12).
|
||||
*
|
||||
* The file name is COSMETIC — the source of truth for the file<->page link is
|
||||
* `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
|
||||
* functions are intentionally dependency-free and pure, so they are trivially
|
||||
* unit-testable.
|
||||
*/
|
||||
// Printable characters forbidden in file names on common filesystems (mainly
// Windows): / \ < > : " | ? *. Each match is replaced with a single "-".
// Spaces are NOT in this set; whitespace is normalized separately below.
// ASCII control characters (code points 0..31) are handled in a separate pass
// (see stripControlChars) to keep this literal free of embedded control bytes.
const FORBIDDEN_PRINTABLE_RE = /[/\\<>:"|?*]/g;
// Runs of whitespace collapse to a single space. Tabs and newlines never reach
// this pattern: they are control characters and have already been turned into
// "-" by stripControlChars, so this covers spaces and non-ASCII whitespace.
const WHITESPACE_RUN_RE = /\s+/g;
// Reserved Windows device names (case-insensitive). A bare match (with or
// without an extension) is unusable as a file name, so it is prefixed with "_".
const RESERVED_WINDOWS_NAMES = new Set([
    "con",
    "prn",
    "aux",
    "nul",
    "com1",
    "com2",
    "com3",
    "com4",
    "com5",
    "com6",
    "com7",
    "com8",
    "com9",
    "lpt1",
    "lpt2",
    "lpt3",
    "lpt4",
    "lpt5",
    "lpt6",
    "lpt7",
    "lpt8",
    "lpt9",
]);
// Cap on the sanitized length, leaving room for an extension and a
// disambiguation suffix within filesystem path-component limits (255 bytes on
// most FSes). NOTE: the cap is applied in UTF-16 code units (`String.length`),
// not in encoded bytes.
const MAX_LENGTH = 120;
/**
 * Replace every ASCII control character (code points 0..31) with "-". Done by
 * scanning code units rather than a control-range regex literal, so the source
 * file carries no embedded control bytes.
 */
function stripControlChars(input) {
    let out = "";
    for (let i = 0; i < input.length; i++) {
        out += input.charCodeAt(i) < 32 ? "-" : input[i];
    }
    return out;
}
/**
 * Cut `input` down to at most `maxUnits` UTF-16 code units without splitting a
 * surrogate pair: when the cut would fall between a high and a low surrogate,
 * the dangling high surrogate is dropped as well.
 */
function truncateAtCodePointBoundary(input, maxUnits) {
    if (input.length <= maxUnits) {
        return input;
    }
    const lastKept = input.charCodeAt(maxUnits - 1);
    const firstDropped = input.charCodeAt(maxUnits);
    const splitsPair = lastKept >= 0xd800 && lastKept <= 0xdbff &&
        firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
    return input.slice(0, splitsPair ? maxUnits - 1 : maxUnits);
}
/**
 * Sanitize a page title into a safe file-name component (WITHOUT extension).
 *
 * Steps: replace forbidden / control characters with "-", collapse whitespace
 * runs to a single space, trim, cap the length at MAX_LENGTH code units (never
 * cutting a surrogate pair in half), then guard against an empty result, an
 * all-dots result, or a reserved Windows device name by prefixing with "_".
 *
 * @param title raw page title; `null` / `undefined` are treated as "".
 * @returns the sanitized name; never empty.
 */
export function sanitizeTitle(title) {
    let name = stripControlChars(title ?? "")
        .replace(FORBIDDEN_PRINTABLE_RE, "-")
        .replace(WHITESPACE_RUN_RE, " ")
        .trim();
    if (name.length > MAX_LENGTH) {
        // A plain slice() counts code units and could leave a lone high
        // surrogate (ill-formed Unicode) at the end of the file name.
        name = truncateAtCodePointBoundary(name, MAX_LENGTH).trim();
    }
    // Compare the base name (before the first dot) against reserved names, so
    // both "CON" and "con.md" are caught.
    const base = name.split(".")[0]?.toLowerCase() ?? "";
    // A name that is empty, consists only of dots ("." / ".." / "..."), or is a
    // reserved Windows device name is unusable as a path component. The all-dots
    // case is a path-traversal hazard in particular: an unprefixed ".." would
    // become a parent-directory segment and let a page escape the vault, so it
    // MUST be neutralized here (becomes "_..", which is a literal file name).
    if (name.length === 0 ||
        /^\.+$/.test(name) ||
        RESERVED_WINDOWS_NAMES.has(base)) {
        name = "_" + name;
    }
    return name;
}
|
||||
/**
 * Make a sanitized name unique when two siblings in one folder collapse to the
 * same name, by appending a suffix derived from the page's `slugId`. The suffix
 * is stable, so the result is deterministic across runs
 * (SPEC §12: `Title ~slugId`).
 *
 * @param name   already-sanitized file-name component.
 * @param slugId slug id of the page.
 * @returns `name`, a space, a tilde and `slugId` joined together.
 */
export function disambiguate(name, slugId) {
    return "".concat(name, " ~", slugId);
}
|
||||
41
packages/git-sync/build/engine/settings.d.ts
vendored
Normal file
41
packages/git-sync/build/engine/settings.d.ts
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Engine settings.
|
||||
*
|
||||
* The engine is driven IN-PROCESS by the NestJS server, which builds the
|
||||
* `Settings` object from `EnvironmentService` — so this module must NOT reach
|
||||
* into `process.env`. It exposes only:
|
||||
* - the `Settings` type the engine consumes, and
|
||||
* - `parseSettings(env)` as a PURE function (validate a raw env object -> typed
|
||||
* `Settings`), kept for unit tests and for the server to reuse if it wants
|
||||
* to validate an env-shaped object.
|
||||
* There is no `.env`-loading side-effecting entry point.
|
||||
*/
|
||||
import { z } from 'zod';
|
||||
/**
 * Zod schema for the raw environment object, keyed by the real ENV variable
 * names. `VAULT_PATH`, `POLL_INTERVAL_MS`, `DEBOUNCE_MS` and `LOG_LEVEL` carry
 * defaults; `GIT_REMOTE` is optional.
 */
export declare const envSchema: z.ZodObject<{
    DOCMOST_API_URL: z.ZodString;
    DOCMOST_EMAIL: z.ZodString;
    DOCMOST_PASSWORD: z.ZodString;
    DOCMOST_SPACE_ID: z.ZodString;
    VAULT_PATH: z.ZodDefault<z.ZodString>;
    GIT_REMOTE: z.ZodPipe<z.ZodTransform<unknown, unknown>, z.ZodOptional<z.ZodString>>;
    POLL_INTERVAL_MS: z.ZodDefault<z.ZodCoercedNumber<unknown>>;
    DEBOUNCE_MS: z.ZodDefault<z.ZodCoercedNumber<unknown>>;
    LOG_LEVEL: z.ZodDefault<z.ZodEnum<{
        info: "info";
        error: "error";
        debug: "debug";
        warn: "warn";
    }>>;
}, z.core.$strip>;
|
||||
/** Typed engine configuration, as produced by `parseSettings`. */
export type Settings = {
    /** Base URL of the Docmost API. */
    docmostApiUrl: string;
    /** Login e-mail. */
    docmostEmail: string;
    /** Login password. */
    docmostPassword: string;
    /** Id of the Docmost space to mirror. */
    docmostSpaceId: string;
    /** Location of the local git vault. */
    vaultPath: string;
    /** Git remote the vault pushes to; absent when not configured. */
    gitRemote?: string;
    /** Poll interval in milliseconds. */
    pollIntervalMs: number;
    /** Debounce delay in milliseconds. */
    debounceMs: number;
    /** Logging verbosity. */
    logLevel: 'debug' | 'info' | 'warn' | 'error';
};
|
||||
/** Validate a raw environment object and map it to a typed `Settings` value. */
export declare function parseSettings(env: NodeJS.ProcessEnv): Settings;
|
||||
49
packages/git-sync/build/engine/settings.js
Normal file
49
packages/git-sync/build/engine/settings.js
Normal file
@@ -0,0 +1,49 @@
|
||||
/**
|
||||
* Engine settings.
|
||||
*
|
||||
* The engine is driven IN-PROCESS by the NestJS server, which builds the
|
||||
* `Settings` object from `EnvironmentService` — so this module must NOT reach
|
||||
* into `process.env`. It exposes only:
|
||||
* - the `Settings` type the engine consumes, and
|
||||
* - `parseSettings(env)` as a PURE function (validate a raw env object -> typed
|
||||
* `Settings`), kept for unit tests and for the server to reuse if it wants
|
||||
* to validate an env-shaped object.
|
||||
* There is no `.env`-loading side-effecting entry point.
|
||||
*/
|
||||
import { z } from 'zod';
|
||||
// Building blocks shared by several fields of the schema below.
// A string that must not be empty.
const nonEmptyString = () => z.string().min(1);
// A value coerced to a positive integer, with a fallback when it is missing.
const positiveIntWithDefault = (fallback) => z.coerce.number().int().positive().default(fallback);
// The schema keys are the real ENV variable names, so a validation error names
// the exact variable. The address of our OWN Docmost instance and the
// credentials deliberately have NO default: a missing value has to fail at
// startup rather than silently fall back.
export const envSchema = z.object({
    // Address of our own Docmost instance (required).
    DOCMOST_API_URL: z.string().url(),
    // Credentials used for /auth/login (required, never hardcoded).
    DOCMOST_EMAIL: nonEmptyString(),
    DOCMOST_PASSWORD: nonEmptyString(),
    // The Docmost space that is mirrored.
    DOCMOST_SPACE_ID: nonEmptyString(),
    // Local git vault (state store); lives under data/ so the volume persists it.
    VAULT_PATH: nonEmptyString().default('data/vault'),
    // Optional git remote for the vault; an empty string counts as "not set".
    GIT_REMOTE: z.preprocess((raw) => (raw === '' ? undefined : raw), nonEmptyString().optional()),
    // Non-secret tunables, for which sensible defaults are fine.
    POLL_INTERVAL_MS: positiveIntWithDefault(15000),
    DEBOUNCE_MS: positiveIntWithDefault(2000),
    LOG_LEVEL: z.enum(['debug', 'info', 'warn', 'error']).default('info'),
});
|
||||
/**
 * Validate a raw environment object against `envSchema` and map it to the
 * typed `Settings` shape. Pure: it has no side effects, so it is safe to
 * import in tests.
 *
 * @param env raw, env-shaped object.
 * @returns the typed settings.
 * @throws ZodError when the configuration is invalid.
 */
export function parseSettings(env) {
    const {
        DOCMOST_API_URL: docmostApiUrl,
        DOCMOST_EMAIL: docmostEmail,
        DOCMOST_PASSWORD: docmostPassword,
        DOCMOST_SPACE_ID: docmostSpaceId,
        VAULT_PATH: vaultPath,
        GIT_REMOTE: gitRemote,
        POLL_INTERVAL_MS: pollIntervalMs,
        DEBOUNCE_MS: debounceMs,
        LOG_LEVEL: logLevel,
    } = envSchema.parse(env);
    return {
        docmostApiUrl,
        docmostEmail,
        docmostPassword,
        docmostSpaceId,
        vaultPath,
        gitRemote,
        pollIntervalMs,
        debounceMs,
        logLevel,
    };
}
|
||||
41
packages/git-sync/build/engine/stabilize.d.ts
vendored
Normal file
41
packages/git-sync/build/engine/stabilize.d.ts
vendored
Normal file
@@ -0,0 +1,41 @@
|
||||
/**
 * Identity meta of a page, in the shape `exportPageBody` builds (SPEC §4). It is
 * kept byte-for-byte compatible so that files produced here match
 * `exportPageBody`'s output exactly.
 */
export interface PageMeta {
    /** Meta format version; always 1. */
    version: 1;
    pageId: string;
    slugId: string;
    title: string;
    spaceId: string;
    /** Parent page id, or `null`. */
    parentPageId: string | null;
}
|
||||
/**
 * Render the self-contained `.md` file text of a page from its raw ProseMirror
 * `content` and identity meta, in the verified fixpoint form:
 *
 *   md1        = convertProseMirrorToMarkdown(content)
 *   doc2       = markdownToProseMirror(md1)            // one import...
 *   stableBody = convertProseMirrorToMarkdown(doc2)    // ...and one re-export
 *   file       = serializeDocmostMarkdownBody(meta, stableBody)
 *
 * One export->import->export pass is the verified fixpoint (SPEC §11): it is
 * idempotent for content that is already stable, and it is where the known
 * converter asymmetries converge.
 */
export declare function stabilizePageFile(content: unknown, meta: PageMeta): Promise<string>;
|
||||
/**
 * Compute the fixpoint markdown BODY of a page's ProseMirror `content`, with no
 * meta envelope around it:
 *
 *   md1        = convertProseMirrorToMarkdown(content)   // export...
 *   doc2       = markdownToProseMirror(md1)              // ...import...
 *   stableBody = convertProseMirrorToMarkdown(doc2)      // ...re-export
 *
 * One export->import->export pass is the verified fixpoint (SPEC §11): it is
 * idempotent for content that is already stable, and it is where the known
 * converter asymmetries converge. The native-Obsidian writer
 * (`serializePageFile`) wraps this body with a minimal `gitmost_id`
 * frontmatter; the determinism of this step is what keeps re-pulls of an
 * unchanged page byte-identical (no churn, loop-guard).
 */
export declare function stabilizePageBody(content: unknown): Promise<string>;
|
||||
52
packages/git-sync/build/engine/stabilize.js
Normal file
52
packages/git-sync/build/engine/stabilize.js
Normal file
@@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Normalize-on-write helper (SPEC §11 "Резолюция").
|
||||
*
|
||||
* git diffs byte-for-byte, so writing a page in a NON-fixpoint markdown form
|
||||
* would make the next pull re-export it to a slightly different (but stable)
|
||||
* form and produce a phantom diff -> churny commits. The converter has a couple
|
||||
* of known one-pass asymmetries (a block image after a paragraph adds an empty
|
||||
* paragraph; a diagram materializes `data-align`), all of which converge to a
|
||||
* fixpoint after ONE `export -> import -> export` round-trip.
|
||||
*
|
||||
* So at write time we run exactly that one pass and persist the fixpoint form.
|
||||
* Already-stable content is unaffected (the pass is idempotent), so re-pulls of
|
||||
* unchanged pages produce identical bytes and git sees no diff.
|
||||
*/
|
||||
import { convertProseMirrorToMarkdown, markdownToProseMirror, serializeDocmostMarkdownBody, } from "../lib/index.js";
|
||||
/**
 * Render the self-contained `.md` file text of a page from its raw ProseMirror
 * `content` and identity meta, in the verified fixpoint form:
 *
 *   md1        = convertProseMirrorToMarkdown(content)
 *   doc2       = markdownToProseMirror(md1)            // one import...
 *   stableBody = convertProseMirrorToMarkdown(doc2)    // ...and one re-export
 *   file       = serializeDocmostMarkdownBody(meta, stableBody)
 *
 * One export->import->export pass is the verified fixpoint (SPEC §11): it is
 * idempotent for content that is already stable, and it is where the known
 * converter asymmetries converge.
 *
 * @param content raw ProseMirror content of the page.
 * @param meta    identity meta, passed to the serializer unchanged.
 * @returns the complete file text.
 */
export async function stabilizePageFile(content, meta) {
    // The three conversion steps live in stabilizePageBody; this function only
    // adds the meta envelope around the stabilized body.
    const stableBody = await stabilizePageBody(content);
    return serializeDocmostMarkdownBody(meta, stableBody);
}
|
||||
/**
 * Compute the fixpoint markdown BODY of a page's ProseMirror `content`, with no
 * meta envelope around it, by running one export -> import -> re-export pass.
 *
 * That single pass is the verified fixpoint (SPEC §11): it is idempotent for
 * content that is already stable, and it is where the known converter
 * asymmetries converge. The native-Obsidian writer (`serializePageFile`) wraps
 * this body with a minimal `gitmost_id` frontmatter; the determinism of this
 * step is what keeps re-pulls of an unchanged page byte-identical (no churn,
 * loop-guard).
 *
 * @param content raw ProseMirror content of the page.
 * @returns the stabilized markdown body.
 */
export async function stabilizePageBody(content) {
    // Export once...
    const firstExport = convertProseMirrorToMarkdown(content);
    // ...import what was exported...
    const reimportedDoc = await markdownToProseMirror(firstExport);
    // ...and export again: this second export is the stable form.
    return convertProseMirrorToMarkdown(reimportedDoc);
}
|
||||
31
packages/git-sync/build/index.d.ts
vendored
Normal file
31
packages/git-sync/build/index.d.ts
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
/**
|
||||
* Public surface of `@docmost/git-sync`.
|
||||
*
|
||||
* Exposes the pure converter (markdown <-> ProseMirror, file envelope,
|
||||
* canonicalization) and the sync engine (reconcile planner, vault layout,
|
||||
* pull/push, the git wrapper, and the settings parser) that the gitmost server
|
||||
* drives in-process.
|
||||
*/
|
||||
export { serializeDocmostMarkdown, serializeDocmostMarkdownBody, parseDocmostMarkdown, convertProseMirrorToMarkdown, markdownToProseMirror, canonicalizeContent, docsCanonicallyEqual, } from "./lib/index.js";
|
||||
export type { DocmostMdMeta } from "./lib/index.js";
|
||||
export { planReconciliation, decideAbsenceDeletions, MASS_DELETE_MIN_EXISTING, MASS_DELETE_FRACTION, } from "./engine/reconcile.js";
|
||||
export type { LiveEntry, ExistingEntry, WriteEntry, MovedEntry, ReconciliationPlan, DeletionDecision, } from "./engine/reconcile.js";
|
||||
export { buildVaultLayout } from "./engine/layout.js";
|
||||
export type { PageNode, VaultEntry } from "./engine/layout.js";
|
||||
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
|
||||
export { stabilizePageFile } from "./engine/stabilize.js";
|
||||
export type { PageMeta } from "./engine/stabilize.js";
|
||||
export { bodyHash } from "./engine/loop-guard.js";
|
||||
export type { GitSyncClient, GitSyncPageNodeLite } from "./engine/client.types.js";
|
||||
export { VaultGit, vaultGitEnv, buildCommitMessage, BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./engine/git.js";
|
||||
export type { DiffEntry, MergeResult, CommitOptions } from "./engine/git.js";
|
||||
export { readExisting, computePullActions, applyPullActions, } from "./engine/pull.js";
|
||||
export type { ReadExistingDeps, PullActionsInput, PullActions, ApplyPullActionsDeps, ApplyResult, } from "./engine/pull.js";
|
||||
export { classifyRenameMoves, computePushActions, applyPushActions, runPush, parentFolderFile, parseArgs, LAST_PUSHED_REF, DOCMOST_BRANCH, LOCAL_AUTHOR_NAME, LOCAL_AUTHOR_EMAIL, LOCAL_SOURCE_TRAILER, } from "./engine/push.js";
|
||||
export type { CreateAction, UpdateAction, DeleteAction, RenameMoveAction, RenameMoveActionClassified, ClassifyRenameMovesDeps, PushActions, PushActionsInput, MetaSide, ApplyPushDeps, WrittenBackPage, PushedPageRecord, PushFailure, PushNoop, ApplyPushResult, PushDeps, PushRunResult, PushParsedArgs, } from "./engine/push.js";
|
||||
export { parseSettings, envSchema } from "./engine/settings.js";
|
||||
export type { Settings } from "./engine/settings.js";
|
||||
export { loadSettingsOrExit } from "./engine/config-errors.js";
|
||||
export { runCycle } from "./engine/cycle.js";
|
||||
export type { RunCycleDeps, RunCycleResult, CycleFs, } from "./engine/cycle.js";
|
||||
export { parsePageFile, serializePageFile } from "./lib/page-file.js";
|
||||
24
packages/git-sync/build/index.js
Normal file
24
packages/git-sync/build/index.js
Normal file
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Public surface of `@docmost/git-sync`.
|
||||
*
|
||||
* Exposes the pure converter (markdown <-> ProseMirror, file envelope,
|
||||
* canonicalization) and the sync engine (reconcile planner, vault layout,
|
||||
* pull/push, the git wrapper, and the settings parser) that the gitmost server
|
||||
* drives in-process.
|
||||
*/
|
||||
// Pure converter (markdown <-> ProseMirror, file envelope, canonicalization).
|
||||
export { serializeDocmostMarkdown, serializeDocmostMarkdownBody, parseDocmostMarkdown, convertProseMirrorToMarkdown, markdownToProseMirror, canonicalizeContent, docsCanonicallyEqual, } from "./lib/index.js";
|
||||
// Pure engine (no IO): reconcile planner, vault layout, sanitize, stabilize,
|
||||
// loop-guard body hash.
|
||||
export { planReconciliation, decideAbsenceDeletions, MASS_DELETE_MIN_EXISTING, MASS_DELETE_FRACTION, } from "./engine/reconcile.js";
|
||||
export { buildVaultLayout } from "./engine/layout.js";
|
||||
export { sanitizeTitle, disambiguate } from "./engine/sanitize.js";
|
||||
export { stabilizePageFile } from "./engine/stabilize.js";
|
||||
export { bodyHash } from "./engine/loop-guard.js";
|
||||
export { VaultGit, vaultGitEnv, buildCommitMessage, BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./engine/git.js";
|
||||
export { readExisting, computePullActions, applyPullActions, } from "./engine/pull.js";
|
||||
export { classifyRenameMoves, computePushActions, applyPushActions, runPush, parentFolderFile, parseArgs, LAST_PUSHED_REF, DOCMOST_BRANCH, LOCAL_AUTHOR_NAME, LOCAL_AUTHOR_EMAIL, LOCAL_SOURCE_TRAILER, } from "./engine/push.js";
|
||||
export { parseSettings, envSchema } from "./engine/settings.js";
|
||||
export { loadSettingsOrExit } from "./engine/config-errors.js";
|
||||
export { runCycle } from "./engine/cycle.js";
|
||||
export { parsePageFile, serializePageFile } from "./lib/page-file.js";
|
||||
38
packages/git-sync/build/lib/canonicalize.d.ts
vendored
Normal file
38
packages/git-sync/build/lib/canonicalize.d.ts
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
|
||||
* Semantic canonicalization of ProseMirror/TipTap documents for the round-trip
|
||||
* idempotency check (SPEC §11, "Задача №0", option (б): compare a CANONICALIZED
|
||||
* form rather than raw bytes).
|
||||
*
|
||||
* `markdownToProseMirror` reconstructs schema DEFAULT attributes (e.g.
|
||||
* `indent: null` where the source omitted it) and regenerates per-block ids on
|
||||
* every import. A raw deep-equal of the source doc against the re-imported doc
|
||||
* therefore diverges even when the two are semantically identical. This module
|
||||
* normalizes a document so that two semantically-equal docs compare deep-equal
|
||||
* regardless of block ids and absent-vs-explicit-default-null attributes.
|
||||
*
|
||||
* It is a self-contained module with no external dependencies.
|
||||
*/
|
||||
/**
|
||||
* Return a DEEP COPY of a ProseMirror node tree, canonicalized so that two
|
||||
* semantically-equal documents compare deep-equal. Rules (applied recursively
|
||||
* to the node, its `content`, and its `marks`):
|
||||
*
|
||||
* 1. Remove node-level `attrs.id` (regenerated on import). Mark attrs are NOT
|
||||
* touched for `id` (marks carry no block id; only their meaningful attrs).
|
||||
* 2. In any `attrs` object (node OR mark) drop keys whose value is `null`/
|
||||
* `undefined` (absent ≡ explicit default null) OR equals that node/mark
|
||||
* type's known non-null schema default (absent ≡ explicit default).
|
||||
* Keep every non-default value. The type is passed into the attrs
|
||||
* normalizer so it can look up `KNOWN_DEFAULTS`.
|
||||
* 3. If an `attrs` object becomes empty after pruning, drop the `attrs` key.
|
||||
* 4. Preserve `marks` (including the `comment` mark and its `commentId` — a
|
||||
* meaningful anchor per SPEC §3; never strip it).
|
||||
* 5. Preserve `text`, `type`, and `content` order exactly.
|
||||
* 6. Never mutate the input.
|
||||
*/
|
||||
export declare function canonicalizeContent(node: any): any;
|
||||
/**
 * True when two ProseMirror documents are semantically equal: equal after
 * canonicalization (node-level block ids stripped; an absent attribute treated
 * the same as an explicit `null`/`undefined` or a known schema-default value).
 */
|
||||
export declare function docsCanonicallyEqual(a: any, b: any): boolean;
|
||||
245
packages/git-sync/build/lib/canonicalize.js
Normal file
245
packages/git-sync/build/lib/canonicalize.js
Normal file
@@ -0,0 +1,245 @@
|
||||
/**
|
||||
* Semantic canonicalization of ProseMirror/TipTap documents for the round-trip
|
||||
* idempotency check (SPEC §11, "Задача №0", option (б): compare a CANONICALIZED
|
||||
* form rather than raw bytes).
|
||||
*
|
||||
* `markdownToProseMirror` reconstructs schema DEFAULT attributes (e.g.
|
||||
* `indent: null` where the source omitted it) and regenerates per-block ids on
|
||||
* every import. A raw deep-equal of the source doc against the re-imported doc
|
||||
* therefore diverges even when the two are semantically identical. This module
|
||||
* normalizes a document so that two semantically-equal docs compare deep-equal
|
||||
* regardless of block ids and absent-vs-explicit-default-null attributes.
|
||||
*
|
||||
* It is a self-contained module with no external dependencies.
|
||||
*/
|
||||
/**
|
||||
* Known NON-NULL schema defaults that `markdownToProseMirror` materializes on
|
||||
* import, keyed by node/mark type → { attr: defaultValue }.
|
||||
*
|
||||
* Why this exists: `canonicalizeAttrs` already treats an absent attr as
|
||||
* equivalent to an explicit `null`/`undefined`. But several Docmost schema
|
||||
* attributes default to a NON-null value, so import fills them in even when the
|
||||
* source omitted them — making "attr absent" diverge from "attr at its default
|
||||
* value" under a raw deep-equal. To keep "absent ≡ explicit-default", we ALSO
|
||||
* drop any attr whose value equals its known schema default. A non-default
|
||||
* value (e.g. `orderedList.start: 5`) is NOT a default, so it is KEPT.
|
||||
*
|
||||
* Every entry below was read from `packages/docmost-client/src/lib/
|
||||
* docmost-schema.ts` (the line refs are the exact `default:` declarations) and
|
||||
* confirmed to be materialized by an export→import→export round-trip:
|
||||
* - mark `link` target / rel — DocmostAttributes + StarterKit link.
|
||||
* StarterKit's link extension defaults `target: "_blank"` and
|
||||
* `rel: "noopener noreferrer nofollow"`; both materialize on import
|
||||
* (empirically confirmed) even when the source had only `href`.
|
||||
* - mark `comment` resolved — docmost-schema.ts L213-214 (`default: false`).
|
||||
* - node `orderedList` start — provided by StarterKit's orderedList
|
||||
* (`default: 1`); materializes on import (empirically confirmed).
|
||||
* - node `drawio`/`excalidraw`/`video`/`youtube`/`embed` align — the diagram
|
||||
* attribute set and the media nodes declare `align: { default: "center" }`
|
||||
* (docmost-schema.ts L745-750 diagramAttributes; L564 video; L626 youtube;
|
||||
* L667 embed). The diagram `align` is the one the round-trip materializes
|
||||
* (docmost-schema.ts L745); the media/embed entries normalize the SAME
|
||||
* `align` default for consistency. Note: this only normalizes `align` —
|
||||
* full canonical stability of `embed` is separately limited by the
|
||||
* converter coercing numeric `width`/`height` to strings, which is outside
|
||||
* canonicalize's scope.
|
||||
*
|
||||
* NOTE: `image` has NO non-null align default — its `align` defaults to `null`
|
||||
* (docmost-schema.ts L174), so it is already handled by the null-drop rule and
|
||||
* is intentionally NOT listed here.
|
||||
*/
|
||||
const KNOWN_DEFAULTS = {
|
||||
// mark types
|
||||
link: {
|
||||
target: "_blank",
|
||||
rel: "noopener noreferrer nofollow",
|
||||
},
|
||||
comment: {
|
||||
resolved: false,
|
||||
},
|
||||
// node types
|
||||
orderedList: {
|
||||
start: 1,
|
||||
},
|
||||
drawio: {
|
||||
align: "center",
|
||||
},
|
||||
excalidraw: {
|
||||
align: "center",
|
||||
},
|
||||
video: {
|
||||
align: "center",
|
||||
},
|
||||
youtube: {
|
||||
align: "center",
|
||||
},
|
||||
embed: {
|
||||
align: "center",
|
||||
},
|
||||
};
|
||||
/**
|
||||
* Prune an `attrs` object in place on a fresh copy: drop keys whose value is
|
||||
* `null` or `undefined` (an absent attribute and an explicit default of `null`
|
||||
* are semantically equivalent here). Optionally also drop a node-level `id`
|
||||
* (block ids are regenerated on import, SPEC §11). ALSO drop any attr whose
|
||||
* value equals the node/mark `type`'s known NON-null schema default
|
||||
* (`KNOWN_DEFAULTS`), so "attr absent" ≡ "attr at its default value" — without
|
||||
* this, the import-materialized `link.target`/`comment.resolved`/
|
||||
* `orderedList.start`/diagram `align` defaults would be a phantom diff. Every
|
||||
* non-default attribute value is KEPT (level, language, src, href, commentId,
|
||||
* width, a non-default `start`/`align`, ...).
|
||||
*
|
||||
* Returns the pruned attrs object, or `undefined` if nothing meaningful is
|
||||
* left (so the caller can drop the `attrs` key entirely: `{attrs:{}}` ≡ no
|
||||
* attrs).
|
||||
*/
|
||||
function canonicalizeAttrs(attrs, dropId, type) {
|
||||
const defaults = type ? KNOWN_DEFAULTS[type] : undefined;
|
||||
const out = {};
|
||||
// Stable key order so a JSON.stringify of the canonical form is comparable
|
||||
// regardless of the input's key order.
|
||||
for (const key of Object.keys(attrs).sort()) {
|
||||
// Block ids are regenerated on import; drop them on NODE attrs only.
|
||||
if (dropId && key === "id")
|
||||
continue;
|
||||
const value = attrs[key];
|
||||
// Absent ≡ explicit-default-null/undefined.
|
||||
if (value === null || value === undefined)
|
||||
continue;
|
||||
// Absent ≡ explicit known non-null default (e.g. link.target="_blank").
|
||||
// A non-default value (e.g. orderedList.start=5) does NOT match, so it is
|
||||
// kept. The `comment` mark's `commentId` is never a default, so it always
|
||||
// survives (SPEC §3); only its `resolved: false` default is normalized away.
|
||||
if (defaults && key in defaults && value === defaults[key])
|
||||
continue;
|
||||
out[key] = value;
|
||||
}
|
||||
return Object.keys(out).length > 0 ? out : undefined;
|
||||
}
|
||||
/**
 * Produce a canonical copy of a ProseMirror node tree so that two
 * semantically-equal documents compare deep-equal. The input is never mutated.
 *
 * Handling by value kind:
 *   - arrays: each element is canonicalized, order preserved;
 *   - primitives and `null`: returned unchanged;
 *   - objects (nodes): every key is visited in the input's key order:
 *       * `attrs` (when it is an object) is pruned by `canonicalizeAttrs` with
 *         `dropId = true` — the node-level `id` is regenerated on import — and
 *         with the node's `type` (when it is a string) for the known-default
 *         lookup. If nothing survives pruning, the `attrs` key is omitted.
 *       * `marks` (when it is an array) is mapped through `canonicalizeMark`;
 *         all marks are kept (including `comment` and its `commentId`, a
 *         meaningful anchor per SPEC §3). An empty array is omitted so
 *         `marks: []` ≡ no marks.
 *       * any other key (`type`, `text`, `content`, ...) is canonicalized
 *         recursively and kept.
 *
 * Whether an object is a node or a mark cannot be told from the object alone,
 * so the distinction is made at the recursion site: objects reached through a
 * `marks` array go to `canonicalizeMark`, which does not drop `id`.
 */
export function canonicalizeContent(node) {
    if (Array.isArray(node))
        return node.map((item) => canonicalizeContent(item));
    const isPlainValue = node === null || typeof node !== "object";
    if (isPlainValue) {
        // string / number / boolean / null / undefined leaf
        return node;
    }
    const result = {};
    for (const [field, raw] of Object.entries(node)) {
        if (field === "attrs" && raw && typeof raw === "object") {
            const nodeType = typeof node.type === "string" ? node.type : undefined;
            const pruned = canonicalizeAttrs(raw, true, nodeType);
            // Nothing left after pruning: leave the `attrs` key out entirely.
            if (pruned !== undefined)
                result["attrs"] = pruned;
            continue;
        }
        if (field === "marks" && Array.isArray(raw)) {
            const canonMarks = raw.map((mark) => canonicalizeMark(mark));
            if (canonMarks.length > 0)
                result["marks"] = canonMarks;
            continue;
        }
        result[field] = canonicalizeContent(raw);
    }
    return result;
}
|
||||
/**
 * Canonical copy of a single mark.
 *
 * Non-object marks are returned unchanged. Otherwise every key is copied in
 * input order; `attrs` (when it is an object) is pruned by `canonicalizeAttrs`
 * with `dropId = false`, because a mark has no block id — an `id` on a mark
 * would be a meaningful attribute. `null`/`undefined` attrs and known defaults
 * such as `link.target="_blank"`, `link.rel="noopener…"` and
 * `comment.resolved=false` are normalized away; meaningful non-default attrs
 * (href, commentId, color, level, ...) survive — in particular the `comment`
 * mark's `commentId` (SPEC §3). If pruning leaves nothing, the `attrs` key is
 * omitted.
 */
function canonicalizeMark(mark) {
    if (mark === null || typeof mark !== "object")
        return mark;
    const markType = typeof mark.type === "string" ? mark.type : undefined;
    const result = {};
    for (const field of Object.keys(mark)) {
        const raw = mark[field];
        const isAttrsObject = field === "attrs" && raw && typeof raw === "object";
        if (!isAttrsObject) {
            result[field] = canonicalizeContent(raw);
            continue;
        }
        const pruned = canonicalizeAttrs(raw, false, markType);
        if (pruned !== undefined)
            result["attrs"] = pruned;
    }
    return result;
}
|
||||
/**
|
||||
* Deep structural equality of two values that is key-order-insensitive.
|
||||
* Used to compare canonical forms. (`canonicalizeContent` already emits
|
||||
* `attrs` in a stable key order, but the top-level node keys preserve input
|
||||
* order, so we compare structurally rather than by string.)
|
||||
*/
|
||||
function deepEqual(a, b) {
|
||||
if (a === b)
|
||||
return true;
|
||||
if (typeof a !== typeof b)
|
||||
return false;
|
||||
if (a === null || b === null)
|
||||
return a === b;
|
||||
if (typeof a !== "object")
|
||||
return false;
|
||||
const aIsArr = Array.isArray(a);
|
||||
const bIsArr = Array.isArray(b);
|
||||
if (aIsArr !== bIsArr)
|
||||
return false;
|
||||
if (aIsArr) {
|
||||
if (a.length !== b.length)
|
||||
return false;
|
||||
for (let i = 0; i < a.length; i++) {
|
||||
if (!deepEqual(a[i], b[i]))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
const aKeys = Object.keys(a);
|
||||
const bKeys = Object.keys(b);
|
||||
if (aKeys.length !== bKeys.length)
|
||||
return false;
|
||||
for (const k of aKeys) {
|
||||
if (!Object.prototype.hasOwnProperty.call(b, k))
|
||||
return false;
|
||||
if (!deepEqual(a[k], b[k]))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
/**
 * True when two ProseMirror documents are semantically equal, i.e. their
 * canonical forms (as produced by `canonicalizeContent`) are structurally
 * equal under `deepEqual`.
 *
 * @param a first document
 * @param b second document
 */
export function docsCanonicallyEqual(a, b) {
    const canonicalA = canonicalizeContent(a);
    const canonicalB = canonicalizeContent(b);
    return deepEqual(canonicalA, canonicalB);
}
|
||||
54
packages/git-sync/build/lib/diff.d.ts
vendored
Normal file
54
packages/git-sync/build/lib/diff.d.ts
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Headless, Docmost-equivalent document diff.
|
||||
*
|
||||
* Docmost's history editor computes a change set with the exact pipeline below
|
||||
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
|
||||
* editor decorations. This module runs the SAME computation but serializes the
|
||||
* result to text + integrity counts instead of decorations, so a diff can be
|
||||
* previewed without a browser.
|
||||
*
|
||||
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
|
||||
* maintained published fork of the MIT prosemirror-recreate-steps source that
|
||||
* Docmost vendors in @docmost/editor-ext; it exposes the identical
|
||||
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
|
||||
* signature.
|
||||
*
|
||||
* If recreateTransform / the changeset throws on a pathological document pair,
|
||||
* we fall back to a coarse block-level text diff so the tool never hard-fails.
|
||||
*/
|
||||
/** A single inserted/deleted change with its containing-block context. */
|
||||
export interface DiffChange {
|
||||
op: "insert" | "delete";
|
||||
/** Lead (plain) text of the block that contains the change, for context. */
|
||||
block: string;
|
||||
/** The inserted or deleted text. */
|
||||
text: string;
|
||||
}
|
||||
/** Integrity counts as [old, new] tuples; footnoteMarkers as [oldList, newList]. */
|
||||
export interface DiffIntegrity {
|
||||
images: [number, number];
|
||||
links: [number, number];
|
||||
tables: [number, number];
|
||||
callouts: [number, number];
|
||||
footnoteMarkers: [number[], number[]];
|
||||
}
|
||||
export interface DiffResult {
|
||||
summary: {
|
||||
inserted: number;
|
||||
deleted: number;
|
||||
blocksChanged: number;
|
||||
};
|
||||
integrity: DiffIntegrity;
|
||||
changes: DiffChange[];
|
||||
/** Human-readable unified-ish summary. */
|
||||
markdown: string;
|
||||
}
|
||||
/**
 * Diff two ProseMirror JSON documents the way Docmost's history editor does and
 * serialize the result to text + integrity counts.
 *
 * @param oldDocJson the earlier document
 * @param newDocJson the later document
 * @param notesHeading heading delimiting body from notes for footnote counting;
 *   defaults to "Примечания переводчика" when omitted
 */
|
||||
export declare function diffDocs(oldDocJson: any, newDocJson: any, notesHeading?: string): DiffResult;
|
||||
273
packages/git-sync/build/lib/diff.js
Normal file
273
packages/git-sync/build/lib/diff.js
Normal file
@@ -0,0 +1,273 @@
|
||||
/**
|
||||
* Headless, Docmost-equivalent document diff.
|
||||
*
|
||||
* Docmost's history editor computes a change set with the exact pipeline below
|
||||
* (recreateTransform -> ChangeSet.addSteps -> simplifyChanges) and renders it as
|
||||
* editor decorations. This module runs the SAME computation but serializes the
|
||||
* result to text + integrity counts instead of decorations, so a diff can be
|
||||
* previewed without a browser.
|
||||
*
|
||||
* recreateTransform here comes from @fellow/prosemirror-recreate-transform, the
|
||||
* maintained published fork of the MIT prosemirror-recreate-steps source that
|
||||
* Docmost vendors in @docmost/editor-ext; it exposes the identical
|
||||
* recreateTransform(fromDoc, toDoc, { complexSteps, wordDiffs, simplifyDiff })
|
||||
* signature.
|
||||
*
|
||||
* If recreateTransform / the changeset throws on a pathological document pair,
|
||||
* we fall back to a coarse block-level text diff so the tool never hard-fails.
|
||||
*/
|
||||
import { getSchema } from "@tiptap/core";
|
||||
import { Node } from "@tiptap/pm/model";
|
||||
import { ChangeSet, simplifyChanges } from "@tiptap/pm/changeset";
|
||||
import { recreateTransform } from "@fellow/prosemirror-recreate-transform";
|
||||
import { docmostExtensions } from "./docmost-schema.js";
|
||||
/** Build the schema once; it is pure and reused across calls. */
|
||||
const schema = getSchema(docmostExtensions);
|
||||
/**
 * Plain text of a JSON node: the node's own `text` (when it is a string)
 * followed by the plain text of each entry of its `content` array, in order.
 * Anything that is not an object yields "".
 */
function plainText(node) {
    if (!node || typeof node !== "object")
        return "";
    const ownText = typeof node.text === "string" ? node.text : "";
    if (!Array.isArray(node.content))
        return ownText;
    return node.content.reduce((acc, child) => acc + plainText(child), ownText);
}
|
||||
/** Count nodes in a JSON doc that satisfy `pred` (recursive). */
|
||||
function countNodes(doc, pred) {
|
||||
let n = 0;
|
||||
const visit = (node) => {
|
||||
if (!node || typeof node !== "object")
|
||||
return;
|
||||
if (pred(node))
|
||||
n++;
|
||||
if (Array.isArray(node.content))
|
||||
for (const c of node.content)
|
||||
visit(c);
|
||||
};
|
||||
visit(doc);
|
||||
return n;
|
||||
}
|
||||
/**
|
||||
* Count UNIQUE links in a JSON doc by their `href`. A single link can be split
|
||||
* across several adjacent text runs (e.g. a "link+bold" run followed by a "link"
|
||||
* run); counting link-bearing runs would over-count it. Walking the tree and
|
||||
* collecting hrefs into a Set keys each distinct link once. Link marks with a
|
||||
* missing/empty href are bucketed under a single "" key so a malformed link is
|
||||
* still counted as one.
|
||||
*/
|
||||
function countUniqueLinks(doc) {
|
||||
const hrefs = new Set();
|
||||
const visit = (node) => {
|
||||
if (!node || typeof node !== "object")
|
||||
return;
|
||||
if (node.type === "text" && Array.isArray(node.marks)) {
|
||||
for (const m of node.marks) {
|
||||
if (m && m.type === "link") {
|
||||
const href = m.attrs && typeof m.attrs.href === "string" ? m.attrs.href : "";
|
||||
hrefs.add(href);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (Array.isArray(node.content))
|
||||
for (const c of node.content)
|
||||
visit(c);
|
||||
};
|
||||
visit(doc);
|
||||
return hrefs.size;
|
||||
}
|
||||
/**
 * Integers of the `[N]` footnote markers found in the BODY of a doc, in
 * reading order.
 *
 * The body is every top-level block before the first top-level `heading`
 * whose trimmed plain text equals `notesHeading`; when no such heading exists
 * the whole doc is the body. A doc without a `content` array yields [].
 *
 * @param doc          JSON document
 * @param notesHeading exact (trimmed) text of the heading that starts the notes
 * @returns the marker numbers, duplicates included, in order of appearance
 */
function footnoteMarkers(doc, notesHeading) {
    const topLevel = Array.isArray(doc?.content) ? doc.content : [];
    const headingAt = topLevel.findIndex((block) => block && block.type === "heading" && plainText(block).trim() === notesHeading);
    const body = headingAt >= 0 ? topLevel.slice(0, headingAt) : topLevel;
    const found = [];
    for (const block of body) {
        for (const hit of plainText(block).matchAll(/\[(\d+)\]/g))
            found.push(Number(hit[1]));
    }
    return found;
}
|
||||
/**
 * Integrity figures for an (old, new) pair of JSON docs. Each of `images`,
 * `links`, `tables` and `callouts` is an `[oldCount, newCount]` tuple;
 * `footnoteMarkers` is `[oldMarkers, newMarkers]`, the two marker lists.
 * Links are counted as unique hrefs; the other counts are nodes of the
 * matching `type`.
 */
function computeIntegrity(oldDoc, newDoc, notesHeading) {
    // [old, new] count of nodes whose `type` is `typeName`.
    const countOfType = (typeName) => [
        countNodes(oldDoc, (n) => n.type === typeName),
        countNodes(newDoc, (n) => n.type === typeName),
    ];
    return {
        images: countOfType("image"),
        links: [countUniqueLinks(oldDoc), countUniqueLinks(newDoc)],
        tables: countOfType("table"),
        callouts: countOfType("callout"),
        footnoteMarkers: [
            footnoteMarkers(oldDoc, notesHeading),
            footnoteMarkers(newDoc, notesHeading),
        ],
    };
}
|
||||
/**
|
||||
* Resolve the lead text of the top-level block in a ProseMirror Node that
|
||||
* contains the given document position. Returns "" when out of range.
|
||||
*/
|
||||
function blockContextAt(node, pos) {
|
||||
try {
|
||||
const clamped = Math.max(0, Math.min(pos, node.content.size));
|
||||
const $pos = node.resolve(clamped);
|
||||
// depth 1 is the top-level block in a doc node.
|
||||
const block = $pos.depth >= 1 ? $pos.node(1) : $pos.node(0);
|
||||
const text = block.textContent || "";
|
||||
return text.length > 80 ? text.slice(0, 77) + "..." : text;
|
||||
}
|
||||
catch {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
/**
 * Shorten `s` to at most `n` characters for the markdown summary.
 *
 * Strings no longer than `n` are returned unchanged. Longer strings are cut to
 * `n - 3` characters followed by "...". When `n` is smaller than 3 there is no
 * room for the ellipsis, so the string is hard-cut to `n` characters (never
 * fewer than 0); previously the negative slice end produced a result longer
 * than `n`.
 *
 * @param s text to shorten
 * @param n maximum length of the result (default 120)
 */
function truncate(s, n = 120) {
    if (!(s.length > n))
        return s;
    if (n < 3)
        return s.slice(0, Math.max(0, n));
    return s.slice(0, n - 3) + "...";
}
|
||||
/**
 * Coarse fallback diff over top-level blocks, by plain text. Used only when the
 * precise changeset pipeline throws, so the tool degrades instead of failing.
 *
 * Every old block whose text is absent from the new doc is reported as a
 * "delete", then every new block whose text is absent from the old doc as an
 * "insert". Blocks whose text is blank after trimming are ignored. Each
 * change carries the full block text plus an 80-character context label.
 */
function coarseDiff(oldDoc, newDoc) {
    const blockTexts = (doc) => (Array.isArray(doc?.content) ? doc.content : []).map(plainText);
    const before = blockTexts(oldDoc);
    const after = blockTexts(newDoc);
    const beforeSet = new Set(before);
    const afterSet = new Set(after);
    const changes = [];
    // Report each text of `texts` that the other side lacks and that is not blank.
    const report = (op, texts, otherSide) => {
        for (const text of texts) {
            if (otherSide.has(text) || text.trim() === "")
                continue;
            changes.push({ op, block: truncate(text, 80), text });
        }
    };
    report("delete", before, afterSet);
    report("insert", after, beforeSet);
    return changes;
}
|
||||
/**
 * Render the human-readable, unified-ish markdown summary of a diff result.
 *
 * Layout: a title line with the summary counts; an optional note when the
 * coarse fallback was used; an "Integrity (old -> new)" section; and a
 * "Changes" section with one `+`/`-` line per change (text truncated, followed
 * by ` @ <block context>` when the change has one), or a placeholder line when
 * there are no changes. Lines are joined with "\n".
 *
 * @param result   object with `summary`, `integrity` and `changes`
 * @param fellBack whether the coarse block-level fallback produced `changes`
 */
function renderMarkdown(result, fellBack) {
    const { summary, integrity, changes } = result;
    const tupleLine = (label, tuple) => `- ${label}: ${tuple[0]} -> ${tuple[1]}`;
    const fallbackNote = fellBack
        ? ["", "> note: precise diff failed; coarse block-level diff shown."]
        : [];
    const changeLines = changes.length === 0
        ? ["(no textual changes)"]
        : changes.map((c) => {
            const sign = c.op === "insert" ? "+" : "-";
            const ctx = c.block ? ` @ ${truncate(c.block, 60)}` : "";
            return `${sign} ${truncate(c.text)}${ctx}`;
        });
    return [
        `# Diff: ${summary.inserted} inserted / ${summary.deleted} deleted (${summary.blocksChanged} blocks changed)`,
        ...fallbackNote,
        "",
        "## Integrity (old -> new)",
        tupleLine("images", integrity.images),
        tupleLine("links", integrity.links),
        tupleLine("tables", integrity.tables),
        tupleLine("callouts", integrity.callouts),
        `- footnoteMarkers: [${integrity.footnoteMarkers[0].join(", ")}] -> [${integrity.footnoteMarkers[1].join(", ")}]`,
        "",
        "## Changes",
        ...changeLines,
    ].join("\n");
}
|
||||
/**
 * Diff two ProseMirror JSON documents the way Docmost's history editor does and
 * serialize the result to text + integrity counts.
 *
 * Precise path: both documents are parsed against the Docmost schema, a
 * transform from old to new is recreated, and the simplified change set is
 * walked. For every change the deleted text (old-doc range [fromA, toA)) and
 * the inserted text (new-doc range [fromB, toB)) are recorded together with
 * the lead text of the containing block.
 *
 * Fallback path: if anything in the precise path throws, the result is rebuilt
 * from scratch with the coarse block-level diff, so this function does not
 * throw on a pathological pair. Tallies gathered before the failure are
 * discarded so the summary always matches the reported `changes`.
 *
 * `summary.inserted` / `summary.deleted` are character counts of the reported
 * texts; `summary.blocksChanged` is the number of distinct (operation, block
 * context) pairs among changes that have a non-empty block context.
 *
 * @param oldDocJson the earlier document
 * @param newDocJson the later document
 * @param notesHeading heading delimiting body from notes for footnote counting
 * @returns `{ summary, integrity, changes, markdown }`
 */
export function diffDocs(oldDocJson, newDocJson, notesHeading = "Примечания переводчика") {
    const integrity = computeIntegrity(oldDocJson, newDocJson, notesHeading);
    let changes = [];
    let inserted = 0;
    let deleted = 0;
    let fellBack = false;
    const changedBlocks = new Set();
    try {
        const oldNode = Node.fromJSON(schema, oldDocJson);
        const newNode = Node.fromJSON(schema, newDocJson);
        const tr = recreateTransform(oldNode, newNode, {
            complexSteps: false,
            wordDiffs: true,
            simplifyDiff: true,
        });
        const changeSet = ChangeSet.create(oldNode).addSteps(tr.doc, tr.mapping.maps, []);
        const simplified = simplifyChanges(changeSet.changes, newNode);
        for (const change of simplified) {
            // Deleted text lives in the OLD doc coordinate range [fromA, toA).
            if (change.toA > change.fromA) {
                const text = oldNode.textBetween(change.fromA, change.toA, "\n", " ");
                if (text.length > 0) {
                    deleted += text.length;
                    const block = blockContextAt(oldNode, change.fromA);
                    changes.push({ op: "delete", block, text });
                    if (block)
                        changedBlocks.add("d:" + block);
                }
            }
            // Inserted text lives in the NEW doc coordinate range [fromB, toB).
            if (change.toB > change.fromB) {
                const text = newNode.textBetween(change.fromB, change.toB, "\n", " ");
                if (text.length > 0) {
                    inserted += text.length;
                    const block = blockContextAt(newNode, change.fromB);
                    changes.push({ op: "insert", block, text });
                    if (block)
                        changedBlocks.add("i:" + block);
                }
            }
        }
    }
    catch {
        // Pathological pair: degrade to a coarse block-level diff so we never throw.
        fellBack = true;
        // The precise pass may have failed part-way through its loop; drop its
        // partial tallies so they are not added on top of the fallback's.
        inserted = 0;
        deleted = 0;
        changedBlocks.clear();
        changes = coarseDiff(oldDocJson, newDocJson);
        for (const c of changes) {
            if (c.op === "insert")
                inserted += c.text.length;
            else
                deleted += c.text.length;
            if (c.block)
                changedBlocks.add(c.op[0] + ":" + c.block);
        }
    }
    const partial = {
        summary: { inserted, deleted, blocksChanged: changedBlocks.size },
        integrity,
        changes,
    };
    return { ...partial, markdown: renderMarkdown(partial, fellBack) };
}
|
||||
9
packages/git-sync/build/lib/docmost-schema.d.ts
vendored
Normal file
9
packages/git-sync/build/lib/docmost-schema.d.ts
vendored
Normal file
@@ -0,0 +1,9 @@
|
||||
import { Node, Extension, Mark } from "@tiptap/core";
|
||||
export declare const clampCalloutType: (value: string | null | undefined) => string;
|
||||
export declare const sanitizeCssColor: (value: string | null | undefined) => string | null;
|
||||
/**
|
||||
* Full extension list. Image is block-level (matches Docmost); the
|
||||
* ProseMirror DOM parser hoists <img> found inside <p> automatically.
|
||||
* StarterKit v3 already bundles the link extension, configured here.
|
||||
*/
|
||||
export declare const docmostExtensions: (Node<any, any> | Mark<any, any> | Extension<any, any> | Extension<import("@tiptap/starter-kit").StarterKitOptions, any> | Node<import("@tiptap/extension-image").ImageOptions, any> | Node<import("@tiptap/extension-task-list").TaskListOptions, any> | Node<import("@tiptap/extension-task-item").TaskItemOptions, any> | Mark<import("@tiptap/extension-highlight").HighlightOptions, any> | Mark<import("@tiptap/extension-subscript").SubscriptExtensionOptions, any>)[];
|
||||
999
packages/git-sync/build/lib/docmost-schema.js
Normal file
999
packages/git-sync/build/lib/docmost-schema.js
Normal file
@@ -0,0 +1,999 @@
|
||||
/**
|
||||
* Full TipTap extension set matching the real Docmost document schema.
|
||||
*
|
||||
* The default StarterKit-only schema silently destroys Docmost-specific
|
||||
* nodes (callout, table) and drops attributes it does not know about
|
||||
* (node ids, image sizing, link targets). Every code path that converts
|
||||
* to or from ProseMirror JSON must use THIS set, otherwise a round-trip
|
||||
* loses content.
|
||||
*/
|
||||
import StarterKit from "@tiptap/starter-kit";
|
||||
import Image from "@tiptap/extension-image";
|
||||
import TaskList from "@tiptap/extension-task-list";
|
||||
import TaskItem from "@tiptap/extension-task-item";
|
||||
import Highlight from "@tiptap/extension-highlight";
|
||||
import Subscript from "@tiptap/extension-subscript";
|
||||
import Superscript from "@tiptap/extension-superscript";
|
||||
import { Node, Extension, Mark } from "@tiptap/core";
|
||||
// Local copy of @tiptap/core's getStyleProperty (added after 3.20.x). Keeping it
// here lets this package stay on the same @tiptap/core version as the editor and
// avoids a duplicate-tiptap version split in the monorepo.
/**
 * Read a single declaration from an element's inline `style` attribute.
 *
 * The property name is matched case-insensitively and, when the property is
 * declared more than once, the last declaration wins.
 *
 * @param element Object exposing `getAttribute("style")`.
 * @param propertyName CSS property to look up, e.g. "color".
 * @returns The trimmed declaration value, or null when the element has no
 *          style attribute or does not declare the property.
 */
function getStyleProperty(element, propertyName) {
    const inlineStyle = element.getAttribute("style");
    if (!inlineStyle) {
        return null;
    }
    const declarations = inlineStyle
        .split(";")
        .map((piece) => piece.trim())
        .filter(Boolean);
    const wanted = propertyName.toLowerCase();
    // Scan from the end so the last matching declaration is the one returned.
    for (const declaration of declarations.reverse()) {
        const separator = declaration.indexOf(":");
        if (separator === -1) {
            // Not a `name: value` pair; ignore it.
            continue;
        }
        const name = declaration.slice(0, separator).trim().toLowerCase();
        if (name === wanted) {
            return declaration.slice(separator + 1).trim();
        }
    }
    return null;
}
|
||||
/** Allowed Docmost callout types; anything else falls back to "info". */
const CALLOUT_TYPES = ["info", "warning", "danger", "success"];
/**
 * Normalize a callout type to one of CALLOUT_TYPES.
 *
 * The value is lower-cased before the lookup, so "WARNING" becomes "warning".
 * Anything that is not a string (null, undefined, or a stray number/object
 * coming from document JSON) or is not an allowed type yields "info" instead
 * of throwing.
 *
 * @param value Candidate callout type.
 * @returns One of "info", "warning", "danger", "success".
 */
export const clampCalloutType = (value) => {
    // Guard first: calling toLowerCase() on a truthy non-string would throw.
    if (typeof value !== "string") {
        return "info";
    }
    const normalized = value.toLowerCase();
    return CALLOUT_TYPES.includes(normalized) ? normalized : "info";
};
|
||||
/**
 * Allowlist pattern for CSS color values imported from HTML.
 *
 * Docmost interpolates stored mark colors straight into an inline style
 * attribute (e.g. style="background-color: ${color}" / "color: ${color}"), so
 * an unsanitized value such as `red; --x: url(...)` or `red"><script>` would
 * let a crafted document break out of that attribute. Only a narrow,
 * well-formed subset of CSS <color> syntax is therefore accepted:
 *   - named colors: letters only, e.g. "red", "rebeccapurple"
 *   - hex: #rgb, #rgba, #rrggbb, #rrggbbaa
 *   - functional notation: rgb()/rgba()/hsl()/hsla() containing only
 *     digits, %, ., commas, whitespace and slashes
 */
const SAFE_COLOR_RE = /^(?:[a-zA-Z]+|#(?:[0-9a-fA-F]{3,4}|[0-9a-fA-F]{6}|[0-9a-fA-F]{8})|(?:rgb|rgba|hsl|hsla)\([0-9.,%/\s]+\))$/;
/**
 * Validate a CSS color against SAFE_COLOR_RE.
 *
 * @param value Candidate color; non-strings are rejected.
 * @returns The trimmed color when it matches the allowlist, otherwise null.
 */
export const sanitizeCssColor = (value) => {
    const candidate = typeof value === "string" ? value.trim() : "";
    if (candidate === "") {
        return null;
    }
    return SAFE_COLOR_RE.test(candidate) ? candidate : null;
};
|
||||
/** Docmost callout block (info/warning/danger/success banner) wrapping block content. */
const Callout = Node.create({
    name: "callout",
    group: "block",
    content: "block+",
    defining: true,
    addAttributes: () => ({
        // The type is read explicitly from data-callout-type so that
        // generateJSON(html) keeps it; without this parseHTML every imported
        // callout became "info". Both directions clamp to the allowed types.
        type: {
            default: "info",
            parseHTML(element) {
                return clampCalloutType(element.getAttribute("data-callout-type"));
            },
            renderHTML(attributes) {
                return { "data-callout-type": clampCalloutType(attributes.type) };
            },
        },
        // Optional icon, emitted only when set.
        icon: {
            default: null,
            parseHTML(element) {
                return element.getAttribute("data-icon");
            },
            renderHTML(attributes) {
                if (!attributes.icon) {
                    return {};
                }
                return { "data-icon": attributes.icon };
            },
        },
    }),
    parseHTML: () => [{ tag: 'div[data-type="callout"]' }],
    renderHTML: ({ HTMLAttributes }) => ["div", { "data-type": "callout", ...HTMLAttributes }, 0],
});
|
||||
/**
 * Minimal table family: enough for schema round-trips and HTML parsing.
 * The table node holds one or more rows and renders them inside a <tbody>.
 */
const Table = Node.create({
    name: "table",
    group: "block",
    content: "tableRow+",
    isolating: true,
    parseHTML: () => [{ tag: "table" }],
    renderHTML: () => ["table", ["tbody", 0]],
});
|
||||
/** Table row: any mix of body cells and header cells, parsed from / rendered as <tr>. */
const TableRow = Node.create({
    name: "tableRow",
    content: "(tableCell | tableHeader)*",
    parseHTML: () => [{ tag: "tr" }],
    renderHTML: () => ["tr", 0],
});
|
||||
/**
 * Attribute set shared by tableCell and tableHeader.
 *
 * @returns A fresh attribute-spec object on every call.
 */
const cellAttributes = () => {
    // Attribute with only a default value (no custom parse/render).
    const withDefault = (fallback) => ({ default: fallback });
    return {
        colspan: withDefault(1),
        rowspan: withDefault(1),
        colwidth: withDefault(null),
        backgroundColor: withDefault(null),
        backgroundColorName: withDefault(null),
        // Column alignment so GFM aligned tables (|:--|:-:|--:|) round-trip.
        align: {
            default: null,
            // Prefer the `align` attribute, then the inline text-align style.
            parseHTML(cell) {
                return cell.getAttribute("align") || cell.style.textAlign || null;
            },
            renderHTML(attributes) {
                if (!attributes.align) {
                    return {};
                }
                return { align: attributes.align };
            },
        },
    };
};
|
||||
/** Table body cell (<td>) holding block content; attributes come from cellAttributes. */
const TableCell = Node.create({
    name: "tableCell",
    content: "block+",
    isolating: true,
    addAttributes: cellAttributes,
    parseHTML: () => [{ tag: "td" }],
    renderHTML: () => ["td", 0],
});
|
||||
/** Table header cell (<th>) holding block content; attributes come from cellAttributes. */
const TableHeader = Node.create({
    name: "tableHeader",
    content: "block+",
    isolating: true,
    addAttributes: cellAttributes,
    parseHTML: () => [{ tag: "th" }],
    renderHTML: () => ["th", 0],
});
|
||||
/**
 * Extra attributes that Docmost stores on standard nodes but the stock
 * extensions do not declare. Node.fromJSON silently drops undeclared
 * attributes, including the block ids that heading anchors rely on, so they
 * are registered here as global attributes, each defaulting to null.
 */
const DocmostAttributes = Extension.create({
    name: "docmostAttributes",
    addGlobalAttributes() {
        // Build `{ key: { default: null }, ... }` for the given attribute names.
        const nullable = (keys) => Object.fromEntries(keys.map((key) => [key, { default: null }]));
        return [
            {
                types: ["heading", "paragraph"],
                attributes: nullable(["id", "indent", "textAlign"]),
            },
            {
                types: ["image"],
                attributes: nullable([
                    "align",
                    "attachmentId",
                    "aspectRatio",
                    "height",
                    "placeholder",
                    "size",
                    "width",
                ]),
            },
            {
                types: ["orderedList"],
                attributes: nullable(["type"]),
            },
            {
                types: ["link"],
                attributes: nullable(["internal", "title"]),
            },
        ];
    },
});
|
||||
/**
 * Docmost inline comment mark: ties a comment thread to a text range through
 * `commentId`. If the mark is missing, any document with comment highlights
 * cannot round-trip through the schema ("There is no mark type comment in this
 * schema"), breaking update_page_json and edit_page_text on every commented
 * page. Mirrors Docmost's @docmost/editor-ext comment mark (commentId / resolved).
 */
const Comment = Mark.create({
    name: "comment",
    exitable: true,
    inclusive: false,
    addAttributes: () => ({
        commentId: {
            default: null,
            parseHTML(element) {
                return element.getAttribute("data-comment-id");
            },
            renderHTML(attributes) {
                if (!attributes.commentId) {
                    return {};
                }
                return { "data-comment-id": attributes.commentId };
            },
        },
        resolved: {
            default: false,
            // Only the literal string "true" marks the thread as resolved.
            parseHTML(element) {
                return element.getAttribute("data-resolved") === "true";
            },
            renderHTML(attributes) {
                if (!attributes.resolved) {
                    return {};
                }
                return { "data-resolved": "true" };
            },
        },
    }),
    parseHTML: () => [{ tag: "span[data-comment-id]" }],
    renderHTML: ({ HTMLAttributes }) => ["span", { class: "comment-mark", ...HTMLAttributes }, 0],
});
|
||||
/**
 * Text color mark. The markdown-converter emits colored text as
 * <span style="color: ...">; with no mark to parse it back, the color was
 * silently dropped on import. The mark is named `textStyle` (the name Docmost
 * expects, mirroring TipTap's @tiptap/extension-text-style) and carries a
 * single `color` attribute. The color goes through sanitizeCssColor on both
 * parse and render so a crafted style cannot break out of the attribute when
 * Docmost re-renders it.
 */
const TextStyle = Mark.create({
    name: "textStyle",
    addAttributes: () => ({
        color: {
            default: null,
            parseHTML(element) {
                const raw = element.style.color || element.getAttribute("data-color");
                return sanitizeCssColor(raw);
            },
            renderHTML(attributes) {
                const safeColor = sanitizeCssColor(attributes.color);
                if (!safeColor) {
                    return {};
                }
                return { style: `color: ${safeColor}` };
            },
        },
    }),
    parseHTML: () => [
        {
            tag: "span",
            // Only claim a plain colored span. Spans that are already a comment
            // mark (data-comment-id) or a mention node (data-type=mention) must
            // NOT match, otherwise importing such HTML would silently drop the
            // comment/mention.
            getAttrs(element) {
                const isCommentSpan = Boolean(element.getAttribute("data-comment-id"));
                const isMentionSpan = element.getAttribute("data-type") === "mention";
                if (element.style.color && !isCommentSpan && !isMentionSpan) {
                    return {};
                }
                return false;
            },
        },
    ],
    renderHTML: ({ HTMLAttributes }) => ["span", HTMLAttributes, 0],
});
|
||||
/**
|
||||
* Passthrough definitions for the remaining Docmost-specific nodes.
|
||||
*
|
||||
* TiptapTransformer.toYdoc (the write path every mutation uses) throws
|
||||
* "Unknown node type: X" for any node not registered here, so editing ANY
|
||||
* page that contains one of these nodes used to fail outright. The read path
|
||||
* (fromYdoc) accepts them, which is why they appear in real documents.
|
||||
*
|
||||
* Each node below mirrors the real @docmost/editor-ext definition's name,
|
||||
* group, content, inline/atom flags and attribute keys (with the same data-*
|
||||
* HTML mapping) so that a fromYdoc -> transform -> toYdoc round-trip both
|
||||
* validates and preserves attributes faithfully. Interactive concerns
|
||||
* (node views, commands, keyboard shortcuts, input rules, suggestion plugins)
|
||||
* are intentionally omitted: the MCP server never renders these nodes, it only
|
||||
* needs the schema to accept and carry them. The Callout node above is the
|
||||
* pattern these follow.
|
||||
*/
|
||||
/**
 * Docmost @mention (user/page reference). Inline atom.
 *
 * Every attribute maps to a `data-*` HTML attribute and is emitted only when
 * it has a truthy value.
 */
const Mention = Node.create({
    name: "mention",
    group: "inline",
    inline: true,
    selectable: true,
    atom: true,
    draggable: true,
    addAttributes() {
        return {
            id: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-id"),
                renderHTML: (attrs) => attrs.id ? { "data-id": attrs.id } : {},
            },
            label: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-label"),
                renderHTML: (attrs) => attrs.label ? { "data-label": attrs.label } : {},
            },
            entityType: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-entity-type"),
                renderHTML: (attrs) => attrs.entityType ? { "data-entity-type": attrs.entityType } : {},
            },
            entityId: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-entity-id"),
                renderHTML: (attrs) => attrs.entityId ? { "data-entity-id": attrs.entityId } : {},
            },
            slugId: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-slug-id"),
                renderHTML: (attrs) => attrs.slugId ? { "data-slug-id": attrs.slugId } : {},
            },
            creatorId: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-creator-id"),
                renderHTML: (attrs) => attrs.creatorId ? { "data-creator-id": attrs.creatorId } : {},
            },
            anchorId: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-anchor-id"),
                renderHTML: (attrs) => attrs.anchorId ? { "data-anchor-id": attrs.anchorId } : {},
            },
        };
    },
    parseHTML() {
        return [{ tag: 'span[data-type="mention"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole (`0`): this node declares no content, so it is a leaf,
        // and ProseMirror's DOMSerializer throws "Content hole not allowed in a
        // leaf node spec" when a leaf's output spec contains one.
        return ["span", { "data-type": "mention", ...HTMLAttributes }];
    },
});
|
||||
/**
 * Inline KaTeX expression. Carries the LaTeX source in `text`.
 *
 * The source is rendered as the text content of
 * <span data-type="mathInline" data-katex="true">.
 */
const MathInline = Node.create({
    name: "mathInline",
    group: "inline",
    inline: true,
    atom: true,
    addAttributes() {
        return {
            text: {
                default: "",
                // renderHTML below writes the LaTeX source as the element's text
                // content and never emits a `text` attribute, so the default
                // attribute lookup could not recover it from rendered HTML.
                // Read the text content back; an explicit `text` attribute
                // still takes precedence when one is present.
                parseHTML: (el) => el.getAttribute("text") ?? el.textContent ?? "",
            },
        };
    },
    parseHTML() {
        return [{ tag: 'span[data-type="mathInline"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        return [
            "span",
            { "data-type": "mathInline", "data-katex": "true" },
            `${HTMLAttributes.text ?? ""}`,
        ];
    },
});
|
||||
/**
 * Block KaTeX expression. Carries the LaTeX source in `text`.
 *
 * The source is rendered as the text content of
 * <div data-type="mathBlock" data-katex="true">.
 */
const MathBlock = Node.create({
    name: "mathBlock",
    group: "block",
    atom: true,
    isolating: true,
    addAttributes() {
        return {
            text: {
                default: "",
                // renderHTML below writes the LaTeX source as the element's text
                // content and never emits a `text` attribute, so the default
                // attribute lookup could not recover it from rendered HTML.
                // Read the text content back; an explicit `text` attribute
                // still takes precedence when one is present.
                parseHTML: (el) => el.getAttribute("text") ?? el.textContent ?? "",
            },
        };
    },
    parseHTML() {
        return [{ tag: 'div[data-type="mathBlock"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        return [
            "div",
            { "data-type": "mathBlock", "data-katex": "true" },
            `${HTMLAttributes.text ?? ""}`,
        ];
    },
});
|
||||
/** Collapsible <details> wrapper: exactly one summary followed by one content child. */
const Details = Node.create({
    name: "details",
    group: "block",
    content: "detailsSummary detailsContent",
    defining: true,
    isolating: true,
    addAttributes() {
        return {
            open: {
                default: false,
                // `open` is a boolean HTML attribute: <details open> carries the
                // empty string as its value. getAttribute("open") would return
                // "" (falsy) for an open block, so the state would be lost on
                // the next render; test for presence instead.
                parseHTML: (el) => el.hasAttribute("open"),
                renderHTML: (attrs) => attrs.open ? { open: "" } : {},
            },
        };
    },
    parseHTML() {
        return [{ tag: "details" }];
    },
    renderHTML({ HTMLAttributes }) {
        return ["details", { ...HTMLAttributes }, 0];
    },
});
|
||||
/** Summary line of a <details> block; holds inline content and renders as <summary>. */
const DetailsSummary = Node.create({
    name: "detailsSummary",
    group: "block",
    content: "inline*",
    defining: true,
    isolating: true,
    selectable: false,
    parseHTML: () => [{ tag: "summary" }],
    renderHTML: ({ HTMLAttributes }) => [
        "summary",
        { "data-type": "detailsSummary", ...HTMLAttributes },
        0,
    ],
});
|
||||
/** Body of a <details> block, kept permissive so fromYdoc output validates. */
const DetailsContent = Node.create({
    name: "detailsContent",
    group: "block",
    // Matches Docmost's block* declaration: an empty details body is valid,
    // whereas block+ would reject a collapsed/empty details on round-trip.
    content: "block*",
    defining: true,
    selectable: false,
    parseHTML: () => [{ tag: 'div[data-type="detailsContent"]' }],
    renderHTML: ({ HTMLAttributes }) => [
        "div",
        { "data-type": "detailsContent", ...HTMLAttributes },
        0,
    ],
});
|
||||
/**
 * File attachment card (non-image upload). Block atom.
 *
 * Attributes map to `data-attachment-*` HTML attributes. `url` is always
 * emitted (empty string when unset); the others only when set.
 */
const Attachment = Node.create({
    name: "attachment",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    addAttributes() {
        return {
            url: {
                default: "",
                parseHTML: (el) => el.getAttribute("data-attachment-url"),
                renderHTML: (attrs) => ({
                    "data-attachment-url": attrs.url ?? "",
                }),
            },
            name: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-attachment-name"),
                renderHTML: (attrs) => attrs.name ? { "data-attachment-name": attrs.name } : {},
            },
            mime: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-attachment-mime"),
                renderHTML: (attrs) => attrs.mime ? { "data-attachment-mime": attrs.mime } : {},
            },
            size: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-attachment-size"),
                // `!= null` rather than truthiness so a size of 0 is still emitted.
                renderHTML: (attrs) => attrs.size != null ? { "data-attachment-size": attrs.size } : {},
            },
            attachmentId: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-attachment-id"),
                renderHTML: (attrs) => attrs.attachmentId
                    ? { "data-attachment-id": attrs.attachmentId }
                    : {},
            },
            // Docmost declares `placeholder` (a transient upload key, not rendered
            // to HTML). Carry it so a round-trip never hits "Unsupported attribute".
            placeholder: { default: null },
        };
    },
    parseHTML() {
        return [{ tag: 'div[data-type="attachment"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole (`0`): this node declares no content, so it is a leaf,
        // and ProseMirror's DOMSerializer throws "Content hole not allowed in a
        // leaf node spec" when a leaf's output spec contains one.
        return ["div", { "data-type": "attachment", ...HTMLAttributes }];
    },
});
|
||||
/** Uploaded <video> player. Block atom, parsed from and rendered as a <video> element. */
const Video = Node.create({
    name: "video",
    group: "block",
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    addAttributes() {
        // Reader for a single HTML attribute.
        const read = (htmlName) => (element) => element.getAttribute(htmlName);
        // Emit the HTML attribute only when the node attribute is truthy.
        const emitIfTruthy = (key, htmlName) => (attributes) => attributes[key] ? { [htmlName]: attributes[key] } : {};
        // Emit the HTML attribute unless the node attribute is null/undefined.
        const emitIfSet = (key, htmlName) => (attributes) => attributes[key] != null ? { [htmlName]: attributes[key] } : {};
        return {
            src: {
                default: "",
                parseHTML: read("src"),
                // Always emitted; falls back to an empty string.
                renderHTML: (attributes) => ({ src: attributes.src ?? "" }),
            },
            alt: {
                default: null,
                parseHTML: read("aria-label"),
                renderHTML: emitIfTruthy("alt", "aria-label"),
            },
            attachmentId: {
                default: null,
                parseHTML: read("data-attachment-id"),
                renderHTML: emitIfTruthy("attachmentId", "data-attachment-id"),
            },
            width: {
                default: null,
                parseHTML: read("width"),
                renderHTML: emitIfSet("width", "width"),
            },
            height: {
                default: null,
                parseHTML: read("height"),
                renderHTML: emitIfSet("height", "height"),
            },
            size: {
                default: null,
                parseHTML: read("data-size"),
                renderHTML: emitIfSet("size", "data-size"),
            },
            align: {
                default: "center",
                parseHTML: read("data-align"),
                renderHTML: emitIfTruthy("align", "data-align"),
            },
            aspectRatio: {
                default: null,
                parseHTML: read("data-aspect-ratio"),
                renderHTML: emitIfSet("aspectRatio", "data-aspect-ratio"),
            },
            // `placeholder` is declared by Docmost (a transient upload key, not
            // rendered to HTML); carrying it keeps a round-trip from hitting
            // "Unsupported attribute".
            placeholder: { default: null },
        };
    },
    parseHTML: () => [{ tag: "video" }],
    renderHTML: ({ HTMLAttributes }) => ["video", { controls: "true", ...HTMLAttributes }],
});
|
||||
/**
 * Defensive passthrough for a `youtube` node. Docmost itself has no dedicated
 * youtube node (YouTube is handled via `embed`), but the converter read path
 * references this type, so accept it as a generic block atom that preserves
 * its src so legacy/external documents survive a round-trip.
 */
const Youtube = Node.create({
    name: "youtube",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    addAttributes() {
        return {
            src: {
                default: "",
                parseHTML: (el) => el.getAttribute("data-src"),
                renderHTML: (attrs) => ({
                    "data-src": attrs.src ?? "",
                }),
            },
            width: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-width"),
                renderHTML: (attrs) => attrs.width != null ? { "data-width": attrs.width } : {},
            },
            height: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-height"),
                renderHTML: (attrs) => attrs.height != null ? { "data-height": attrs.height } : {},
            },
            align: {
                default: "center",
                parseHTML: (el) => el.getAttribute("data-align"),
                renderHTML: (attrs) => attrs.align ? { "data-align": attrs.align } : {},
            },
        };
    },
    parseHTML() {
        return [{ tag: 'div[data-type="youtube"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole (`0`): this node declares no content, so it is a leaf,
        // and ProseMirror's DOMSerializer throws "Content hole not allowed in a
        // leaf node spec" when a leaf's output spec contains one.
        return ["div", { "data-type": "youtube", ...HTMLAttributes }];
    },
});
|
||||
/**
 * Generic embed (provider iframe). Block atom.
 *
 * All five attributes are always emitted as `data-*` HTML attributes.
 */
const Embed = Node.create({
    name: "embed",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    addAttributes() {
        return {
            src: {
                default: "",
                parseHTML: (el) => el.getAttribute("data-src"),
                renderHTML: (attrs) => ({
                    "data-src": attrs.src ?? "",
                }),
            },
            provider: {
                default: "",
                parseHTML: (el) => el.getAttribute("data-provider"),
                renderHTML: (attrs) => ({
                    "data-provider": attrs.provider ?? "",
                }),
            },
            align: {
                default: "center",
                parseHTML: (el) => el.getAttribute("data-align"),
                renderHTML: (attrs) => ({
                    "data-align": attrs.align ?? "center",
                }),
            },
            width: {
                default: 800,
                parseHTML: (el) => el.getAttribute("data-width"),
                renderHTML: (attrs) => ({
                    "data-width": attrs.width,
                }),
            },
            height: {
                default: 600,
                parseHTML: (el) => el.getAttribute("data-height"),
                renderHTML: (attrs) => ({
                    "data-height": attrs.height,
                }),
            },
        };
    },
    parseHTML() {
        return [{ tag: 'div[data-type="embed"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole (`0`): this node declares no content, so it is a leaf,
        // and ProseMirror's DOMSerializer throws "Content hole not allowed in a
        // leaf node spec" when a leaf's output spec contains one.
        return ["div", { "data-type": "embed", ...HTMLAttributes }];
    },
});
|
||||
/**
 * Shared attribute set for drawio/excalidraw diagram nodes.
 *
 * Every attribute is read from a `data-*` HTML attribute. `src` is always
 * rendered (empty string when unset); the others are rendered only when set.
 *
 * @returns A fresh attribute-spec object on every call.
 */
const diagramAttributes = () => {
    // Reader for a single HTML attribute.
    const read = (htmlName) => (element) => element.getAttribute(htmlName);
    // Null-default attribute emitted only when its value is truthy.
    const truthyAttr = (key, htmlName) => ({
        default: null,
        parseHTML: read(htmlName),
        renderHTML: (attributes) => attributes[key] ? { [htmlName]: attributes[key] } : {},
    });
    // Null-default attribute emitted unless its value is null/undefined,
    // so that 0 is still written out.
    const presentAttr = (key, htmlName) => ({
        default: null,
        parseHTML: read(htmlName),
        renderHTML: (attributes) => attributes[key] != null ? { [htmlName]: attributes[key] } : {},
    });
    return {
        src: {
            default: "",
            parseHTML: read("data-src"),
            renderHTML: (attributes) => ({ "data-src": attributes.src ?? "" }),
        },
        title: truthyAttr("title", "data-title"),
        alt: truthyAttr("alt", "data-alt"),
        width: presentAttr("width", "data-width"),
        height: presentAttr("height", "data-height"),
        size: presentAttr("size", "data-size"),
        aspectRatio: presentAttr("aspectRatio", "data-aspect-ratio"),
        align: {
            default: "center",
            parseHTML: read("data-align"),
            renderHTML: (attributes) => attributes.align ? { "data-align": attributes.align } : {},
        },
        attachmentId: truthyAttr("attachmentId", "data-attachment-id"),
    };
};
|
||||
/** draw.io diagram. Block atom (image-backed); attributes come from diagramAttributes. */
const Drawio = Node.create({
    name: "drawio",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    addAttributes: diagramAttributes,
    parseHTML() {
        return [{ tag: 'div[data-type="drawio"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole (`0`): this node declares no content, so it is a leaf,
        // and ProseMirror's DOMSerializer throws "Content hole not allowed in a
        // leaf node spec" when a leaf's output spec contains one.
        return ["div", { "data-type": "drawio", ...HTMLAttributes }];
    },
});
|
||||
/** Excalidraw diagram. Block atom (image-backed); attributes come from diagramAttributes. */
const Excalidraw = Node.create({
    name: "excalidraw",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    addAttributes: diagramAttributes,
    parseHTML() {
        return [{ tag: 'div[data-type="excalidraw"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole (`0`): this node declares no content, so it is a leaf,
        // and ProseMirror's DOMSerializer throws "Content hole not allowed in a
        // leaf node spec" when a leaf's output spec contains one.
        return ["div", { "data-type": "excalidraw", ...HTMLAttributes }];
    },
});
|
||||
/** Multi-column layout container holding one or more `column` children. */
const Columns = Node.create({
    name: "columns",
    group: "block",
    content: "column+",
    defining: true,
    isolating: true,
    addAttributes: () => ({
        layout: {
            default: "two_equal",
            parseHTML(element) {
                return element.getAttribute("data-layout");
            },
            renderHTML(attributes) {
                if (!attributes.layout) {
                    return {};
                }
                return { "data-layout": attributes.layout };
            },
        },
        widthMode: {
            default: "normal",
            // A missing or empty data-width-mode reads as "normal".
            parseHTML(element) {
                return element.getAttribute("data-width-mode") || "normal";
            },
            // "normal" is the default, so it is never written to HTML.
            renderHTML(attributes) {
                const mode = attributes.widthMode;
                if (!mode || mode === "normal") {
                    return {};
                }
                return { "data-width-mode": mode };
            },
        },
    }),
    parseHTML: () => [{ tag: 'div[data-type="columns"]' }],
    renderHTML: ({ HTMLAttributes }) => ["div", { "data-type": "columns", ...HTMLAttributes }, 0],
});
|
||||
/** Single column within a `columns` layout; holds block content. */
const Column = Node.create({
    name: "column",
    group: "block",
    content: "block+",
    defining: true,
    isolating: true,
    selectable: false,
    addAttributes: () => ({
        width: {
            default: null,
            // data-width is parsed with parseFloat; a missing or empty
            // attribute yields null.
            parseHTML(element) {
                const raw = element.getAttribute("data-width");
                if (!raw) {
                    return null;
                }
                return parseFloat(raw);
            },
            renderHTML(attributes) {
                if (!attributes.width) {
                    return {};
                }
                return { "data-width": attributes.width };
            },
        },
    }),
    parseHTML: () => [{ tag: 'div[data-type="column"]' }],
    renderHTML: ({ HTMLAttributes }) => ["div", { "data-type": "column", ...HTMLAttributes }, 0],
});
|
||||
/**
 * Subpages listing block (auto-generated index of child pages). Docmost
 * declares no attributes; the markdown-converter has a `case "subpages"`, so
 * the read path can emit it and toYdoc must accept it. Block atom.
 */
const Subpages = Node.create({
    name: "subpages",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    parseHTML() {
        return [{ tag: 'div[data-type="subpages"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole (`0`): this node declares no content, so it is a leaf,
        // and ProseMirror's DOMSerializer throws "Content hole not allowed in a
        // leaf node spec" when a leaf's output spec contains one.
        return ["div", { "data-type": "subpages", ...HTMLAttributes }];
    },
});
|
||||
/** Uploaded <audio> player. Block atom that mirrors the Docmost audio attributes. */
const Audio = Node.create({
    name: "audio",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    addAttributes: () => ({
        src: {
            default: "",
            parseHTML(element) {
                return element.getAttribute("src");
            },
            // Always emitted; falls back to an empty string.
            renderHTML(attributes) {
                return { src: attributes.src ?? "" };
            },
        },
        attachmentId: {
            default: null,
            parseHTML(element) {
                return element.getAttribute("data-attachment-id");
            },
            renderHTML(attributes) {
                if (!attributes.attachmentId) {
                    return {};
                }
                return { "data-attachment-id": attributes.attachmentId };
            },
        },
        size: {
            default: null,
            parseHTML(element) {
                return element.getAttribute("data-size");
            },
            // Null/undefined check (not truthiness) so a size of 0 is kept.
            renderHTML(attributes) {
                if (attributes.size == null) {
                    return {};
                }
                return { "data-size": attributes.size };
            },
        },
        // Transient upload key that Docmost declares with rendered:false; it is
        // carried so a round-trip never hits "Unsupported attribute".
        placeholder: { default: null },
    }),
    parseHTML: () => [{ tag: "audio" }],
    renderHTML: ({ HTMLAttributes }) => ["audio", { controls: "true", ...HTMLAttributes }],
});
|
||||
/** Embedded PDF viewer. Block atom. Mirrors Docmost pdf attrs. */
const Pdf = Node.create({
    name: "pdf",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    addAttributes() {
        return {
            src: {
                default: "",
                parseHTML: (el) => el.getAttribute("src"),
                renderHTML: (attrs) => ({ src: attrs.src ?? "" }),
            },
            name: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-name"),
                renderHTML: (attrs) => attrs.name ? { "data-name": attrs.name } : {},
            },
            attachmentId: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-attachment-id"),
                renderHTML: (attrs) => attrs.attachmentId
                    ? { "data-attachment-id": attrs.attachmentId }
                    : {},
            },
            size: {
                default: null,
                parseHTML: (el) => el.getAttribute("data-size"),
                renderHTML: (attrs) => attrs.size != null ? { "data-size": attrs.size } : {},
            },
            width: {
                default: null,
                parseHTML: (el) => el.getAttribute("width"),
                renderHTML: (attrs) => attrs.width != null ? { width: attrs.width } : {},
            },
            height: {
                default: null,
                parseHTML: (el) => el.getAttribute("height"),
                renderHTML: (attrs) => attrs.height != null ? { height: attrs.height } : {},
            },
            // Transient upload key Docmost declares with rendered:false; carried so
            // a round-trip never hits "Unsupported attribute".
            placeholder: { default: null },
        };
    },
    parseHTML() {
        return [{ tag: 'div[data-type="pdf"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole (`0`): this node declares no content, so it is a leaf,
        // and ProseMirror's DOMSerializer throws "Content hole not allowed in a
        // leaf node spec" when a leaf's output spec contains one.
        return ["div", { "data-type": "pdf", ...HTMLAttributes }];
    },
});
|
||||
/**
 * Page break (print/export divider). Block-level atom; Docmost declares no
 * attributes for it, so the node is identified purely by
 * div[data-type="pageBreak"] in both directions.
 */
const PageBreak = Node.create({
    name: "pageBreak",
    group: "block",
    inline: false,
    isolating: true,
    atom: true,
    defining: true,
    draggable: true,
    parseHTML() {
        return [{ tag: 'div[data-type="pageBreak"]' }];
    },
    renderHTML({ HTMLAttributes }) {
        // No content hole: the node has no content.
        return ["div", { "data-type": "pageBreak", ...HTMLAttributes }];
    },
});
|
||||
/**
 * Full extension list. Image is block-level (matches Docmost); the
 * ProseMirror DOM parser hoists <img> found inside <p> automatically.
 * StarterKit v3 already bundles the link extension, configured here.
 */
export const docmostExtensions = [
    StarterKit.configure({
        codeBlock: {},
        heading: {},
        link: { openOnClick: false },
    }),
    Image.configure({ inline: false }),
    TaskList,
    TaskItem.configure({ nested: true }),
    // Highlight stores its color unescaped and Docmost interpolates it into
    // style="background-color: ${color}". Wrap the color attribute's parseHTML
    // with the same allowlist guard used by textStyle so a crafted import color
    // cannot break out of the style attribute. Multicolor behavior is preserved.
    Highlight.extend({
        addAttributes() {
            // Keep every attribute the stock Highlight declares; only the color
            // attribute's parseHTML is overridden below.
            const parent = this.parent?.() ?? {};
            return {
                ...parent,
                color: {
                    ...parent.color,
                    // First non-empty source wins: data-color, then the raw style
                    // property, then the DOM style object; the result is sanitized.
                    parseHTML: (el) => sanitizeCssColor(el.getAttribute("data-color") ||
                        getStyleProperty(el, "background-color") ||
                        el.style.backgroundColor),
                },
            };
        },
    }).configure({ multicolor: true }),
    Subscript,
    Superscript,
    // StarterKit does not provide a textStyle mark, so register ours; without it
    // generateJSON drops <span style="color: ...">, defeating the color import.
    TextStyle,
    Comment,
    Callout,
    Table,
    TableRow,
    TableCell,
    TableHeader,
    Mention,
    MathInline,
    MathBlock,
    Details,
    DetailsSummary,
    DetailsContent,
    Attachment,
    Video,
    Youtube,
    Embed,
    Drawio,
    Excalidraw,
    Columns,
    Column,
    Subpages,
    Audio,
    Pdf,
    PageBreak,
    DocmostAttributes,
];
|
||||
16
packages/git-sync/build/lib/index.d.ts
vendored
Normal file
16
packages/git-sync/build/lib/index.d.ts
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
|
||||
* Public surface of the pure converter (`lib/`). This barrel re-exports the
|
||||
* PURE, IO-free pieces the sync engine needs: the self-contained markdown
|
||||
* (de)serializers, the lossless ProseMirror <-> Markdown converter, the
|
||||
* markdown -> ProseMirror import path, and semantic canonicalization for the
|
||||
* round-trip idempotency check (SPEC §11).
|
||||
*
|
||||
* There is no REST client, websocket/collab write-path, auth-utils or page-lock
|
||||
* here — the gitmost server writes natively.
|
||||
*/
|
||||
export { serializeDocmostMarkdown, parseDocmostMarkdown, serializeDocmostMarkdownBody, } from "./markdown-document.js";
|
||||
export type { DocmostMdMeta } from "./markdown-document.js";
|
||||
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
|
||||
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
|
||||
export { canonicalizeContent, docsCanonicallyEqual, } from "./canonicalize.js";
|
||||
export { parsePageFile, serializePageFile } from "./page-file.js";
|
||||
15
packages/git-sync/build/lib/index.js
Normal file
15
packages/git-sync/build/lib/index.js
Normal file
@@ -0,0 +1,15 @@
|
||||
/**
|
||||
* Public surface of the pure converter (`lib/`). This barrel re-exports the
|
||||
* PURE, IO-free pieces the sync engine needs: the self-contained markdown
|
||||
* (de)serializers, the lossless ProseMirror <-> Markdown converter, the
|
||||
* markdown -> ProseMirror import path, and semantic canonicalization for the
|
||||
* round-trip idempotency check (SPEC §11).
|
||||
*
|
||||
* There is no REST client, websocket/collab write-path, auth-utils or page-lock
|
||||
* here — the gitmost server writes natively.
|
||||
*/
|
||||
export { serializeDocmostMarkdown, parseDocmostMarkdown, serializeDocmostMarkdownBody, } from "./markdown-document.js";
|
||||
export { convertProseMirrorToMarkdown } from "./markdown-converter.js";
|
||||
export { markdownToProseMirror } from "./markdown-to-prosemirror.js";
|
||||
export { canonicalizeContent, docsCanonicallyEqual, } from "./canonicalize.js";
|
||||
export { parsePageFile, serializePageFile } from "./page-file.js";
|
||||
5
packages/git-sync/build/lib/markdown-converter.d.ts
vendored
Normal file
5
packages/git-sync/build/lib/markdown-converter.d.ts
vendored
Normal file
@@ -0,0 +1,5 @@
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
*/
|
||||
export declare function convertProseMirrorToMarkdown(content: any): string;
|
||||
801
packages/git-sync/build/lib/markdown-converter.js
Normal file
801
packages/git-sync/build/lib/markdown-converter.js
Normal file
@@ -0,0 +1,801 @@
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
*/
|
||||
export function convertProseMirrorToMarkdown(content) {
|
||||
if (!content || !content.content)
|
||||
return "";
|
||||
// Escape a value interpolated into a DOUBLE-QUOTED HTML attribute value
// (textAlign, colors, image src, math `text`, all data-* attrs, etc.).
//
// Only two characters are escaped:
//   &  -> &amp;   (it starts a character reference)
//   "  -> &quot;  (it is the delimiter of the attribute value)
//
// `<`, `>` and `'` are deliberately left untouched. Per the original note on
// this helper, the HTML re-parser used on import (parse5/jsdom via
// @tiptap/html) does not decode &lt; / &gt; / &#39; back inside attribute
// values, so escaping them would corrupt the stored data (e.g. a math node's
// LaTeX `a < b`) and ACCUMULATE escapes on every round-trip
// (`a < b` -> `a &lt; b` -> `a &amp;lt; b`). `'` is not special inside a
// double-quoted value anyway. Escaping only & and " keeps the value inert
// against attribute injection while staying idempotent across round-trips.
//
// The ampersand MUST be replaced first, otherwise the `&` of a freshly
// produced `&quot;` would itself be re-escaped.
const escapeAttr = (value) => String(value)
    .replace(/&/g, "&amp;")
    .replace(/"/g, "&quot;");
|
||||
// Escape a value placed as HTML element TEXT content (between tags), where
// <, >, and & are all significant. Used for text rendered inside raw-HTML
// blocks (table cells / columns) so stored characters cannot inject markup.
// The ampersand is replaced first so the `&` of the entities produced by the
// later replacements is not escaped a second time.
const escapeHtmlText = (value) => String(value)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");
|
||||
// Percent-encode characters that would break out of a markdown URL target
// (...) — whitespace/newlines and parentheses — so a stored src stays a
// single inert token (used for image/video/youtube srcs).
// A falsy value (null/undefined/""/0) yields the empty string. A plain space
// becomes %20; every other whitespace character goes through
// encodeURIComponent (e.g. "\n" -> "%0A").
const encodeMdUrl = (value) => String(value || "")
    .replace(/\s/g, (c) => (c === " " ? "%20" : encodeURIComponent(c)))
    .replace(/\(/g, "%28")
    .replace(/\)/g, "%29");
|
||||
// Recursively render one ProseMirror/TipTap JSON node to markdown. Nodes that
// markdown cannot express are emitted as the raw HTML that the schema's
// parseHTML rules read back (schema-matching tags / data-* attributes), so
// they survive a markdown -> ProseMirror re-import. Unknown node types fall
// back to the concatenation of their rendered children.
const processNode = (node) => {
    const type = node.type;
    const nodeContent = node.content || [];
    switch (type) {
        case "doc":
            return nodeContent.map(processNode).join("\n\n");
        case "paragraph": {
            const text = nodeContent.map(processNode).join("");
            const align = node.attrs?.textAlign;
            // Only non-default alignment needs the HTML wrapper.
            if (align && align !== "left") {
                return `<div align="${escapeAttr(align)}">${text}</div>`;
            }
            return text || "";
        }
        case "heading": {
            const level = node.attrs?.level || 1;
            const headingText = nodeContent.map(processNode).join("");
            return "#".repeat(level) + " " + headingText;
        }
        case "text": {
            let textContent = node.text || "";
            // Apply marks (bold, italic, code, etc.)
            if (node.marks) {
                // The schema's `code` mark declares `excludes: "_"` — it excludes every
                // other inline mark — so the editor can NEVER produce a text run that
                // carries `code` together with another mark, and on import any
                // co-occurring mark is always dropped (the run comes back as code-only).
                // The lossless, byte-stable behavior is therefore: when a run has the
                // `code` mark, emit ONLY the backtick code span and ignore every other
                // mark, so md1 is already code-only and md2 === md1.
                const markTypes = node.marks.map((m) => m.type);
                if (markTypes.includes("code")) {
                    return `\`${textContent}\``;
                }
                // No code mark from here on: wrap the text mark by mark, in order.
                for (const mark of node.marks) {
                    switch (mark.type) {
                        case "bold":
                            textContent = `**${textContent}**`;
                            break;
                        case "italic":
                            textContent = `*${textContent}*`;
                            break;
                        case "link": {
                            const href = mark.attrs?.href || "";
                            const title = mark.attrs?.title;
                            if (title) {
                                // Emit the optional markdown link title; escape an embedded
                                // double-quote so it cannot terminate the title string early.
                                const safeTitle = String(title).replace(/"/g, '\\"');
                                textContent = `[${textContent}](${href} "${safeTitle}")`;
                            }
                            else {
                                textContent = `[${textContent}](${href})`;
                            }
                            break;
                        }
                        case "strike":
                            textContent = `~~${textContent}~~`;
                            break;
                        case "underline":
                            textContent = `<u>${textContent}</u>`;
                            break;
                        case "subscript":
                            textContent = `<sub>${textContent}</sub>`;
                            break;
                        case "superscript":
                            textContent = `<sup>${textContent}</sup>`;
                            break;
                        case "highlight": {
                            // Preserve a null/empty color as a plain highlight (a bare
                            // <mark> with no background-color); only emit the style when a
                            // color is actually set, so a plain highlight is not forced to
                            // yellow on export.
                            const color = mark.attrs?.color;
                            textContent = color
                                ? `<mark style="background-color: ${escapeAttr(color)}">${textContent}</mark>`
                                : `<mark>${textContent}</mark>`;
                            break;
                        }
                        case "textStyle":
                            if (mark.attrs?.color) {
                                textContent = `<span style="color: ${escapeAttr(mark.attrs.color)}">${textContent}</span>`;
                            }
                            break;
                        case "comment": {
                            // Emit the inline comment anchor so highlights round-trip. The
                            // schema's Comment mark parses span[data-comment-id] (attrs
                            // commentId/resolved).
                            const cid = mark.attrs?.commentId;
                            if (cid) {
                                const resolvedAttr = mark.attrs?.resolved
                                    ? ` data-resolved="true"`
                                    : "";
                                textContent = `<span data-comment-id="${escapeAttr(cid)}"${resolvedAttr}>${textContent}</span>`;
                            }
                            break;
                        }
                    }
                }
            }
            return textContent;
        }
        case "codeBlock": {
            const language = node.attrs?.language || "";
            // Strip ALL trailing newlines so the export is idempotent: marked
            // re-adds exactly one trailing "\n" on import, so trimming only one
            // here would let the text grow by "\n" on each round-trip. Removing
            // every trailing newline makes repeated cycles stable.
            const code = nodeContent
                .map(processNode)
                .join("")
                .replace(/\n+$/, "");
            return "```" + language + "\n" + code + "\n```";
        }
        case "bulletList":
            return nodeContent
                .map((item) => processListItem(item, "-"))
                .join("\n");
        case "orderedList":
            return nodeContent
                .map((item, index) => processListItem(item, `${index + 1}.`))
                .join("\n");
        case "taskList":
            return nodeContent.map((item) => processTaskItem(item)).join("\n");
        case "taskItem":
            // Delegate to the same helper used by taskList so multi-block and
            // nested task items render and indent consistently.
            return processTaskItem(node);
        case "listItem":
            return nodeContent.map(processNode).join("\n");
        case "blockquote":
            // Prefix EVERY line of EVERY child with "> " and separate block-level
            // children with a blank ">" line so code blocks / multi-paragraph
            // quotes round-trip correctly.
            return nodeContent
                .map((n) => processNode(n)
                    .split("\n")
                    .map((line) => (line.length ? `> ${line}` : ">"))
                    .join("\n"))
                .join("\n>\n");
        case "horizontalRule":
            return "---";
        case "hardBreak":
            // Two trailing spaces before the newline encode a markdown hard break;
            // a bare "\n" would be reimported as a soft break and lost.
            return "  \n";
        case "image": {
            const imgAlt = node.attrs?.alt || "";
            // Neutralize characters that could break out of the markdown image
            // URL: spaces/newlines and parentheses would terminate the (...) target
            // and let a stored src inject following markdown/HTML. Percent-encode
            // them so the URL stays a single inert token.
            const imgSrc = encodeMdUrl(node.attrs?.src);
            // No "caption" attribute exists in the Docmost image schema, so we do
            // not emit one (the previous caption branch was dead).
            return `![${imgAlt}](${imgSrc})`;
        }
        case "video": {
            // Emit the schema-matching <video> element so generateJSON rebuilds the
            // node with its attrs intact. The schema's parseHTML reads src/aria-label
            // from the standard attributes and the remaining attrs from data-*.
            const attrs = node.attrs || {};
            const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
            if (attrs.alt)
                parts.push(`aria-label="${escapeAttr(attrs.alt)}"`);
            if (attrs.attachmentId)
                parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
            if (attrs.width != null)
                parts.push(`width="${escapeAttr(attrs.width)}"`);
            if (attrs.height != null)
                parts.push(`height="${escapeAttr(attrs.height)}"`);
            if (attrs.size != null)
                parts.push(`data-size="${escapeAttr(attrs.size)}"`);
            if (attrs.align)
                parts.push(`data-align="${escapeAttr(attrs.align)}"`);
            if (attrs.aspectRatio != null)
                parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
            // Wrap in a block <div> so marked treats it as a block (a bare <video>
            // is inline-level HTML and marked wraps it in <p>, leaving a spurious
            // empty paragraph beside the hoisted block atom). The wrapper has no
            // data-type, so the schema parser ignores it and just hoists the video.
            return `<div><video ${parts.join(" ")}></video></div>`;
        }
        case "youtube": {
            // Emit the schema-matching div[data-type="youtube"]; the schema reads
            // src from data-src and width/height/align from data-* attributes.
            const attrs = node.attrs || {};
            const parts = [
                `data-type="youtube"`,
                `data-src="${escapeAttr(attrs.src ?? "")}"`,
            ];
            if (attrs.width != null)
                parts.push(`data-width="${escapeAttr(attrs.width)}"`);
            if (attrs.height != null)
                parts.push(`data-height="${escapeAttr(attrs.height)}"`);
            if (attrs.align)
                parts.push(`data-align="${escapeAttr(attrs.align)}"`);
            return `<div ${parts.join(" ")}></div>`;
        }
        case "table": {
            // A GFM pipe table cannot represent merged cells. If ANY cell carries
            // colspan>1 or rowspan>1, a pipe table would corrupt the grid on
            // re-import, so emit the WHOLE table as raw HTML <table> instead: the
            // schema's table family parseHTML (tag table/tr/td/th, with colspan/
            // rowspan read from the same-named HTML attrs and align via parseHTML)
            // round-trips it faithfully. Otherwise keep the lighter GFM pipe table.
            const tableRows = nodeContent;
            if (tableRows.length === 0)
                return "";
            const hasSpan = tableRows.some((row) => (row.content || []).some((cell) => (cell.attrs?.colspan ?? 1) > 1 || (cell.attrs?.rowspan ?? 1) > 1));
            if (hasSpan) {
                // Render each cell's block children to HTML (marked does NOT parse
                // markdown inside a raw HTML block, so emitting markdown here would
                // leak literal ** / `` into the cell). blockToHtml mirrors the schema
                // HTML so inner formatting re-parses into the right marks/nodes.
                const renderHtmlCell = (cell) => {
                    const tag = cell.type === "tableHeader" ? "th" : "td";
                    const a = cell.attrs || {};
                    const cellParts = [];
                    if ((a.colspan ?? 1) > 1)
                        cellParts.push(`colspan="${escapeAttr(a.colspan)}"`);
                    if ((a.rowspan ?? 1) > 1)
                        cellParts.push(`rowspan="${escapeAttr(a.rowspan)}"`);
                    if (a.align)
                        cellParts.push(`align="${escapeAttr(a.align)}"`);
                    const open = cellParts.length
                        ? `<${tag} ${cellParts.join(" ")}>`
                        : `<${tag}>`;
                    const inner = (cell.content || [])
                        .map((block) => blockToHtml(block))
                        .join("");
                    return `${open}${inner}</${tag}>`;
                };
                const htmlRows = tableRows
                    .map((row) => `<tr>${(row.content || []).map(renderHtmlCell).join("")}</tr>`)
                    .join("");
                return `<table><tbody>${htmlRows}</tbody></table>`;
            }
            // No merged cells: emit a GFM table (header row + separator) so the
            // markdown can be parsed back into a table on re-import.
            const rows = tableRows.map(processNode);
            const headerCells = tableRows[0]?.content || [];
            const columns = headerCells.length || 1;
            // Derive alignment markers (:--, :-:, --:) from each header cell.
            const markers = Array.from({ length: columns }, (_, i) => {
                const align = headerCells[i]?.attrs?.align;
                switch (align) {
                    case "left":
                        return ":--";
                    case "center":
                        return ":-:";
                    case "right":
                        return "--:";
                    default:
                        return "---";
                }
            });
            const separator = "| " + markers.join(" | ") + " |";
            return [rows[0], separator, ...rows.slice(1)].join("\n");
        }
        case "tableRow":
            return "| " + nodeContent.map(processNode).join(" | ") + " |";
        case "tableCell":
        case "tableHeader": {
            // Join multiple block children with a space (not "") so adjacent blocks
            // like a paragraph followed by a list don't collide into "line1- a".
            // Then collapse newlines and escape pipes so a cell containing "|" or a
            // line break cannot corrupt the surrounding GFM row.
            return nodeContent
                .map(processNode)
                .join(" ")
                .replace(/\r?\n/g, " ")
                .replace(/\|/g, "\\|");
        }
        case "callout": {
            const calloutType = node.attrs?.type || "info";
            const calloutContent = nodeContent.map(processNode).join("\n");
            return `:::${calloutType.toLowerCase()}\n${calloutContent}\n:::`;
        }
        case "details":
            // The <details> open/close tags are emitted by the summary and content
            // children respectively.
            return nodeContent.map(processNode).join("\n");
        case "detailsSummary": {
            const summaryText = nodeContent.map(processNode).join("");
            return `<details>\n<summary>${summaryText}</summary>\n`;
        }
        case "detailsContent": {
            const detailsText = nodeContent.map(processNode).join("\n");
            return `${detailsText}\n</details>`;
        }
        case "mathInline": {
            // The schema's `text` attribute has no parseHTML, so TipTap's default
            // parser reads it from the `text` HTML attribute (NOT the element's text
            // content). Emit span[data-type="mathInline"] carrying the LaTeX in a
            // `text="..."` attribute so it round-trips. marked cannot parse $...$
            // back, so the previous form was lossy.
            const inlineMath = node.attrs?.text || "";
            return `<span data-type="mathInline" data-katex="true" text="${escapeAttr(inlineMath)}"></span>`;
        }
        case "mathBlock": {
            // Same as mathInline: the LaTeX must ride in the `text` HTML attribute
            // for the schema's default parser to recover it.
            const blockMath = node.attrs?.text || "";
            return `<div data-type="mathBlock" data-katex="true" text="${escapeAttr(blockMath)}"></div>`;
        }
        case "mention": {
            // Emit span[data-type="mention"] with the schema's data-* attributes so
            // generateJSON rebuilds the mention node instead of leaving "@label"
            // plain text that cannot re-parse.
            const attrs = node.attrs || {};
            const parts = [`data-type="mention"`];
            if (attrs.id)
                parts.push(`data-id="${escapeAttr(attrs.id)}"`);
            if (attrs.label)
                parts.push(`data-label="${escapeAttr(attrs.label)}"`);
            if (attrs.entityType)
                parts.push(`data-entity-type="${escapeAttr(attrs.entityType)}"`);
            if (attrs.entityId)
                parts.push(`data-entity-id="${escapeAttr(attrs.entityId)}"`);
            if (attrs.slugId)
                parts.push(`data-slug-id="${escapeAttr(attrs.slugId)}"`);
            if (attrs.creatorId)
                parts.push(`data-creator-id="${escapeAttr(attrs.creatorId)}"`);
            if (attrs.anchorId)
                parts.push(`data-anchor-id="${escapeAttr(attrs.anchorId)}"`);
            // Keep the label as visible text content too; the schema reads attrs
            // from data-*, so the inner text is purely cosmetic and harmless.
            const mentionLabel = attrs.label || attrs.id || "";
            // The label is visible element TEXT content here (the data-* attrs above
            // carry the real values), so escape it for the text context, not attrs.
            return `<span ${parts.join(" ")}>@${escapeHtmlText(mentionLabel)}</span>`;
        }
        case "attachment": {
            // BUG FIX: the old code read node.attrs.fileName / node.attrs.src, but
            // the schema stores name/url (plus mime/size/attachmentId). Emit the
            // schema-matching div[data-type="attachment"] with data-attachment-*
            // attrs so the node round-trips instead of degrading to a markdown link.
            const attrs = node.attrs || {};
            const parts = [
                `data-type="attachment"`,
                `data-attachment-url="${escapeAttr(attrs.url ?? "")}"`,
            ];
            if (attrs.name)
                parts.push(`data-attachment-name="${escapeAttr(attrs.name)}"`);
            if (attrs.mime)
                parts.push(`data-attachment-mime="${escapeAttr(attrs.mime)}"`);
            if (attrs.size != null)
                parts.push(`data-attachment-size="${escapeAttr(attrs.size)}"`);
            if (attrs.attachmentId)
                parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
            return `<div ${parts.join(" ")}></div>`;
        }
        case "drawio":
        case "excalidraw": {
            // Emit the schema-matching div[data-type=...] carrying the diagram's
            // attrs as data-* (the schema's diagramAttributes reads src/title/alt/
            // width/height/size/aspectRatio/align/attachmentId from data-*), so the
            // diagram round-trips instead of degrading to a lossy placeholder.
            const attrs = node.attrs || {};
            const parts = [
                `data-type="${type}"`,
                `data-src="${escapeAttr(attrs.src ?? "")}"`,
            ];
            if (attrs.title != null)
                parts.push(`data-title="${escapeAttr(attrs.title)}"`);
            if (attrs.alt != null)
                parts.push(`data-alt="${escapeAttr(attrs.alt)}"`);
            if (attrs.width != null)
                parts.push(`data-width="${escapeAttr(attrs.width)}"`);
            if (attrs.height != null)
                parts.push(`data-height="${escapeAttr(attrs.height)}"`);
            if (attrs.size != null)
                parts.push(`data-size="${escapeAttr(attrs.size)}"`);
            if (attrs.aspectRatio != null)
                parts.push(`data-aspect-ratio="${escapeAttr(attrs.aspectRatio)}"`);
            if (attrs.align)
                parts.push(`data-align="${escapeAttr(attrs.align)}"`);
            if (attrs.attachmentId)
                parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
            return `<div ${parts.join(" ")}></div>`;
        }
        case "embed": {
            // Emit the schema-matching div[data-type="embed"]; the schema reads
            // src/provider/align/width/height from data-* attributes so the node
            // (and its provider iframe info) survives the round-trip.
            const attrs = node.attrs || {};
            const parts = [
                `data-type="embed"`,
                `data-src="${escapeAttr(attrs.src ?? "")}"`,
                `data-provider="${escapeAttr(attrs.provider ?? "")}"`,
            ];
            if (attrs.align)
                parts.push(`data-align="${escapeAttr(attrs.align)}"`);
            if (attrs.width != null)
                parts.push(`data-width="${escapeAttr(attrs.width)}"`);
            if (attrs.height != null)
                parts.push(`data-height="${escapeAttr(attrs.height)}"`);
            return `<div ${parts.join(" ")}></div>`;
        }
        case "audio": {
            // Emit the schema-matching <audio> element (was emitting nothing). The
            // schema reads src from src and attachmentId/size from data-*.
            const attrs = node.attrs || {};
            const parts = [`src="${escapeAttr(attrs.src ?? "")}"`];
            if (attrs.attachmentId)
                parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
            if (attrs.size != null)
                parts.push(`data-size="${escapeAttr(attrs.size)}"`);
            // Wrap in a block <div> for the same reason as video: a bare <audio> is
            // inline-level HTML that marked would wrap in <p>.
            return `<div><audio ${parts.join(" ")}></audio></div>`;
        }
        case "pdf": {
            // Emit the schema-matching div[data-type="pdf"] (was emitting nothing).
            // The schema reads src/width/height from standard attrs and name/
            // attachmentId/size from data-*.
            const attrs = node.attrs || {};
            const parts = [
                `data-type="pdf"`,
                `src="${escapeAttr(attrs.src ?? "")}"`,
            ];
            if (attrs.name)
                parts.push(`data-name="${escapeAttr(attrs.name)}"`);
            if (attrs.attachmentId)
                parts.push(`data-attachment-id="${escapeAttr(attrs.attachmentId)}"`);
            if (attrs.size != null)
                parts.push(`data-size="${escapeAttr(attrs.size)}"`);
            if (attrs.width != null)
                parts.push(`width="${escapeAttr(attrs.width)}"`);
            if (attrs.height != null)
                parts.push(`height="${escapeAttr(attrs.height)}"`);
            return `<div ${parts.join(" ")}></div>`;
        }
        case "columns": {
            // Emit the schema-matching div[data-type="columns"] wrapper so the
            // multi-column layout survives. Without a case the children were
            // concatenated with no separator and the text merged. The schema reads
            // layout from data-layout and widthMode from data-width-mode. The whole
            // block is raw HTML, so render children via blockToHtml (NOT markdown,
            // which marked would not re-parse inside a raw HTML block).
            const attrs = node.attrs || {};
            const parts = [`data-type="columns"`];
            if (attrs.layout)
                parts.push(`data-layout="${escapeAttr(attrs.layout)}"`);
            if (attrs.widthMode && attrs.widthMode !== "normal")
                parts.push(`data-width-mode="${escapeAttr(attrs.widthMode)}"`);
            const inner = nodeContent.map((n) => blockToHtml(n)).join("");
            return `<div ${parts.join(" ")}>${inner}</div>`;
        }
        case "column": {
            // Emit the schema-matching div[data-type="column"]; the schema reads the
            // column width from data-width. Children are rendered as HTML so their
            // formatting survives inside this raw HTML block.
            const attrs = node.attrs || {};
            const parts = [`data-type="column"`];
            if (attrs.width)
                parts.push(`data-width="${escapeAttr(attrs.width)}"`);
            const inner = nodeContent.map((n) => blockToHtml(n)).join("");
            return `<div ${parts.join(" ")}>${inner}</div>`;
        }
        case "pageBreak":
            // Emit the schema-matching div[data-type="pageBreak"] so marked passes
            // it through as a block and generateJSON rebuilds the pageBreak atom.
            // Without this case the node fell through to `default` and rendered ""
            // (the divider silently disappeared and could not round-trip).
            return `<div data-type="pageBreak"></div>`;
        case "subpages":
            return "{{SUBPAGES}}";
        default:
            // Fallback: process children
            return nodeContent.map(processNode).join("");
    }
};
|
||||
// Render inline content (text runs + their marks) to HTML. Used by the raw
// HTML fallbacks (spanned tables, columns) where marked will NOT re-parse
// markdown, so backtick/asterisk/bracket syntax would otherwise leak as
// literal characters. Each mark is mirrored to the HTML the schema's parseHTML
// accepts so it re-imports as the matching ProseMirror mark.
//
// A null/undefined list renders as "". Text is escaped for the element-text
// context BEFORE the mark wrappers are added, so the wrappers' own markup is
// never escaped. Marks of a type not listed below leave the text unwrapped.
const inlineToHtml = (inlineNodes) => (inlineNodes || [])
    .map((n) => {
        if (n.type === "hardBreak")
            return "<br>";
        if (n.type !== "text") {
            // Inline atoms (mention, mathInline) already emit schema HTML.
            return processNode(n);
        }
        let t = escapeHtmlText(n.text || "");
        for (const mark of n.marks || []) {
            switch (mark.type) {
                case "bold":
                    t = `<strong>${t}</strong>`;
                    break;
                case "italic":
                    t = `<em>${t}</em>`;
                    break;
                case "code":
                    t = `<code>${t}</code>`;
                    break;
                case "strike":
                    t = `<s>${t}</s>`;
                    break;
                case "underline":
                    t = `<u>${t}</u>`;
                    break;
                case "subscript":
                    t = `<sub>${t}</sub>`;
                    break;
                case "superscript":
                    t = `<sup>${t}</sup>`;
                    break;
                case "link":
                    t = `<a href="${escapeAttr(mark.attrs?.href || "")}">${t}</a>`;
                    break;
                case "highlight":
                    // Bare <mark> when no color is set, so a plain highlight stays plain.
                    t = mark.attrs?.color
                        ? `<mark style="background-color: ${escapeAttr(mark.attrs.color)}">${t}</mark>`
                        : `<mark>${t}</mark>`;
                    break;
                case "textStyle":
                    if (mark.attrs?.color)
                        t = `<span style="color: ${escapeAttr(mark.attrs.color)}">${t}</span>`;
                    break;
                case "comment":
                    // Inline comment anchor inside a raw-HTML container (columns /
                    // spanned table cells), so commented text there also round-trips.
                    if (mark.attrs?.commentId) {
                        const r = mark.attrs?.resolved ? ` data-resolved="true"` : "";
                        t = `<span data-comment-id="${escapeAttr(mark.attrs.commentId)}"${r}>${t}</span>`;
                    }
                    break;
            }
        }
        return t;
    })
    .join("");
|
||||
// Build the schema-matching <img> tag for an image node. Kept as a shared
// helper so an image becomes real HTML wherever a raw-HTML container needs it
// (inside a column or a spanned table cell); markdown image syntax
// (`![alt](src)`) would NOT be re-parsed there and would survive as literal
// text. `src` is always emitted (empty when missing). The remaining attributes
// are emitted only when set: alt/title/align/attachmentId when truthy, and
// width/height/size/aspectRatio when not null/undefined (so 0 is preserved).
const imageToHtml = (node) => {
    const a = node.attrs || {};
    const isTruthy = (value) => Boolean(value);
    const isPresent = (value) => value != null;
    // [DOM attribute name, value, inclusion rule] in emission order.
    const optionalAttrs = [
        ["alt", a.alt, isTruthy],
        ["title", a.title, isTruthy],
        ["width", a.width, isPresent],
        ["height", a.height, isPresent],
        ["align", a.align, isTruthy],
        ["data-size", a.size, isPresent],
        ["data-attachment-id", a.attachmentId, isTruthy],
        ["data-aspect-ratio", a.aspectRatio, isPresent],
    ];
    const rendered = [`src="${escapeAttr(a.src ?? "")}"`];
    for (const [domName, value, shouldEmit] of optionalAttrs) {
        if (shouldEmit(value)) {
            rendered.push(`${domName}="${escapeAttr(value)}"`);
        }
    }
    return `<img ${rendered.join(" ")}>`;
};
|
||||
// Build the div[data-type="callout"] markup for a callout node. The banner
// type travels in data-callout-type (lower-cased, "info" when unset) and the
// children are rendered as HTML blocks so they survive inside a raw-HTML
// container.
const calloutToHtml = (node) => {
    const bannerType = (node.attrs?.type || "info").toLowerCase();
    const renderedChildren = (node.content || [])
        .map((child) => blockToHtml(child))
        .join("");
    const openTag = `<div data-type="callout" data-callout-type="${escapeAttr(bannerType)}">`;
    return `${openTag}${renderedChildren}</div>`;
};
|
||||
// Build a <details> element for a details node. Its children (normally a
// detailsSummary followed by a detailsContent) are rendered through
// blockToHtml, which emits the summary[data-type="detailsSummary"] and
// div[data-type="detailsContent"] markup for them.
const detailsToHtml = (node) => {
    const parts = (node.content || []).map((child) => blockToHtml(child));
    return `<details>${parts.join("")}</details>`;
};
|
||||
const detailsSummaryToHtml = (node) => {
    // The summary holds inline content only, so it goes through the inline renderer.
    const label = inlineToHtml(node.content || []);
    return `<summary data-type="detailsSummary">${label}</summary>`;
};
|
||||
// Build the div[data-type="detailsContent"] wrapper that holds the block
// children of a details node's body.
const detailsContentToHtml = (node) => {
    const parts = (node.content || []).map((child) => blockToHtml(child));
    return `<div data-type="detailsContent">${parts.join("")}</div>`;
};
|
||||
// Build task-list HTML in the shape the schema parses:
// ul[data-type="taskList"] containing li[data-type="taskItem"][data-checked].
// Emitting that shape directly keeps task lists inside columns/cells from
// degrading to literal "- [ ]" text, since markdown is not re-parsed there.
const taskListToHtml = (node) => {
    const renderItem = (taskItem) => {
        const state = taskItem.attrs?.checked ? "true" : "false";
        return `<li data-type="taskItem" data-checked="${state}">${blockChildrenToHtml(taskItem)}</li>`;
    };
    const renderedItems = (node.content || []).map((taskItem) => renderItem(taskItem));
    return `<ul data-type="taskList">${renderedItems.join("")}</ul>`;
};
|
||||
// Render a block node to HTML for the raw-HTML containers (spanned tables,
// columns). marked does NOT re-parse markdown inside a raw-HTML block, so
// EVERY block type that can appear inside a column or a spanned cell must be
// emitted as schema-matching HTML here — never as markdown, or it would land
// as literal text on re-import. Nodes whose processNode case already produces
// schema-matching HTML (math/media/embed/attachment/nested columns/spanned
// table) are delegated to processNode; the markdown-emitting cases
// (image/blockquote/callout/details/hr/taskList) get explicit HTML here.
//
// Returns the HTML string for `block`. Unknown block types are wrapped in a
// <div> so their content is preserved.
const blockToHtml = (block) => {
    const children = block.content || [];
    switch (block.type) {
        // Inline nodes can reach this function through the generic fallback at
        // the bottom (an unknown block whose content mixes text with other
        // inline nodes). A text node has no `content`, so without this case it
        // would render as an empty <div> and its text would be lost. Route
        // inline nodes to the inline renderer instead.
        case "text":
        case "hardBreak":
        case "mention":
        case "mathInline":
            return inlineToHtml([block]);
        case "paragraph":
            return `<p>${inlineToHtml(children)}</p>`;
        case "heading": {
            // The level becomes part of the TAG NAME, so it must be a real
            // heading level: coerce to an integer in 1..6 (defaulting to 1)
            // rather than interpolating an arbitrary attribute value.
            const requested = Number(block.attrs?.level);
            const level = Number.isInteger(requested)
                ? Math.min(6, Math.max(1, requested))
                : 1;
            return `<h${level}>${inlineToHtml(children)}</h${level}>`;
        }
        case "bulletList":
            return `<ul>${children
                .map((li) => `<li>${blockChildrenToHtml(li)}</li>`)
                .join("")}</ul>`;
        case "orderedList":
            return `<ol>${children
                .map((li) => `<li>${blockChildrenToHtml(li)}</li>`)
                .join("")}</ol>`;
        case "codeBlock": {
            const lang = block.attrs?.language || "";
            // The code itself is element TEXT content (between <code> tags), so it
            // must escape < > & — NOT the attribute escaper. The language rides in
            // a class ATTRIBUTE, so it uses escapeAttr.
            const code = escapeHtmlText(children
                .map(processNode)
                .join("")
                .replace(/\n+$/, ""));
            const cls = lang ? ` class="language-${escapeAttr(lang)}"` : "";
            return `<pre><code${cls}>${code}</code></pre>`;
        }
        case "image":
            return imageToHtml(block);
        case "blockquote":
            return `<blockquote>${children.map(blockToHtml).join("")}</blockquote>`;
        case "horizontalRule":
            return "<hr>";
        case "callout":
            return calloutToHtml(block);
        case "details":
            return detailsToHtml(block);
        case "detailsSummary":
            return detailsSummaryToHtml(block);
        case "detailsContent":
            return detailsContentToHtml(block);
        case "taskList":
            return taskListToHtml(block);
        case "taskItem":
            // A bare taskItem (outside a taskList) still needs a wrapping list so
            // the schema parses it; wrap it in a single-item taskList.
            return taskListToHtml({ content: [block] });
        // table (incl. spanned), columns/column, math, media, embed, attachment,
        // etc. already emit schema-matching HTML from processNode.
        case "table":
        case "columns":
        case "column":
        case "mathBlock":
        case "video":
        case "audio":
        case "pdf":
        case "youtube":
        case "embed":
        case "attachment":
        case "drawio":
        case "excalidraw":
            return processNode(block);
        default:
            // Any still-unhandled block type: NEVER fall back to markdown inside a
            // raw-HTML block (it would become literal text). Wrap its rendered
            // children in a <div> so their content is preserved; if it has only
            // text children (or none), render them as inline content instead.
            if (children.length && children.some((c) => c.type !== "text")) {
                return `<div>${children.map(blockToHtml).join("")}</div>`;
            }
            return `<div>${inlineToHtml(children)}</div>`;
    }
};
|
||||
// HTML-fallback counterpart of processListItem: renders every block child of
// a list item through blockToHtml and concatenates the results.
const blockChildrenToHtml = (item) => {
    const blocks = item.content || [];
    return blocks.map((child) => blockToHtml(child)).join("");
};
|
||||
// Lay out the rendered children of one list item beneath its marker.
//
// `childStrings` holds one (possibly multi-line) string per child block. Only
// the very first physical line — line 0 of child 0 — is prefixed with
// `${prefix} `. Every other line (the rest of the first child and all lines of
// later children such as nested lists, code blocks or extra paragraphs) is
// indented by `indentWidth` spaces so it stays inside the item instead of
// collapsing to column 0. Blank lines stay empty: no trailing whitespace.
//
// `indentWidth` must be the width of the LIST marker, which is not always the
// width of the visible prefix:
//   - bullet  "- "          -> 2
//   - task    "- [ ] "      -> 2 (the checkbox is content, the marker is "- ")
//   - ordered "1. " / "10. " -> 3 / 4, growing with the number of digits
// CommonMark anchors nested content to the marker column, so callers pass the
// exact width; a too-narrow indent would re-import as sibling/loose content.
const indentItemChildren = (childStrings, prefix, indentWidth) => {
    const pad = " ".repeat(indentWidth);
    // Flatten to physical lines; index 0 is then the first line of the first child.
    const physicalLines = childStrings.flatMap((child) => child.split("\n"));
    return physicalLines
        .map((line, position) => {
            if (position === 0) {
                return `${prefix} ${line}`;
            }
            return line.length ? `${pad}${line}` : "";
        })
        .join("\n");
};
|
||||
// Render one bullet/ordered list item as markdown. `prefix` is the visible
// marker without its trailing space ("-", "1.", "10.", ...). An item with no
// children renders as the bare prefix.
const processListItem = (item, prefix) => {
    const rendered = (item.content || []).map(processNode);
    if (rendered.length > 0) {
        // The emitted marker is `${prefix} `, so continuation lines are
        // indented by prefix.length + 1 ("-" -> 2, "1." -> 3, "10." -> 4).
        const markerWidth = prefix.length + 1;
        return indentItemChildren(rendered, prefix, markerWidth);
    }
    return prefix;
};
|
||||
// Render one task-list item as markdown ("- [ ] ..." / "- [x] ...").
const processTaskItem = (item) => {
    const box = item.attrs?.checked ? "[x]" : "[ ]";
    const marker = `- ${box}`;
    const rendered = (item.content || []).map(processNode);
    // An empty item must still show its checkbox; indenting zero children
    // would yield "" and the row would vanish.
    if (rendered.length === 0) {
        return marker;
    }
    // The list marker is only "- " (2 columns). The checkbox is item content,
    // so the continuation indent is a fixed 2, not the width of `marker`.
    const TASK_MARKER_WIDTH = 2;
    return indentItemChildren(rendered, marker, TASK_MARKER_WIDTH);
};
|
||||
return processNode(content).trim();
|
||||
}
|
||||
68
packages/git-sync/build/lib/markdown-document.d.ts
vendored
Normal file
68
packages/git-sync/build/lib/markdown-document.d.ts
vendored
Normal file
@@ -0,0 +1,68 @@
|
||||
/**
|
||||
* Self-contained Docmost-flavoured Markdown document (custom extensions).
|
||||
*
|
||||
* A single `.md` file that packages everything needed to losslessly round-trip
|
||||
* a page through "download -> edit body -> re-upload":
|
||||
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
|
||||
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
|
||||
* - a trailing `docmost:comments` block: a one-line JSON array of comment
|
||||
* threads.
|
||||
*
|
||||
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
|
||||
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
|
||||
* importer without first stripping the blocks, the metadata cannot leak into the
|
||||
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
|
||||
* codeBlock node, so a fenced block is deliberately NOT used.)
|
||||
*
|
||||
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
|
||||
* re-pastes an exported `.md` into a page, or a page documents this very
|
||||
* format). To stay robust, parsing treats only the FINAL, document-ending
|
||||
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
|
||||
* opener whose closing `-->` sits at the very end of the file. Any earlier
|
||||
* literal occurrence is left in the body untouched.
|
||||
*
|
||||
* NOTE on comments: in this version the comment THREAD records are preserved in
|
||||
* the file but are NOT pushed back to the server on import — only the inline
|
||||
* comment marks (anchors) embedded in the body are restored. Managing comment
|
||||
* records stays with the comment tools/UI.
|
||||
*/
|
||||
/**
 * Shape of the JSON object stored in the leading `docmost:meta` block.
 * Only `version` is required; every identity field is optional.
 * NOTE(review): the per-field meanings below are inferred from the field
 * names — confirm against the code that builds the meta object.
 */
export interface DocmostMdMeta {
|
||||
/** Presumably the version of the meta/file format — TODO confirm. */
version: number;
|
||||
/** Presumably the id of the page this file was exported from — TODO confirm. */
pageId?: string;
|
||||
/** Presumably the page's slug id — TODO confirm. */
slugId?: string;
|
||||
/** Presumably the page title — TODO confirm. */
title?: string;
|
||||
/** Presumably the id of the space containing the page — TODO confirm. */
spaceId?: string;
|
||||
/** Presumably the parent page id; the type also allows an explicit `null`. */
parentPageId?: string | null;
|
||||
}
|
||||
/**
|
||||
* Assemble the full self-contained markdown file: meta block, body, and the
|
||||
* comments block. The meta block is always emitted; the comments block is always
|
||||
* emitted too (with `[]` when there are no comments) so the format stays uniform
|
||||
* and parsing stays simple.
|
||||
*/
|
||||
export declare function serializeDocmostMarkdown(meta: DocmostMdMeta, body: string, comments: any[]): string;
|
||||
/**
|
||||
* Split a self-contained file back into its parts. Tolerant: if the meta or
|
||||
* comments block is missing (e.g. a hand-written plain-markdown file), the
|
||||
* corresponding value is returned as `null` and the whole input is treated as
|
||||
* the body. This never throws on a MISSING block; only a `JSON.parse` failure
|
||||
* inside a block that IS present is surfaced as a thrown Error with a clear
|
||||
* message. Robust to `\r\n` line endings.
|
||||
*/
|
||||
export declare function parseDocmostMarkdown(full: string): {
|
||||
meta: DocmostMdMeta | null;
|
||||
body: string;
|
||||
comments: any[] | null;
|
||||
};
|
||||
/**
|
||||
* Serialize a self-contained markdown file with the meta block + body ONLY —
|
||||
* NO trailing `docmost:comments` block. The sync engine never touches
|
||||
* `/comments` (SPEC §3): the synced file carries just page identity (meta) and
|
||||
* the body, where comment threads survive only as inline `<span
|
||||
* data-comment-id>` anchor marks inside the body.
|
||||
*
|
||||
* `parseDocmostMarkdown` already tolerates a missing comments block (it returns
|
||||
* `comments: null` and treats the rest as body), so a file produced here
|
||||
* round-trips cleanly through the parser.
|
||||
*/
|
||||
export declare function serializeDocmostMarkdownBody(meta: DocmostMdMeta, body: string): string;
|
||||
118
packages/git-sync/build/lib/markdown-document.js
Normal file
118
packages/git-sync/build/lib/markdown-document.js
Normal file
@@ -0,0 +1,118 @@
|
||||
/**
|
||||
* Self-contained Docmost-flavoured Markdown document (custom extensions).
|
||||
*
|
||||
* A single `.md` file that packages everything needed to losslessly round-trip
|
||||
* a page through "download -> edit body -> re-upload":
|
||||
* - a leading `docmost:meta` block: a one-line JSON object with page identity;
|
||||
* - the Markdown body (carrying inline comment anchors and diagrams as HTML);
|
||||
* - a trailing `docmost:comments` block: a one-line JSON array of comment
|
||||
* threads.
|
||||
*
|
||||
* Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
|
||||
* drop HTML comments, so even if the WHOLE file were ever fed straight to the
|
||||
* importer without first stripping the blocks, the metadata cannot leak into the
|
||||
* document. (A fenced ```docmost-comments``` block would WRONGLY become a
|
||||
* codeBlock node, so a fenced block is deliberately NOT used.)
|
||||
*
|
||||
* The delimiter literals may legitimately appear in the BODY too (e.g. a user
|
||||
* re-pastes an exported `.md` into a page, or a page documents this very
|
||||
* format). To stay robust, parsing treats only the FINAL, document-ending
|
||||
* `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
|
||||
* opener whose closing `-->` sits at the very end of the file. Any earlier
|
||||
* literal occurrence is left in the body untouched.
|
||||
*
|
||||
* NOTE on comments: in this version the comment THREAD records are preserved in
|
||||
* the file but are NOT pushed back to the server on import — only the inline
|
||||
* comment marks (anchors) embedded in the body are restored. Managing comment
|
||||
* records stays with the comment tools/UI.
|
||||
*/
|
||||
// Match the leading meta block (allow leading whitespace). Capture group 1 is
|
||||
// the JSON text between the markers.
|
||||
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
|
||||
// Match a `docmost:comments` opener. Used globally to scan for the LAST opener
|
||||
// rather than end-anchoring a single regex (which would mis-capture across a
|
||||
// literal opener that appears earlier in the body).
|
||||
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
|
||||
/**
 * Assemble the full self-contained markdown file: meta block, body, and the
 * comments block. The meta block is always emitted; the comments block is always
 * emitted too (with `[]` when `comments` is not an array) so the format stays
 * uniform and parsing stays simple.
 *
 * Both JSON payloads live inside HTML comments. A literal `-->` (or the HTML5
 * variant `--!>`) inside the JSON — e.g. a page title "a --> b" or a comment
 * containing an arrow — would terminate the wrapping HTML comment early and let
 * the rest of the metadata leak into the document if the file is ever fed to
 * the importer unstripped. The `>` of such a sequence is therefore written as
 * the JSON escape `\u003e`; `JSON.parse` decodes it back, so the round-trip is
 * lossless and files without such sequences are byte-identical to before.
 *
 * @param meta Page identity object; serialized as one-line JSON.
 * @param body Markdown body; trimmed, `null`/`undefined` is treated as empty.
 * @param comments Comment threads; anything that is not an array becomes `[]`.
 * @returns The complete file text, ending with a newline.
 */
export function serializeDocmostMarkdown(meta, body, comments) {
    // String(...) keeps the previous output for values JSON.stringify cannot
    // encode (it returns undefined for e.g. `undefined`), instead of throwing.
    const toCommentSafeJson = (value) => String(JSON.stringify(value)).replace(/--(!?)>/g, "--$1\\u003e");
    const metaJson = toCommentSafeJson(meta);
    const commentsJson = toCommentSafeJson(Array.isArray(comments) ? comments : []);
    const trimmedBody = (body ?? "").trim();
    return (`<!-- docmost:meta\n${metaJson}\n-->\n\n` +
        `${trimmedBody}\n\n` +
        `<!-- docmost:comments\n${commentsJson}\n-->\n`);
}
|
||||
/**
 * Split a self-contained file back into `{ meta, body, comments }`.
 *
 * Tolerant of missing blocks: when the meta or comments block is absent (for
 * instance a hand-written plain-markdown file) the corresponding value is
 * `null` and that text is simply part of the body. Line endings are normalized
 * from `\r\n` to `\n` before anything else.
 *
 * Only the LAST `<!-- docmost:comments` opener is considered, and only when its
 * closing `-->` ends the document; earlier literal occurrences stay in the body.
 *
 * @param full Complete file text (`null`/`undefined` is treated as empty).
 * @returns The parsed meta object (or `null`), the trimmed body, and the
 *   parsed comments value (or `null`).
 * @throws Error when a block that IS present contains invalid JSON.
 */
export function parseDocmostMarkdown(full) {
    const text = (full ?? "").replace(/\r\n/g, "\n");

    // Leading meta block. META_RE is start-anchored, so at most one match.
    let meta = null;
    let bodyStart = 0;
    const metaHit = META_RE.exec(text);
    if (metaHit !== null) {
        try {
            meta = JSON.parse(metaHit[1]);
        }
        catch (err) {
            throw new Error(`Invalid docmost:meta JSON block: ${err instanceof Error ? err.message : String(err)}`);
        }
        // The body begins immediately after the matched block.
        bodyStart = (metaHit.index ?? 0) + metaHit[0].length;
    }

    // Walk every comments opener and remember the final one. The regex is a
    // shared global-flag object, so start the scan from index 0 explicitly.
    COMMENTS_OPEN_RE.lastIndex = 0;
    let lastOpener = null;
    for (const hit of text.matchAll(COMMENTS_OPEN_RE)) {
        lastOpener = hit;
    }

    let comments = null;
    let bodyStop = text.length;
    if (lastOpener !== null) {
        const openerStart = lastOpener.index;
        const tail = text.slice(openerStart + lastOpener[0].length);
        // The closer only counts when nothing but whitespace follows it.
        const closer = /\r?\n-->[ \t]*\r?\n?\s*$/.exec(tail);
        if (closer !== null) {
            try {
                comments = JSON.parse(tail.slice(0, closer.index));
            }
            catch (err) {
                throw new Error(`Invalid docmost:comments JSON block: ${err instanceof Error ? err.message : String(err)}`);
            }
            // Everything from the opener to the end of the file is metadata.
            bodyStop = openerStart;
        }
    }

    return { meta, body: text.slice(bodyStart, bodyStop).trim(), comments };
}
|
||||
/**
 * Serialize a self-contained markdown file with the meta block + body ONLY —
 * NO trailing `docmost:comments` block. The sync engine never touches
 * `/comments` (SPEC §3): the synced file carries just page identity (meta) and
 * the body, where comment threads survive only as inline `<span
 * data-comment-id>` anchor marks inside the body.
 *
 * `parseDocmostMarkdown` already tolerates a missing comments block (it returns
 * `comments: null` and treats the rest as body), so a file produced here
 * round-trips cleanly through the parser.
 *
 * The meta JSON sits inside an HTML comment, so a literal `-->` / `--!>` in it
 * (e.g. a page title "a --> b") would close that comment early. The `>` of such
 * a sequence is written as the JSON escape `\u003e`, which `JSON.parse` decodes
 * back; output for metadata without such sequences is unchanged.
 *
 * @param meta Page identity object; serialized as one-line JSON.
 * @param body Markdown body; trimmed, `null`/`undefined` is treated as empty.
 * @returns The file text: meta block, blank line, body, trailing newline.
 */
export function serializeDocmostMarkdownBody(meta, body) {
    // String(...) preserves the previous output when JSON.stringify returns
    // undefined (e.g. for an undefined meta) instead of throwing on .replace.
    const metaJson = String(JSON.stringify(meta)).replace(/--(!?)>/g, "--$1\\u003e");
    return `<!-- docmost:meta\n${metaJson}\n-->\n\n${(body ?? "").trim()}\n`;
}
|
||||
2
packages/git-sync/build/lib/markdown-to-prosemirror.d.ts
vendored
Normal file
2
packages/git-sync/build/lib/markdown-to-prosemirror.d.ts
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
/** Convert markdown to a ProseMirror doc using the full Docmost schema. */
|
||||
export declare function markdownToProseMirror(markdownContent: string): Promise<any>;
|
||||
306
packages/git-sync/build/lib/markdown-to-prosemirror.js
Normal file
306
packages/git-sync/build/lib/markdown-to-prosemirror.js
Normal file
@@ -0,0 +1,306 @@
|
||||
/**
|
||||
* Pure markdown -> ProseMirror conversion.
|
||||
*
|
||||
* The converter path is `markdownToProseMirror` (marked -> HTML ->
|
||||
* generateJSON) plus the two pre/post processors it needs (`preprocessCallouts`,
|
||||
* `bridgeTaskLists`). The gitmost server writes the resulting page bodies
|
||||
* natively through the collab gateway, so no websocket/Yjs write-path lives
|
||||
* here.
|
||||
*/
|
||||
import { generateJSON } from "@tiptap/html";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { marked } from "marked";
|
||||
import { docmostExtensions } from "./docmost-schema.js";
|
||||
// Setup DOM environment for Tiptap HTML parsing in Node.js
|
||||
const dom = new JSDOM("<!DOCTYPE html><html><body></body></html>");
|
||||
global.window = dom.window;
|
||||
global.document = dom.window.document;
|
||||
// @ts-ignore
|
||||
global.Element = dom.window.Element;
|
||||
/**
|
||||
* Hard ceiling above which we skip callout preprocessing entirely. The linear
|
||||
* scanner below has no quadratic blow-up, but we still cap input defensively so
|
||||
* a pathological multi-megabyte payload cannot tie up the event loop; in that
|
||||
* case the markdown is passed through verbatim (callouts are simply not
|
||||
* detected) rather than risking a slow scan.
|
||||
*/
|
||||
const MAX_CALLOUT_PREPROCESS_BYTES = 4 * 1024 * 1024; // 4 MB
|
||||
/** Matches an opening callout fence: `:::type` (type captured, lower-cased). */
|
||||
const CALLOUT_OPEN_RE = /^:::\s*(\w+)\s*$/;
|
||||
/** Matches a bare closing callout fence: `:::`. */
|
||||
const CALLOUT_CLOSE_RE = /^:::\s*$/;
|
||||
/** Matches the start/end of a code fence (``` or ~~~), capturing the marker. */
|
||||
const CODE_FENCE_RE = /^(\s*)(`{3,}|~{3,})/;
|
||||
/**
 * Pre-process Docmost-flavoured markdown: convert `:::type ... :::` callout
 * blocks (the syntax our markdown export produces) into
 * `<div data-type="callout" data-callout-type="TYPE">` blocks that the callout
 * extension parses. The inner content is rendered through marked as regular
 * markdown.
 *
 * The input is scanned line by line. The scanner:
 *   - tracks fenced code regions (```...``` and ~~~...~~~) and never treats a
 *     `:::` line inside a code fence as a callout delimiter;
 *   - only ends a code fence on a real CLOSING fence: same fence character, at
 *     least as long as the opener, and followed by nothing but whitespace (as
 *     in CommonMark / marked). A line such as "```js" inside a ``` block is
 *     code content, not a closer; treating it as one would desynchronise the
 *     fence state and let a later `:::` inside code be read as a delimiter;
 *   - matches an opening `:::type` line with the closing `:::` at the SAME
 *     nesting level via a depth counter, and handles NESTED callouts by
 *     recursing into the callout body;
 *   - leaves an unterminated `:::type` opener in place as a literal line.
 *
 * Inputs whose string length exceeds MAX_CALLOUT_PREPROCESS_BYTES are returned
 * unchanged (callouts are simply not detected).
 *
 * @param markdown Markdown source text.
 * @returns Promise of the markdown with callouts replaced by HTML divs.
 */
async function preprocessCallouts(markdown) {
    // Defensive cap: skip preprocessing for pathologically large inputs.
    if (markdown.length > MAX_CALLOUT_PREPROCESS_BYTES) {
        return markdown;
    }
    // True when `line` closes a code fence that was opened with `marker`.
    const closesFence = (line, marker) => {
        const fence = line.match(CODE_FENCE_RE);
        return (fence !== null &&
            fence[2][0] === marker[0] &&
            fence[2].length >= marker.length &&
            // A closing fence carries no info string: only whitespace may follow.
            line.slice(fence[0].length).trim() === "");
    };
    // Recursively transform a slice of lines, converting top-level callouts in
    // that slice into <div> blocks and rendering their inner content (which may
    // itself contain nested callouts) through this same function.
    const transform = async (lines) => {
        const out = [];
        let inCodeFence = false;
        let codeFenceMarker = ""; // the exact run of backticks/tildes that opened it
        let i = 0;
        while (i < lines.length) {
            const line = lines[i];
            // Inside a code fence, only its matching closing fence is significant;
            // everything else (including `:::` lines) is copied through verbatim.
            if (inCodeFence) {
                out.push(line);
                if (closesFence(line, codeFenceMarker)) {
                    inCodeFence = false;
                    codeFenceMarker = "";
                }
                i++;
                continue;
            }
            // A code fence opening outside any callout body: enter code-fence mode.
            const fenceOpen = line.match(CODE_FENCE_RE);
            if (fenceOpen) {
                inCodeFence = true;
                codeFenceMarker = fenceOpen[2];
                out.push(line);
                i++;
                continue;
            }
            // An opening callout fence: scan forward (with code-fence and nested
            // callout awareness) for its matching closing `:::` at the same level.
            const open = line.match(CALLOUT_OPEN_RE);
            if (open) {
                const type = open[1].toLowerCase();
                const bodyLines = [];
                let depth = 1;
                let innerInCodeFence = false;
                let innerCodeFenceMarker = "";
                let j = i + 1;
                for (; j < lines.length; j++) {
                    const bl = lines[j];
                    if (innerInCodeFence) {
                        if (closesFence(bl, innerCodeFenceMarker)) {
                            innerInCodeFence = false;
                            innerCodeFenceMarker = "";
                        }
                        bodyLines.push(bl);
                        continue;
                    }
                    const innerFence = bl.match(CODE_FENCE_RE);
                    if (innerFence) {
                        innerInCodeFence = true;
                        innerCodeFenceMarker = innerFence[2];
                        bodyLines.push(bl);
                        continue;
                    }
                    if (CALLOUT_OPEN_RE.test(bl)) {
                        depth++;
                        bodyLines.push(bl);
                        continue;
                    }
                    if (CALLOUT_CLOSE_RE.test(bl)) {
                        depth--;
                        if (depth === 0)
                            break; // matching close for THIS callout
                        bodyLines.push(bl);
                        continue;
                    }
                    bodyLines.push(bl);
                }
                if (j < lines.length) {
                    // Found the matching closing fence: render the body (recursively, so
                    // nested callouts are handled) and emit the callout div.
                    const inner = await transform(bodyLines);
                    const renderedInner = await marked.parse(inner);
                    out.push(`\n<div data-type="callout" data-callout-type="${type}">${renderedInner}</div>\n`);
                    i = j + 1; // skip past the closing `:::`
                    continue;
                }
                // No matching close (unterminated callout): treat the opener as a
                // literal line and continue, preserving the original text.
                out.push(line);
                i++;
                continue;
            }
            out.push(line);
            i++;
        }
        return out.join("\n");
    };
    return transform(markdown.split("\n"));
}
|
||||
/**
 * Rewrite marked's checkbox lists into the markup TipTap's task-list
 * extensions parse.
 *
 * marked renders GitHub task items (`- [x] done`) as an ordinary list whose
 * `<li>`s contain an `<input type="checkbox">`. TipTap instead expects
 * `ul[data-type="taskList"]` with `li[data-type="taskItem"]` and the state in
 * `data-checked` ("true"/"false").
 *
 * A list is converted only when it has at least one direct `<li>` and EVERY
 * direct `<li>` owns a checkbox (a direct child input, or one directly inside a
 * direct `<p>` child). Checkboxes nested deeper — e.g. in a child list — do not
 * count, so a bullet item that merely contains a task sublist is left alone.
 * Both `<ul>` and `<ol>` qualify; a qualifying `<ol>` is replaced by a `<ul>`
 * so that only the taskList rule matches on import. Mixed and ordinary lists
 * are untouched. The `<p>` wrapper marked emits is kept inside the `<li>`.
 *
 * @param html HTML produced by marked.
 * @returns The HTML with task lists bridged; the input is returned unchanged
 *   when it contains no checkbox or exceeds MAX_CALLOUT_PREPROCESS_BYTES.
 */
function bridgeTaskLists(html) {
    // Skip the JSDOM parse when there is nothing to bridge (the common case)
    // or when the payload is too large to parse defensively.
    const mentionsCheckbox = /type=["']?checkbox/i.test(html);
    if (!mentionsCheckbox || html.length > MAX_CALLOUT_PREPROCESS_BYTES) {
        return html;
    }
    const { document } = new JSDOM(html).window;
    const isCheckboxInput = (el) => el.tagName === "INPUT" && el.getAttribute("type") === "checkbox";
    // Checkboxes that belong to this <li> itself, in document order. Raw inline
    // HTML may place several in one item, so all of them are collected.
    const ownCheckboxes = (li) => Array.from(li.children).flatMap((child) => {
        if (isCheckboxInput(child)) {
            return [child];
        }
        if (child.tagName === "P") {
            return Array.from(child.querySelectorAll(":scope > input[type='checkbox']"));
        }
        return [];
    });
    for (const list of Array.from(document.querySelectorAll("ul, ol"))) {
        // Direct <li> children only; nested lists get their own iteration.
        const items = Array.from(list.children).filter((el) => el.tagName === "LI");
        if (items.length === 0) {
            continue;
        }
        const boxesPerItem = items.map((li) => ownCheckboxes(li));
        if (boxesPerItem.some((boxes) => boxes.length === 0)) {
            continue; // at least one item has no checkbox of its own
        }
        // An <ol> tagged data-type="taskList" would match BOTH the orderedList
        // and the taskList rules in generateJSON and produce a phantom empty
        // orderedList, so swap it for a <ul> carrying the same attributes and
        // child nodes.
        let taskList = list;
        if (list.tagName === "OL") {
            taskList = document.createElement("ul");
            for (const { name, value } of Array.from(list.attributes)) {
                taskList.setAttribute(name, value);
            }
            while (list.firstChild) {
                taskList.appendChild(list.firstChild);
            }
            list.replaceWith(taskList);
        }
        taskList.setAttribute("data-type", "taskList");
        for (const [position, li] of items.entries()) {
            const boxes = boxesPerItem[position];
            // The first checkbox decides the state; any extras are only removed.
            const primary = boxes[0] ?? null;
            const isChecked = primary != null &&
                (primary.hasAttribute("checked") || primary.checked);
            li.setAttribute("data-type", "taskItem");
            li.setAttribute("data-checked", isChecked ? "true" : "false");
            // Drop every collected checkbox so none leaks into the item content.
            boxes.forEach((box) => box.remove());
        }
    }
    return document.body.innerHTML;
}
|
||||
/**
|
||||
* Recursively strip content-less paragraph nodes from a generated doc.
|
||||
*
|
||||
* A block-level atom whose markdown form is INLINE (e.g. the block `image`'s
|
||||
* ``, or a bare media element) is wrapped by marked in a <p>; the schema
|
||||
* then HOISTS the block atom out of that paragraph, leaving an EMPTY paragraph
|
||||
* sibling. On the next export that empty `<p>` renders to "" and the doc "\n\n"
|
||||
* join injects a phantom blank gap, so the markdown is not byte-stable.
|
||||
*
|
||||
* Markdown blank lines are separators, never content, so generateJSON only ever
|
||||
* produces an empty paragraph as such a hoist artifact — removing them is safe
|
||||
* and general (it also subsumes the <div>-wrapper workaround the `video` case
|
||||
* uses). We remove ONLY `type === 'paragraph'` nodes whose `content` is absent
|
||||
* or an empty array; every other node (including atoms without `content`) is
|
||||
* preserved, and we recurse into the content of any node that has children.
|
||||
*/
|
||||
function stripEmptyParagraphs(node) {
|
||||
if (!node || !Array.isArray(node.content)) {
|
||||
// Atom / leaf node (no children to recurse into): keep as-is.
|
||||
return node;
|
||||
}
|
||||
const mapped = node.content.map((child) => stripEmptyParagraphs(child));
|
||||
const isEmptyParagraph = (child) => !!child &&
|
||||
child.type === "paragraph" &&
|
||||
(!Array.isArray(child.content) || child.content.length === 0);
|
||||
const filtered = mapped.filter((child) => !isEmptyParagraph(child));
|
||||
// Schema-validity guard: several nodes require NON-empty block content
|
||||
// (`content: "block+"` — tableCell, tableHeader, blockquote, column, callout,
|
||||
// and the doc root). For an empty one of those, generateJSON materializes a
|
||||
// single empty paragraph as its OBLIGATORY content — that is not a hoist
|
||||
// artifact. If stripping would empty the container, keep ONE empty paragraph
|
||||
// so the result stays schema-valid (an empty cell/quote must not become `[]`).
|
||||
const cleaned = filtered.length === 0 && mapped.length > 0 ? [mapped[0]] : filtered;
|
||||
return { ...node, content: cleaned };
|
||||
}
|
||||
/**
 * Convert markdown to a ProseMirror doc.
 *
 * Pipeline: callout preprocessing -> `marked.parse` to HTML -> task-list
 * bridging on the HTML -> `generateJSON` with `docmostExtensions` -> removal
 * of empty-paragraph artifacts.
 *
 * @param markdownContent - The markdown source to convert.
 * @returns A promise resolving to the generated ProseMirror JSON document.
 */
export async function markdownToProseMirror(markdownContent) {
    const calloutReady = await preprocessCallouts(markdownContent);
    const renderedHtml = await marked.parse(calloutReady);
    const parsedDoc = generateJSON(bridgeTaskLists(renderedHtml), docmostExtensions);
    return stripEmptyParagraphs(parsedDoc);
}
|
||||
194
packages/git-sync/build/lib/node-ops.d.ts
vendored
Normal file
194
packages/git-sync/build/lib/node-ops.d.ts
vendored
Normal file
@@ -0,0 +1,194 @@
|
||||
/**
|
||||
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
|
||||
* tree by node id.
|
||||
*
|
||||
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
|
||||
* table cells hold their children in `content` just like any other block, so a
|
||||
* single recursive walk reaches them all.
|
||||
*
|
||||
* Every exported function operates on a DEEP CLONE of the input document and
|
||||
* returns the new document. The input doc and any `newNode`/`node` argument are
|
||||
* never mutated. All functions are defensively null-safe: a missing or
* non-array `content`, non-object nodes, and absent `attrs` are tolerated.
|
||||
*/
|
||||
/**
 * Recursively concatenate all text contained in a node.
 *
 * The node's own `text` string (when present) comes first, followed by the
 * `blockPlainText` of each `content` child in order, with no separator.
 *
 * @param node - A ProseMirror JSON node. Anything that is not a plain object
 *   (nullish, primitive, array) yields "".
 * @returns The concatenated text, or "" when the node holds none.
 */
|
||||
export declare function blockPlainText(node: any): string;
|
||||
/** One compact outline entry describing a single top-level block of a doc. */
|
||||
export interface OutlineEntry {
|
||||
/** Zero-based position of the block within `doc.content`. */
index: number;
|
||||
/** The block's `type`, or `undefined` when the entry is not an object. */
type: string | undefined;
|
||||
/** The block's `attrs.id`, or `null` when it has none. */
id: string | null;
|
||||
/** The block's plain text, truncated to 100 characters. */
firstText: string;
|
||||
/** Present for headings only: `attrs.level`, or `null` when absent. */
|
||||
level?: number | null;
|
||||
/** Present for tables only: number of rows. */
|
||||
rows?: number;
|
||||
/** Present for tables only: number of cells in the FIRST row. */
cols?: number;
|
||||
/** Present for tables only: first-row cell texts, each cut to 40 chars. */
header?: string[];
|
||||
/**
 * Present for list blocks only: number of direct children. A "list block" is
 * any block whose type ends in "List" (bulletList/orderedList/taskList).
 */
|
||||
items?: number;
|
||||
}
|
||||
/**
 * Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
 * `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
 * table cells, because compactness is the point; use `getNodeByRef` to drill
 * into a specific block.
 *
 * Each entry carries `{ index, type, id, firstText }`, plus type-specific
 * extras: headings add `level`; tables add `rows`/`cols` and the first row's
 * cell texts as `header`; list blocks (types ending in "List") add `items`.
 * `firstText` is the block's plain text truncated to 100 chars.
 *
 * @param doc - The ProseMirror JSON document.
 * @returns One entry per element of `doc.content`, in order; `[]` when `doc`
 *   is not an object or has no `content` array.
 */
|
||||
export declare function buildOutline(doc: any): OutlineEntry[];
|
||||
/**
 * Resolve a single node by reference.
 *
 * - A `ref` of the form `#<n>` (e.g. `#2`) selects the TOP-LEVEL block at
 *   index `n` in `doc.content`. Use this for top-level blocks that carry no
 *   `attrs.id`, such as a table.
 * - Any other `ref` is treated as a block id: the FIRST node (depth-first,
 *   document order) below the root with `attrs.id === ref` is returned. The
 *   root doc node itself is never matched.
 *
 * @param doc - The ProseMirror JSON document; not mutated.
 * @param ref - `#<n>` top-level index reference, or a block id.
 * @returns `{ node, path, type }`, or `null` when nothing matches. `node` is a
 *   DEEP CLONE of the match, so callers may mutate it freely; `path` is the
 *   list of child indices from the doc root down to the node (a top-level
 *   block is `[index]`); `type` is the node's `type`.
 */
|
||||
export declare function getNodeByRef(doc: any, ref: string): {
|
||||
node: any;
|
||||
path: number[];
|
||||
type: string | undefined;
|
||||
} | null;
|
||||
/**
 * Replace EVERY node whose `attrs.id === nodeId` with a deep clone of
 * `newNode`, anywhere below the root (including inside callouts and table
 * cells). The root doc node itself is never replaced, and a substituted node
 * is not searched for further matches.
 *
 * @param doc - The ProseMirror JSON document; not mutated.
 * @param nodeId - The `attrs.id` to look for.
 * @param newNode - The replacement; a fresh clone is used for each match so the
 *   substitutions do not share references.
 * @returns `{ doc, replaced }`: the edited clone of `doc` and the number of
 *   nodes substituted (0 when nothing matched).
 */
|
||||
export declare function replaceNodeById(doc: any, nodeId: string, newNode: any): {
|
||||
doc: any;
|
||||
replaced: number;
|
||||
};
|
||||
/**
 * Remove EVERY node whose `attrs.id === nodeId` from its parent `content`
 * array, anywhere below the root (recursive, including callouts and tables).
 * The root doc node itself is never removed.
 *
 * @param doc - The ProseMirror JSON document; not mutated.
 * @param nodeId - The `attrs.id` to look for.
 * @returns `{ doc, deleted }`: the edited clone of `doc` and the number of
 *   nodes removed (0 when nothing matched).
 */
|
||||
export declare function deleteNodeById(doc: any, nodeId: string): {
|
||||
doc: any;
|
||||
deleted: number;
|
||||
};
|
||||
/**
 * Deep-clone `doc` and strip every node/mark attribute whose value is strictly
 * `undefined`, so the result is safe to hand to Yjs (which throws an opaque
 * "Unexpected content type" when asked to store an `undefined` attribute value).
 *
 * Only `undefined` keys are removed; `null`, `false`, `0`, and `""` are all
 * legitimate JSON-storable values and are preserved. Only the direct keys of
 * each `attrs` object are inspected; values nested deeper inside an attribute
 * are left as they are.
 *
 * @param doc - The ProseMirror JSON document; never mutated.
 * @returns The sanitized clone.
 */
|
||||
export declare function sanitizeForYjs(doc: any): any;
|
||||
/**
 * Diagnostics helper: walk the tree and report the FIRST attribute value (in
 * any `node.attrs` or `mark.attrs`) that Yjs cannot store, i.e. `undefined`,
 * a `function`, a `symbol`, or a `bigint`.
 *
 * The report is a human-readable path followed by the offending kind in
 * parentheses, e.g. `content[3].content[0].attrs.indent (undefined)`. Paths
 * are relative to the doc root, so a bad root attribute reads `attrs.<key>`.
 *
 * @param doc - The ProseMirror JSON document; only read.
 * @returns The path string, or `null` when every attribute is storable or
 *   `doc` is not an object.
 */
|
||||
export declare function findUnstorableAttr(doc: any): string | null;
|
||||
/** Options controlling where `insertNodeRelative` places the new node. */
|
||||
export interface InsertOptions {
|
||||
/**
 * "append" pushes onto the top-level content and ignores both anchors;
 * "before"/"after" place the node next to the resolved anchor.
 */
position: "before" | "after" | "append";
|
||||
/**
 * Resolve the anchor by node id anywhere in the tree (preferred). When set,
 * `anchorText` is not consulted.
 */
|
||||
anchorNodeId?: string;
|
||||
/** Fallback: first TOP-LEVEL block whose plain text includes this string. */
|
||||
anchorText?: string;
|
||||
}
|
||||
/**
 * Insert a deep clone of `node` relative to an anchor.
 *
 * - position "append": push the node onto the top-level `doc.content`
 *   (created when missing).
 * - position "before"/"after": locate the anchor and splice the node into the
 *   anchor's parent `content` array immediately before / after it.
 *
 * Anchor resolution for before/after:
 * - if `anchorNodeId` is given, find the node with `attrs.id === anchorNodeId`
 *   anywhere in the tree (recursive);
 * - otherwise, if `anchorText` is given, scan only TOP-LEVEL `doc.content`
 *   blocks and pick the first whose `blockPlainText` includes `anchorText`.
 *
 * Table structural nodes (`tableRow`, `tableCell`, `tableHeader`) are placed
 * differently: they are spliced into the nearest enclosing container of the
 * required type (`table` for rows, `tableRow` for cells/headers) on the path
 * to the anchor, next to the child that leads to the anchor. When the anchor
 * itself is that container, the node goes to the start ("before") or the end
 * ("after") of the container's content.
 *
 * @param doc - The ProseMirror JSON document; not mutated.
 * @param node - The node to insert; a deep clone is what gets inserted.
 * @param opts - Placement options.
 * @returns `{ doc, inserted }`. `inserted` is false when `opts` is not an
 *   object or the anchor could not be resolved (the doc is returned unchanged
 *   apart from being cloned).
 * @throws Error when a structural table node is appended at the top level, or
 *   when its anchor is not inside a container of the required type.
 */
|
||||
export declare function insertNodeRelative(doc: any, node: any, opts: InsertOptions): {
|
||||
doc: any;
|
||||
inserted: boolean;
|
||||
};
|
||||
/**
 * Read a table as a matrix.
 *
 * @param doc - The ProseMirror JSON document.
 * @param tableRef - Reference identifying the table.
 *   NOTE(review): presumably resolved like a `getNodeByRef` ref (`#<n>` or a
 *   block id); confirm against the implementation.
 * @returns `null` when `tableRef` resolves to no table, otherwise:
 * - `rows`/`cols`: the table's row count and the column count of its FIRST
 *   row. Tables may be ragged (rows of differing length), so `cols` reflects
 *   only row 0; use the per-row length of `cells`/`cellIds` for each row's
 *   actual width.
 * - `cells`: `string[][]` of each cell's `blockPlainText`.
 * - `cellIds`: `(string|null)[][]` of each cell's FIRST paragraph id (or
 *   null), so callers can `patch_node` a cell for rich-formatted edits.
 * - `path`: index path of the table within the doc.
 */
|
||||
export declare function readTable(doc: any, tableRef: string): {
|
||||
rows: number;
|
||||
cols: number;
|
||||
cells: string[][];
|
||||
cellIds: (string | null)[][];
|
||||
path: number[];
|
||||
} | null;
|
||||
/**
 * Insert a row of plain-text cells into a table.
 *
 * The row is padded to the table's column count (`cells[i] ?? ""`). Each new
 * cell copies `colwidth` for its column from the header row when present, gets
 * a fresh-id paragraph, and `colspan:1, rowspan:1` attrs.
 *
 * @param doc - The ProseMirror JSON document.
 * @param tableRef - Reference identifying the table.
 * @param cells - Plain-text cell values for the new row, left to right.
 * @param index - When an integer in `[0, rows]`, the row is spliced at that
 *   position; otherwise (including when omitted) it is appended at the end.
 * @returns `{ doc, inserted }`.
 * @throws When MORE cells are supplied than the table has columns.
 */
|
||||
export declare function insertTableRow(doc: any, tableRef: string, cells: string[], index?: number): {
|
||||
doc: any;
|
||||
inserted: boolean;
|
||||
};
|
||||
/**
 * Delete the row at 0-based `index` from a table.
 *
 * @param doc - The ProseMirror JSON document.
 * @param tableRef - Reference identifying the table.
 * @param index - 0-based index of the row to delete.
 * @returns `{ doc, deleted }`; `deleted` is false only when the table cannot
 *   be located.
 * @throws On an out-of-range index, and when asked to delete the table's only
 *   row.
 */
|
||||
export declare function deleteTableRow(doc: any, tableRef: string, index: number): {
|
||||
doc: any;
|
||||
deleted: boolean;
|
||||
};
|
||||
/**
 * Set the plain-text content of cell `[row, col]` (0-based) to `text`.
 *
 * The cell's own attrs (colspan/rowspan/colwidth) are preserved; its content
 * becomes a single text paragraph that reuses the cell's existing
 * first-paragraph id when present, else a fresh one.
 *
 * @param doc - The ProseMirror JSON document.
 * @param tableRef - Reference identifying the table.
 * @param row - 0-based row index.
 * @param col - 0-based column index.
 * @param text - The new plain-text content of the cell.
 * @returns `{ doc, updated }`; `updated` is false only when the table cannot
 *   be located.
 * @throws When `row`/`col` is out of range.
 */
|
||||
export declare function updateTableCell(doc: any, tableRef: string, row: number, col: number, text: string): {
|
||||
doc: any;
|
||||
updated: boolean;
|
||||
};
|
||||
770
packages/git-sync/build/lib/node-ops.js
Normal file
770
packages/git-sync/build/lib/node-ops.js
Normal file
@@ -0,0 +1,770 @@
|
||||
/**
|
||||
* Pure, network-free helpers for manipulating a ProseMirror/TipTap document
|
||||
* tree by node id.
|
||||
*
|
||||
* A ProseMirror node here is a plain JSON object of the shape produced by
|
||||
* Docmost: `{ type, attrs?, content?, text?, marks? }`. Children live in the
|
||||
* `content` array; a node carries a stable id in `attrs.id`. Callouts and
|
||||
* table cells hold their children in `content` just like any other block, so a
|
||||
* single recursive walk reaches them all.
|
||||
*
|
||||
* Every exported function operates on a DEEP CLONE of the input document and
|
||||
* returns the new document. The input doc and any `newNode`/`node` argument are
|
||||
* never mutated. All functions are defensively null-safe: a missing or
* non-array `content`, non-object nodes, and absent `attrs` are tolerated.
|
||||
*/
|
||||
/** Deep-clone a JSON-serializable value without mutating the original. */
|
||||
function clone(value) {
|
||||
if (typeof structuredClone === "function") {
|
||||
return structuredClone(value);
|
||||
}
|
||||
// Fallback for environments without structuredClone.
|
||||
return JSON.parse(JSON.stringify(value));
|
||||
}
|
||||
/**
 * Report whether `value` is a non-null object that is not an array.
 *
 * @param value - Anything.
 * @returns `true` for plain and class-instance objects; `false` for `null`,
 *   `undefined`, primitives, functions, and arrays.
 */
function isObject(value) {
    if (value === null || value === undefined) {
        return false;
    }
    if (Array.isArray(value)) {
        return false;
    }
    return typeof value === "object";
}
|
||||
/**
 * Report whether `node` carries the given id in `node.attrs.id`.
 *
 * @param node - Candidate node; non-objects never match.
 * @param nodeId - The id to compare against (strict equality).
 * @returns `true` only when `node` and `node.attrs` are both objects and
 *   `node.attrs.id === nodeId`.
 */
function matchesId(node, nodeId) {
    if (!isObject(node)) {
        return false;
    }
    const attrs = node.attrs;
    return isObject(attrs) && attrs.id === nodeId;
}
|
||||
/**
 * Recursively concatenate all text contained in a node.
 *
 * The node's own `text` string (when present) comes first, followed by the
 * `blockPlainText` of each `content` child in order, with no separator.
 *
 * @param node - A ProseMirror JSON node. Anything that is not a plain object
 *   (nullish, primitive, array) yields "".
 * @returns The concatenated text, or "" when the node holds none.
 */
export function blockPlainText(node) {
    if (!isObject(node)) {
        return "";
    }
    const ownText = typeof node.text === "string" ? node.text : "";
    if (!Array.isArray(node.content)) {
        return ownText;
    }
    // Fold the children's text onto the node's own text, left to right.
    return node.content.reduce((acc, child) => acc + blockPlainText(child), ownText);
}
|
||||
/**
 * Truncate `text` to at most `n` UTF-16 code units, appending an ellipsis
 * ("…") when something was cut.
 *
 * The cut never lands between the two halves of a surrogate pair: if it would,
 * the whole character is dropped instead, so the result never contains a lone
 * surrogate (which turns into U+FFFD when the string is later encoded).
 *
 * @param text - The string to shorten.
 * @param n - Maximum number of code units to keep before the ellipsis.
 * @returns `text` unchanged when it already fits, else the shortened prefix
 *   followed by "…".
 */
function truncate(text, n) {
    if (!(text.length > n)) {
        return text;
    }
    let end = n;
    if (end > 0) {
        const last = text.charCodeAt(end - 1);
        const next = text.charCodeAt(end);
        const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
        if (splitsPair) {
            // Back up one unit so the high surrogate is not left dangling.
            end -= 1;
        }
    }
    return text.slice(0, end) + "…";
}
|
||||
/**
 * Build a COMPACT outline of the TOP-LEVEL blocks of `doc` (the entries in
 * `doc.content`). Deliberately does NOT recurse into paragraphs, list items, or
 * table cells, because compactness is the point; use `getNodeByRef` to drill
 * into a specific block.
 *
 * Each entry carries `{ index, type, id, firstText }`, plus type-specific
 * extras: headings add `level`; tables add `rows`/`cols` and the first row's
 * cell texts (each cut to 40 chars) as `header`; list blocks (types ending in
 * "List") add `items`. `firstText` is the block's plain text truncated to 100
 * chars.
 *
 * Null-safe: a block whose `content` (or whose first table row's `content`) is
 * missing or not an array is treated as having no children instead of
 * throwing.
 *
 * @param doc - The ProseMirror JSON document; only read.
 * @returns One entry per element of `doc.content`, in order; `[]` when `doc`
 *   is not an object or has no `content` array.
 */
export function buildOutline(doc) {
    if (!isObject(doc) || !Array.isArray(doc.content))
        return [];
    const out = [];
    for (let i = 0; i < doc.content.length; i++) {
        const block = doc.content[i];
        const blockIsObject = isObject(block);
        const type = blockIsObject ? block.type : undefined;
        const attrs = blockIsObject && isObject(block.attrs) ? block.attrs : null;
        // Only trust `content` when it really is an array; anything else counts
        // as "no children" so malformed blocks cannot crash the outline.
        const children = blockIsObject && Array.isArray(block.content) ? block.content : [];
        const entry = {
            index: i,
            type,
            id: attrs ? attrs.id ?? null : null,
            firstText: truncate(blockPlainText(block), 100),
        };
        if (type === "heading") {
            entry.level = attrs ? attrs.level ?? null : null;
        }
        else if (type === "table") {
            const firstRow = children[0];
            const headerCells = isObject(firstRow) && Array.isArray(firstRow.content) ? firstRow.content : [];
            entry.rows = children.length;
            entry.cols = headerCells.length;
            entry.header = headerCells.map((cell) => truncate(blockPlainText(cell), 40));
        }
        else if (typeof type === "string" && type.endsWith("List")) {
            entry.items = children.length;
        }
        out.push(entry);
    }
    return out;
}
|
||||
/**
 * Resolve a single node by reference.
 *
 * - A `ref` of the form `#<n>` (e.g. `#2`) selects the TOP-LEVEL block at
 *   index `n` in `doc.content`. Use this for top-level blocks that carry no
 *   `attrs.id`, such as a table.
 * - Any other `ref` is treated as a block id: the FIRST node (depth-first,
 *   document order) below the root with `attrs.id === ref` is returned. The
 *   root doc node itself is never matched.
 *
 * @param doc - The ProseMirror JSON document; not mutated.
 * @param ref - `#<n>` top-level index reference, or a block id.
 * @returns `{ node, path, type }`, or `null` when nothing matches. `node` is a
 *   DEEP CLONE of the match; `path` is the list of child indices from the doc
 *   root down to the node (a top-level block is `[index]`).
 */
export function getNodeByRef(doc, ref) {
    if (!isObject(doc)) {
        return null;
    }
    // Shape of a successful lookup; the clone keeps callers away from `doc`.
    const describe = (found, path) => ({ node: clone(found), path, type: found.type });
    // "#<n>": index into the top-level content array.
    if (typeof ref === "string") {
        const indexForm = /^#(\d+)$/.exec(ref);
        if (indexForm !== null) {
            const position = Number(indexForm[1]);
            const topLevel = Array.isArray(doc.content) ? doc.content[position] : undefined;
            return isObject(topLevel) ? describe(topLevel, [position]) : null;
        }
    }
    // Otherwise: pre-order depth-first search for the first matching id.
    const locate = (parent, trail) => {
        if (!isObject(parent) || !Array.isArray(parent.content)) {
            return null;
        }
        for (const [i, child] of parent.content.entries()) {
            const here = trail.concat(i);
            if (matchesId(child, ref)) {
                return describe(child, here);
            }
            const deeper = locate(child, here);
            if (deeper !== null) {
                return deeper;
            }
        }
        return null;
    };
    return locate(doc, []);
}
|
||||
/**
 * Replace EVERY node whose `attrs.id === nodeId` with a deep clone of
 * `newNode`, anywhere below the root (including inside callouts and table
 * cells). The root doc node itself is never replaced.
 *
 * @param doc - The ProseMirror JSON document; not mutated.
 * @param nodeId - The `attrs.id` to look for.
 * @param newNode - The replacement; a fresh clone is used for each match so the
 *   substitutions do not share references.
 * @returns `{ doc, replaced }`: the edited clone of `doc` and the number of
 *   nodes substituted.
 */
export function replaceNodeById(doc, nodeId, newNode) {
    const result = clone(doc);
    let count = 0;
    const visit = (siblings) => {
        siblings.forEach((entry, position) => {
            if (matchesId(entry, nodeId)) {
                // Swap in place. The substituted node is deliberately not
                // searched, so ids inside `newNode` are left alone.
                siblings[position] = clone(newNode);
                count += 1;
            }
            else if (isObject(entry) && Array.isArray(entry.content)) {
                visit(entry.content);
            }
        });
    };
    if (isObject(result) && Array.isArray(result.content)) {
        visit(result.content);
    }
    return { doc: result, replaced: count };
}
|
||||
/**
 * Remove EVERY node whose `attrs.id === nodeId` from its parent `content`
 * array, anywhere below the root (recursive, including callouts and tables).
 * The root doc node itself is never removed.
 *
 * @param doc - The ProseMirror JSON document; not mutated.
 * @param nodeId - The `attrs.id` to look for.
 * @returns `{ doc, deleted }`: the edited clone of `doc` and the number of
 *   nodes removed.
 */
export function deleteNodeById(doc, nodeId) {
    const result = clone(doc);
    let removed = 0;
    // Build the list of surviving siblings, pruning each survivor's own
    // children before it is kept.
    const prune = (siblings) => {
        const survivors = [];
        for (let i = 0; i < siblings.length; i += 1) {
            const entry = siblings[i];
            if (matchesId(entry, nodeId)) {
                removed += 1;
            }
            else {
                if (isObject(entry) && Array.isArray(entry.content)) {
                    entry.content = prune(entry.content);
                }
                survivors.push(entry);
            }
        }
        return survivors;
    };
    if (isObject(result) && Array.isArray(result.content)) {
        result.content = prune(result.content);
    }
    return { doc: result, deleted: removed };
}
|
||||
/**
 * Deep-clone `doc` and strip every node/mark attribute whose value is strictly
 * `undefined`, so the result is safe to hand to Yjs (which throws an opaque
 * "Unexpected content type" when asked to store an `undefined` attribute value).
 *
 * Only `undefined` keys are removed; `null`, `false`, `0`, and `""` are
 * preserved. Only the direct keys of each `attrs` object are inspected.
 *
 * @param doc - The ProseMirror JSON document; never mutated.
 * @returns The sanitized clone.
 */
export function sanitizeForYjs(doc) {
    const copy = clone(doc);
    // Delete the keys of an attrs bag that hold exactly `undefined`.
    const dropUndefinedKeys = (bag) => {
        if (!isObject(bag)) {
            return;
        }
        Object.keys(bag)
            .filter((key) => bag[key] === undefined)
            .forEach((key) => {
            delete bag[key];
        });
    };
    // Clean a node's own attrs, then its marks' attrs, then its children.
    const visit = (current) => {
        if (!isObject(current)) {
            return;
        }
        dropUndefinedKeys(current.attrs);
        const { marks, content } = current;
        if (Array.isArray(marks)) {
            marks.forEach((mark) => {
                if (isObject(mark)) {
                    dropUndefinedKeys(mark.attrs);
                }
            });
        }
        if (Array.isArray(content)) {
            content.forEach((child) => visit(child));
        }
    };
    visit(copy);
    return copy;
}
|
||||
/**
 * Diagnostics helper: walk the tree and report the FIRST attribute value (in
 * any `node.attrs` or `mark.attrs`) that Yjs cannot store, i.e. `undefined`,
 * a `function`, a `symbol`, or a `bigint`.
 *
 * The report is a human-readable path followed by the offending kind in
 * parentheses, e.g. `content[3].content[0].attrs.indent (undefined)`. Paths
 * are relative to the doc root: a bad root attribute reads `attrs.<key>` and
 * top-level blocks start at `content[<i>]`. Marks are checked on every node
 * except the root.
 *
 * @param doc - The ProseMirror JSON document; only read.
 * @returns The path string, or `null` when every attribute is storable or
 *   `doc` is not an object.
 */
export function findUnstorableAttr(doc) {
    if (!isObject(doc)) {
        return null;
    }
    const UNSTORABLE_KINDS = ["undefined", "function", "symbol", "bigint"];
    // Scan one attrs bag; return "<label>.<key> (<kind>)" for the first bad key.
    const scanAttrs = (attrs, label) => {
        if (!isObject(attrs)) {
            return null;
        }
        for (const [key, value] of Object.entries(attrs)) {
            const kind = typeof value;
            if (UNSTORABLE_KINDS.includes(kind)) {
                return `${label}.${key} (${kind})`;
            }
        }
        return null;
    };
    // Scan a content array; `prefix` is "" at the root and "<path>." below it.
    const scanChildren = (children, prefix) => {
        for (let i = 0; i < children.length; i += 1) {
            const hit = scanNode(children[i], `${prefix}content[${i}]`);
            if (hit !== null) {
                return hit;
            }
        }
        return null;
    };
    // Scan a non-root node: own attrs, then mark attrs, then children.
    const scanNode = (node, path) => {
        if (!isObject(node)) {
            return null;
        }
        const own = scanAttrs(node.attrs, `${path}.attrs`);
        if (own !== null) {
            return own;
        }
        if (Array.isArray(node.marks)) {
            for (const [i, mark] of node.marks.entries()) {
                const markHit = scanAttrs(mark?.attrs, `${path}.marks[${i}].attrs`);
                if (markHit !== null) {
                    return markHit;
                }
            }
        }
        return Array.isArray(node.content) ? scanChildren(node.content, `${path}.`) : null;
    };
    // The root is handled here so its paths carry no leading segment.
    const rootHit = scanAttrs(doc.attrs, "attrs");
    if (rootHit !== null) {
        return rootHit;
    }
    return Array.isArray(doc.content) ? scanChildren(doc.content, "") : null;
}
|
||||
/**
 * For each table structural node type, the container type it must live
 * directly inside. `insertNodeRelative` uses this to splice rows/cells into
 * the correct ancestor rather than blindly into the anchor's direct parent,
 * which would corrupt the table's nesting.
 */
const REQUIRED_CONTAINER = {
    tableRow: "table",
    tableCell: "tableRow",
    tableHeader: "tableRow",
};
/** The table structural node types, i.e. the keys of `REQUIRED_CONTAINER`. */
const STRUCTURAL_TYPES = new Set(Object.keys(REQUIRED_CONTAINER));
|
||||
/**
 * Locate an anchor and return its ancestor chain, ordered from `doc` down to
 * and including the matched node.
 *
 * Each chain entry is `{ node, index }`, where `index` is the node's position
 * inside its parent's `content` array (the root doc has index -1).
 *
 * - With `opts.anchorNodeId`: depth-first search by `attrs.id` over the whole
 *   tree, the root included. `opts.anchorText` is then not consulted.
 * - Otherwise with `opts.anchorText`: only top-level blocks are scanned and
 *   the first whose plain text includes the string wins.
 *
 * @param doc - The document to search; chain entries reference its nodes
 *   directly (no cloning happens here).
 * @param opts - Object carrying `anchorNodeId` and/or `anchorText`.
 * @returns The chain, or `null` when the anchor cannot be resolved.
 */
function findAnchorChain(doc, opts) {
    if (!isObject(doc)) {
        return null;
    }
    const { anchorNodeId, anchorText } = opts;
    if (anchorNodeId != null) {
        // Carry the lineage down so a hit can return its full ancestor chain.
        const descend = (node, index, ancestors) => {
            if (!isObject(node)) {
                return null;
            }
            const lineage = ancestors.concat({ node, index });
            if (matchesId(node, anchorNodeId)) {
                return lineage;
            }
            const kids = Array.isArray(node.content) ? node.content : [];
            for (let i = 0; i < kids.length; i += 1) {
                const found = descend(kids[i], i, lineage);
                if (found !== null) {
                    return found;
                }
            }
            return null;
        };
        return descend(doc, -1, []);
    }
    if (anchorText == null || !Array.isArray(doc.content)) {
        return null;
    }
    // Text anchors only ever resolve to a top-level block.
    const at = doc.content.findIndex((block) => blockPlainText(block).includes(anchorText));
    if (at === -1) {
        return null;
    }
    return [
        { node: doc, index: -1 },
        { node: doc.content[at], index: at },
    ];
}
|
||||
/**
 * Insert a deep clone of `node` relative to an anchor.
 *
 * - position "append": push the node onto the top-level `doc.content`
 *   (created when missing).
 * - any other position: locate the anchor and splice the node into the
 *   anchor's parent `content` array, right after the anchor for "after" and
 *   right before it otherwise.
 *
 * Anchor resolution:
 * - if `anchorNodeId` is given, find the node with `attrs.id === anchorNodeId`
 *   anywhere in the tree (recursive);
 * - otherwise, if `anchorText` is given, scan only TOP-LEVEL `doc.content`
 *   blocks and pick the first whose `blockPlainText` includes `anchorText`.
 *
 * Table structural nodes (`tableRow`, `tableCell`, `tableHeader`) are placed
 * differently: they are spliced into the deepest container of the required
 * type (`table` for rows, `tableRow` for cells/headers) on the path to the
 * anchor, next to the child that leads to the anchor. When the anchor itself
 * is that container, the node goes to the end ("after") or the start
 * (otherwise) of the container's content.
 *
 * @param doc - The ProseMirror JSON document; not mutated.
 * @param node - The node to insert; a deep clone is what gets inserted.
 * @param opts - `{ position, anchorNodeId?, anchorText? }`.
 * @returns `{ doc, inserted }`. `inserted` is false when `opts` is not an
 *   object or the anchor could not be resolved (the doc is returned unchanged
 *   apart from being cloned).
 * @throws Error when a structural table node is appended at the top level, or
 *   when its anchor is not inside a container of the required type.
 */
export function insertNodeRelative(doc, node, opts) {
    const out = clone(doc);
    const fresh = clone(node);
    const result = (inserted) => ({ doc: out, inserted });
    // Defensive: with no options object there is nothing actionable to do.
    if (!isObject(opts)) {
        return result(false);
    }
    const { position, anchorNodeId, anchorText } = opts;
    const structural = isObject(node) && STRUCTURAL_TYPES.has(node.type);
    if (position === "append") {
        // Rows/cells/headers cannot live at the top level of a document.
        if (structural) {
            throw new Error(`insert_node: cannot append a ${node.type} at the top level; use ` +
                `position before/after with an anchor inside the target table`);
        }
        if (!isObject(out)) {
            return result(false);
        }
        if (!Array.isArray(out.content)) {
            out.content = [];
        }
        out.content.push(fresh);
        return result(true);
    }
    const after = position === "after";
    const shift = after ? 1 : 0;
    if (structural) {
        const wanted = REQUIRED_CONTAINER[node.type];
        const chain = findAnchorChain(out, opts);
        if (chain == null) {
            return result(false);
        }
        // Walk up from the anchor until a node of the required container type
        // is found; starting at the end makes it the DEEPEST such ancestor.
        let depth = chain.length - 1;
        while (depth >= 0 && !(isObject(chain[depth].node) && chain[depth].node.type === wanted)) {
            depth -= 1;
        }
        if (depth < 0) {
            throw new Error(`insert_node: cannot insert a ${node.type} here — the anchor is not ` +
                `inside a ${wanted}. Anchor on a cell's text or a block id ` +
                `that lives inside the target table.`);
        }
        const host = chain[depth].node;
        if (!Array.isArray(host.content)) {
            host.content = [];
        }
        let at;
        if (depth === chain.length - 1) {
            // The anchor IS the container (e.g. anchorText resolved to the table
            // block): append or prepend within it.
            at = after ? host.content.length : 0;
        }
        else {
            // Splice next to the container's child that leads to the anchor.
            at = chain[depth + 1].index + shift;
        }
        host.content.splice(at, 0, fresh);
        return result(true);
    }
    if (anchorNodeId != null) {
        // Depth-first: splice beside the first node carrying the anchor id and
        // stop searching as soon as that has happened.
        const spliceBeside = (siblings) => {
            for (let i = 0; i < siblings.length; i += 1) {
                const entry = siblings[i];
                if (matchesId(entry, anchorNodeId)) {
                    siblings.splice(i + shift, 0, fresh);
                    return true;
                }
                if (isObject(entry) && Array.isArray(entry.content) && spliceBeside(entry.content)) {
                    return true;
                }
            }
            return false;
        };
        const searchable = isObject(out) && Array.isArray(out.content);
        return result(searchable ? spliceBeside(out.content) : false);
    }
    // Text anchors: only top-level doc.content blocks are scanned.
    if (anchorText != null && isObject(out) && Array.isArray(out.content)) {
        const hit = out.content.findIndex((block) => blockPlainText(block).includes(anchorText));
        if (hit !== -1) {
            out.content.splice(hit + shift, 0, fresh);
            return result(true);
        }
    }
    return result(false);
}
|
||||
// ===========================================================================
|
||||
// Table editing helpers
|
||||
//
|
||||
// A Docmost table is a ProseMirror subtree with NO ids on the structural nodes:
|
||||
// table -> { type:"table", content:[tableRow...] }
|
||||
// row -> { type:"tableRow", content:[tableCell|tableHeader...] }
|
||||
// cell -> { type:"tableCell"|"tableHeader", attrs:{colspan,rowspan,colwidth},
|
||||
// content:[paragraph...] }
|
||||
// para -> { type:"paragraph", attrs:{id,indent}, content:[textNode...] }
|
||||
// Only paragraphs/headings carry an `attrs.id`, so a cell is addressed via the
|
||||
// id of the paragraph inside it. The helpers below all operate on a DEEP CLONE
|
||||
// of the input doc (via `clone`) and never mutate their inputs.
|
||||
// ===========================================================================
|
||||
/**
 * Walk `node` and every descendant reachable through `content`, adding each
 * string-valued `attrs.id` to the `used` set.
 *
 * The set is mutated in place; nothing is returned. Values that `isObject`
 * rejects are ignored, as are ids that are not strings.
 *
 * @param node  Root of the subtree to scan.
 * @param used  Set that receives every id found.
 */
function collectIds(node, used) {
    if (!isObject(node)) {
        return;
    }
    const { attrs, content } = node;
    // Record this node's own id first, then descend into its children.
    if (isObject(attrs) && typeof attrs.id === "string") {
        used.add(attrs.id);
    }
    if (Array.isArray(content)) {
        content.forEach((child) => collectIds(child, used));
    }
}
|
||||
/**
|
||||
* Fresh-id generator: returns a random Docmost-style id (12 chars from
|
||||
* lowercase `a-z0-9`) that is not already in `used`, and records it. On the
|
||||
* rare collision the id is regenerated. Callers rely on uniqueness, not on the
|
||||
* exact string, so randomness is fine — and unlike a module-local counter it
|
||||
* needs no reset and cannot become predictable across calls.
|
||||
*/
|
||||
function makeFreshId(used) {
|
||||
const alphabet = "abcdefghijklmnopqrstuvwxyz0123456789";
|
||||
let id;
|
||||
do {
|
||||
id = "";
|
||||
for (let i = 0; i < 12; i++) {
|
||||
id += alphabet[Math.floor(Math.random() * alphabet.length)];
|
||||
}
|
||||
} while (used.has(id) || id === "");
|
||||
used.add(id);
|
||||
return id;
|
||||
}
|
||||
/**
 * Find the table that `tableRef` points at inside `rootClone` and return the
 * node itself (not a copy) together with its index path, or null when nothing
 * matches. Because the returned `table` is the object held by `rootClone`,
 * mutating it mutates `rootClone`.
 *
 * Two reference forms are understood:
 *  - `"#<n>"`: the block at index `n` of `rootClone.content`, accepted only
 *    when its `type` is `"table"`.
 *  - anything else: depth-first search for the first node for which
 *    `matchesId(node, tableRef)` holds, then the nearest node of type
 *    `"table"` on the path from that node back to the root (the matched node
 *    itself included).
 *
 * @param rootClone  Document root to search.
 * @param tableRef   `"#<n>"` index reference or a node id.
 * @returns `{ table, path }` or null.
 */
function locateTable(rootClone, tableRef) {
    if (!isObject(rootClone)) {
        return null;
    }
    // Index form: look only at the top-level content array.
    if (typeof tableRef === "string") {
        const indexed = /^#(\d+)$/.exec(tableRef);
        if (indexed !== null) {
            const position = Number(indexed[1]);
            const topLevel = Array.isArray(rootClone.content) ? rootClone.content : [];
            const candidate = topLevel[position];
            return isObject(candidate) && candidate.type === "table"
                ? { table: candidate, path: [position] }
                : null;
        }
    }
    // Given the chain of { node, index } entries from the root down to a
    // matched node, pick the deepest entry that is a table.
    const nearestTable = (chain) => {
        for (let depth = chain.length - 1; depth >= 0; depth -= 1) {
            const entry = chain[depth];
            if (isObject(entry.node) && entry.node.type === "table") {
                return {
                    table: entry.node,
                    path: chain.slice(0, depth + 1).map((step) => step.index),
                };
            }
        }
        return null; // id found but no enclosing table
    };
    // Id form: depth-first walk that carries the ancestor chain along.
    const visit = (parent, trail) => {
        if (!isObject(parent) || !Array.isArray(parent.content)) {
            return null;
        }
        for (const [index, child] of parent.content.entries()) {
            const chain = trail.concat({ node: child, index });
            if (matchesId(child, tableRef)) {
                return nearestTable(chain);
            }
            const found = visit(child, chain);
            if (found != null) {
                return found;
            }
        }
        return null;
    };
    return visit(rootClone, []);
}
|
||||
/**
 * Build the single paragraph node that holds a cell's plain text.
 *
 * @param id    Value stored in the paragraph's `attrs.id`.
 * @param text  Cell text; a falsy value (e.g. the empty string) yields a
 *              paragraph whose `content` array is empty.
 * @returns A `paragraph` node with `attrs: { id, indent: 0 }`.
 */
function makeCellParagraph(id, text) {
    const content = [];
    if (text) {
        content.push({ type: "text", text });
    }
    return { type: "paragraph", attrs: { id, indent: 0 }, content };
}
|
||||
/**
 * Read the table addressed by `tableRef` as a matrix.
 *
 * The lookup runs against a clone of `doc`. The result carries:
 *  - `rows`: number of rows in the table.
 *  - `cols`: number of cells in the FIRST row only (0 when there is none);
 *    rows may differ in length, so use each row's own length in `cells` /
 *    `cellIds` for its real width.
 *  - `cells`: per row, the `blockPlainText` of every cell.
 *  - `cellIds`: per row, the `attrs.id` of every cell's first child node, or
 *    null when that child or its id is missing.
 *  - `path`: index path of the table as reported by `locateTable`.
 *
 * @param doc       Document to read from.
 * @param tableRef  Table reference understood by `locateTable`.
 * @returns The matrix description, or null when no table matches.
 */
export function readTable(doc, tableRef) {
    const located = locateTable(clone(doc), tableRef);
    if (located == null) {
        return null;
    }
    const { table, path } = located;
    const rowNodes = Array.isArray(table.content) ? table.content : [];
    // Id of the first child of a cell, or null when it cannot be read.
    const firstParagraphId = (cellNode) => {
        const firstPara = Array.isArray(cellNode?.content) ? cellNode.content[0] : undefined;
        if (!isObject(firstPara) || !isObject(firstPara.attrs)) {
            return null;
        }
        return firstPara.attrs.id ?? null;
    };
    const cells = [];
    const cellIds = [];
    for (const rowNode of rowNodes) {
        const cellNodes = Array.isArray(rowNode?.content) ? rowNode.content : [];
        const textsInRow = [];
        const idsInRow = [];
        for (const cellNode of cellNodes) {
            textsInRow.push(blockPlainText(cellNode));
            idsInRow.push(firstParagraphId(cellNode));
        }
        cells.push(textsInRow);
        cellIds.push(idsInRow);
    }
    return {
        rows: rowNodes.length,
        cols: rowNodes[0]?.content?.length ?? 0,
        cells,
        cellIds,
        path,
    };
}
|
||||
/**
 * Insert one row of plain-text cells into the table addressed by `tableRef`.
 *
 * Works on a clone of `doc`; the input is not modified.
 *
 * Row shape:
 *  - The row gets as many cells as the WIDEST existing row; missing entries of
 *    `cells` become empty cells. When the table has no cells at all, the
 *    number of supplied cells is used instead.
 *  - Every new cell has `colspan: 1, rowspan: 1`, a single paragraph with a
 *    document-unique fresh id, and, when the first row's cell in the same
 *    column has one, a COPY of that cell's `colwidth` (copied so the two
 *    cells never share one mutable array).
 *  - A row landing at index 0 takes each cell's `type` from the current first
 *    row's cell in that column (falling back to `"tableCell"`); rows landing
 *    anywhere else use `"tableCell"`.
 *
 * @param doc       Document containing the table.
 * @param tableRef  Table reference understood by `locateTable`.
 * @param cells     Cell texts, left to right; a non-array is treated as empty.
 * @param index     Row position to insert at; used only when it is an integer
 *                  in `[0, rows]`, otherwise the row is appended.
 * @returns `{ doc, inserted }`; `inserted` is false only when the table cannot
 *          be located.
 * @throws Error when more cells are supplied than the table has columns.
 */
export function insertTableRow(doc, tableRef, cells, index) {
    const out = clone(doc);
    const located = locateTable(out, tableRef);
    if (located == null) {
        return { doc: out, inserted: false };
    }
    const { table } = located;
    if (!Array.isArray(table.content)) {
        table.content = [];
    }
    const rows = table.content.length;
    const headerRow = table.content[0];
    const headerCells = Array.isArray(headerRow?.content) ? headerRow.content : [];
    const supplied = Array.isArray(cells) ? cells : [];
    // Column count is the widest existing row so the guard below stays
    // meaningful for ragged tables; fall back to the supplied cell count only
    // when the table has no cells.
    let colCount = 0;
    for (const row of table.content) {
        if (isObject(row) && Array.isArray(row.content)) {
            colCount = Math.max(colCount, row.content.length);
        }
    }
    if (colCount === 0) {
        colCount = supplied.length;
    }
    if (supplied.length > colCount) {
        throw new Error(`table_insert_row: got ${supplied.length} cell(s) but the table has ${colCount} column(s)`);
    }
    // Resolve the landing index up front so the cell-type decision and the
    // splice agree: a valid integer in [0, rows] is used, anything else appends.
    const landingIndex = typeof index === "number" && Number.isInteger(index) && index >= 0 && index <= rows
        ? index
        : rows;
    // Seed the id generator with every id already in the doc so the new
    // paragraph ids are unique across the whole document.
    const used = new Set();
    collectIds(out, used);
    const newCells = [];
    for (let i = 0; i < colCount; i++) {
        const text = supplied[i] ?? "";
        const attrs = { colspan: 1, rowspan: 1 };
        const colwidth = headerCells[i]?.attrs?.colwidth;
        if (colwidth !== undefined) {
            // Copy array values: assigning the reference would make the header
            // cell and the new cell share one array inside the returned doc.
            attrs.colwidth = Array.isArray(colwidth) ? colwidth.slice() : colwidth;
        }
        const cellType = landingIndex === 0 ? headerCells[i]?.type ?? "tableCell" : "tableCell";
        newCells.push({
            type: cellType,
            attrs,
            content: [makeCellParagraph(makeFreshId(used), text)],
        });
    }
    table.content.splice(landingIndex, 0, { type: "tableRow", content: newCells });
    return { doc: out, inserted: true };
}
|
||||
/**
 * Remove the row at 0-based `index` from the table addressed by `tableRef`.
 *
 * Works on a clone of `doc`.
 *
 * @param doc       Document containing the table.
 * @param tableRef  Table reference understood by `locateTable`.
 * @param index     0-based row index to remove.
 * @returns `{ doc, deleted }`; `deleted` is false only when the table cannot be
 *          located.
 * @throws Error when `index` is not an integer inside `[0, rows)`, or when the
 *         table has a single row (the last row is never removed).
 */
export function deleteTableRow(doc, tableRef, index) {
    const out = clone(doc);
    const located = locateTable(out, tableRef);
    if (located == null) {
        return { doc: out, deleted: false };
    }
    const table = located.table;
    if (!Array.isArray(table.content)) {
        table.content = [];
    }
    const rowList = table.content;
    const rowCount = rowList.length;
    const inRange = Number.isInteger(index) && index >= 0 && index < rowCount;
    if (!inRange) {
        throw new Error(`table_delete_row: row index ${index} out of range (table has ${rowCount} row(s))`);
    }
    // The range check runs first, so an empty table reports "out of range".
    if (rowCount <= 1) {
        throw new Error("table_delete_row: refusing to delete the only row of the table");
    }
    rowList.splice(index, 1);
    return { doc: out, deleted: true };
}
|
||||
/**
 * Replace the content of cell `[row, col]` (both 0-based) with a single
 * plain-text paragraph.
 *
 * Works on a clone of `doc`. Only the cell's `content` is reassigned, so its
 * other properties are left as they were. The new paragraph keeps the id of the
 * cell's previous first child when that id is a non-empty string; otherwise a
 * fresh id, unique within the whole document, is generated.
 *
 * @param doc       Document containing the table.
 * @param tableRef  Table reference understood by `locateTable`.
 * @param row       0-based row index.
 * @param col       0-based column index within that row.
 * @param text      New plain-text content of the cell.
 * @returns `{ doc, updated }`; `updated` is false only when the table cannot be
 *          located.
 * @throws Error when `row` or `col` is not an integer inside the table.
 */
export function updateTableCell(doc, tableRef, row, col, text) {
    const out = clone(doc);
    const located = locateTable(out, tableRef);
    if (located == null) {
        return { doc: out, updated: false };
    }
    const tableRows = Array.isArray(located.table.content) ? located.table.content : [];
    const rowNode = tableRows[row];
    const rowCells = isObject(rowNode) && Array.isArray(rowNode.content) ? rowNode.content : [];
    const rowOk = Number.isInteger(row) && row >= 0 && row < tableRows.length;
    const colOk = Number.isInteger(col) && col >= 0 && col < rowCells.length;
    if (!(rowOk && colOk)) {
        throw new Error(`table_update_cell: cell [${row},${col}] out of range`);
    }
    const cellNode = rowCells[col];
    // Prefer the id already carried by the cell's first child.
    const firstChild = Array.isArray(cellNode?.content) ? cellNode.content[0] : undefined;
    const existingId = isObject(firstChild) && isObject(firstChild.attrs)
        ? firstChild.attrs.id
        : undefined;
    let paragraphId;
    if (typeof existingId === "string" && existingId.length > 0) {
        paragraphId = existingId;
    }
    else {
        const used = new Set();
        collectIds(out, used);
        paragraphId = makeFreshId(used);
    }
    cellNode.content = [makeCellParagraph(paragraphId, text)];
    return { doc: out, updated: true };
}
|
||||
50
packages/git-sync/build/lib/page-file.d.ts
vendored
Normal file
50
packages/git-sync/build/lib/page-file.d.ts
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
/**
|
||||
* The native-Obsidian page-file format (design: docs/backlog/git-sync-thin-meta.md).
|
||||
* A page file is CLEAN markdown with a minimal YAML frontmatter carrying ONLY the
|
||||
* page's durable identity:
|
||||
*
|
||||
* ---
|
||||
* gitmost_id: 019ef6fc-2638-7ce1-9ce3-2756ce038480
|
||||
* ---
|
||||
* <clean markdown body>
|
||||
*
|
||||
* Everything else is derived (title = filename, parentPageId = enclosing folder,
|
||||
* spaceId = the vault, updatedAt = git). `gitmost_id` (a Docmost pageId) is the
|
||||
* only non-derivable bit and travels WITH the file so identity survives any move,
|
||||
* even one git's rename detection misses. Third-party editors (Obsidian, …) see
|
||||
* clean markdown; the frontmatter is hidden in their preview.
|
||||
*
|
||||
* No backward-compat with the old `docmost:meta` format: vaults are a cache, wiped
|
||||
* and rebuilt native. A file WITHOUT a `gitmost_id` frontmatter is an un-tracked
|
||||
* (e.g. hand-written) file -> the caller ADOPTS it (creates a page, writes the id).
|
||||
*/
|
||||
/**
 * Name of the frontmatter field that stores the Docmost pageId. The name is
 * namespaced (rather than a bare `id`) so that it does not clash with
 * frontmatter fields the user adds.
 */
export declare const ID_KEY: "gitmost_id";
|
||||
/**
 * Split a page file into its identity (`id`) and its clean markdown `body`.
 *
 * Parsing is tolerant: a file without frontmatter (for example a hand-written
 * third-party file) yields `id: null` with the whole text as `body`, and the
 * caller is then expected to ADOPT it (create a page and write the id back).
 *
 * KNOWN LIMITATION (phase 4 — adoption, see docs/backlog/git-sync-thin-meta.md):
 * a leading frontmatter block is removed from `body` even when it has no
 * `gitmost_id` but does hold the user's own Obsidian properties (`tags:` etc.).
 * Those fields are not round-tripped yet, because the `serializePageFile`
 * write-back stores only `gitmost_id`. Keeping arbitrary user frontmatter
 * across the Docmost round-trip (both the adoption write-back and the next
 * pull's re-serialize) is deferred to the adoption phase; until then, do NOT
 * roll the native format onto a real Obsidian vault whose notes carry
 * properties.
 */
export declare function parsePageFile(full: string): { id: string | null; body: string };
|
||||
/**
 * Write a page in the thin format: the `id` frontmatter, one blank line, the
 * clean body, and a final newline. The output is deterministic, so a page that
 * has not changed serializes to the same bytes on every sync (the loop-guard
 * depends on this to avoid churn).
 */
export declare function serializePageFile(id: string, body: string): string;
|
||||
72
packages/git-sync/build/lib/page-file.js
Normal file
72
packages/git-sync/build/lib/page-file.js
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
|
||||
* The native-Obsidian page-file format (design: docs/backlog/git-sync-thin-meta.md).
|
||||
* A page file is CLEAN markdown with a minimal YAML frontmatter carrying ONLY the
|
||||
* page's durable identity:
|
||||
*
|
||||
* ---
|
||||
* gitmost_id: 019ef6fc-2638-7ce1-9ce3-2756ce038480
|
||||
* ---
|
||||
* <clean markdown body>
|
||||
*
|
||||
* Everything else is derived (title = filename, parentPageId = enclosing folder,
|
||||
* spaceId = the vault, updatedAt = git). `gitmost_id` (a Docmost pageId) is the
|
||||
* only non-derivable bit and travels WITH the file so identity survives any move,
|
||||
* even one git's rename detection misses. Third-party editors (Obsidian, …) see
|
||||
* clean markdown; the frontmatter is hidden in their preview.
|
||||
*
|
||||
* No backward-compat with the old `docmost:meta` format: vaults are a cache, wiped
|
||||
* and rebuilt native. A file WITHOUT a `gitmost_id` frontmatter is an un-tracked
|
||||
* (e.g. hand-written) file -> the caller ADOPTS it (creates a page, writes the id).
|
||||
*/
|
||||
/**
|
||||
* The frontmatter key carrying the Docmost pageId. NAMESPACED (not a bare `id`)
|
||||
* so it never collides with a user's own frontmatter fields.
|
||||
*/
|
||||
export const ID_KEY = "gitmost_id";
|
||||
/** Leading YAML frontmatter block: `---\n…\n---` at the very start of the file. */
|
||||
const FRONTMATTER_RE = /^?---\n([\s\S]*?)\n---\n?/;
|
||||
/**
 * Extract the value of the `<ID_KEY>: <value>` line from frontmatter text.
 *
 * Lines are examined top to bottom and only the FIRST line that starts with the
 * key decides the result. Its value is trimmed and one leading and one trailing
 * quote character (`"` or `'`) are removed.
 *
 * @param yaml  Text between the frontmatter fences.
 * @returns The id, or null when no such line exists or its value is empty.
 */
function readIdFromYaml(yaml) {
    const keyLine = new RegExp(`^${ID_KEY}:\\s*(.+?)\\s*$`);
    for (const line of yaml.split("\n")) {
        const hit = keyLine.exec(line);
        if (hit === null) {
            continue;
        }
        const unquoted = hit[1].trim().replace(/^["']|["']$/g, "");
        return unquoted === "" ? null : unquoted;
    }
    return null;
}
|
||||
/**
 * Split a page file into its identity (`id`) and its clean markdown `body`.
 *
 * CRLF line endings are normalized to LF first, and a null/undefined input is
 * treated as the empty string. When the text starts with a frontmatter block,
 * `id` is whatever `readIdFromYaml` finds in it (null when the key is absent)
 * and `body` is the trimmed remainder. Without a frontmatter block, `id` is
 * null and `body` is the whole trimmed text. A null `id` marks an un-tracked
 * file that the caller is expected to ADOPT (create a page, write the id back).
 *
 * KNOWN LIMITATION (phase 4 — adoption, see docs/backlog/git-sync-thin-meta.md):
 * a leading frontmatter block is removed from `body` even when it has no
 * `gitmost_id` but does hold the user's own Obsidian properties (`tags:` etc.).
 * Those fields are not round-tripped yet, because the `serializePageFile`
 * write-back stores only `gitmost_id`. Keeping arbitrary user frontmatter
 * across the Docmost round-trip (both the adoption write-back and the next
 * pull's re-serialize) is deferred to the adoption phase; until then, do NOT
 * roll the native format onto a real Obsidian vault whose notes carry
 * properties.
 *
 * @param full  Complete file text.
 * @returns `{ id, body }`.
 */
export function parsePageFile(full) {
    const normalized = (full ?? "").replace(/\r\n/g, "\n");
    const frontmatter = FRONTMATTER_RE.exec(normalized);
    if (frontmatter === null) {
        // No frontmatter at all: an un-tracked file, the whole text is the body.
        return { id: null, body: normalized.trim() };
    }
    const [block, yaml] = frontmatter;
    return {
        id: readIdFromYaml(yaml),
        body: normalized.slice(block.length).trim(),
    };
}
|
||||
/**
 * Write a page in the thin format: the `id` frontmatter, one blank line, the
 * trimmed body, and a final newline. The output depends only on the two
 * arguments, so a page that has not changed serializes to the same bytes on
 * every sync (the loop-guard depends on this to avoid churn).
 *
 * @param id    Page id written under `ID_KEY`.
 * @param body  Markdown body; surrounding whitespace is trimmed.
 * @returns The complete file text.
 */
export function serializePageFile(id, body) {
    const header = `---\n${ID_KEY}: ${id}\n---\n`;
    const cleanBody = body.trim();
    return `${header}\n${cleanBody}\n`;
}
|
||||
14
packages/git-sync/node_modules/.bin/esbuild
generated
vendored
Executable file
14
packages/git-sync/node_modules/.bin/esbuild
generated
vendored
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/sh
# Launcher shim for the esbuild executable (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules/esbuild/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules/esbuild/node_modules:/home/claude/gitmost/node_modules/.pnpm/esbuild@0.28.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Run the real executable directly and pass its exit status through.
"$basedir/../../../../node_modules/.pnpm/esbuild@0.28.0/node_modules/esbuild/bin/esbuild" "$@"
exit $?
|
||||
17
packages/git-sync/node_modules/.bin/jiti
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/jiti
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Launcher shim for the jiti CLI (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/lib/node_modules:/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/node_modules:/home/claude/gitmost/node_modules/.pnpm/jiti@2.4.2/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Prefer a node binary sitting next to this script; otherwise use the one on PATH.
entry="$basedir/../../../../node_modules/.pnpm/jiti@2.4.2/node_modules/jiti/lib/jiti-cli.mjs"
if [ -x "$basedir/node" ]; then
  exec "$basedir/node" "$entry" "$@"
fi
exec node "$entry" "$@"
|
||||
17
packages/git-sync/node_modules/.bin/lessc
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/lessc
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Launcher shim for the lessc CLI (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules/less/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules/less/node_modules:/home/claude/gitmost/node_modules/.pnpm/less@4.2.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Prefer a node binary sitting next to this script; otherwise use the one on PATH.
entry="$basedir/../../../../node_modules/.pnpm/less@4.2.0/node_modules/less/bin/lessc"
if [ -x "$basedir/node" ]; then
  exec "$basedir/node" "$entry" "$@"
fi
exec node "$entry" "$@"
|
||||
17
packages/git-sync/node_modules/.bin/marked
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/marked
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Launcher shim for the marked CLI (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules/marked/node_modules:/home/claude/gitmost/node_modules/.pnpm/marked@17.0.5/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Prefer a node binary sitting next to this script; otherwise use the one on PATH.
entry="$basedir/../marked/bin/marked.js"
if [ -x "$basedir/node" ]; then
  exec "$basedir/node" "$entry" "$@"
fi
exec node "$entry" "$@"
|
||||
17
packages/git-sync/node_modules/.bin/terser
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/terser
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Launcher shim for the terser CLI (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules/terser/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules/terser/node_modules:/home/claude/gitmost/node_modules/.pnpm/terser@5.39.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Prefer a node binary sitting next to this script; otherwise use the one on PATH.
entry="$basedir/../../../../node_modules/.pnpm/terser@5.39.0/node_modules/terser/bin/terser"
if [ -x "$basedir/node" ]; then
  exec "$basedir/node" "$entry" "$@"
fi
exec node "$entry" "$@"
|
||||
17
packages/git-sync/node_modules/.bin/tsc
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/tsc
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Launcher shim for the TypeScript compiler CLI, tsc (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Prefer a node binary sitting next to this script; otherwise use the one on PATH.
entry="$basedir/../typescript/bin/tsc"
if [ -x "$basedir/node" ]; then
  exec "$basedir/node" "$entry" "$@"
fi
exec node "$entry" "$@"
|
||||
17
packages/git-sync/node_modules/.bin/tsserver
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/tsserver
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Launcher shim for the TypeScript language server, tsserver (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules/typescript/node_modules:/home/claude/gitmost/node_modules/.pnpm/typescript@5.9.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Prefer a node binary sitting next to this script; otherwise use the one on PATH.
entry="$basedir/../typescript/bin/tsserver"
if [ -x "$basedir/node" ]; then
  exec "$basedir/node" "$entry" "$@"
fi
exec node "$entry" "$@"
|
||||
17
packages/git-sync/node_modules/.bin/tsx
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/tsx
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Launcher shim for the tsx CLI (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/dist/node_modules:/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/node_modules:/home/claude/gitmost/node_modules/.pnpm/tsx@4.21.0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Prefer a node binary sitting next to this script; otherwise use the one on PATH.
entry="$basedir/../../../../node_modules/.pnpm/tsx@4.21.0/node_modules/tsx/dist/cli.mjs"
if [ -x "$basedir/node" ]; then
  exec "$basedir/node" "$entry" "$@"
fi
exec node "$entry" "$@"
|
||||
17
packages/git-sync/node_modules/.bin/vite
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/vite
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
# Launcher shim for the vite CLI (generated file).
# NOTE(review): the NODE_PATH entries are absolute paths under
# /home/claude/gitmost, so this file is specific to that checkout location.

# Directory of this script, with backslashes turned into forward slashes.
basedir="$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")"

# On Cygwin, convert the directory to a Windows-style path.
case "$(uname)" in
  *CYGWIN*) basedir="$(cygpath -w "$basedir")" ;;
esac

# Put the package's module directories in front of any existing NODE_PATH.
shim_node_path="/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/bin/node_modules:/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/node_modules:/home/claude/gitmost/node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
if [ -n "$NODE_PATH" ]; then
  NODE_PATH="$shim_node_path:$NODE_PATH"
else
  NODE_PATH="$shim_node_path"
fi
export NODE_PATH

# Prefer a node binary sitting next to this script; otherwise use the one on PATH.
entry="$basedir/../../../../node_modules/.pnpm/vite@8.0.5_@types+node@20.19.43_esbuild@0.28.0_jiti@2.4.2_less@4.2.0_sugarss@5.0.1_post_af6663088600fc9d0834b42272c42df7/node_modules/vite/bin/vite.js"
if [ -x "$basedir/node" ]; then
  exec "$basedir/node" "$entry" "$@"
fi
exec node "$entry" "$@"
|
||||
17
packages/git-sync/node_modules/.bin/vitest
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/vitest
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
|
||||
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
|
||||
|
||||
case `uname` in
|
||||
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
|
||||
esac
|
||||
|
||||
if [ -z "$NODE_PATH" ]; then
|
||||
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules/vitest/node_modules:/home/claude/gitmost/node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
|
||||
else
|
||||
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules/vitest/node_modules:/home/claude/gitmost/node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
|
||||
fi
|
||||
if [ -x "$basedir/node" ]; then
|
||||
exec "$basedir/node" "$basedir/../vitest/vitest.mjs" "$@"
|
||||
else
|
||||
exec node "$basedir/../vitest/vitest.mjs" "$@"
|
||||
fi
|
||||
17
packages/git-sync/node_modules/.bin/yaml
generated
vendored
Executable file
17
packages/git-sync/node_modules/.bin/yaml
generated
vendored
Executable file
@@ -0,0 +1,17 @@
|
||||
#!/bin/sh
|
||||
basedir=$(dirname "$(echo "$0" | sed -e 's,\\,/,g')")
|
||||
|
||||
case `uname` in
|
||||
*CYGWIN*) basedir=`cygpath -w "$basedir"`;;
|
||||
esac
|
||||
|
||||
if [ -z "$NODE_PATH" ]; then
|
||||
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/yaml@2.8.3/node_modules/yaml/node_modules:/home/claude/gitmost/node_modules/.pnpm/yaml@2.8.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules"
|
||||
else
|
||||
export NODE_PATH="/home/claude/gitmost/node_modules/.pnpm/yaml@2.8.3/node_modules/yaml/node_modules:/home/claude/gitmost/node_modules/.pnpm/yaml@2.8.3/node_modules:/home/claude/gitmost/node_modules/.pnpm/node_modules:$NODE_PATH"
|
||||
fi
|
||||
if [ -x "$basedir/node" ]; then
|
||||
exec "$basedir/node" "$basedir/../../../../node_modules/.pnpm/yaml@2.8.3/node_modules/yaml/bin.mjs" "$@"
|
||||
else
|
||||
exec node "$basedir/../../../../node_modules/.pnpm/yaml@2.8.3/node_modules/yaml/bin.mjs" "$@"
|
||||
fi
|
||||
1
packages/git-sync/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json
generated
vendored
Normal file
1
packages/git-sync/node_modules/.vite/vitest/da39a3ee5e6b4b0d3255bfef95601890afd80709/results.json
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
{"version":"4.1.6","results":[[":test/node-ops.test.ts",{"duration":73.83617300000003,"failed":false}],[":test/markdown-converter.test.ts",{"duration":52.24364600000001,"failed":false}],[":test/diff.test.ts",{"duration":48.002140000000054,"failed":false}],[":test/node-ops-extra.test.ts",{"duration":64.79457399999995,"failed":false}],[":test/reconcile.test.ts",{"duration":13.454662000000042,"failed":false}],[":test/canonicalize.test.ts",{"duration":15.510864999999967,"failed":false}],[":test/markdown-roundtrip.property.test.ts",{"duration":10142.778976,"failed":false}],[":test/stabilize.test.ts",{"duration":180.60366900000008,"failed":false}],[":test/canonicalize-extra.test.ts",{"duration":265.1806279999996,"failed":false}],[":test/loop-guard.test.ts",{"duration":9.12148000000002,"failed":false}],[":test/markdown-document.test.ts",{"duration":9.338571000000002,"failed":false}],[":test/sanitize.test.ts",{"duration":20.903294999999957,"failed":false}],[":test/markdown-converter-golden.test.ts",{"duration":20.178874000000008,"failed":false}],[":test/roundtrip-corpus.test.ts",{"duration":375.9727969999999,"failed":false}],[":test/layout.test.ts",{"duration":25.806564999999978,"failed":false}],[":test/markdown-document-envelope.test.ts",{"duration":17.760928999999976,"failed":false}],[":test/roundtrip.test.ts",{"duration":202.1052659999998,"failed":false}],[":test/compute-push-actions.test.ts",{"duration":18.895632999999975,"failed":false}],[":test/apply-pull-actions.test.ts",{"duration":312.7543149999997,"failed":false}],[":test/git.test.ts",{"duration":2510.628562,"failed":false}],[":test/run-push.test.ts",{"duration":52.35109799999998,"failed":false}],[":test/compute-pull-actions.test.ts",{"duration":12.83178799999996,"failed":false}],[":test/apply-push-actions.test.ts",{"duration":40.049105,"failed":false}],[":test/classify-rename-moves.test.ts",{"duration":11.772115999999983,"failed":false}],[":test/git-merge.test.ts",{"duration":394.734729,"failed":false}],[":test/r
ead-existing.test.ts",{"duration":9.485771000000113,"failed":false}],[":test/config-errors-invalid.test.ts",{"duration":22.83441799999997,"failed":false}],[":test/run-push-realgit.test.ts",{"duration":341.63427,"failed":false}],[":test/settings.test.ts",{"duration":18.815516000000002,"failed":false}],[":test/config-errors.test.ts",{"duration":22.358415000000036,"failed":false}],[":test/git-sync-client.contract.test-d.ts",{"duration":0,"failed":false}],[":test/engine-gaps.test.ts",{"duration":107.23285100000021,"failed":false}],[":test/markdown-converter-gaps.test.ts",{"duration":397.53935699999965,"failed":false}],[":test/git-integration-gaps.test.ts",{"duration":401.41072199999996,"failed":false}],[":test/markdown-to-prosemirror-gaps.test.ts",{"duration":446.77069600000004,"failed":false}],[":test/zzprobe.test.ts",{"duration":206.321958,"failed":false}],[":test/_probe_rt.test.ts",{"duration":113.90998200000013,"failed":false}],[":test/_probe2.test.ts",{"duration":87.88095900000008,"failed":false}],[":test/zz-probe.test.ts",{"duration":61.425263000000086,"failed":false}],[":test/zzz-probe.test.ts",{"duration":128.94683599999985,"failed":true}],[":test/_probe.test.ts",{"duration":135.79946900000004,"failed":false}],[":test/__probe.test.ts",{"duration":5.685652999999945,"failed":false}],[":test/markdown-converter-html-marks.test.ts",{"duration":10.321619999999996,"failed":false}],[":test/_probe/probe.test.ts",{"duration":71.38958900000011,"failed":false}],[":test/media-roundtrip.test.ts",{"duration":196.99739999999997,"failed":false}],[":test/diagram-roundtrip.test.ts",{"duration":82.55217999999968,"failed":false}],[":test/git-error-paths.test.ts",{"duration":303.43118300000003,"failed":false}],[":test/zzprobe2.test.ts",{"duration":54.94561099999987,"failed":false}],[":test/zzprobe3.test.ts",{"duration":77.88595900000018,"failed":false}],[":test/docmost-schema-attrs.test.ts",{"duration":10.282551000000012,"failed":false}],[":test/_valid_probe.test.ts",{"duration":92.3
5715300000015,"failed":false}],[":test/strip-empty-paragraphs-validity.test.ts",{"duration":127.7716620000001,"failed":false}],[":test/cycle.test.ts",{"duration":17.375657000000047,"failed":false}],[":test/cycle-roundtrip.test.ts",{"duration":582.6821960000002,"failed":false}],[":test/vault-index.test.ts",{"duration":9.033900000000017,"failed":false}],[":test/page-file.test.ts",{"duration":7.111135999999988,"failed":false}]]}
|
||||
1
packages/git-sync/node_modules/@fellow/prosemirror-recreate-transform
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@fellow/prosemirror-recreate-transform
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@fellow+prosemirror-recreate-transform@1.2.3/node_modules/@fellow/prosemirror-recreate-transform
|
||||
1
packages/git-sync/node_modules/@tiptap/core
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/core
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+core@3.20.4_@tiptap+pm@3.20.4/node_modules/@tiptap/core
|
||||
1
packages/git-sync/node_modules/@tiptap/extension-highlight
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/extension-highlight
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-highlight@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4_/node_modules/@tiptap/extension-highlight
|
||||
1
packages/git-sync/node_modules/@tiptap/extension-image
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/extension-image
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-image@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4_/node_modules/@tiptap/extension-image
|
||||
1
packages/git-sync/node_modules/@tiptap/extension-subscript
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/extension-subscript
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-subscript@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-subscript
|
||||
1
packages/git-sync/node_modules/@tiptap/extension-superscript
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/extension-superscript
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-superscript@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4/node_modules/@tiptap/extension-superscript
|
||||
1
packages/git-sync/node_modules/@tiptap/extension-task-item
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/extension-task-item
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-task-item@3.20.4_@tiptap+extension-list@3.20.4_@tiptap+core@3.20.4_@t_f120fce1a3d9fc85461b67496f03c362/node_modules/@tiptap/extension-task-item
|
||||
1
packages/git-sync/node_modules/@tiptap/extension-task-list
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/extension-task-list
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+extension-task-list@3.20.4_@tiptap+extension-list@3.20.4_@tiptap+core@3.20.4_@t_c94f69f56aee3556ec680ab7491aa1d4/node_modules/@tiptap/extension-task-list
|
||||
1
packages/git-sync/node_modules/@tiptap/html
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/html
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+html@3.20.4_@tiptap+core@3.20.4_@tiptap+pm@3.20.4__@tiptap+pm@3.20.4_happy-dom@20.8.9/node_modules/@tiptap/html
|
||||
1
packages/git-sync/node_modules/@tiptap/pm
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/pm
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+pm@3.20.4/node_modules/@tiptap/pm
|
||||
1
packages/git-sync/node_modules/@tiptap/starter-kit
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@tiptap/starter-kit
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@tiptap+starter-kit@3.20.4/node_modules/@tiptap/starter-kit
|
||||
1
packages/git-sync/node_modules/@types/jsdom
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@types/jsdom
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@types+jsdom@21.1.7/node_modules/@types/jsdom
|
||||
1
packages/git-sync/node_modules/@types/node
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/@types/node
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../../node_modules/.pnpm/@types+node@20.19.43/node_modules/@types/node
|
||||
1
packages/git-sync/node_modules/fast-check
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/fast-check
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../node_modules/.pnpm/fast-check@4.8.0/node_modules/fast-check
|
||||
1
packages/git-sync/node_modules/jsdom
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/jsdom
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../node_modules/.pnpm/jsdom@25.0.0/node_modules/jsdom
|
||||
1
packages/git-sync/node_modules/marked
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/marked
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../node_modules/.pnpm/marked@17.0.5/node_modules/marked
|
||||
1
packages/git-sync/node_modules/typescript
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/typescript
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../node_modules/.pnpm/typescript@5.9.3/node_modules/typescript
|
||||
1
packages/git-sync/node_modules/vitest
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/vitest
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../node_modules/.pnpm/vitest@4.1.6_@opentelemetry+api@1.9.0_@types+node@20.19.43_happy-dom@20.8.9_jsdom@25.0._8036f71cd985f114f75875ba7ccfe1d0/node_modules/vitest
|
||||
1
packages/git-sync/node_modules/zod
generated
vendored
Symbolic link
1
packages/git-sync/node_modules/zod
generated
vendored
Symbolic link
@@ -0,0 +1 @@
|
||||
../../../node_modules/.pnpm/zod@4.3.6/node_modules/zod
|
||||
Reference in New Issue
Block a user