Compare commits
67 Commits
main
...
c1c87c21c3
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c1c87c21c3 | ||
|
|
4b3153f2d2 | ||
|
|
c6f4442076 | ||
|
|
7a7b840ebf | ||
|
|
7fb1d4848a | ||
|
|
3a6dfad3ae | ||
|
|
0c7b73f7d0 | ||
|
|
937f04b735 | ||
|
|
032df2cf31 | ||
|
|
f4651f554d | ||
|
|
87343f241a | ||
|
|
e8c5a62410 | ||
|
|
687482a901 | ||
|
|
b751c4bdc5 | ||
|
|
3d7f434b0c | ||
|
|
142ed3a825 | ||
|
|
4213a12180 | ||
|
|
b5836eb93e | ||
|
|
c76f255b1c | ||
|
|
5ca4cc4657 | ||
|
|
3d9c508011 | ||
|
|
0443d90519 | ||
|
|
dcf614aa4c | ||
|
|
6b2a511dd1 | ||
|
|
f5bf5fd536 | ||
|
|
6c2a1b4a65 | ||
|
|
583b2e35c2 | ||
|
|
630f9291de | ||
|
|
257ea1df2c | ||
|
|
be70e9f09d | ||
|
|
b6bca5d8e1 | ||
|
|
1ee18e3ed7 | ||
|
|
3e0b0aa7c0 | ||
|
|
5d0d5e7af4 | ||
|
|
32c2939936 | ||
|
|
57b9ced95f | ||
|
|
a18302cdb4 | ||
|
|
61aad27fce | ||
|
|
f24c8e20d5 | ||
|
|
b0fc49cf9d | ||
|
|
3b334d9624 | ||
|
|
71a96581ca | ||
|
|
306d88c685 | ||
|
|
0318a148dc | ||
|
|
f923accc3d | ||
|
|
a0e1cde063 | ||
|
|
259d4ca6fa | ||
|
|
7ed33d8127 | ||
|
|
c5b05aacaf | ||
|
|
f90f3e272a | ||
|
|
3bba9425f4 | ||
|
|
9c805e8069 | ||
|
|
d716ca385a | ||
|
|
ff36f7bffa | ||
|
|
e5607cb1d2 | ||
|
|
66bd039f8f | ||
|
|
ba15fde809 | ||
|
|
eb0aa12c83 | ||
|
|
d1a8b48b96 | ||
|
|
0692e55981 | ||
|
|
55d610b7f8 | ||
|
|
8201e76c66 | ||
|
|
901147a224 | ||
|
|
afe1ba8398 | ||
|
|
d79807802c | ||
|
|
5aaeaaae3c | ||
|
|
c44d8ba05c |
40
.env.example
40
.env.example
@@ -187,3 +187,43 @@ MCP_DOCMOST_PASSWORD=
|
||||
# Per-request output-token ceiling for the anonymous assistant (default: 512).
|
||||
# Worst-case output per accepted call = agent steps (5) × this value.
|
||||
# SHARE_AI_MAX_OUTPUT_TOKENS=512
|
||||
|
||||
# --- GIT-SYNC (native two-way Docmost <-> git Markdown sync) ---
|
||||
# Master switch. Off by default. When 'true', GIT_SYNC_SERVICE_USER_ID below is
|
||||
# REQUIRED (the service account that git-originated create/move/rename/delete are
|
||||
# attributed to) — the server refuses to boot with sync enabled and no user id.
|
||||
# GIT_SYNC_ENABLED=false
|
||||
#
|
||||
# Serve the per-space vaults over smart-HTTP (the /git host). Defaults to
|
||||
# GIT_SYNC_ENABLED when unset.
|
||||
# GIT_SYNC_HTTP_ENABLED=false
|
||||
#
|
||||
# REQUIRED when GIT_SYNC_ENABLED=true: id of the user that git-originated page
|
||||
# operations (create / move / rename / delete) are attributed to.
|
||||
# GIT_SYNC_SERVICE_USER_ID=
|
||||
#
|
||||
# Where the per-space working vaults live (non-bare repos; the engine needs a
|
||||
# working tree).
|
||||
# Defaults to "<DATA_DIR or ./data>/git-sync".
|
||||
# GIT_SYNC_DATA_DIR=
|
||||
#
|
||||
# Optional remote URL template to mirror each space's vault to (e.g. a git host).
|
||||
# Leave unset to keep vaults local-only.
|
||||
# GIT_SYNC_REMOTE_TEMPLATE=
|
||||
#
|
||||
# Path to the SSH private key used when pushing to GIT_SYNC_REMOTE_TEMPLATE.
|
||||
# GIT_SYNC_SSH_KEY_PATH=
|
||||
#
|
||||
# Poll-safety interval in ms — the cadence of the background reconcile cycle
|
||||
# (default: 15000).
|
||||
# GIT_SYNC_POLL_INTERVAL_MS=15000
|
||||
#
|
||||
# Debounce window in ms for collapsing bursts of page edits into one sync cycle
|
||||
# (default: 2000).
|
||||
# GIT_SYNC_DEBOUNCE_MS=2000
|
||||
#
|
||||
# Watchdog timeout in ms for the spawned `git http-backend` process serving a
|
||||
# git smart-HTTP push (default: 120000). A stalled/hung receive-pack is killed
|
||||
# after this deadline so it cannot hold the per-space lock forever.
|
||||
# GIT_SYNC_BACKEND_TIMEOUT_MS=120000
|
||||
#
|
||||
|
||||
7
.github/workflows/test.yml
vendored
7
.github/workflows/test.yml
vendored
@@ -68,6 +68,13 @@ jobs:
|
||||
- name: Build editor-ext
|
||||
run: pnpm --filter @docmost/editor-ext build
|
||||
|
||||
# git-sync and mcp are no longer committed in built form (build/ is
|
||||
# gitignored), so CI must compile them: the server resolves both via their
|
||||
# built build/index.js. The server pretest also builds them, but building
|
||||
# here keeps it explicit and independent of pnpm lifecycle ordering.
|
||||
- name: Build git-sync and mcp
|
||||
run: pnpm --filter @docmost/git-sync build && pnpm --filter @docmost/mcp build
|
||||
|
||||
- name: Run unit tests
|
||||
run: pnpm -r test
|
||||
|
||||
|
||||
6
.gitignore
vendored
6
.gitignore
vendored
@@ -5,6 +5,12 @@ data
|
||||
# compiled output
|
||||
/dist
|
||||
/node_modules
|
||||
# workspace package node_modules (pnpm symlinks — never commit; they bake
|
||||
# machine-local store paths) and the git-sync / mcp compiled output (built in CI/Docker
|
||||
# via `pnpm build`, never committed, so src/ and prod can never silently diverge).
|
||||
packages/*/node_modules/
|
||||
packages/git-sync/build/
|
||||
packages/mcp/build/
|
||||
|
||||
# Logs
|
||||
logs
|
||||
|
||||
@@ -182,7 +182,7 @@ tea issues create --repo vvzvlad/gitmost --labels feature \
|
||||
|
||||
## Monorepo layout
|
||||
|
||||
pnpm workspace (`pnpm@10.4.0`) orchestrated by **Nx**. Four workspace packages:
|
||||
pnpm workspace (`pnpm@10.4.0`) orchestrated by **Nx**. Five workspace packages:
|
||||
|
||||
| Path | Name | Stack | Role |
|
||||
| --- | --- | --- | --- |
|
||||
@@ -190,6 +190,7 @@ pnpm workspace (`pnpm@10.4.0`) orchestrated by **Nx**. Four workspace packages:
|
||||
| `apps/client` | `client` | React 18 + Vite + Mantine 8 + TanStack Query + Jotai | SPA frontend |
|
||||
| `packages/editor-ext` | `@docmost/editor-ext` | Tiptap/ProseMirror | Shared Tiptap node/mark extensions, imported by both the client and the server |
|
||||
| `packages/mcp` | `@docmost/mcp` | MCP SDK, Tiptap, Yjs | Standalone MCP server, also bundled into the server at `/mcp`. Does **not** import `editor-ext` — it keeps its own vendored mirror of the schema in `packages/mcp/src/lib/` |
|
||||
| `packages/git-sync` | `@docmost/git-sync` | Tiptap/ProseMirror, Yjs, git | Pure ProseMirror↔Markdown converter plus the two-way Docmost↔git Markdown sync engine. Bundled into the server (loaded over the ESM bridge), built in CI and the Dockerfile. Does **not** import `editor-ext` — it keeps its own vendored mirror of the document schema (kept in sync with `editor-ext`). |
|
||||
|
||||
`build` targets are Nx-cached and dependency-ordered (`dependsOn: ["^build"]`), so `editor-ext` builds before the apps. `nx.json` sets `affected.defaultBase: main`.
|
||||
|
||||
@@ -263,7 +264,7 @@ The API server is a Fastify app with a global `/api` prefix (`main.ts` excludes
|
||||
### Client structure
|
||||
Vite SPA. Code is organized by feature under `apps/client/src/features/*` (mirrors the server domains: `page`, `space`, `comment`, `ai-chat`, `editor`, …). Conventions:
|
||||
- **TanStack Query** for server state (one `queries/` file per feature), **Jotai** atoms for local/shared UI state, **Mantine 8** + CSS modules (`*.module.css`) + `postcss-preset-mantine` for UI.
|
||||
- The editor is Tiptap; shared node/mark extensions live in `packages/editor-ext` and are imported by **both the client and the server** (collaboration, import/export) — editor schema changes often need to be made in `editor-ext`, not just the client. Note `packages/mcp` does *not* depend on `editor-ext`; it carries its own mirrored copy of the schema, so keep the two in sync manually when the document schema changes.
|
||||
- The editor is Tiptap; shared node/mark extensions live in `packages/editor-ext` and are imported by **both the client and the server** (collaboration, import/export) — editor schema changes often need to be made in `editor-ext`, not just the client. Note neither `packages/mcp` nor `packages/git-sync` depends on `editor-ext`; each carries its own mirrored copy of the schema. There are now **three** independent copies (`editor-ext` is canonical, plus `packages/mcp` and `packages/git-sync`), so keep all three in sync manually when the document schema changes.
|
||||
- API access goes through `apps/client/src/lib/api-client.ts` (axios). The `@` alias maps to `apps/client/src`.
|
||||
- Runtime config is injected at build time by `vite.config.ts` via `define` (`APP_URL`, `COLLAB_URL`, `APP_VERSION`, …) — these come from the root `.env`, not from `import.meta.env`.
|
||||
|
||||
|
||||
@@ -17,8 +17,9 @@ RUN pnpm build
|
||||
|
||||
FROM base AS installer
|
||||
|
||||
# git: required by the git-sync VaultGit (shells out to git)
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends curl bash \
|
||||
&& apt-get install -y --no-install-recommends curl bash git \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /app
|
||||
@@ -33,6 +34,11 @@ COPY --from=builder /app/packages/editor-ext/dist /app/packages/editor-ext/dist
|
||||
COPY --from=builder /app/packages/editor-ext/package.json /app/packages/editor-ext/package.json
|
||||
COPY --from=builder /app/packages/mcp/build /app/packages/mcp/build
|
||||
COPY --from=builder /app/packages/mcp/package.json /app/packages/mcp/package.json
|
||||
# git-sync: the server requires @docmost/git-sync at runtime; without these the
|
||||
# image starts and crashes on `require('@docmost/git-sync')`. Built fresh by the
|
||||
# builder's `pnpm build` (nx builds the package's tsc `build` target).
|
||||
COPY --from=builder /app/packages/git-sync/build /app/packages/git-sync/build
|
||||
COPY --from=builder /app/packages/git-sync/package.json /app/packages/git-sync/package.json
|
||||
|
||||
# Copy root package files
|
||||
COPY --from=builder /app/package.json /app/package.json
|
||||
|
||||
@@ -1204,6 +1204,8 @@
|
||||
"Ran tool {{name}}": "Ran tool {{name}}",
|
||||
"AI-agent": "AI-agent",
|
||||
"Edited by AI agent on behalf of {{name}}": "Edited by AI agent on behalf of {{name}}",
|
||||
"Git sync": "Git sync",
|
||||
"Synced from Git on behalf of {{name}}": "Synced from Git on behalf of {{name}}",
|
||||
"Endpoints": "Endpoints",
|
||||
"where we fetch models": "where we fetch models",
|
||||
"All endpoints are OpenAI-compatible. Point the Base URL at OpenAI, OpenRouter, a local Ollama, or any self-hosted server.": "All endpoints are OpenAI-compatible. Point the Base URL at OpenAI, OpenRouter, a local Ollama, or any self-hosted server.",
|
||||
@@ -1228,6 +1230,8 @@
|
||||
"MCP server": "MCP server",
|
||||
"expose the workspace": "expose the workspace",
|
||||
"Enable MCP server": "Enable MCP server",
|
||||
"Enable Git sync": "Enable Git sync",
|
||||
"Sync this space's pages to a Git repository.": "Sync this space's pages to a Git repository.",
|
||||
"Exposes the workspace as an MCP server at /mcp — this provides a capability, it doesn't consume a model.": "Exposes the workspace as an MCP server at /mcp — this provides a capability, it doesn't consume a model.",
|
||||
"Resolves to {{url}}": "Resolves to {{url}}",
|
||||
"Model": "Model",
|
||||
|
||||
37
apps/client/src/components/ui/git-sync-badge.tsx
Normal file
37
apps/client/src/components/ui/git-sync-badge.tsx
Normal file
@@ -0,0 +1,37 @@
|
||||
import { Badge, Tooltip } from "@mantine/core";
|
||||
import { IconGitMerge } from "@tabler/icons-react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
|
||||
interface GitSyncBadgeProps {
|
||||
authorName?: string;
|
||||
}
|
||||
|
||||
/**
 * Badge marking a version produced by git-sync (provenance §8.1). The history
 * version is created on the PUSH path — when an incoming git body is written back
 * into the Docmost doc — not by the pull itself. Like {@link AiAgentBadge} it is
 * ADDITIVE — shown next to the human author, never replacing them — but a git-sync
 * edit is NOT an agent edit and has no chat to deep-link into, so it is a small,
 * neutral, non-clickable label.
 *
 * @param authorName - Name of the human author the synced edit is attributed to.
 *   When it is missing or empty the attribution tooltip is suppressed, because the
 *   sentence would otherwise end in a dangling "on behalf of ".
 * @returns The "Git sync" badge, wrapped in an attribution tooltip.
 */
export function GitSyncBadge({ authorName }: GitSyncBadgeProps) {
  const { t } = useTranslation();

  // The caller passes `lastUpdatedBy?.name`, so the name can legitimately be
  // absent; only show the attribution sentence when there is someone to name.
  const hasAuthor = Boolean(authorName);

  const tooltip = t("Synced from Git on behalf of {{name}}", {
    name: authorName ?? "",
  });

  return (
    <Tooltip label={tooltip} withArrow disabled={!hasAuthor}>
      <Badge
        size="sm"
        variant="light"
        color="gray"
        radius="sm"
        leftSection={<IconGitMerge size={12} stroke={2} />}
      >
        {t("Git sync")}
      </Badge>
    </Tooltip>
  );
}
|
||||
@@ -0,0 +1,227 @@
|
||||
import { describe, it, expect, vi, afterEach, beforeAll } from "vitest";
|
||||
import { render, screen, cleanup, within } from "@testing-library/react";
|
||||
import { MantineProvider } from "@mantine/core";
|
||||
|
||||
// Mantine Tooltip mounts its label lazily on hover via Floating UI, which is
|
||||
// flaky under jsdom. Replace ONLY the Tooltip with a thin wrapper that renders
|
||||
// the label inline (keeping Badge/Switch/etc. real), so the provenance label —
|
||||
// the contract we care about — is deterministically queryable.
|
||||
vi.mock("@mantine/core", async () => {
|
||||
const actual =
|
||||
await vi.importActual<typeof import("@mantine/core")>("@mantine/core");
|
||||
const Tooltip = ({
|
||||
label,
|
||||
children,
|
||||
}: {
|
||||
label?: React.ReactNode;
|
||||
children?: React.ReactNode;
|
||||
}) => (
|
||||
<>
|
||||
{children}
|
||||
<span data-testid="tooltip-label">{label}</span>
|
||||
</>
|
||||
);
|
||||
Tooltip.Group = ({ children }: { children?: React.ReactNode }) => (
|
||||
<>{children}</>
|
||||
);
|
||||
return { ...actual, Tooltip };
|
||||
});
|
||||
|
||||
// jsdom lacks matchMedia, which MantineProvider's color-scheme hook needs.
|
||||
beforeAll(() => {
  // Leave a real (or previously installed) implementation untouched.
  if (window.matchMedia) {
    return;
  }

  // Minimal inert stand-in: never matches and ignores every listener call.
  const inertMatchMedia = (query: string) =>
    ({
      matches: false,
      media: query,
      onchange: null,
      addListener: () => {},
      removeListener: () => {},
      addEventListener: () => {},
      removeEventListener: () => {},
      dispatchEvent: () => false,
    }) as unknown as MediaQueryList;

  window.matchMedia = inertMatchMedia;
});
|
||||
|
||||
// --- Mocks for the heavy / networked module graph ---------------------------
|
||||
// HistoryItem pulls in i18n, jotai atoms (ai-chat / history), a config-backed
|
||||
// avatar and a time formatter. The provenance-badge contract is the unit under
|
||||
// test, so we stub everything else down to inert, deterministic renders and
|
||||
// keep the real Mantine Badge (only Tooltip is replaced, by the inline stub above) so role/label queries are meaningful.
|
||||
|
||||
// i18n: interpolate {{name}} so the git-sync tooltip carries the author name,
|
||||
// letting us assert provenance attribution without a real i18n backend.
|
||||
vi.mock("react-i18next", () => ({
|
||||
useTranslation: () => ({
|
||||
t: (key: string, vars?: Record<string, unknown>) =>
|
||||
vars && typeof vars.name !== "undefined"
|
||||
? key.replace("{{name}}", String(vars.name))
|
||||
: key,
|
||||
}),
|
||||
}));
|
||||
|
||||
// jotai setters: the badges call useSetAtom; return inert setters so a click on
|
||||
// the (deep-linkable) AiAgentBadge would fire these — proving the git-sync badge
|
||||
// does NOT wire any of them.
|
||||
const setAiChatWindowOpen = vi.fn();
|
||||
const setActiveChatId = vi.fn();
|
||||
const setDraft = vi.fn();
|
||||
const setHistoryModalOpen = vi.fn();
|
||||
vi.mock("jotai", async () => {
|
||||
const actual = await vi.importActual<typeof import("jotai")>("jotai");
|
||||
return {
|
||||
...actual,
|
||||
useSetAtom: (atom: unknown) => {
|
||||
switch (atom) {
|
||||
case aiChatWindowOpenAtom:
|
||||
return setAiChatWindowOpen;
|
||||
case activeAiChatIdAtom:
|
||||
return setActiveChatId;
|
||||
case aiChatDraftAtom:
|
||||
return setDraft;
|
||||
case historyAtoms:
|
||||
return setHistoryModalOpen;
|
||||
default:
|
||||
return vi.fn();
|
||||
}
|
||||
},
|
||||
};
|
||||
});
|
||||
|
||||
// Atoms are imported only as identity tokens for the useSetAtom switch above.
|
||||
vi.mock("@/features/ai-chat/atoms/ai-chat-atom.ts", () => ({
|
||||
activeAiChatIdAtom: { __tag: "activeAiChatIdAtom" },
|
||||
aiChatWindowOpenAtom: { __tag: "aiChatWindowOpenAtom" },
|
||||
aiChatDraftAtom: { __tag: "aiChatDraftAtom" },
|
||||
}));
|
||||
vi.mock("@/features/page-history/atoms/history-atoms.ts", () => ({
|
||||
historyAtoms: { __tag: "historyAtoms" },
|
||||
}));
|
||||
|
||||
// Avatar reaches into config (getAvatarUrl) — stub to a plain element.
|
||||
vi.mock("@/components/ui/custom-avatar.tsx", () => ({
|
||||
CustomAvatar: ({ name }: { name?: string }) => (
|
||||
<span data-testid="avatar">{name}</span>
|
||||
),
|
||||
}));
|
||||
|
||||
// Deterministic, locale-free date string.
|
||||
vi.mock("@/lib/time", () => ({
|
||||
formattedDate: () => "2026-06-21",
|
||||
}));
|
||||
|
||||
import HistoryItem from "./history-item";
|
||||
import {
|
||||
activeAiChatIdAtom,
|
||||
aiChatWindowOpenAtom,
|
||||
aiChatDraftAtom,
|
||||
} from "@/features/ai-chat/atoms/ai-chat-atom.ts";
|
||||
import { historyAtoms } from "@/features/page-history/atoms/history-atoms.ts";
|
||||
import type { IPageHistory } from "@/features/page-history/types/page.types";
|
||||
|
||||
/**
 * Builds a page-history fixture for the tests.
 *
 * @param overrides - Fields that replace the corresponding defaults.
 * @returns A complete history item: the defaults below with `overrides` applied last.
 */
function makeItem(overrides: Partial<IPageHistory> = {}): IPageHistory {
  // Baseline record; the default author is "Alice".
  const defaults: IPageHistory = {
    id: "h1",
    pageId: "p1",
    title: "Title",
    slug: "slug",
    icon: "",
    coverPhoto: "",
    version: 1,
    lastUpdatedById: "u1",
    workspaceId: "w1",
    createdAt: "2026-06-21T00:00:00.000Z",
    updatedAt: "2026-06-21T00:00:00.000Z",
    lastUpdatedBy: { id: "u1", name: "Alice", avatarUrl: "" },
  };

  return { ...defaults, ...overrides };
}
|
||||
|
||||
/**
 * Renders a single, non-active `HistoryItem` at index 0 inside a `MantineProvider`.
 *
 * @param item - The history record to display.
 * @returns The Testing Library render result.
 */
function renderItem(item: IPageHistory) {
  // A fresh spy per render; the tests here never inspect it.
  const onSelectSpy = vi.fn();

  const tree = (
    <MantineProvider>
      <HistoryItem
        historyItem={item}
        index={0}
        onSelect={onSelectSpy}
        isActive={false}
      />
    </MantineProvider>
  );

  return render(tree);
}
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
vi.clearAllMocks();
|
||||
});
|
||||
|
||||
describe("HistoryItem git-sync provenance badge", () => {
|
||||
// Test 1: the git-sync badge renders ONLY for lastUpdatedSource === 'git-sync'.
|
||||
it("renders the Git sync badge only when lastUpdatedSource is 'git-sync'", () => {
|
||||
renderItem(makeItem({ lastUpdatedSource: "git-sync" }));
|
||||
expect(screen.getByText("Git sync")).toBeTruthy();
|
||||
});
|
||||
|
||||
it.each([
|
||||
["agent", "agent"],
|
||||
["user", "user"],
|
||||
["undefined", undefined],
|
||||
])(
|
||||
"does NOT render the Git sync badge when lastUpdatedSource is %s",
|
||||
(_label, source) => {
|
||||
renderItem(makeItem({ lastUpdatedSource: source }));
|
||||
expect(screen.queryByText("Git sync")).toBeNull();
|
||||
},
|
||||
);
|
||||
|
||||
// Test 2: provenance attribution + the git-sync badge is NOT interactive.
|
||||
it("attributes the git-sync provenance to the correct author and is not clickable", () => {
|
||||
renderItem(
|
||||
makeItem({
|
||||
lastUpdatedSource: "git-sync",
|
||||
lastUpdatedBy: { id: "u2", name: "Bob", avatarUrl: "" },
|
||||
}),
|
||||
);
|
||||
|
||||
const badge = screen.getByText("Git sync");
|
||||
|
||||
// Provenance attribution: the tooltip label carries the author name (the
|
||||
// git-sync badge passes authorName -> "Synced from Git on behalf of {{name}}").
|
||||
expect(screen.getByText("Synced from Git on behalf of Bob")).toBeTruthy();
|
||||
|
||||
// The git-sync badge must NOT behave like AiAgentBadge: the badge element
|
||||
// itself is not a button, carries no role=button and no tabIndex, and
|
||||
// clicking it must not trigger any ai-chat deep-link. (The surrounding
|
||||
// history-row IS an UnstyledButton — that is the row's own select affordance,
|
||||
// not the badge — so we scope these checks to the badge element.)
|
||||
const badgeRoot = (badge.closest("[class*='mantine-Badge-root']") ??
|
||||
badge) as HTMLElement;
|
||||
expect(badgeRoot.getAttribute("role")).not.toBe("button");
|
||||
expect(badgeRoot.getAttribute("tabindex")).toBeNull();
|
||||
expect(badgeRoot.tagName.toLowerCase()).not.toBe("button");
|
||||
// No interactive descendant button lives inside the badge itself.
|
||||
expect(within(badgeRoot).queryByRole("button")).toBeNull();
|
||||
|
||||
badgeRoot.dispatchEvent(new MouseEvent("click", { bubbles: true }));
|
||||
expect(setActiveChatId).not.toHaveBeenCalled();
|
||||
expect(setAiChatWindowOpen).not.toHaveBeenCalled();
|
||||
expect(setDraft).not.toHaveBeenCalled();
|
||||
expect(setHistoryModalOpen).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// Sanity contrast: the agent badge (the copy-paste source) IS interactive when
|
||||
// it carries an aiChatId — proving the not-clickable assertion above is real.
|
||||
it("contrast: the AI-agent badge is a deep-link button when it has an aiChatId", () => {
|
||||
renderItem(
|
||||
makeItem({
|
||||
lastUpdatedSource: "agent",
|
||||
lastUpdatedAiChatId: "chat-1",
|
||||
}),
|
||||
);
|
||||
const agentBadge = screen.getByText("AI-agent");
|
||||
const root = agentBadge.closest("[role='button']");
|
||||
expect(root).not.toBeNull();
|
||||
within(root as HTMLElement).getByText("AI-agent");
|
||||
});
|
||||
});
|
||||
@@ -1,6 +1,7 @@
|
||||
import { Text, Group, UnstyledButton, Avatar, Tooltip } from "@mantine/core";
|
||||
import { CustomAvatar } from "@/components/ui/custom-avatar.tsx";
|
||||
import { AiAgentBadge } from "@/components/ui/ai-agent-badge.tsx";
|
||||
import { GitSyncBadge } from "@/components/ui/git-sync-badge.tsx";
|
||||
import { formattedDate } from "@/lib/time";
|
||||
import classes from "./css/history.module.css";
|
||||
import clsx from "clsx";
|
||||
@@ -41,6 +42,7 @@ const HistoryItem = memo(function HistoryItem({
|
||||
const contributors = historyItem.contributors;
|
||||
const hasContributors = contributors && contributors.length > 0;
|
||||
const isAgentEdit = historyItem.lastUpdatedSource === "agent";
|
||||
const isGitSyncEdit = historyItem.lastUpdatedSource === "git-sync";
|
||||
|
||||
return (
|
||||
<UnstyledButton
|
||||
@@ -108,6 +110,10 @@ const HistoryItem = memo(function HistoryItem({
|
||||
onActivate={() => setHistoryModalOpen(false)}
|
||||
/>
|
||||
)}
|
||||
|
||||
{isGitSyncEdit && (
|
||||
<GitSyncBadge authorName={historyItem.lastUpdatedBy?.name} />
|
||||
)}
|
||||
</Group>
|
||||
</UnstyledButton>
|
||||
);
|
||||
|
||||
@@ -0,0 +1,240 @@
|
||||
import {
|
||||
describe,
|
||||
it,
|
||||
expect,
|
||||
vi,
|
||||
beforeAll,
|
||||
afterEach,
|
||||
} from "vitest";
|
||||
import {
|
||||
render,
|
||||
screen,
|
||||
cleanup,
|
||||
fireEvent,
|
||||
waitFor,
|
||||
} from "@testing-library/react";
|
||||
import { MantineProvider } from "@mantine/core";
|
||||
|
||||
// --- Mocks for the heavy / networked module graph ---------------------------
|
||||
// EditSpaceForm wires the "Enable Git sync" Switch to a TanStack-Query mutation
|
||||
// (useUpdateSpaceMutation). We mock ONLY that hook so the test fully controls
|
||||
// mutateAsync (resolve / reject) and isPending, and stub i18n. The real Mantine
|
||||
// Switch is rendered so the checkbox role / disabled state is meaningful.
|
||||
|
||||
// i18n: identity translator — labels stay as their English keys for queries.
|
||||
vi.mock("react-i18next", () => ({
|
||||
useTranslation: () => ({ t: (key: string) => key }),
|
||||
}));
|
||||
|
||||
// Mutation hook: a controllable mutateAsync plus a togglable isPending.
|
||||
const mutateAsync = vi.fn();
|
||||
let isPending = false;
|
||||
vi.mock("@/features/space/queries/space-query.ts", () => ({
|
||||
useUpdateSpaceMutation: () => ({
|
||||
mutateAsync,
|
||||
get isPending() {
|
||||
return isPending;
|
||||
},
|
||||
}),
|
||||
}));
|
||||
|
||||
// jsdom lacks matchMedia, which MantineProvider's color-scheme hook needs.
|
||||
beforeAll(() => {
  // Leave a real (or previously installed) implementation untouched.
  if (window.matchMedia) {
    return;
  }

  // Minimal inert stand-in: never matches and ignores every listener call.
  const inertMatchMedia = (query: string) =>
    ({
      matches: false,
      media: query,
      onchange: null,
      addListener: () => {},
      removeListener: () => {},
      addEventListener: () => {},
      removeEventListener: () => {},
      dispatchEvent: () => false,
    }) as unknown as MediaQueryList;

  window.matchMedia = inertMatchMedia;
});
|
||||
|
||||
import { EditSpaceForm } from "./edit-space-form";
|
||||
import type { ISpace } from "@/features/space/types/space.types.ts";
|
||||
|
||||
/**
 * Builds a space fixture for the tests.
 *
 * @param overrides - Fields that replace the corresponding defaults (e.g. `settings`).
 * @returns The defaults below with `overrides` applied last, asserted to `ISpace`
 *   because the defaults do not spell out every field.
 */
function makeSpace(overrides: Partial<ISpace> = {}): ISpace {
  const defaults = {
    id: "space-1",
    name: "Engineering",
    description: "",
    slug: "eng",
    hostname: "host",
    creatorId: "u1",
    // Two separate Date instances, one per field.
    createdAt: new Date("2026-01-01"),
    updatedAt: new Date("2026-01-01"),
  };

  return { ...defaults, ...overrides } as ISpace;
}
|
||||
|
||||
/**
 * Renders `EditSpaceForm` inside a `MantineProvider`.
 *
 * @param props - The space to edit and the optional read-only flag, forwarded as-is.
 * @returns The Testing Library render result.
 */
function renderForm(props: { space: ISpace; readOnly?: boolean }) {
  const { space, readOnly } = props;

  const tree = (
    <MantineProvider>
      <EditSpaceForm space={space} readOnly={readOnly} />
    </MantineProvider>
  );

  return render(tree);
}
|
||||
|
||||
// The form now renders TWO switches (git-sync enable + auto-merge-conflicts) in
|
||||
// that DOM order. Mantine renders each as an <input type="checkbox"
|
||||
// role="switch"> but does NOT expose its label as the accessible name, so we
|
||||
// disambiguate by DOM order (index 0 = enable, 1 = auto-merge) and assert the
|
||||
// human-readable label text is present alongside.
|
||||
/**
 * Locates the "Enable Git sync" switch.
 *
 * @returns The first element with role `switch` in DOM order.
 */
function getToggle(): HTMLInputElement {
  // Presence check only: getByText throws if the label is not rendered.
  screen.getByText("Enable Git sync");

  const [enableSwitch] = screen.getAllByRole("switch");
  return enableSwitch as HTMLInputElement;
}
|
||||
|
||||
/**
 * Locates the "Auto-merge conflicts on push" switch.
 *
 * @returns The second element with role `switch` in DOM order.
 */
function getAutoMergeToggle(): HTMLInputElement {
  // Presence check only: getByText throws if the label is not rendered.
  screen.getByText("Auto-merge conflicts on push");

  const [, autoMergeSwitch] = screen.getAllByRole("switch");
  return autoMergeSwitch as HTMLInputElement;
}
|
||||
|
||||
afterEach(() => {
|
||||
cleanup();
|
||||
mutateAsync.mockReset();
|
||||
isPending = false;
|
||||
});
|
||||
|
||||
describe("EditSpaceForm git-sync toggle", () => {
|
||||
// Test 3: initial checked state derives from settings.gitSync.enabled ?? false.
|
||||
it("derives initial checked state from space.settings.gitSync.enabled (true -> checked)", () => {
|
||||
renderForm({
|
||||
space: makeSpace({ settings: { gitSync: { enabled: true } } }),
|
||||
});
|
||||
expect(getToggle().checked).toBe(true);
|
||||
});
|
||||
|
||||
it("defaults to unchecked when gitSync settings are missing", () => {
|
||||
renderForm({ space: makeSpace() });
|
||||
expect(getToggle().checked).toBe(false);
|
||||
});
|
||||
|
||||
// Test 4: toggling fires the mutation with { spaceId, gitSyncEnabled } and
|
||||
// optimistically flips the switch.
|
||||
it("fires the mutation with the correct payload and optimistically flips on", async () => {
|
||||
mutateAsync.mockResolvedValue(undefined);
|
||||
renderForm({ space: makeSpace() });
|
||||
|
||||
const toggle = getToggle();
|
||||
expect(toggle.checked).toBe(false);
|
||||
|
||||
fireEvent.click(toggle);
|
||||
|
||||
// Optimistic update: the switch reflects the new state immediately.
|
||||
expect(toggle.checked).toBe(true);
|
||||
|
||||
expect(mutateAsync).toHaveBeenCalledTimes(1);
|
||||
expect(mutateAsync).toHaveBeenCalledWith({
|
||||
spaceId: "space-1",
|
||||
gitSyncEnabled: true,
|
||||
});
|
||||
|
||||
// Resolution leaves the toggle on.
|
||||
await waitFor(() => expect(toggle.checked).toBe(true));
|
||||
});
|
||||
|
||||
// Test 5: rollback on mutation error — the most valuable test.
|
||||
it("rolls back the toggle to its prior state when the mutation rejects", async () => {
|
||||
mutateAsync.mockRejectedValue(new Error("network"));
|
||||
renderForm({
|
||||
space: makeSpace({ settings: { gitSync: { enabled: false } } }),
|
||||
});
|
||||
|
||||
const toggle = getToggle();
|
||||
expect(toggle.checked).toBe(false);
|
||||
|
||||
fireEvent.click(toggle);
|
||||
|
||||
// Optimistically flips on before the rejection lands.
|
||||
expect(toggle.checked).toBe(true);
|
||||
expect(mutateAsync).toHaveBeenCalledWith({
|
||||
spaceId: "space-1",
|
||||
gitSyncEnabled: true,
|
||||
});
|
||||
|
||||
// After the rejected promise settles, the component reverts to OFF so the
|
||||
// user is not misled into believing sync is enabled.
|
||||
await waitFor(() => expect(toggle.checked).toBe(false));
|
||||
});
|
||||
|
||||
// Test 6: disabled when readOnly and when the mutation is pending.
|
||||
it("disables the toggle when readOnly", () => {
|
||||
renderForm({ space: makeSpace(), readOnly: true });
|
||||
expect(getToggle().disabled).toBe(true);
|
||||
});
|
||||
|
||||
it("disables the toggle while the mutation is pending", () => {
|
||||
isPending = true;
|
||||
renderForm({ space: makeSpace() });
|
||||
expect(getToggle().disabled).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("EditSpaceForm auto-merge-conflicts toggle", () => {
|
||||
it("derives initial checked state from space.settings.gitSync.autoMergeConflicts (true -> checked)", () => {
|
||||
renderForm({
|
||||
space: makeSpace({
|
||||
settings: { gitSync: { autoMergeConflicts: true } },
|
||||
}),
|
||||
});
|
||||
expect(getAutoMergeToggle().checked).toBe(true);
|
||||
});
|
||||
|
||||
it("defaults to unchecked when autoMergeConflicts is missing (SAFE default)", () => {
|
||||
renderForm({ space: makeSpace() });
|
||||
expect(getAutoMergeToggle().checked).toBe(false);
|
||||
});
|
||||
|
||||
it("fires the mutation with { spaceId, autoMergeConflicts } and optimistically flips on", async () => {
|
||||
mutateAsync.mockResolvedValue(undefined);
|
||||
renderForm({ space: makeSpace() });
|
||||
|
||||
const toggle = getAutoMergeToggle();
|
||||
expect(toggle.checked).toBe(false);
|
||||
|
||||
fireEvent.click(toggle);
|
||||
|
||||
// Optimistic update.
|
||||
expect(toggle.checked).toBe(true);
|
||||
expect(mutateAsync).toHaveBeenCalledTimes(1);
|
||||
expect(mutateAsync).toHaveBeenCalledWith({
|
||||
spaceId: "space-1",
|
||||
autoMergeConflicts: true,
|
||||
});
|
||||
|
||||
await waitFor(() => expect(toggle.checked).toBe(true));
|
||||
});
|
||||
|
||||
it("rolls back to its prior state when the mutation rejects", async () => {
|
||||
mutateAsync.mockRejectedValue(new Error("network"));
|
||||
renderForm({
|
||||
space: makeSpace({
|
||||
settings: { gitSync: { autoMergeConflicts: false } },
|
||||
}),
|
||||
});
|
||||
|
||||
const toggle = getAutoMergeToggle();
|
||||
expect(toggle.checked).toBe(false);
|
||||
|
||||
fireEvent.click(toggle);
|
||||
|
||||
expect(toggle.checked).toBe(true);
|
||||
expect(mutateAsync).toHaveBeenCalledWith({
|
||||
spaceId: "space-1",
|
||||
autoMergeConflicts: true,
|
||||
});
|
||||
|
||||
await waitFor(() => expect(toggle.checked).toBe(false));
|
||||
});
|
||||
|
||||
it("disables the toggle when readOnly", () => {
|
||||
renderForm({ space: makeSpace(), readOnly: true });
|
||||
expect(getAutoMergeToggle().disabled).toBe(true);
|
||||
});
|
||||
});
|
||||
@@ -1,5 +1,14 @@
|
||||
import { Group, Box, Button, TextInput, Stack, Textarea } from "@mantine/core";
|
||||
import React from "react";
|
||||
import {
|
||||
Group,
|
||||
Box,
|
||||
Button,
|
||||
TextInput,
|
||||
Stack,
|
||||
Textarea,
|
||||
Divider,
|
||||
Switch,
|
||||
} from "@mantine/core";
|
||||
import React, { useState } from "react";
|
||||
import { useForm } from "@mantine/form";
|
||||
import { zod4Resolver } from "mantine-form-zod-resolver";
|
||||
import { z } from "zod/v4";
|
||||
@@ -29,6 +38,44 @@ export function EditSpaceForm({ space, readOnly }: EditSpaceFormProps) {
|
||||
const { t } = useTranslation();
|
||||
const updateSpaceMutation = useUpdateSpaceMutation();
|
||||
|
||||
const [gitSyncEnabled, setGitSyncEnabled] = useState<boolean>(
|
||||
space?.settings?.gitSync?.enabled ?? false,
|
||||
);
|
||||
|
||||
const [autoMergeConflicts, setAutoMergeConflicts] = useState<boolean>(
|
||||
space?.settings?.gitSync?.autoMergeConflicts ?? false,
|
||||
);
|
||||
|
||||
/**
 * Persists a change of the "Enable Git sync" switch.
 *
 * The local state is flipped first (optimistic update) and restored to its
 * pre-toggle value if the mutation rejects.
 *
 * @param value - The new checked state requested by the user.
 */
const handleGitSyncToggle = async (value: boolean) => {
  // Captured before the optimistic flip so a failed save can be undone.
  const valueBeforeToggle = gitSyncEnabled;

  setGitSyncEnabled(value);

  try {
    await updateSpaceMutation.mutateAsync({
      spaceId: space.id,
      gitSyncEnabled: value,
    });
  } catch (err) {
    // Undo the optimistic flip so the switch does not claim sync is enabled.
    setGitSyncEnabled(valueBeforeToggle);
    // NOTE(review): the original author states the mutation already surfaces a
    // toast via onError (not visible here); the raw error is logged as well so
    // it is never silently swallowed (AGENTS.md).
    console.error("Failed to toggle git-sync for space", err);
  }
};
|
||||
|
||||
/**
 * Persist a change of the space's "Auto-merge conflicts on push" switch.
 *
 * The local switch state is updated optimistically before the request is
 * sent. If the update mutation rejects, the switch is restored to the value
 * it had when the handler was invoked.
 *
 * @param value - The checked state the user just selected.
 */
const handleAutoMergeConflictsToggle = async (
  value: boolean,
): Promise<void> => {
  const previous = autoMergeConflicts;
  setAutoMergeConflicts(value); // optimistic update

  try {
    await updateSpaceMutation.mutateAsync({
      spaceId: space.id,
      autoMergeConflicts: value,
    });
  } catch (err) {
    setAutoMergeConflicts(previous); // revert on failure
    // Log the raw error as well so the failure is not silently swallowed.
    console.error("Failed to toggle git-sync auto-merge-conflicts", err);
  }
};
|
||||
|
||||
const form = useForm<FormValues>({
|
||||
validate: zod4Resolver(formSchema),
|
||||
initialValues: {
|
||||
@@ -104,6 +151,31 @@ export function EditSpaceForm({ space, readOnly }: EditSpaceFormProps) {
|
||||
</Group>
|
||||
)}
|
||||
</form>
|
||||
|
||||
<Divider my="lg" />
|
||||
|
||||
<Switch
|
||||
label={t("Enable Git sync")}
|
||||
description={t("Sync this space's pages to a Git repository.")}
|
||||
checked={gitSyncEnabled}
|
||||
disabled={readOnly || updateSpaceMutation.isPending}
|
||||
onChange={(event) =>
|
||||
handleGitSyncToggle(event.currentTarget.checked)
|
||||
}
|
||||
/>
|
||||
|
||||
<Switch
|
||||
mt="md"
|
||||
label={t("Auto-merge conflicts on push")}
|
||||
description={t(
|
||||
"When off (recommended), a page whose content still has unresolved Git conflict markers is skipped on push until you resolve the conflict in Git. When on, the markers are stripped and both sides' content is pushed.",
|
||||
)}
|
||||
checked={autoMergeConflicts}
|
||||
disabled={readOnly || updateSpaceMutation.isPending}
|
||||
onChange={(event) =>
|
||||
handleAutoMergeConflictsToggle(event.currentTarget.checked)
|
||||
}
|
||||
/>
|
||||
</Box>
|
||||
</>
|
||||
);
|
||||
|
||||
@@ -13,9 +13,15 @@ export interface ISpaceCommentsSettings {
|
||||
allowViewerComments?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Per-space Git sync settings, stored under `ISpaceSettings.gitSync`.
 *
 * Both flags are optional; the space settings form treats a missing value as
 * `false`.
 */
export interface ISpaceGitSyncSettings {
  /** Whether this space's pages are synced to a Git repository. */
  enabled?: boolean;
  /**
   * Push behaviour for pages whose content still has unresolved Git conflict
   * markers. When off, such a page is skipped on push until the conflict is
   * resolved in Git; when on, the markers are stripped and both sides'
   * content is pushed.
   */
  autoMergeConflicts?: boolean;
}
|
||||
|
||||
/** Optional settings groups attached to a space. */
export interface ISpaceSettings {
  sharing?: ISpaceSharingSettings;
  comments?: ISpaceCommentsSettings;
  /** Git sync flags (enabled / auto-merge conflicts) for the space. */
  gitSync?: ISpaceGitSyncSettings;
}
|
||||
|
||||
export interface ISpace {
|
||||
@@ -35,6 +41,8 @@ export interface ISpace {
|
||||
// for updates
|
||||
disablePublicSharing?: boolean;
|
||||
allowViewerComments?: boolean;
|
||||
gitSyncEnabled?: boolean;
|
||||
autoMergeConflicts?: boolean;
|
||||
}
|
||||
|
||||
interface IMembership {
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
"migration:reset": "tsx src/database/migrate.ts down-to NO_MIGRATIONS",
|
||||
"migration:codegen": "kysely-codegen --dialect=postgres --camel-case --env-file=../../.env --out-file=./src/database/types/db.d.ts",
|
||||
"lint": "eslint \"{src,apps,libs,test}/**/*.ts\" --fix",
|
||||
"pretest": "pnpm --filter @docmost/editor-ext build",
|
||||
"pretest": "pnpm --filter @docmost/editor-ext build && pnpm --filter @docmost/git-sync build && pnpm --filter @docmost/mcp build",
|
||||
"test": "jest",
|
||||
"test:int": "jest --config test/jest-integration.json",
|
||||
"test:watch": "jest --watch",
|
||||
@@ -41,6 +41,7 @@
|
||||
"@aws-sdk/s3-request-presigner": "3.1050.0",
|
||||
"@azure/storage-blob": "12.31.0",
|
||||
"@clickhouse/client": "^1.18.2",
|
||||
"@docmost/git-sync": "workspace:*",
|
||||
"@docmost/mcp": "workspace:*",
|
||||
"@docmost/pdf-inspector": "1.9.6",
|
||||
"@fastify/cookie": "^11.0.2",
|
||||
@@ -188,7 +189,12 @@
|
||||
]
|
||||
}
|
||||
],
|
||||
"^.+\\.(t|j)sx?$": "ts-jest"
|
||||
"^.+\\.(t|j)sx?$": [
|
||||
"ts-jest",
|
||||
{
|
||||
"isolatedModules": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"transformIgnorePatterns": [
|
||||
"/node_modules/(?!(\\.pnpm/)?(nanoid|uuid|image-dimensions|marked|happy-dom|lib0)(@|/))"
|
||||
@@ -198,11 +204,17 @@
|
||||
],
|
||||
"coverageDirectory": "../coverage",
|
||||
"testEnvironment": "node",
|
||||
"setupFiles": [
|
||||
"<rootDir>/../test/jest.setup.ts"
|
||||
],
|
||||
"moduleNameMapper": {
|
||||
"^@docmost/db/(.*)$": "<rootDir>/database/$1",
|
||||
"^@docmost/transactional/(.*)$": "<rootDir>/integrations/transactional/$1",
|
||||
"^@docmost/ee/(.*)$": "<rootDir>/ee/$1",
|
||||
"^src/(.*)$": "<rootDir>/$1"
|
||||
"^src/(.*)$": "<rootDir>/$1",
|
||||
"^@docmost/git-sync$": "<rootDir>/../../../packages/git-sync/src/index.ts",
|
||||
"^@docmost/git-sync/(.*)$": "<rootDir>/../../../packages/git-sync/src/$1",
|
||||
"^(\\.{1,2}/.*)\\.js$": "$1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,6 +28,7 @@ import { ClsModule } from 'nestjs-cls';
|
||||
import { NoopAuditModule } from './integrations/audit/audit.module';
|
||||
import { ThrottleModule } from './integrations/throttle/throttle.module';
|
||||
import { McpModule } from './integrations/mcp/mcp.module';
|
||||
import { GitSyncModule } from './integrations/git-sync/git-sync.module';
|
||||
import { AiModule } from './integrations/ai/ai.module';
|
||||
import { AiChatModule } from './core/ai-chat/ai-chat.module';
|
||||
|
||||
@@ -89,6 +90,7 @@ try {
|
||||
TelemetryModule,
|
||||
ThrottleModule,
|
||||
McpModule,
|
||||
GitSyncModule,
|
||||
AiModule,
|
||||
AiChatModule,
|
||||
...enterpriseModules,
|
||||
|
||||
@@ -149,6 +149,45 @@ export class CollaborationGateway {
|
||||
return this.hocuspocus.openDirectConnection(documentName, context);
|
||||
}
|
||||
|
||||
/**
 * Write a git-originated body into a page, applying the merge on the instance
 * that OWNS the live Y.Doc so a connected editor CONVERGES on the change.
 *
 * git-sync must NOT use openDirectConnection directly for this: that opens the
 * document on whichever instance/process runs git-sync (the API/worker). When
 * an editor is connected to a DIFFERENT collab instance/process, that is a
 * SEPARATE, detached Y.Doc — the merge lands in the detached doc and the DB,
 * but the live editor never receives the Yjs update; its next debounced
 * autosave then overwrites the DB with its stale state and SILENTLY REVERTS
 * the git change (the data-loss bug). Routing through the custom-event channel
 * runs the merge on the owning instance's shared Document, whose update is
 * broadcast to every connection (handleUpdate), so the editor's CRDT converges
 * on the merged result.
 *
 * Without redis there is a single instance, so the write runs locally — which
 * is already the owning (and only) instance the editor is connected to.
 *
 * @param documentName - Collab document name of the target page.
 * @param payload - Incoming body (`prosemirrorJson`), optional common ancestor
 *   (`baseProsemirrorJson`) and the user id the write is attributed to.
 */
async writePageBody(
  documentName: string,
  payload: {
    prosemirrorJson: unknown;
    baseProsemirrorJson?: unknown;
    userId: string;
  },
): Promise<void> {
  if (this.redisSync) {
    // Redis sync is configured: dispatch as a custom event rather than calling
    // the handler on this instance.
    await this.handleYjsEvent(
      'gitSyncWriteBody',
      documentName,
      payload as any,
    );
    return;
  }

  // No redis sync: invoke the handler directly against the local hocuspocus.
  await this.collabEventsService
    .getHandlers(this.hocuspocus)
    .gitSyncWriteBody(documentName, payload as any);
}
|
||||
|
||||
/*
|
||||
*Can be used before calling openDirectConnection directly
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,182 @@
|
||||
// Exercises the REAL `gitSyncWriteBody` collab handler (the owner-routed body
// write the data-loss fix introduces). The handler imports the editor graph via
// collaboration.util / yjs.util (tiptapExtensions -> editor-ext -> react-dom,
// unloadable under jest's node env, same coupling noted in
// gitmost-datasource.service.spec.ts), so we stub those + the transformer. The
// stubbed toYdoc builds paragraph blocks straight from the ProseMirror JSON so
// we can assert convergence on real text.
jest.mock('./collaboration.util', () => ({
  tiptapExtensions: [],
  getPageId: (name: string) => name.replace(/^page\./, ''),
  prosemirrorNodeToYElement: jest.fn(),
}));

jest.mock('./yjs.util', () => ({
  setYjsMark: jest.fn(),
  updateYjsMarkAttribute: jest.fn(),
}));

jest.mock('@hocuspocus/transformer', () => {
  // yjs is loaded with require() inside the factory instead of relying on the
  // file-level `Y` import.
  const Yjs = require('yjs');

  return {
    TiptapTransformer: {
      // Minimal stand-in for the real transformer: one XmlElement per top-level
      // node, holding a single XmlText with the node's concatenated text.
      toYdoc: (json: any) => {
        if (json?.__throw) throw new Error('boom: malformed doc');

        const ydoc = new Yjs.Doc();
        const fragment = ydoc.getXmlFragment('default');

        const blocks = (json?.content ?? []).map((node: any) => {
          const element = new Yjs.XmlElement(node.type || 'paragraph');
          const text = (node.content ?? [])
            .map((child: any) => child.text ?? '')
            .join('');
          const textNode = new Yjs.XmlText();
          if (text) textNode.insert(0, text);
          element.insert(0, [textNode]);
          return element;
        });

        if (blocks.length) fragment.insert(0, blocks);
        return ydoc;
      },
    },
  };
});
|
||||
|
||||
import * as Y from 'yjs';
|
||||
import { CollaborationHandler } from './collaboration.handler';
|
||||
|
||||
/**
 * Build a minimal ProseMirror document JSON with one paragraph per argument.
 * An empty string produces a paragraph with an empty `content` array (no text
 * node).
 */
const pmDoc = (...paras: string[]) => ({
  type: 'doc',
  content: paras.map((text) => ({
    type: 'paragraph',
    content: text ? [{ type: 'text', text }] : [],
  })),
});
|
||||
|
||||
/**
 * Flatten a fragment into one string per top-level element: the string forms
 * of the element's children, concatenated in order.
 */
const texts = (frag: Y.XmlFragment): string[] =>
  frag.toArray().map((el) => {
    const children = (el as Y.XmlElement).toArray();
    return children.map((c) => (c as Y.XmlText).toString()).join('');
  });
|
||||
|
||||
// Build a fake Hocuspocus whose openDirectConnection yields a DirectConnection
// over a REAL shared Document, with a connected "editor" doc that receives the
// shared doc's updates (modelling Document.handleUpdate's broadcast on the
// OWNING instance). Initial content carries live block ids; the editor starts
// fully synced with the shared doc.
function fakeHocuspocus(initial: { text: string; id: string }[]) {
  const shared = new Y.Doc();
  const frag = shared.getXmlFragment('default');

  // Seed the shared doc: one paragraph element per entry, tagged with its id.
  shared.transact(() => {
    const blocks = initial.map((s) => {
      const el = new Y.XmlElement('paragraph');
      el.setAttribute('id', s.id);
      const t = new Y.XmlText();
      if (s.text) t.insert(0, s.text);
      el.insert(0, [t]);
      return el;
    });
    frag.insert(0, blocks);
  });

  // The editor starts from the shared doc's full state.
  const editor = new Y.Doc();
  Y.applyUpdate(editor, Y.encodeStateAsUpdate(shared));

  // Broadcast relay: server-originated updates flow to the connected editor.
  // Updates applied with origin 'editor' are not echoed back.
  shared.on('update', (u: Uint8Array, origin: any) => {
    if (origin !== 'editor') Y.applyUpdate(editor, u, 'server');
  });

  const openDirectConnection = jest.fn(async () => ({
    // DirectConnection.transact runs the fn directly against the Document (no
    // wrapping Y transaction), exactly like @hocuspocus/server.
    transact: async (fn: (doc: Y.Doc) => void) => fn(shared),
    disconnect: jest.fn(async () => undefined),
  }));

  return { hocuspocus: { openDirectConnection } as any, shared, editor };
}
|
||||
|
||||
describe('CollaborationHandler.gitSyncWriteBody (owner-routed body write)', () => {
  it('converges a connected editor on the git change (no silent revert)', async () => {
    const { hocuspocus, shared, editor } = fakeHocuspocus([
      { text: 'alpha', id: 'p1' },
      { text: 'beta', id: 'p2' },
    ]);
    const handler = new CollaborationHandler();
    const handlers = handler.getHandlers(hocuspocus);

    // git changed block 1 beta -> beta2; base is the pre-change content.
    await handlers.gitSyncWriteBody('page.x', {
      prosemirrorJson: pmDoc('alpha', 'beta2'),
      baseProsemirrorJson: pmDoc('alpha', 'beta'),
      userId: 'svc-user',
    });

    // The shared (owning-instance) doc holds the merge...
    expect(texts(shared.getXmlFragment('default'))).toEqual(['alpha', 'beta2']);
    // ...and the connected editor CONVERGED via the broadcast (the bug would
    // leave it on 'beta' and revert the page on its next autosave).
    expect(texts(editor.getXmlFragment('default'))).toEqual(['alpha', 'beta2']);
  });

  it('preserves a concurrent edit to a DIFFERENT block (3-way, finding #2)', async () => {
    const { hocuspocus, shared, editor } = fakeHocuspocus([
      { text: 'alpha', id: 'p1' },
      { text: 'beta', id: 'p2' },
    ]);

    // The editor is actively editing block 0 while the push arrives.
    const eFrag = editor.getXmlFragment('default');
    const firstText = (eFrag.get(0) as Y.XmlElement).get(0);
    // Fail loudly if the fixture shape changes instead of silently skipping
    // the edit.
    if (!(firstText instanceof Y.XmlText)) {
      throw new Error('test setup: block 0 must start with a Y.XmlText');
    }
    editor.transact(() => {
      firstText.insert(5, ' EDIT');
    }, 'editor');
    Y.applyUpdate(shared, Y.encodeStateAsUpdate(editor), 'editor');

    const handler = new CollaborationHandler();
    await handler.getHandlers(hocuspocus).gitSyncWriteBody('page.x', {
      prosemirrorJson: pmDoc('alpha', 'beta2'),
      baseProsemirrorJson: pmDoc('alpha', 'beta'),
      userId: 'svc-user',
    });

    // Human's block-0 edit AND git's block-1 change both survive on the editor.
    expect(texts(editor.getXmlFragment('default'))).toEqual([
      'alpha EDIT',
      'beta2',
    ]);
  });

  it('crash-safe: a transform failure never opens the connection or mutates the live doc', async () => {
    const { hocuspocus, shared } = fakeHocuspocus([{ text: 'alpha', id: 'p1' }]);
    const before = texts(shared.getXmlFragment('default'));
    const handler = new CollaborationHandler();

    await expect(
      handler.getHandlers(hocuspocus).gitSyncWriteBody('page.x', {
        prosemirrorJson: { __throw: true } as any,
        userId: 'svc-user',
      }),
    ).rejects.toThrow('boom');

    // The incoming doc is built BEFORE opening the connection, so the throw
    // happens first: the live doc is untouched and no connection was opened.
    expect(hocuspocus.openDirectConnection).not.toHaveBeenCalled();
    expect(texts(shared.getXmlFragment('default'))).toEqual(before);
  });

  it('falls back to a 2-way merge when no base is supplied', async () => {
    const { hocuspocus, shared, editor } = fakeHocuspocus([
      { text: 'alpha', id: 'p1' },
    ]);
    const handler = new CollaborationHandler();

    await handler.getHandlers(hocuspocus).gitSyncWriteBody('page.x', {
      prosemirrorJson: pmDoc('alpha', 'gamma'),
      userId: 'svc-user',
    });

    expect(texts(shared.getXmlFragment('default'))).toEqual(['alpha', 'gamma']);
    expect(texts(editor.getXmlFragment('default'))).toEqual(['alpha', 'gamma']);
  });
});
|
||||
@@ -8,6 +8,10 @@ import {
|
||||
import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util';
|
||||
import * as Y from 'yjs';
|
||||
import { User } from '@docmost/db/types/entity.types';
|
||||
import {
|
||||
mergeXmlFragments,
|
||||
mergeXmlFragments3Way,
|
||||
} from '../integrations/git-sync/services/yjs-body-merge';
|
||||
|
||||
export type CollabEventHandlers = ReturnType<
|
||||
CollaborationHandler['getHandlers']
|
||||
@@ -112,6 +116,69 @@ export class CollaborationHandler {
|
||||
},
|
||||
);
|
||||
},
|
||||
/**
 * Git-sync body write, applied as a block-level MERGE into the LIVE doc on
 * the instance that OWNS it (routed here via the custom-event channel —
 * see CollaborationGateway.writePageBody). Running on the owning instance
 * is what makes a connected editor CONVERGE: the merge mutates the shared
 * Document, whose update is broadcast to every connection, so the editor's
 * CRDT applies the git change instead of silently reverting it on its next
 * autosave (the data-loss bug this fixes).
 *
 * With a `baseProsemirrorJson` (the last-synced common ancestor) it does a
 * THREE-WAY merge — a block only the human changed is kept, a block only
 * git changed is taken (conflicts -> git). Without a base it falls back to
 * the 2-way merge.
 */
gitSyncWriteBody: async (
  documentName: string,
  payload: {
    prosemirrorJson: any;
    baseProsemirrorJson?: any;
    userId: string;
  },
) => {
  const { prosemirrorJson, baseProsemirrorJson, userId } = payload;

  // Single place for the ProseMirror-JSON -> Yjs conversion settings.
  const toYdoc = (json: any) =>
    TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);

  // Build the incoming (and base) Yjs docs BEFORE opening the connection /
  // touching the live doc. If a transform throws (a malformed/unsupported
  // doc) we must NOT have mutated the live body — otherwise a conversion
  // failure could leave the page empty (crash-safe conversion).
  const targetDoc = toYdoc(prosemirrorJson);
  const baseDoc =
    baseProsemirrorJson != null ? toYdoc(baseProsemirrorJson) : null;

  // actor:'git-sync' + the service user flow into PersistenceExtension
  // (lastUpdatedSource='git-sync', lastUpdatedById=userId).
  await this.withYdocConnection(
    hocuspocus,
    documentName,
    { actor: 'git-sync', user: { id: userId } },
    (doc) => {
      const liveFrag = doc.getXmlFragment('default');
      const targetFrag = targetDoc.getXmlFragment('default');

      if (baseDoc) {
        mergeXmlFragments3Way(
          liveFrag,
          targetFrag,
          baseDoc.getXmlFragment('default'),
        );
      } else {
        mergeXmlFragments(liveFrag, targetFrag);
      }
    },
  );
},
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,208 @@
|
||||
// Stub collaboration.util so importing the extension does not drag in the
// editor-ext -> @tiptap/react -> react-dom graph (unloadable under jest's node
// env, same coupling the gitmost-datasource / mcp specs document). The
// extension only calls getPageId, jsonToText and isEmptyParagraphDoc from it on
// the store path; tiptapExtensions is unused by onStoreDocument.
jest.mock('../collaboration.util', () => ({
  tiptapExtensions: [],
  getPageId: (name: string) => name.replace(/^page\./, ''),
  jsonToText: () => 'text',
  isEmptyParagraphDoc: () => false,
  // The post-write mention extraction walks the doc via jsonToNode().descendants;
  // return a node-like stub with no descendants so no mentions are produced
  // (mention handling is out of scope here — we only assert provenance).
  jsonToNode: () => ({ descendants: () => undefined }),
}));

// Control the Yjs<->JSON bridge: fromYdoc returns the "incoming" doc the writer
// is storing. We keep it distinct from the page's persisted content so the
// no-op guard (isDeepStrictEqual) never short-circuits the write.
const INCOMING_JSON = { type: 'doc', content: [{ type: 'paragraph' }, { t: 1 }] };

jest.mock('@hocuspocus/transformer', () => ({
  TiptapTransformer: {
    fromYdoc: jest.fn(() => INCOMING_JSON),
    toYdoc: jest.fn(),
  },
}));

// Run the executeTx callback inline with a passthrough trx.
jest.mock('@docmost/db/utils', () => ({
  executeTx: jest.fn(async (_db: any, cb: any) => cb({} as any)),
}));
|
||||
|
||||
import * as Y from 'yjs';
|
||||
import { PersistenceExtension } from './persistence.extension';
|
||||
import {
|
||||
onChangePayload,
|
||||
onStoreDocumentPayload,
|
||||
} from '@hocuspocus/server';
|
||||
|
||||
/**
 * Provenance-precedence coverage for PersistenceExtension.onStoreDocument
 * (test-strategy Module 4 / item #2): the contract `agent > git-sync > user`,
 * plus the negative that a git-sync store does NOT pin a boundary history
 * snapshot. We drive the precedence through the real public method (onChange to
 * arm the sticky agent marker, then onStoreDocument), mocking the repos / db /
 * Yjs bridge so no real database or collab server is needed. The store's
 * persisted `lastUpdatedSource` and the saveHistory call are the observable
 * outputs.
 */
describe('PersistenceExtension.onStoreDocument — provenance precedence (#2)', () => {
  const DOCUMENT_NAME = 'page.page-1';
  const PAGE_ID = 'page-1';

  // `page.content` differs from INCOMING_JSON so the write is never skipped.
  const persistedPage = (overrides?: { lastUpdatedSource?: string }) => ({
    id: PAGE_ID,
    slugId: 'slug-1',
    spaceId: 'space-1',
    workspaceId: 'ws-1',
    creatorId: 'creator-1',
    contributorIds: ['creator-1'],
    content: { type: 'doc', content: [{ type: 'paragraph', content: [] }] },
    lastUpdatedSource: overrides?.lastUpdatedSource ?? 'user',
    createdAt: new Date(),
  });

  // Wire a PersistenceExtension to jest doubles for every collaborator and
  // hand back the ones the tests assert on.
  const build = (pageOverrides?: { lastUpdatedSource?: string }) => {
    const pageRepo = {
      findById: jest.fn().mockResolvedValue(persistedPage(pageOverrides)),
      updatePage: jest.fn().mockResolvedValue({ numUpdatedRows: 1n }),
    };
    const pageHistoryRepo = {
      // No prior snapshot -> humanBaselineMissing is true, so the ONLY thing
      // gating the boundary snapshot in these tests is the source precedence.
      findPageLastHistory: jest.fn().mockResolvedValue(null),
      saveHistory: jest.fn().mockResolvedValue(undefined),
    };
    const aiQueue = { add: jest.fn().mockResolvedValue(undefined) };
    const historyQueue = { add: jest.fn().mockResolvedValue(undefined) };
    const notificationQueue = { add: jest.fn().mockResolvedValue(undefined) };
    const collabHistory = {
      addContributors: jest.fn().mockResolvedValue(undefined),
    };
    const transclusionService = {
      syncPageTransclusions: jest.fn().mockResolvedValue(undefined),
      syncPageReferences: jest.fn().mockResolvedValue(undefined),
      syncPageTemplateReferences: jest.fn().mockResolvedValue(undefined),
    };

    const ext = new PersistenceExtension(
      pageRepo as any,
      pageHistoryRepo as any,
      {} as any, // db
      aiQueue as any,
      historyQueue as any,
      notificationQueue as any,
      collabHistory as any,
      transclusionService as any,
    );

    return { ext, pageRepo, pageHistoryRepo, historyQueue };
  };

  // A real Y.Doc is required for Y.encodeStateAsUpdate(document); broadcastStateless
  // is a no-op spy. The fromYdoc bridge is mocked, so the doc's contents are
  // irrelevant to the JSON path.
  const makeStorePayload = (context: any): onStoreDocumentPayload =>
    ({
      documentName: DOCUMENT_NAME,
      document: Object.assign(new Y.Doc(), {
        broadcastStateless: jest.fn(),
      }),
      context,
    }) as any;

  const makeChangePayload = (actor: string): onChangePayload =>
    ({
      documentName: DOCUMENT_NAME,
      context: { user: { id: 'user-1' }, actor },
    }) as any;

  // `lastUpdatedSource` of the first updatePage call. Asserting the call
  // happened first turns a missed write into a readable expectation failure
  // instead of a TypeError on `calls[0]`.
  const sourceOf = (pageRepo: { updatePage: jest.Mock }) => {
    expect(pageRepo.updatePage).toHaveBeenCalled();
    return pageRepo.updatePage.mock.calls[0][0].lastUpdatedSource;
  };

  it("tags 'user' for a plain write (no agent touch, no git-sync actor)", async () => {
    const { ext, pageRepo } = build();

    await ext.onStoreDocument(
      makeStorePayload({ user: { id: 'user-1' }, actor: 'user' }),
    );

    expect(sourceOf(pageRepo)).toBe('user');
  });

  it("tags 'git-sync' when the writer's actor is 'git-sync' and no agent touched the window", async () => {
    const { ext, pageRepo } = build();

    await ext.onStoreDocument(
      makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
    );

    expect(sourceOf(pageRepo)).toBe('git-sync');
  });

  it("keeps 'git-sync' for an explicit git-sync store even with a sticky agent marker (#14 loop-guard)", async () => {
    const { ext, pageRepo } = build();

    // An agent edit landed earlier in the coalescing window (sticky marker),
    // then a git-sync writer performs the store. Red-team finding #14: an
    // EXPLICIT current-write actor is authoritative for THIS write, so the
    // store must stay 'git-sync' — otherwise the PageChangeListener loop-guard
    // (keyed on lastUpdatedSource === 'git-sync') fails to recognize git-sync's
    // own write and re-exports it. Explicit 'agent' still wins (see below); the
    // sticky marker only promotes a plain human writer to 'agent'.
    await ext.onChange(makeChangePayload('agent'));
    await ext.onStoreDocument(
      makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
    );

    expect(sourceOf(pageRepo)).toBe('git-sync');
  });

  it("tags 'agent' when the storing writer itself is the agent (no prior onChange)", async () => {
    const { ext, pageRepo } = build();

    await ext.onStoreDocument(
      makeStorePayload({ user: { id: 'agent-user' }, actor: 'agent' }),
    );

    expect(sourceOf(pageRepo)).toBe('agent');
  });

  // --- negative: a git-sync store must NOT pin a boundary history snapshot ----
  // The boundary-snapshot branch only fires when the resolved source is 'agent'
  // AND the prior persisted source is not 'agent'. A git-sync store resolves to
  // 'git-sync', so saveHistory must NOT be called.
  it('does NOT write a boundary history snapshot for a git-sync store', async () => {
    const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });

    await ext.onStoreDocument(
      makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
    );

    expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
  });

  it('DOES pin a boundary snapshot for an agent store over a prior human state (control)', async () => {
    // Confirms the negative above is meaningful: under the SAME mocks, an agent
    // store over a 'user' baseline DOES trigger the boundary snapshot.
    const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });

    await ext.onStoreDocument(
      makeStorePayload({ user: { id: 'agent-user' }, actor: 'agent' }),
    );

    expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1);
  });

  it('does NOT pin a boundary snapshot for a plain user store', async () => {
    const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });

    await ext.onStoreDocument(
      makeStorePayload({ user: { id: 'user-1' }, actor: 'user' }),
    );

    expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
  });
});
|
||||
@@ -52,7 +52,17 @@ export function resolveSource(
|
||||
stickyTouched: boolean,
|
||||
contextActor?: string,
|
||||
): ProvenanceSource {
|
||||
return stickyTouched || contextActor === 'agent' ? 'agent' : 'user';
|
||||
// An EXPLICIT current-write actor is authoritative for THIS write and wins
|
||||
// over the sticky-agent fallback. Order: explicit 'agent' > explicit
|
||||
// 'git-sync' > sticky agent marker > plain human 'user'. The git-sync case
|
||||
// must NOT be masked by the sticky marker, or the PageChangeListener
|
||||
// loop-guard (which keys on lastUpdatedSource === 'git-sync') would re-export
|
||||
// git-sync's own writes (#14). Explicit agent still wins so a window that
|
||||
// mixed an agent edit stays tagged 'agent'.
|
||||
if (contextActor === 'agent') return 'agent';
|
||||
if (contextActor === 'git-sync') return 'git-sync';
|
||||
if (stickyTouched) return 'agent';
|
||||
return 'user';
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -176,6 +186,11 @@ export class PersistenceExtension implements Extension {
|
||||
// Sticky agent marker: 'agent' if any agent edit landed in this window, OR
|
||||
// if the current writer is the agent (covers a store with no prior onChange
|
||||
// agent event in the same window). §15 H2.
|
||||
// Provenance precedence: agent > git-sync > user (see resolveSource). A
|
||||
// 'git-sync' store is NOT given an immediate history snapshot — it is
|
||||
// debounced like a human edit (a git-sync write is a block-level merge into
|
||||
// the live doc, so it reads like an incremental human edit, not a bulk
|
||||
// import that would warrant its own immediate snapshot).
|
||||
const lastUpdatedSource = resolveSource(
|
||||
this.consumeAgentTouched(documentName),
|
||||
context?.actor,
|
||||
|
||||
@@ -0,0 +1,208 @@
|
||||
// Regression coverage for the custom-event request/reply protocol in the
|
||||
// RedisSyncExtension. git-sync routes its body write through a custom event
|
||||
// (`gitSyncWriteBody`) which, when the target doc is owned by a DIFFERENT collab
|
||||
// instance, runs REMOTELY inside `handleRedisMessage` on the owning instance. The
|
||||
// remote handler can THROW (markdown->ProseMirror transform on a malformed body).
|
||||
//
|
||||
// Before the fix the throw was uncaught: (1) no `customEventComplete` reply was
|
||||
// published, so the origin's awaiting promise only rejected after `customEventTTL`
|
||||
// (~30s) as a generic 'TIMEOUT', and (2) an unhandledRejection escaped the async
|
||||
// `messageBuffer` listener on the owning instance. These tests assert the throw is
|
||||
// turned into an error-carrying reply that rejects the origin PROMPTLY with the
|
||||
// real message, with the no-throw and local paths unchanged.
|
||||
|
||||
import { RedisSyncExtension } from './redis-sync.extension';
|
||||
|
||||
/** Signature of a `messageBuffer` listener: raw channel and raw message. */
type Listener = (channel: Buffer, message: Buffer) => unknown;

// Minimal in-memory pub/sub + lock store shared across FakeRedis duplicates,
// modelling the two-instance topology (origin + owner) over one Redis.
class FakeRedisBus {
  /** Every FakeRedis created against this bus, in creation order. */
  instances: FakeRedis[] = [];
  /** Key/value store backing `set` / `del`. */
  locks = new Map<string, string>();
  /** Log of every published message, delivered or not. */
  published: { channel: string; message: Buffer }[] = [];

  register(inst: FakeRedis) {
    this.instances.push(inst);
  }

  /** Record the message, then hand it to each instance subscribed to `channel`. */
  publish(channel: string, message: Buffer) {
    this.published.push({ channel, message });

    for (const inst of this.instances) {
      if (!inst.subscribed.has(channel)) continue;

      for (const listener of inst.messageListeners) {
        // ioredis delivers async; `void` mirrors the production listener
        // registration (`sub.on('messageBuffer', ...)`), whose rejection would
        // surface as an unhandledRejection if the handler did not catch.
        void listener(Buffer.from(channel), message);
      }
    }
  }
}

/** In-memory stand-in for the Redis client methods used by these tests. */
class FakeRedis {
  subscribed = new Set<string>();
  messageListeners: Listener[] = [];

  constructor(private readonly bus: FakeRedisBus) {
    bus.register(this);
  }

  /** A new client attached to the same bus (shared locks and pub/sub). */
  duplicate() {
    return new FakeRedis(this.bus);
  }

  subscribe(...channels: string[]) {
    for (const c of channels) this.subscribed.add(c);
    return Promise.resolve();
  }

  /** Only `messageBuffer` listeners are kept; other events are ignored. */
  on(event: string, cb: unknown) {
    if (event === 'messageBuffer') this.messageListeners.push(cb as Listener);
    return this;
  }

  publish(channel: string, message: Buffer) {
    this.bus.publish(channel, message);
    return Promise.resolve(1);
  }

  // Models `SET key val PX ttl NX GET`: only writes when absent (NX); returns the
  // previous value (GET) so the origin observes the owner already holding the lock.
  set(key: string, val: string, ...args: unknown[]) {
    const hasNX = args.includes('NX');
    const hasGET = args.includes('GET');
    const old = this.bus.locks.get(key) ?? null;

    if (!hasNX || old === null) this.bus.locks.set(key, val);

    return Promise.resolve(hasGET ? old : 'OK');
  }

  del(key: string) {
    this.bus.locks.delete(key);
    return Promise.resolve(1);
  }

  disconnect(): void {}
}
|
||||
|
||||
// JSON wire codec handed to the extension under test: `pack` serialises a
// message to a Buffer of its JSON text, `unpack` parses such a Buffer back.
const pack = (m: any) => {
  const json = JSON.stringify(m);
  return Buffer.from(json);
};

const unpack = (b: Buffer) => {
  const json = b.toString();
  return JSON.parse(json);
};
|
||||
|
||||
/**
 * Build a RedisSyncExtension wired to a fresh FakeRedis connection on `bus`.
 *
 * @param bus          shared in-memory bus the extension's connection joins
 * @param serverId     identity this extension instance runs under
 * @param customEvents handlers keyed by event name
 * @returns the extension, with `instance` stubbed to hold no documents
 */
function makeExtension(
  bus: FakeRedisBus,
  serverId: string,
  customEvents: Record<string, (doc: string, payload: any) => Promise<any>>,
) {
  const redis = new FakeRedis(bus) as any;
  const ext = new RedisSyncExtension({
    redis,
    pack: pack as any,
    unpack: unpack as any,
    serverId,
    customEvents: customEvents as any,
    customEventTTL: 30_000,
  });

  // No document is loaded on this instance, so handleEvent takes the
  // remote/proxy path instead of handling the event locally.
  const untyped = ext as any;
  untyped.instance = { documents: new Map() };

  return ext;
}
|
||||
|
||||
describe('RedisSyncExtension custom-event error propagation', () => {
  // Rejections that reached the process-level hook during the current test.
  let unhandled: unknown[];
  let onUnhandled: (e: unknown) => void;

  // Yield to the microtask queue ten times so chained async work can settle
  // without advancing any (fake) timer.
  const flush = async () => {
    let turns = 10;
    while (turns-- > 0) await Promise.resolve();
  };

  // Fresh handler map whose `boom` event always rejects with 'kaboom'.
  const throwingHandlers = () => ({
    boom: async () => {
      throw new Error('kaboom');
    },
  });

  beforeEach(() => {
    // Fake timers so the 30s TTL fallback timer never fires (and never dangles).
    jest.useFakeTimers();
    unhandled = [];
    onUnhandled = (e) => {
      unhandled.push(e);
    };
    process.on('unhandledRejection', onUnhandled);
  });

  afterEach(() => {
    process.off('unhandledRejection', onUnhandled);
    jest.useRealTimers();
  });

  it('owner publishes an error-carrying reply (no unhandledRejection) when the remote handler throws', async () => {
    const bus = new FakeRedisBus();
    const owner = makeExtension(bus, 'owner', throwingHandlers());

    // Feed the owner the message the origin would have sent, exercising the
    // remote branch directly.
    const startMessage = pack({
      type: 'customEventStart',
      documentName: 'page.x',
      eventName: 'boom',
      payload: {},
      replyTo: 'collabMsg:origin',
      replyId: 7,
    });
    await (owner as any).handleRedisMessage(
      Buffer.from('collabMsg:owner'),
      startMessage,
    );
    await flush();

    const replies: any[] = [];
    for (const { channel, message } of bus.published) {
      if (channel === 'collabMsg:origin') replies.push(unpack(message));
    }

    expect(replies).toHaveLength(1);
    expect(replies[0]).toMatchObject({
      type: 'customEventComplete',
      replyId: 7,
      error: 'kaboom',
    });
    expect(unhandled).toHaveLength(0);
  });

  it('origin rejects PROMPTLY with the real error (not a TTL TIMEOUT) when the remote handler throws', async () => {
    const bus = new FakeRedisBus();
    // The owner holds the document lock before the origin asks for it.
    bus.locks.set('collabLock:page.x', 'owner');
    makeExtension(bus, 'owner', throwingHandlers());
    const origin = makeExtension(bus, 'origin', {
      boom: async () => undefined,
    });

    const pending = (origin as any).handleEvent('boom', 'page.x', { foo: 1 });
    // Both handlers are attached right away, so a rejection is never
    // momentarily unhandled.
    const outcome = pending.then(
      () => ({ ok: true as const }),
      (e: unknown) => ({ ok: false as const, error: e }),
    );

    await flush();
    // No timer was advanced, so settling here means the 30s TIMEOUT fallback
    // is not what produced the result.
    const result = await outcome;
    expect(result.ok).toBe(false);
    const { error } = result as any;
    expect(error).toBeInstanceOf(Error);
    expect((error as Error).message).toBe('kaboom');
    expect(unhandled).toHaveLength(0);
  });

  it('origin resolves with the payload when the remote handler succeeds (unchanged behavior)', async () => {
    const bus = new FakeRedisBus();
    bus.locks.set('collabLock:page.x', 'owner');
    makeExtension(bus, 'owner', {
      ok: async (_doc: string, payload: any) => ({ echoed: payload }),
    });
    const origin = makeExtension(bus, 'origin', {
      ok: async () => undefined,
    });

    const pending = (origin as any).handleEvent('ok', 'page.x', { foo: 1 });
    await flush();

    await expect(pending).resolves.toEqual({ echoed: { foo: 1 } });
    expect(unhandled).toHaveLength(0);
  });
});
|
||||
@@ -51,9 +51,15 @@ export class RedisSyncExtension<TCE extends CustomEvents> implements Extension {
|
||||
private instance!: Hocuspocus;
|
||||
private readonly customEvents: TCE;
|
||||
private replyIdCounter: number = 0;
|
||||
// @ts-ignore
|
||||
private pendingReplies: Record<number, PromiseWithResolvers<any>['resolve']> =
|
||||
{};
|
||||
private pendingReplies: Record<
|
||||
number,
|
||||
{
|
||||
// @ts-ignore
|
||||
resolve: PromiseWithResolvers<any>['resolve'];
|
||||
// @ts-ignore
|
||||
reject: PromiseWithResolvers<any>['reject'];
|
||||
}
|
||||
> = {};
|
||||
|
||||
constructor(configuration: Configuration<TCE>) {
|
||||
const {
|
||||
@@ -176,25 +182,45 @@ export class RedisSyncExtension<TCE extends CustomEvents> implements Extension {
|
||||
}
|
||||
if (type === 'customEventStart') {
|
||||
const { documentName, eventName, payload, replyTo, replyId } = msg;
|
||||
const res = await this.handleEventLocally(
|
||||
eventName as Extract<keyof TCE, string>,
|
||||
documentName,
|
||||
payload,
|
||||
);
|
||||
const reply: RSAMessageCustomEventComplete = {
|
||||
type: 'customEventComplete',
|
||||
replyId,
|
||||
payload: res,
|
||||
};
|
||||
let reply: RSAMessageCustomEventComplete;
|
||||
try {
|
||||
const res = await this.handleEventLocally(
|
||||
eventName as Extract<keyof TCE, string>,
|
||||
documentName,
|
||||
payload,
|
||||
);
|
||||
reply = {
|
||||
type: 'customEventComplete',
|
||||
replyId,
|
||||
payload: res,
|
||||
};
|
||||
} catch (err) {
|
||||
// The remote handler threw (e.g. the markdown->ProseMirror transform in
|
||||
// gitSyncWriteBody can throw on a malformed body). Reply with the error on
|
||||
// the SAME correlation channel so the origin rejects promptly with the real
|
||||
// message instead of waiting out customEventTTL as a generic 'TIMEOUT'.
|
||||
// Catching here also keeps the throw from escaping this async messageBuffer
|
||||
// listener as an unhandledRejection on the owning instance.
|
||||
reply = {
|
||||
type: 'customEventComplete',
|
||||
replyId,
|
||||
payload: undefined,
|
||||
error: err instanceof Error ? err.message : String(err),
|
||||
};
|
||||
}
|
||||
this.pub.publish(`${replyTo}`, this.pack(reply));
|
||||
return;
|
||||
}
|
||||
if (type === 'customEventComplete') {
|
||||
const { replyId, payload } = msg;
|
||||
const resolveFn = this.pendingReplies[replyId];
|
||||
if (!resolveFn) return;
|
||||
const { replyId, payload, error } = msg;
|
||||
const pending = this.pendingReplies[replyId];
|
||||
if (!pending) return;
|
||||
delete this.pendingReplies[replyId];
|
||||
resolveFn(payload);
|
||||
if (error !== undefined) {
|
||||
pending.reject(new Error(error));
|
||||
} else {
|
||||
pending.resolve(payload);
|
||||
}
|
||||
return;
|
||||
}
|
||||
const { socketId } = msg;
|
||||
@@ -273,11 +299,22 @@ export class RedisSyncExtension<TCE extends CustomEvents> implements Extension {
|
||||
};
|
||||
const msg = this.pack(proxyMessage);
|
||||
this.pub.publish(`${this.msgChannel}:${proxyTo}`, msg);
|
||||
// @ts-ignore
|
||||
const { promise, resolve, reject } = Promise.withResolvers();
|
||||
this.pendingReplies[replyId] = resolve;
|
||||
// Manual deferred (no Promise.withResolvers) so this runs on Node < 22 too.
|
||||
let resolve!: (v: unknown) => void;
|
||||
let reject!: (e: unknown) => void;
|
||||
const promise = new Promise((res, rej) => {
|
||||
resolve = res;
|
||||
reject = rej;
|
||||
});
|
||||
this.pendingReplies[replyId] = { resolve, reject };
|
||||
setTimeout(() => {
|
||||
reject('TIMEOUT');
|
||||
// Fallback for a genuinely lost reply. A handler that threw now rejects
|
||||
// promptly via the error-carrying customEventComplete above; this TIMEOUT
|
||||
// only fires when no reply ever comes back.
|
||||
if (this.pendingReplies[replyId]) {
|
||||
delete this.pendingReplies[replyId];
|
||||
reject('TIMEOUT');
|
||||
}
|
||||
}, this.customEventTTL);
|
||||
return promise as Promise<ReturnType<TCE[TName]>>;
|
||||
}
|
||||
|
||||
@@ -72,6 +72,10 @@ export type RSAMessageCustomEventComplete = {
|
||||
type: 'customEventComplete';
|
||||
replyId: number;
|
||||
payload: unknown;
|
||||
// When the remote handler THREW, the owner sends back the error message here
|
||||
// instead of a payload, so the origin can reject its awaiting promise promptly
|
||||
// (with the real error) rather than waiting out the customEventTTL timeout.
|
||||
error?: string;
|
||||
};
|
||||
|
||||
export type RSAMessage =
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
import { resolveSource } from './persistence.extension';
|
||||
|
||||
// Red-team finding #14: when a write is an explicit git-sync write (no agent
// edit inside the coalescing window) its source has to stay 'git-sync', so the
// git-sync listener's loop-guard can recognise its own writes and skip
// re-exporting them.
describe('resolveSource — #14 git-sync provenance loop-guard', () => {
  it('keeps git-sync source for an explicit git-sync write (stickyTouched=true, actor=git-sync)', () => {
    const source = resolveSource(true, 'git-sync');

    expect(source).toBe('git-sync');
  });
});
|
||||
535
apps/server/src/collaboration/git-sync-converter-gate.spec.ts
Normal file
535
apps/server/src/collaboration/git-sync-converter-gate.spec.ts
Normal file
@@ -0,0 +1,535 @@
|
||||
/**
|
||||
* JEST CONFIG NOTE (#119 ESM refactor): this is the one spec that needs the REAL
|
||||
* `@docmost/git-sync` converter (not a mock). The package is now ESM, which jest
|
||||
* cannot `require()` nor `import()` without --experimental-vm-modules, so the
|
||||
* server jest config `moduleNameMapper`s `@docmost/git-sync` to its TS SOURCE and
|
||||
* strips the ESM `.js` import suffixes. ts-jest then type-checks that source under
|
||||
* the server's (looser) tsconfig and trips a benign narrowing; the global
|
||||
* `isolatedModules: true` on the ts-jest transform (apps/server/package.json)
|
||||
* makes it transpile-only so this spec loads. Full type-checking of the package
|
||||
* is still enforced by its own `tsc`/vitest gates and the server `tsc --noEmit`.
|
||||
*
|
||||
* §13.1 IDEMPOTENCY GATE — the blocking gate for git-sync Phase B.
|
||||
*
|
||||
* Proves the `@docmost/git-sync` pure converter is schema-compatible
|
||||
* with the server's REAL editor-ext document schema: a representative corpus of
|
||||
* editor-ext ProseMirror documents must survive a full round trip through the
|
||||
* actual server write path without losing any node / mark / attribute.
|
||||
*
|
||||
* Pipeline per document (issue #194 §13.1):
|
||||
* 1. md = convertProseMirrorToMarkdown(content) // git-sync export
|
||||
* 2. doc = await markdownToProseMirror(md) // git-sync import
|
||||
* 3. push `doc` through the REAL editor-ext Yjs write path the server uses:
|
||||
* ydoc = TiptapTransformer.toYdoc(doc, 'default', tiptapExtensions)
|
||||
* normalized = TiptapTransformer.fromYdoc(ydoc, 'default')
|
||||
* This is exactly what PersistenceExtension does on store
|
||||
* (apps/server/src/collaboration/extensions/persistence.extension.ts:96/115)
|
||||
* with the same `tiptapExtensions` (collaboration.util.ts) and the same
|
||||
* `@hocuspocus/transformer`, so the gate exercises the real schema
|
||||
* validation that runs on a git-sync write (issue #194 §3.3).
|
||||
* 4. assert docsCanonicallyEqual(canon(original), canon(normalized)) === true
|
||||
*
|
||||
* Any node / mark / attr that editor-ext drops (because the git-sync
|
||||
* docmost-schema named it differently, or declares a different default) makes
|
||||
* the gate FAIL for that document — exactly the schema-divergence issue #194 §3.3 /
|
||||
* §13.1 warn about. Genuine, irreducible divergences are isolated into the
|
||||
* clearly-named `KNOWN DIVERGENCE` block at the bottom (never silently hidden).
|
||||
*
|
||||
* Requires the workspace packages built first:
|
||||
* pnpm --filter @docmost/editor-ext build
|
||||
* pnpm --filter @docmost/git-sync build
|
||||
*/
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
// Import the server's real schema FIRST so `@docmost/editor-ext` resolves to its
|
||||
// built CJS `dist` (its `main`). The ESM-only `@docmost/git-sync` package is
|
||||
// mapped to its TS SOURCE by the jest `moduleNameMapper` (the built ESM cannot
|
||||
// be `require()`d nor dynamically `import()`ed under jest's node VM), so ts-jest
|
||||
// transpiles the real converter to CJS here — exercising the actual converter
|
||||
// the server ships, not a stub.
|
||||
import { tiptapExtensions } from './collaboration.util';
|
||||
import {
|
||||
convertProseMirrorToMarkdown,
|
||||
markdownToProseMirror,
|
||||
canonicalizeContent,
|
||||
docsCanonicallyEqual,
|
||||
} from '@docmost/git-sync';
|
||||
|
||||
/**
 * Push one editor-ext document through the whole gate pipeline.
 *
 * @param original editor-ext ProseMirror JSON to round-trip
 * @returns the exported markdown (`md`), the re-imported document
 *   (`imported`), the document as read back from the Yjs write path
 *   (`normalized`), and the canonical forms of the original and of the
 *   normalized document for comparison and diagnostics
 */
async function runGate(original: any): Promise<{
  md: string;
  imported: any;
  normalized: any;
  canonOriginal: any;
  canonNormalized: any;
}> {
  // Step 1: export. editor-ext JSON becomes git-sync markdown.
  const md = convertProseMirrorToMarkdown(original);

  // Step 2: import. The markdown is parsed back into ProseMirror JSON by the
  // git-sync converter (docmost-schema).
  const imported = await markdownToProseMirror(md);

  // Step 3: the server's Yjs write path under the real tiptapExtensions.
  // Writing into a Y.Doc is where the imported JSON meets the editor-ext
  // schema (noted upstream as throwing on an unknown node and dropping unknown
  // attrs). Reading it back yields the JSON the server would persist.
  const normalized = TiptapTransformer.fromYdoc(
    TiptapTransformer.toYdoc(imported, 'default', tiptapExtensions),
    'default',
  );

  const canonOriginal = canonicalizeContent(original);
  const canonNormalized = canonicalizeContent(normalized);

  return { md, imported, normalized, canonOriginal, canonNormalized };
}
|
||||
|
||||
// Tiny ProseMirror JSON builders used by the fixtures below.

// Factory for a node builder of the given type whose arguments become `content`.
const container =
  (type: string) =>
  (...content: any[]) => ({ type, content });

// Root `doc` node wrapping the given children.
const doc = container('doc');

// Text node; the `marks` key is only present when a truthy `marks` is passed.
const text = (t: string, marks?: any[]) => {
  const node: { type: string; text: string; marks?: any[] } = {
    type: 'text',
    text: t,
  };
  if (marks) node.marks = marks;
  return node;
};

// `paragraph` node wrapping the given inline children.
const para = container('paragraph');
|
||||
|
||||
// ---------------------------------------------------------------------------
// Corpus of editor-ext ProseMirror documents, one per common node/mark type.
// Names, attrs and DEFAULTS follow the real schema (packages/editor-ext/src
// and the server's tiptapExtensions in collaboration.util.ts); they are not
// guessed. Wherever editor-ext materializes a non-null default on import
// (e.g. image.align="center", callout.type, list start) the fixture already
// carries that value, so the document sits at its round-trip fixpoint. This
// mirrors how the engine normalizes-on-write (SPEC §11).
// ---------------------------------------------------------------------------

// Fixture-only builders; each returns a fresh plain object per call.
const heading = (level: number, ...content: any[]) => ({
  type: 'heading',
  attrs: { level },
  content,
});
const styled = (t: string, markType: string) => text(t, [{ type: markType }]);
const listItem = (...content: any[]) => ({ type: 'listItem', content });
const bulletList = (...items: any[]) => ({ type: 'bulletList', content: items });
const taskItem = (checked: boolean, label: string) => ({
  type: 'taskItem',
  attrs: { checked },
  content: [para(text(label))],
});
const callout = (kind: string, ...content: any[]) => ({
  type: 'callout',
  attrs: { type: kind },
  content,
});
const codeBlock = (language: string, source: string) => ({
  type: 'codeBlock',
  attrs: { language },
  content: [text(source)],
});
const tableCellOf = (type: 'tableHeader' | 'tableCell', label: string) => ({
  type,
  attrs: { colspan: 1, rowspan: 1, colwidth: null },
  content: [para(text(label))],
});
const tableRow = (...cells: any[]) => ({ type: 'tableRow', content: cells });

const CORPUS: Record<string, any> = {
  'paragraphs + headings (h1-h3)': doc(
    heading(1, text('Heading one')),
    heading(2, text('Heading two')),
    heading(3, text('Heading three')),
    para(text('A plain paragraph of text.')),
    para(text('Second paragraph.')),
  ),

  'inline marks (bold/italic/strike/code)': doc(
    para(
      text('normal '),
      styled('bold', 'bold'),
      text(' '),
      styled('italic', 'italic'),
      text(' '),
      styled('struck', 'strike'),
      text(' '),
      styled('code', 'code'),
    ),
  ),

  links: doc(
    para(
      text('see '),
      text('the site', [
        { type: 'link', attrs: { href: 'https://example.com' } },
      ]),
      text(' for more'),
    ),
  ),

  'bullet list': doc(
    bulletList(
      listItem(para(text('first'))),
      listItem(para(text('second'))),
      listItem(para(text('third'))),
    ),
  ),

  'ordered list': doc({
    type: 'orderedList',
    attrs: { start: 1 },
    content: [listItem(para(text('one'))), listItem(para(text('two')))],
  }),

  'task list (checkbox)': doc({
    type: 'taskList',
    content: [taskItem(true, 'done item'), taskItem(false, 'todo item')],
  }),

  blockquote: doc({
    type: 'blockquote',
    content: [para(text('a quoted line')), para(text('second quoted line'))],
  }),

  'callout (info)': doc(callout('info', para(text('an informational callout')))),

  'callout (warning)': doc(callout('warning', para(text('a warning callout')))),

  // The body ends in "\n" on purpose: that is the form a markdown ``` fence
  // round-trips to (marked normalizes the code text to end in a newline), so
  // codeBlock + `language` come back exactly (normalize-on-write, SPEC §11).
  'code block (with language)': doc(
    codeBlock('typescript', 'const a: number = 1;\nconsole.log(a);\n'),
  ),

  'horizontal rule': doc(
    para(text('before')),
    { type: 'horizontalRule' },
    para(text('after')),
  ),

  'table (header row + cells)': doc({
    type: 'table',
    content: [
      tableRow(
        tableCellOf('tableHeader', 'Name'),
        tableCellOf('tableHeader', 'Value'),
      ),
      tableRow(tableCellOf('tableCell', 'alpha'), tableCellOf('tableCell', '1')),
    ],
  }),

  // --- editor-ext nodes/marks beyond the original corpus (item #7) ----------
  // All of the following were verified to round-trip CLEANLY through the real
  // gate (export -> markdown -> import -> editor-ext Yjs write path). They are
  // authored at the engine's normalize-on-write fixpoint (SPEC §11): details
  // carries the materialized `open:false`, and colour marks use the `rgb(...)`
  // form the HTML re-parser normalizes to.

  'mention (user)': doc(
    para(
      text('hi '),
      {
        type: 'mention',
        attrs: {
          id: 'user-123',
          label: 'Alice',
          entityType: 'user',
          entityId: 'user-123',
          creatorId: 'creator-1',
        },
      },
      text(' there'),
    ),
  ),

  'inline math': doc(
    para(
      text('inline '),
      { type: 'mathInline', attrs: { text: 'x^2' } },
      text(' math'),
    ),
  ),

  'block math': doc({ type: 'mathBlock', attrs: { text: 'x^2 + y^2 = z^2' } }),

  'details (collapsible)': doc({
    type: 'details',
    // editor-ext materializes `open:false` on import, so it is written out
    // here to keep the fixture at its round-trip fixpoint.
    attrs: { open: false },
    content: [
      { type: 'detailsSummary', content: [text('Summary line')] },
      { type: 'detailsContent', content: [para(text('hidden body'))] },
    ],
  }),

  'highlight (mark, no color)': doc(
    para(text('a '), styled('highlighted', 'highlight'), text(' word')),
  ),

  'highlight (mark, with color)': doc(
    para(
      text('a '),
      text('red', [{ type: 'highlight', attrs: { color: 'rgb(255, 0, 0)' } }]),
      text(' word'),
    ),
  ),

  subscript: doc(para(text('H'), styled('2', 'subscript'), text('O'))),

  superscript: doc(para(text('E=mc'), styled('2', 'superscript'))),

  // CSS colours are normalized to `rgb(...)` by the HTML re-parser, hence the
  // rgb() form here. A `#hex` colour comes back as the equivalent rgb(), which
  // is a value-normalization divergence covered by its own KNOWN DIVERGENCE
  // suite further down.
  'text color (textStyle)': doc(
    para(
      text('green', [{ type: 'textStyle', attrs: { color: 'rgb(0, 255, 0)' } }]),
    ),
  ),

  'nested / mixed document': doc(
    heading(1, text('Mixed')),
    para(
      text('intro with '),
      styled('bold', 'bold'),
      text(' and a '),
      text('link', [{ type: 'link', attrs: { href: 'https://example.com' } }]),
      text('.'),
    ),
    bulletList(
      listItem(para(text('item with '), styled('code', 'code'))),
      listItem(
        para(text('item with sublist')),
        bulletList(
          listItem(para(text('nested a'))),
          listItem(para(text('nested b'))),
        ),
      ),
    ),
    callout('success', para(text('callout body')), codeBlock('bash', 'echo hi\n')),
    {
      type: 'blockquote',
      content: [para(text('quote at the end'))],
    },
  ),

  // Atom embeds have no inline text, so they must round-trip through their
  // schema-matching HTML (data-type div) rather than a literal that re-imports
  // as plain text. `subpages` used to export as the literal "{{SUBPAGES}}" and
  // came back as visible text on the page (red-team round-trip data loss);
  // this fixture locks the fix. editor-ext materializes the `recursive: false`
  // default on import, so it is pre-authored here like the other
  // default-materializing fixtures.
  'subpages embed': doc({ type: 'subpages', attrs: { recursive: false } }),
};
|
||||
|
||||
// One test per CORPUS entry: the document must come back canonically equal
// after export -> import -> editor-ext Yjs write path.
describe('git-sync converter §13.1 idempotency gate (editor-ext schema)', () => {
  Object.entries(CORPUS).forEach(([name, original]) => {
    it(`round-trips losslessly: ${name}`, async () => {
      const gate = await runGate(original);

      const equal = docsCanonicallyEqual(original, gate.canonNormalized);
      if (!equal) {
        // Print the markdown and both canonical forms so a real divergence
        // can be diagnosed from the test log.
        const report = [
          '',
          `[GATE FAIL] ${name}`,
          '--- markdown ---',
          gate.md,
          '--- canonical original ---',
          JSON.stringify(gate.canonOriginal, null, 2),
          '--- canonical round-tripped ---',
          JSON.stringify(gate.canonNormalized, null, 2),
          '',
        ].join('\n');
        // eslint-disable-next-line no-console
        console.error(report);
      }
      expect(equal).toBe(true);
    });
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// FORMERLY A KNOWN DIVERGENCE — images (kept isolated from the green CORPUS).
//
// This was never a schema-name divergence: the `image` NODE itself round-trips
// through editor-ext fine (it survives toYdoc under the real tiptapExtensions).
// The loss came from MARKDOWN, the on-disk transport format git-sync uses:
//
//   1. `convertProseMirrorToMarkdown` used to emit a standard markdown image,
//      `![alt](src)` (markdown-converter.ts case "image"). That syntax has no
//      way to express `width` / `height` / `align`, so those attrs were
//      DROPPED on export and could not be recovered on import.
//   2. A block-level image was hoisted out of its line by the HTML re-parser,
//      leaving a leading EMPTY paragraph (the block-image-hoist limitation
//      documented in packages/git-sync/test/fixtures/known-limitations).
//
// The converter now exports an image carrying layout attrs as an HTML <img>
// (as it already did for video/diagrams), so the assertions below have been
// flipped: they check that src, width, height and align survive and that the
// image is the first node of the round-tripped document.
//
// The fixture is still asserted field by field instead of living in the CORPUS
// because width/height come back as strings (see the note inside the test),
// so the suite compares values rather than relying on canonical equality.
// ---------------------------------------------------------------------------
describe('git-sync converter §13.1 image dimensions preserved (was KNOWN DIVERGENCE)', () => {
  const imageDoc = doc({
    type: 'image',
    attrs: {
      src: 'https://example.com/pic.png',
      width: 640,
      height: 480,
      align: 'center',
    },
  });

  it('preserves width/height/align by exporting an HTML <img> (PR #119 round-trip fix)', async () => {
    const { md, canonNormalized } = await runGate(imageDoc);

    // A top-level image carrying layout attrs is exported as a schema-matching
    // HTML <img> (the same path video/diagrams already use), so the dimensions
    // and alignment survive the round trip instead of collapsing to a bare
    // `![](src)`.
    expect(md.trim()).toBe(
      '<img src="https://example.com/pic.png" width="640" height="480" align="center">',
    );

    // The round-tripped image keeps src + the layout attrs. width/height are
    // re-imported as strings (matching the video/audio/pdf string convention),
    // so assert the values rather than the JS type.
    const imgAttrs = (canonNormalized as any).content[0].attrs;
    expect((canonNormalized as any).content[0].type).toBe('image');
    expect(imgAttrs.src).toBe('https://example.com/pic.png');
    expect(imgAttrs.align).toBe('center');
    expect(String(imgAttrs.width)).toBe('640');
    expect(String(imgAttrs.height)).toBe('480');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// KNOWN DIVERGENCE — text alignment (item #7; isolated, not silently dropped).
//
// TextAlign is registered by editor-ext for heading + paragraph and the SERVER
// schema supports it fully. What loses it is the MARKDOWN transport:
//
//   • Paragraph: `textAlign` IS exported, as `<div align="...">text</div>`
//     (markdown-converter case "paragraph"). On import, however, the
//     converter's docmost-schema declares `textAlign` WITHOUT a parseHTML
//     mapping, so `align` is never read back; the node imports with
//     `textAlign:null`, which canonicalizes away.
//   • Heading: alignment is not exported in the first place.
//   • Net effect: every non-default alignment is gone after a full round trip.
//
// Should the converter learn to parse `align`/`text-align` back onto the
// block, these assertions flip and an aligned-paragraph fixture belongs in the
// green CORPUS above.
// ---------------------------------------------------------------------------
describe('git-sync converter §13.1 KNOWN DIVERGENCE (text alignment dropped)', () => {
  it('drops a paragraph textAlign on the markdown round trip', async () => {
    const alignedDoc = doc({
      type: 'paragraph',
      attrs: { textAlign: 'center' },
      content: [text('centered')],
    });
    // What comes back: the same paragraph with no alignment attr at all.
    const expected = doc(para(text('centered')));

    const gate = await runGate(alignedDoc);

    expect(gate.canonNormalized).toEqual(expected);
    expect(docsCanonicallyEqual(alignedDoc, gate.canonNormalized)).toBe(false);
  });

  it('drops a heading textAlign (headings do not export alignment at all)', async () => {
    const alignedHeading = doc({
      type: 'heading',
      attrs: { level: 2, textAlign: 'center' },
      content: [text('centered heading')],
    });

    const gate = await runGate(alignedHeading);

    // The exported markdown is an ordinary heading with no alignment syntax.
    expect(gate.md.trim()).toBe('## centered heading');
    expect(docsCanonicallyEqual(alignedHeading, gate.canonNormalized)).toBe(
      false,
    );
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
// KNOWN DIVERGENCE — textStyle color is VALUE-NORMALIZED, not lost (item #7).
//
// The textStyle/color mark does round-trip (the green CORPUS holds the rgb()
// form). A `#hex` colour, though, is rewritten to the equivalent `rgb(...)`
// string by the HTML re-parser on import, and canonicalize.ts does NOT
// normalize colour formats. A `#hex` original is therefore not
// STRING-identical to its round trip even though the colour is semantically
// the same. This suite pins that boundary: colour fixtures must be authored in
// rgb() form to stay in the green corpus.
// ---------------------------------------------------------------------------
describe('git-sync converter §13.1 KNOWN DIVERGENCE (textStyle color #hex -> rgb)', () => {
  it('normalizes a #hex text color to rgb() (semantically preserved, string-divergent)', async () => {
    const colored = (color: string) =>
      doc(para(text('green', [{ type: 'textStyle', attrs: { color } }])));
    const hexDoc = colored('#00ff00');

    const gate = await runGate(hexDoc);

    // The mark is still there, carrying the rgb() spelling of the colour.
    expect(gate.canonNormalized).toEqual(colored('rgb(0, 255, 0)'));
    // The round trip is not string-identical to the #hex original.
    expect(docsCanonicallyEqual(hexDoc, gate.canonNormalized)).toBe(false);
  });
});
|
||||
@@ -9,6 +9,8 @@ import { ProvenanceSource } from '../../core/auth/dto/jwt-payload';
|
||||
* cannot fake an 'agent' marker.
|
||||
*/
|
||||
export interface AuthProvenanceData {
|
||||
// ProvenanceSource includes 'git-sync' — set by the in-process git-sync data
|
||||
// plane (issue #194 §8.1) when it drives PageService writes; never from a request token.
|
||||
actor: ProvenanceSource;
|
||||
aiChatId: string | null;
|
||||
}
|
||||
@@ -60,6 +62,14 @@ export function agentSourceFields<S extends string, C extends string>(
|
||||
sourceKey: S,
|
||||
chatKey: C,
|
||||
): Partial<Record<S, ProvenanceSource> & Record<C, string | null>> {
|
||||
// git-sync data-plane write (issue #194 §8.1): stamp the source 'git-sync' with NO
|
||||
// aiChatId (it has no internal ai_chats row). Mirrors the agent branch; each
|
||||
// write has a single actor, so precedence is irrelevant here.
|
||||
if (provenance?.actor === 'git-sync') {
|
||||
return { [sourceKey]: 'git-sync' } as Partial<
|
||||
Record<S, ProvenanceSource> & Record<C, string | null>
|
||||
>;
|
||||
}
|
||||
if (provenance?.actor !== 'agent') return {};
|
||||
return {
|
||||
[sourceKey]: 'agent',
|
||||
|
||||
@@ -3,8 +3,12 @@
|
||||
* from the SIGNED token claim (never a request body), so 'agent' is unspoofable.
|
||||
* Single source of truth so a typo like 'agnet' can't slip through as a bare
|
||||
* string (#143 review). Distinct from `ActorType` (auth principal kind).
|
||||
*
|
||||
* 'git-sync' marks writes made by the git-sync data plane (issue #194 §8.1). It NEVER
|
||||
* travels in a user-facing token; it is set in-process on the collab connection
|
||||
* context by the native datasource, so it cannot be spoofed from a request.
|
||||
*/
|
||||
export type ProvenanceSource = 'user' | 'agent';
|
||||
export type ProvenanceSource = 'user' | 'agent' | 'git-sync';
|
||||
|
||||
export enum JwtType {
|
||||
ACCESS = 'access',
|
||||
@@ -26,7 +30,8 @@ export type JwtPayload = {
|
||||
// normal user token (treated as 'user'); set only when the internal agent
|
||||
// mints a provenance access token so REST writes (create/rename/move page,
|
||||
// comment create/resolve) record a non-spoofable 'agent' marker (§6.5 / §15
|
||||
// C3 / §14 N2).
|
||||
// C3 / §14 N2). (git-sync writes use the in-process actor, not a token — see
|
||||
// the ProvenanceSource note.)
|
||||
actor?: ProvenanceSource;
|
||||
// Nullable: an external MCP agent has no internal ai_chats row, so it carries
|
||||
// an 'agent' actor with a null aiChatId.
|
||||
@@ -39,7 +44,8 @@ export type JwtCollabPayload = {
|
||||
type: 'collab';
|
||||
// Optional agent-edit provenance, signed into the collab token. Absent for
|
||||
// the human collab path (treated as 'user'); set only when the internal agent
|
||||
// mints a provenance collab token (§6.6 / §15 C2).
|
||||
// mints a provenance collab token (§6.6 / §15 C2). 'git-sync' (in ProvenanceSource)
|
||||
// is accepted for type-compatibility with the in-process git-sync write path.
|
||||
actor?: ProvenanceSource;
|
||||
// Nullable: an external MCP agent has no internal ai_chats row, so it carries
|
||||
// an 'agent' actor with a null aiChatId.
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
import { BadRequestException } from '@nestjs/common';
|
||||
import { PageService } from './page.service';
|
||||
import { MovePageDto } from '../dto/move-page.dto';
|
||||
import { Page } from '@docmost/db/types/entity.types';
|
||||
import { CreatePageDto } from '../dto/create-page.dto';
|
||||
import { UpdatePageDto } from '../dto/update-page.dto';
|
||||
import { Page, User } from '@docmost/db/types/entity.types';
|
||||
import { AuthProvenanceData } from '../../../common/decorators/auth-provenance.decorator';
|
||||
|
||||
// Direct instantiation with stub deps. The Test.createTestingModule form failed
|
||||
// to resolve the @InjectKysely()/@InjectQueue() tokens at compile(), and this
|
||||
@@ -389,4 +392,279 @@ describe('PageService', () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('git-sync provenance stamping (#1)', () => {
  const GIT_SYNC: AuthProvenanceData = { actor: 'git-sync', aiChatId: null };
  const USER_PROVENANCE: AuthProvenanceData = { actor: 'user', aiChatId: null };

  /**
   * Directly instantiates a PageService with stub dependencies.
   *
   * The constructor is positional (12 collaborators), so the argument order
   * lives here once instead of being repeated in every nested suite. Any
   * collaborator a suite does not pass is an inert `{}`.
   *
   * @param stubs - the collaborators a suite wants to observe or drive.
   * @returns a PageService wired to those stubs.
   */
  const buildPageService = (
    stubs: {
      pageRepo?: object;
      aiQueue?: object;
      generalQueue?: object;
      eventEmitter?: object;
    } = {},
  ): PageService =>
    new PageService(
      (stubs.pageRepo ?? {}) as any, // pageRepo
      {} as any, // pagePermissionRepo
      {} as any, // attachmentRepo
      {} as any, // db
      {} as any, // storageService
      {} as any, // attachmentQueue
      (stubs.aiQueue ?? {}) as any, // aiQueue
      (stubs.generalQueue ?? {}) as any, // generalQueue
      (stubs.eventEmitter ?? {}) as any, // eventEmitter
      {} as any, // collaborationGateway
      {} as any, // watcherService
      {} as any, // transclusionService
    );

  describe('create()', () => {
    // Build a service whose insertPage/generalQueue are observable and whose
    // nextPagePosition (a DB query) is stubbed, so create() reaches insertPage
    // without a real database.
    const makeService = () => {
      const insertedPage = { id: 'page-1', slugId: 'slug-1' };
      const pageRepo = {
        insertPage: jest.fn().mockResolvedValue(insertedPage),
      };
      // add() is fire-and-forget (the service .catch()es it); resolve so no
      // unhandled rejection leaks.
      const generalQueue = { add: jest.fn().mockResolvedValue(undefined) };

      const svc = buildPageService({ pageRepo, generalQueue });

      // nextPagePosition runs a kysely query; stub it so create() never hits
      // the db. No DTO content is provided, so parseProsemirrorContent is
      // skipped entirely (content/textContent/ydoc stay undefined).
      jest.spyOn(svc, 'nextPagePosition').mockResolvedValue('a0');

      return { svc, pageRepo };
    };

    const createDto: CreatePageDto = {
      title: 'New page',
      spaceId: 'space-1',
    } as any;

    it("stamps lastUpdatedSource:'git-sync' on the insertPage payload", async () => {
      const { svc, pageRepo } = makeService();

      await svc.create('user-1', 'ws-1', createDto, GIT_SYNC);

      expect(pageRepo.insertPage).toHaveBeenCalledTimes(1);
      expect(pageRepo.insertPage).toHaveBeenCalledWith(
        expect.objectContaining({ lastUpdatedSource: 'git-sync' }),
      );
      // git-sync carries no aiChatId (unlike the agent branch).
      const payload = pageRepo.insertPage.mock.calls[0][0];
      expect(payload.lastUpdatedAiChatId).toBeUndefined();
      // The human stays the responsible author.
      expect(payload.creatorId).toBe('user-1');
      expect(payload.lastUpdatedById).toBe('user-1');
    });

    it('leaves the source column unset for a plain user create', async () => {
      const { svc, pageRepo } = makeService();

      await svc.create('user-1', 'ws-1', createDto, USER_PROVENANCE);

      const payload = pageRepo.insertPage.mock.calls[0][0];
      expect(payload.lastUpdatedSource).toBeUndefined();
    });
  });

  describe('update() (rename)', () => {
    const makeService = () => {
      const pageRepo = {
        updatePage: jest.fn().mockResolvedValue({ numUpdatedRows: 1n }),
        // update() re-reads the row at the end to return the refreshed page.
        findById: jest.fn().mockResolvedValue({ id: 'page-1' }),
      };
      const generalQueue = { add: jest.fn().mockResolvedValue(undefined) };
      const aiQueue = { add: jest.fn().mockResolvedValue(undefined) };

      const svc = buildPageService({ pageRepo, aiQueue, generalQueue });

      return { svc, pageRepo };
    };

    const page: Page = {
      id: 'page-1',
      slugId: 'slug-1',
      spaceId: 'space-1',
      workspaceId: 'ws-1',
      title: 'Old title',
      icon: null,
      parentPageId: null,
      contributorIds: [],
    } as any;

    const user: User = { id: 'user-1' } as any;

    it("stamps lastUpdatedSource:'git-sync' on the updatePage payload", async () => {
      const { svc, pageRepo } = makeService();
      const dto: UpdatePageDto = { title: 'New title' } as any;

      await svc.update(page, dto, user, GIT_SYNC);

      expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
      const payload = pageRepo.updatePage.mock.calls[0][0];
      expect(payload.lastUpdatedSource).toBe('git-sync');
      expect(payload.lastUpdatedAiChatId).toBeUndefined();
      // The acting user stays the responsible author.
      expect(payload.lastUpdatedById).toBe('user-1');
    });

    it('leaves the source column unset for a plain user rename', async () => {
      const { svc, pageRepo } = makeService();
      const dto: UpdatePageDto = { title: 'New title' } as any;

      await svc.update(page, dto, user, USER_PROVENANCE);

      const payload = pageRepo.updatePage.mock.calls[0][0];
      expect(payload.lastUpdatedSource).toBeUndefined();
    });
  });

  describe('movePage()', () => {
    const SPACE_ID = 'space-1';
    const VALID_POSITION = 'a0';

    const makeService = () => {
      const pageRepo = {
        findById: jest.fn().mockResolvedValue({
          id: 'dest-parent',
          deletedAt: null,
          spaceId: SPACE_ID,
        }),
        updatePage: jest.fn().mockResolvedValue({ numUpdatedRows: 1n }),
      };
      const eventEmitter = { emit: jest.fn() };

      const svc = buildPageService({ pageRepo, eventEmitter });

      // No cycle: the destination's ancestor chain does not contain the moved
      // page, so movePage reaches updatePage.
      jest
        .spyOn(svc, 'getPageBreadCrumbs')
        .mockResolvedValue([{ id: 'dest-parent' }, { id: 'root' }] as any);

      return { svc, pageRepo };
    };

    const movedPage: Page = {
      id: 'page-1',
      parentPageId: 'old-parent',
      spaceId: SPACE_ID,
      workspaceId: 'ws-1',
      slugId: 'slug-1',
      title: 'Page 1',
      icon: null,
    } as any;

    const dto: MovePageDto = {
      pageId: 'page-1',
      position: VALID_POSITION,
      parentPageId: 'dest-parent',
    };

    it("stamps lastUpdatedSource:'git-sync' on the updatePage payload", async () => {
      const { svc, pageRepo } = makeService();

      await svc.movePage(dto, movedPage, GIT_SYNC);

      expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
      const payload = pageRepo.updatePage.mock.calls[0][0];
      expect(payload.lastUpdatedSource).toBe('git-sync');
      expect(payload.lastUpdatedAiChatId).toBeUndefined();
    });

    it('leaves the source column unset for a plain user move', async () => {
      const { svc, pageRepo } = makeService();

      await svc.movePage(dto, movedPage, USER_PROVENANCE);

      const payload = pageRepo.updatePage.mock.calls[0][0];
      expect(payload.lastUpdatedSource).toBeUndefined();
    });
  });

  describe('removePage()', () => {
    // removePage forwards a `source` 4th arg to pageRepo.removePage: 'git-sync'
    // for a git-sync-driven soft-delete (so the change-listener loop-guard skips
    // its own write), undefined otherwise.
    const makeService = () => {
      const pageRepo = {
        removePage: jest.fn().mockResolvedValue(undefined),
      };

      const svc = buildPageService({ pageRepo });

      return { svc, pageRepo };
    };

    it("forwards 'git-sync' as the source for a git-sync soft-delete", async () => {
      const { svc, pageRepo } = makeService();

      await svc.removePage('page-1', 'user-1', 'ws-1', GIT_SYNC);

      expect(pageRepo.removePage).toHaveBeenCalledTimes(1);
      const [pageId, userId, workspaceId, source] =
        pageRepo.removePage.mock.calls[0];
      expect(pageId).toBe('page-1');
      expect(userId).toBe('user-1');
      expect(workspaceId).toBe('ws-1');
      expect(source).toBe('git-sync');
    });

    it('forwards undefined as the source for a plain user delete', async () => {
      const { svc, pageRepo } = makeService();

      await svc.removePage('page-1', 'user-1', 'ws-1', USER_PROVENANCE);

      const [, , , source] = pageRepo.removePage.mock.calls[0];
      expect(source).toBeUndefined();
    });

    it('forwards undefined as the source when no provenance is given', async () => {
      const { svc, pageRepo } = makeService();

      await svc.removePage('page-1', 'user-1', 'ws-1');

      const [, , , source] = pageRepo.removePage.mock.calls[0];
      expect(source).toBeUndefined();
    });
  });
});
|
||||
});
|
||||
|
||||
@@ -1182,8 +1182,18 @@ export class PageService {
|
||||
pageId: string,
|
||||
userId: string,
|
||||
workspaceId: string,
|
||||
// Optional provenance. A git-sync-driven soft-delete stamps
|
||||
// `lastUpdatedSource = 'git-sync'` so the change-listener loop-guard skips
|
||||
// its own write (mirrors the create/update/move provenance branches above).
|
||||
provenance?: AuthProvenanceData,
|
||||
): Promise<void> {
|
||||
await this.pageRepo.removePage(pageId, userId, workspaceId);
|
||||
const isGitSync = provenance?.actor === 'git-sync';
|
||||
await this.pageRepo.removePage(
|
||||
pageId,
|
||||
userId,
|
||||
workspaceId,
|
||||
isGitSync ? 'git-sync' : undefined,
|
||||
);
|
||||
}
|
||||
|
||||
private async parseProsemirrorContent(
|
||||
|
||||
@@ -15,4 +15,12 @@ export class UpdateSpaceDto extends PartialType(CreateSpaceDto) {
|
||||
@IsOptional()
|
||||
@IsBoolean()
|
||||
allowViewerComments: boolean;
|
||||
|
||||
@IsOptional()
|
||||
@IsBoolean()
|
||||
gitSyncEnabled?: boolean;
|
||||
|
||||
@IsOptional()
|
||||
@IsBoolean()
|
||||
autoMergeConflicts?: boolean;
|
||||
}
|
||||
|
||||
@@ -22,4 +22,199 @@ describe('SpaceService', () => {
|
||||
it('should be defined', () => {
|
||||
expect(service).toBeDefined();
|
||||
});
|
||||
|
||||
describe('updateSpace gitSyncEnabled', () => {
  const workspaceId = 'ws-1';
  const spaceId = 'space-1';

  // buildService() replaces `executeTx` on the shared '@docmost/db/utils'
  // module object. That patch outlives the SpaceService instance, so it is
  // undone after every test to keep the stub from leaking into later tests.
  let restoreExecuteTx: (() => void) | undefined;

  afterEach(() => {
    restoreExecuteTx?.();
    restoreExecuteTx = undefined;
  });

  /**
   * Builds a SpaceService over stubbed collaborators.
   *
   * executeTx runs the callback immediately with a passthrough trx so the
   * repo calls happen inline; mirrors how the sibling sharing/comments flags
   * are persisted.
   *
   * @param settingsBefore - the `settings` value findById returns, i.e. the
   *   persisted state the update is compared against.
   * @returns the service plus the stubs the tests assert on.
   */
  const buildService = (settingsBefore: Record<string, any>) => {
    const spaceRepo = {
      findById: jest.fn().mockResolvedValue({
        id: spaceId,
        name: 'Space',
        slug: 'space',
        description: '',
        settings: settingsBefore,
      }),
      updateGitSyncSettings: jest.fn().mockResolvedValue({}),
      updateSharingSettings: jest.fn().mockResolvedValue({}),
      updateCommentSettings: jest.fn().mockResolvedValue({}),
      updateSpace: jest
        .fn()
        .mockResolvedValue({ id: spaceId, name: 'Space', slug: 'space' }),
      slugExists: jest.fn().mockResolvedValue(false),
    };
    const auditService = { log: jest.fn() };

    const svc = new SpaceService(
      spaceRepo as any,
      {} as any, // spaceMemberService
      {} as any, // shareRepo
      {} as any, // workspaceRepo
      {} as any, // licenseCheckService
      {} as any, // db
      {} as any, // attachmentQueue
      auditService as any,
    );

    // executeTx is invoked via the imported helper; patch it on the module.
    const executeTxSpy = jest
      .spyOn(require('@docmost/db/utils'), 'executeTx')
      .mockImplementation(async (_db: any, cb: any) => cb({} as any));
    restoreExecuteTx = () => executeTxSpy.mockRestore();

    return { svc, spaceRepo, auditService };
  };

  it('persists gitSyncEnabled via updateGitSyncSettings(enabled)', async () => {
    const { svc, spaceRepo } = buildService({});

    await svc.updateSpace(
      { spaceId, gitSyncEnabled: true } as any,
      workspaceId,
    );

    expect(spaceRepo.updateGitSyncSettings).toHaveBeenCalledWith(
      spaceId,
      workspaceId,
      'enabled',
      true,
      expect.anything(),
    );
  });

  it('does not call updateGitSyncSettings when flag is undefined', async () => {
    const { svc, spaceRepo } = buildService({});

    await svc.updateSpace({ spaceId } as any, workspaceId);

    expect(spaceRepo.updateGitSyncSettings).not.toHaveBeenCalled();
  });

  // --- audit delta on the git-sync toggle (test-strategy Module 4 / item #5)
  // updateSpace builds a before/after delta only when a flag's value actually
  // changes, and only logs an audit event when that delta is non-empty. These
  // assert that contract specifically for gitSyncEnabled.
  it('writes a SPACE_UPDATED audit delta on a REAL gitSyncEnabled change (false -> true)', async () => {
    // Prior persisted state: gitSync.enabled = false; the request flips it on.
    const { svc, auditService } = buildService({ gitSync: { enabled: false } });

    await svc.updateSpace(
      { spaceId, gitSyncEnabled: true } as any,
      workspaceId,
    );

    expect(auditService.log).toHaveBeenCalledTimes(1);
    expect(auditService.log).toHaveBeenCalledWith(
      expect.objectContaining({
        resourceId: spaceId,
        spaceId,
        changes: {
          before: expect.objectContaining({ gitSyncEnabled: false }),
          after: expect.objectContaining({ gitSyncEnabled: true }),
        },
      }),
    );
  });

  it('also records the delta when no prior gitSync settings exist (undefined -> true defaults prev to false)', async () => {
    // No gitSync key at all: prev resolves to the `?? false` default, so
    // enabling it is still a real change and is audited.
    const { svc, auditService } = buildService({});

    await svc.updateSpace(
      { spaceId, gitSyncEnabled: true } as any,
      workspaceId,
    );

    expect(auditService.log).toHaveBeenCalledTimes(1);
    const call = auditService.log.mock.calls[0][0];
    expect(call.changes.before.gitSyncEnabled).toBe(false);
    expect(call.changes.after.gitSyncEnabled).toBe(true);
  });

  it('does NOT write an audit delta on a no-op gitSyncEnabled (same value true -> true)', async () => {
    // Prior persisted state already true; the request sets the same value.
    // updateGitSyncSettings still runs (idempotent persist), but nothing is
    // added to the before/after delta, so no audit event is emitted.
    const { svc, spaceRepo, auditService } = buildService({
      gitSync: { enabled: true },
    });

    await svc.updateSpace(
      { spaceId, gitSyncEnabled: true } as any,
      workspaceId,
    );

    expect(spaceRepo.updateGitSyncSettings).toHaveBeenCalledTimes(1);
    expect(auditService.log).not.toHaveBeenCalled();
  });

  // --- autoMergeConflicts: a SECOND key in the SAME `gitSync` jsonb object,
  // persisted the same way as `enabled` (the repo's jsonb-merge keeps siblings).
  it('persists autoMergeConflicts via updateGitSyncSettings(autoMergeConflicts)', async () => {
    const { svc, spaceRepo } = buildService({});

    await svc.updateSpace(
      { spaceId, autoMergeConflicts: true } as any,
      workspaceId,
    );

    expect(spaceRepo.updateGitSyncSettings).toHaveBeenCalledWith(
      spaceId,
      workspaceId,
      'autoMergeConflicts',
      true,
      expect.anything(),
    );
  });

  it('does not call updateGitSyncSettings when autoMergeConflicts is undefined', async () => {
    const { svc, spaceRepo } = buildService({});

    await svc.updateSpace({ spaceId } as any, workspaceId);

    expect(spaceRepo.updateGitSyncSettings).not.toHaveBeenCalled();
  });

  it('writes a SPACE_UPDATED audit delta on a REAL autoMergeConflicts change (false -> true)', async () => {
    // Prior persisted state: gitSync.autoMergeConflicts = false; flip it on.
    const { svc, auditService } = buildService({
      gitSync: { autoMergeConflicts: false },
    });

    await svc.updateSpace(
      { spaceId, autoMergeConflicts: true } as any,
      workspaceId,
    );

    expect(auditService.log).toHaveBeenCalledTimes(1);
    expect(auditService.log).toHaveBeenCalledWith(
      expect.objectContaining({
        resourceId: spaceId,
        spaceId,
        changes: {
          before: expect.objectContaining({ autoMergeConflicts: false }),
          after: expect.objectContaining({ autoMergeConflicts: true }),
        },
      }),
    );
  });

  it('does NOT write an audit delta on a no-op autoMergeConflicts (same value true -> true)', async () => {
    const { svc, spaceRepo, auditService } = buildService({
      gitSync: { autoMergeConflicts: true },
    });

    await svc.updateSpace(
      { spaceId, autoMergeConflicts: true } as any,
      workspaceId,
    );

    expect(spaceRepo.updateGitSyncSettings).toHaveBeenCalledTimes(1);
    expect(auditService.log).not.toHaveBeenCalled();
  });
});
|
||||
});
|
||||
|
||||
@@ -213,6 +213,41 @@ export class SpaceService {
|
||||
);
|
||||
}
|
||||
|
||||
if (typeof updateSpaceDto.gitSyncEnabled !== 'undefined') {
|
||||
const prev = settingsBefore?.gitSync?.enabled ?? false;
|
||||
if (prev !== updateSpaceDto.gitSyncEnabled) {
|
||||
before.gitSyncEnabled = prev;
|
||||
after.gitSyncEnabled = updateSpaceDto.gitSyncEnabled;
|
||||
}
|
||||
|
||||
await this.spaceRepo.updateGitSyncSettings(
|
||||
updateSpaceDto.spaceId,
|
||||
workspaceId,
|
||||
'enabled',
|
||||
updateSpaceDto.gitSyncEnabled,
|
||||
trx,
|
||||
);
|
||||
}
|
||||
|
||||
if (typeof updateSpaceDto.autoMergeConflicts !== 'undefined') {
|
||||
const prev = settingsBefore?.gitSync?.autoMergeConflicts ?? false;
|
||||
if (prev !== updateSpaceDto.autoMergeConflicts) {
|
||||
before.autoMergeConflicts = prev;
|
||||
after.autoMergeConflicts = updateSpaceDto.autoMergeConflicts;
|
||||
}
|
||||
|
||||
// Merges into the SAME `gitSync` jsonb object as `enabled` (the repo's
|
||||
// jsonb-merge preserves sibling keys), so toggling one never clobbers the
|
||||
// other.
|
||||
await this.spaceRepo.updateGitSyncSettings(
|
||||
updateSpaceDto.spaceId,
|
||||
workspaceId,
|
||||
'autoMergeConflicts',
|
||||
updateSpaceDto.autoMergeConflicts,
|
||||
trx,
|
||||
);
|
||||
}
|
||||
|
||||
updatedSpace = await this.spaceRepo.updateSpace(
|
||||
{
|
||||
name: updateSpaceDto.name,
|
||||
|
||||
157
apps/server/src/database/repos/page/page.repo.spec.ts
Normal file
157
apps/server/src/database/repos/page/page.repo.spec.ts
Normal file
@@ -0,0 +1,157 @@
|
||||
import {
|
||||
Kysely,
|
||||
CamelCasePlugin,
|
||||
DummyDriver,
|
||||
PostgresAdapter,
|
||||
PostgresIntrospector,
|
||||
PostgresQueryCompiler,
|
||||
CompiledQuery,
|
||||
} from 'kysely';
|
||||
import { PageRepo } from './page.repo';
|
||||
import type { KyselyDB } from '../../types/kysely.types';
|
||||
|
||||
/**
|
||||
* SQL-builder unit test for the git-sync provenance stamp on PageRepo's
|
||||
* soft-delete / restore paths (PR #119 review). Both `removePage` and
|
||||
* `restorePage` take an optional `lastUpdatedSource` arg and conditionally fold
|
||||
* it into the recursive-subtree `UPDATE pages SET ...` via
|
||||
* `...(lastUpdatedSource ? { lastUpdatedSource } : {})`. The change-listener
|
||||
* loop-guard reads `last_updated_source = 'git-sync'` to recognize git-sync's own
|
||||
* writes and skip the echo cycle; this test guards that the stamp is present when
|
||||
* the arg is supplied and ABSENT when it is omitted (an ordinary user delete must
|
||||
* not clobber the column).
|
||||
*
|
||||
* Harness: the same compile-only Kysely/DummyDriver pattern as
|
||||
* space.repo.spec.ts, plus the production `CamelCasePlugin` (so the compiled SQL
|
||||
* carries the real snake_case column names, e.g. `last_updated_source`) and a
|
||||
* thin driver that returns ONE fixed row for every query. The fixed row is what
|
||||
* lets the repo's guard reads (root snapshot / recursive descendants / restore
|
||||
* target) resolve non-empty so execution reaches the subtree UPDATE we assert on
|
||||
* — a bare DummyDriver returns no rows and both methods short-circuit before the
|
||||
* update. We never hit a real database; we capture each compiled statement via
|
||||
* Kysely's `log` hook and inspect the `update "pages" set ...` SQL.
|
||||
*/
|
||||
describe('PageRepo — git-sync provenance on soft-delete / restore SQL', () => {
  // The one row every stubbed query answers with. It carries each column the
  // repo reads from its guard queries; `parentPageId: null` keeps restorePage
  // on the simple path (no parent-detach UPDATE), so the only
  // `update "pages"` statement is the one under test.
  const FIXED_ROW = {
    id: 'p1',
    slugId: 's1',
    title: 'Doc',
    icon: null,
    position: 'a0',
    spaceId: 'space-1',
    parentPageId: null,
    deletedAt: null,
  };

  // Connection stub: every query resolves to a fresh copy of FIXED_ROW.
  const makeFixedRowConnection = () => ({
    executeQuery: async () => ({ rows: [{ ...FIXED_ROW }] }),
    // eslint-disable-next-line @typescript-eslint/no-empty-function
    async *streamQuery() {},
  });

  class FixedRowDriver extends DummyDriver {
    async acquireConnection(): Promise<any> {
      return makeFixedRowConnection();
    }
  }

  interface CapturedStatement {
    sql: string;
    parameters: readonly unknown[];
  }

  // Builds a PageRepo over a Kysely instance (Postgres dialect, CamelCasePlugin
  // for real column names) that never reaches a database; the `log` hook
  // records the compiled SQL of every executed statement.
  function makeRepoCapturingSql() {
    const statements: CapturedStatement[] = [];

    const db = new Kysely<any>({
      dialect: {
        createAdapter: () => new PostgresAdapter(),
        createDriver: () => new FixedRowDriver(),
        createIntrospector: (d) => new PostgresIntrospector(d),
        createQueryCompiler: () => new PostgresQueryCompiler(),
      },
      plugins: [new CamelCasePlugin()],
      log: (event) => {
        if (event.level !== 'query') return;
        const { sql, parameters } = event.query as CompiledQuery;
        statements.push({ sql, parameters });
      },
    });

    const repo = new PageRepo(
      db as unknown as KyselyDB,
      {} as any,
      { emit: jest.fn() } as any,
    );

    // Returns the first captured UPDATE on pages, with its SQL whitespace
    // collapsed to single spaces so substring assertions are stable.
    const getUpdatePagesSql = (): CapturedStatement | undefined => {
      for (const statement of statements) {
        const flattened = statement.sql.replace(/\s+/g, ' ');
        if (/update "pages" set/i.test(flattened)) {
          return { ...statement, sql: flattened };
        }
      }
      return undefined;
    };

    return { repo, getUpdatePagesSql };
  }

  describe('removePage', () => {
    it("stamps last_updated_source = 'git-sync' on the subtree soft-delete when the provenance arg is supplied", async () => {
      const harness = makeRepoCapturingSql();

      await harness.repo.removePage('p1', 'user-1', 'ws-1', 'git-sync');

      const statement = harness.getUpdatePagesSql();
      expect(statement).toBeDefined();
      const { sql, parameters } = statement!;
      // The SET clause names the provenance column...
      expect(sql).toContain('"last_updated_source" =');
      // ...and 'git-sync' is among the bound values.
      expect(parameters).toContain('git-sync');
      // Sanity: still the soft-delete UPDATE (deleted_at is set as well).
      expect(sql).toContain('"deleted_at" =');
    });

    it('OMITS last_updated_source from the soft-delete when the provenance arg is undefined', async () => {
      const harness = makeRepoCapturingSql();

      await harness.repo.removePage('p1', 'user-1', 'ws-1');

      const statement = harness.getUpdatePagesSql();
      expect(statement).toBeDefined();
      const { sql, parameters } = statement!;
      // Ordinary user delete: the column is not part of the statement at all.
      expect(sql).not.toContain('last_updated_source');
      expect(parameters).not.toContain('git-sync');
      // Still the soft-delete UPDATE.
      expect(sql).toContain('"deleted_at" =');
    });
  });

  describe('restorePage', () => {
    it("stamps last_updated_source = 'git-sync' on the subtree restore when the provenance arg is supplied", async () => {
      const harness = makeRepoCapturingSql();

      await harness.repo.restorePage('p1', 'ws-1', 'git-sync');

      const statement = harness.getUpdatePagesSql();
      expect(statement).toBeDefined();
      const { sql, parameters } = statement!;
      expect(sql).toContain('"last_updated_source" =');
      expect(parameters).toContain('git-sync');
      // Sanity: this is the restore UPDATE (it assigns deleted_at).
      expect(sql).toContain('"deleted_at" =');
    });

    it('OMITS last_updated_source from the restore when the provenance arg is undefined', async () => {
      const harness = makeRepoCapturingSql();

      await harness.repo.restorePage('p1', 'ws-1');

      const statement = harness.getUpdatePagesSql();
      expect(statement).toBeDefined();
      const { sql, parameters } = statement!;
      expect(sql).not.toContain('last_updated_source');
      expect(parameters).not.toContain('git-sync');
      expect(sql).toContain('"deleted_at" =');
    });
  });
});
|
||||
@@ -297,6 +297,11 @@ export class PageRepo {
|
||||
pageId: string,
|
||||
deletedById: string,
|
||||
workspaceId: string,
|
||||
// Optional provenance marker. When the soft-delete is driven by an automated
|
||||
// data plane (e.g. git-sync), stamp `lastUpdatedSource` so the change-listener
|
||||
// loop-guard recognizes it as its own write and does not schedule an echo
|
||||
// cycle. Omitted for ordinary user deletes (column keeps its prior value).
|
||||
lastUpdatedSource?: string,
|
||||
): Promise<void> {
|
||||
const currentDate = new Date();
|
||||
|
||||
@@ -347,6 +352,7 @@ export class PageRepo {
|
||||
.set({
|
||||
deletedById: deletedById,
|
||||
deletedAt: currentDate,
|
||||
...(lastUpdatedSource ? { lastUpdatedSource } : {}),
|
||||
})
|
||||
.where('id', 'in', pageIds)
|
||||
.where('deletedAt', 'is', null)
|
||||
@@ -377,7 +383,14 @@ export class PageRepo {
|
||||
}
|
||||
}
|
||||
|
||||
async restorePage(pageId: string, workspaceId: string): Promise<void> {
|
||||
async restorePage(
|
||||
pageId: string,
|
||||
workspaceId: string,
|
||||
// See removePage: stamp `lastUpdatedSource` for automated (git-sync) restores
|
||||
// so the change-listener loop-guard skips the echo cycle. Omitted for
|
||||
// ordinary user restores.
|
||||
lastUpdatedSource?: string,
|
||||
): Promise<void> {
|
||||
// First, check if the page being restored has a deleted parent
|
||||
const pageToRestore = await this.db
|
||||
.selectFrom('pages')
|
||||
@@ -425,7 +438,11 @@ export class PageRepo {
|
||||
// Restore all pages, but only detach the root page if its parent is deleted
|
||||
await this.db
|
||||
.updateTable('pages')
|
||||
.set({ deletedById: null, deletedAt: null })
|
||||
.set({
|
||||
deletedById: null,
|
||||
deletedAt: null,
|
||||
...(lastUpdatedSource ? { lastUpdatedSource } : {}),
|
||||
})
|
||||
.where('id', 'in', pageIds)
|
||||
.execute();
|
||||
|
||||
|
||||
141
apps/server/src/database/repos/space/space.repo.spec.ts
Normal file
141
apps/server/src/database/repos/space/space.repo.spec.ts
Normal file
@@ -0,0 +1,141 @@
|
||||
import {
|
||||
Kysely,
|
||||
DummyDriver,
|
||||
PostgresAdapter,
|
||||
PostgresIntrospector,
|
||||
PostgresQueryCompiler,
|
||||
CompiledQuery,
|
||||
} from 'kysely';
|
||||
import { SpaceRepo } from './space.repo';
|
||||
import type { KyselyDB } from '../../types/kysely.types';
|
||||
|
||||
/**
|
||||
* SQL-builder unit test for the jsonb-merge invariant of
|
||||
* SpaceRepo.updateGitSyncSettings (review comment #694 / test-strategy item #6).
|
||||
*
|
||||
* The merge is RAW SQL, so a behavioural test would need a live Postgres — which
|
||||
* is intentionally out of scope here (the reviewer's own §13.3 was deferred for
|
||||
* the same reason). Instead we follow the existing repo-spec convention
|
||||
* (ai-agent-roles.repo.spec.ts) of NOT executing: we compile the query with a
|
||||
* DummyDriver Postgres dialect and assert the generated SQL preserves sibling
|
||||
* keys. The structural invariant the SQL must encode:
|
||||
*
|
||||
* settings := COALESCE(settings, '{}') || jsonb_build_object('gitSync', ...)
|
||||
* gitSync := COALESCE(settings->'gitSync', '{}') || jsonb_build_object(key, value)
|
||||
*
|
||||
* The OUTER `||` merges into the existing top-level `settings`, so a sibling
|
||||
* top-level key (e.g. `sharing`) is preserved. The INNER COALESCE merges into
|
||||
* the existing `gitSync` object, so a sibling key inside gitSync (e.g. `other`)
|
||||
* is preserved. A naive `set settings = jsonb_build_object('gitSync', ...)`
|
||||
* would clobber both — this test guards exactly that regression.
|
||||
*/
|
||||
describe('SpaceRepo.updateGitSyncSettings — jsonb merge SQL', () => {
|
||||
// Compile-only Kysely instance: the Postgres adapter and query compiler emit
// real Postgres SQL, while the DummyDriver means no connection is ever opened.
function makeCompileOnlyDb() {
  const compileOnlyDialect = {
    createAdapter: () => new PostgresAdapter(),
    createDriver: () => new DummyDriver(),
    createIntrospector: (db: Kysely<any>) => new PostgresIntrospector(db),
    createQueryCompiler: () => new PostgresQueryCompiler(),
  };
  return new Kysely<any>({ dialect: compileOnlyDialect });
}
|
||||
|
||||
// Construct a SpaceRepo on top of the compile-only db and record the SQL of the
// statement it runs. `db.updateTable` is spied so that the builder it returns
// is wrapped in a Proxy; when the repo reaches its terminal `execute` /
// `executeTakeFirst` call, the proxy compiles that very builder first, stores
// the result, and then lets the real call proceed.
function makeRepoCapturingSql() {
  const compileOnlyDb = makeCompileOnlyDb();
  let lastCompiled: CompiledQuery | undefined;

  const TERMINAL_METHODS = new Set<PropertyKey>(['executeTakeFirst', 'execute']);

  // True for anything that looks like a kysely builder (exposes `.compile()`).
  const canCompile = (candidate: any): boolean =>
    Boolean(candidate) &&
    typeof candidate === 'object' &&
    typeof candidate.compile === 'function';

  // Builders are immutable: every chained call (.set/.where/.returningAll)
  // hands back a NEW builder, so each chainable result is wrapped again.
  function wrapBuilder(builder: any): any {
    return new Proxy(builder, {
      get(target, prop, receiver) {
        const member = Reflect.get(target, prop, receiver);
        if (typeof member !== 'function') {
          return member;
        }
        return (...callArgs: unknown[]) => {
          // Snapshot the SQL right before the terminal call runs.
          if (TERMINAL_METHODS.has(prop) && canCompile(target)) {
            lastCompiled = target.compile();
          }
          const outcome = member.apply(target, callArgs);
          return canCompile(outcome) ? wrapBuilder(outcome) : outcome;
        };
      },
    });
  }

  const realUpdateTable = compileOnlyDb.updateTable.bind(compileOnlyDb);
  jest
    .spyOn(compileOnlyDb, 'updateTable')
    .mockImplementation((...args: Parameters<typeof realUpdateTable>) =>
      wrapBuilder(realUpdateTable(...args)),
    );

  const repo = new SpaceRepo(compileOnlyDb as unknown as KyselyDB, {} as any);
  return { repo, getCaptured: () => lastCompiled };
}
|
||||
|
||||
it("compiles a jsonb merge that preserves sibling top-level and gitSync keys", async () => {
  const harness = makeRepoCapturingSql();

  // Nothing hits a database: the DummyDriver returns no rows, so the call
  // resolves to undefined once the statement has been compiled — and the
  // compiled statement is the only thing inspected below.
  await harness.repo.updateGitSyncSettings('space-1', 'ws-1', 'enabled', true);

  const statement = harness.getCaptured();
  expect(statement).toBeDefined();

  // The raw template spans several indented lines; normalise whitespace so the
  // checks do not depend on how the source happens to be formatted.
  const flatSql = statement!.sql.replace(/\s+/g, ' ');

  const requiredFragments = [
    // Outer merge into the existing settings object: top-level siblings
    // (e.g. `sharing`) are kept.
    `set "settings" = COALESCE(settings, '{}'::jsonb) ||`,
    // Inner merge into the existing gitSync object: gitSync siblings
    // (e.g. `other`) are kept.
    `jsonb_build_object('gitSync', COALESCE(settings->'gitSync', '{}'::jsonb) ||`,
    // The preference key is written through jsonb_build_object.
    `jsonb_build_object('enabled',`,
    // Row + workspace scoping.
    `where "id" =`,
    `and "workspaceId" =`,
  ];
  for (const fragment of requiredFragments) {
    expect(flatSql).toContain(fragment);
  }

  // Guard against the clobbering form: a bare top-level
  // `set "settings" = jsonb_build_object(` with no COALESCE/merge.
  expect(flatSql).not.toContain(`set "settings" = jsonb_build_object(`);

  // The preference value is inlined as a literal, so only updatedAt, id and
  // workspaceId are bound. updatedAt is a Date; check the two string ids.
  for (const boundValue of ['space-1', 'ws-1']) {
    expect(statement!.parameters).toContain(boundValue);
  }
});
|
||||
|
||||
it('inlines the prefKey/prefValue literally (sql.raw key, sql.lit value)', async () => {
  const harness = makeRepoCapturingSql();

  await harness.repo.updateGitSyncSettings('space-1', 'ws-1', 'enabled', false);

  const flatSql = harness.getCaptured()!.sql.replace(/\s+/g, ' ');

  // Neither the key nor the value is a bound parameter: both show up verbatim
  // inside the inner jsonb_build_object call.
  const expectedInnerObject = `jsonb_build_object('enabled', false)`;
  expect(flatSql).toContain(expectedInnerObject);
});
|
||||
});
|
||||
@@ -111,6 +111,28 @@ export class SpaceRepo {
|
||||
.executeTakeFirst();
|
||||
}
|
||||
|
||||
/**
 * Set one git-sync preference on a space without clobbering sibling settings.
 *
 * The update is a two-level jsonb merge:
 *   settings := COALESCE(settings, '{}') || { gitSync: <merged gitSync> }
 *   gitSync  := COALESCE(settings->'gitSync', '{}') || { [prefKey]: prefValue }
 * so other top-level keys and other keys inside `gitSync` are preserved.
 *
 * @param spaceId     id of the space row to update
 * @param workspaceId workspace the space must belong to (second WHERE clause)
 * @param prefKey     key written inside `settings.gitSync`; restricted to
 *                    letters, digits, `_`, `.` and `-`
 * @param prefValue   value stored under `prefKey`
 * @param trx         optional transaction to run in
 * @returns the updated row (`returningAll`), or undefined when no row matched
 * @throws Error when `prefKey` contains characters outside the allowed set
 */
async updateGitSyncSettings(
  spaceId: string,
  workspaceId: string,
  prefKey: string,
  prefValue: string | boolean,
  trx?: KyselyTransaction,
) {
  // The key ends up inside the SQL text rather than as a bound parameter, so it
  // must never carry quote characters. Reject anything that is not a plain
  // identifier-like token instead of trusting every caller to pre-validate.
  if (!/^[A-Za-z0-9_.-]+$/.test(prefKey)) {
    throw new Error(`Invalid git-sync settings key: ${JSON.stringify(prefKey)}`);
  }

  const db = dbOrTx(this.db, trx);
  return db
    .updateTable('spaces')
    .set({
      // `sql.lit` lets the query compiler emit the quoted literal itself, rather
      // than splicing raw text between hand-written quotes with `sql.raw`.
      // NOTE(review): `prefValue` is inlined with `sql.lit` too; string values
      // from untrusted input should be validated by the caller.
      settings: sql`COALESCE(settings, '{}'::jsonb)
        || jsonb_build_object('gitSync', COALESCE(settings->'gitSync', '{}'::jsonb)
        || jsonb_build_object(${sql.lit(prefKey)}, ${sql.lit(prefValue)}))`,
      updatedAt: new Date(),
    })
    .where('id', '=', spaceId)
    .where('workspaceId', '=', workspaceId)
    .returningAll()
    .executeTakeFirst();
}
|
||||
|
||||
async updateCommentSettings(
|
||||
spaceId: string,
|
||||
workspaceId: string,
|
||||
|
||||
@@ -14,4 +14,112 @@ describe('EnvironmentService', () => {
|
||||
it('should be defined', () => {
|
||||
expect(service).toBeDefined();
|
||||
});
|
||||
|
||||
describe('getGitSyncPollIntervalMs', () => {
  // Service over a stub config whose `get` answers every key with `value`, or
  // with the caller-supplied fallback when `value` is undefined.
  function withEnv(value?: string) {
    const stubConfig = {
      get: (_key: string, fallback?: string) => value ?? fallback,
    };
    return new EnvironmentService(stubConfig as any);
  }

  it('defaults to 15000 when unset', () => {
    const service = withEnv();
    expect(service.getGitSyncPollIntervalMs()).toBe(15000);
  });

  it('parses a valid positive int', () => {
    const service = withEnv('30000');
    expect(service.getGitSyncPollIntervalMs()).toBe(30000);
  });

  it('falls back to 15000 for non-positive or unparseable values', () => {
    for (const rejected of ['0', '-100', 'not-a-number']) {
      expect(withEnv(rejected).getGitSyncPollIntervalMs()).toBe(15000);
    }
  });
});
|
||||
|
||||
describe('getGitSyncDebounceMs', () => {
  // Service over a stub config whose `get` answers every key with `value`, or
  // with the caller-supplied fallback when `value` is undefined.
  function withEnv(value?: string) {
    const stubConfig = {
      get: (_key: string, fallback?: string) => value ?? fallback,
    };
    return new EnvironmentService(stubConfig as any);
  }

  it('defaults to 2000 when unset', () => {
    const service = withEnv();
    expect(service.getGitSyncDebounceMs()).toBe(2000);
  });

  it('parses a valid positive int', () => {
    const service = withEnv('500');
    expect(service.getGitSyncDebounceMs()).toBe(500);
  });

  it('falls back to 2000 for non-positive or unparseable values', () => {
    for (const rejected of ['0', '-5', 'not-a-number']) {
      expect(withEnv(rejected).getGitSyncDebounceMs()).toBe(2000);
    }
  });
});
|
||||
|
||||
// getGitSyncDataDir consults two keys (GIT_SYNC_DATA_DIR and DATA_DIR), so the
// stub here resolves each key from a lookup table and otherwise returns the
// fallback argument it was handed.
describe('getGitSyncDataDir', () => {
  function withEnv(values: Record<string, string | undefined>) {
    const stubConfig = {
      get: (key: string, fallback?: string) => values[key] ?? fallback,
    };
    return new EnvironmentService(stubConfig as any);
  }

  it("defaults to './data/git-sync' when neither key is set", () => {
    const service = withEnv({});
    expect(service.getGitSyncDataDir()).toBe('./data/git-sync');
  });

  it('derives from DATA_DIR with the /git-sync suffix', () => {
    const service = withEnv({ DATA_DIR: '/var/lib/docmost' });
    expect(service.getGitSyncDataDir()).toBe('/var/lib/docmost/git-sync');
  });

  it('strips trailing slashes from DATA_DIR before appending', () => {
    const service = withEnv({ DATA_DIR: '/var/lib/docmost///' });
    expect(service.getGitSyncDataDir()).toBe('/var/lib/docmost/git-sync');
  });

  it('lets an explicit GIT_SYNC_DATA_DIR override the DATA_DIR derivation', () => {
    const service = withEnv({
      GIT_SYNC_DATA_DIR: '/custom/vault',
      DATA_DIR: '/var/lib/docmost',
    });
    expect(service.getGitSyncDataDir()).toBe('/custom/vault');
  });

  it('returns the explicit override verbatim (no /git-sync suffix, no slash strip)', () => {
    const service = withEnv({ GIT_SYNC_DATA_DIR: '/custom/vault/' });
    expect(service.getGitSyncDataDir()).toBe('/custom/vault/');
  });
});
|
||||
|
||||
// Contract under test: isGitSyncEnabled lower-cases the raw value and compares
// it with 'true'. Any casing of "true" switches it on; unset, "false" and any
// other string leave it off.
describe('isGitSyncEnabled', () => {
  function withEnv(value?: string) {
    const stubConfig = {
      get: (_key: string, fallback?: string) => value ?? fallback,
    };
    return new EnvironmentService(stubConfig as any);
  }

  it('is true for "true" and "TRUE" (case-insensitive)', () => {
    for (const accepted of ['true', 'TRUE']) {
      expect(withEnv(accepted).isGitSyncEnabled()).toBe(true);
    }
  });

  it('is false when unset (defaults to "false")', () => {
    const service = withEnv();
    expect(service.isGitSyncEnabled()).toBe(false);
  });

  it('is false for "false" and garbage values', () => {
    for (const rejected of ['false', 'maybe', '1']) {
      expect(withEnv(rejected).isGitSyncEnabled()).toBe(false);
    }
  });
});
|
||||
});
|
||||
|
||||
@@ -320,4 +320,96 @@ export class EnvironmentService {
|
||||
.map((o) => o.trim())
|
||||
.filter(Boolean);
|
||||
}
|
||||
|
||||
// --- git-sync (issue #194 §7.2) -------------------------------------------------
|
||||
|
||||
/**
 * Master on/off flag for the git-sync control plane. Reads GIT_SYNC_ENABLED
 * (default 'false') and is true only when the value is "true" in any casing.
 */
isGitSyncEnabled(): boolean {
  const flag = this.configService.get<string>('GIT_SYNC_ENABLED', 'false');
  return flag.toLowerCase() === 'true';
}
|
||||
|
||||
/**
 * Whether the per-space vaults are served over smart-HTTP (the /git host).
 *
 * - GIT_SYNC_HTTP_ENABLED unset: follows isGitSyncEnabled(), so turning sync
 *   on also turns the host on unless it is explicitly disabled.
 * - GIT_SYNC_HTTP_ENABLED set: "true" (any casing) means on; every other value
 *   means off.
 */
isGitSyncHttpEnabled(): boolean {
  const httpFlag = this.configService.get<string>('GIT_SYNC_HTTP_ENABLED');
  return httpFlag === undefined
    ? this.isGitSyncEnabled()
    : httpFlag.toLowerCase() === 'true';
}
|
||||
|
||||
/**
 * Root directory that holds the per-space vault repos.
 *
 * A non-empty GIT_SYNC_DATA_DIR is returned exactly as configured. Otherwise
 * the path is `<DATA_DIR or ./data>/git-sync`, with trailing slashes removed
 * from the base first. DATA_DIR is read straight from the config service.
 */
getGitSyncDataDir(): string {
  const override = this.configService.get<string>('GIT_SYNC_DATA_DIR');
  if (override) {
    return override;
  }

  const baseDir = this.configService.get<string>('DATA_DIR') || './data';
  const withoutTrailingSlashes = baseDir.replace(/\/+$/, '');
  return `${withoutTrailingSlashes}/git-sync`;
}
|
||||
|
||||
/**
 * Remote template from GIT_SYNC_REMOTE_TEMPLATE, e.g.
 * `git@host:vault-{spaceId}.git`; undefined when not configured.
 */
getGitSyncRemoteTemplate(): string | undefined {
  const remoteTemplate = this.configService.get<string>(
    'GIT_SYNC_REMOTE_TEMPLATE',
  );
  return remoteTemplate;
}
|
||||
|
||||
/**
 * Poll-safety interval in ms, read from GIT_SYNC_POLL_INTERVAL_MS
 * (default 15000).
 *
 * The raw value must convert, as a whole, to a positive safe integer;
 * anything else (unset, empty, zero, negative, fractional, or text with
 * trailing garbage such as "15000ms") falls back to the default, so a bad
 * override can never disable or zero the poll loop.
 *
 * @returns the configured interval, or 15000 when the value is unusable
 */
getGitSyncPollIntervalMs(): number {
  const defaultIntervalMs = 15000;
  const raw = this.configService.get<string>(
    'GIT_SYNC_POLL_INTERVAL_MS',
    '15000',
  );
  // Number() converts the WHOLE string. parseInt() only reads a numeric prefix,
  // so "1e4" used to become a 1 ms poll interval and "15000ms" passed silently.
  const intervalMs = Number(raw);
  return Number.isSafeInteger(intervalMs) && intervalMs > 0
    ? intervalMs
    : defaultIntervalMs;
}
|
||||
|
||||
/**
 * Watchdog timeout in ms for the spawned `git http-backend`, read from
 * GIT_SYNC_BACKEND_TIMEOUT_MS (default 120000). It bounds a single smart-HTTP
 * request so a stalled `git-receive-pack` cannot hold the per-space lock
 * forever (the child is killed and a 500 sent on expiry).
 *
 * The raw value must convert, as a whole, to a positive safe integer;
 * anything else falls back to the default, so a bad override can never
 * disable the watchdog.
 *
 * @returns the configured timeout, or 120000 when the value is unusable
 */
getGitSyncBackendTimeoutMs(): number {
  const defaultTimeoutMs = 120000;
  const raw = this.configService.get<string>(
    'GIT_SYNC_BACKEND_TIMEOUT_MS',
    '120000',
  );
  // Number() converts the WHOLE string. parseInt() only reads a numeric prefix,
  // so "1e5" used to become a 1 ms watchdog that would kill every request.
  const timeoutMs = Number(raw);
  return Number.isSafeInteger(timeoutMs) && timeoutMs > 0
    ? timeoutMs
    : defaultTimeoutMs;
}
|
||||
|
||||
/**
 * Event debounce window in ms, read from GIT_SYNC_DEBOUNCE_MS (default 2000).
 *
 * The raw value must convert, as a whole, to a positive safe integer;
 * anything else (unset, empty, zero, negative, fractional, or text with
 * trailing garbage such as "2s") falls back to the default, so a bad override
 * can never disable the debounce.
 *
 * @returns the configured window, or 2000 when the value is unusable
 */
getGitSyncDebounceMs(): number {
  const defaultDebounceMs = 2000;
  const raw = this.configService.get<string>('GIT_SYNC_DEBOUNCE_MS', '2000');
  // Number() converts the WHOLE string. parseInt() only reads a numeric prefix,
  // so "2s" used to become a 2 ms debounce instead of being rejected.
  const debounceMs = Number(raw);
  return Number.isSafeInteger(debounceMs) && debounceMs > 0
    ? debounceMs
    : defaultDebounceMs;
}
|
||||
|
||||
|
||||
/**
 * Id of the service user that git-sync writes are attributed to, read from
 * GIT_SYNC_SERVICE_USER_ID. Mandatory when sync is enabled (enforced in
 * environment.validation.ts) and optional otherwise, hence possibly undefined.
 */
getGitSyncServiceUserId(): string | undefined {
  const serviceUserId = this.configService.get<string>(
    'GIT_SYNC_SERVICE_USER_ID',
  );
  return serviceUserId;
}
|
||||
|
||||
/**
 * Path of the SSH key used for git remote access, read from
 * GIT_SYNC_SSH_KEY_PATH; undefined when not configured.
 */
getGitSyncSshKeyPath(): string | undefined {
  const sshKeyPath = this.configService.get<string>('GIT_SYNC_SSH_KEY_PATH');
  return sshKeyPath;
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
import { plainToInstance } from 'class-transformer';
|
||||
import { validateSync } from 'class-validator';
|
||||
import { EnvironmentVariables } from './environment.validation';
|
||||
|
||||
/**
 * Validation-layer coverage for the git-sync env contract (test-strategy Module
 * 4 / item #4). The decorated class is exercised through `validateSync`
 * directly, because the exported `validate()` helper calls `process.exit(1)` on
 * failure and therefore cannot be asserted in-process. Only the git-sync rules
 * are checked; the always-required fields are supplied so that unrelated
 * validators stay quiet.
 */
describe('EnvironmentVariables — git-sync validation', () => {
  // Minimal config satisfying the unconditionally-required fields
  // (DATABASE_URL, REDIS_URL, APP_SECRET); any error seen below therefore comes
  // from a git-sync rule.
  const baseConfig = {
    DATABASE_URL: 'postgres://user:pass@localhost:5432/docmost',
    REDIS_URL: 'redis://localhost:6379',
    APP_SECRET: 'x'.repeat(32),
  };

  // Validate the baseline config overlaid with `extra`.
  function validate(extra: Record<string, unknown>) {
    const merged = { ...baseConfig, ...extra };
    return validateSync(plainToInstance(EnvironmentVariables, merged));
  }

  // First validation error reported for `property`, if any.
  function errorFor(
    errors: ReturnType<typeof validateSync>,
    property: string,
  ) {
    return errors.find((candidate) => candidate.property === property);
  }

  it('flags GIT_SYNC_SERVICE_USER_ID when GIT_SYNC_ENABLED="true" and the id is absent', () => {
    const serviceUserError = errorFor(
      validate({ GIT_SYNC_ENABLED: 'true' }),
      'GIT_SYNC_SERVICE_USER_ID',
    );

    expect(serviceUserError).toBeDefined();
    // The failing constraint is @IsNotEmpty: sync is on, yet no attributable
    // author was configured.
    expect(serviceUserError?.constraints).toHaveProperty('isNotEmpty');
  });

  it('accepts GIT_SYNC_ENABLED="true" once GIT_SYNC_SERVICE_USER_ID is present', () => {
    const validationErrors = validate({
      GIT_SYNC_ENABLED: 'true',
      GIT_SYNC_SERVICE_USER_ID: 'service-user-1',
    });

    expect(
      errorFor(validationErrors, 'GIT_SYNC_SERVICE_USER_ID'),
    ).toBeUndefined();
  });

  it('does not require the service user id when git-sync is disabled (unset)', () => {
    const validationErrors = validate({});

    // The @ValidateIf gate (GIT_SYNC_ENABLED === "true") is closed, so the
    // required-if-enabled rule never runs.
    expect(
      errorFor(validationErrors, 'GIT_SYNC_SERVICE_USER_ID'),
    ).toBeUndefined();
  });

  it('does not require the service user id when git-sync is explicitly "false"', () => {
    const validationErrors = validate({ GIT_SYNC_ENABLED: 'false' });

    for (const property of ['GIT_SYNC_SERVICE_USER_ID', 'GIT_SYNC_ENABLED']) {
      expect(errorFor(validationErrors, property)).toBeUndefined();
    }
  });

  it('rejects a GIT_SYNC_ENABLED value outside the {true,false} set via @IsIn', () => {
    const enabledError = errorFor(
      validate({ GIT_SYNC_ENABLED: 'maybe' }),
      'GIT_SYNC_ENABLED',
    );

    expect(enabledError).toBeDefined();
    expect(enabledError?.constraints).toHaveProperty('isIn');
  });
});
|
||||
@@ -170,6 +170,56 @@ export class EnvironmentVariables {
|
||||
},
|
||||
)
|
||||
CLICKHOUSE_URL: string;
|
||||
|
||||
// --- git-sync (issue #194 §7.2) — all OPTIONAL. The master switch defaults off; a
|
||||
// required-if-enabled service user id is validated only when sync is on. ---
|
||||
|
||||
@IsOptional()
|
||||
@IsIn(['true', 'false'])
|
||||
@IsString()
|
||||
GIT_SYNC_ENABLED: string;
|
||||
|
||||
// Whether to serve the per-space vaults over smart-HTTP (the /git host).
|
||||
// When unset, defaults to GIT_SYNC_ENABLED (see isGitSyncHttpEnabled).
|
||||
@IsOptional()
|
||||
@IsIn(['true', 'false'])
|
||||
@IsString()
|
||||
GIT_SYNC_HTTP_ENABLED: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
GIT_SYNC_DATA_DIR: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
GIT_SYNC_REMOTE_TEMPLATE: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
GIT_SYNC_POLL_INTERVAL_MS: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
GIT_SYNC_DEBOUNCE_MS: string;
|
||||
|
||||
// Watchdog timeout (ms) for the spawned `git http-backend` process (default
|
||||
// 120000): a stalled receive-pack is killed so it cannot hold the per-space
|
||||
// lock forever. Optional int (validated as a string env).
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
GIT_SYNC_BACKEND_TIMEOUT_MS: string;
|
||||
|
||||
|
||||
// Required when git-sync is enabled: the service user create/move/rename/delete
|
||||
// are attributed to (issue #194 §7.2). Optional otherwise.
|
||||
@ValidateIf((obj) => obj.GIT_SYNC_ENABLED === 'true')
|
||||
@IsNotEmpty()
|
||||
@IsString()
|
||||
GIT_SYNC_SERVICE_USER_ID: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
GIT_SYNC_SSH_KEY_PATH: string;
|
||||
}
|
||||
|
||||
export function validate(config: Record<string, any>) {
|
||||
|
||||
41
apps/server/src/integrations/git-sync/git-sync.constants.ts
Normal file
41
apps/server/src/integrations/git-sync/git-sync.constants.ts
Normal file
@@ -0,0 +1,41 @@
|
||||
/**
|
||||
* Git-sync control-plane constants.
|
||||
*
|
||||
* Event/job names are REUSED from the shared event contract (event.contants.ts)
|
||||
* so the listener subscribes to the exact names the rest of the server emits —
|
||||
* never a string literal that could drift. The Redis lock-key prefix + TTL back
* the single-writer leader lock (§9). The debounce window for per-space event
* coalescing (§10) is not defined here; it comes from GIT_SYNC_DEBOUNCE_MS.
|
||||
*/
|
||||
import { EventName } from '../../common/events/event.contants';
|
||||
|
||||
/**
 * Page lifecycle events the git-sync listener subscribes to. Any of them
 * firing in an enabled space schedules a debounced sync cycle.
 *
 * NOTE: body edits arrive as PAGE_UPDATED (emitted from persistence.extension),
 * NOT as EventName.PAGE_CONTENT_UPDATED. That name is a BullMQ queue-job name,
 * not an EventEmitter2 event, so @OnEvent would never fire for it.
 */
export const GIT_SYNC_PAGE_EVENTS = [
  // Structural + content edits.
  EventName.PAGE_CREATED,
  EventName.PAGE_UPDATED,
  EventName.PAGE_MOVED,
  // Cross-space move (cross-repo).
  EventName.PAGE_MOVED_TO_SPACE,
  // Trash transitions (deletes are soft).
  EventName.PAGE_SOFT_DELETED,
  EventName.PAGE_RESTORED,
] as const;
|
||||
|
||||
/**
 * Prefix of the Redis key that holds the per-space leader lock.
 */
export const GIT_SYNC_LOCK_PREFIX = 'git-sync:lock:';
|
||||
|
||||
/**
 * Time-to-live of the leader lock, in milliseconds (5 minutes).
 *
 * It has to be longer than the longest expected sync cycle, otherwise the lock
 * could lapse mid-cycle; after a crash it simply expires by itself. Overlap on
 * a single instance is prevented by the orchestrator's in-process mutex, while
 * the Redis lock keeps two instances from racing on the same space.
 */
export const GIT_SYNC_LOCK_TTL_MS = 300_000;
|
||||
@@ -0,0 +1,115 @@
|
||||
// Unit tests for the ops/testing controller. The orchestrator, env,
|
||||
// and the workspace-ability factory are hand-built mocks. We assert the admin
|
||||
// guard (non-admin -> ForbiddenException, no orchestrator call), that trigger
|
||||
// uses the workspace from request context (never the body), and that status
|
||||
// returns the env-derived object.
|
||||
import { ForbiddenException } from '@nestjs/common';
|
||||
import {
|
||||
WorkspaceCaslAction,
|
||||
WorkspaceCaslSubject,
|
||||
} from '../../core/casl/interfaces/workspace-ability.type';
|
||||
import { GitSyncController } from './git-sync.controller';
|
||||
|
||||
type AnyMock = jest.Mock;
|
||||
|
||||
/** Everything `build()` hands back: the controller plus each mock it was wired with. */
interface Built {
  /** Controller under test. */
  controller: GitSyncController;
  /** Mock orchestrator; only `runOnce` is stubbed. */
  orchestrator: { runOnce: AnyMock };
  /** Mock environment service, keyed by getter name. */
  env: Record<string, AnyMock>;
  /** Mock ability factory returning `ability`. */
  workspaceAbility: { createForUser: AnyMock };
  /** Mock ability whose `cannot` result drives the admin guard. */
  ability: { cannot: AnyMock };
}
|
||||
|
||||
/**
 * Wire a GitSyncController to hand-built mocks.
 *
 * `opts.cannot` (default false) is what the mocked ability answers for every
 * `cannot(...)` check: true simulates a non-admin, false an admin.
 */
function build(opts: { cannot?: boolean } = {}): Built {
  const denied = opts.cannot === undefined ? false : opts.cannot;

  const ability = { cannot: jest.fn(() => denied) };
  const workspaceAbility = { createForUser: jest.fn(() => ability) };

  const orchestrator = {
    runOnce: jest.fn(async () => {
      return { spaceId: 'space-1', ran: true };
    }),
  };

  const env: Record<string, AnyMock> = {};
  env.isGitSyncEnabled = jest.fn(() => true);
  env.getGitSyncDataDir = jest.fn(() => '/vaults');
  env.getGitSyncPollIntervalMs = jest.fn(() => 15000);
  env.getGitSyncDebounceMs = jest.fn(() => 2000);
  env.getGitSyncServiceUserId = jest.fn(() => 'svc-user');

  return {
    controller: new GitSyncController(
      orchestrator as any,
      env as any,
      workspaceAbility as any,
    ),
    orchestrator,
    env,
    workspaceAbility,
    ability,
  };
}
|
||||
|
||||
// Request-context fixtures: the authenticated user and the workspace resolved
// for the request.
const USER = { id: 'user-1' } as any;
const WORKSPACE = { id: 'ctx-ws' } as any;

// Clear recorded mock calls before every test.
beforeEach(() => {
  jest.clearAllMocks();
});
|
||||
|
||||
describe('GitSyncController', () => {
  // Both endpoints must consult exactly this permission.
  const expectAdminCheck = (ability: { cannot: AnyMock }) =>
    expect(ability.cannot).toHaveBeenCalledWith(
      WorkspaceCaslAction.Manage,
      WorkspaceCaslSubject.Settings,
    );

  describe('trigger', () => {
    it('blocks a non-admin: throws ForbiddenException and never calls runOnce', async () => {
      const built = build({ cannot: true });

      const attempt = built.controller.trigger(
        { spaceId: 'space-1' } as any,
        USER,
        WORKSPACE,
      );
      await expect(attempt).rejects.toBeInstanceOf(ForbiddenException);

      expectAdminCheck(built.ability);
      expect(built.orchestrator.runOnce).not.toHaveBeenCalled();
    });

    it('admin: calls runOnce(dto.spaceId, workspace.id) using the workspace from context', async () => {
      const built = build({ cannot: false });

      // The body smuggles in a workspaceId; the controller must ignore it and
      // use the workspace from the request context instead.
      const hostileBody = { spaceId: 'space-1', workspaceId: 'evil-ws' } as any;
      const response = await built.controller.trigger(
        hostileBody,
        USER,
        WORKSPACE,
      );

      expect(built.orchestrator.runOnce).toHaveBeenCalledWith(
        'space-1',
        'ctx-ws',
      );
      expect(response).toEqual({ spaceId: 'space-1', ran: true });
    });
  });

  describe('status', () => {
    it('blocks a non-admin: throws ForbiddenException and never reads env', async () => {
      const built = build({ cannot: true });

      const attempt = built.controller.status(USER, WORKSPACE);
      await expect(attempt).rejects.toBeInstanceOf(ForbiddenException);

      expectAdminCheck(built.ability);
      // The guard rejects before any env getter is consulted.
      expect(built.env.isGitSyncEnabled).not.toHaveBeenCalled();
    });

    it('admin: returns the env-derived status object', async () => {
      const built = build({ cannot: false });

      const response = await built.controller.status(USER, WORKSPACE);

      const expectedStatus = {
        enabled: true,
        dataDir: '/vaults',
        pollIntervalMs: 15000,
        debounceMs: 2000,
        serviceUserConfigured: true,
      };
      expect(response).toEqual(expectedStatus);
    });
  });
});
|
||||
97
apps/server/src/integrations/git-sync/git-sync.controller.ts
Normal file
97
apps/server/src/integrations/git-sync/git-sync.controller.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import {
|
||||
Body,
|
||||
Controller,
|
||||
ForbiddenException,
|
||||
HttpCode,
|
||||
HttpStatus,
|
||||
Post,
|
||||
Get,
|
||||
UseGuards,
|
||||
} from '@nestjs/common';
|
||||
import { JwtAuthGuard } from '../../common/guards/jwt-auth.guard';
|
||||
import { AuthUser } from '../../common/decorators/auth-user.decorator';
|
||||
import { AuthWorkspace } from '../../common/decorators/auth-workspace.decorator';
|
||||
import { User, Workspace } from '@docmost/db/types/entity.types';
|
||||
import WorkspaceAbilityFactory from '../../core/casl/abilities/workspace-ability.factory';
|
||||
import {
|
||||
WorkspaceCaslAction,
|
||||
WorkspaceCaslSubject,
|
||||
} from '../../core/casl/interfaces/workspace-ability.type';
|
||||
import { EnvironmentService } from '../environment/environment.service';
|
||||
import { IsUUID } from 'class-validator';
|
||||
import {
|
||||
GitSyncOrchestrator,
|
||||
GitSyncRunStatus,
|
||||
} from './services/git-sync.orchestrator';
|
||||
|
||||
/**
 * Request body of the manual one-shot trigger endpoint.
 */
class TriggerGitSyncDto {
  /**
   * Id of the space to sync. The @IsUUID decorator is load-bearing: the global
   * ValidationPipe runs with whitelist:true and STRIPS any field that carries
   * no validation decorator, so without it `spaceId` would arrive as undefined.
   */
  @IsUUID()
  spaceId: string;
}
|
||||
|
||||
/**
 * Ops/testing endpoints for the git-sync control plane, mounted under the
 * global `/api` prefix:
 *  - POST /api/git-sync/trigger { spaceId } — run one cycle now and await it;
 *  - GET  /api/git-sync/status             — report whether sync is on + config.
 *
 * Both require a JWT-authenticated caller who may Manage workspace Settings
 * (mirroring WorkspaceController), so only workspace admins can force a cycle.
 */
@UseGuards(JwtAuthGuard)
@Controller('git-sync')
export class GitSyncController {
  constructor(
    private readonly orchestrator: GitSyncOrchestrator,
    private readonly environmentService: EnvironmentService,
    private readonly workspaceAbility: WorkspaceAbilityFactory,
  ) {}

  /**
   * Admin guard shared by both endpoints.
   *
   * @throws ForbiddenException when the caller cannot Manage Settings
   */
  private assertAdmin(user: User, workspace: Workspace): void {
    const denied = this.workspaceAbility
      .createForUser(user, workspace)
      .cannot(WorkspaceCaslAction.Manage, WorkspaceCaslSubject.Settings);

    if (denied) {
      throw new ForbiddenException();
    }
  }

  /**
   * Run a single sync cycle for `dto.spaceId` and return its result.
   * The workspace id always comes from the request context, never the body.
   */
  @HttpCode(HttpStatus.OK)
  @Post('trigger')
  async trigger(
    @Body() dto: TriggerGitSyncDto,
    @AuthUser() user: User,
    @AuthWorkspace() workspace: Workspace,
  ): Promise<GitSyncRunStatus> {
    this.assertAdmin(user, workspace);

    const { spaceId } = dto;
    return this.orchestrator.runOnce(spaceId, workspace.id);
  }

  /**
   * Report the environment-derived git-sync configuration. The service user id
   * itself is not returned, only whether one is configured.
   */
  @HttpCode(HttpStatus.OK)
  @Get('status')
  async status(
    @AuthUser() user: User,
    @AuthWorkspace() workspace: Workspace,
  ): Promise<{
    enabled: boolean;
    dataDir: string;
    pollIntervalMs: number;
    debounceMs: number;
    serviceUserConfigured: boolean;
  }> {
    // Guard first: a non-admin must be rejected before any config is read.
    this.assertAdmin(user, workspace);

    const env = this.environmentService;
    return {
      enabled: env.isGitSyncEnabled(),
      dataDir: env.getGitSyncDataDir(),
      pollIntervalMs: env.getGitSyncPollIntervalMs(),
      debounceMs: env.getGitSyncDebounceMs(),
      serviceUserConfigured: Boolean(env.getGitSyncServiceUserId()),
    };
  }
}
|
||||
59
apps/server/src/integrations/git-sync/git-sync.loader.ts
Normal file
59
apps/server/src/integrations/git-sync/git-sync.loader.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import { pathToFileURL } from 'node:url';
|
||||
import type {
|
||||
VaultGit as VaultGitClass,
|
||||
vaultGitEnv as vaultGitEnvFn,
|
||||
runCycle as runCycleFn,
|
||||
parseDocmostMarkdown as parseDocmostMarkdownFn,
|
||||
markdownToProseMirror as markdownToProseMirrorFn,
|
||||
} from '@docmost/git-sync';
|
||||
|
||||
/**
|
||||
* Runtime value-export surface of the ESM-only `@docmost/git-sync` package that
|
||||
* the server consumes. Types are imported with `import type` (erased at compile,
|
||||
* no runtime require); only the VALUE exports below need the dynamic-load
|
||||
* treatment so a CJS `require()` of the ESM package never happens.
|
||||
*/
|
||||
// Every member is typed as `typeof` the same-named export pulled in by the
// type-only import at the top of this file, so the dynamically loaded module is
// checked against the package's own declarations.
interface GitSyncModule {
|
||||
VaultGit: typeof VaultGitClass;
|
||||
vaultGitEnv: typeof vaultGitEnvFn;
|
||||
runCycle: typeof runCycleFn;
|
||||
parseDocmostMarkdown: typeof parseDocmostMarkdownFn;
|
||||
markdownToProseMirror: typeof markdownToProseMirrorFn;
|
||||
}
|
||||
|
||||
// With `module: commonjs`, TypeScript rewrites a literal `import()` into
// `require()`, and `require()` cannot load the ESM-only `@docmost/git-sync`
// package. Building the call through the Function constructor hides it from the
// compiler, so a genuine dynamic `import()` reaches the runtime and can load ESM
// from CommonJS (the same trick is used in
// apps/server/src/core/ai-chat/tools/docmost-client.loader.ts and
// integrations/mcp/mcp.service.ts).
type EsmImporter = (specifier: string) => Promise<unknown>;

const esmImport = new Function(
  'specifier',
  'return import(specifier)',
) as EsmImporter;

// Cache of the in-flight or completed load; guarantees the dynamic import is
// attempted at most once while it is pending or has succeeded.
let modulePromise: Promise<GitSyncModule> | null = null;
|
||||
|
||||
/**
 * Load the ESM-only `@docmost/git-sync` package on first use and reuse the
 * result afterwards.
 *
 * The package entry is resolved to an absolute path and imported as a `file://`
 * URL, so the package "exports" map is honoured without depending on the
 * resolution base of a bare specifier.
 *
 * A failed load is not cached: the memoized promise is cleared on rejection so
 * a later call can retry.
 *
 * @returns The value exports of `@docmost/git-sync`.
 */
export async function loadGitSync(): Promise<GitSyncModule> {
  if (modulePromise) {
    return modulePromise;
  }

  const importEntry = async (): Promise<GitSyncModule> => {
    const entryPath = require.resolve('@docmost/git-sync');
    const entryUrl = pathToFileURL(entryPath).href;
    const loaded = (await esmImport(entryUrl)) as GitSyncModule;
    return loaded;
  };

  // The reset must stay in a chained `.catch` (not a try/catch inside
  // `importEntry`): it then always runs after the assignment below, so a
  // rejected promise is never left in the cache.
  modulePromise = importEntry().catch((err) => {
    modulePromise = null;
    throw err;
  });
  return modulePromise;
}
|
||||
62
apps/server/src/integrations/git-sync/git-sync.module.ts
Normal file
62
apps/server/src/integrations/git-sync/git-sync.module.ts
Normal file
@@ -0,0 +1,62 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { ScheduleModule } from '@nestjs/schedule';
|
||||
import { DatabaseModule } from '@docmost/db/database.module';
|
||||
import { EnvironmentModule } from '../environment/environment.module';
|
||||
import { CollaborationModule } from '../../collaboration/collaboration.module';
|
||||
import { PageModule } from '../../core/page/page.module';
|
||||
import { AuthModule } from '../../core/auth/auth.module';
|
||||
import { GitmostDataSourceService } from './services/gitmost-datasource.service';
|
||||
import { GitSyncOrchestrator } from './services/git-sync.orchestrator';
|
||||
import { SpaceLockService } from './services/space-lock.service';
|
||||
import { VaultRegistryService } from './services/vault-registry.service';
|
||||
import { PageChangeListener } from './listeners/page-change.listener';
|
||||
import { GitSyncController } from './git-sync.controller';
|
||||
import { GitHttpBackendService } from './http/git-http-backend.service';
|
||||
import { GitHttpService } from './http/git-http.service';
|
||||
|
||||
/**
|
||||
* The git-sync control plane. Wires the native datasource, the
|
||||
* orchestrator (poll + leader-lock), the per-space vault registry, the
|
||||
 * event-driven listener, the admin trigger controller, and the /git smart-HTTP
 * host services (GitHttpService / GitHttpBackendService).
|
||||
*
|
||||
* Imports:
|
||||
* - DatabaseModule (global) — PageRepo / SpaceRepo / KyselyDB for the
|
||||
* datasource + orchestrator queries;
|
||||
* - EnvironmentModule (global) — EnvironmentService config;
|
||||
* - CollaborationModule — exports CollaborationGateway for native body writes;
|
||||
 * - PageModule — exports PageService for structural mutations;
 * - AuthModule — exports AuthService (credential verification for /git HTTP Basic);
|
||||
* - ScheduleModule (NOT forRoot) — so SchedulerRegistry is injectable (the
|
||||
* orchestrator registers a DYNAMIC poll interval in onModuleInit). forRoot()
|
||||
* is already registered globally by TelemetryModule; importing the plain
|
||||
* module here avoids a duplicate scheduler registration.
|
||||
*
|
||||
* RedisService is provided by the global RedisModule (app.module) and CASL's
|
||||
* WorkspaceAbilityFactory by the global CaslModule — both resolve without an
|
||||
* explicit import here.
|
||||
*/
|
||||
// Nest module definition for git-sync. The order of the arrays below is left
// exactly as authored.
@Module({
|
||||
imports: [
|
||||
DatabaseModule,
|
||||
EnvironmentModule,
|
||||
CollaborationModule,
|
||||
PageModule,
|
||||
// AuthModule exports AuthService (verifyUserCredentials for /git HTTP Basic).
|
||||
AuthModule,
|
||||
// Plain ScheduleModule, deliberately NOT forRoot(): it is imported only so
// SchedulerRegistry is injectable here.
ScheduleModule,
|
||||
],
|
||||
// Admin-gated trigger/status endpoints.
controllers: [GitSyncController],
|
||||
providers: [
|
||||
GitmostDataSourceService,
|
||||
GitSyncOrchestrator,
|
||||
SpaceLockService,
|
||||
VaultRegistryService,
|
||||
PageChangeListener,
|
||||
// /git smart-HTTP host (the raw Fastify route in main.ts resolves these).
|
||||
GitHttpBackendService,
|
||||
GitHttpService,
|
||||
],
|
||||
// Exported so the raw Fastify route registered in main.ts can resolve the
|
||||
// handler from the Nest container (app.get(GitHttpService)).
|
||||
exports: [GitHttpService],
|
||||
})
|
||||
export class GitSyncModule {}
|
||||
@@ -0,0 +1,375 @@
|
||||
// Unit tests for GitHttpBackendService.run and the pure CGI helpers it relies on
// (parseCgiResponse, splitCgiBuffer, buildGitBackendCgiEnv).
|
||||
// The header/body split MUST treat the body as binary (Buffer) and never
|
||||
// stringify it; the Status: header sets the HTTP status (default 200).
|
||||
import { EventEmitter } from 'node:events';
|
||||
import { spawn } from 'node:child_process';
|
||||
|
||||
// Mock the spawn boundary so run() never launches a real `git http-backend`; the
|
||||
// fake child lets us drive every stdout/stderr/error/close branch by hand.
|
||||
jest.mock('node:child_process', () => ({ spawn: jest.fn() }));
|
||||
// vaultGitEnv just builds the CGI env overlay; stub it to a passthrough so the
|
||||
// service runs without the real engine. The service loads it at runtime via the
|
||||
// `loadGitSync()` bridge (the ESM `@docmost/git-sync` package cannot be
|
||||
// `require()`d under jest), so we mock that loader rather than the package.
|
||||
jest.mock('../git-sync.loader', () => ({
|
||||
loadGitSync: jest.fn(async () => ({
|
||||
vaultGitEnv: (overlay: Record<string, string>) => overlay,
|
||||
})),
|
||||
}));
|
||||
|
||||
import {
|
||||
parseCgiResponse,
|
||||
splitCgiBuffer,
|
||||
buildGitBackendCgiEnv,
|
||||
GitHttpBackendService,
|
||||
} from './git-http-backend.service';
|
||||
import { Logger } from '@nestjs/common';
|
||||
import type { GitHttpBackendRequest } from './git-http-backend.service';
|
||||
|
||||
const spawnMock = spawn as unknown as jest.Mock;
|
||||
|
||||
/**
 * Stand-in for the spawned `git http-backend` process: an EventEmitter carrying
 * emitter-based stdout/stderr, a stdin whose `end`/`write` are jest mocks, and a
 * mocked `kill` so tests can assert the signal the watchdog sends.
 */
function fakeChild() {
  // stdin only records calls; nothing is actually consumed.
  const stdin = Object.assign(new EventEmitter(), {
    end: jest.fn(),
    write: jest.fn(),
  });

  return Object.assign(new EventEmitter() as any, {
    stdout: new EventEmitter(),
    stderr: new EventEmitter(),
    stdin,
    kill: jest.fn(),
  });
}
|
||||
|
||||
/**
 * Minimal stand-in for a raw Node ServerResponse. Records the status code, the
 * headers set via `setHeader` (in `_headers`), every chunk passed to `write` or
 * `end` (in `_written`), and flips `writableEnded` once `end` is called.
 */
function fakeRes() {
  const res: any = {
    headersSent: false,
    writableEnded: false,
    statusCode: 200,
    _headers: {} as Record<string, string>,
    _written: [] as Buffer[],
  };

  res.setHeader = jest.fn((name: string, value: string) => {
    res._headers[name] = value;
  });

  res.write = jest.fn((chunk: Buffer) => {
    res._written.push(chunk);
    return true;
  });

  res.end = jest.fn((chunk?: Buffer | string) => {
    if (chunk !== undefined) {
      res._written.push(chunk as Buffer);
    }
    res.writableEnded = true;
  });

  return res;
}
|
||||
|
||||
/**
 * Stand-in for a raw Node IncomingMessage: an EventEmitter with a mocked
 * `pipe`. The tests here issue GET requests, so no body is ever piped.
 */
function fakeReq() {
  return Object.assign(new EventEmitter() as any, { pipe: jest.fn() });
}
|
||||
|
||||
/** The request every `run()` test sends: a GET of `info/refs` for upload-pack. */
const baseRequest: GitHttpBackendRequest = {
  spaceId: 'space-1',
  subpath: 'info/refs',
  method: 'GET',
  queryString: 'service=git-upload-pack',
  contentType: '',
  remoteUser: 'alice@example.com',
};

/**
 * Build a GitHttpBackendService around a stubbed environment service.
 *
 * @param backendTimeoutMs Watchdog timeout handed to the service; tests pass a
 *   tiny value (or use fake timers) to exercise the timeout branch.
 */
function buildService(backendTimeoutMs = 120000) {
  const getGitSyncDataDir = jest.fn(() => '/vaults');
  const getGitSyncBackendTimeoutMs = jest.fn(() => backendTimeoutMs);
  const environment = { getGitSyncDataDir, getGitSyncBackendTimeoutMs };
  return new GitHttpBackendService(environment as any);
}

// `run()` awaits the async `loadGitSync()` bridge before spawning, so the child
// (and its stream handlers) only exists after `run()` has yielded once. Tests
// that drive the fake child by hand await this first: by the time the
// setImmediate callback runs, all pending microtasks have been processed and
// `run()` has reached the spawn.
const flush = (): Promise<unknown> =>
  new Promise((resolve) => {
    setImmediate(resolve);
  });
|
||||
|
||||
describe('GitHttpBackendService.run', () => {
  const silent = () => undefined;

  /** Register a fresh fake child with the spawn mock and build service + response. */
  const arrange = (backendTimeoutMs = 120000) => {
    const child = fakeChild();
    spawnMock.mockReturnValue(child);
    const service = buildService(backendTimeoutMs);
    const res = fakeRes();
    return { child, service, res };
  };

  beforeEach(() => {
    spawnMock.mockReset();
    jest.spyOn(Logger.prototype, 'warn').mockImplementation(silent);
    jest.spyOn(Logger.prototype, 'error').mockImplementation(silent);
  });
  afterEach(() => jest.restoreAllMocks());

  it('(a) responds 500 when the child errors before any headers were written', async () => {
    const { child, service, res } = arrange();

    const running = service.run(baseRequest, fakeReq(), res);
    await flush();
    // A child 'error' with no prior stdout means nothing was sent yet -> 500.
    child.emit('error', new Error('ENOENT spawn git'));
    await running;

    expect(res.statusCode).toBe(500);
    expect(res._headers['Content-Type']).toBe('text/plain');
    expect(res.end).toHaveBeenCalledWith('Internal server error');
  });

  it('(a) responds 500 when the child closes before a complete CGI header block', async () => {
    const { child, service, res } = arrange();

    const running = service.run(baseRequest, fakeReq(), res);
    await flush();
    // Only stderr output, then an exit: no valid CGI response -> 500.
    child.stderr.emit('data', Buffer.from('fatal: boom'));
    child.emit('close', 128);
    await running;

    expect(res.statusCode).toBe(500);
    expect(res.end).toHaveBeenCalledWith('Internal server error');
  });

  it('(b) parses the CGI header block, sets status/headers, writes the body', async () => {
    const { child, service, res } = arrange();

    const running = service.run(baseRequest, fakeReq(), res);
    await flush();
    // One chunk carrying the whole response: status, header, blank line, body.
    const cgiOutput = Buffer.from(
      'Status: 200 OK\r\nContent-Type: application/x-git-upload-pack-advertisement\r\n\r\nPACKBODY',
      'utf8',
    );
    child.stdout.emit('data', cgiOutput);
    child.emit('close', 0);
    await running;

    expect(res.statusCode).toBe(200);
    expect(res._headers['Content-Type']).toBe(
      'application/x-git-upload-pack-advertisement',
    );
    const sentBody = Buffer.concat(
      res._written.map((part: Buffer) => Buffer.from(part)),
    ).toString();
    expect(sentBody).toContain('PACKBODY');
    expect(res.writableEnded).toBe(true);
  });

  it('(c) swallows a stdout stream error (EPIPE) without throwing or 500ing', async () => {
    const { child, service, res } = arrange();
    const warnSpy = jest.spyOn(Logger.prototype, 'warn');

    const running = service.run(baseRequest, fakeReq(), res);
    await flush();
    // Both stream 'error' handlers must absorb the event: no throw, no 500.
    const emitStdoutError = () => child.stdout.emit('error', new Error('EPIPE'));
    const emitStderrError = () => child.stderr.emit('error', new Error('EPIPE'));
    expect(emitStdoutError).not.toThrow();
    expect(emitStderrError).not.toThrow();
    expect(warnSpy).toHaveBeenCalled();
    expect(res.statusCode).not.toBe(500);

    // Close the child so run() settles and the promise does not dangle.
    child.emit('close', 0);
    await running;
  });

  it('(d) timeout: a child that never closes is killed and a 500 is sent', async () => {
    // Simulates a stalled git-receive-pack: the child emits nothing at all.
    // With a tiny watchdog the promise must still resolve, the child must be
    // killed, and (no headers having been sent) a clean 500 must go out.
    const { child, service, res } = arrange(5); // 5ms watchdog
    const warnSpy = jest.spyOn(Logger.prototype, 'warn');

    // Only the watchdog can resolve this: no close/error is ever emitted.
    await service.run(baseRequest, fakeReq(), res);

    expect(child.kill).toHaveBeenCalledWith('SIGTERM');
    expect(warnSpy).toHaveBeenCalled();
    expect(res.statusCode).toBe(500);
    expect(res.end).toHaveBeenCalledWith('Internal server error');
  });

  it('(d) timeout watchdog is cleared on a normal close (no kill, no 500)', async () => {
    // A request finishing inside the watchdog window must neither be killed nor
    // answered with the timeout 500: the timer is cleared when the child closes.
    jest.useFakeTimers();
    try {
      const { child, service, res } = arrange(120000);

      const running = service.run(baseRequest, fakeReq(), res);
      // loadGitSync resolves on a real microtask; step past it under fake timers.
      await Promise.resolve();
      await Promise.resolve();

      const cgiOutput = Buffer.from(
        'Status: 200 OK\r\nContent-Type: text/plain\r\n\r\nOK',
        'utf8',
      );
      child.stdout.emit('data', cgiOutput);
      child.emit('close', 0);
      await running;

      // Advancing beyond the watchdog window must not trigger it any more.
      jest.advanceTimersByTime(200000);
      expect(child.kill).not.toHaveBeenCalled();
      expect(res.statusCode).toBe(200);
    } finally {
      jest.useRealTimers();
    }
  });

  it('spawn throwing synchronously -> 500 (spawn-failed)', async () => {
    spawnMock.mockImplementation(() => {
      throw new Error('spawn EACCES');
    });
    const service = buildService();
    const res = fakeRes();

    await service.run(baseRequest, fakeReq(), res);

    expect(res.statusCode).toBe(500);
    expect(res.end).toHaveBeenCalledWith('Internal server error');
  });
});
|
||||
|
||||
describe('buildGitBackendCgiEnv', () => {
  const projectRoot = '/vaults';
  const base = {
    spaceId: 'space-1',
    subpath: 'info/refs',
    method: 'GET',
    queryString: 'service=git-upload-pack',
    contentType: '',
    remoteUser: 'alice@example.com',
  };

  it('points PATH_INFO at the NON-bare repo dir (no .git suffix)', () => {
    // Regression guard. The vault is a working repo at <root>/<spaceId>, so
    // PATH_INFO has to be /<spaceId>/<subpath>; with a `.git` suffix git
    // http-backend looked for <root>/<spaceId>.git and 404'd every fetch/push.
    const { PATH_INFO, GIT_PROJECT_ROOT } = buildGitBackendCgiEnv(
      base,
      projectRoot,
    );
    expect(PATH_INFO).toBe('/space-1/info/refs');
    expect(PATH_INFO).not.toContain('.git');
    expect(GIT_PROJECT_ROOT).toBe('/vaults');
  });

  it('forwards method/query/content-type/remote-user and exports all repos', () => {
    const pushRequest = {
      ...base,
      method: 'POST',
      subpath: 'git-receive-pack',
      contentType: 'application/x-git-receive-pack-request',
      queryString: '',
    };
    const env = buildGitBackendCgiEnv(pushRequest, projectRoot);
    expect(env.REQUEST_METHOD).toBe('POST');
    expect(env.PATH_INFO).toBe('/space-1/git-receive-pack');
    expect(env.CONTENT_TYPE).toBe('application/x-git-receive-pack-request');
    expect(env.REMOTE_USER).toBe('alice@example.com');
    expect(env.GIT_HTTP_EXPORT_ALL).toBe('1');
  });

  it('sets GIT_PROTOCOL only when the client sent the header', () => {
    const withoutHeader = buildGitBackendCgiEnv(base, projectRoot);
    expect(withoutHeader.GIT_PROTOCOL).toBeUndefined();

    const withHeader = buildGitBackendCgiEnv(
      { ...base, gitProtocol: 'version=2' },
      projectRoot,
    );
    expect(withHeader.GIT_PROTOCOL).toBe('version=2');
  });
});
|
||||
|
||||
describe('parseCgiResponse', () => {
  it('defaults to status 200 with no Status header', () => {
    const r = parseCgiResponse('Content-Type: application/x-git-upload-pack-result');
    expect(r.statusCode).toBe(200);
    expect(r.headers).toEqual([
      ['Content-Type', 'application/x-git-upload-pack-result'],
    ]);
  });

  it('honors a Status header and does not forward it', () => {
    const r = parseCgiResponse('Status: 404 Not Found\nContent-Type: text/plain');
    expect(r.statusCode).toBe(404);
    expect(r.headers).toEqual([['Content-Type', 'text/plain']]);
  });

  it('parses multiple headers and trims whitespace', () => {
    const r = parseCgiResponse(
      'Status: 403 Forbidden\r\nContent-Type: text/plain \r\nX-Foo: bar ',
    );
    expect(r.statusCode).toBe(403);
    expect(r.headers).toEqual([
      ['Content-Type', 'text/plain'],
      ['X-Foo', 'bar'],
    ]);
  });

  it('ignores malformed (colon-less) lines defensively', () => {
    const r = parseCgiResponse('Content-Type: text/plain\ngarbage-line\nX-A: b');
    expect(r.statusCode).toBe(200);
    expect(r.headers).toEqual([
      ['Content-Type', 'text/plain'],
      ['X-A', 'b'],
    ]);
  });

  it('ignores an out-of-range Status code and keeps the default', () => {
    // Numeric but outside the accepted 100..599 window, on both sides.
    for (const status of ['99 Too Low', '600 Too High', '999 Way Off']) {
      const r = parseCgiResponse(`Status: ${status}\nContent-Type: text/plain`);
      expect(r.statusCode).toBe(200);
      // The Status line is still consumed, never forwarded as a header.
      expect(r.headers).toEqual([['Content-Type', 'text/plain']]);
    }
  });

  it('ignores a non-numeric Status value and keeps the default', () => {
    const r = parseCgiResponse('Status: not-a-number\nContent-Type: text/plain');
    expect(r.statusCode).toBe(200);
  });

  it('treats the Status header case-insensitively', () => {
    const r = parseCgiResponse('status: 500 Boom');
    expect(r.statusCode).toBe(500);
    expect(r.headers).toEqual([]);
  });
});
|
||||
|
||||
describe('splitCgiBuffer', () => {
  it('splits on a CRLF blank line and keeps the body as bytes', () => {
    const rawBody = [0x00, 0x01, 0x02, 0xff];
    const headerBytes = Buffer.from(
      'Status: 200 OK\r\nContent-Type: text/plain\r\n\r\n',
      'utf8',
    );
    const result = splitCgiBuffer(
      Buffer.concat([headerBytes, Buffer.from(rawBody)]),
    );
    expect(result).not.toBeNull();
    expect(result!.headerText).toBe('Status: 200 OK\r\nContent-Type: text/plain');
    expect(Array.from(result!.body)).toEqual([0x00, 0x01, 0x02, 0xff]);
  });

  it('splits on a bare LF blank line', () => {
    const result = splitCgiBuffer(
      Buffer.from('Content-Type: text/plain\n\nhello', 'utf8'),
    );
    expect(result).not.toBeNull();
    expect(result!.headerText).toBe('Content-Type: text/plain');
    expect(result!.body.toString('utf8')).toBe('hello');
  });

  it('returns an empty body when nothing follows the separator', () => {
    const result = splitCgiBuffer(
      Buffer.from('Content-Type: text/plain\r\n\r\n', 'utf8'),
    );
    expect(result).not.toBeNull();
    expect(result!.body.length).toBe(0);
  });

  it('returns null when there is no blank-line separator yet', () => {
    const incomplete = Buffer.from('Content-Type: text/plain\r\nincomplete', 'utf8');
    expect(splitCgiBuffer(incomplete)).toBeNull();
  });
});
|
||||
@@ -0,0 +1,335 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { spawn } from 'node:child_process';
|
||||
import type { IncomingMessage, ServerResponse } from 'node:http';
|
||||
import { loadGitSync } from '../git-sync.loader';
|
||||
import { EnvironmentService } from '../../environment/environment.service';
|
||||
|
||||
/** The parsed head of a CGI response: the HTTP status plus the header pairs. */
export interface ParsedCgiResponse {
  /** HTTP status taken from the CGI `Status:` header; 200 when absent/invalid. */
  statusCode: number;
  /**
   * `[name, value]` pairs in the order they were emitted. Names keep their
   * original casing (they are NOT lower-cased); names and values are trimmed.
   */
  headers: Array<[string, string]>;
}

/**
 * Parse the CGI header block emitted by `git http-backend` into an HTTP status
 * and a list of header pairs.
 *
 * The input is ONLY the header text (everything before the blank-line
 * separator); the binary body is split off by the caller on the raw Buffer and
 * is never stringified.
 *
 * CGI semantics (RFC 3875 §6): a `Status: <code> <reason>` header sets the HTTP
 * status (default 200 when absent). The `Status` name is matched
 * case-insensitively, a code outside 100..599 or a non-numeric value is
 * ignored, and the `Status` header itself is never forwarded. Every other
 * `Name: value` line is forwarded with name and value trimmed.
 *
 * Lines that cannot be a header are skipped defensively: empty lines, lines
 * without a ':', and lines whose name is empty (e.g. `: value`). Forwarding an
 * empty name would make Node's `setHeader` throw while the response is being
 * written.
 *
 * Pure and framework-free so it is unit-testable in isolation.
 *
 * @param headerBlock Header text with lines separated by CRLF or bare LF.
 * @returns The resolved status code and the forwardable header pairs.
 */
export function parseCgiResponse(headerBlock: string): ParsedCgiResponse {
  let statusCode = 200;
  const headers: Array<[string, string]> = [];

  // Header lines may be separated by CRLF or LF; split on either.
  for (const line of headerBlock.split(/\r?\n/)) {
    const sep = line.indexOf(':');
    if (sep === -1) continue; // empty or colon-less line — not a header

    const name = line.slice(0, sep).trim();
    if (name.length === 0) continue; // ": value" — no usable header name

    const value = line.slice(sep + 1).trim();

    if (name.toLowerCase() === 'status') {
      // `Status: 404 Not Found` — the leading integer is the HTTP status code.
      const code = Number.parseInt(value, 10);
      if (Number.isFinite(code) && code >= 100 && code <= 599) {
        statusCode = code;
      }
      continue; // never forward the CGI Status header itself
    }

    headers.push([name, value]);
  }

  return { statusCode, headers };
}
|
||||
|
||||
/**
 * Split a raw CGI response buffer at the FIRST blank-line boundary, whichever
 * form it takes (`\r\n\r\n` or `\n\n`).
 *
 * Both separators are located and the one that occurs earliest wins. Simply
 * preferring CRLF would be wrong for an LF-terminated header block whose binary
 * body happens to contain `\r\n\r\n`: the split would land inside the body,
 * turning body bytes into "headers".
 *
 * Pure: works on Buffers only and never stringifies the body.
 *
 * @param buf Everything received from the CGI so far.
 * @returns The header text (without the separator) and the remaining body
 *   bytes, or `null` when no blank-line separator is present yet.
 */
export function splitCgiBuffer(
  buf: Buffer,
): { headerText: string; body: Buffer } | null {
  const crlfIdx = buf.indexOf('\r\n\r\n');
  const lfIdx = buf.indexOf('\n\n');

  let idx: number;
  let sepLen: number;
  if (crlfIdx !== -1 && (lfIdx === -1 || crlfIdx < lfIdx)) {
    idx = crlfIdx;
    sepLen = 4;
  } else if (lfIdx !== -1) {
    idx = lfIdx;
    sepLen = 2;
  } else {
    return null; // header block not terminated (yet)
  }

  return {
    headerText: buf.subarray(0, idx).toString('utf8'),
    body: buf.subarray(idx + sepLen),
  };
}
|
||||
|
||||
/**
 * One git smart-HTTP request after URL/header parsing, as handed to the
 * backend bridge. `buildGitBackendCgiEnv` maps these fields onto CGI variables.
 */
export interface GitHttpBackendRequest {
  /** The space id (the on-disk vault dir name == GIT_PROJECT_ROOT child). */
  spaceId: string;
  /** The subpath after `<spaceId>.git/`, e.g. `info/refs` or `git-receive-pack`. */
  subpath: string;
  /** REQUEST_METHOD — `GET` or `POST`. */
  method: string;
  /** Raw query string WITHOUT the leading '?', e.g. `service=git-receive-pack`. */
  queryString: string;
  /** Content-Type header value (may be empty for GET). */
  contentType: string;
  /** The Git-Protocol request header value, or undefined when absent. */
  gitProtocol?: string;
  /** Authenticated user email — used as REMOTE_USER (reflog identity). */
  remoteUser: string;
}

/**
 * Produce the CGI variables `git http-backend` needs for a single request.
 * The result is an overlay: the caller layers it on top of the cwd-isolated
 * base environment built by `vaultGitEnv`. Kept pure so the PATH_INFO /
 * REMOTE_USER / conditional GIT_PROTOCOL wiring can be unit-tested without
 * spawning git.
 *
 * PATH_INFO is the repo-relative CGI path. The vault on disk is a NON-BARE
 * working repo at `<dataDir>/<spaceId>` (the engine needs a working tree), so
 * the directory git http-backend must resolve is `<spaceId>` — NOT
 * `<spaceId>.git`. The URL carries the conventional `.git` suffix (stripped by
 * parseGitPath into `spaceId`); re-appending it here pointed the CGI at a
 * non-existent `<dataDir>/<spaceId>.git` and every fetch/push 404'd.
 *
 * @param parsed The parsed request.
 * @param projectRoot Directory containing one vault per space (GIT_PROJECT_ROOT).
 * @returns The CGI variable overlay; GIT_PROTOCOL is present only when the
 *   client sent a Git-Protocol header.
 */
export function buildGitBackendCgiEnv(
  parsed: GitHttpBackendRequest,
  projectRoot: string,
): Record<string, string> {
  const { spaceId, subpath, method, queryString, contentType, remoteUser } =
    parsed;
  const { gitProtocol } = parsed;

  return {
    GIT_PROJECT_ROOT: projectRoot,
    // authz is done by us; no git-daemon-export-ok file
    GIT_HTTP_EXPORT_ALL: '1',
    PATH_INFO: `/${spaceId}/${subpath}`,
    REQUEST_METHOD: method,
    QUERY_STRING: queryString,
    CONTENT_TYPE: contentType,
    REMOTE_USER: remoteUser,
    // Forward the protocol negotiation only if the client asked for it.
    ...(gitProtocol ? { GIT_PROTOCOL: gitProtocol } : {}),
  };
}

/**
 * Bridges an HTTP git smart-protocol request to `git http-backend` (the CGI that
 * implements the entire smart-HTTP protocol: info/refs, upload-pack,
 * receive-pack, protocol v2, dumb fallback). We do NOT reimplement pkt-line.
 *
 * The Fastify reply is hijacked by the caller; this service streams the request
 * body to the child's stdin and writes the child's CGI response (status +
 * headers parsed from the leading header block, then the raw binary body) to the
 * Node response. Errors before any output produce a 500. Credentials are never
 * logged.
 */
|
||||
|
||||
@Injectable()
|
||||
export class GitHttpBackendService {
|
||||
/** Nest logger labelled with this service's class name. */
private readonly logger = new Logger(GitHttpBackendService.name);
|
||||
|
||||
/**
 * @param environmentService Source of the vault data dir and the backend
 *   watchdog timeout that `run()` reads for each request.
 */
constructor(private readonly environmentService: EnvironmentService) {}
|
||||
|
||||
/**
 * Spawn `git http-backend` for one request and bridge it to the raw Node
 * request/response.
 *
 * Resolves when the response has been fully written (the child exited and its
 * output was flushed), when the watchdog timeout fired, or after a 500 was
 * sent on an early failure. Never rejects — push ingestion relies on this
 * resolving so the lock-held cycle body can run afterwards. That includes
 * failures BEFORE the child exists (the `@docmost/git-sync` loader rejecting,
 * a config getter throwing): they are answered with a 500 instead of
 * propagating.
 *
 * Nothing from the request body, the credentials, or the child's stderr is
 * ever echoed to the client.
 *
 * @param parsed The parsed smart-HTTP request (space, subpath, method, ...).
 * @param rawReq Raw Node request; its body is piped to the child for POST.
 * @param rawRes Raw Node response the CGI status/headers/body are written to.
 */
async run(
  parsed: GitHttpBackendRequest,
  rawReq: IncomingMessage,
  rawRes: ServerResponse,
): Promise<void> {
  // Best-effort termination of the response; a failure to end is not actionable.
  const endQuietly = (): void => {
    try {
      rawRes.end();
    } catch {
      /* ignore */
    }
  };

  try {
    // NOTE: this `await` must stay the only suspension point before `spawn`,
    // so the child and its handlers exist one microtask after `run()` is called.
    const { vaultGitEnv } = await loadGitSync();
    const projectRoot = this.environmentService.getGitSyncDataDir();
    // Build the CGI env from the engine's cwd-isolated base (strips GIT_DIR /
    // GIT_WORK_TREE), then layer the http-backend CGI variables. PATH is
    // preserved (vaultGitEnv already copies process.env, so PATH carries
    // through).
    const env = vaultGitEnv(buildGitBackendCgiEnv(parsed, projectRoot));
    // Read once so the armed timer and the log message always agree.
    const timeoutMs = this.environmentService.getGitSyncBackendTimeoutMs();

    await new Promise<void>((resolve) => {
      let settled = false;
      const done = () => {
        if (settled) return;
        settled = true;
        resolve();
      };

      let child: ReturnType<typeof spawn>;
      try {
        child = spawn('git', ['http-backend'], { env });
      } catch (err) {
        this.send500(rawRes, 'spawn-failed', err);
        return done();
      }

      // stdout is accumulated until the full CGI header block has arrived;
      // after that the remaining body bytes are streamed straight through.
      let headerParsed = false;
      let pending: Buffer = Buffer.alloc(0);
      // SIGKILL escalation timer armed by the watchdog (if it ever fires).
      let killTimer: ReturnType<typeof setTimeout> | undefined;

      // Watchdog: a client that opens git-receive-pack and stalls keeps the
      // child alive forever, so run() never resolves and (because this runs
      // inside withSpaceLock) the per-space lock is held + heartbeat-refreshed
      // indefinitely. Bound the request: on expiry kill the child, send a clean
      // 500 if nothing was sent yet, and settle the promise. The log carries no
      // client echo / credentials / body. `.unref()` so the timer never keeps
      // the event loop alive; ALWAYS cleared in the close/error handlers below.
      const timer = setTimeout(() => {
        this.logger.warn(
          `git http-backend timed out after ` + `${timeoutMs}ms; killing child`,
        );
        try {
          child.kill('SIGTERM');
          // Escalate to SIGKILL shortly after in case SIGTERM is ignored.
          killTimer = setTimeout(() => {
            try {
              child.kill('SIGKILL');
            } catch {
              /* ignore */
            }
          }, 2000);
          killTimer.unref?.();
        } catch {
          /* ignore */
        }
        if (!headerParsed && !rawRes.headersSent) {
          this.send500(rawRes, 'timeout');
        } else {
          endQuietly();
        }
        done();
      }, timeoutMs);
      timer.unref?.();

      const flushHeadersAndBody = (chunk: Buffer): void => {
        pending = Buffer.concat([pending, chunk]);
        const split = splitCgiBuffer(pending);
        if (!split) return; // header block not complete yet
        headerParsed = true;
        const { statusCode, headers } = parseCgiResponse(split.headerText);
        rawRes.statusCode = statusCode;
        for (const [name, value] of headers) {
          try {
            rawRes.setHeader(name, value);
          } catch (err) {
            // An invalid header name/value makes setHeader throw; inside a
            // stream 'data' handler that would be an uncaught exception. Drop
            // the offending header and keep serving the response.
            const detail = err instanceof Error ? err.message : String(err);
            this.logger.warn(
              `git http-backend emitted an invalid header; dropped: ${detail}`,
            );
          }
        }
        if (split.body.length > 0) rawRes.write(split.body);
        pending = Buffer.alloc(0);
      };

      child.stdout?.on('data', (chunk: Buffer) => {
        // After a timeout/error the response is already ended; late output
        // from a dying child must not be written to it.
        if (rawRes.writableEnded) return;
        if (headerParsed) {
          rawRes.write(chunk);
        } else {
          flushHeadersAndBody(chunk);
        }
      });
      // A stream 'error' (e.g. EPIPE when the client aborts mid-response) is an
      // EventEmitter 'error' with no listener -> Node rethrows it as an uncaught
      // exception and crashes the process. Swallow + log it (never echo to the
      // client); child.on('close')/'error' below drives the actual cleanup.
      child.stdout?.on('error', (err) => {
        this.logger.warn(`git http-backend stdout stream error: ${err.message}`);
      });

      let stderr = '';
      child.stderr?.on('data', (chunk: Buffer) => {
        // Capture for diagnostics; never echo to the client. http-backend writes
        // CGI errors here. We do NOT log the request body or any credentials.
        if (stderr.length < 8192) stderr += chunk.toString('utf8');
      });
      child.stderr?.on('error', (err) => {
        this.logger.warn(`git http-backend stderr stream error: ${err.message}`);
      });

      child.on('error', (err) => {
        clearTimeout(timer);
        if (!headerParsed && !rawRes.headersSent) {
          this.send500(rawRes, 'child-error', err);
        } else {
          // Output already started — we can only terminate the stream.
          endQuietly();
        }
        done();
      });

      child.on('close', (code) => {
        clearTimeout(timer);
        // The child is gone, so the SIGKILL escalation is no longer needed.
        if (killTimer !== undefined) clearTimeout(killTimer);
        if (!headerParsed && !rawRes.headersSent) {
          // The child exited before emitting a complete CGI header block.
          this.logger.error(
            `git http-backend produced no valid response (exit ${code}) for ` +
              `space; stderr: ${stderr.trim().slice(0, 500)}`,
          );
          this.send500(rawRes, 'no-output');
        } else {
          endQuietly();
        }
        done();
      });

      // Pipe the request body to the child's stdin. For GET there is no body,
      // so end stdin immediately. We pipe `rawReq` (the raw Node stream)
      // directly so large pushes are streamed, not buffered.
      if (parsed.method === 'POST') {
        rawReq.pipe(child.stdin!);
        rawReq.on('error', () => {
          try {
            child.stdin?.end();
          } catch {
            /* ignore */
          }
        });
      } else {
        child.stdin?.end();
      }
      // Swallow EPIPE etc. on the child's stdin so a client disconnect does not
      // crash the process.
      child.stdin?.on('error', () => {
        /* ignore broken-pipe on stdin */
      });
    });
  } catch (err) {
    // Anything thrown while preparing or wiring the bridge (loader rejection,
    // config getter, handler wiring) lands here so the method's never-rejects
    // contract holds.
    if (rawRes.headersSent) {
      endQuietly();
    } else {
      this.send500(rawRes, 'setup-failed', err);
    }
  }
}
|
||||
|
||||
/**
 * Log a backend failure and answer the client with a generic 500.
 *
 * Only `reason` and, when `err` is an Error with a non-empty message, that
 * message are logged. The client always receives the fixed text
 * "Internal server error". Status and Content-Type are set only while the
 * headers are still unsent, and any exception thrown while writing to the
 * response is swallowed.
 *
 * @param rawRes raw Node response to terminate
 * @param reason short tag written to the log line
 * @param err optional underlying error; only its message is logged
 */
private send500(rawRes: ServerResponse, reason: string, err?: unknown): void {
  const detail = err instanceof Error && err.message ? `: ${err.message}` : '';
  this.logger.error(`git http-backend failed (${reason})${detail}`);

  try {
    const headersPending = !rawRes.headersSent;
    if (headersPending) {
      rawRes.statusCode = 500;
      rawRes.setHeader('Content-Type', 'text/plain');
    }
    rawRes.end('Internal server error');
  } catch {
    /* ignore */
  }
}
|
||||
}
|
||||
@@ -0,0 +1,183 @@
|
||||
// Unit tests for the pure /git smart-HTTP helpers: URL parsing, service->kind
|
||||
// mapping (read vs write), and the gating/auth decision precedence.
|
||||
import {
|
||||
decideGitHttpGate,
|
||||
parseGitPath,
|
||||
resolveServiceKind,
|
||||
} from './git-http.helpers';
|
||||
|
||||
describe('parseGitPath', () => {
  it('parses spaceId + subpath, stripping the trailing .git', () => {
    const parsed = parseGitPath('abc123.git/info/refs');
    expect(parsed).toEqual({ spaceId: 'abc123', subpath: 'info/refs' });
  });

  it('tolerates a leading slash', () => {
    const parsed = parseGitPath('/abc.git/git-receive-pack');
    expect(parsed).toEqual({ spaceId: 'abc', subpath: 'git-receive-pack' });
  });

  it('returns an empty subpath for the bare repo root', () => {
    const parsed = parseGitPath('abc.git');
    expect(parsed).toEqual({ spaceId: 'abc', subpath: '' });
  });

  it('returns null when the first segment lacks .git', () => {
    const parsed = parseGitPath('abc/info/refs');
    expect(parsed).toBeNull();
  });

  it('returns null on an empty space id', () => {
    const parsed = parseGitPath('.git/info/refs');
    expect(parsed).toBeNull();
  });

  it('rejects path traversal', () => {
    // One literal `..` in the subpath, one space id that collapses to `.`.
    const hostile = ['abc.git/../../etc/passwd', '..git/x'];
    for (const candidate of hostile) {
      expect(parseGitPath(candidate)).toBeNull();
    }
  });

  it('rejects percent-encoded dot/slash traversal in the subpath (case-insensitive)', () => {
    const hostile = [
      'abc.git/%2e%2e%2fetc/passwd',
      'abc.git/%2E%2E/secret',
      'abc.git/objects/%2fabsolute',
    ];
    for (const candidate of hostile) {
      expect(parseGitPath(candidate)).toBeNull();
    }
  });
});
|
||||
|
||||
describe('resolveServiceKind', () => {
  it('GET info/refs?service=git-upload-pack -> read', () => {
    const kind = resolveServiceKind({
      method: 'GET',
      subpath: 'info/refs',
      service: 'git-upload-pack',
    });
    expect(kind).toBe('read');
  });

  it('GET info/refs?service=git-receive-pack -> write', () => {
    const kind = resolveServiceKind({
      method: 'GET',
      subpath: 'info/refs',
      service: 'git-receive-pack',
    });
    expect(kind).toBe('write');
  });

  it('POST git-upload-pack -> read', () => {
    const kind = resolveServiceKind({ method: 'POST', subpath: 'git-upload-pack' });
    expect(kind).toBe('read');
  });

  it('POST git-receive-pack -> write', () => {
    const kind = resolveServiceKind({ method: 'POST', subpath: 'git-receive-pack' });
    expect(kind).toBe('write');
  });

  it('a dumb-protocol GET (HEAD / objects) -> read', () => {
    const headKind = resolveServiceKind({ method: 'GET', subpath: 'HEAD' });
    expect(headKind).toBe('read');

    const objectKind = resolveServiceKind({
      method: 'GET',
      subpath: 'objects/12/abcdef',
    });
    expect(objectKind).toBe('read');
  });

  it('info/refs with no/unknown service -> read (dumb discovery)', () => {
    const kind = resolveServiceKind({ method: 'GET', subpath: 'info/refs' });
    expect(kind).toBe('read');
  });

  it('an unknown POST endpoint -> null', () => {
    const kind = resolveServiceKind({ method: 'POST', subpath: 'whatever' });
    expect(kind).toBeNull();
  });

  it('an unsupported method -> null', () => {
    const kind = resolveServiceKind({ method: 'DELETE', subpath: 'git-receive-pack' });
    expect(kind).toBeNull();
  });
});
|
||||
|
||||
describe('decideGitHttpGate', () => {
  type GateInput = Parameters<typeof decideGitHttpGate>[0];

  // Every gate open: valid credentials, a readable, sync-enabled space.
  const happyPath: GateInput = {
    hasCredentials: true,
    credentialsValid: true,
    serviceKind: 'read',
    gitSyncEnabled: true,
    gitHttpEnabled: true,
    spaceExists: true,
    spaceGitSyncEnabled: true,
    permissionGranted: true,
  };

  /** Run the gate on the happy-path input with the given fields overridden. */
  const decide = (overrides: Partial<GateInput> = {}) =>
    decideGitHttpGate({ ...happyPath, ...overrides });

  it('proceeds on the happy path', () => {
    expect(decide()).toEqual({ kind: 'proceed' });
  });

  it('401 when credentials are missing (even for a valid space)', () => {
    expect(decide({ hasCredentials: false })).toEqual({ kind: 'unauthorized' });
  });

  it('401 when credentials are present but invalid', () => {
    expect(decide({ credentialsValid: false })).toEqual({ kind: 'unauthorized' });
  });

  it('400 on an unparseable service kind', () => {
    expect(decide({ serviceKind: null })).toEqual({ kind: 'bad-request' });
  });

  it('404 when the space is not git-sync-enabled (never reveals existence)', () => {
    expect(decide({ spaceGitSyncEnabled: false })).toEqual({ kind: 'not-found' });
  });

  it('404 when the space does not exist', () => {
    expect(decide({ spaceExists: false })).toEqual({ kind: 'not-found' });
  });

  it('404 when git-sync is globally disabled', () => {
    expect(decide({ gitSyncEnabled: false })).toEqual({ kind: 'not-found' });
  });

  it('404 when the git-http host is disabled', () => {
    expect(decide({ gitHttpEnabled: false })).toEqual({ kind: 'not-found' });
  });

  it('403 when authenticated but lacking the required permission (reader on write)', () => {
    const decision = decide({ serviceKind: 'write', permissionGranted: false });
    expect(decision).toEqual({ kind: 'forbidden' });
  });

  it('still 401 (not 404) for missing creds against a disabled space', () => {
    // Anonymous probe must always get 401 first, regardless of space state.
    const decision = decide({ hasCredentials: false, spaceGitSyncEnabled: false });
    expect(decision).toEqual({ kind: 'unauthorized' });
  });
});
|
||||
147
apps/server/src/integrations/git-sync/http/git-http.helpers.ts
Normal file
147
apps/server/src/integrations/git-sync/http/git-http.helpers.ts
Normal file
@@ -0,0 +1,147 @@
|
||||
// Pure, framework-free helpers for the /git smart-HTTP host. They carry no Nest
|
||||
// / DI / concrete-service imports so the request parsing and the auth/authz
|
||||
// gating DECISION can be unit-tested in isolation, and nothing here ever logs a
|
||||
// password or the Authorization header.
|
||||
|
||||
/** The git operation a request maps to: a read (fetch/clone) or a write (push). */
|
||||
export type GitHttpServiceKind = 'read' | 'write';
|
||||
|
||||
/** A parsed `/git/<spaceId>.git/<subpath>` URL. */
|
||||
export interface ParsedGitPath {
|
||||
spaceId: string;
|
||||
/** The subpath after `<spaceId>.git/` (no leading slash), e.g. `info/refs`. */
|
||||
subpath: string;
|
||||
}
|
||||
|
||||
/**
 * Parse the `<rest>` of a `/git/<rest>` URL path into the space id and the
 * repo-relative subpath.
 *
 * The space id is the first path segment with its trailing `.git` stripped;
 * everything after the first `/` is the subpath (empty for the bare repo
 * root). Leading slashes on `rest` are tolerated.
 *
 * `rest` MUST be the pathname only — the caller strips the query string
 * before calling.
 *
 * @param rest the path after `/git/`, without a query string
 * @returns the parsed `{ spaceId, subpath }`, or null when the shape does not
 *   match: the first segment lacks `.git`, the space id is empty or `.`, the
 *   space id contains `..`, the subpath has a `..` segment, or either
 *   component carries a percent-encoded dot/slash (`%2e` / `%2f`, any case).
 */
export function parseGitPath(rest: string): ParsedGitPath | null {
  // Strip leading slashes, then split once: `<spaceId>.git` / `<subpath>`.
  const clean = rest.replace(/^\/+/, '');
  const slash = clean.indexOf('/');
  const first = slash === -1 ? clean : clean.slice(0, slash);
  const subpath = slash === -1 ? '' : clean.slice(slash + 1);

  if (!first.endsWith('.git')) return null;
  const spaceId = first.slice(0, -'.git'.length);
  if (!spaceId) return null;

  // Reject literal traversal / degenerate ids. `first` was cut at the first
  // `/`, so the space id can never contain a slash itself.
  const hasDotDotSegment = subpath.split('/').some((seg) => seg === '..');
  if (spaceId === '.' || spaceId.includes('..') || hasDotDotSegment) {
    return null;
  }

  // Defense-in-depth: reject percent-encoded dot/slash traversal in BOTH
  // components before they are used to build PATH_INFO — a decoder downstream
  // could otherwise turn `%2e%2e%2f` back into `../`. The space id is part of
  // the repository path too, so it gets the same guard as the subpath.
  const encodedTraversal = /%2e|%2f/i;
  if (encodedTraversal.test(spaceId) || encodedTraversal.test(subpath)) {
    return null;
  }

  return { spaceId, subpath };
}
|
||||
|
||||
/**
 * Classify a git HTTP request as a read (fetch/clone) or a write (push).
 *
 * Recognised shapes (the method is matched case-insensitively):
 *   - GET  info/refs?service=git-receive-pack  -> write (push advertisement)
 *   - GET  anything else                       -> read  (smart fetch
 *     advertisement, info/refs with a missing/unknown service, and
 *     dumb-protocol fetches such as HEAD or objects/…)
 *   - POST git-receive-pack                    -> write (push)
 *   - POST git-upload-pack                     -> read  (fetch)
 *
 * @returns the operation kind, or null for an unsupported shape (any other
 *   POST subpath, or any other method) so the caller can reject the request
 *   rather than guess.
 */
export function resolveServiceKind(input: {
  method: string;
  subpath: string;
  service?: string;
}): GitHttpServiceKind | null {
  const { subpath, service } = input;

  switch (input.method.toUpperCase()) {
    case 'GET': {
      // The only GET that needs push rights is the receive-pack advertisement.
      const advertisesPush =
        subpath === 'info/refs' && service === 'git-receive-pack';
      return advertisesPush ? 'write' : 'read';
    }
    case 'POST':
      switch (subpath) {
        case 'git-receive-pack':
          return 'write';
        case 'git-upload-pack':
          return 'read';
        default:
          return null; // unknown POST endpoint
      }
    default:
      return null; // unsupported method
  }
}
|
||||
|
||||
/** The outcome of the gating/auth decision the request handler must enforce. */
|
||||
export type GitHttpGateDecision =
|
||||
| { kind: 'unauthorized' } // 401 + WWW-Authenticate (missing/invalid creds)
|
||||
| { kind: 'not-found' } // 404 (space hidden / sync or http disabled)
|
||||
| { kind: 'forbidden' } // 403 (authenticated but lacks the permission)
|
||||
| { kind: 'bad-request' } // 400 (unparseable git request shape)
|
||||
| { kind: 'proceed' }; // run http-backend
|
||||
|
||||
/**
 * Pure gating decision for a /git request, computed from booleans the handler
 * has already resolved, so the precedence can be unit tested on its own.
 *
 * Precedence, first match wins:
 *   1. no Basic credentials                          -> unauthorized
 *   2. credentials present but invalid               -> unauthorized
 *   3. unparseable git request (serviceKind is null) -> bad-request
 *   4. git-sync globally disabled, git-http disabled, space missing, or space
 *      not git-sync-enabled                          -> not-found
 *   5. authenticated but lacking the permission      -> forbidden
 *   6. otherwise                                     -> proceed
 *
 * Because (4) comes after (1)/(2), an anonymous probe always gets
 * `unauthorized` first, and an authenticated user hitting a hidden or
 * disabled space gets `not-found` rather than `forbidden`.
 */
export function decideGitHttpGate(input: {
  hasCredentials: boolean;
  credentialsValid: boolean;
  serviceKind: GitHttpServiceKind | null;
  gitSyncEnabled: boolean;
  gitHttpEnabled: boolean;
  spaceExists: boolean;
  spaceGitSyncEnabled: boolean;
  permissionGranted: boolean;
}): GitHttpGateDecision {
  const authenticated = input.hasCredentials && input.credentialsValid;
  if (!authenticated) return { kind: 'unauthorized' };

  if (input.serviceKind === null) return { kind: 'bad-request' };

  // All four switches must be on for the repository to be visible at all.
  const repoVisible =
    input.gitSyncEnabled &&
    input.gitHttpEnabled &&
    input.spaceExists &&
    input.spaceGitSyncEnabled;
  if (!repoVisible) return { kind: 'not-found' };

  return input.permissionGranted ? { kind: 'proceed' } : { kind: 'forbidden' };
}
|
||||
@@ -0,0 +1,463 @@
|
||||
// Unit tests for GitHttpService — the /git smart-HTTP handler. Everything it
|
||||
// depends on (backend, auth, repos, ability factory, env, orchestrator) is
|
||||
// mocked so we exercise ONLY the handler wiring: workspace resolution (which is
|
||||
// done HERE, not by DomainMiddleware — see FIX 1), the auth/gating precedence,
|
||||
// the read-vs-write dispatch, and that a fetch does NOT take the lock.
|
||||
//
|
||||
// These tests deliberately NEVER set `req.raw.workspaceId`: the workspace must
|
||||
// come from WorkspaceRepo. If the handler regressed to reading
|
||||
// `req.raw.workspaceId`, the happy-path fetch test below would fail (the repo
|
||||
// would not be consulted and the request would 401).
|
||||
import { Logger, UnauthorizedException } from '@nestjs/common';
|
||||
import {
|
||||
SpaceCaslAction,
|
||||
SpaceCaslSubject,
|
||||
} from '../../../core/casl/interfaces/space-ability.type';
|
||||
import { GitHttpService } from './git-http.service';
|
||||
import { GitSyncLockHeldError } from '../services/git-sync.orchestrator';
|
||||
|
||||
type AnyMock = jest.Mock;
|
||||
|
||||
interface BuildOptions {
|
||||
selfHosted?: boolean;
|
||||
gitSyncEnabled?: boolean;
|
||||
gitHttpEnabled?: boolean;
|
||||
/** What workspaceRepo.findFirst() returns (self-hosted resolution). */
|
||||
workspace?: { id: string } | null;
|
||||
/** What spaceRepo.findById() returns. */
|
||||
space?: { id: string; settings?: unknown } | null;
|
||||
/** Result of authService.verifyUserCredentials: a user, or throw 401. */
|
||||
user?: { id: string; email: string } | null;
|
||||
/** Whether the created ability grants the requested action. */
|
||||
abilityCan?: boolean;
|
||||
}
|
||||
|
||||
interface Built {
|
||||
service: GitHttpService;
|
||||
env: Record<string, AnyMock>;
|
||||
authService: { verifyUserCredentials: AnyMock };
|
||||
spaceRepo: { findById: AnyMock };
|
||||
workspaceRepo: { findFirst: AnyMock; findByHostname: AnyMock };
|
||||
abilityFactory: { createForUser: AnyMock };
|
||||
abilityCan: AnyMock;
|
||||
vaultRegistry: { ensureServable: AnyMock };
|
||||
orchestrator: { ingestExternalPush: AnyMock };
|
||||
backend: { run: AnyMock };
|
||||
}
|
||||
|
||||
/**
 * Construct a GitHttpService wired entirely to jest mocks and return it along
 * with every mock, so a test can both drive the handler and inspect calls.
 */
function build(opts: BuildOptions = {}): Built {
  // Destructuring defaults (not `??`) on purpose: an explicit `null`, e.g.
  // `workspace: null` or `user: null`, must survive as null.
  const {
    selfHosted = true,
    gitSyncEnabled = true,
    gitHttpEnabled = true,
    workspace = { id: 'ws-1' },
    space = { id: 'space-1', settings: { gitSync: { enabled: true } } },
    user = { id: 'user-1', email: 'dev@example.com' },
    abilityCan: abilityVerdict = true,
  } = opts;

  const env: Record<string, AnyMock> = {
    isSelfHosted: jest.fn(() => selfHosted),
    isCloud: jest.fn(() => !selfHosted),
    isGitSyncEnabled: jest.fn(() => gitSyncEnabled),
    isGitSyncHttpEnabled: jest.fn(() => gitHttpEnabled),
  };

  // Resolves with the configured user, or rejects with a 401 when there is none.
  const authService = {
    verifyUserCredentials: jest.fn(() =>
      user
        ? Promise.resolve(user)
        : Promise.reject(new UnauthorizedException()),
    ),
  };

  const spaceRepo = { findById: jest.fn(() => Promise.resolve(space)) };
  const workspaceRepo = {
    findFirst: jest.fn(() => Promise.resolve(workspace)),
    findByHostname: jest.fn(() => Promise.resolve(workspace)),
  };

  const abilityCan = jest.fn(() => abilityVerdict);
  const abilityFactory = {
    createForUser: jest.fn(() => Promise.resolve({ can: abilityCan })),
  };

  const vaultRegistry = {
    ensureServable: jest.fn(() => Promise.resolve(undefined)),
  };
  const orchestrator = {
    ingestExternalPush: jest.fn(() => Promise.resolve(undefined)),
  };
  const backend = { run: jest.fn(() => Promise.resolve(undefined)) };

  const service = new GitHttpService(
    env as any,
    authService as any,
    spaceRepo as any,
    workspaceRepo as any,
    abilityFactory as any,
    vaultRegistry as any,
    orchestrator as any,
    backend as any,
  );

  const doubles = {
    env,
    authService,
    spaceRepo,
    workspaceRepo,
    abilityFactory,
    abilityCan,
    vaultRegistry,
    orchestrator,
    backend,
  };
  return { service, ...doubles };
}
|
||||
|
||||
/**
 * Build a stand-in Fastify reply together with the `state` object that
 * records the terminal status, headers and body the handler set on it.
 */
function fakeReply() {
  const state: {
    statusCode?: number;
    headers: Record<string, string>;
    body?: unknown;
    hijacked: boolean;
    sent: boolean;
  } = { headers: {}, hijacked: false, sent: false };

  // The raw Node response — only touched on the streaming/error paths.
  const raw = {
    headersSent: false,
    writableEnded: false,
    statusCode: 200,
    setHeader: jest.fn(),
    end: jest.fn(),
  };

  // header/status/send return the reply itself so calls can be chained.
  const reply: any = {
    header: (name: string, value: string) => {
      state.headers[name] = value;
      return reply;
    },
    status: (code: number) => {
      state.statusCode = code;
      return reply;
    },
    send: (body: unknown) => {
      state.body = body;
      state.sent = true;
      return reply;
    },
    hijack: () => {
      state.hijacked = true;
    },
    get sent() {
      return state.sent;
    },
    raw,
  };

  return { reply, state };
}
|
||||
|
||||
/**
 * Build a stand-in Fastify request for a /git smart-HTTP call.
 *
 * `method` defaults to GET and `host` to `docs.example.com`; the
 * `authorization` header is only present when a non-empty value is given.
 * `query` mirrors the URL's query string, split on `&` and `=` with no
 * decoding.
 */
function fakeRequest(opts: {
  url: string;
  method?: string;
  authorization?: string;
  host?: string;
}) {
  const { url, authorization } = opts;
  const { method = 'GET', host = 'docs.example.com' } = opts;

  const headers: Record<string, string> = authorization
    ? { host, authorization }
    : { host };

  // Fastify would parse the query; mirror the `service` param when present.
  const queryStart = url.indexOf('?');
  const rawPairs = queryStart === -1 ? [] : url.slice(queryStart + 1).split('&');
  const query: Record<string, string> = {};
  rawPairs.forEach((pair) => {
    const [key, value = ''] = pair.split('=');
    if (key) query[key] = value;
  });

  // raw is intentionally WITHOUT workspaceId — the handler must resolve it
  // itself via WorkspaceRepo (a regression to req.raw.workspaceId would 401).
  const raw = {};

  return { url, method, headers, query, raw } as any;
}
|
||||
|
||||
/** Build an HTTP Basic `Authorization` header value for `email:password`. */
function basic(email: string, password: string): string {
  const credentials = `${email}:${password}`;
  const encoded = Buffer.from(credentials).toString('base64');
  return `Basic ${encoded}`;
}
|
||||
|
||||
beforeEach(() => {
  jest.clearAllMocks();

  // Silence the handler's logger.warn/error in negative-path tests.
  const silent = () => undefined;
  jest.spyOn(Logger.prototype, 'warn').mockImplementation(silent);
  jest.spyOn(Logger.prototype, 'error').mockImplementation(silent);
});
|
||||
|
||||
describe('GitHttpService.handle', () => {
  // The three request shapes exercised below, all against the same space.
  const REPO = '/git/space-1.git';
  const UPLOAD_REFS_URL = `${REPO}/info/refs?service=git-upload-pack`;
  const RECEIVE_REFS_URL = `${REPO}/info/refs?service=git-receive-pack`;
  const RECEIVE_PACK_URL = `${REPO}/git-receive-pack`;
  const goodAuth = basic('dev@example.com', 'pw');

  /** Authenticated GET of the fetch ref advertisement. */
  const fetchRefsRequest = () =>
    fakeRequest({ url: UPLOAD_REFS_URL, method: 'GET', authorization: goodAuth });

  /** Authenticated POST of the pack itself (the push). */
  const pushPackRequest = () =>
    fakeRequest({ url: RECEIVE_PACK_URL, method: 'POST', authorization: goodAuth });

  it('fetch with valid creds resolves the workspace via the repo and dispatches WITHOUT the lock', async () => {
    const built = build({ selfHosted: true });
    const { reply, state } = fakeReply();

    await built.service.handle(fetchRefsRequest(), reply);

    // The workspace came from WorkspaceRepo, NOT req.raw.workspaceId.
    expect(built.workspaceRepo.findFirst).toHaveBeenCalledTimes(1);
    expect(built.authService.verifyUserCredentials).toHaveBeenCalledWith(
      { email: 'dev@example.com', password: 'pw' },
      'ws-1',
    );
    expect(built.spaceRepo.findById).toHaveBeenCalledWith('space-1', 'ws-1');
    // Read ability was evaluated.
    expect(built.abilityCan).toHaveBeenCalledWith(
      SpaceCaslAction.Read,
      SpaceCaslSubject.Page,
    );
    // It proceeded: vault prepared, reply hijacked, backend ran directly.
    expect(built.vaultRegistry.ensureServable).toHaveBeenCalledWith('space-1');
    expect(state.hijacked).toBe(true);
    expect(built.backend.run).toHaveBeenCalledTimes(1);
    // A fetch must NOT take the push lock.
    expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled();
  });

  it('cloud deployment resolves the workspace by the host subdomain', async () => {
    const built = build({ selfHosted: false });
    const { reply } = fakeReply();
    const req = fakeRequest({
      url: UPLOAD_REFS_URL,
      method: 'GET',
      authorization: goodAuth,
      host: 'acme.example.com',
    });

    await built.service.handle(req, reply);

    expect(built.workspaceRepo.findByHostname).toHaveBeenCalledWith('acme');
    expect(built.workspaceRepo.findFirst).not.toHaveBeenCalled();
    expect(built.backend.run).toHaveBeenCalledTimes(1);
  });

  it('missing Basic credentials -> 401 with WWW-Authenticate', async () => {
    const built = build();
    const { reply, state } = fakeReply();
    // Deliberately no Authorization header.
    const req = fakeRequest({ url: UPLOAD_REFS_URL, method: 'GET' });

    await built.service.handle(req, reply);

    expect(state.statusCode).toBe(401);
    expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"');
    expect(built.backend.run).not.toHaveBeenCalled();
    expect(built.authService.verifyUserCredentials).not.toHaveBeenCalled();
  });

  it('invalid Basic credentials -> 401 with WWW-Authenticate', async () => {
    const built = build({ user: null }); // verifyUserCredentials throws 401
    const { reply, state } = fakeReply();
    const req = fakeRequest({
      url: UPLOAD_REFS_URL,
      method: 'GET',
      authorization: basic('dev@example.com', 'wrong'),
    });

    await built.service.handle(req, reply);

    expect(state.statusCode).toBe(401);
    expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"');
    expect(built.backend.run).not.toHaveBeenCalled();
  });

  it('a write by a Read-only user -> 403 (reader cannot push)', async () => {
    const built = build({ abilityCan: false });
    const { reply, state } = fakeReply();

    await built.service.handle(pushPackRequest(), reply);

    // The Manage ability was checked for a write and denied.
    expect(built.abilityCan).toHaveBeenCalledWith(
      SpaceCaslAction.Manage,
      SpaceCaslSubject.Page,
    );
    expect(state.statusCode).toBe(403);
    expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled();
    expect(built.backend.run).not.toHaveBeenCalled();
  });

  it('a space that is not git-sync-enabled -> 404 (existence never revealed)', async () => {
    const disabledSpace = {
      id: 'space-1',
      settings: { gitSync: { enabled: false } },
    };
    const built = build({ space: disabledSpace });
    const { reply, state } = fakeReply();

    await built.service.handle(fetchRefsRequest(), reply);

    expect(state.statusCode).toBe(404);
    // CASL is never even evaluated for a non-candidate space.
    expect(built.abilityFactory.createForUser).not.toHaveBeenCalled();
    expect(built.backend.run).not.toHaveBeenCalled();
  });

  it('git-sync globally disabled -> 404 even with valid creds', async () => {
    const built = build({ gitSyncEnabled: false });
    const { reply, state } = fakeReply();

    await built.service.handle(fetchRefsRequest(), reply);

    expect(state.statusCode).toBe(404);
    expect(built.backend.run).not.toHaveBeenCalled();
  });

  it('a valid write proceeds through the orchestrator (push takes the lock)', async () => {
    const built = build({ abilityCan: true });
    const { reply, state } = fakeReply();

    await built.service.handle(pushPackRequest(), reply);

    expect(built.abilityCan).toHaveBeenCalledWith(
      SpaceCaslAction.Manage,
      SpaceCaslSubject.Page,
    );
    expect(state.hijacked).toBe(true);
    expect(built.orchestrator.ingestExternalPush).toHaveBeenCalledTimes(1);
    const [pushedSpaceId, pushedWorkspaceId] =
      built.orchestrator.ingestExternalPush.mock.calls[0];
    expect(pushedSpaceId).toBe('space-1');
    expect(pushedWorkspaceId).toBe('ws-1');
  });

  it('GET info/refs?service=git-receive-pack streams the backend WITHOUT a cycle/lock (so the follow-up POST never 503-collides)', async () => {
    // A push is a TWO-request exchange: GET info/refs?service=git-receive-pack
    // (ref advertisement) then POST git-receive-pack (the pack). The info/refs
    // request is write-AUTHORIZED (push perms needed to see those refs) but is
    // READ-ONLY — it must NOT run ingestExternalPush (a Docmost cycle under the
    // per-space lock), or the immediately-following POST collides with the
    // still-running cycle and deterministically 503s. It must just stream the
    // backend.
    const built = build({ abilityCan: true });
    const { reply } = fakeReply();
    const req = fakeRequest({
      url: RECEIVE_REFS_URL,
      method: 'GET',
      authorization: goodAuth,
    });

    await built.service.handle(req, reply);

    // Authorized as a write (Manage), but executed as a plain stream.
    expect(built.abilityCan).toHaveBeenCalledWith(
      SpaceCaslAction.Manage,
      SpaceCaslSubject.Page,
    );
    expect(built.orchestrator.ingestExternalPush).not.toHaveBeenCalled();
    expect(built.backend.run).toHaveBeenCalledTimes(1);
  });

  it('a push that loses the lock -> 503 with Retry-After and a busy body (headers not written twice)', async () => {
    const built = build({ abilityCan: true });
    // The lock could not be acquired: the receive-pack closure never ran, so the
    // response is still unwritten and the handler must answer 503 itself.
    built.orchestrator.ingestExternalPush.mockRejectedValue(
      new GitSyncLockHeldError('space-1'),
    );
    const { reply, state } = fakeReply();

    await built.service.handle(pushPackRequest(), reply);

    // It hijacked and went through the orchestrator (write path), but the lock
    // was held so the backend never ran.
    expect(state.hijacked).toBe(true);
    expect(built.orchestrator.ingestExternalPush).toHaveBeenCalledTimes(1);
    expect(built.backend.run).not.toHaveBeenCalled();

    // 503 + Retry-After were written on the raw response (headersSent was false).
    const raw = reply.raw as any;
    expect(raw.statusCode).toBe(503);
    expect(raw.setHeader).toHaveBeenCalledWith('Content-Type', 'text/plain');
    expect(raw.setHeader).toHaveBeenCalledWith('Retry-After', '1');
    // The body carries the busy/retry message and the response was ended once.
    expect(raw.end).toHaveBeenCalledTimes(1);
    expect(raw.end).toHaveBeenCalledWith('git-sync busy, retry');
    // Exactly the two headers above were set — no double write of headers.
    expect(raw.setHeader).toHaveBeenCalledTimes(2);
  });

  it('does NOT rewrite the 503 status/headers when the response is already sent', async () => {
    const built = build({ abilityCan: true });
    built.orchestrator.ingestExternalPush.mockRejectedValue(
      new GitSyncLockHeldError('space-1'),
    );
    const { reply } = fakeReply();
    // Simulate the (defensive) case where headers were already flushed: the
    // handler must skip statusCode/setHeader and only end() the socket.
    const raw = reply.raw as any;
    raw.headersSent = true;

    await built.service.handle(pushPackRequest(), reply);

    // No header writes when headersSent is already true (no "headers already
    // sent" double-write path), but the body/end still runs.
    expect(raw.setHeader).not.toHaveBeenCalled();
    expect(raw.statusCode).toBe(200); // untouched default from the fake
    expect(raw.end).toHaveBeenCalledTimes(1);
    expect(raw.end).toHaveBeenCalledWith('git-sync busy, retry');
  });

  it('an unresolvable workspace -> 401 (credentials cannot be validated without one)', async () => {
    const built = build({ workspace: null });
    const { reply, state } = fakeReply();

    await built.service.handle(fetchRefsRequest(), reply);

    // Without a workspace we cannot run verifyUserCredentials, so credentials
    // are not validated -> 401 (the 401-before-404 ordering is preserved: an
    // unauthenticated request never reaches the space-existence 404).
    expect(built.workspaceRepo.findFirst).toHaveBeenCalledTimes(1);
    expect(built.authService.verifyUserCredentials).not.toHaveBeenCalled();
    expect(state.statusCode).toBe(401);
    expect(state.headers['WWW-Authenticate']).toBe('Basic realm="gitmost"');
    expect(built.backend.run).not.toHaveBeenCalled();
  });
});
|
||||
328
apps/server/src/integrations/git-sync/http/git-http.service.ts
Normal file
328
apps/server/src/integrations/git-sync/http/git-http.service.ts
Normal file
@@ -0,0 +1,328 @@
|
||||
import { Injectable, Logger, UnauthorizedException } from '@nestjs/common';
|
||||
import type { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { AuthService } from '../../../core/auth/services/auth.service';
|
||||
import SpaceAbilityFactory from '../../../core/casl/abilities/space-ability.factory';
|
||||
import {
|
||||
SpaceCaslAction,
|
||||
SpaceCaslSubject,
|
||||
} from '../../../core/casl/interfaces/space-ability.type';
|
||||
import { SpaceRepo } from '@docmost/db/repos/space/space.repo';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { User } from '@docmost/db/types/entity.types';
|
||||
import { parseBasicAuth } from '../../mcp/mcp-auth.helpers';
|
||||
import { EnvironmentService } from '../../environment/environment.service';
|
||||
import { VaultRegistryService } from '../services/vault-registry.service';
|
||||
import {
|
||||
GitSyncLockHeldError,
|
||||
GitSyncOrchestrator,
|
||||
} from '../services/git-sync.orchestrator';
|
||||
import { GitHttpBackendService } from './git-http-backend.service';
|
||||
import {
|
||||
decideGitHttpGate,
|
||||
parseGitPath,
|
||||
resolveServiceKind,
|
||||
GitHttpServiceKind,
|
||||
} from './git-http.helpers';
|
||||
|
||||
const WWW_AUTHENTICATE = 'Basic realm="gitmost"';
|
||||
|
||||
/**
 * The /git smart-HTTP host. Wires request parsing, the reused auth primitives
 * (HTTP Basic -> AuthService.verifyUserCredentials), per-space gating
 * (EnvironmentService flags + space.settings.gitSync.enabled), CASL authz
 * (SpaceAbilityFactory), and dispatch to `git http-backend`:
 *   - POST git-receive-pack (the actual push) -> ensureServable, then
 *     orchestrator.ingestExternalPush, which is handed the receive-pack as a
 *     closure (run under the space lock, followed by a Docmost cycle).
 *   - everything else that passes the gate (fetch/clone, and the read-only ref
 *     advertisement that precedes a push) -> ensureServable, then stream the
 *     http-backend directly (no lock, no cycle).
 *
 * Mounted at the ROOT (`/git/...`) by a raw Fastify route in main.ts (the global
 * `/api` prefix does not apply). Never logs the password or Authorization header.
 */
@Injectable()
export class GitHttpService {
  private readonly logger = new Logger(GitHttpService.name);

  constructor(
    private readonly environmentService: EnvironmentService,
    private readonly authService: AuthService,
    private readonly spaceRepo: SpaceRepo,
    private readonly workspaceRepo: WorkspaceRepo,
    private readonly spaceAbilityFactory: SpaceAbilityFactory,
    private readonly vaultRegistry: VaultRegistryService,
    private readonly orchestrator: GitSyncOrchestrator,
    private readonly backend: GitHttpBackendService,
  ) {}

  /**
   * Resolve the workspace for a /git request the SAME way DomainMiddleware does,
   * because Nest middleware does NOT run for this raw root-mounted route (it is
   * registered under the global '/api' router), so `req.raw.workspaceId` is never
   * populated here. We replicate DomainMiddleware / McpService:
   *   - self-hosted (single workspace) -> workspaceRepo.findFirst();
   *   - cloud (multi-tenant)           -> resolve by the host-header subdomain.
   *
   * @param req the incoming Fastify request (only the `host` header is read).
   * @returns the workspace id, or null when none resolves (unknown host, neither
   *   deployment mode, or a lookup error). With a null workspace `handle` cannot
   *   call verifyUserCredentials, so supplied credentials stay unvalidated and
   *   the request is answered 401 — it never reaches the space-existence 404.
   */
  private async resolveWorkspaceId(req: FastifyRequest): Promise<string | null> {
    try {
      if (this.environmentService.isSelfHosted()) {
        const workspace = await this.workspaceRepo.findFirst();
        return workspace?.id ?? null;
      }
      if (this.environmentService.isCloud()) {
        const host = this.headerValue(req.headers['host']);
        // The tenant is the left-most label of the Host header.
        const subdomain = host ? host.split('.')[0] : '';
        if (!subdomain) return null;
        const workspace = await this.workspaceRepo.findByHostname(subdomain);
        return workspace?.id ?? null;
      }
    } catch (err) {
      // A DB error resolving the workspace must not leak details; treat it as
      // unresolvable (credentials then cannot be validated -> 401).
      this.logger.warn(
        `git-http: workspace resolution error: ${this.errorMessage(err)}`,
      );
    }
    return null;
  }

  /**
   * Handle one `/git/<spaceId>.git/<subpath>` request.
   *
   * Flow: parse the URL -> classify the git service (read/write) -> HTTP Basic
   * auth -> space lookup + per-space flag + CASL -> pure gate decision -> on
   * 'proceed', make the vault servable, hijack the reply and stream
   * `git http-backend` straight to the raw socket (the CGI body may be binary).
   *
   * Once the reply is hijacked Fastify no longer answers for us, so every
   * failure after that point is logged here and the raw response is closed —
   * the promise returned by this method does not reject for backend errors.
   *
   * @param req   the Fastify request (url, method, query and headers are read;
   *              `req.raw` is piped to the backend).
   * @param reply the Fastify reply; used normally for 4xx/5xx gate answers and
   *              hijacked for the streamed backend response.
   */
  async handle(req: FastifyRequest, reply: FastifyReply): Promise<void> {
    const rawReq = req.raw;
    const rawRes = reply.raw;

    // --- parse the URL into spaceId + subpath -------------------------------
    const rest = this.extractRest(req.url);
    const parsedPath = rest === null ? null : parseGitPath(rest);

    // --- resolve the requested git service kind (read vs write) -------------
    const service =
      typeof req.query === 'object' && req.query !== null
        ? (req.query as Record<string, string | undefined>).service
        : undefined;
    const serviceKind: GitHttpServiceKind | null = parsedPath
      ? resolveServiceKind({
          method: req.method,
          subpath: parsedPath.subpath,
          service,
        })
      : null;

    // --- authenticate (HTTP Basic) ------------------------------------------
    const basic = parseBasicAuth(
      this.headerValue(req.headers['authorization']),
    );
    // Resolve the workspace ourselves — DomainMiddleware does NOT run for this
    // raw root route, so `req.raw.workspaceId` is never set.
    const workspaceId: string | null = await this.resolveWorkspaceId(req);

    let user: User | undefined;
    let credentialsValid = false;
    if (basic && workspaceId) {
      try {
        user = await this.authService.verifyUserCredentials(
          { email: basic.email, password: basic.password },
          workspaceId,
        );
        credentialsValid = true;
      } catch (err) {
        if (!(err instanceof UnauthorizedException)) {
          // A non-credential failure (e.g. DB error): treat as invalid creds for
          // the gate (a 401), and log without leaking the password/header.
          this.logger.warn(
            `git-http: credential check error: ${this.errorMessage(err)}`,
          );
        }
        credentialsValid = false;
      }
    }

    // --- resolve the space + per-space gating + CASL ------------------------
    let spaceExists = false;
    let spaceGitSyncEnabled = false;
    let spaceId: string | undefined;
    let permissionGranted = false;
    if (credentialsValid && user && workspaceId && parsedPath && serviceKind) {
      const space = await this.spaceRepo.findById(
        parsedPath.spaceId,
        workspaceId,
      );
      if (space) {
        spaceExists = true;
        spaceId = space.id;
        spaceGitSyncEnabled =
          (space.settings as any)?.gitSync?.enabled === true;

        // Only evaluate CASL when the space is actually a sync candidate — an
        // unrelated space stays a 404 (existence is never revealed).
        if (spaceGitSyncEnabled) {
          try {
            const ability = await this.spaceAbilityFactory.createForUser(
              user,
              space.id,
            );
            // Anything classified as a write needs Manage; reads need Read.
            const action =
              serviceKind === 'write'
                ? SpaceCaslAction.Manage
                : SpaceCaslAction.Read;
            permissionGranted = ability.can(action, SpaceCaslSubject.Page);
          } catch {
            // createForUser throws NotFoundException when the user has no role in
            // the space — that is simply "no permission" here.
            permissionGranted = false;
          }
        }
      }
    }

    // --- the gate decision (pure) -------------------------------------------
    const decision = decideGitHttpGate({
      hasCredentials: Boolean(basic),
      credentialsValid,
      serviceKind,
      gitSyncEnabled: this.environmentService.isGitSyncEnabled(),
      gitHttpEnabled: this.environmentService.isGitSyncHttpEnabled(),
      spaceExists,
      spaceGitSyncEnabled,
      permissionGranted,
    });

    if (decision.kind === 'unauthorized') {
      reply
        .header('WWW-Authenticate', WWW_AUTHENTICATE)
        .status(401)
        .send('Authentication required');
      return;
    }
    if (decision.kind === 'bad-request') {
      reply.status(400).send('Bad request');
      return;
    }
    if (decision.kind === 'not-found') {
      reply.status(404).send('Not found');
      return;
    }
    if (decision.kind === 'forbidden') {
      reply.status(403).send('Forbidden');
      return;
    }

    // decision.kind === 'proceed' — guaranteed below (narrowing for TS).
    if (!parsedPath || !serviceKind || !spaceId || !user || !workspaceId) {
      // Defensive: 'proceed' implies these are set, but keep TS + runtime safe.
      reply.status(500).send('Internal server error');
      return;
    }

    // --- dispatch to git http-backend ---------------------------------------
    const backendRequest = {
      spaceId,
      subpath: parsedPath.subpath,
      method: req.method,
      queryString: this.extractQueryString(req.url),
      contentType: this.headerValue(req.headers['content-type']) ?? '',
      gitProtocol: this.headerValue(req.headers['git-protocol']),
      remoteUser: user.email,
    };

    try {
      // Idempotently make the vault servable (repo + receive/upload config).
      await this.vaultRegistry.ensureServable(spaceId);
    } catch (err) {
      this.logger.error(
        `git-http: failed to prepare vault for space ${spaceId}: ${this.errorMessage(err)}`,
      );
      if (!reply.sent) reply.status(500).send('Internal server error');
      return;
    }

    // Hijack the reply so the backend can stream the raw (possibly binary) CGI
    // response directly to the socket (mirrors the MCP transport pattern).
    // From here on Fastify will not send anything for us.
    reply.hijack();

    // Only the ACTUAL pack-receiving write (POST git-receive-pack) runs under the
    // space lock + a Docmost cycle. Everything else streams the http-backend
    // directly with NO lock and NO cycle: a fetch/clone (read), AND the
    // write-AUTHORIZED but READ-ONLY ref advertisement
    // (GET info/refs?service=git-receive-pack). Running a cycle on info/refs is
    // both wasteful and HARMFUL — it holds the per-space lock, so the push's
    // immediately-following POST git-receive-pack collides with it and 503s
    // (a deterministic push failure). Authz already happened above via the gate.
    const isReceivePack =
      req.method === 'POST' && parsedPath.subpath === 'git-receive-pack';
    if (serviceKind === 'read' || !isReceivePack) {
      try {
        await this.backend.run(backendRequest, rawReq, rawRes);
      } catch (err) {
        // The reply is hijacked: if the backend rejects, nobody else will close
        // the socket, so log and terminate the response ourselves.
        this.logger.error(
          `git-http: backend error for space ${spaceId}: ${this.errorMessage(err)}`,
        );
        this.closeAfterFailure(rawRes);
      }
      return;
    }

    // Push: run the receive-pack under the space lock, then a Docmost cycle.
    try {
      await this.orchestrator.ingestExternalPush(spaceId, workspaceId, () =>
        this.backend.run(backendRequest, rawReq, rawRes),
      );
    } catch (err) {
      if (err instanceof GitSyncLockHeldError) {
        // The lock could not be acquired and the receive-pack never ran, so the
        // response is still unwritten — answer 503 so git retries.
        if (!rawRes.headersSent) {
          rawRes.statusCode = 503;
          rawRes.setHeader('Content-Type', 'text/plain');
          rawRes.setHeader('Retry-After', '1');
        }
        try {
          rawRes.end('git-sync busy, retry');
        } catch {
          /* ignore */
        }
        return;
      }
      // Any other error: the receive-pack closure normally writes its own
      // response, so we log and make sure the socket is closed (as a 500 when
      // nothing was written yet).
      this.logger.error(
        `git-http: push ingestion error for space ${spaceId}: ${this.errorMessage(err)}`,
      );
      this.closeAfterFailure(rawRes);
    }
  }

  /**
   * Best-effort termination of a hijacked response after a failure: a no-op when
   * the response already ended; otherwise marks it 500 if no headers were sent
   * yet and ends it. Never throws.
   */
  private closeAfterFailure(rawRes: FastifyReply['raw']): void {
    try {
      if (rawRes.writableEnded) return;
      if (!rawRes.headersSent) rawRes.statusCode = 500;
      rawRes.end();
    } catch {
      /* ignore */
    }
  }

  /** Render a caught value for a log line (message only, never the stack). */
  private errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /** Normalise a possibly-array header value to its first string. */
  private headerValue(value: string | string[] | undefined): string | undefined {
    if (Array.isArray(value)) return value[0];
    return value;
  }

  /**
   * Extract the part of the URL AFTER `/git/` and BEFORE the query string.
   * Returns null when the URL is not under `/git/`.
   */
  private extractRest(url: string): string | null {
    const qIdx = url.indexOf('?');
    const pathname = qIdx === -1 ? url : url.slice(0, qIdx);
    const prefix = '/git/';
    if (!pathname.startsWith(prefix)) return null;
    return pathname.slice(prefix.length);
  }

  /** The raw query string without the leading '?', or '' when none. */
  private extractQueryString(url: string): string {
    const qIdx = url.indexOf('?');
    return qIdx === -1 ? '' : url.slice(qIdx + 1);
  }
}
|
||||
@@ -0,0 +1,252 @@
|
||||
// Unit tests for the event-driven git-sync trigger. The orchestrator
|
||||
// and page repo are hand-built mocks; the debounce coalescing is exercised with
|
||||
// jest fake timers. We assert the gate, the loop-guard (anti-echo), the
|
||||
// missing-page short-circuit, the heterogeneous event-shape id resolution, the
|
||||
// debounce collapse, and that errors are swallowed + logged.
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { PageChangeListener } from './page-change.listener';
|
||||
|
||||
type AnyMock = jest.Mock;
|
||||
|
||||
interface Built {
|
||||
listener: PageChangeListener;
|
||||
env: { isGitSyncEnabled: AnyMock; getGitSyncDebounceMs: AnyMock };
|
||||
orchestrator: { runOnce: AnyMock };
|
||||
pageRepo: { findById: AnyMock };
|
||||
}
|
||||
|
||||
/**
 * Test factory: wires a PageChangeListener to hand-built jest mocks.
 * `enabled` (default true) drives `isGitSyncEnabled`; `debounceMs` (default
 * 2000) drives `getGitSyncDebounceMs`. Returns the listener plus every mock so
 * a test can stub and assert on them.
 */
function build(opts: { enabled?: boolean; debounceMs?: number } = {}): Built {
  const enabled = opts.enabled === undefined ? true : opts.enabled;
  const debounceMs = opts.debounceMs === undefined ? 2000 : opts.debounceMs;

  const pageRepo = { findById: jest.fn() };
  const orchestrator = { runOnce: jest.fn(async () => undefined) };
  const env = {
    isGitSyncEnabled: jest.fn(() => enabled),
    getGitSyncDebounceMs: jest.fn(() => debounceMs),
  };

  return {
    listener: new PageChangeListener(
      env as any,
      orchestrator as any,
      pageRepo as any,
    ),
    env,
    orchestrator,
    pageRepo,
  };
}
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('PageChangeListener', () => {
|
||||
describe('gate', () => {
|
||||
it('does nothing when git-sync is disabled (no findById, no schedule)', async () => {
|
||||
const { listener, orchestrator, pageRepo } = build({ enabled: false });
|
||||
await listener.handlePageEvent({ pageId: 'p1', workspaceId: 'ws-1' });
|
||||
expect(pageRepo.findById).not.toHaveBeenCalled();
|
||||
expect(orchestrator.runOnce).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('loop-guard (anti-echo)', () => {
|
||||
it("does NOT schedule a cycle when the page row's source is 'git-sync'", async () => {
|
||||
jest.useFakeTimers();
|
||||
try {
|
||||
const { listener, orchestrator, pageRepo } = build();
|
||||
pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
spaceId: 'space-1',
|
||||
workspaceId: 'ws-1',
|
||||
lastUpdatedSource: 'git-sync',
|
||||
});
|
||||
await listener.handlePageEvent({ pageId: 'p1', workspaceId: 'ws-1' });
|
||||
jest.runOnlyPendingTimers();
|
||||
expect(orchestrator.runOnce).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
jest.useRealTimers();
|
||||
}
|
||||
});
|
||||
|
||||
it('schedules exactly one cycle for a normal (non-git-sync) source', async () => {
|
||||
jest.useFakeTimers();
|
||||
try {
|
||||
const { listener, orchestrator, pageRepo } = build();
|
||||
pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
spaceId: 'space-1',
|
||||
workspaceId: 'ws-1',
|
||||
lastUpdatedSource: 'user',
|
||||
});
|
||||
await listener.handlePageEvent({ pageId: 'p1', workspaceId: 'ws-1' });
|
||||
jest.runOnlyPendingTimers();
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledTimes(1);
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledWith('space-1', 'ws-1');
|
||||
} finally {
|
||||
jest.useRealTimers();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('missing page', () => {
  // A page that no longer exists may surface as either null or undefined from
  // the repo; both must short-circuit before any cycle is scheduled.
  it.each([
    ['null', null],
    ['undefined', undefined],
  ])('does not schedule when findById returns %s', async (_label, missing) => {
    jest.useFakeTimers();
    try {
      const { listener, orchestrator, pageRepo } = build();
      pageRepo.findById.mockResolvedValue(missing);
      await listener.handlePageEvent({ pageId: 'p1', workspaceId: 'ws-1' });
      // Flush anything that might have been (wrongly) scheduled.
      jest.runOnlyPendingTimers();
      expect(orchestrator.runOnce).not.toHaveBeenCalled();
    } finally {
      jest.useRealTimers();
    }
  });
});
|
||||
|
||||
describe('spaceId/workspaceId resolution', () => {
|
||||
// The page row used to fill in any ids the event omits.
|
||||
const pageRow = {
|
||||
id: 'p1',
|
||||
spaceId: 'row-space',
|
||||
workspaceId: 'row-ws',
|
||||
lastUpdatedSource: 'user',
|
||||
};
|
||||
|
||||
async function resolve(event: Record<string, unknown>) {
|
||||
jest.useFakeTimers();
|
||||
try {
|
||||
const { listener, orchestrator, pageRepo } = build();
|
||||
pageRepo.findById.mockResolvedValue(pageRow);
|
||||
await listener.handlePageEvent(event as any);
|
||||
jest.runOnlyPendingTimers();
|
||||
return { orchestrator, pageRepo };
|
||||
} finally {
|
||||
jest.useRealTimers();
|
||||
}
|
||||
}
|
||||
|
||||
it("resolves pageId + event.spaceId + event.workspaceId", async () => {
|
||||
const { orchestrator, pageRepo } = await resolve({
|
||||
pageId: 'p1',
|
||||
spaceId: 'evt-space',
|
||||
workspaceId: 'evt-ws',
|
||||
});
|
||||
expect(pageRepo.findById).toHaveBeenCalledWith('p1', { includeContent: false });
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledWith('evt-space', 'evt-ws');
|
||||
});
|
||||
|
||||
it('resolves pageId from pageIds[0]', async () => {
|
||||
const { orchestrator, pageRepo } = await resolve({
|
||||
pageIds: ['p1', 'p2'],
|
||||
spaceId: 'evt-space',
|
||||
workspaceId: 'evt-ws',
|
||||
});
|
||||
expect(pageRepo.findById).toHaveBeenCalledWith('p1', { includeContent: false });
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledWith('evt-space', 'evt-ws');
|
||||
});
|
||||
|
||||
it('resolves pageId + spaceId from pages[]', async () => {
|
||||
const { orchestrator } = await resolve({
|
||||
pages: [{ id: 'p1', spaceId: 'pages-space' }],
|
||||
workspaceId: 'evt-ws',
|
||||
});
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledWith('pages-space', 'evt-ws');
|
||||
});
|
||||
|
||||
it('resolves pageId + spaceId from node', async () => {
|
||||
const { orchestrator } = await resolve({
|
||||
node: { id: 'p1', spaceId: 'node-space' },
|
||||
workspaceId: 'evt-ws',
|
||||
});
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledWith('node-space', 'evt-ws');
|
||||
});
|
||||
|
||||
it('falls back to the fetched page row when the event omits spaceId/workspaceId', async () => {
|
||||
const { orchestrator } = await resolve({ pageId: 'p1' });
|
||||
// No spaceId/workspaceId on the event -> use the page row's values.
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledWith('row-space', 'row-ws');
|
||||
});
|
||||
});
|
||||
|
||||
describe('debounce coalescing', () => {
|
||||
it('collapses a burst of N events for one space into exactly one runOnce', async () => {
|
||||
jest.useFakeTimers();
|
||||
try {
|
||||
const { listener, orchestrator, pageRepo } = build({ debounceMs: 500 });
|
||||
pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
spaceId: 'space-1',
|
||||
workspaceId: 'ws-1',
|
||||
lastUpdatedSource: 'user',
|
||||
});
|
||||
|
||||
// Fire a burst of 5 events; await each so its findById promise settles
|
||||
// and schedule() runs before the next event resets the timer.
|
||||
for (let i = 0; i < 5; i++) {
|
||||
await listener.handlePageEvent({ pageId: 'p1', workspaceId: 'ws-1' });
|
||||
}
|
||||
|
||||
// Nothing fired yet (still within the debounce window).
|
||||
expect(orchestrator.runOnce).not.toHaveBeenCalled();
|
||||
|
||||
// Advance past the debounce window: the coalesced cycle fires once.
|
||||
jest.advanceTimersByTime(500);
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledTimes(1);
|
||||
expect(orchestrator.runOnce).toHaveBeenCalledWith('space-1', 'ws-1');
|
||||
} finally {
|
||||
jest.useRealTimers();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('onModuleDestroy', () => {
|
||||
it('clears every pending debounce timer and empties the map', async () => {
|
||||
jest.useFakeTimers();
|
||||
const clearSpy = jest.spyOn(global, 'clearTimeout');
|
||||
try {
|
||||
const { listener, orchestrator, pageRepo } = build({ debounceMs: 500 });
|
||||
pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
spaceId: 'space-1',
|
||||
workspaceId: 'ws-1',
|
||||
lastUpdatedSource: 'user',
|
||||
});
|
||||
|
||||
// Schedule a pending cycle, then tear the module down before it fires.
|
||||
await listener.handlePageEvent({ pageId: 'p1', workspaceId: 'ws-1' });
|
||||
clearSpy.mockClear(); // ignore any clears done by schedule() itself
|
||||
|
||||
listener.onModuleDestroy();
|
||||
|
||||
// The pending timer was cleared and the map drained, so advancing past
|
||||
// the debounce window fires NO cycle.
|
||||
expect(clearSpy).toHaveBeenCalledTimes(1);
|
||||
expect((listener as any).debounce.size).toBe(0);
|
||||
jest.advanceTimersByTime(500);
|
||||
expect(orchestrator.runOnce).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
clearSpy.mockRestore();
|
||||
jest.useRealTimers();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('error swallowing', () => {
|
||||
it('does not throw and logs a warning when findById throws', async () => {
|
||||
const warnSpy = jest
|
||||
.spyOn(Logger.prototype, 'warn')
|
||||
.mockImplementation(() => undefined);
|
||||
try {
|
||||
const { listener, orchestrator, pageRepo } = build();
|
||||
pageRepo.findById.mockRejectedValue(new Error('db down'));
|
||||
|
||||
await expect(
|
||||
listener.handlePageEvent({ pageId: 'p1', workspaceId: 'ws-1' }),
|
||||
).resolves.toBeUndefined();
|
||||
|
||||
expect(warnSpy).toHaveBeenCalledTimes(1);
|
||||
expect(String(warnSpy.mock.calls[0][0])).toContain('db down');
|
||||
expect(orchestrator.runOnce).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
warnSpy.mockRestore();
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,156 @@
|
||||
import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common';
|
||||
import { OnEvent } from '@nestjs/event-emitter';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { EnvironmentService } from '../../environment/environment.service';
|
||||
import { GitSyncOrchestrator } from '../services/git-sync.orchestrator';
|
||||
import { GIT_SYNC_PAGE_EVENTS } from '../git-sync.constants';
|
||||
|
||||
/**
 * Shape of the page domain events the listener consumes. Different emit sites
 * carry different fields (page.repo `PageEvent`, `PageMovedEvent`, etc.), so
 * this is the combined set of fields the listener reads, every one optional:
 *   - the affected page: a single `pageId`, a `pageIds` list, a `pages` list of
 *     `{ id, spaceId }`, or a single `node` `{ id, spaceId }`;
 *   - the `workspaceId`;
 *   - a top-level `spaceId` (present only on some events).
 * When no space id is carried on the event, it is resolved from the page row.
 */
interface PageEventLike {
  /** Ids of the affected pages; only the first is used by the listener. */
  pageIds?: string[];
  /** Id of the single affected page. */
  pageId?: string;
  /** Workspace the event belongs to, when the emitter provides it. */
  workspaceId?: string;
  /** Space the event belongs to, when the emitter provides it. */
  spaceId?: string;
  /** Affected pages together with their space ids. */
  pages?: { id: string; spaceId: string }[];
  /** A single affected page together with its space id. */
  node?: { id: string; spaceId: string };
}
|
||||
|
||||
/**
 * Event-driven trigger for the git-sync control plane. Listens to the page
 * lifecycle events and schedules a DEBOUNCED
 * `orchestrator.runOnce(spaceId, workspaceId)` for the page's space, so a burst
 * of edits collapses into a single cycle per space.
 *
 * Loop-guard (best-effort): when the fetched page row reads
 * `lastUpdatedSource === 'git-sync'` the event is treated as the orchestrator's
 * OWN write and skipped, avoiding a write -> event -> sync echo. The row is
 * fetched for every event (structural ones included), so the guard always runs.
 * This is only the cheap first guard; a fuller bodyHash + updatedAt guard is a
 * later hardening step and is not implemented here. The poll-safety interval is
 * expected to converge anything this guard drops.
 */
@Injectable()
export class PageChangeListener implements OnModuleDestroy {
  private readonly logger = new Logger(PageChangeListener.name);

  // Pending debounce timer per spaceId. The timer callback closes over its own
  // workspaceId, so only the handle needs to be tracked.
  private readonly debounce = new Map<string, NodeJS.Timeout>();

  constructor(
    private readonly environmentService: EnvironmentService,
    private readonly orchestrator: GitSyncOrchestrator,
    private readonly pageRepo: PageRepo,
  ) {}

  /**
   * Single handler bound to ALL git-sync page events (array form of `@OnEvent`).
   * Does nothing when git-sync is disabled. Otherwise reads the page row once
   * (without content) to apply the loop-guard and to fill in a missing
   * space/workspace id, then schedules the debounced cycle. Any error is logged
   * as a warning and swallowed.
   */
  @OnEvent(GIT_SYNC_PAGE_EVENTS as unknown as string[])
  async handlePageEvent(event: PageEventLike): Promise<void> {
    const syncDisabled = !this.environmentService.isGitSyncEnabled();
    if (syncDisabled) return;

    try {
      const targetPageId = this.firstPageId(event);
      if (!targetPageId) return;

      // Always read the row: it is the source for the loop-guard AND the
      // fallback for ids the event does not carry.
      const row = await this.pageRepo.findById(targetPageId, {
        includeContent: false,
      });

      // A missing (hard-deleted) page is ignored; a row last written by
      // git-sync is our own echo and is skipped.
      const isOwnWrite = row?.lastUpdatedSource === 'git-sync';
      if (!row || isOwnWrite) return;

      // Ids on the event win; the row fills the gaps.
      const targetSpaceId =
        this.eventSpaceId(event, targetPageId) ?? row.spaceId;
      const targetWorkspaceId = event.workspaceId ?? row.workspaceId;

      if (targetSpaceId && targetWorkspaceId) {
        this.schedule(targetSpaceId, targetWorkspaceId);
      }
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      this.logger.warn(`git-sync: failed to handle page event: ${reason}`);
    }
  }

  /**
   * The first affected page id, probing the event shapes in priority order:
   * `pageId`, `pageIds[0]`, `pages[0].id`, then `node.id`.
   */
  private firstPageId(event: PageEventLike): string | undefined {
    const direct = event.pageId;
    if (direct !== undefined && direct !== null) return direct;

    const fromIdList = event.pageIds?.[0];
    if (fromIdList !== undefined && fromIdList !== null) return fromIdList;

    const fromPageList = event.pages?.[0]?.id;
    if (fromPageList !== undefined && fromPageList !== null) {
      return fromPageList;
    }

    return event.node?.id;
  }

  /**
   * The space id carried by the event itself: the top-level `spaceId`, else the
   * `pages` entry matching `pageId`, else `node` when it is that page.
   */
  private eventSpaceId(
    event: PageEventLike,
    pageId: string,
  ): string | undefined {
    return (
      event.spaceId ||
      event.pages?.find((candidate) => candidate.id === pageId)?.spaceId ||
      (event.node?.id === pageId ? event.node.spaceId : undefined)
    );
  }

  /**
   * Shutdown hook: cancel every pending debounce timer and empty the map so no
   * not-yet-fired cycle runs against a module that is being torn down.
   */
  onModuleDestroy(): void {
    this.debounce.forEach((pending) => {
      clearTimeout(pending);
    });
    this.debounce.clear();
  }

  /**
   * Per-space debounce. A new event cancels the space's pending timer and starts
   * a fresh one, so a burst yields a single cycle. When the timer fires, the map
   * entry is removed and `runOnce` is started without being awaited; a rejection
   * is logged as an error.
   */
  private schedule(spaceId: string, workspaceId: string): void {
    const pending = this.debounce.get(spaceId);
    if (pending) clearTimeout(pending);

    const delayMs = this.environmentService.getGitSyncDebounceMs();

    const fire = (): void => {
      this.debounce.delete(spaceId);
      void this.orchestrator.runOnce(spaceId, workspaceId).catch((err) => {
        const reason = err instanceof Error ? err.message : String(err);
        return this.logger.error(
          `git-sync: debounced cycle for space ${spaceId} failed: ${reason}`,
        );
      });
    };

    const handle = setTimeout(fire, delayMs);

    // Do not keep the event loop alive solely for a pending sync.
    handle.unref?.();
    this.debounce.set(spaceId, handle);
  }
}
|
||||
@@ -0,0 +1,180 @@
|
||||
import * as Y from 'yjs';
|
||||
|
||||
import { mergeXmlFragments3Way } from './yjs-body-merge';
|
||||
|
||||
/**
|
||||
* Convergence repro for the git-ingest "silent revert" data-loss bug.
|
||||
*
|
||||
* ROOT CAUSE (confirmed): the merge logic itself is correct, but the git-ingest
|
||||
* write was applied via `openDirectConnection` on whichever instance/process
|
||||
* runs git-sync (the api/worker). When an editor is connected to a DIFFERENT
|
||||
* collab instance/process, that opens a SEPARATE, detached Y.Doc. The merge
|
||||
* lands in that detached doc (and the DB), but the live editor's Y.Doc never
|
||||
* receives the Yjs update — so its next debounced autosave overwrites the DB
|
||||
* with its STALE state and silently reverts the git change.
|
||||
*
|
||||
* These tests reproduce the invariant deterministically at the Yjs level (two
|
||||
* Y.Docs exchanging updates), because the real failure is DISTRIBUTED — it only
|
||||
* manifests when the write and the editor live on different instances, which a
|
||||
* single in-process Hocuspocus cannot reproduce (in one process the direct
|
||||
* connection already shares the editor's doc). HONEST SCOPE: this models the two
|
||||
* outcomes; full cross-instance convergence is not (and cannot be) proven in a
|
||||
* unit test without a live multi-instance Hocuspocus + redis.
|
||||
*
|
||||
* PATH B (the BUG): the git update is NOT delivered to the editor's doc — the
|
||||
* editor's later autosave reverts the change. Asserts the LOSS.
|
||||
* PATH A (the FIX): the git update IS delivered to the editor's doc as a Yjs
|
||||
* update — which is exactly what running the merge on the OWNING instance's
|
||||
* shared Document does (its update is broadcast to every connection). The
|
||||
* editor's CRDT converges and a later autosave preserves the git change.
|
||||
*
|
||||
* The fix routes git-sync's body write through CollaborationGateway.writePageBody
|
||||
* (the custom-event channel) so it executes on the owning instance — turning
|
||||
* PATH B into PATH A.
|
||||
*/
|
||||
|
||||
type Spec = { text: string; id?: string };
|
||||
|
||||
// Fill `doc`'s 'default' Y.XmlFragment with one <paragraph> element per spec and
// return the fragment. The `id` attribute is written only when the spec carries
// one: the live doc has block UniqueIDs, a git-parsed body does not. Each
// paragraph holds a single Y.XmlText child (left empty for an empty `text`).
function buildFragment(doc: Y.Doc, specs: Spec[]): Y.XmlFragment {
  const fragment = doc.getXmlFragment('default');
  const paragraphs: Y.XmlElement[] = [];

  for (const { text, id } of specs) {
    const paragraph = new Y.XmlElement('paragraph');
    if (id) paragraph.setAttribute('id', id);

    const content = new Y.XmlText();
    if (text) content.insert(0, text);

    paragraph.insert(0, [content]);
    paragraphs.push(paragraph);
  }

  if (paragraphs.length > 0) fragment.insert(0, paragraphs);
  return fragment;
}
|
||||
|
||||
// Flatten a fragment into one string per top-level block: the string forms of
// the block's children, concatenated in order.
const texts = (frag: Y.XmlFragment): string[] => {
  const rendered: string[] = [];
  for (const block of frag.toArray()) {
    const pieces = (block as Y.XmlElement)
      .toArray()
      .map((child) => (child as Y.XmlText).toString());
    rendered.push(pieces.join(''));
  }
  return rendered;
};
|
||||
|
||||
// Stand-in for a connected editor's autosave-triggering keystroke: append `mark`
// (default '!') to the end of the first text node of block `blockIndex`, inside
// a single transaction on `doc`.
function humanEdit(doc: Y.Doc, blockIndex: number, mark = '!'): void {
  const block = doc.getXmlFragment('default').get(blockIndex) as Y.XmlElement;
  const textNode = block.get(0) as Y.XmlText;
  doc.transact(() => {
    textNode.insert(textNode.length, mark);
  });
}
|
||||
|
||||
describe('git-ingest convergence with an open editor', () => {
|
||||
// Shared fixture. The page is persisted with two blocks that carry live ids; the
// server-side ingest doc and the connected editor's doc are each hydrated from
// that same persisted update, so they start fully in sync — like two instances
// that both loaded the page from the DB.
function setup() {
  const persistedDoc = new Y.Doc();
  buildFragment(persistedDoc, [
    { text: 'alpha', id: 'p1' },
    { text: 'beta', id: 'p2' },
  ]);
  const state0 = Y.encodeStateAsUpdate(persistedDoc);

  const hydrate = (): Y.Doc => {
    const replica = new Y.Doc();
    Y.applyUpdate(replica, state0);
    return replica;
  };
  const server = hydrate(); // where the git merge is applied
  const editor = hydrate(); // the browser's live in-memory doc

  // Merge base: the last-synced content as parsed from git markdown (no ids),
  // textually equal to the pre-change page.
  const baseFrag = buildFragment(new Y.Doc(), [
    { text: 'alpha' },
    { text: 'beta' },
  ]);

  return { state0, server, editor, baseFrag };
}
|
||||
|
||||
// Apply the git-side change to `server` and return the Yjs update it produced.
// git rewrote the SECOND block (beta -> beta2) and left the first one alone; the
// editor is idle on that block. The 3-way merge runs inside one transaction on
// the server doc's 'default' fragment, with `baseFrag` as the merge base.
//
// Throws if the merge emitted no update at all, so a no-op merge fails loudly
// here instead of handing a null update to `Y.applyUpdate` later.
function applyGitMerge(server: Y.Doc, baseFrag: Y.XmlFragment): Uint8Array {
  const targetFrag = buildFragment(new Y.Doc(), [
    { text: 'alpha' },
    { text: 'beta2' },
  ]);

  // A single transact normally yields one update covering the whole merge; any
  // extra updates are collected too and merged below.
  const updates: Uint8Array[] = [];
  const onUpdate = (update: Uint8Array): void => {
    updates.push(update);
  };

  server.on('update', onUpdate);
  try {
    server.transact(() =>
      mergeXmlFragments3Way(
        server.getXmlFragment('default'),
        targetFrag,
        baseFrag,
      ),
    );
  } finally {
    // Detach even when the merge throws so the listener never outlives the call.
    server.off('update', onUpdate);
  }

  const [first, ...rest] = updates;
  if (!first) {
    throw new Error('applyGitMerge: the merge emitted no Yjs update');
  }
  return rest.length > 0 ? Y.mergeUpdates(updates) : first;
}
|
||||
|
||||
it('PATH B (the BUG): undelivered git update is reverted by the editor autosave — DATA LOSS', () => {
  const { server, editor, baseFrag } = setup();

  // The merge is applied to the server doc and nowhere else.
  applyGitMerge(server, baseFrag);
  const serverTexts = texts(server.getXmlFragment('default'));
  expect(serverTexts).toEqual(['alpha', 'beta2']);

  // The editor's doc is detached (another instance) and never sees the update.
  // It edits block 0 and autosaves its complete state.
  humanEdit(editor, 0);
  const autosave = Y.encodeStateAsUpdate(editor);
  const persisted = new Y.Doc();
  Y.applyUpdate(persisted, autosave);

  // The bug: git's 'beta2' has been replaced by the editor's stale 'beta'.
  const persistedTexts = texts(persisted.getXmlFragment('default'));
  expect(persistedTexts).toEqual(['alpha!', 'beta']);
});
|
||||
|
||||
it('PATH A (the FIX): delivering the git update to the editor converges — git change SURVIVES', () => {
  const { server, editor, baseFrag } = setup();

  // Merge on the server doc and keep the update it would broadcast.
  const gitUpdate = applyGitMerge(server, baseFrag);

  // On the owning instance the update reaches the connected editor
  // (Document.handleUpdate); model that by applying it to the editor's doc.
  Y.applyUpdate(editor, gitUpdate);
  const editorTexts = texts(editor.getXmlFragment('default'));
  expect(editorTexts).toEqual(['alpha', 'beta2']);

  // An unrelated edit on block 0 triggers the autosave; the saved state still
  // contains git's change.
  humanEdit(editor, 0);
  const autosave = Y.encodeStateAsUpdate(editor);
  const persisted = new Y.Doc();
  Y.applyUpdate(persisted, autosave);

  const persistedTexts = texts(persisted.getXmlFragment('default'));
  expect(persistedTexts).toEqual(['alpha!', 'beta2']);
});
|
||||
|
||||
it('PATH A — concurrent edits to DIFFERENT paragraphs both survive (finding #2)', () => {
  const { server, editor, baseFrag } = setup();

  // Human edit on block 0 happens before the push arrives (concurrent with it).
  humanEdit(editor, 0, ' EDIT');

  // git touches block 1: merge on the server, then deliver the update.
  const gitUpdate = applyGitMerge(server, baseFrag);
  Y.applyUpdate(editor, gitUpdate);

  // The saved state keeps both the human's block-0 edit and git's block-1 change.
  const autosave = Y.encodeStateAsUpdate(editor);
  const persisted = new Y.Doc();
  Y.applyUpdate(persisted, autosave);

  const persistedTexts = texts(persisted.getXmlFragment('default'));
  expect(persistedTexts).toEqual(['alpha EDIT', 'beta2']);
});
|
||||
});
|
||||
@@ -0,0 +1,524 @@
|
||||
// Unit tests for the git-sync control plane. The engine's `runCycle`
|
||||
// (which owns the PULL->PUSH branch choreography) is mocked so we exercise ONLY
|
||||
// the orchestrator's wiring: gating, the Redis leader lock + in-process mutex
|
||||
// (via SpaceLockService),
|
||||
// the remote-template substitution in the settings it hands the engine, the
|
||||
// external-push ingest, and the idempotent interval lifecycle. The cycle
|
||||
// mechanics themselves are covered by the engine's own cycle round-trip spec.
|
||||
//
|
||||
// The engine mock must be declared before importing the orchestrator so the
|
||||
// runtime `loadGitSync()` bridge resolves to the mocked `runCycle` (the ESM
|
||||
// `@docmost/git-sync` package cannot be `require()`d under jest). The `mock`
|
||||
// prefix lets the hoisted factory reference it.
|
||||
// Stand-in for the engine's `runCycle`. The `mock` prefix is what allows the
// hoisted `jest.mock` factory below to reference it.
const mockRunCycle = jest.fn();

// Replace the loader so `loadGitSync()` resolves to an engine whose only member
// is the mocked `runCycle`.
jest.mock('../git-sync.loader', () => {
  const loadGitSync = jest.fn(async () => ({ runCycle: mockRunCycle }));
  return { loadGitSync };
});
|
||||
|
||||
import { Logger } from '@nestjs/common';
|
||||
import {
|
||||
Kysely,
|
||||
DummyDriver,
|
||||
PostgresAdapter,
|
||||
PostgresIntrospector,
|
||||
PostgresQueryCompiler,
|
||||
CompiledQuery,
|
||||
} from 'kysely';
|
||||
import {
|
||||
GitSyncOrchestrator,
|
||||
GitSyncLockHeldError,
|
||||
} from './git-sync.orchestrator';
|
||||
import { SpaceLockService } from './space-lock.service';
|
||||
|
||||
type AnyMock = jest.Mock;
|
||||
|
||||
const runCycleMock = mockRunCycle as unknown as AnyMock;
|
||||
|
||||
/**
 * Canned result of a clean engine cycle: it ran, the PULL wrote and deleted
 * nothing without conflict, and the PUSH applied with zero failures.
 */
const OK_CYCLE = {
  ran: true,
  pull: {
    written: 0,
    deleted: 0,
    conflict: false,
  },
  push: {
    mode: 'apply',
    failures: 0,
  },
};
|
||||
|
||||
/**
 * Overrides accepted by `build()`. Only the load-bearing env tunables are
 * exposed; everything else in the harness is fixed.
 */
interface BuildOptions {
  /** Value returned by `isGitSyncEnabled()` (default: true). */
  enabled?: boolean;
  /**
   * Value returned by `getGitSyncServiceUserId()`. Omit the key for the default
   * 'svc-user'; pass `undefined` explicitly to model a missing service user.
   */
  serviceUserId?: string | undefined;
  /** Value returned by `getGitSyncRemoteTemplate()` (default: undefined). */
  remoteTemplate?: string | undefined;
  /** Value returned by `getGitSyncDataDir()` (default: '/vaults'). */
  dataDir?: string;
  /** Value returned by `getGitSyncPollIntervalMs()` (default: 15000). */
  pollIntervalMs?: number;
  /** Value returned by `getGitSyncDebounceMs()` (default: 2000). */
  debounceMs?: number;
  /** Entries spread over the fake vault so a test can replace its methods. */
  vaultOverrides?: Record<string, unknown>;
  /**
   * Row handed back by the stubbed `executeTakeFirst`, which `buildSettings`
   * reads for the per-space `autoMergeConflicts` flag. Omit the key for the
   * SAFE off value; pass `undefined` explicitly to model a missing row (no
   * space / no settings).
   */
  settingsRow?: { autoMergeConflicts: boolean } | undefined;
}
|
||||
|
||||
/** Everything `build()` hands back: the orchestrator under test plus each fake it was wired with. */
interface Built {
  /** The orchestrator instance under test. */
  orchestrator: GitSyncOrchestrator;
  /** Fake environment service (one mock per getter). */
  env: Record<string, AnyMock>;
  /** Fake datasource; `bind` returns `client`. */
  dataSource: { bind: AnyMock };
  /** The client object that `dataSource.bind` returns. */
  client: Record<string, AnyMock>;
  /** Fake vault registry; `getVault` resolves to `vault`. */
  vaultRegistry: { getVault: AnyMock; vaultPath: AnyMock };
  /** The fake vault (all methods are mocks). */
  vault: Record<string, AnyMock>;
  /** Fake scheduler registry (`addInterval` / `deleteInterval`). */
  scheduler: Record<string, AnyMock>;
  /** Mock redis client used by the real SpaceLockService. */
  redis: { set: AnyMock; eval: AnyMock };
  /** Fake redis service; `getOrThrow` returns `redis`. */
  redisService: { getOrThrow: AnyMock };
  /** The chainable query-builder stub passed as the orchestrator's db. */
  db: unknown;
}
|
||||
|
||||
/**
 * Construct a GitSyncOrchestrator wired entirely to fakes and return it together
 * with those fakes so a test can assert on or re-program them.
 *
 * @param opts - Optional overrides; see `BuildOptions` for each default.
 * @returns The orchestrator and every collaborator it was constructed with.
 */
function build(opts: BuildOptions = {}): Built {
  const {
    enabled = true,
    remoteTemplate,
    dataDir = '/vaults',
    pollIntervalMs = 15000,
    debounceMs = 2000,
    vaultOverrides = {},
  } = opts;

  // `in` (not a destructuring default) keeps "key omitted" apart from "key
  // present but undefined": the latter deliberately models a MISSING settings
  // row / an unconfigured service user.
  const settingsRow =
    'settingsRow' in opts ? opts.settingsRow : { autoMergeConflicts: false };
  const serviceUserId = 'serviceUserId' in opts ? opts.serviceUserId : 'svc-user';

  // Tiny factories for the two most common mock shapes.
  const returns = <T>(value: T): AnyMock => jest.fn(() => value);
  const resolvesUndefined = (): AnyMock => jest.fn(async () => undefined);

  const env: Record<string, AnyMock> = {
    isGitSyncEnabled: returns(enabled),
    getGitSyncServiceUserId: returns(serviceUserId),
    getGitSyncRemoteTemplate: returns(remoteTemplate),
    getGitSyncDataDir: returns(dataDir),
    getGitSyncPollIntervalMs: returns(pollIntervalMs),
    getGitSyncDebounceMs: returns(debounceMs),
  };

  // The read-side / write-side client the datasource hands back.
  const client: Record<string, AnyMock> = {
    listSpaceTree: jest.fn(async () => ({ pages: [], complete: true })),
    deletePage: resolvesUndefined(),
    createPage: resolvesUndefined(),
    updatePageBody: resolvesUndefined(),
  };
  const dataSource = { bind: returns(client) };

  // Fake VaultGit: every method is a jest.fn; overrides are spread last so they win.
  const vault: Record<string, AnyMock> = {
    assertGitAvailable: resolvesUndefined(),
    ensureRepo: resolvesUndefined(),
    isMergeInProgress: jest.fn(async () => false),
    ensureBranch: resolvesUndefined(),
    checkout: resolvesUndefined(),
    listTrackedFiles: jest.fn(async () => []),
    ...(vaultOverrides as Record<string, AnyMock>),
  };
  const vaultRegistry = {
    getVault: jest.fn(async () => vault),
    vaultPath: jest.fn((spaceId: string) => `${dataDir}/${spaceId}`),
  };

  const scheduler: Record<string, AnyMock> = {
    addInterval: jest.fn(),
    deleteInterval: jest.fn(),
  };

  // Default: the lock is acquired ('OK') and released (1). Tests override per case.
  const redis = {
    set: jest.fn(async () => 'OK'),
    eval: jest.fn(async () => 1),
  };
  const redisService = { getOrThrow: returns(redis) };

  // Chainable Kysely stub: `select` / `where` return the same object,
  // `executeTakeFirst` yields the configured settings row (the per-space
  // `gitSync.autoMergeConflicts` read), and `execute` yields no rows.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const chain: any = {
    select: () => chain,
    where: () => chain,
    executeTakeFirst: async () => settingsRow,
    execute: async () => [],
  };
  const db = { selectFrom: () => chain };

  // The REAL SpaceLockService on top of the mock redis, so lock assertions
  // exercise the same `redis.set` / `redis.eval` mocks.
  const spaceLock = new SpaceLockService(redisService as any);

  const orchestrator = new GitSyncOrchestrator(
    env as any,
    dataSource as any,
    vaultRegistry as any,
    scheduler as any,
    spaceLock as any,
    db as any,
  );

  return {
    orchestrator,
    env,
    dataSource,
    client,
    vaultRegistry,
    vault,
    scheduler,
    redis,
    redisService,
    db,
  };
}
|
||||
|
||||
/** Make the mocked engine resolve every cycle to the clean `OK_CYCLE` result. */
function primeEngineHappyPath(): void {
  const cleanCycle = OK_CYCLE;
  runCycleMock.mockResolvedValue(cleanCycle);
}
|
||||
|
||||
// Every test starts with cleared call history and a happy-path engine.
beforeEach(function resetMocks(): void {
  jest.clearAllMocks();
  primeEngineHappyPath();
});
|
||||
|
||||
describe('GitSyncOrchestrator', () => {
|
||||
describe('runOnce gating', () => {
  it("short-circuits with skipped:'disabled' when git-sync is disabled", async () => {
    const ctx = build({ enabled: false });

    const status = await ctx.orchestrator.runOnce('space-1', 'ws-1');

    expect(status).toEqual({
      spaceId: 'space-1',
      ran: false,
      skipped: 'disabled',
    });
    // Neither the lock nor the vault was touched.
    expect(ctx.redis.set).not.toHaveBeenCalled();
    expect(ctx.vaultRegistry.getVault).not.toHaveBeenCalled();
  });

  it("returns skipped:'no-service-user' when the service user id is falsy", async () => {
    const ctx = build({ serviceUserId: undefined });

    const status = await ctx.orchestrator.runOnce('space-1', 'ws-1');

    expect(status).toEqual({
      spaceId: 'space-1',
      ran: false,
      skipped: 'no-service-user',
    });
    expect(ctx.redis.set).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
describe('in-process mutex', () => {
  it("a second runOnce while the first is in-flight returns skipped:'in-progress'", async () => {
    const ctx = build();

    let openGate!: () => void;
    const gate = new Promise<void>((resolve) => {
      openGate = resolve;
    });
    // Park the first cycle inside driveCycle: getVault waits on the gate.
    ctx.vaultRegistry.getVault.mockImplementationOnce(async () => {
      await gate;
      return ctx.vault;
    });

    const inFlight = ctx.orchestrator.runOnce('space-1', 'ws-1');
    // Two microtask turns so the first call joins the running set and takes the lock.
    await Promise.resolve();
    await Promise.resolve();

    const overlapping = await ctx.orchestrator.runOnce('space-1', 'ws-1');
    expect(overlapping).toEqual({
      spaceId: 'space-1',
      ran: false,
      skipped: 'in-progress',
    });

    openGate();
    await inFlight;
  });
});
|
||||
|
||||
describe('redis leader lock', () => {
  it("returns skipped:'lock-held' and cleans up the mutex when the lock is not acquired", async () => {
    const ctx = build();
    // The first acquire is refused (not 'OK'); every later one succeeds.
    ctx.redis.set.mockResolvedValueOnce(null).mockResolvedValue('OK');

    const refused = await ctx.orchestrator.runOnce('space-1', 'ws-1');
    expect(refused).toEqual({
      spaceId: 'space-1',
      ran: false,
      skipped: 'lock-held',
    });

    // The mutex was cleared: the next call acquires the lock and runs.
    const retried = await ctx.orchestrator.runOnce('space-1', 'ws-1');
    expect(retried.ran).toBe(true);
    expect(retried.skipped).toBeUndefined();
  });
});
|
||||
|
||||
describe('poisoned-space protection', () => {
  it('releases the lock and clears the mutex when the cycle throws, returning { error }', async () => {
    jest.spyOn(Logger.prototype, 'error').mockImplementation(() => undefined);
    const ctx = build();
    runCycleMock.mockRejectedValueOnce(new Error('boom'));

    const failed = await ctx.orchestrator.runOnce('space-1', 'ws-1');
    expect(failed.ran).toBe(false);
    expect(failed.error).toBe('boom');
    // The CAS release (eval) ran exactly once, so the space is not left "running".
    expect(ctx.redis.eval).toHaveBeenCalledTimes(1);

    // With the mutex cleared after the throw, a later call re-acquires and runs.
    runCycleMock.mockResolvedValue(OK_CYCLE);
    const recovered = await ctx.orchestrator.runOnce('space-1', 'ws-1');
    expect(recovered.ran).toBe(true);
  });
});
|
||||
|
||||
describe('cycle wiring', () => {
  it('drives runCycle with the space vault, the bound client, and settings', async () => {
    const ctx = build();

    await ctx.orchestrator.runOnce('space-1', 'ws-1');

    expect(runCycleMock).toHaveBeenCalledTimes(1);
    const deps = runCycleMock.mock.calls[0][0];
    expect(deps.spaceId).toBe('space-1');
    expect(deps.vault).toBe(ctx.vault);
    expect(deps.client).toBe(ctx.client);
    expect(deps.settings.vaultPath).toBe('/vaults/space-1');
    // The datasource is bound with the (workspace, service-user) identity.
    const expectedIdentity = { workspaceId: 'ws-1', userId: 'svc-user' };
    expect(ctx.dataSource.bind).toHaveBeenCalledWith(expectedIdentity);
  });

  it('threads autoMergeConflicts:true from the space settings row into the engine settings', async () => {
    const ctx = build({ settingsRow: { autoMergeConflicts: true } });

    await ctx.orchestrator.runOnce('space-1', 'ws-1');

    const deps = runCycleMock.mock.calls[0][0];
    expect(deps.settings.autoMergeConflicts).toBe(true);
  });

  it('defaults autoMergeConflicts to false when the settings row is missing', async () => {
    const ctx = build({ settingsRow: undefined });

    await ctx.orchestrator.runOnce('space-1', 'ws-1');

    const deps = runCycleMock.mock.calls[0][0];
    expect(deps.settings.autoMergeConflicts).toBe(false);
  });

  it("surfaces the engine's skipped status (e.g. merge-in-progress) verbatim", async () => {
    const ctx = build();
    runCycleMock.mockResolvedValue({ ran: false, skipped: 'merge-in-progress' });

    const status = await ctx.orchestrator.runOnce('space-1', 'ws-1');

    expect(status).toEqual({
      spaceId: 'space-1',
      ran: false,
      skipped: 'merge-in-progress',
    });
  });
});
|
||||
|
||||
describe('ingestExternalPush', () => {
  it('streams the receive-pack FIRST, then runs the Docmost cycle', async () => {
    const ctx = build();
    const sequence: string[] = [];
    const runReceivePack = jest.fn(async () => {
      sequence.push('receive-pack');
    });
    runCycleMock.mockImplementation(async () => {
      sequence.push('cycle');
      return OK_CYCLE;
    });

    await ctx.orchestrator.ingestExternalPush('space-1', 'ws-1', runReceivePack);

    expect(runReceivePack).toHaveBeenCalledTimes(1);
    // The cycle starts only once the pushed commits have landed on main.
    expect(sequence).toEqual(['receive-pack', 'cycle']);
  });

  it('throws GitSyncLockHeldError and does NOT run the receive-pack when the lock is held', async () => {
    const ctx = build();
    ctx.redis.set.mockResolvedValue(null); // acquire refused -> lock-held
    const runReceivePack = jest.fn(async () => undefined);

    const attempt = ctx.orchestrator.ingestExternalPush(
      'space-1',
      'ws-1',
      runReceivePack,
    );
    await expect(attempt).rejects.toBeInstanceOf(GitSyncLockHeldError);

    // The working tree is never written concurrently with a cycle.
    expect(runReceivePack).not.toHaveBeenCalled();
    expect(runCycleMock).not.toHaveBeenCalled();
  });

  it('swallows a post-push cycle error (the push is durable; poll retries)', async () => {
    jest.spyOn(Logger.prototype, 'error').mockImplementation(() => undefined);
    const ctx = build();
    // The cycle fails only AFTER the receive-pack has already succeeded.
    runCycleMock.mockRejectedValueOnce(new Error('cycle boom'));
    const runReceivePack = jest.fn(async () => undefined);

    // No rejection: a durable push must not be reported as failed.
    const attempt = ctx.orchestrator.ingestExternalPush(
      'space-1',
      'ws-1',
      runReceivePack,
    );
    await expect(attempt).resolves.toBeUndefined();

    expect(runReceivePack).toHaveBeenCalledTimes(1);
    // The lock is still released (CAS eval) despite the cycle error.
    expect(ctx.redis.eval).toHaveBeenCalled();
  });

  it('runs the receive-pack but SKIPS the cycle when no service user is configured', async () => {
    jest.spyOn(Logger.prototype, 'error').mockImplementation(() => undefined);
    const ctx = build({ serviceUserId: undefined });
    const runReceivePack = jest.fn(async () => undefined);

    const attempt = ctx.orchestrator.ingestExternalPush(
      'space-1',
      'ws-1',
      runReceivePack,
    );
    await expect(attempt).resolves.toBeUndefined();

    // The push is durable on main; the immediate cycle is skipped, not failed.
    expect(runReceivePack).toHaveBeenCalledTimes(1);
    expect(runCycleMock).not.toHaveBeenCalled();
  });

  it('refuses (LockHeldError) and runs nothing when git-sync is globally disabled', async () => {
    const ctx = build({ enabled: false });
    const runReceivePack = jest.fn(async () => undefined);

    const attempt = ctx.orchestrator.ingestExternalPush(
      'space-1',
      'ws-1',
      runReceivePack,
    );
    await expect(attempt).rejects.toBeInstanceOf(GitSyncLockHeldError);

    expect(runReceivePack).not.toHaveBeenCalled();
    expect(ctx.redis.set).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
describe('remote template substitution', () => {
  it('substitutes {spaceId} into the gitRemote settings handed to the engine', async () => {
    const template = 'git@h:vault-{spaceId}.git';
    const ctx = build({ remoteTemplate: template });

    await ctx.orchestrator.runOnce('space-42', 'ws-1');

    const deps = runCycleMock.mock.calls[0][0];
    expect(deps.settings.gitRemote).toBe('git@h:vault-space-42.git');
  });
});
|
||||
|
||||
describe('module lifecycle', () => {
  it('registers exactly one interval on init and tears it down idempotently on destroy', () => {
    jest.spyOn(Logger.prototype, 'log').mockImplementation(() => undefined);
    const { orchestrator, scheduler } = build();

    orchestrator.onModuleInit();
    expect(scheduler.addInterval).toHaveBeenCalledTimes(1);
    const registeredName = scheduler.addInterval.mock.calls[0][0];

    orchestrator.onModuleDestroy();
    expect(scheduler.deleteInterval).toHaveBeenCalledTimes(1);
    expect(scheduler.deleteInterval).toHaveBeenCalledWith(registeredName);

    // Destroying again deletes nothing further (guards against a double-delete).
    orchestrator.onModuleDestroy();
    expect(scheduler.deleteInterval).toHaveBeenCalledTimes(1);
  });

  it('registers nothing on init when git-sync is disabled', () => {
    const { orchestrator, scheduler } = build({ enabled: false });

    orchestrator.onModuleInit();

    expect(scheduler.addInterval).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
// Poll-safety backstop: a tick lists the STRICT opt-in spaces and calls runOnce
// for each of them. The private `pollTick()` is driven directly here, and
// `enabledSpaces()` is compiled separately to pin its opt-in SQL.
describe('pollTick + enabledSpaces (strict opt-in backstop)', () => {
  it('runs runOnce exactly once per enabled space, with the right (spaceId, workspaceId)', async () => {
    const ctx = build();
    const optedIn = [
      { spaceId: 'space-1', workspaceId: 'ws-1' },
      { spaceId: 'space-2', workspaceId: 'ws-2' },
    ];
    // Only the tick wiring is under test: both the enumeration and runOnce are stubbed.
    const runOnce = jest
      .spyOn(ctx.orchestrator, 'runOnce')
      .mockResolvedValue({ spaceId: 'x', ran: true });
    jest
      .spyOn(ctx.orchestrator as any, 'enabledSpaces')
      .mockResolvedValue(optedIn);

    await (ctx.orchestrator as any).pollTick();

    expect(runOnce).toHaveBeenCalledTimes(2);
    // Each space is reconciled with its OWN workspace id.
    expect(runOnce).toHaveBeenNthCalledWith(1, 'space-1', 'ws-1');
    expect(runOnce).toHaveBeenNthCalledWith(2, 'space-2', 'ws-2');
  });

  it('does NOT throw and runs nothing when the enabled-spaces query throws (try/catch backstop)', async () => {
    jest.spyOn(Logger.prototype, 'error').mockImplementation(() => undefined);
    const ctx = build();
    const runOnce = jest.spyOn(ctx.orchestrator, 'runOnce');
    jest
      .spyOn(ctx.orchestrator as any, 'enabledSpaces')
      .mockRejectedValue(new Error('db down'));

    // A failed enumeration is swallowed by pollTick so the interval survives.
    const tick = (ctx.orchestrator as any).pollTick();
    await expect(tick).resolves.toBeUndefined();

    expect(runOnce).not.toHaveBeenCalled();
  });

  it('early-returns (no enumeration, no runOnce) when git-sync is disabled', async () => {
    const ctx = build({ enabled: false });
    const enumerate = jest.spyOn(ctx.orchestrator as any, 'enabledSpaces');
    const runOnce = jest.spyOn(ctx.orchestrator, 'runOnce');

    await (ctx.orchestrator as any).pollTick();

    // The master switch gates the tick before any DB work.
    expect(enumerate).not.toHaveBeenCalled();
    expect(runOnce).not.toHaveBeenCalled();
  });

  it('compiles the STRICT opt-in enumeration SQL (spaces, deletedAt is null, enabled flag)', async () => {
    // A compile-only Kysely (DummyDriver) whose `log` hook records the query
    // that `enabledSpaces()` really builds. DummyDriver returns no rows, so only
    // the SQL shape is asserted.
    const ctx = build();
    let captured: CompiledQuery | undefined;
    const compileOnlyDb = new Kysely<any>({
      dialect: {
        createAdapter: () => new PostgresAdapter(),
        createDriver: () => new DummyDriver(),
        createIntrospector: (d) => new PostgresIntrospector(d),
        createQueryCompiler: () => new PostgresQueryCompiler(),
      },
      log: (event) => {
        if (event.level === 'query') captured = event.query as CompiledQuery;
      },
    });
    // Replace the injected db with the compile-only instance.
    (ctx.orchestrator as any).db = compileOnlyDb;

    const rows = await (ctx.orchestrator as any).enabledSpaces();

    // No rows from DummyDriver -> empty opt-in list (the no-space default).
    expect(rows).toEqual([]);
    expect(captured).toBeDefined();

    const sql = captured!.sql.replace(/\s+/g, ' ');
    expect(sql).toContain('from "spaces"');
    // Live spaces only.
    expect(sql).toContain('"deletedAt" is null');
    // STRICT per-space opt-in: the raw jsonb flag predicate, verbatim.
    expect(sql).toContain(`settings->'gitSync'->>'enabled' = 'true'`);
  });
});
|
||||
});
|
||||
@@ -0,0 +1,371 @@
|
||||
import {
|
||||
Injectable,
|
||||
Logger,
|
||||
OnModuleDestroy,
|
||||
OnModuleInit,
|
||||
} from '@nestjs/common';
|
||||
import { SchedulerRegistry } from '@nestjs/schedule';
|
||||
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||
import { sql } from 'kysely';
|
||||
import type { Settings } from '@docmost/git-sync';
|
||||
import { loadGitSync } from '../git-sync.loader';
|
||||
import { EnvironmentService } from '../../environment/environment.service';
|
||||
import { GitmostDataSourceService } from './gitmost-datasource.service';
|
||||
import { VaultRegistryService } from './vault-registry.service';
|
||||
import { SpaceLockService } from './space-lock.service';
|
||||
|
||||
/**
 * One target of the poll loop: the id of a space to reconcile, paired with the
 * id of the workspace that space lives in.
 */
interface EnabledSpace {
  /** Id of the space to reconcile. */
  spaceId: string;
  /** Id of the workspace the space lives in. */
  workspaceId: string;
}
|
||||
|
||||
/**
 * Raised by `ingestExternalPush` when it cannot take the per-space lock because
 * a poll cycle is mid-flight on this or another replica. The /git HTTP handler
 * turns it into a 503 so the git client retries instead of racing a cycle's
 * working-tree checkout/merge.
 */
export class GitSyncLockHeldError extends Error {
  /** Id of the space whose lock could not be acquired. */
  public readonly spaceId: string;

  constructor(spaceId: string) {
    super(`git-sync: space ${spaceId} is busy (lock held); retry the push`);
    this.spaceId = spaceId;
    this.name = 'GitSyncLockHeldError';
  }
}
|
||||
|
||||
/** Compact outcome of one `runOnce` call (consumed by the admin trigger and the logs). */
export interface GitSyncRunStatus {
  /** The space the call was made for. */
  spaceId: string;
  /** Whether a cycle actually ran. */
  ran: boolean;
  /**
   * Set when the cycle did not run: the lock is held elsewhere, a cycle is
   * already in progress on this instance, git-sync is disabled, no service user
   * is configured, or the vault has a merge in progress. A thrown failure is
   * reported through `error`, not here.
   */
  skipped?:
    | 'lock-held'
    | 'in-progress'
    | 'disabled'
    | 'no-service-user'
    | 'merge-in-progress';
  /** PULL summary: counts of written and deleted items, plus a conflict flag. */
  pull?: { written: number; deleted: number; conflict: boolean };
  /** PUSH summary: the mode used and the number of failures. */
  push?: { mode: string; failures: number };
  /** Message of the error that aborted the cycle, when one was thrown. */
  error?: string;
}
|
||||
|
||||
/**
|
||||
* The git-sync control plane. Drives the vendored engine in
|
||||
* process: under a Redis leader lock (single-writer across replicas) plus an
|
||||
* in-process per-space mutex (no overlapping cycles on one instance), it runs a
|
||||
* PULL (Docmost -> vault) then a PUSH (vault -> Docmost) for a space.
|
||||
*
|
||||
* Enumeration of enabled spaces: STRICT opt-in. Only spaces whose
|
||||
* per-space flag `space.settings.gitSync.enabled === true` (written by the Phase-C
|
||||
* UI) are reconciled. There is intentionally NO all-spaces fallback: when no space
|
||||
* carries the flag, git-sync does NOTHING (an empty list) — flagging every space
|
||||
* the moment GIT_SYNC_ENABLED flips on is a safety hazard (it could mass-sync large
|
||||
* spaces). The whole loop is still gated on the GIT_SYNC_ENABLED master switch
|
||||
* first; per-space opt-in is now REQUIRED on top of it.
|
||||
*/
|
||||
@Injectable()
export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
  /** Registered interval name (shared by registration + teardown). */
  private static readonly POLL_INTERVAL_NAME = 'git-sync-poll';

  private readonly logger = new Logger(GitSyncOrchestrator.name);

  /** The registered poll-interval name, or null when none is registered. */
  private pollIntervalName: string | null = null;

  constructor(
    private readonly environmentService: EnvironmentService,
    private readonly dataSource: GitmostDataSourceService,
    private readonly vaultRegistry: VaultRegistryService,
    private readonly schedulerRegistry: SchedulerRegistry,
    private readonly spaceLock: SpaceLockService,
    @InjectKysely() private readonly db: KyselyDB,
  ) {}

  /**
   * Render a caught value for a log line / status field: the `message` of an
   * `Error`, otherwise the value's string form.
   */
  private static errorMessage(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  // --- enabled-space enumeration --------------------------------

  /**
   * Enumerate the spaces the poll loop should reconcile. STRICT opt-in: ONLY
   * live (`deletedAt is null`) spaces carrying the per-space flag
   * (`settings->'gitSync'->>'enabled' = 'true'`) are returned. There is
   * intentionally NO fallback to "all live spaces" — when no space is flagged
   * this returns an empty list and git-sync does nothing. The GIT_SYNC_ENABLED
   * master switch gates whether the loop runs at all; this flag gates which
   * spaces.
   */
  private async enabledSpaces(): Promise<EnabledSpace[]> {
    return this.db
      .selectFrom('spaces')
      .select(['id as spaceId', 'workspaceId'])
      .where('deletedAt', 'is', null)
      .where(sql<boolean>`settings->'gitSync'->>'enabled' = 'true'`)
      .execute();
  }

  // --- one sync cycle for a space -------------------------------

  /**
   * Build the engine `Settings` for a space. The engine's REST-era fields
   * (docmostApiUrl/email/password) are unused on the native path — the
   * datasource writes in-process — so they are placeholders; only `vaultPath`,
   * `gitRemote`, and the tunables are load-bearing.
   *
   * @param spaceId Space to build settings for; substituted for every
   *   `{spaceId}` token in the configured remote template.
   * @returns The settings object handed to the engine cycle.
   */
  private async buildSettings(spaceId: string): Promise<Settings> {
    const remoteTemplate = this.environmentService.getGitSyncRemoteTemplate();
    // A replacer FUNCTION (not a replacement string) inserts the id literally:
    // a string replacement would interpret `$&`, `$1`, `$$`, ... inside it.
    const gitRemote = remoteTemplate
      ? remoteTemplate.replace(/\{spaceId\}/g, () => spaceId)
      : undefined;

    // Per-space PUSH policy for still-conflicted page bodies (SPEC §9): read the
    // `gitSync.autoMergeConflicts` flag from the space's jsonb settings. STRICT
    // opt-in like `enabled` — anything other than the literal 'true' (absent,
    // null, 'false') resolves to the SAFE default (skip a conflicted page, do
    // not push).
    const row = await this.db
      .selectFrom('spaces')
      .select(
        sql<boolean>`settings->'gitSync'->>'autoMergeConflicts' = 'true'`.as(
          'autoMergeConflicts',
        ),
      )
      .where('id', '=', spaceId)
      .executeTakeFirst();

    return {
      docmostApiUrl: 'http://native.local',
      docmostEmail: 'native@local',
      docmostPassword: 'native',
      docmostSpaceId: spaceId,
      vaultPath: this.vaultRegistry.vaultPath(spaceId),
      gitRemote,
      pollIntervalMs: this.environmentService.getGitSyncPollIntervalMs(),
      debounceMs: this.environmentService.getGitSyncDebounceMs(),
      logLevel: 'info',
      // Missing row or SQL NULL (flag absent) both fall back to false.
      autoMergeConflicts: row?.autoMergeConflicts ?? false,
    };
  }

  /**
   * Run one full PULL + PUSH cycle for a space, under the Redis leader lock and
   * the in-process mutex. Never throws — per-space errors are caught, logged,
   * and returned in the status so a poll interval is never broken by one bad
   * space.
   *
   * @param spaceId Space to reconcile.
   * @param workspaceId Workspace the space lives in.
   * @returns The cycle status, or a `skipped` / `error` status when it did not
   *   run.
   */
  async runOnce(
    spaceId: string,
    workspaceId: string,
  ): Promise<GitSyncRunStatus> {
    if (!this.environmentService.isGitSyncEnabled()) {
      return { spaceId, ran: false, skipped: 'disabled' };
    }
    const serviceUserId = this.environmentService.getGitSyncServiceUserId();
    if (!serviceUserId) {
      this.logger.error(
        'git-sync: GIT_SYNC_SERVICE_USER_ID is required when GIT_SYNC_ENABLED — skipping',
      );
      return { spaceId, ran: false, skipped: 'no-service-user' };
    }

    // Run the full cycle under the per-space lock. withSpaceLock owns the
    // in-process mutex (no overlapping cycles on this instance) AND the Redis
    // leader lock (single writer across replicas), and returns a skip sentinel
    // when it could not enter — surfaced here as the skipped:'in-progress' /
    // 'lock-held' status.
    try {
      const result = await this.spaceLock.withSpaceLock(spaceId, (signal) =>
        this.driveCycle(spaceId, workspaceId, serviceUserId, signal),
      );
      // The lock's skip sentinel has `skipped` but no `spaceId`; a real cycle
      // status always carries `spaceId`, so the pair tells them apart.
      if ('skipped' in result && !('spaceId' in result)) {
        return { spaceId, ran: false, skipped: result.skipped };
      }
      return result;
    } catch (err) {
      const message = GitSyncOrchestrator.errorMessage(err);
      this.logger.error(`git-sync: cycle failed for space ${spaceId}: ${message}`);
      return { spaceId, ran: false, error: message };
    }
  }

  /**
   * Ingest a push that arrived over smart-HTTP (the /git host). Under the SAME
   * per-space lock the poll cycle uses, it:
   *   1. runs `runReceivePack()` — the closure that spawns `git http-backend`
   *      for the receive-pack request and finishes streaming the HTTP response
   *      to the client. The client's push result is determined here.
   *   2. THEN — still holding the lock — runs the full Docmost cycle (the same
   *      `driveCycle` body `runOnce` uses) so the freshly received commits on
   *      `main` flow back into Docmost pages.
   *
   * If the cycle body in step 2 throws, it is LOGGED but NOT rethrown: the push
   * already succeeded and the commits are durable on `main`, so the
   * poll-interval backstop will reconcile them on the next tick. The
   * receive-pack itself is the load-bearing step; an error thrown by
   * `runReceivePack` is not caught here.
   *
   * Lock contention: if the lock cannot be acquired (a poll cycle is
   * mid-flight), this throws a `GitSyncLockHeldError`. The HTTP handler
   * converts that to a 503 so git surfaces a retryable error to the user
   * (chosen over blocking the request behind a potentially long cycle). The
   * receive-pack is NOT run when the lock is held — we never write to the
   * working tree concurrently with a cycle.
   *
   * @param spaceId Space the push targets.
   * @param workspaceId Workspace the space lives in.
   * @param runReceivePack Closure performing the receive-pack and streaming the
   *   response; invoked only once the lock is held.
   * @throws GitSyncLockHeldError when the lock is held, or when git-sync is
   *   globally disabled.
   */
  async ingestExternalPush(
    spaceId: string,
    workspaceId: string,
    runReceivePack: () => Promise<void>,
  ): Promise<void> {
    if (!this.environmentService.isGitSyncEnabled()) {
      // The HTTP gate already checks this, but be defensive: never run a cycle
      // when sync is globally off. The lock-held error type is reused here, so
      // the caller sees the same error as for a busy space.
      throw new GitSyncLockHeldError(spaceId);
    }
    const serviceUserId = this.environmentService.getGitSyncServiceUserId();

    const result = await this.spaceLock.withSpaceLock(spaceId, async (signal) => {
      // 1) Stream the receive-pack to the client (durable commits land on main).
      await runReceivePack();

      // 2) Reconcile the new commits into Docmost. A service user is required
      // to attribute the writes; without one we cannot run the cycle — the
      // commits are still durable and the poll backstop will pick them up once
      // configured.
      if (!serviceUserId) {
        this.logger.error(
          'git-sync: GIT_SYNC_SERVICE_USER_ID is required to ingest an external ' +
            'push — the push is durable on main; skipping the immediate cycle.',
        );
        return;
      }
      try {
        await this.driveCycle(spaceId, workspaceId, serviceUserId, signal);
      } catch (err) {
        // Do NOT rethrow: the push succeeded and the commits are durable on
        // main; the poll-interval backstop retries the cycle. Log for
        // visibility.
        this.logger.error(
          `git-sync: post-push cycle failed for space ${spaceId} (push is ` +
            `durable; poll will retry): ${GitSyncOrchestrator.errorMessage(err)}`,
        );
      }
      return;
    });

    // The callback resolves to undefined, so an object carrying `skipped` can
    // only be the lock's skip sentinel (in-progress or another replica) —
    // surface it so the HTTP handler can answer 503 and let git retry.
    if (typeof result === 'object' && result !== null && 'skipped' in result) {
      throw new GitSyncLockHeldError(spaceId);
    }
  }

  /**
   * Drive ONE reconcile cycle for a space. The PULL->PUSH branch choreography
   * lives in the engine's `runCycle` (so it can never drift from the engine it
   * ships with); the orchestrator owns only the lock (its caller) and the
   * service binding. There is no delete cap — deletes apply unconditionally
   * (they are soft/reversible) and every cycle logs what it deleted via `log`.
   *
   * @param spaceId Space to reconcile.
   * @param workspaceId Workspace the datasource client is bound to.
   * @param serviceUserId User the datasource client is bound to.
   * @param signal Optional cooperative-abort signal from the per-space lock.
   * @returns The engine result with `spaceId` attached.
   */
  private async driveCycle(
    spaceId: string,
    workspaceId: string,
    serviceUserId: string,
    signal?: AbortSignal,
  ): Promise<GitSyncRunStatus> {
    const { runCycle } = await loadGitSync();
    const settings = await this.buildSettings(spaceId);
    const vault = await this.vaultRegistry.getVault(spaceId);
    const client = this.dataSource.bind({ workspaceId, userId: serviceUserId });

    const result = await runCycle({
      // Cooperative-abort signal from the per-space lock: if a heartbeat
      // refresh cannot confirm the lock, the cycle bails before its next
      // destructive write phase instead of writing blind after a possible lock
      // loss.
      signal,
      spaceId,
      client,
      vault,
      settings,
      // ABSOLUTE-path fs primitives the engine cycle injects (it stays IO-free).
      fs: {
        readFile: (absPath) => readFile(absPath, 'utf8'),
        writeFile: (absPath, text) => writeFile(absPath, text, 'utf8'),
        mkdir: (absDir) => mkdir(absDir, { recursive: true }).then(() => undefined),
        rm: (absPath) => rm(absPath, { force: true }),
      },
      // Every cycle logs its full push plan + per-action lines + completion
      // counts (created/updated/deleted/skipped/failures) through this `log`,
      // so what was deleted (and what was not) is always recorded. There is no
      // delete cap: deletes are soft (Trash, reversible), so a blocking limit
      // only got in the way of legitimate deletes; engine correctness (covered
      // by the reconcile/layout tests) is what prevents phantom deletions.
      log: (line: string) => this.logger.log(`git-sync[${spaceId}] ${line}`),
    });

    return { spaceId, ...result };
  }

  // --- poll-safety interval -------------------------------------

  /**
   * Register the poll-safety interval DYNAMICALLY so it honors the configured
   * GIT_SYNC_POLL_INTERVAL_MS (a static `@Interval` decorator could only
   * hardcode a value at class-eval time, before config is readable — diverging
   * from what `/status` reports). When git-sync is disabled we register
   * nothing.
   *
   * ScheduleModule: forRoot() is registered ONCE globally by TelemetryModule;
   * GitSyncModule imports the plain ScheduleModule so SchedulerRegistry is
   * injectable without a duplicate forRoot.
   *
   * @throws Whatever `SchedulerRegistry.addInterval` throws; the timer created
   *   here is cleared before the error propagates.
   */
  onModuleInit(): void {
    if (!this.environmentService.isGitSyncEnabled()) return;

    const ms = this.environmentService.getGitSyncPollIntervalMs();
    const handle = setInterval(() => {
      // Fire-and-forget, but never let a rejection go unhandled: pollTick
      // guards its per-space work, yet its initial enabled check runs outside
      // any try/catch.
      this.pollTick().catch((err: unknown) => {
        this.logger.error(
          `git-sync: poll tick failed unexpectedly: ${GitSyncOrchestrator.errorMessage(
            err,
          )}`,
        );
      });
    }, ms);
    // Do not keep the event loop alive solely for the poll timer.
    handle.unref?.();
    try {
      this.schedulerRegistry.addInterval(
        GitSyncOrchestrator.POLL_INTERVAL_NAME,
        handle,
      );
    } catch (err) {
      // Registration failed: the registry does not own the timer, so clear it
      // here — otherwise it would keep firing with nothing able to stop it.
      clearInterval(handle);
      throw err;
    }
    this.pollIntervalName = GitSyncOrchestrator.POLL_INTERVAL_NAME;
    this.logger.log(`git-sync: poll interval registered (${ms}ms).`);
  }

  /** Tear down the dynamic interval on shutdown (guard against double-delete). */
  onModuleDestroy(): void {
    if (!this.pollIntervalName) return;
    try {
      // deleteInterval clears the timer and removes it from the registry.
      this.schedulerRegistry.deleteInterval(this.pollIntervalName);
    } catch (err) {
      this.logger.warn(
        `git-sync: failed to delete poll interval: ${GitSyncOrchestrator.errorMessage(
          err,
        )}`,
      );
    } finally {
      // Reset even on failure so a second destroy call is a no-op.
      this.pollIntervalName = null;
    }
  }

  /**
   * One poll tick: catches events missed by the listener and reconciles after
   * downtime. Gated on GIT_SYNC_ENABLED (defensive — the interval is only
   * registered when enabled). Spaces are processed one after another, each
   * under its own lock (overlaps skipped). An enumeration failure is logged and
   * ends the tick; per-space errors are swallowed by `runOnce`.
   */
  private async pollTick(): Promise<void> {
    if (!this.environmentService.isGitSyncEnabled()) return;
    let spaces: EnabledSpace[];
    try {
      spaces = await this.enabledSpaces();
    } catch (err) {
      this.logger.error(
        `git-sync: failed to enumerate enabled spaces: ${GitSyncOrchestrator.errorMessage(
          err,
        )}`,
      );
      return;
    }
    for (const { spaceId, workspaceId } of spaces) {
      // runOnce never throws; a per-space error is logged and returned in status.
      await this.runOnce(spaceId, workspaceId);
    }
  }
}
|
||||
@@ -0,0 +1,477 @@
|
||||
// Stub the collab util so importing the service does not drag in the
|
||||
// editor-ext -> @tiptap/react -> react-dom graph (unloadable under jest's node
|
||||
// env, same coupling noted in mcp.service.spec.ts). The captured transact
|
||||
// callback is never executed in these unit tests, so the stub extensions array
|
||||
// is sufficient; the real collab write path is exercised by integration tests.
|
||||
jest.mock('../../../collaboration/collaboration.util', () => ({
|
||||
tiptapExtensions: [],
|
||||
getPageId: (name: string) => name.replace(/^page\./, ''),
|
||||
}));
|
||||
// writeBody now builds the replacement Yjs state eagerly (before clearing the
|
||||
// live doc), so TiptapTransformer.toYdoc runs in these unit tests. Real Tiptap
|
||||
// extensions are stubbed to [] above (they drag in the React graph), which can't
|
||||
// build a schema — so stub the transformer to return a small non-empty Y.Doc.
|
||||
// The real conversion is exercised by the @docmost/git-sync converter tests and
|
||||
// the integration tests.
|
||||
jest.mock('@hocuspocus/transformer', () => {
|
||||
const Yjs = require('yjs');
|
||||
return {
|
||||
TiptapTransformer: {
|
||||
toYdoc: jest.fn(() => {
|
||||
const d = new Yjs.Doc();
|
||||
d.getXmlFragment('default').insert(0, [new Yjs.XmlElement('paragraph')]);
|
||||
return d;
|
||||
}),
|
||||
},
|
||||
};
|
||||
});
|
||||
// PageService is only ever a mocked dependency here; stub the editor-ext entry
|
||||
// it imports so loading its module does not pull in the React graph either.
|
||||
jest.mock('@docmost/editor-ext', () => ({
|
||||
markdownToHtml: jest.fn(),
|
||||
}));
|
||||
// The service loads `parseDocmostMarkdown` / `markdownToProseMirror` at runtime
|
||||
// via the `loadGitSync()` bridge (the ESM `@docmost/git-sync` package cannot be
|
||||
// `require()`d under jest). Stub the loader: the real conversion is exercised by
|
||||
// the @docmost/git-sync converter tests and the converter gate; here the mocked
|
||||
// TiptapTransformer.toYdoc ignores the converted doc anyway, so a passthrough
|
||||
// body + a minimal ProseMirror doc is sufficient.
|
||||
jest.mock('../git-sync.loader', () => ({
|
||||
loadGitSync: jest.fn(async () => ({
|
||||
parseDocmostMarkdown: (md: string) => ({ meta: {}, body: md }),
|
||||
markdownToProseMirror: async () => ({
|
||||
type: 'doc',
|
||||
content: [{ type: 'paragraph' }],
|
||||
}),
|
||||
})),
|
||||
}));
|
||||
|
||||
import { GitmostDataSourceService } from './gitmost-datasource.service';
|
||||
|
||||
// Focused unit/contract test for the native GitSyncClient adapter.
|
||||
// No DB, no real collab server: the repos/services/gateway are mocked and we
|
||||
// assert the mapping logic + the provenance/soft-delete/position contracts.
|
||||
|
||||
type AnyMock = jest.Mock;
|
||||
|
||||
/**
 * The mocked collaborators handed to `GitmostDataSourceService` by `build()`.
 * Each member lists only the methods the spec stubs or asserts on.
 */
interface Mocks {
  pageRepo: {
    findById: AnyMock;
    getSpaceDescendants: AnyMock;
    restorePage: AnyMock;
  };
  spaceRepo: { findById: AnyMock };
  pageService: {
    create: AnyMock;
    update: AnyMock;
    movePage: AnyMock;
    removePage: AnyMock;
  };
  collabGateway: { writePageBody: AnyMock };
  // Minimal Kysely-ish mock for the direct-query paths: `selectFrom` is the
  // only entry point the spec stubs, so type exactly that instead of `any`.
  db: { selectFrom: AnyMock };
}
|
||||
|
||||
/**
 * Build a chainable query-builder mock: `select` / `where` / `orderBy` /
 * `limit` each return the builder itself, `execute` resolves to `rows` and
 * `executeTakeFirst` resolves to the first row (undefined when `rows` is empty).
 */
function makeQueryBuilder(rows: any[]) {
  const chainable = ['select', 'where', 'orderBy', 'limit'];
  const builder: any = {};
  chainable.forEach((method) => {
    builder[method] = jest.fn(() => builder);
  });
  builder.execute = jest.fn(async () => rows);
  builder.executeTakeFirst = jest.fn(async () => rows[0]);
  return builder;
}
|
||||
|
||||
/**
 * Construct a `GitmostDataSourceService` wired to fresh mocks.
 *
 * @param rows Rows every `db.selectFrom(...)` query-builder mock resolves with.
 * @returns The service under test together with its mocks.
 */
function build(rows: any[] = []): {
  service: GitmostDataSourceService;
  mocks: Mocks;
} {
  const pageRepo: Mocks['pageRepo'] = {
    findById: jest.fn(),
    getSpaceDescendants: jest.fn(),
    restorePage: jest.fn(async () => undefined),
  };
  const spaceRepo: Mocks['spaceRepo'] = {
    findById: jest.fn(async () => ({ id: 'space-1' })),
  };
  const pageService: Mocks['pageService'] = {
    create: jest.fn(),
    update: jest.fn(async () => undefined),
    movePage: jest.fn(async () => undefined),
    removePage: jest.fn(async () => undefined),
  };
  const collabGateway: Mocks['collabGateway'] = {
    writePageBody: jest.fn(async () => undefined),
  };
  // Every selectFrom call hands out a fresh builder resolving to `rows`.
  const db: Mocks['db'] = {
    selectFrom: jest.fn(() => makeQueryBuilder(rows)),
  };

  const mocks: Mocks = { pageRepo, spaceRepo, pageService, collabGateway, db };

  // Argument order mirrors the service constructor.
  const service = new GitmostDataSourceService(
    pageRepo as any,
    spaceRepo as any,
    pageService as any,
    collabGateway as any,
    db as any,
  );

  return { service, mocks };
}
|
||||
|
||||
const CTX = { workspaceId: 'ws-1', userId: 'svc-user' };
|
||||
|
||||
describe('GitmostDataSourceService', () => {
|
||||
describe('listSpaceTree', () => {
|
||||
it('maps descendants to PageNode and is always complete:true', async () => {
|
||||
const { service, mocks } = build();
|
||||
mocks.spaceRepo.findById.mockResolvedValue({ id: 'space-1' });
|
||||
mocks.pageRepo.getSpaceDescendants.mockResolvedValue([
|
||||
{
|
||||
id: 'p1',
|
||||
slugId: 's1',
|
||||
title: 'Root',
|
||||
parentPageId: null,
|
||||
position: 'a0',
|
||||
},
|
||||
{
|
||||
id: 'p2',
|
||||
slugId: 's2',
|
||||
title: 'Child',
|
||||
parentPageId: 'p1',
|
||||
position: 'a1',
|
||||
},
|
||||
]);
|
||||
|
||||
const client = service.bind(CTX);
|
||||
const res = await client.listSpaceTree('space-1');
|
||||
|
||||
expect(res.complete).toBe(true);
|
||||
expect(mocks.pageRepo.getSpaceDescendants).toHaveBeenCalledWith(
|
||||
'space-1',
|
||||
{ includeContent: false },
|
||||
);
|
||||
expect(res.pages).toEqual([
|
||||
{
|
||||
id: 'p1',
|
||||
slugId: 's1',
|
||||
title: 'Root',
|
||||
parentPageId: null,
|
||||
hasChildren: true, // p2's parent is p1
|
||||
position: 'a0',
|
||||
},
|
||||
{
|
||||
id: 'p2',
|
||||
slugId: 's2',
|
||||
title: 'Child',
|
||||
parentPageId: 'p1',
|
||||
hasChildren: false,
|
||||
position: 'a1',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('throws when the space is not found', async () => {
|
||||
const { service, mocks } = build();
|
||||
mocks.spaceRepo.findById.mockResolvedValue(undefined);
|
||||
await expect(service.bind(CTX).listSpaceTree('nope')).rejects.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe('getPageJson', () => {
|
||||
it('returns the engine page shape with ISO updatedAt + content', async () => {
|
||||
const { service, mocks } = build();
|
||||
const updatedAt = new Date('2026-06-20T10:00:00.000Z');
|
||||
mocks.pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
slugId: 's1',
|
||||
title: 'Doc',
|
||||
parentPageId: null,
|
||||
spaceId: 'space-1',
|
||||
updatedAt,
|
||||
content: { type: 'doc', content: [] },
|
||||
});
|
||||
|
||||
const res = await service.bind(CTX).getPageJson('p1');
|
||||
expect(mocks.pageRepo.findById).toHaveBeenCalledWith('p1', {
|
||||
includeContent: true,
|
||||
});
|
||||
expect(res).toEqual({
|
||||
id: 'p1',
|
||||
slugId: 's1',
|
||||
title: 'Doc',
|
||||
parentPageId: null,
|
||||
spaceId: 'space-1',
|
||||
updatedAt: '2026-06-20T10:00:00.000Z',
|
||||
content: { type: 'doc', content: [] },
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('importPageMarkdown', () => {
|
||||
it('parses md, converts to ProseMirror, and routes the body write to the owning instance', async () => {
|
||||
const { service, mocks } = build();
|
||||
mocks.pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
updatedAt: new Date('2026-06-20T11:00:00.000Z'),
|
||||
});
|
||||
|
||||
const res = await service
|
||||
.bind(CTX)
|
||||
.importPageMarkdown('p1', '# Hello\n\nworld');
|
||||
|
||||
// writeBody routes through writePageBody (NOT openDirectConnection): the
|
||||
// merge must run on the instance that owns the live doc so a connected
|
||||
// editor converges instead of silently reverting the change. The service
|
||||
// user rides on the payload as the responsible author.
|
||||
expect(mocks.collabGateway.writePageBody).toHaveBeenCalledTimes(1);
|
||||
const [docName, payload] = mocks.collabGateway.writePageBody.mock.calls[0];
|
||||
expect(docName).toBe('page.p1');
|
||||
expect(payload.userId).toBe('svc-user');
|
||||
// A converted ProseMirror doc was passed; no base on a plain import.
|
||||
expect(payload.prosemirrorJson).toEqual(
|
||||
expect.objectContaining({ type: 'doc' }),
|
||||
);
|
||||
expect(payload.baseProsemirrorJson).toBeUndefined();
|
||||
|
||||
expect(res.updatedAt).toBe('2026-06-20T11:00:00.000Z');
|
||||
});
|
||||
|
||||
// The 2-way path (no base) is covered above; this exercises the THREE-WAY
|
||||
// branch that only fires when a `baseMarkdown` is supplied (review #5). The
|
||||
// merge dispatch itself now lives in the collab handler (gitSyncWriteBody);
|
||||
// here we assert the datasource forwards the base so the owning instance can
|
||||
// run the 3-way reconcile.
|
||||
describe('with a baseMarkdown (three-way merge)', () => {
|
||||
it('forwards the parsed base body so the owning instance can three-way merge', async () => {
|
||||
const { service, mocks } = build();
|
||||
mocks.pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
updatedAt: new Date('2026-06-20T11:00:00.000Z'),
|
||||
});
|
||||
|
||||
await service
|
||||
.bind(CTX)
|
||||
.importPageMarkdown('p1', '# Full\n\ngit', '# Base\n\nbase');
|
||||
|
||||
expect(mocks.collabGateway.writePageBody).toHaveBeenCalledTimes(1);
|
||||
const [, payload] = mocks.collabGateway.writePageBody.mock.calls[0];
|
||||
// Both the incoming body AND the last-synced base were converted and
|
||||
// forwarded — proof the 3-way common-ancestor is plumbed through.
|
||||
expect(payload.prosemirrorJson).toEqual(
|
||||
expect.objectContaining({ type: 'doc' }),
|
||||
);
|
||||
expect(payload.baseProsemirrorJson).toEqual(
|
||||
expect.objectContaining({ type: 'doc' }),
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('createPage', () => {
|
||||
it('creates the shell with git-sync provenance, writes body, returns id', async () => {
|
||||
const { service, mocks } = build();
|
||||
mocks.pageService.create.mockResolvedValue({ id: 'new-id' });
|
||||
mocks.pageRepo.findById.mockResolvedValue({
|
||||
id: 'new-id',
|
||||
updatedAt: new Date('2026-06-20T12:00:00.000Z'),
|
||||
});
|
||||
|
||||
const res = await service
|
||||
.bind(CTX)
|
||||
.createPage('Title', 'body md', 'space-1', 'parent-1');
|
||||
|
||||
expect(mocks.pageService.create).toHaveBeenCalledWith(
|
||||
'svc-user',
|
||||
'ws-1',
|
||||
{ spaceId: 'space-1', title: 'Title', parentPageId: 'parent-1' },
|
||||
{ actor: 'git-sync', aiChatId: null },
|
||||
);
|
||||
expect(mocks.collabGateway.writePageBody).toHaveBeenCalledWith(
|
||||
'page.new-id',
|
||||
expect.objectContaining({ userId: 'svc-user' }),
|
||||
);
|
||||
expect(res).toEqual({
|
||||
data: { id: 'new-id' },
|
||||
updatedAt: '2026-06-20T12:00:00.000Z',
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('deletePage', () => {
|
||||
it('uses the soft-delete path (removePage), not a force delete', async () => {
|
||||
const { service, mocks } = build();
|
||||
await service.bind(CTX).deletePage('p1');
|
||||
// Passes git-sync provenance so the soft-delete stamps
|
||||
// lastUpdatedSource='git-sync' (loop-guard, PR #119 review).
|
||||
expect(mocks.pageService.removePage).toHaveBeenCalledWith(
|
||||
'p1',
|
||||
'svc-user',
|
||||
'ws-1',
|
||||
{ actor: 'git-sync', aiChatId: null },
|
||||
);
|
||||
// No forceDelete on the service surface used here.
|
||||
expect((mocks.pageService as any).forceDelete).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('movePage', () => {
|
||||
it('computes a fractional position when none is supplied', async () => {
|
||||
// db query returns a last sibling at 'a0' -> jittered key after it.
|
||||
const { service, mocks } = build([{ position: 'a0' }]);
|
||||
mocks.pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
spaceId: 'space-1',
|
||||
});
|
||||
|
||||
await service.bind(CTX).movePage('p1', 'parent-1');
|
||||
|
||||
expect(mocks.pageService.movePage).toHaveBeenCalledTimes(1);
|
||||
const [dto, page, provenance] = mocks.pageService.movePage.mock.calls[0];
|
||||
expect(dto.pageId).toBe('p1');
|
||||
expect(dto.parentPageId).toBe('parent-1');
|
||||
expect(typeof dto.position).toBe('string');
|
||||
expect(dto.position.length).toBeGreaterThan(0);
|
||||
expect(page).toEqual({ id: 'p1', spaceId: 'space-1' });
|
||||
expect(provenance).toEqual({ actor: 'git-sync', aiChatId: null });
|
||||
});
|
||||
|
||||
it('passes through an explicit position unchanged', async () => {
|
||||
const { service, mocks } = build();
|
||||
mocks.pageRepo.findById.mockResolvedValue({
|
||||
id: 'p1',
|
||||
spaceId: 'space-1',
|
||||
});
|
||||
|
||||
await service.bind(CTX).movePage('p1', null, 'zz');
|
||||
const [dto] = mocks.pageService.movePage.mock.calls[0];
|
||||
expect(dto.position).toBe('zz');
|
||||
// db not consulted for a supplied position.
|
||||
expect(mocks.db.selectFrom).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('renamePage', () => {
|
||||
it('updates only the title with git-sync provenance', async () => {
|
||||
const { service, mocks } = build();
|
||||
mocks.pageRepo.findById.mockResolvedValue({ id: 'p1', title: 'old' });
|
||||
|
||||
await service.bind(CTX).renamePage('p1', 'new title');
|
||||
|
||||
const [page, dto, user, provenance] =
|
||||
mocks.pageService.update.mock.calls[0];
|
||||
expect(page).toEqual({ id: 'p1', title: 'old' });
|
||||
expect(dto.title).toBe('new title');
|
||||
expect(user).toEqual({ id: 'svc-user' });
|
||||
expect(provenance).toEqual({ actor: 'git-sync', aiChatId: null });
|
||||
});
|
||||
});
|
||||
|
||||
describe('restorePage', () => {
|
||||
it('restores via the repo restore path scoped to the workspace', async () => {
|
||||
const { service, mocks } = build();
|
||||
const res = await service.bind(CTX).restorePage('p1');
|
||||
// Stamps lastUpdatedSource='git-sync' on restore (loop-guard, PR #119).
|
||||
expect(mocks.pageRepo.restorePage).toHaveBeenCalledWith(
|
||||
'p1',
|
||||
'ws-1',
|
||||
'git-sync',
|
||||
);
|
||||
expect(res).toEqual({ id: 'p1' });
|
||||
});
|
||||
});
|
||||
|
||||
// Phase-B+ continuous-sync methods: not yet called by the engine but wired into
|
||||
// the GitSyncClient seam (PR #119 review #5). Exercised via the bound client.
|
||||
describe('listRecentSince', () => {
|
||||
it('queries non-deleted pages newest-first and ISO-stringifies updatedAt', async () => {
|
||||
const rows = [
|
||||
{
|
||||
id: 'p1',
|
||||
slugId: 's1',
|
||||
title: 'A',
|
||||
parentPageId: null,
|
||||
spaceId: 'space-1',
|
||||
updatedAt: new Date('2026-06-20T10:00:00.000Z'),
|
||||
},
|
||||
];
|
||||
const { service, mocks } = build(rows);
|
||||
const qb = mocks.db.selectFrom.mock.results; // populated after the call
|
||||
|
||||
const out = (await service
|
||||
.bind(CTX)
|
||||
.listRecentSince('space-1', '2026-06-19T00:00:00.000Z', 100)) as any[];
|
||||
|
||||
// Query builder shaped against the `pages` table with the expected chain.
|
||||
expect(mocks.db.selectFrom).toHaveBeenCalledWith('pages');
|
||||
const builder = qb[0].value;
|
||||
expect(builder.select).toHaveBeenCalled();
|
||||
expect(builder.orderBy).toHaveBeenCalledWith('updatedAt', 'desc');
|
||||
// deletedAt is null + the conditional spaceId / since / cap clauses.
|
||||
const whereArgs = builder.where.mock.calls.map((c: any[]) => c[0]);
|
||||
expect(whereArgs).toContain('deletedAt');
|
||||
expect(whereArgs).toContain('spaceId');
|
||||
expect(whereArgs).toContain('updatedAt');
|
||||
expect(builder.limit).toHaveBeenCalledWith(100);
|
||||
|
||||
expect(out).toEqual([
|
||||
{
|
||||
id: 'p1',
|
||||
slugId: 's1',
|
||||
title: 'A',
|
||||
parentPageId: null,
|
||||
spaceId: 'space-1',
|
||||
updatedAt: '2026-06-20T10:00:00.000Z',
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('omits the spaceId / since / cap clauses when not supplied', async () => {
|
||||
const { service, mocks } = build([]);
|
||||
|
||||
await service.bind(CTX).listRecentSince(undefined, null);
|
||||
|
||||
const builder = mocks.db.selectFrom.mock.results[0].value;
|
||||
const whereArgs = builder.where.mock.calls.map((c: any[]) => c[0]);
|
||||
// Only the deletedAt-is-null guard; no spaceId / updatedAt> clauses.
|
||||
expect(whereArgs).toEqual(['deletedAt']);
|
||||
expect(builder.limit).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('listTrash', () => {
  it('queries soft-deleted pages and ISO-stringifies deletedAt (null stays null)', async () => {
    const trashedAtIso = '2026-06-21T09:00:00.000Z';
    // Two rows: one with a real deletion timestamp, one with a null timestamp
    // to pin the "null stays null" mapping.
    const rows = [
      {
        id: 'p1',
        slugId: 's1',
        title: 'Trashed',
        parentPageId: null,
        spaceId: 'space-1',
        deletedAt: new Date(trashedAtIso),
      },
      {
        id: 'p2',
        slugId: 's2',
        title: 'NoDate',
        parentPageId: null,
        spaceId: 'space-1',
        deletedAt: null,
      },
    ];
    const harness = build(rows);

    const trashed = (await harness.service
      .bind(CTX)
      .listTrash('space-1')) as any[];

    expect(harness.mocks.db.selectFrom).toHaveBeenCalledWith('pages');

    const [firstSelectFrom] = harness.mocks.db.selectFrom.mock.results;
    const builder = firstSelectFrom.value;
    const predicates = builder.where.mock.calls;
    // Trash predicate (deletedAt is not null) plus the space filter.
    expect(predicates).toContainEqual(['deletedAt', 'is not', null]);
    expect(predicates).toContainEqual(['spaceId', '=', 'space-1']);
    expect(builder.orderBy).toHaveBeenCalledWith('deletedAt', 'desc');

    const [withDate, withoutDate] = trashed;
    expect(withDate.deletedAt).toBe(trashedAtIso);
    expect(withoutDate.deletedAt).toBeNull();
  });
});
|
||||
});
|
||||
@@ -0,0 +1,422 @@
|
||||
import { Injectable, Logger, NotFoundException } from '@nestjs/common';
|
||||
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
||||
import type {
|
||||
GitSyncClient,
|
||||
GitSyncPageNodeLite,
|
||||
} from '@docmost/git-sync';
|
||||
import { loadGitSync } from '../git-sync.loader';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { SpaceRepo } from '@docmost/db/repos/space/space.repo';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||
import { PageService } from '../../../core/page/services/page.service';
|
||||
import { CollaborationGateway } from '../../../collaboration/collaboration.gateway';
|
||||
import { AuthProvenanceData } from '../../../common/decorators/auth-provenance.decorator';
|
||||
|
||||
/**
 * Acting context supplied to `GitmostDataSourceService.bind`.
 *
 * The datasource has no fixed identity of its own: the orchestrator passes a
 * (workspaceId, userId) pair per space and every bound operation acts on its
 * behalf.
 */
export interface GitSyncBindContext {
  /** Workspace the bound client operates in (used e.g. for space lookups). */
  workspaceId: string;
  /**
   * The git-sync service user. It is passed to PageService as the responsible
   * author, while the separate `'git-sync'` actor marks provenance.
   */
  userId: string;
}
|
||||
|
||||
/**
 * Provenance marker handed to every PageService write made by git-sync.
 *
 * Per the design notes for this integration (the stamping itself happens
 * outside this file): PageService.create/update/movePage stamp
 * `lastUpdatedSource = 'git-sync'` when `provenance.actor === 'git-sync'`, and
 * body writes (writeBody, §3.3) are stamped the same way through the collab
 * context picked up by PersistenceExtension. The change listener's loop-guard
 * reads that stamp to skip writes that originated from git-sync itself.
 */
const GIT_SYNC_PROVENANCE: AuthProvenanceData = { actor: 'git-sync', aiChatId: null };
|
||||
|
||||
/**
|
||||
* Native, in-process implementation of the engine's `GitSyncClient` seam.
* Reads go through repositories (PageRepo/SpaceRepo); body writes go
* through `CollaborationGateway.writePageBody` (§3.3); structural mutations
* (create/move/delete/rename) go through PageService.
|
||||
*
|
||||
* Shape: this is an `@Injectable()` holding the repos/services. The orchestrator
|
||||
* calls `bind({ workspaceId, userId })` to obtain a `GitSyncClient` bound to that
|
||||
* acting context. The bound object is a thin closure over `this` — no per-call
|
||||
* identity plumbing leaks into the engine.
|
||||
*/
|
||||
@Injectable()
|
||||
export class GitmostDataSourceService {
|
||||
private readonly logger = new Logger(GitmostDataSourceService.name);
|
||||
|
||||
/**
 * Dependencies are injected by Nest and kept as private readonly members:
 * - `pageRepo` / `spaceRepo`: page and space lookups, subtree listing, restore.
 * - `pageService`: structural mutations (create / remove / move / update).
 * - `collabGateway`: body writes via `writePageBody`.
 * - `db`: direct Kysely queries (sibling positions, recent pages, trash).
 */
constructor(
  private readonly pageRepo: PageRepo,
  private readonly spaceRepo: SpaceRepo,
  private readonly pageService: PageService,
  private readonly collabGateway: CollaborationGateway,
  @InjectKysely() private readonly db: KyselyDB,
) {}
|
||||
|
||||
/**
 * Build a `GitSyncClient` whose operations act on behalf of `ctx`.
 *
 * The result is a plain object of closures over this service, so the engine
 * never has to pass identity around itself.
 *
 * NOTE(review): `movePage`, `listRecentSince` and `listTrash` are forwarded
 * WITHOUT `ctx`; those operations are therefore not scoped to
 * `ctx.workspaceId` in this file — confirm that is intended.
 *
 * @param ctx Workspace and service-user pair to act as.
 * @returns A client object the engine can consume directly.
 */
bind(ctx: GitSyncBindContext): GitSyncClient {
  const client: GitSyncClient = {
    listSpaceTree: (spaceId, rootPageId) => {
      return this.listSpaceTree(ctx, spaceId, rootPageId);
    },
    getPageJson: (pageId) => {
      return this.getPageJson(ctx, pageId);
    },
    importPageMarkdown: (pageId, fullMarkdown, baseMarkdown) => {
      return this.importPageMarkdown(ctx, pageId, fullMarkdown, baseMarkdown);
    },
    createPage: (title, content, spaceId, parentPageId) => {
      return this.createPage(ctx, title, content, spaceId, parentPageId);
    },
    deletePage: (pageId) => {
      return this.deletePage(ctx, pageId);
    },
    movePage: (pageId, parentPageId, position) => {
      return this.movePage(pageId, parentPageId, position);
    },
    renamePage: (pageId, title) => {
      return this.renamePage(ctx, pageId, title);
    },
    listRecentSince: (spaceId, sinceIso, hardPageCap) => {
      return this.listRecentSince(spaceId, sinceIso, hardPageCap);
    },
    listTrash: (spaceId) => {
      return this.listTrash(spaceId);
    },
    restorePage: (pageId) => {
      return this.restorePage(ctx, pageId);
    },
  };
  return client;
}
|
||||
|
||||
// --- reads (pull) ---------------------------------------------------------
|
||||
|
||||
/**
 * List every page of a space in the engine's lightweight node shape.
 *
 * The space is resolved within `ctx.workspaceId` first; an unknown space is a
 * `NotFoundException`. Because the rows come straight from the repository,
 * `complete` is always `true`, so the incomplete-fetch suppression (SPEC §8)
 * never applies on this native path.
 *
 * @param ctx Acting context; only `workspaceId` is read here.
 * @param spaceId Space whose pages are listed.
 * @param _rootPageId Accepted for interface compatibility; not used.
 * @returns The page nodes plus `complete: true`.
 * @throws NotFoundException when the space does not exist in the workspace.
 */
private async listSpaceTree(
  ctx: GitSyncBindContext,
  spaceId: string,
  _rootPageId?: string,
): Promise<{ pages: GitSyncPageNodeLite[]; complete: boolean }> {
  const space = await this.spaceRepo.findById(spaceId, ctx.workspaceId);
  if (!space) {
    throw new NotFoundException(`Space ${spaceId} not found`);
  }

  const descendants = await this.pageRepo.getSpaceDescendants(space.id, {
    includeContent: false,
  });

  // The repository rows carry no `hasChildren` flag, so collect every id that
  // appears as some row's parent within this same result set.
  const idsWithChildren = new Set<string>();
  descendants.forEach(({ parentPageId }) => {
    if (parentPageId) idsWithChildren.add(parentPageId);
  });

  const pages: GitSyncPageNodeLite[] = [];
  for (const node of descendants) {
    pages.push({
      id: node.id,
      slugId: node.slugId,
      title: node.title,
      parentPageId: node.parentPageId ?? null,
      hasChildren: idsWithChildren.has(node.id),
      position: node.position,
    });
  }

  return { pages, complete: true };
}
|
||||
|
||||
/**
 * Fetch a single page together with its stored body content.
 *
 * `updatedAt` is returned as an ISO-8601 string (the loop-guard compares it).
 *
 * NOTE(review): `ctx` is not read here, so the lookup is by page id only and
 * is not restricted to `ctx.workspaceId` — confirm callers only pass ids that
 * belong to the bound workspace.
 *
 * @param ctx Acting context (currently unused).
 * @param pageId Id of the page to load.
 * @returns Page identity, placement, ISO `updatedAt` and `content`.
 * @throws NotFoundException when no page has that id.
 */
private async getPageJson(
  ctx: GitSyncBindContext,
  pageId: string,
): Promise<{
  id: string;
  slugId: string;
  title: string;
  parentPageId: string | null;
  spaceId: string;
  updatedAt: string;
  content: unknown;
}> {
  const page = await this.pageRepo.findById(pageId, { includeContent: true });
  if (!page) {
    throw new NotFoundException(`Page ${pageId} not found`);
  }

  const { id, slugId, title, spaceId, content } = page;
  const parentPageId = page.parentPageId ?? null;
  const updatedAt = new Date(page.updatedAt).toISOString();

  return { id, slugId, title, parentPageId, spaceId, updatedAt, content };
}
|
||||
|
||||
// --- writes (push) --------------------------------------------------------
|
||||
|
||||
/**
 * Apply a self-contained markdown file to an existing page's body.
 *
 * Steps: parse the meta+body envelope, convert the body to ProseMirror JSON,
 * then hand it to `writeBody` (§3.3). When `baseMarkdown` (the last-synced
 * version of the file) is provided, it is converted the same way and passed
 * along as the merge base, so the write is a three-way merge that lets
 * concurrent human edits survive (review #5). Without it the write is a
 * two-way merge.
 *
 * NOTE(review): only `ctx.userId` is read; the page is addressed by id alone
 * and not checked against `ctx.workspaceId` — confirm the id source is trusted.
 *
 * @param ctx Acting context; `userId` is forwarded to the body write.
 * @param pageId Page whose body is replaced/merged.
 * @param fullMarkdown Complete file contents (envelope + body).
 * @param baseMarkdown Last-synced file contents, or null/undefined for none.
 * @returns The page's `updatedAt` after the write as an ISO string, or
 *   `undefined` if the page cannot be re-read.
 */
private async importPageMarkdown(
  ctx: GitSyncBindContext,
  pageId: string,
  fullMarkdown: string,
  baseMarkdown?: string | null,
): Promise<{ updatedAt?: string }> {
  const {
    parseDocmostMarkdown: parseEnvelope,
    markdownToProseMirror: toProseMirror,
  } = await loadGitSync();

  const incoming = parseEnvelope(fullMarkdown);
  const doc = await toProseMirror(incoming.body);

  // Stays undefined when there is no base, which selects the 2-way merge.
  let baseDoc: unknown = undefined;
  if (baseMarkdown != null) {
    const base = parseEnvelope(baseMarkdown);
    baseDoc = await toProseMirror(base.body);
  }

  await this.writeBody(pageId, doc, ctx.userId, baseDoc);

  const refreshed = await this.pageRepo.findById(pageId);
  const updatedAt = refreshed
    ? new Date(refreshed.updatedAt).toISOString()
    : undefined;
  return { updatedAt };
}
|
||||
|
||||
/**
 * Create a new page and fill in its body.
 *
 * The page shell (space, title, optional parent) is created through
 * PageService with git-sync provenance; the markdown body is then parsed,
 * converted and written through `writeBody` with no merge base.
 *
 * NOTE(review): if the body write throws, the already-created shell is left
 * in place — confirm the caller tolerates or cleans up that state.
 *
 * @param ctx Acting context (author id and workspace).
 * @param title Title of the new page.
 * @param content Markdown file contents (envelope + body).
 * @param spaceId Space to create the page in.
 * @param parentPageId Optional parent page.
 * @returns The new id under `data.id`, plus the ISO `updatedAt` read back
 *   after the body write (`undefined` if the page cannot be re-read).
 */
private async createPage(
  ctx: GitSyncBindContext,
  title: string,
  content: string,
  spaceId: string,
  parentPageId?: string,
): Promise<{ data: { id: string }; updatedAt?: string }> {
  const shell = await this.pageService.create(
    ctx.userId,
    ctx.workspaceId,
    { spaceId, title, parentPageId },
    GIT_SYNC_PROVENANCE,
  );
  const newPageId = shell.id;

  // The shell has no body yet; convert the markdown and push it via collab.
  const {
    parseDocmostMarkdown: parseEnvelope,
    markdownToProseMirror: toProseMirror,
  } = await loadGitSync();
  const parsed = parseEnvelope(content);
  const doc = await toProseMirror(parsed.body);
  await this.writeBody(newPageId, doc, ctx.userId);

  const refreshed = await this.pageRepo.findById(newPageId);
  const updatedAt = refreshed
    ? new Date(refreshed.updatedAt).toISOString()
    : undefined;
  return { data: { id: newPageId }, updatedAt };
}
|
||||
|
||||
/**
 * Remove a page via `PageService.removePage` with git-sync provenance.
 *
 * Per the original design note this is a soft delete to Trash (reversible via
 * `restorePage`), not a force delete.
 *
 * @param ctx Acting context (user and workspace passed to PageService).
 * @param pageId Page to remove.
 * @returns `{ id: pageId }`.
 */
private async deletePage(
  ctx: GitSyncBindContext,
  pageId: string,
): Promise<unknown> {
  const { userId, workspaceId } = ctx;
  await this.pageService.removePage(
    pageId,
    userId,
    workspaceId,
    GIT_SYNC_PROVENANCE,
  );
  return { id: pageId };
}
|
||||
|
||||
/**
 * Move a page under a new parent (or to the root when `parentPageId` is null).
 *
 * PageService's move needs a fractional-index `position`. When the engine
 * does not supply one, a key after the destination's last sibling is computed
 * (plan §3.2 / §14.4).
 *
 * NOTE(review): this method receives no acting context, so the page is looked
 * up by id only — confirm that is acceptable.
 *
 * @param pageId Page to move.
 * @param parentPageId Destination parent, or null for the space root.
 * @param position Optional explicit fractional-index key.
 * @returns `{ id: pageId }`.
 * @throws NotFoundException when the page does not exist.
 */
private async movePage(
  pageId: string,
  parentPageId: string | null,
  position?: string,
): Promise<unknown> {
  const page = await this.pageRepo.findById(pageId);
  if (!page) {
    throw new NotFoundException(`Page ${pageId} not found`);
  }

  let targetPosition: string | undefined = position;
  if (targetPosition == null) {
    targetPosition = await this.computeMovePosition(page.spaceId, parentPageId);
  }

  const moveRequest = {
    pageId,
    parentPageId: parentPageId ?? null,
    position: targetPosition,
  };
  await this.pageService.movePage(moveRequest, page, GIT_SYNC_PROVENANCE);
  return { id: pageId };
}
|
||||
|
||||
/**
 * Pick a fractional-index key that sorts after the current last sibling.
 *
 * Siblings are the non-deleted pages of `spaceId` that share `parentPageId`
 * (root-level pages when it is null). They are ordered by `position`
 * descending under the "C" collation and only the top row is fetched. With no
 * siblings the key is generated between (null, null), i.e. a fresh key.
 *
 * @param spaceId Space the destination lives in.
 * @param parentPageId Destination parent, or null for the root level.
 * @returns A new position key placed after the last sibling.
 */
private async computeMovePosition(
  spaceId: string,
  parentPageId: string | null,
): Promise<string> {
  const lastInSpace = this.db
    .selectFrom('pages')
    .select(['position'])
    .where('spaceId', '=', spaceId)
    .where('deletedAt', 'is', null)
    .orderBy('position', (ob) => ob.collate('C').desc())
    .limit(1);

  // A null parent must be matched with IS NULL rather than "=".
  const lastUnderParent = parentPageId
    ? lastInSpace.where('parentPageId', '=', parentPageId)
    : lastInSpace.where('parentPageId', 'is', null);

  const tail = await lastUnderParent.executeTakeFirst();
  const previousKey = tail?.position ?? null;
  return generateJitteredKeyBetween(previousKey, null);
}
|
||||
|
||||
/**
 * Set a page's title without touching its body.
 *
 * @param ctx Acting context; `userId` identifies the responsible author.
 * @param pageId Page to rename.
 * @param title New title.
 * @returns `{ id: pageId }`.
 * @throws NotFoundException when the page does not exist.
 */
private async renamePage(
  ctx: GitSyncBindContext,
  pageId: string,
  title: string,
): Promise<unknown> {
  const page = await this.pageRepo.findById(pageId);
  if (!page) {
    throw new NotFoundException(`Page ${pageId} not found`);
  }

  // PageService.update expects a User. Per the original note only its id is
  // read (for lastUpdatedById), so a minimal stand-in carrying the git-sync
  // service user's id is passed.
  const actingUser = { id: ctx.userId } as any;
  // `pageId` is included to satisfy the UpdatePageDto shape; per the original
  // note PageService.update takes the id from `page`. Only `title` changes.
  const changes = { pageId, title };

  await this.pageService.update(page, changes, actingUser, GIT_SYNC_PROVENANCE);
  return { id: pageId };
}
|
||||
|
||||
// --- continuous (phase B+) ------------------------------------------------
|
||||
|
||||
/**
 * List non-deleted pages, most recently updated first (poll-safety
 * reconciliation, SPEC §8).
 *
 * Every filter is optional and only applied when its argument is truthy:
 * - `spaceId` restricts to one space; when omitted, pages of all spaces match.
 * - `sinceIso` keeps pages with `updatedAt` strictly after that instant.
 * - `hardPageCap` limits the number of rows (a cap of 0 means "no limit").
 *
 * NOTE(review): there is no workspace filter here, so an omitted `spaceId`
 * spans every workspace in the table — confirm that is intended.
 *
 * @param spaceId Space to restrict to, or undefined for all spaces.
 * @param sinceIso Lower bound (exclusive) as an ISO timestamp, or null.
 * @param hardPageCap Optional maximum number of rows.
 * @returns The selected columns per row, with `updatedAt` as an ISO string.
 */
private async listRecentSince(
  spaceId: string | undefined,
  sinceIso: string | null,
  hardPageCap?: number,
): Promise<unknown[]> {
  let query = this.db
    .selectFrom('pages')
    .select(['id', 'slugId', 'title', 'parentPageId', 'spaceId', 'updatedAt'])
    .where('deletedAt', 'is', null)
    .orderBy('updatedAt', 'desc');

  if (spaceId) {
    query = query.where('spaceId', '=', spaceId);
  }
  if (sinceIso) {
    const since = new Date(sinceIso);
    query = query.where('updatedAt', '>', since);
  }
  if (hardPageCap) {
    query = query.limit(hardPageCap);
  }

  const recentPages = await query.execute();

  const serialized: unknown[] = [];
  for (const page of recentPages) {
    serialized.push({
      ...page,
      updatedAt: new Date(page.updatedAt).toISOString(),
    });
  }
  return serialized;
}
|
||||
|
||||
/**
 * List the trashed (soft-deleted) pages of a space, newest deletion first.
 * Used for deletion detection.
 *
 * @param spaceId Space whose trash is listed.
 * @returns The selected columns per row, with `deletedAt` as an ISO string
 *   (or null when the stored value is falsy).
 */
private async listTrash(spaceId: string): Promise<unknown[]> {
  const trashed = await this.db
    .selectFrom('pages')
    .select(['id', 'slugId', 'title', 'parentPageId', 'spaceId', 'deletedAt'])
    .where('spaceId', '=', spaceId)
    .where('deletedAt', 'is not', null)
    .orderBy('deletedAt', 'desc')
    .execute();

  const serialized: unknown[] = [];
  for (const page of trashed) {
    let deletedAt: string | null = null;
    if (page.deletedAt) {
      deletedAt = new Date(page.deletedAt).toISOString();
    }
    serialized.push({ ...page, deletedAt });
  }
  return serialized;
}
|
||||
|
||||
/**
 * Bring a trashed page back via `PageRepo.restorePage`.
 *
 * The git-sync actor is passed along so that, per the original note, the
 * change-listener loop-guard skips the resulting PAGE_RESTORED echo (the same
 * convention as delete / create / update / move).
 *
 * @param ctx Acting context; only `workspaceId` is read.
 * @param pageId Page to restore.
 * @returns `{ id: pageId }`.
 */
private async restorePage(
  ctx: GitSyncBindContext,
  pageId: string,
): Promise<unknown> {
  const { actor } = GIT_SYNC_PROVENANCE;
  await this.pageRepo.restorePage(pageId, ctx.workspaceId, actor);
  return { id: pageId };
}
|
||||
|
||||
// --- linchpin: native body write (§3.3) -----------------------------------
|
||||
|
||||
/**
 * Write a page body in-process by delegating to
 * `CollaborationGateway.writePageBody` for the document `page.<pageId>`.
 * No loopback websocket and no service-user token are involved.
 *
 * Rationale carried over from the original design notes (the behaviour
 * described lives in CollaborationGateway / PersistenceExtension, not here):
 * - The gateway applies the merge on the instance that OWNS the live Y.Doc
 *   instead of opening a direct connection on this (api/worker) instance.
 *   A direct connection here could mutate a separate, detached doc that a
 *   connected editor never sees; the editor's next autosave would then
 *   silently revert the git change (data loss). Running on the owning
 *   instance broadcasts the merge as a Yjs update so editors converge.
 * - The merge is a block-level reconcile, not a full-body replace
 *   (review #5): only changed blocks are touched and an unchanged resync is
 *   a 0-op write.
 * - With a base (the last-synced version) it is a THREE-WAY merge: a block
 *   only the human changed is kept, a block only git changed is taken, and
 *   conflicts resolve to git. Without a base (e.g. createPage) it falls back
 *   to a two-way merge.
 * - The git-sync actor/user context reaches
 *   PersistenceExtension.onStoreDocument, which persists the document,
 *   stamps `lastUpdatedSource = 'git-sync'` and broadcasts `page.updated`.
 *
 * @param pageId Page whose body is written.
 * @param prosemirrorJson Target body as ProseMirror JSON.
 * @param userId Id of the git-sync service user.
 * @param baseProsemirrorJson Optional merge base as ProseMirror JSON.
 */
private async writeBody(
  pageId: string,
  prosemirrorJson: unknown,
  userId: string,
  baseProsemirrorJson?: unknown,
): Promise<void> {
  const payload = { prosemirrorJson, baseProsemirrorJson, userId };
  await this.collabGateway.writePageBody(`page.${pageId}`, payload);
}
|
||||
}
|
||||
26
apps/server/src/integrations/git-sync/services/lcs.ts
Normal file
26
apps/server/src/integrations/git-sync/services/lcs.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
/**
 * Build the backward-filled LCS length table for sequences `a` and `b`.
 *
 * `dp[i][j]` is the length of the longest common subsequence of the suffixes
 * `a[i:]` and `b[j:]`, so `dp[0][0]` is the LCS length of the full inputs.
 * The table has `(a.length + 1) x (b.length + 1)` cells; the extra last row
 * and column stand for the empty suffix and stay 0. O(n*m) time and space —
 * fine for page block counts.
 *
 * Shared by the two-way block diff (`yjs-body-merge.diffBlocks`) and the
 * three-way merge planner (`three-way-merge.lcsPairs`) so the identical table
 * construction lives in one place; each caller does its own traceback.
 *
 * The inputs are only read, so they are accepted as `readonly` arrays; plain
 * mutable `string[]` arguments keep working unchanged.
 *
 * @param a First key sequence.
 * @param b Second key sequence.
 * @returns A freshly allocated table that the caller may read or mutate.
 */
export function buildLcsTable(
  a: readonly string[],
  b: readonly string[],
): number[][] {
  const n = a.length;
  const m = b.length;
  const dp: number[][] = Array.from({ length: n + 1 }, () =>
    new Array<number>(m + 1).fill(0),
  );
  // Fill from the bottom-right so each cell only depends on cells already
  // computed (the row below and the cell to the right).
  for (let i = n - 1; i >= 0; i--) {
    const row = dp[i];
    const below = dp[i + 1];
    for (let j = m - 1; j >= 0; j--) {
      row[j] =
        a[i] === b[j]
          ? below[j + 1] + 1
          : Math.max(below[j], row[j + 1]);
    }
  }
  return dp;
}
|
||||
@@ -0,0 +1,101 @@
|
||||
// Red-team finding #10: single-writer guarantee across replicas must survive a
|
||||
// TTL lapse with a swallowed heartbeat refresh. Two SpaceLockService instances
|
||||
// (A, B) share ONE redis store. A holds 'X' and stays in-flight; the lock key
|
||||
// then disappears (TTL expiry while refreshLock silently failed). B must NOT be
|
||||
// able to acquire 'X' and run its fn concurrently with A — that would be two
|
||||
// writers racing the same working tree. This test asserts the DESIRED
|
||||
// single-writer behavior, so it FAILS today if the lapse lets B in.
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { SpaceLockService } from './space-lock.service';
|
||||
import { GIT_SYNC_LOCK_PREFIX } from '../git-sync.constants';
|
||||
|
||||
/**
 * In-memory stand-in for redis that several lock instances can share. It
 * implements only the two calls the lock relies on:
 * - `set(key, val, 'PX', ttl, 'NX')`: stores the value and answers 'OK' only
 *   if the key is absent; otherwise answers null and leaves it untouched.
 * - `eval(lua, numKeys, key, instanceId)`: compare-and-act. Answers 0 unless
 *   the stored value equals `instanceId`. On a match it deletes the key when
 *   the script text mentions 'del'; any other script (the pexpire refresh)
 *   is a no-op. Both matching cases answer 1.
 * Nothing expires on its own; tests simulate a TTL lapse by editing `store`.
 */
function makeSharedRedis() {
  const store = new Map<string, string>();
  return {
    store,
    async set(key: string, val: string, _px: 'PX', _ttl: number, nx: 'NX') {
      const alreadyHeld = store.has(key);
      if (alreadyHeld && nx === 'NX') return null;
      store.set(key, val);
      return 'OK';
    },
    async eval(lua: string, _numKeys: number, key: string, argInstanceId: string) {
      // CAS guard: do nothing unless the caller still owns the key.
      const ownedByCaller = store.get(key) === argInstanceId;
      if (!ownedByCaller) return 0;
      // Release script deletes; the refresh script has nothing to extend here.
      if (lua.includes('del')) store.delete(key);
      return 1;
    },
  };
}
|
||||
|
||||
/**
 * Create a SpaceLockService wired to the given fake redis. Passing the same
 * fake to several calls yields instances that share one store.
 */
function buildInstance(redis: ReturnType<typeof makeSharedRedis>) {
  const getOrThrow = jest.fn(() => redis);
  return new SpaceLockService({ getOrThrow } as any);
}
|
||||
|
||||
/** Yield to the microtask queue three times so pending continuations run. */
async function flushMicrotasks(): Promise<void> {
  for (let turn = 0; turn < 3; turn++) {
    await Promise.resolve();
  }
}
|
||||
|
||||
// Replace Logger#warn with a no-op for this spec file.
beforeAll(() => {
  const warnSpy = jest.spyOn(Logger.prototype, 'warn');
  warnSpy.mockImplementation(() => undefined);
});
|
||||
|
||||
describe('SpaceLockService — finding #10 single-writer across TTL lapse', () => {
  it('B must not run its fn concurrently with an in-flight A after the lock key vanishes', async () => {
    const sharedRedis = makeSharedRedis();
    const lockKey = GIT_SYNC_LOCK_PREFIX + 'X';
    const A = buildInstance(sharedRedis);
    const B = buildInstance(sharedRedis);

    let aInFlight = false;
    let finishA: () => void = () => undefined;
    const aGate = new Promise<void>((resolve) => {
      finishA = resolve;
    });

    // A takes 'X' and parks inside its fn until the gate opens.
    const aOutcome = A.withSpaceLock('X', async () => {
      aInFlight = true;
      await aGate;
      aInFlight = false;
      return 'A-done';
    });
    await flushMicrotasks();

    // Precondition: A is running and its key is in the shared store.
    expect(aInFlight).toBe(true);
    expect(sharedRedis.store.has(lockKey)).toBe(true);

    // Emulate the TTL running out while the heartbeat refresh was swallowed:
    // the key disappears although A has not finished.
    sharedRedis.store.delete(lockKey);

    // B now asks for 'X'. It must be turned away as 'lock-held', and its fn
    // must never observe A still in flight.
    let overlapObserved = false;
    const bOutcome = await B.withSpaceLock('X', async () => {
      overlapObserved = aInFlight;
      return 'B-done';
    });

    expect(overlapObserved).toBe(false);
    expect(bOutcome).toEqual({ skipped: 'lock-held' });

    // Unblock A so the test ends cleanly.
    finishA();
    await expect(aOutcome).resolves.toBe('A-done');
  });
});
|
||||
@@ -0,0 +1,20 @@
|
||||
import { diff3Plan, type Pick } from './three-way-merge';
|
||||
|
||||
// Turn a merge plan into the resulting key sequence: each pick names a source
// ('live', otherwise the target) and an index into that source.
function apply(plan: Pick[], live: string[], target: string[]): string[] {
  const merged: string[] = [];
  for (const pick of plan) {
    const source = pick.src === 'live' ? live : target;
    merged.push(source[pick.index]);
  }
  return merged;
}
|
||||
|
||||
// Plan a three-way merge of base `o`, live `a` and target `b`, then
// materialize it against the live and target sequences.
const merge = (o: string[], a: string[], b: string[]): string[] => {
  const plan = diff3Plan(o, a, b);
  return apply(plan, a, b);
};
|
||||
|
||||
describe('diff3Plan red-team #9 (human edit + adjacent git insert)', () => {
  it('keeps human block-2 edit AND applies git insert of 2.5', () => {
    const base = ['1', '2', '3'];
    // The human rewrote block 2 in the live document.
    const live = ['1', 'H', '3'];
    // Git inserted block 2.5 right after block 2.
    const target = ['1', '2', '2.5', '3'];

    const merged = merge(base, live, target);

    expect(merged).toEqual(['1', 'H', '2.5', '3']);
  });
});
|
||||
@@ -0,0 +1,271 @@
|
||||
// Unit tests for SpaceLockService in ISOLATION. The lock is exercised against a
|
||||
// fake redis (mock `set`/`eval`) and we assert the exact ARGUMENTS passed to
|
||||
// redis — the test-coverage gap this refactor (PR #119 #2) closes: acquire uses
|
||||
// `SET ... PX <ttl> NX`, release uses a DEL-CAS Lua, and the heartbeat refresh
|
||||
// uses a PEXPIRE-CAS Lua, all keyed by the same private instanceId.
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { SpaceLockService } from './space-lock.service';
|
||||
import {
|
||||
GIT_SYNC_LOCK_PREFIX,
|
||||
GIT_SYNC_LOCK_TTL_MS,
|
||||
} from '../git-sync.constants';
|
||||
|
||||
/** Shorthand for an untyped jest mock function. */
type AnyMock = jest.Mock;

/** What `build()` hands back: the service under test and its fake redis. */
interface Built {
  service: SpaceLockService;
  /** The mocked redis primitives the lock calls. */
  redis: { set: AnyMock; eval: AnyMock };
}
|
||||
|
||||
/**
 * Construct a SpaceLockService around a fresh fake redis. By default `set`
 * resolves to 'OK' (lock acquired) and `eval` resolves to 1; individual tests
 * override these per case.
 */
function build(): Built {
  const set = jest.fn(async () => 'OK');
  const evalMock = jest.fn(async () => 1);
  const redis = { set, eval: evalMock };

  const getOrThrow = jest.fn(() => redis);
  const service = new SpaceLockService({ getOrThrow } as any);

  return { service, redis };
}
|
||||
|
||||
/**
 * Let queued microtasks run (three turns of the microtask queue) so awaited
 * continuations inside the lock get a chance to execute.
 */
async function flushMicrotasks(): Promise<void> {
  let remainingTurns = 3;
  while (remainingTurns > 0) {
    await Promise.resolve();
    remainingTurns -= 1;
  }
}
|
||||
|
||||
// Clear recorded calls on all mocks before each test.
beforeEach(() => void jest.clearAllMocks());
|
||||
|
||||
describe('SpaceLockService', () => {
|
||||
describe('acquire (SET NX/PX)', () => {
  it('calls redis.set with (prefix+spaceId, <instanceId>, PX, ttl, NX) and reuses the instanceId on release', async () => {
    const { service, redis } = build();
    const lockKey = GIT_SYNC_LOCK_PREFIX + 'space-1';

    const outcome = await service.withSpaceLock('space-1', async () => 'ok');
    expect(outcome).toBe('ok');

    // Acquire: exactly one SET with the expected positional arguments.
    expect(redis.set).toHaveBeenCalledTimes(1);
    const setArgs = redis.set.mock.calls[0];
    const ownerToken = setArgs[1];
    expect(setArgs[0]).toBe(lockKey);
    expect(typeof ownerToken).toBe('string');
    expect(ownerToken.length).toBeGreaterThan(0);
    expect(setArgs[2]).toBe('PX');
    expect(setArgs[3]).toBe(GIT_SYNC_LOCK_TTL_MS);
    expect(setArgs[4]).toBe('NX');

    // Release: the eval call carries the SAME token as ARGV[1].
    expect(redis.eval).toHaveBeenCalledTimes(1);
    const releaseArgs = redis.eval.mock.calls[0];
    expect(releaseArgs[2]).toBe(lockKey);
    expect(releaseArgs[3]).toBe(ownerToken);
  });
});
|
||||
|
||||
describe('release (DEL-CAS Lua)', () => {
  it('returns the fn result and runs a get/del CAS-compared release in finally', async () => {
    const { service, redis } = build();

    const outcome = await service.withSpaceLock('space-1', async () => 42);
    expect(outcome).toBe(42);

    expect(redis.eval).toHaveBeenCalledTimes(1);
    const releaseArgs = redis.eval.mock.calls[0];
    const script = releaseArgs[0];
    // The script reads the key, compares it to ARGV[1] and deletes on match.
    expect(script).toContain('get');
    expect(script).toContain('del');
    expect(script).toContain('== ARGV[1]');
    expect(releaseArgs[1]).toBe(1);
    expect(releaseArgs[2]).toBe(GIT_SYNC_LOCK_PREFIX + 'space-1');
    expect(typeof releaseArgs[3]).toBe('string');
  });
});
|
||||
|
||||
describe('lock held by another replica', () => {
  it("returns { skipped: 'lock-held' } without running fn or releasing when set != 'OK'", async () => {
    const { service, redis } = build();
    // SET NX answers null: somebody else already holds the key.
    redis.set.mockResolvedValueOnce(null);
    const protectedFn = jest.fn(async () => 'ran');

    const outcome = await service.withSpaceLock('space-1', protectedFn);

    expect(outcome).toEqual({ skipped: 'lock-held' });
    expect(protectedFn).not.toHaveBeenCalled();
    // Nothing was acquired, so nothing may be released.
    expect(redis.eval).not.toHaveBeenCalled();
  });
});
|
||||
|
||||
describe('in-process mutex', () => {
  it("a second withSpaceLock on the same space mid-flight returns { skipped: 'in-progress' } without a second set", async () => {
    const { service, redis } = build();
    let openGate: () => void = () => undefined;
    const gate = new Promise<void>((resolve) => {
      openGate = resolve;
    });

    const firstCall = service.withSpaceLock('space-1', async () => {
      await gate;
      return 'first';
    });
    // Give the first call time to acquire and register as running.
    await flushMicrotasks();

    const secondOutcome = await service.withSpaceLock(
      'space-1',
      async () => 'second',
    );
    expect(secondOutcome).toEqual({ skipped: 'in-progress' });
    // The second call was short-circuited before reaching redis.set.
    expect(redis.set).toHaveBeenCalledTimes(1);

    openGate();
    await expect(firstCall).resolves.toBe('first');
  });
});
|
||||
|
||||
describe('fn throwing', () => {
  it('propagates the throw AND still releases (eval) in finally', async () => {
    const { service, redis } = build();
    const failure = new Error('boom');
    const failingFn = async () => {
      throw failure;
    };

    await expect(service.withSpaceLock('space-1', failingFn)).rejects.toBe(
      failure,
    );

    // The release script still ran even though fn threw.
    expect(redis.eval).toHaveBeenCalledTimes(1);
    const script = redis.eval.mock.calls[0][0];
    expect(script).toContain('del');
  });
});
|
||||
|
||||
describe('heartbeat refresh (PEXPIRE-CAS Lua)', () => {
  it('extends the lock via a pexpire CAS-Lua with the same instanceId while fn is in flight', async () => {
    jest.useFakeTimers();
    try {
      const { service, redis } = build();
      const lockKey = GIT_SYNC_LOCK_PREFIX + 'space-1';
      // One heartbeat period is roughly a third of the TTL.
      const heartbeatMs = Math.floor(GIT_SYNC_LOCK_TTL_MS / 3);

      let openGate: () => void = () => undefined;
      const gate = new Promise<void>((resolve) => {
        openGate = resolve;
      });

      const pending = service.withSpaceLock('space-1', async () => {
        await gate;
        return 'done';
      });
      // Let the acquire settle and the heartbeat timer get registered.
      await flushMicrotasks();

      // Token used on acquire; the refresh must present the same one.
      const ownerToken = redis.set.mock.calls[0][1];

      // Move the clock past one heartbeat so the refresh fires.
      jest.advanceTimersByTime(heartbeatMs);
      await flushMicrotasks();

      // Only the refresh has run so far; fn is still parked on the gate.
      expect(redis.eval).toHaveBeenCalledTimes(1);
      const refreshArgs = redis.eval.mock.calls[0];
      expect(refreshArgs[0]).toContain('pexpire');
      expect(refreshArgs[0]).toContain('== ARGV[1]');
      expect(refreshArgs[1]).toBe(1);
      expect(refreshArgs[2]).toBe(lockKey);
      expect(refreshArgs[3]).toBe(ownerToken);
      expect(refreshArgs[4]).toBe(String(GIT_SYNC_LOCK_TTL_MS));

      // Finish fn; the release (second eval, the DEL-CAS) runs in finally.
      openGate();
      await flushMicrotasks();
      await expect(pending).resolves.toBe('done');
      expect(redis.eval).toHaveBeenCalledTimes(2);
      const releaseScript = redis.eval.mock.calls[1][0];
      expect(releaseScript).toContain('del');
    } finally {
      jest.useRealTimers();
    }
  });
});
|
||||
|
||||
// The lost-lock guard: a heartbeat refresh that cannot CONFIRM we still own the
|
||||
// lock (CAS miss, res !== 1) OR that throws (Redis error) aborts the supplied
|
||||
// controller so the in-flight protected fn stops instead of writing blind after
|
||||
// a possible lock takeover. `withSpaceLock` threads that signal into `fn`.
|
||||
describe('abort-on-lost-lock', () => {
|
||||
it('aborts the in-flight fn when the heartbeat refresh CAS-MISSES (eval -> 0)', async () => {
|
||||
jest.useFakeTimers();
|
||||
try {
|
||||
const { service, redis } = build();
|
||||
let release!: () => void;
|
||||
const gate = new Promise<void>((resolve) => {
|
||||
release = resolve;
|
||||
});
|
||||
let captured: AbortSignal | undefined;
|
||||
|
||||
const run = service.withSpaceLock('space-1', async (signal) => {
|
||||
captured = signal;
|
||||
await gate;
|
||||
return 'done';
|
||||
});
|
||||
// Let acquire resolve and the setInterval register.
|
||||
await flushMicrotasks();
|
||||
expect(captured).toBeDefined();
|
||||
expect(captured!.aborted).toBe(false);
|
||||
|
||||
// The refresh CAS-misses: the key no longer holds our instanceId.
|
||||
redis.eval.mockResolvedValue(0);
|
||||
jest.advanceTimersByTime(Math.floor(GIT_SYNC_LOCK_TTL_MS / 3));
|
||||
await flushMicrotasks();
|
||||
|
||||
// The lost lock aborted the protected fn's signal.
|
||||
expect(captured!.aborted).toBe(true);
|
||||
|
||||
release();
|
||||
await flushMicrotasks();
|
||||
await expect(run).resolves.toBe('done');
|
||||
} finally {
|
||||
jest.useRealTimers();
|
||||
}
|
||||
});
|
||||
|
||||
it('aborts the in-flight fn when the heartbeat refresh THROWS (Redis error)', async () => {
|
||||
jest.useFakeTimers();
|
||||
try {
|
||||
const { service, redis } = build();
|
||||
let release!: () => void;
|
||||
const gate = new Promise<void>((resolve) => {
|
||||
release = resolve;
|
||||
});
|
||||
let captured: AbortSignal | undefined;
|
||||
|
||||
const run = service.withSpaceLock('space-1', async (signal) => {
|
||||
captured = signal;
|
||||
await gate;
|
||||
return 'done';
|
||||
});
|
||||
await flushMicrotasks();
|
||||
expect(captured!.aborted).toBe(false);
|
||||
|
||||
// The refresh eval rejects (Redis down). release() in finally must still
|
||||
// resolve, so only reject the NEXT (heartbeat) call, then go back to OK.
|
||||
redis.eval.mockRejectedValueOnce(new Error('redis down'));
|
||||
jest.advanceTimersByTime(Math.floor(GIT_SYNC_LOCK_TTL_MS / 3));
|
||||
await flushMicrotasks();
|
||||
|
||||
expect(captured!.aborted).toBe(true);
|
||||
|
||||
release();
|
||||
await flushMicrotasks();
|
||||
await expect(run).resolves.toBe('done');
|
||||
} finally {
|
||||
jest.useRealTimers();
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Silence the warn logger if a refresh/release path ever logs (defensive).
|
||||
beforeAll(() => {
|
||||
jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined);
|
||||
});
|
||||
@@ -0,0 +1,181 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { RedisService } from '@nestjs-labs/nestjs-ioredis';
|
||||
import type { Redis } from 'ioredis';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import {
|
||||
GIT_SYNC_LOCK_PREFIX,
|
||||
GIT_SYNC_LOCK_TTL_MS,
|
||||
} from '../git-sync.constants';
|
||||
|
||||
/**
 * Per-space locking primitive for the git-sync control plane.
 *
 * Two layers are combined: an in-process guard (so one process never runs two
 * cycles for the same space at once) and a Redis leader lock taken with
 * `SET ... PX ... NX` (so only one replica writes at a time). It lives in its own
 * service, extracted from `GitSyncOrchestrator`, so the locking logic can be
 * reused and tested on its own (PR #119 refactor #2).
 */
@Injectable()
export class SpaceLockService {
  /**
   * Process-wide registry of live holders: spaceId -> instanceId.
   *
   * Being `static`, it is shared by every `SpaceLockService` constructed in this
   * process, whereas `running` belongs to a single service instance. A second
   * holder in the same process is therefore rejected with 'lock-held' even if
   * the Redis key has lapsed (swallowed heartbeat / TTL expiry). Races between
   * PROCESSES are left to the Redis lock plus abort-on-refresh-failure (fencing
   * tokens are noted as a follow-up).
   */
  private static readonly liveLocks = new Map<string, string>();

  private readonly logger = new Logger(SpaceLockService.name);
  private readonly redis: Redis;
  /** Random id generated once per service instance; stored as the lock value for CAS. */
  private readonly instanceId = randomUUID();
  /** spaceIds for which THIS service instance currently has a cycle in flight. */
  private readonly running = new Set<string>();

  constructor(redisService: RedisService) {
    this.redis = redisService.getOrThrow();
  }

  // --- Redis leader lock -----------------------------------------

  /** Redis key that carries the leader lock of `spaceId`. */
  private lockKey(spaceId: string): string {
    return GIT_SYNC_LOCK_PREFIX + spaceId;
  }

  /** Printable form of a caught value, used in the warn logs below. */
  private describeError(err: unknown): string {
    return err instanceof Error ? err.message : String(err);
  }

  /**
   * Try to become leader for `spaceId` with `SET <key> <instanceId> PX <ttl> NX`.
   *
   * @returns `true` only when Redis answered 'OK' (the key did not exist); any
   *   other reply is treated as "someone else holds it".
   */
  private async acquire(spaceId: string): Promise<boolean> {
    const reply = await this.redis.set(
      this.lockKey(spaceId),
      this.instanceId,
      'PX',
      GIT_SYNC_LOCK_TTL_MS,
      'NX',
    );
    return reply === 'OK';
  }

  /**
   * Drop the lock through a compare-and-delete Lua script: the key is removed
   * only when its value is still our instanceId, so a lock another holder took
   * after our TTL expired is never deleted. Redis errors are logged at warn
   * level and swallowed.
   */
  private async release(spaceId: string): Promise<void> {
    const script =
      'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end';
    try {
      await this.redis.eval(script, 1, this.lockKey(spaceId), this.instanceId);
    } catch (err) {
      this.logger.warn(
        `git-sync: failed to release lock for space ${spaceId}: ${this.describeError(err)}`,
      );
    }
  }

  /**
   * Heartbeat step: extend the lock TTL through a compare-and-pexpire Lua script
   * that only touches the key while its value is still our instanceId.
   *
   * The returned promise does not reject on a Redis error (the heartbeat fires
   * it without awaiting). Any outcome other than a confirmed extension (the
   * script did not return 1, or the call threw) is treated as a LOST lock: a
   * warning is logged and `controller`, when given, is aborted so the protected
   * function can stop instead of writing after a possible takeover.
   *
   * @param spaceId space whose lock should be extended
   * @param controller abort controller wired to the in-flight protected function
   */
  private async refreshLock(
    spaceId: string,
    controller?: AbortController,
  ): Promise<void> {
    const script =
      'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("pexpire", KEYS[1], ARGV[2]) else return 0 end';
    try {
      const reply = await this.redis.eval(
        script,
        1,
        this.lockKey(spaceId),
        this.instanceId,
        String(GIT_SYNC_LOCK_TTL_MS),
      );
      // 1 means the TTL was extended on a key we still own: nothing more to do.
      if (reply === 1) return;
      // Anything else is a CAS miss: the key no longer carries our instanceId.
      this.logger.warn(
        `git-sync: lock for space ${spaceId} lost during refresh — aborting in-flight cycle`,
      );
      controller?.abort();
    } catch (err) {
      this.logger.warn(
        `git-sync: failed to refresh lock for space ${spaceId}: ${this.describeError(err)}`,
      );
      // Ownership could not be confirmed, so assume the worst and abort.
      controller?.abort();
    }
  }

  /**
   * Run `fn` while holding the per-space lock.
   *
   * Outcomes:
   * - `{ skipped: 'in-progress' }` when this service instance already runs a
   *   cycle for the space;
   * - `{ skipped: 'lock-held' }` when another holder in this process is
   *   registered in `liveLocks`, or the Redis `SET NX` was refused;
   * - otherwise whatever `fn` resolves to (a throw from `fn` propagates).
   *
   * While `fn` runs, a timer refreshes the Redis TTL roughly every TTL/3. `fn`
   * receives an `AbortSignal` that fires when a refresh cannot confirm we still
   * own the lock. The timer, the Redis lock and both in-process reservations are
   * released in `finally` blocks on every path.
   *
   * @param spaceId space to lock
   * @param fn protected work; receives the lost-lock abort signal
   */
  async withSpaceLock<T>(
    spaceId: string,
    fn: (signal: AbortSignal) => Promise<T>,
  ): Promise<T | { skipped: 'lock-held' | 'in-progress' }> {
    if (this.running.has(spaceId)) return { skipped: 'in-progress' };

    // Another SpaceLockService in this process is mid-cycle for the space. This
    // check does not depend on Redis, so it still holds after a TTL lapse.
    if (SpaceLockService.liveLocks.has(spaceId)) return { skipped: 'lock-held' };

    // Both reservations happen synchronously, before the first await, so two
    // concurrent same-space calls cannot both get past the guards above.
    this.running.add(spaceId);
    SpaceLockService.liveLocks.set(spaceId, this.instanceId);
    try {
      const acquired = await this.acquire(spaceId);
      if (!acquired) return { skipped: 'lock-held' };

      // Aborted by refreshLock when ownership can no longer be confirmed.
      const controller = new AbortController();
      const heartbeatEveryMs = Math.max(1, Math.floor(GIT_SYNC_LOCK_TTL_MS / 3));
      const heartbeat = setInterval(() => {
        void this.refreshLock(spaceId, controller);
      }, heartbeatEveryMs);
      // Do not let the heartbeat alone keep the event loop alive.
      heartbeat.unref?.();

      try {
        return await fn(controller.signal);
      } finally {
        clearInterval(heartbeat);
        await this.release(spaceId);
      }
    } finally {
      this.running.delete(spaceId);
      SpaceLockService.liveLocks.delete(spaceId);
    }
  }
}
|
||||
@@ -0,0 +1,112 @@
|
||||
import { diff3Plan, type Pick } from './three-way-merge';
|
||||
|
||||
// Resolve each pick of a plan against the array it points into (live or target)
// so an assertion can compare the resulting sequence of block keys.
function apply(plan: Pick[], live: string[], target: string[]): string[] {
  const mergedKeys: string[] = [];
  for (const pick of plan) {
    const source = pick.src === 'live' ? live : target;
    mergedKeys.push(source[pick.index]);
  }
  return mergedKeys;
}
|
||||
|
||||
// Plan a three-way merge of base `o`, live `a`, target `b`, then materialize it.
const merge = (o: string[], a: string[], b: string[]): string[] => {
  const plan = diff3Plan(o, a, b);
  return apply(plan, a, b);
};
|
||||
|
||||
describe('diff3Plan (block-level three-way merge)', () => {
|
||||
it('identical on all three sides -> unchanged (all from live)', () => {
|
||||
const plan = diff3Plan(['1', '2', '3'], ['1', '2', '3'], ['1', '2', '3']);
|
||||
expect(plan.every((p) => p.src === 'live')).toBe(true);
|
||||
expect(apply(plan, ['1', '2', '3'], ['1', '2', '3'])).toEqual(['1', '2', '3']);
|
||||
});
|
||||
|
||||
it('git changed a block the human did not -> takes git', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', '2', '3'], ['1', '9', '3'])).toEqual([
|
||||
'1',
|
||||
'9',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human changed a block git did not -> KEEPS the human edit (the core 3-way win)', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', 'H', '3'], ['1', '2', '3'])).toEqual([
|
||||
'1',
|
||||
'H',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human and git changed DIFFERENT blocks -> both preserved', () => {
|
||||
// human rewrote block 1, git rewrote block 3.
|
||||
expect(merge(['1', '2', '3'], ['H', '2', '3'], ['1', '2', 'G'])).toEqual([
|
||||
'H',
|
||||
'2',
|
||||
'G',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human inserted a block AND git changed a different block -> both preserved', () => {
|
||||
expect(
|
||||
merge(['1', '2', '3'], ['1', '1.5', '2', '3'], ['1', '2', 'G']),
|
||||
).toEqual(['1', '1.5', '2', 'G']);
|
||||
});
|
||||
|
||||
it('both changed the SAME block -> conflict resolves to git', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', 'H', '3'], ['1', 'G', '3'])).toEqual([
|
||||
'1',
|
||||
'G',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('both made the SAME edit -> that edit (no duplication)', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', 'X', '3'], ['1', 'X', '3'])).toEqual([
|
||||
'1',
|
||||
'X',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human deleted a block git left alone -> deletion preserved', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', '3'], ['1', '2', '3'])).toEqual([
|
||||
'1',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('git deleted a block the human left alone -> deletion applied', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', '2', '3'], ['1', '3'])).toEqual([
|
||||
'1',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('both deleted the same block -> gone (no conflict)', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', '3'], ['1', '3'])).toEqual(['1', '3']);
|
||||
});
|
||||
|
||||
it('git appended a trailing block -> appended', () => {
|
||||
expect(merge(['1', '2'], ['1', '2'], ['1', '2', '3'])).toEqual([
|
||||
'1',
|
||||
'2',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human appended a trailing block git did not -> kept', () => {
|
||||
expect(merge(['1', '2'], ['1', '2', '3'], ['1', '2'])).toEqual([
|
||||
'1',
|
||||
'2',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('empty base, git provides content (brand-new page body) -> git content', () => {
|
||||
expect(merge([], [], ['1', '2'])).toEqual(['1', '2']);
|
||||
});
|
||||
|
||||
it('git changed block 1, human edited block 3, far apart -> both kept', () => {
|
||||
expect(
|
||||
merge(
|
||||
['a', 'b', 'c', 'd', 'e'],
|
||||
['a', 'b', 'c', 'd', 'E'],
|
||||
['A', 'b', 'c', 'd', 'e'],
|
||||
),
|
||||
).toEqual(['A', 'b', 'c', 'd', 'E']);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,232 @@
|
||||
/**
|
||||
* Pure block-level THREE-WAY merge planner (diff3) over arrays of opaque block
|
||||
* keys. Used by the git-sync body write to merge an incoming git body into the
|
||||
* live page using the last-synced version as the common ancestor (review #5):
|
||||
*
|
||||
* - a block only the human changed (live != base, git == base) -> keep LIVE
|
||||
* - a block only git changed (git != base, live == base) -> take GIT
|
||||
* - a block both sides changed (a real conflict) -> GIT wins
|
||||
* - inserts/deletes from either side are preserved when unambiguous
|
||||
*
|
||||
* Content-agnostic: it works on string keys and returns the merged block order as
|
||||
* picks ({ src: 'live'|'target', index }) — the caller (the Yjs applier)
|
||||
* materializes them — so the whole algorithm is unit-testable on plain arrays.
|
||||
*
|
||||
* Algorithm: anchor on base blocks present (unchanged) in BOTH live and target
|
||||
* (their LCS-with-base intersection). Between consecutive anchors lies one region
|
||||
* the human and/or git rewrote; resolve each region three-way. Stable anchor
|
||||
* blocks are emitted from LIVE so the applier keeps the existing Yjs block
|
||||
* instances (and the human's in-flight edits) in place.
|
||||
*/
|
||||
|
||||
import { buildLcsTable } from './lcs';
|
||||
|
||||
/**
 * Walk `a` and `b` from the front and collect the `[aIndex, bIndex]` pairs of
 * equal elements chosen with the help of the table from `buildLcsTable`.
 *
 * NOTE(review): the walk reads `table[i + 1][j]` and `table[i][j + 1]` to decide
 * which side to advance, so it presumably expects `table[i][j]` to be the LCS
 * length of the suffixes `a[i..]` / `b[j..]` — confirm against `./lcs`.
 */
function lcsPairs(a: string[], b: string[]): Array<[number, number]> {
  const table = buildLcsTable(a, b);
  const matched: Array<[number, number]> = [];
  let ai = 0;
  let bi = 0;
  while (ai < a.length && bi < b.length) {
    if (a[ai] === b[bi]) {
      // Equal heads: record the pair and advance both cursors.
      matched.push([ai, bi]);
      ai += 1;
      bi += 1;
      continue;
    }
    // Heads differ: skip on the side whose remainder scores at least as well.
    if (table[ai + 1][bi] >= table[ai][bi + 1]) {
      ai += 1;
    } else {
      bi += 1;
    }
  }
  return matched;
}
|
||||
|
||||
/**
 * Index a list of `[baseIndex, sideIndex]` pairs as a lookup from base index to
 * the matched index on the other side. Should a base index occur more than
 * once, the last pair wins.
 */
function matchMap(pairs: Array<[number, number]>): Map<number, number> {
  return new Map<number, number>(pairs);
}
|
||||
|
||||
/**
 * One edit a side applied to the base: base blocks `[oStart, oEnd)` were
 * replaced by the side's blocks whose indices are listed in `content`.
 */
interface Hunk {
  /** First base index covered by the edit (inclusive). */
  oStart: number;
  /** End of the covered base span (exclusive); equals `oStart` for a pure insert. */
  oEnd: number;
  /** Indices into the side array that replace the span; empty for a pure delete. */
  content: number[];
}

/**
 * Express `side` as a list of non-overlapping hunks against base `o`.
 *
 * The pairs from `lcsPairs(o, side)` are the blocks both arrays share; every gap
 * between two consecutive pairs (plus the gap before the first and after the
 * last) becomes one hunk, unless the gap is empty on both sides.
 */
function buildHunks(o: string[], side: string[]): Hunk[] {
  // A sentinel "pair" just past both ends closes the trailing gap, so the loop
  // needs no separate final step.
  const boundaries: Array<[number, number]> = [
    ...lcsPairs(o, side),
    [o.length, side.length],
  ];

  const hunks: Hunk[] = [];
  let lastO = -1;
  let lastS = -1;
  for (const [curO, curS] of boundaries) {
    const oStart = lastO + 1;
    // Side indices strictly between the previous match and this one.
    const content = Array.from(
      { length: curS - lastS - 1 },
      (_unused, offset) => lastS + 1 + offset,
    );
    const touchesBase = curO > oStart;
    if (touchesBase || content.length > 0) {
      hunks.push({ oStart, oEnd: curO, content });
    }
    lastO = curO;
    lastS = curS;
  }
  return hunks;
}
|
||||
|
||||
/**
 * Decide whether hunk `a` and hunk `b` (one per side) touch the same base region.
 *
 * - two pure inserts collide only when they sit at the same gap;
 * - a pure insert collides with a span only when its position lies strictly
 *   inside that span (a shared boundary is not a collision);
 * - two spans collide when their half-open ranges intersect.
 */
function hunksOverlap(a: Hunk, b: Hunk): boolean {
  const strictlyInside = (position: number, span: Hunk): boolean =>
    span.oStart < position && position < span.oEnd;

  const aIsInsert = a.oStart === a.oEnd;
  const bIsInsert = b.oStart === b.oEnd;

  if (aIsInsert) {
    return bIsInsert ? a.oStart === b.oStart : strictlyInside(a.oStart, b);
  }
  if (bIsInsert) {
    return strictlyInside(b.oStart, a);
  }

  const latestStart = Math.max(a.oStart, b.oStart);
  const earliestEnd = Math.min(a.oEnd, b.oEnd);
  return latestStart < earliestEnd;
}
|
||||
|
||||
/** A pick whose index is relative to the region slice, not the whole array. */
interface LocalPick {
  src: 'live' | 'target';
  local: number;
}

/**
 * Three-way merge of a single region: base `o`, live `a`, target `b` (all three
 * already sliced to the region).
 *
 * Both sides are diffed against the base as hunks. If any live hunk overlaps any
 * target hunk the region is a genuine conflict and `null` is returned, leaving
 * the resolution to the caller. Otherwise the hunks are replayed in base order:
 * base blocks no hunk covers are emitted from LIVE, a live hunk contributes its
 * live blocks and a target hunk its target blocks, so neither side's change is
 * lost.
 *
 * @returns region-local picks in merged order, or `null` on conflict
 */
function tryMergeRegion(
  o: string[],
  a: string[],
  b: string[],
): LocalPick[] | null {
  const liveHunks = buildHunks(o, a);
  const targetHunks = buildHunks(o, b);

  // One overlapping pair is enough to call the whole region a conflict.
  const conflicting = liveHunks.some((lh) =>
    targetHunks.some((th) => hunksOverlap(lh, th)),
  );
  if (conflicting) return null;

  // base index -> live index, for the base blocks LIVE kept. It is consulted
  // below only for base blocks outside every hunk.
  const baseToLive = matchMap(lcsPairs(o, a));

  const picks: LocalPick[] = [];
  let li = 0; // next unconsumed live hunk
  let ti = 0; // next unconsumed target hunk
  let oi = 0; // next base index to account for
  while (oi < o.length || li < liveHunks.length || ti < targetHunks.length) {
    const liveHunk = li < liveHunks.length ? liveHunks[li] : undefined;
    const targetHunk = ti < targetHunks.length ? targetHunks[ti] : undefined;
    const nextStart = Math.min(
      liveHunk ? liveHunk.oStart : o.length,
      targetHunk ? targetHunk.oStart : o.length,
    );

    // Base blocks before the next hunk are untouched: emit them from LIVE.
    for (; oi < nextStart; oi++) {
      picks.push({ src: 'live', local: baseToLive.get(oi) as number });
    }
    if (!liveHunk && !targetHunk) break;

    // Pick the hunk to replay at `oi`. When both sides start here they are
    // disjoint, so the live hunk goes first only if it is a pure insert
    // (oEnd === oi); otherwise the target hunk goes first.
    let takeLive = false;
    if (liveHunk !== undefined && liveHunk.oStart === oi) {
      const targetAlsoHere = targetHunk !== undefined && targetHunk.oStart === oi;
      takeLive = !targetAlsoHere || liveHunk.oEnd === oi;
    }

    const chosen = (takeLive ? liveHunk : targetHunk) as Hunk;
    const src: LocalPick['src'] = takeLive ? 'live' : 'target';
    for (const idx of chosen.content) {
      picks.push({ src, local: idx });
    }
    oi = chosen.oEnd;
    if (takeLive) {
      li += 1;
    } else {
      ti += 1;
    }
  }
  return picks;
}
|
||||
|
||||
/** One block of the merged result: take element `index` from `src`. */
export interface Pick {
  src: 'live' | 'target';
  index: number;
}

/**
 * Plan the three-way merge of base `o`, live `a` and target `b` (arrays of block
 * keys) and return the merged block order as picks into `a` / `b`.
 *
 * Base blocks matched in BOTH `a` and `b` act as anchors. The plan alternates
 * between resolving the region in front of the next anchor and emitting the
 * anchor itself (always from live). A region is handed to `tryMergeRegion`
 * first; when that reports a conflict (`null`) the target's blocks of the region
 * are taken wholesale.
 */
export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
  const baseToLive = matchMap(lcsPairs(o, a));
  const baseToTarget = matchMap(lcsPairs(o, b));
  const isAnchor = (baseIdx: number): boolean =>
    baseToLive.has(baseIdx) && baseToTarget.has(baseIdx);

  const plan: Pick[] = [];
  let oi = 0;
  let ai = 0;
  let bi = 0;

  for (;;) {
    // Locate the next anchor at or after `oi` (or run off the end of the base).
    let anchor = oi;
    while (anchor < o.length && !isAnchor(anchor)) anchor += 1;
    const pastEnd = anchor >= o.length;

    // The region ends where the anchor sits on each side, or at the array end.
    const aEnd = pastEnd ? a.length : (baseToLive.get(anchor) as number);
    const bEnd = pastEnd ? b.length : (baseToTarget.get(anchor) as number);

    const regionPicks = tryMergeRegion(
      o.slice(oi, anchor),
      a.slice(ai, aEnd),
      b.slice(bi, bEnd),
    );
    if (regionPicks === null) {
      // Same-block conflict: the target side wins the whole region.
      for (let k = bi; k < bEnd; k++) {
        plan.push({ src: 'target', index: k });
      }
    } else {
      // Region-local indices are shifted back to whole-array indices.
      for (const { src, local } of regionPicks) {
        const offset = src === 'live' ? ai : bi;
        plan.push({ src, index: offset + local });
      }
    }

    if (pastEnd) break;

    // Emit the anchor from live and step past it on all three sides.
    plan.push({ src: 'live', index: aEnd });
    oi = anchor + 1;
    ai = aEnd + 1;
    bi = bEnd + 1;
  }

  return plan;
}
|
||||
@@ -0,0 +1,145 @@
|
||||
// Unit tests for the per-space vault path resolver + lazy VaultGit cache.
|
||||
// `mkdir` and the git-sync loader are mocked so construction is cheap and
|
||||
// no real filesystem / git work happens. We assert the path normalization
|
||||
// (trailing slash) and the one-VaultGit-per-space caching contract.
|
||||
//
|
||||
// The service loads `VaultGit` (and `vaultGitEnv`) at runtime via the
|
||||
// `loadGitSync()` bridge (the ESM `@docmost/git-sync` package cannot be
|
||||
// `require()`d under jest), so we mock that loader rather than the package.
|
||||
import { mkdir } from 'node:fs/promises';
|
||||
import { execFile } from 'node:child_process';
|
||||
import { loadGitSync } from '../git-sync.loader';
|
||||
|
||||
jest.mock('node:fs/promises', () => ({
|
||||
mkdir: jest.fn(async () => undefined),
|
||||
}));
|
||||
|
||||
// ensureServable shells out via `promisify(execFile)`; mock execFile with a
|
||||
// callback-style fn so promisify resolves. Each `git config <key> <value>` call
|
||||
// is recorded so the four config writes (incl. the security-critical
|
||||
// receive.denyNonFastForwards=true) can be asserted.
|
||||
jest.mock('node:child_process', () => ({
|
||||
execFile: jest.fn((_cmd: string, _args: string[], _opts: any, cb: any) =>
|
||||
cb(null, { stdout: '', stderr: '' }),
|
||||
),
|
||||
}));
|
||||
|
||||
// Cheap VaultGit stub: records the path it was constructed with; no shell-out.
|
||||
// `ensureRepo` is a resolved jest.fn so ensureServable can call it. Declared with
|
||||
// a `mock`-prefixed name so jest allows referencing it inside the hoisted
|
||||
// `jest.mock` factory below.
|
||||
const mockVaultGit = jest
|
||||
.fn()
|
||||
.mockImplementation((path: string) => ({
|
||||
path,
|
||||
ensureRepo: jest.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
jest.mock('../git-sync.loader', () => ({
|
||||
loadGitSync: jest.fn(async () => ({
|
||||
VaultGit: mockVaultGit,
|
||||
vaultGitEnv: jest.fn(() => ({})),
|
||||
})),
|
||||
}));
|
||||
|
||||
import { VaultRegistryService } from './vault-registry.service';
|
||||
|
||||
type AnyMock = jest.Mock;
|
||||
|
||||
const mkdirMock = mkdir as unknown as AnyMock;
|
||||
const execFileMock = execFile as unknown as AnyMock;
|
||||
const VaultGitMock = mockVaultGit;
|
||||
void loadGitSync;
|
||||
|
||||
// Construct the service under test around a stub environment whose
// `getGitSyncDataDir` reports the given data directory.
function build(dataDir: string): { service: VaultRegistryService } {
  const environmentStub = { getGitSyncDataDir: jest.fn(() => dataDir) };
  return { service: new VaultRegistryService(environmentStub as any) };
}
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
describe('VaultRegistryService', () => {
|
||||
describe('vaultPath', () => {
|
||||
it('normalizes a trailing slash in the data dir (no double slash)', () => {
|
||||
const { service } = build('/vaults/');
|
||||
expect(service.vaultPath('space-1')).toBe('/vaults/space-1');
|
||||
});
|
||||
|
||||
it('works without a trailing slash too', () => {
|
||||
const { service } = build('/vaults');
|
||||
expect(service.vaultPath('space-1')).toBe('/vaults/space-1');
|
||||
});
|
||||
});
|
||||
|
||||
describe('getVault lazy cache', () => {
|
||||
it('returns the SAME instance on a second call (one VaultGit per space)', async () => {
|
||||
const { service } = build('/vaults');
|
||||
|
||||
const first = await service.getVault('space-1');
|
||||
const second = await service.getVault('space-1');
|
||||
|
||||
// Same cached instance, constructed exactly once.
|
||||
expect(second).toBe(first);
|
||||
expect(VaultGitMock).toHaveBeenCalledTimes(1);
|
||||
expect(VaultGitMock).toHaveBeenCalledWith('/vaults/space-1');
|
||||
// mkdir is only run on the first (cache-miss) construction.
|
||||
expect(mkdirMock).toHaveBeenCalledTimes(1);
|
||||
expect(mkdirMock).toHaveBeenCalledWith('/vaults/space-1', {
|
||||
recursive: true,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('ensureServable', () => {
|
||||
it('ensures the repo then writes the four force-push-protection git configs', async () => {
|
||||
const { service } = build('/vaults');
|
||||
|
||||
const path = await service.ensureServable('space-1');
|
||||
expect(path).toBe('/vaults/space-1');
|
||||
|
||||
// ensureRepo ran first on the cached vault.
|
||||
const vault = await service.getVault('space-1');
|
||||
expect((vault as any).ensureRepo).toHaveBeenCalledTimes(1);
|
||||
|
||||
// Collect every `git config <key> <value>` write.
|
||||
const configWrites = execFileMock.mock.calls
|
||||
.filter(([cmd, args]) => cmd === 'git' && args[0] === 'config')
|
||||
.map(([, args]) => [args[1], args[2]]);
|
||||
|
||||
expect(configWrites).toEqual([
|
||||
['receive.denyCurrentBranch', 'updateInstead'],
|
||||
// Security-critical: blocks force-push / history rewrites on main.
|
||||
['receive.denyNonFastForwards', 'true'],
|
||||
['http.receivepack', 'true'],
|
||||
['http.uploadpack', 'true'],
|
||||
]);
|
||||
|
||||
// Every config write targets THIS vault's cwd.
|
||||
for (const [cmd, args, opts] of execFileMock.mock.calls) {
|
||||
if (cmd === 'git' && args[0] === 'config') {
|
||||
expect(opts.cwd).toBe('/vaults/space-1');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it('rejects (and writes no git config) when ensureRepo rejects', async () => {
|
||||
const { service } = build('/vaults');
|
||||
const vault = await service.getVault('space-1');
|
||||
(vault as any).ensureRepo.mockRejectedValueOnce(new Error('init failed'));
|
||||
|
||||
await expect(service.ensureServable('space-1')).rejects.toThrow(
|
||||
'init failed',
|
||||
);
|
||||
|
||||
const configWrites = execFileMock.mock.calls.filter(
|
||||
([cmd, args]) => cmd === 'git' && args[0] === 'config',
|
||||
);
|
||||
expect(configWrites).toHaveLength(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,96 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { mkdir } from 'node:fs/promises';
|
||||
import { execFile } from 'node:child_process';
|
||||
import { promisify } from 'node:util';
|
||||
import type { VaultGit } from '@docmost/git-sync';
|
||||
import { loadGitSync } from '../git-sync.loader';
|
||||
import { EnvironmentService } from '../../environment/environment.service';
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
/**
|
||||
* Resolves the on-disk vault location per space and owns the (lazily created,
|
||||
* cached) `VaultGit` instance for each one.
|
||||
*
|
||||
* Topology: one git repo per enabled space, rooted at
|
||||
* `<GIT_SYNC_DATA_DIR>/<spaceId>`. A `VaultGit` is constructed at most once per
|
||||
* space and reused across cycles — it is a thin, stateless shell-out wrapper, so
|
||||
* caching it just avoids re-resolving the path and re-running `mkdir`.
|
||||
*/
|
||||
@Injectable()
|
||||
export class VaultRegistryService {
|
||||
private readonly logger = new Logger(VaultRegistryService.name);
|
||||
private readonly vaults = new Map<string, VaultGit>();
|
||||
|
||||
constructor(private readonly environmentService: EnvironmentService) {}
|
||||
|
||||
/**
 * Vault path of a space: the configured git-sync data dir, with any trailing
 * slashes removed, followed by `/<spaceId>`.
 */
vaultPath(spaceId: string): string {
  const dataDir = this.environmentService.getGitSyncDataDir();
  const root = dataDir.replace(/\/+$/, '');
  return [root, spaceId].join('/');
}
|
||||
|
||||
/**
 * Get (or lazily construct + cache) the `VaultGit` for a space, ensuring its
 * directory exists first.
 *
 * `VaultGit.ensureRepo()` is NOT called here; this method only creates the
 * vault directory (`mkdir -p`) and builds the wrapper.
 *
 * Concurrency: the cache is consulted again after the awaits. Without that,
 * two overlapping first calls for the same space would both miss the initial
 * lookup, both construct a `VaultGit`, and hand different instances to their
 * callers (the later one replacing the earlier in the cache). The second
 * lookup and the `set` have no `await` between them, so exactly one instance
 * is ever cached and returned per space.
 *
 * @param spaceId space whose vault is requested
 * @returns the cached `VaultGit` for the space
 */
async getVault(spaceId: string): Promise<VaultGit> {
  const cached = this.vaults.get(spaceId);
  if (cached) return cached;

  const path = this.vaultPath(spaceId);
  await mkdir(path, { recursive: true });
  const { VaultGit } = await loadGitSync();

  // A concurrent caller may have populated the cache while we were awaiting.
  const racedIn = this.vaults.get(spaceId);
  if (racedIn) return racedIn;

  const vault = new VaultGit(path);
  this.vaults.set(spaceId, vault);
  return vault;
}
|
||||
|
||||
/**
|
||||
* Make a space's vault repo servable over smart-HTTP (the /git host). Ensures
|
||||
* the repo exists (engine `ensureRepo`: `git init -b main` + initial commit +
|
||||
* branches; idempotent), then sets the LOCAL git config a `git http-backend`
|
||||
* push needs:
|
||||
*
|
||||
* - receive.denyCurrentBranch=updateInstead — a push to the checked-out
|
||||
* `main` updates the working tree too (the engine's human-facing branch).
|
||||
* Requires a clean tree, which is guaranteed between cycles / under the
|
||||
* orchestrator lock that wraps an external push.
|
||||
* - receive.denyNonFastForwards=true — block force-push so a client cannot
|
||||
* rewrite the engine's history on `main`.
|
||||
* - http.receivepack=true / http.uploadpack=true — explicitly allow the
|
||||
* receive/upload services over HTTP.
|
||||
*
|
||||
* All four are set idempotently (plain `git config` overwrites the local
|
||||
* value). Returns the absolute vault path. Idempotent and safe to call before
|
||||
* every request.
|
||||
*/
|
||||
async ensureServable(spaceId: string): Promise<string> {
|
||||
const { vaultGitEnv } = await loadGitSync();
|
||||
const vault = await this.getVault(spaceId);
|
||||
const path = this.vaultPath(spaceId);
|
||||
|
||||
// ensureRepo also verifies git is available on its first git call; it does
|
||||
// `git init -b main` + an initial commit + the engine branches. Idempotent.
|
||||
await vault.ensureRepo();
|
||||
|
||||
const configs: Array<[string, string]> = [
|
||||
['receive.denyCurrentBranch', 'updateInstead'],
|
||||
['receive.denyNonFastForwards', 'true'],
|
||||
['http.receivepack', 'true'],
|
||||
['http.uploadpack', 'true'],
|
||||
];
|
||||
for (const [key, value] of configs) {
|
||||
await execFileAsync('git', ['config', key, value], {
|
||||
cwd: path,
|
||||
// Use the engine's cwd-isolated env (strips GIT_DIR / GIT_WORK_TREE) so
|
||||
// the config is written to THIS vault's local config, nothing else.
|
||||
env: vaultGitEnv(),
|
||||
});
|
||||
}
|
||||
|
||||
return path;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,198 @@
|
||||
import * as Y from 'yjs';
|
||||
|
||||
import { mergeXmlFragments, mergeXmlFragments3Way } from './yjs-body-merge';
|
||||
|
||||
/**
|
||||
* Regression for the HIGH-severity runaway whole-body duplication: a page body
|
||||
* was RE-APPENDED in full on every git-sync reconcile cycle, unbounded, with NO
|
||||
* client connected.
|
||||
*
|
||||
* ROOT CAUSE (confirmed in-process against the real failing page): the LIVE Yjs
|
||||
* document materializes the editor-schema default `indent: 0` on every
|
||||
* paragraph/heading (and on the paragraph inside every list item, callout, and
|
||||
* table cell), but a body re-imported from git — parsed from clean markdown —
|
||||
* carries NO indent attribute. So every live block's comparison key differed from
|
||||
* the same block coming back from git; the three-way merge could anchor on
|
||||
* NOTHING, and the trailing unit that git's export already contained (but the
|
||||
* merge could not match against the byte-identical live tail) was re-appended
|
||||
* each cycle. Each grown export then diverged from the last-pushed base by one
|
||||
* more unit — a self-sustaining loop.
|
||||
*
|
||||
* The fix normalizes the materialized default (`indent: 0`) out of the block key
|
||||
* (the schema-derived `serializeXmlNode` normalization in yjs-body-merge.ts drops
|
||||
* every attr equal to its ProseMirror-schema default; `indent: 0` is one such),
|
||||
* so a live block compares equal to its git-round-tripped twin and the resync is
|
||||
* a true no-op. The sibling `yjs-body-merge.schema-defaults.spec.ts` covers the
|
||||
* rest of the bug class (image.align, link mark internal, …).
|
||||
*
|
||||
* These tests model that EXACTLY at the Yjs level: a LIVE fragment whose blocks
|
||||
* carry `indent: 0` + block ids, versus a git-derived fragment of the SAME
|
||||
* content with neither — for a body built from BYTE-IDENTICAL units that each
|
||||
* contain a heading, a paragraph, a callout, and a table with empty cells (the
|
||||
* trigger). RED before the fix (the merge applies > 0 ops and the body grows),
|
||||
* GREEN after (0 ops, no growth).
|
||||
*/
|
||||
|
||||
/** Attribute bag for the hand-built Yjs elements used in this spec. */
type Attrs = Record<string, string | number>;

/**
 * Creates a detached `Y.XmlElement` called `name`, copies every entry of
 * `attrs` onto it and, when `children` is non-empty, inserts them at index 0.
 */
function el(
  name: string,
  attrs: Attrs,
  children: (Y.XmlElement | Y.XmlText)[],
) {
  const node = new Y.XmlElement(name);
  Object.entries(attrs).forEach(([key, value]) => {
    // The cast only satisfies the typings; the value is handed over unchanged.
    node.setAttribute(key, value as string);
  });
  if (children.length > 0) {
    node.insert(0, children);
  }
  return node;
}
|
||||
|
||||
/** Returns a detached `Y.XmlText`; `s` is inserted only when it is non-empty. */
function text(s: string): Y.XmlText {
  const node = new Y.XmlText();
  if (!s) {
    return node;
  }
  node.insert(0, s);
  return node;
}
|
||||
|
||||
/**
 * Builds one content unit: a level-1 heading, a paragraph, an info callout
 * wrapping a paragraph, and a 2x2 table (one header row, one body row) whose
 * cells each hold an empty paragraph.
 *
 * With `live` set, the heading and every paragraph additionally carry
 * `indent: 0`, and the heading, the plain paragraph and the callout paragraph
 * carry an `id` made of a short prefix plus `n`. With `live` unset neither is
 * present. The visible text is the same for every `n`; only the ids differ.
 *
 * @param live        build the editor-stamped shape (ids + `indent: 0`)
 * @param n           suffix that makes this unit's ids unique
 * @param headingText text of the heading (defaults to 'Big Heading')
 */
function unit(
  live: boolean,
  n: number,
  headingText = 'Big Heading',
): Y.XmlElement[] {
  const indentAttrs: Attrs = live ? { indent: 0 } : {};
  const idAttrs = (prefix: string): Attrs =>
    live ? { id: `${prefix}${n}` } : {};

  const paragraph = (attrs: Attrs, content: string) =>
    el('paragraph', { ...attrs, ...indentAttrs }, [text(content)]);
  const emptyCell = (cellName: string) =>
    el(cellName, { colspan: 1, rowspan: 1 }, [paragraph({}, '')]);
  const twoCellRow = (cellName: string) =>
    el('tableRow', {}, [emptyCell(cellName), emptyCell(cellName)]);

  const heading = el('heading', { ...idAttrs('h'), level: 1, ...indentAttrs }, [
    text(headingText),
  ]);
  const plainParagraph = paragraph(idAttrs('p'), 'Para with the same words');
  const callout = el('callout', { type: 'info' }, [
    paragraph(idAttrs('c'), 'CalloutText here'),
  ]);
  const table = el('table', {}, [
    twoCellRow('tableHeader'),
    twoCellRow('tableCell'),
  ]);

  return [heading, plainParagraph, callout, table];
}
|
||||
|
||||
/**
 * Creates a fresh `Y.Doc`, flattens `units` into one block list and inserts it
 * at the start of the doc's 'default' XML fragment (nothing is inserted when
 * the list is empty). Returns both the doc and the fragment.
 */
function fragmentOf(units: Y.XmlElement[][]): {
  doc: Y.Doc;
  frag: Y.XmlFragment;
} {
  const doc = new Y.Doc();
  const frag = doc.getXmlFragment('default');

  const allBlocks: Y.XmlElement[] = [];
  for (const group of units) {
    allBlocks.push(...group);
  }
  if (allBlocks.length > 0) {
    frag.insert(0, allBlocks);
  }

  return { doc, frag };
}
|
||||
|
||||
/** Number of top-level children currently held by `frag`. */
const blockCount = (frag: Y.XmlFragment): number => {
  const topLevel = frag.toArray();
  return topLevel.length;
};
|
||||
|
||||
describe('git-sync reconcile import is idempotent (no whole-body duplication)', () => {
  const UNITS = 3;

  // `count` units, either in the live shape (indent:0 + ids) or the git shape.
  const unitsOf = (live: boolean, count: number): Y.XmlElement[][] =>
    Array.from({ length: count }, (_, i) => unit(live, i));

  // Runs `merge` inside one transaction on `doc` and reports its op count.
  const mergeInTransaction = (doc: Y.Doc, merge: () => number): number => {
    let ops = -1;
    doc.transact(() => {
      ops = merge();
    });
    return ops;
  };

  it('3-way: identical content, live carries indent:0, base stale-by-one -> 0 ops, no growth', () => {
    // LIVE: the editor-stamped doc (indent:0 + ids on every block).
    const liveSide = fragmentOf(unitsOf(true, UNITS));
    // INCOMING (git export -> re-import): same content, no indent / ids.
    const incoming = fragmentOf(unitsOf(false, UNITS)).frag;
    // BASE: the last-pushed file, one unit behind.
    const base = fragmentOf(unitsOf(false, UNITS - 1)).frag;

    const sizeBefore = blockCount(liveSide.frag);
    const ops = mergeInTransaction(liveSide.doc, () =>
      mergeXmlFragments3Way(liveSide.frag, incoming, base),
    );

    expect(ops).toBe(0);
    expect(blockCount(liveSide.frag)).toBe(sizeBefore);
  });

  it('3-way is a fixpoint across repeated cycles (does not grow)', () => {
    const liveSide = fragmentOf(unitsOf(true, UNITS));
    // Fresh incoming/base fragments are built for every cycle.
    const freshIncoming = () => fragmentOf(unitsOf(false, UNITS)).frag;
    const freshBase = () => fragmentOf(unitsOf(false, UNITS - 1)).frag;

    const sizeBefore = blockCount(liveSide.frag);
    for (let round = 1; round <= 5; round += 1) {
      const ops = mergeInTransaction(liveSide.doc, () =>
        mergeXmlFragments3Way(liveSide.frag, freshIncoming(), freshBase()),
      );
      expect(ops).toBe(0);
      expect(blockCount(liveSide.frag)).toBe(sizeBefore);
    }
  });

  it('2-way: identical content, live carries indent:0 -> 0 ops, no growth', () => {
    const liveSide = fragmentOf(unitsOf(true, UNITS));
    const incoming = fragmentOf(unitsOf(false, UNITS)).frag;

    const sizeBefore = blockCount(liveSide.frag);
    const ops = mergeInTransaction(liveSide.doc, () =>
      mergeXmlFragments(liveSide.frag, incoming),
    );

    expect(ops).toBe(0);
    expect(blockCount(liveSide.frag)).toBe(sizeBefore);
  });

  it('does NOT regress real edits: a git change to one block still lands', () => {
    const liveSide = fragmentOf(unitsOf(true, UNITS));
    const base = fragmentOf(unitsOf(false, UNITS)).frag;
    // git edits the heading text of the LAST unit only.
    const lastIndex = UNITS - 1;
    const incoming = fragmentOf(
      Array.from({ length: UNITS }, (_, i) =>
        unit(false, i, i === lastIndex ? 'EDITED Heading' : 'Big Heading'),
      ),
    ).frag;

    const sizeBefore = blockCount(liveSide.frag);
    liveSide.doc.transact(() => {
      mergeXmlFragments3Way(liveSide.frag, incoming, base);
    });

    // Collect the text of every top-level heading after the merge.
    const headingTexts: string[] = [];
    for (const block of liveSide.frag.toArray()) {
      const element = block as Y.XmlElement;
      if (element.nodeName !== 'heading') continue;
      headingTexts.push(
        element
          .toArray()
          .map((child) => (child as Y.XmlText).toString())
          .join(''),
      );
    }

    // The edit landed and the body kept its size (one block changed in place).
    expect(headingTexts).toContain('EDITED Heading');
    expect(blockCount(liveSide.frag)).toBe(sizeBefore);
  });
});
|
||||
@@ -0,0 +1,316 @@
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import * as Y from 'yjs';
|
||||
|
||||
import { tiptapExtensions } from '../../../collaboration/collaboration.util';
|
||||
import { mergeXmlFragments, mergeXmlFragments3Way } from './yjs-body-merge';
|
||||
|
||||
/**
|
||||
* Regression for the BUG CLASS behind the runaway whole-body duplication: the
|
||||
* point-fix (7a7b840e) only normalized `indent: 0`, but the SAME divergence
|
||||
* recurs for every attribute whose editor-ext (server) schema default the live
|
||||
* Yjs doc MATERIALIZES while the git round-trip — which comes through the engine
|
||||
* schema (different, usually null, defaults) plus `y-prosemirror`'s null-attr
|
||||
* dropping — does NOT carry. Confirmed triggers beyond `indent`:
|
||||
*
|
||||
* - `image.align` : editor-ext default "center" (materialized) vs engine
|
||||
* default null (dropped) -> element-attr divergence.
|
||||
* - link mark `internal`: editor-ext default false (materialized) vs engine
|
||||
* default null -> MARK-attr divergence (the prior denylist
|
||||
* could not reach marks at all — they are serialized raw in
|
||||
* the XmlText delta).
|
||||
*
|
||||
* `highlight.colorName` is normalized too (defense-in-depth); it is NOT a strong
|
||||
* real-world trigger because BOTH schemas default it to null, but the schema-
|
||||
* derived normalization handles it for free and stays idempotent.
|
||||
*
|
||||
* The fix derives the defaults from the ACTUAL ProseMirror schema (getSchema of
|
||||
* the server tiptapExtensions) and drops any element- OR mark-attribute equal to
|
||||
* its schema default (or null/undefined) from the block comparison key — so a
|
||||
* live block compares equal to its git-round-tripped twin and an unchanged
|
||||
* resync applies 0 ops. RED before the fix (keys diverge -> ops > 0 / growth),
|
||||
* GREEN after.
|
||||
*/
|
||||
|
||||
/** Attribute bag for the hand-built Yjs elements used in this spec. */
type Attrs = Record<string, unknown>;

/**
 * Creates a detached `Y.XmlElement` called `name`, copies every entry of
 * `attrs` onto it and, when `children` is non-empty, inserts them at index 0.
 */
function el(
  name: string,
  attrs: Attrs,
  children: (Y.XmlElement | Y.XmlText)[],
): Y.XmlElement {
  const node = new Y.XmlElement(name);
  Object.entries(attrs).forEach(([key, value]) => {
    // The cast only satisfies the typings; the value is handed over unchanged.
    node.setAttribute(key, value as string);
  });
  if (children.length > 0) {
    node.insert(0, children);
  }
  return node;
}
|
||||
|
||||
/**
 * Returns a detached `Y.XmlText` holding `s`, inserted at offset 0 with `marks`
 * passed as the insert's formatting attributes.
 */
function markedText(s: string, marks: Record<string, unknown>): Y.XmlText {
  const formatted = new Y.XmlText();
  formatted.insert(0, s, marks);
  return formatted;
}
|
||||
|
||||
/**
 * Builds one rich unit of three top-level blocks: a paragraph whose text
 * carries a link mark, an image, and a paragraph whose text carries a
 * highlight mark.
 *
 * With `live` set the blocks are built in the editor-stamped shape: both
 * paragraphs get an `id` (prefix + `n`) and `indent: 0`, the image gets
 * `align: 'center'`, the link mark gets `class: null`, `title: null` and
 * `internal: false`, and the highlight mark gets `colorName: null`. With
 * `live` unset the git-side shape is built instead: no ids, no indent, no
 * image `align`, link `internal: null`, and no `colorName`.
 */
function richUnit(live: boolean, n: number): Y.XmlElement[] {
  const indentAttrs: Attrs = live ? { indent: 0 } : {};
  const idAttrs = (prefix: string): Attrs =>
    live ? { id: `${prefix}${n}` } : {};

  // Link attributes present on both sides; the spread keeps them first.
  const sharedLink = {
    href: 'https://example.com',
    target: '_blank',
    rel: 'noopener noreferrer nofollow',
  };
  const link = live
    ? { ...sharedLink, class: null, title: null, internal: false }
    : { ...sharedLink, internal: null };

  const highlight = live
    ? { color: '#ffd43b', colorName: null }
    : { color: '#ffd43b' };

  const imageSrc = 'https://img.example.com/a.png';
  const imageAttrs: Attrs = live
    ? { src: imageSrc, align: 'center' }
    : { src: imageSrc };

  const linkParagraph = el('paragraph', { ...idAttrs('lp'), ...indentAttrs }, [
    markedText('click here', { link }),
  ]);
  const image = el('image', imageAttrs, []);
  const highlightParagraph = el(
    'paragraph',
    { ...idAttrs('hp'), ...indentAttrs },
    [markedText('hot', { highlight })],
  );

  return [linkParagraph, image, highlightParagraph];
}
|
||||
|
||||
/**
 * Creates a fresh `Y.Doc`, flattens `units` into one block list and inserts it
 * at the start of the doc's 'default' XML fragment (nothing is inserted when
 * the list is empty). Returns both the doc and the fragment.
 */
function fragmentOf(units: Y.XmlElement[][]): {
  doc: Y.Doc;
  frag: Y.XmlFragment;
} {
  const doc = new Y.Doc();
  const frag = doc.getXmlFragment('default');

  const allBlocks: Y.XmlElement[] = [];
  for (const group of units) {
    allBlocks.push(...group);
  }
  if (allBlocks.length > 0) {
    frag.insert(0, allBlocks);
  }

  return { doc, frag };
}
|
||||
|
||||
/** Number of top-level children currently held by `frag`. */
const blockCount = (frag: Y.XmlFragment): number => {
  const topLevel = frag.toArray();
  return topLevel.length;
};
|
||||
|
||||
describe('git-sync reconcile is idempotent for schema-default attrs (image/link/highlight)', () => {
  const UNITS = 3;

  // `count` rich units, either in the live shape or in the git shape.
  const richUnitsOf = (live: boolean, count: number): Y.XmlElement[][] =>
    Array.from({ length: count }, (_, i) => richUnit(live, i));

  // Runs `merge` inside one transaction on `doc` and reports its op count.
  const mergeInTransaction = (doc: Y.Doc, merge: () => number): number => {
    let ops = -1;
    doc.transact(() => {
      ops = merge();
    });
    return ops;
  };

  it('3-way: live carries image.align/link.internal/indent defaults, base stale-by-one -> 0 ops', () => {
    const liveSide = fragmentOf(richUnitsOf(true, UNITS));
    const incoming = fragmentOf(richUnitsOf(false, UNITS)).frag;
    // The base lags the incoming body by one unit.
    const base = fragmentOf(richUnitsOf(false, UNITS - 1)).frag;

    const sizeBefore = blockCount(liveSide.frag);
    const ops = mergeInTransaction(liveSide.doc, () =>
      mergeXmlFragments3Way(liveSide.frag, incoming, base),
    );

    expect(ops).toBe(0);
    expect(blockCount(liveSide.frag)).toBe(sizeBefore);
  });

  it('2-way: live carries the materialized defaults -> 0 ops, no growth', () => {
    const liveSide = fragmentOf(richUnitsOf(true, UNITS));
    const incoming = fragmentOf(richUnitsOf(false, UNITS)).frag;

    const sizeBefore = blockCount(liveSide.frag);
    const ops = mergeInTransaction(liveSide.doc, () =>
      mergeXmlFragments(liveSide.frag, incoming),
    );

    expect(ops).toBe(0);
    expect(blockCount(liveSide.frag)).toBe(sizeBefore);
  });

  it('is a fixpoint across repeated cycles (does not grow)', () => {
    const liveSide = fragmentOf(richUnitsOf(true, UNITS));
    // Fresh incoming/base fragments are built for every cycle.
    const freshIncoming = () => fragmentOf(richUnitsOf(false, UNITS)).frag;
    const freshBase = () => fragmentOf(richUnitsOf(false, UNITS - 1)).frag;

    const sizeBefore = blockCount(liveSide.frag);
    for (let round = 1; round <= 5; round += 1) {
      const ops = mergeInTransaction(liveSide.doc, () =>
        mergeXmlFragments3Way(liveSide.frag, freshIncoming(), freshBase()),
      );
      expect(ops).toBe(0);
      expect(blockCount(liveSide.frag)).toBe(sizeBefore);
    }
  });

  it('does NOT regress a genuine non-default value (a real link.href / image.align:left still diffs)', () => {
    const liveSide = fragmentOf([richUnit(true, 0)]);
    const base = fragmentOf([richUnit(false, 0)]).frag;

    // git genuinely changes the image alignment to a NON-default value; the
    // image is the second block of a rich unit.
    const editedUnit = richUnit(false, 0);
    const gitImage = editedUnit[1] as Y.XmlElement;
    gitImage.setAttribute('align', 'left');
    const incoming = fragmentOf([editedUnit]).frag;

    liveSide.doc.transact(() => {
      mergeXmlFragments3Way(liveSide.frag, incoming, base);
    });

    const mergedImage = liveSide.frag
      .toArray()
      .find(
        (block) => (block as Y.XmlElement).nodeName === 'image',
      ) as Y.XmlElement;
    expect(mergedImage.getAttribute('align')).toBe('left');
  });
});
|
||||
|
||||
/**
 * End-to-end variant that goes through the real server transformer instead of
 * hand-built Yjs nodes. The live document is produced from JSON that omits the
 * default-valued attributes (per the suite's premise, `TiptapTransformer.toYdoc`
 * then materializes them from the schema), while the git-side document is
 * produced from JSON that spells those attributes out as explicit `null` and
 * carries no block ids. Merging the git side into the live side must report
 * 0 applied ops and leave the block count unchanged.
 */
describe('git-sync reconcile is idempotent through the real toYdoc materialization', () => {
  const FRAGMENT_NAME = 'default';
  const HREF = 'https://example.com';
  const IMAGE_SRC = 'https://img.example.com/a.png';
  const HIGHLIGHT_COLOR = '#ffd43b';

  // A text node carrying exactly one mark.
  const markedRun = (
    content: string,
    markType: string,
    markAttrs: Record<string, unknown>,
  ) => ({
    type: 'text',
    text: content,
    marks: [{ type: markType, attrs: markAttrs }],
  });

  // Live side: ids present, default-valued attributes left out.
  const liveContent = [
    {
      type: 'paragraph',
      attrs: { id: 'p1' },
      content: [markedRun('click here', 'link', { href: HREF })],
    },
    { type: 'image', attrs: { src: IMAGE_SRC } },
    {
      type: 'paragraph',
      attrs: { id: 'p2' },
      content: [markedRun('hot', 'highlight', { color: HIGHLIGHT_COLOR })],
    },
  ];

  // git/engine style: explicit nulls for the engine-default attrs, no ids.
  const gitContent = [
    {
      type: 'paragraph',
      content: [
        markedRun('click here', 'link', {
          href: HREF,
          target: '_blank',
          rel: 'noopener noreferrer nofollow',
          class: null,
          title: null,
          internal: null,
        }),
      ],
    },
    {
      type: 'image',
      attrs: { src: IMAGE_SRC, align: null },
    },
    {
      type: 'paragraph',
      content: [
        markedRun('hot', 'highlight', {
          color: HIGHLIGHT_COLOR,
          colorName: null,
        }),
      ],
    },
  ];

  // Wraps `content` in a doc node and converts it with the server extensions.
  const toYdoc = (content: unknown[]) => {
    const docJson = { type: 'doc', content };
    return TiptapTransformer.toYdoc(
      docJson,
      FRAGMENT_NAME,
      tiptapExtensions as any,
    );
  };

  it('3-way: materialized-default live vs engine-style git, base stale-by-one -> 0 ops', () => {
    const liveDoc = toYdoc(liveContent);
    const targetDoc = toYdoc(gitContent);
    // Base = the git body without its last block.
    const baseDoc = toYdoc(gitContent.slice(0, -1));

    const live = liveDoc.getXmlFragment(FRAGMENT_NAME);
    const sizeBefore = live.toArray().length;

    let ops = -1;
    liveDoc.transact(() => {
      const target = targetDoc.getXmlFragment(FRAGMENT_NAME);
      const base = baseDoc.getXmlFragment(FRAGMENT_NAME);
      ops = mergeXmlFragments3Way(live, target, base);
    });

    expect(ops).toBe(0);
    expect(live.toArray().length).toBe(sizeBefore);
  });

  it('2-way: materialized-default live vs engine-style git -> 0 ops', () => {
    const liveDoc = toYdoc(liveContent);
    const targetDoc = toYdoc(gitContent);

    const live = liveDoc.getXmlFragment(FRAGMENT_NAME);
    const sizeBefore = live.toArray().length;

    let ops = -1;
    liveDoc.transact(() => {
      const target = targetDoc.getXmlFragment(FRAGMENT_NAME);
      ops = mergeXmlFragments(live, target);
    });

    expect(ops).toBe(0);
    expect(live.toArray().length).toBe(sizeBefore);
  });
});
|
||||
@@ -0,0 +1,338 @@
|
||||
import * as Y from 'yjs';
|
||||
|
||||
import {
|
||||
mergeXmlFragments,
|
||||
mergeXmlFragments3Way,
|
||||
cloneXmlNode,
|
||||
diffBlocks,
|
||||
} from './yjs-body-merge';
|
||||
|
||||
/**
 * Fills `doc`'s 'default' XML fragment with one `paragraph` element per entry
 * of `paragraphs` and returns the fragment. Every paragraph gets a single
 * `Y.XmlText` child; the entry's text is inserted into it only when non-empty.
 * Nothing is inserted into the fragment when `paragraphs` is empty.
 */
function buildFragment(doc: Y.Doc, paragraphs: string[]): Y.XmlFragment {
  const frag = doc.getXmlFragment('default');

  const blocks: Y.XmlElement[] = [];
  for (const content of paragraphs) {
    const paragraph = new Y.XmlElement('paragraph');
    const run = new Y.XmlText();
    if (content) {
      run.insert(0, content);
    }
    paragraph.insert(0, [run]);
    blocks.push(paragraph);
  }

  if (blocks.length > 0) {
    frag.insert(0, blocks);
  }
  return frag;
}
|
||||
|
||||
/**
 * Returns, for each top-level block of `frag`, the concatenation of the string
 * form of its direct children (treated as `Y.XmlText`).
 */
function texts(frag: Y.XmlFragment): string[] {
  const perBlock: string[] = [];
  for (const block of frag.toArray()) {
    let joined = '';
    for (const child of (block as Y.XmlElement).toArray()) {
      joined += (child as Y.XmlText).toString();
    }
    perBlock.push(joined);
  }
  return perBlock;
}
|
||||
|
||||
describe('yjs-body-merge', () => {
|
||||
describe('diffBlocks (LCS edit script)', () => {
|
||||
it('identical sequences produce only keeps (no edits)', () => {
|
||||
const ops = diffBlocks(['a', 'b', 'c'], ['a', 'b', 'c']);
|
||||
expect(ops.every((o) => o.op === 'keep')).toBe(true);
|
||||
});
|
||||
|
||||
it('a single changed middle element is one del + one ins', () => {
|
||||
const ops = diffBlocks(['a', 'b', 'c'], ['a', 'B', 'c']);
|
||||
expect(ops.filter((o) => o.op === 'del')).toHaveLength(1);
|
||||
expect(ops.filter((o) => o.op === 'ins')).toHaveLength(1);
|
||||
expect(ops.filter((o) => o.op === 'keep')).toHaveLength(2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('mergeXmlFragments', () => {
|
||||
it('identical content is a complete no-op (0 ops) — never clobbers an unchanged resync', () => {
|
||||
const live = new Y.Doc();
|
||||
const target = new Y.Doc();
|
||||
const liveFrag = buildFragment(live, ['one', 'two', 'three']);
|
||||
const targetFrag = buildFragment(target, ['one', 'two', 'three']);
|
||||
|
||||
// Capture block identities to prove they are left untouched.
|
||||
const before = liveFrag.toArray();
|
||||
let applied = -1;
|
||||
live.transact(() => {
|
||||
applied = mergeXmlFragments(liveFrag, targetFrag);
|
||||
});
|
||||
|
||||
expect(applied).toBe(0);
|
||||
// Same Y.XmlElement instances — nothing was deleted/recreated.
|
||||
expect(liveFrag.toArray()).toEqual(before);
|
||||
expect(texts(liveFrag)).toEqual(['one', 'two', 'three']);
|
||||
});
|
||||
|
||||
it('a human edit to one block survives a git change to a DIFFERENT block', () => {
|
||||
// Live: the human has the doc open; block 0 holds their edit. Git changed
|
||||
// only block 2. The merge must touch ONLY block 2 and leave block 0 (and
|
||||
// its in-flight edit) exactly as-is.
|
||||
const live = new Y.Doc();
|
||||
const target = new Y.Doc();
|
||||
const liveFrag = buildFragment(live, ['HUMAN EDIT', 'shared', 'old tail']);
|
||||
const targetFrag = buildFragment(target, [
|
||||
'HUMAN EDIT',
|
||||
'shared',
|
||||
'new tail from git',
|
||||
]);
|
||||
|
||||
const block0Before = liveFrag.get(0); // the human's block instance
|
||||
const block1Before = liveFrag.get(1);
|
||||
|
||||
let applied = -1;
|
||||
live.transact(() => {
|
||||
applied = mergeXmlFragments(liveFrag, targetFrag);
|
||||
});
|
||||
|
||||
// Only block 2 was replaced: one del + one ins.
|
||||
expect(applied).toBe(2);
|
||||
// The human's block and the shared block are the SAME instances (untouched).
|
||||
expect(liveFrag.get(0)).toBe(block0Before);
|
||||
expect(liveFrag.get(1)).toBe(block1Before);
|
||||
// Block 2 now carries git's content.
|
||||
expect(texts(liveFrag)).toEqual([
|
||||
'HUMAN EDIT',
|
||||
'shared',
|
||||
'new tail from git',
|
||||
]);
|
||||
});
|
||||
|
||||
it('appends a new trailing block without disturbing existing ones', () => {
|
||||
const live = new Y.Doc();
|
||||
const target = new Y.Doc();
|
||||
const liveFrag = buildFragment(live, ['a', 'b']);
|
||||
const targetFrag = buildFragment(target, ['a', 'b', 'c']);
|
||||
const a = liveFrag.get(0);
|
||||
const b = liveFrag.get(1);
|
||||
|
||||
let applied = -1;
|
||||
live.transact(() => {
|
||||
applied = mergeXmlFragments(liveFrag, targetFrag);
|
||||
});
|
||||
|
||||
expect(applied).toBe(1); // single insert
|
||||
expect(liveFrag.get(0)).toBe(a);
|
||||
expect(liveFrag.get(1)).toBe(b);
|
||||
expect(texts(liveFrag)).toEqual(['a', 'b', 'c']);
|
||||
});
|
||||
|
||||
it('deletes a removed block, keeping its neighbours', () => {
|
||||
const live = new Y.Doc();
|
||||
const target = new Y.Doc();
|
||||
const liveFrag = buildFragment(live, ['a', 'b', 'c']);
|
||||
const targetFrag = buildFragment(target, ['a', 'c']);
|
||||
const a = liveFrag.get(0);
|
||||
|
||||
let applied = -1;
|
||||
live.transact(() => {
|
||||
applied = mergeXmlFragments(liveFrag, targetFrag);
|
||||
});
|
||||
|
||||
expect(applied).toBe(1); // single delete
|
||||
expect(liveFrag.get(0)).toBe(a);
|
||||
expect(texts(liveFrag)).toEqual(['a', 'c']);
|
||||
});
|
||||
|
||||
it('a fully different body is replaced (and stays valid)', () => {
|
||||
const live = new Y.Doc();
|
||||
const target = new Y.Doc();
|
||||
const liveFrag = buildFragment(live, ['x', 'y']);
|
||||
const targetFrag = buildFragment(target, ['p', 'q', 'r']);
|
||||
live.transact(() => mergeXmlFragments(liveFrag, targetFrag));
|
||||
expect(texts(liveFrag)).toEqual(['p', 'q', 'r']);
|
||||
});
|
||||
});
|
||||
|
||||
describe('mergeXmlFragments3Way', () => {
|
||||
it('keeps a human edit to one block while applying a git change to another (3-way)', () => {
|
||||
// base (last synced): [a, b, c]. Human edited block 0 in the live doc; git
|
||||
// changed block 2 in the incoming file. 3-way must keep BOTH — the 2-way
|
||||
// merge would instead revert the human's block 0 to git's stale version.
|
||||
const base = new Y.Doc();
|
||||
const live = new Y.Doc();
|
||||
const target = new Y.Doc();
|
||||
const baseFrag = buildFragment(base, ['a', 'b', 'c']);
|
||||
const liveFrag = buildFragment(live, ['HUMAN', 'b', 'c']);
|
||||
const targetFrag = buildFragment(target, ['a', 'b', 'GIT']);
|
||||
|
||||
const humanBlock = liveFrag.get(0); // the human's live instance
|
||||
live.transact(() =>
|
||||
mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag),
|
||||
);
|
||||
|
||||
// Human's block preserved as the SAME instance; git's change applied.
|
||||
expect(liveFrag.get(0)).toBe(humanBlock);
|
||||
expect(texts(liveFrag)).toEqual(['HUMAN', 'b', 'GIT']);
|
||||
});
|
||||
|
||||
it('a block both sides changed resolves to git (conflict policy)', () => {
|
||||
const base = new Y.Doc();
|
||||
const live = new Y.Doc();
|
||||
const target = new Y.Doc();
|
||||
const baseFrag = buildFragment(base, ['a', 'b', 'c']);
|
||||
const liveFrag = buildFragment(live, ['a', 'HUMAN', 'c']);
|
||||
const targetFrag = buildFragment(target, ['a', 'GIT', 'c']);
|
||||
|
||||
live.transact(() =>
|
||||
mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag),
|
||||
);
|
||||
expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']);
|
||||
});
|
||||
|
||||
it('git change with no concurrent human edit (live == base) applies cleanly', () => {
|
||||
const base = new Y.Doc();
|
||||
const live = new Y.Doc();
|
||||
const target = new Y.Doc();
|
||||
const baseFrag = buildFragment(base, ['a', 'b']);
|
||||
const liveFrag = buildFragment(live, ['a', 'b']);
|
||||
const targetFrag = buildFragment(target, ['a', 'B2']);
|
||||
|
||||
live.transact(() =>
|
||||
mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag),
|
||||
);
|
||||
expect(texts(liveFrag)).toEqual(['a', 'B2']);
|
||||
});
|
||||
});
|
||||
|
||||
// Regression: start-of-document content duplicating on every two-way sync.
|
||||
//
|
||||
// The LIVE Docmost doc stamps a per-block UniqueID on every heading/paragraph;
|
||||
// a body arriving FROM git is parsed from clean markdown and carries NO block
|
||||
// ids. If the merge comparison key includes that `id`, an unchanged live block
|
||||
// never matches the SAME block coming from git, so the three-way merge cannot
|
||||
// anchor on it — and an incoming block with no anchor (content inserted at the
|
||||
// TOP of the page) is RE-ADDED on every cycle, an unbounded duplication loop.
|
||||
// These tests model that exact id-asymmetry and assert the reconciliation is
|
||||
// IDEMPOTENT (no block growth). They are RED before excluding `id` from the
|
||||
// key in `serializeXmlNode`.
|
||||
describe('idempotent reconciliation with live block ids (start-of-doc dup)', () => {
  // Block spec consumed by `buildDoc`. `id` is set only when provided, mirroring
  // the live doc (ids present) vs a git-parsed body (ids absent).
  type Spec = { tag: 'heading' | 'paragraph'; text: string; id?: string };

  /**
   * Fill `doc`'s 'default' fragment with one top-level block per spec and
   * return that fragment. Headings always get `level: '2'`; a block gets an
   * `id` attribute only when its spec carries one.
   */
  function buildDoc(doc: Y.Doc, specs: Spec[]): Y.XmlFragment {
    const frag = doc.getXmlFragment('default');
    const blocks = specs.map((s) => {
      const el = new Y.XmlElement(s.tag);
      if (s.id) el.setAttribute('id', s.id);
      if (s.tag === 'heading') el.setAttribute('level', '2');
      const t = new Y.XmlText();
      if (s.text) t.insert(0, s.text);
      el.insert(0, [t]);
      return el;
    });
    if (blocks.length) frag.insert(0, blocks);
    return frag;
  }

  /** Plain text of every top-level block, in document order. */
  const textsOf = (frag: Y.XmlFragment): string[] =>
    frag.toArray().map((el) =>
      (el as Y.XmlElement)
        .toArray()
        .map((c) => (c as Y.XmlText).toString())
        .join(''),
    );

  it('re-merging the SAME git body does NOT re-add the top block (idempotent)', () => {
    // last-synced base (from git markdown): NO block ids.
    const base = new Y.Doc();
    const baseFrag = buildDoc(base, [
      { tag: 'heading', text: 'Title' },
      { tag: 'paragraph', text: 'Some paragraph.' },
      { tag: 'paragraph', text: 'End block.' },
    ]);
    // live Docmost doc: SAME content, but every block carries a UniqueID.
    const live = new Y.Doc();
    const liveFrag = buildDoc(live, [
      { tag: 'heading', text: 'Title', id: 'ida' },
      { tag: 'paragraph', text: 'Some paragraph.', id: 'idb' },
      { tag: 'paragraph', text: 'End block.', id: 'idc' },
    ]);
    // incoming git body: the user inserted a heading at the very TOP.
    // A fresh fragment is built per merge so each cycle sees a pristine target.
    const buildTarget = (): Y.XmlFragment =>
      buildDoc(new Y.Doc(), [
        { tag: 'heading', text: 'TOPDUP' },
        { tag: 'heading', text: 'Title' },
        { tag: 'paragraph', text: 'Some paragraph.' },
        { tag: 'paragraph', text: 'End block.' },
      ]);

    // First sync: the top block is added once.
    live.transact(() =>
      mergeXmlFragments3Way(liveFrag, buildTarget(), baseFrag),
    );
    expect(textsOf(liveFrag)).toEqual([
      'TOPDUP',
      'Title',
      'Some paragraph.',
      'End block.',
    ]);

    // Subsequent sync of the SAME git body against the SAME base must be a
    // NO-OP — not a second copy of the top block. Before the fix this re-adds
    // 'TOPDUP', growing the doc on every cycle.
    live.transact(() =>
      mergeXmlFragments3Way(liveFrag, buildTarget(), baseFrag),
    );
    expect(textsOf(liveFrag)).toEqual([
      'TOPDUP',
      'Title',
      'Some paragraph.',
      'End block.',
    ]);
    expect(textsOf(liveFrag).filter((t) => t === 'TOPDUP')).toHaveLength(1);
  });

  it('an unchanged git body (live ids, none in git) is a complete no-op', () => {
    // base == git body (no pending git change); live is the same content with
    // ids. With `id` in the key the whole body looks rewritten; the merge must
    // still leave live byte-identical (block instances untouched).
    const base = new Y.Doc();
    const baseFrag = buildDoc(base, [
      { tag: 'heading', text: 'Title' },
      { tag: 'paragraph', text: 'Body.' },
    ]);
    const live = new Y.Doc();
    const liveFrag = buildDoc(live, [
      { tag: 'heading', text: 'Title', id: 'ida' },
      { tag: 'paragraph', text: 'Body.', id: 'idb' },
    ]);
    const before = liveFrag.toArray();
    let applied = -1;
    live.transact(() => {
      applied = mergeXmlFragments3Way(
        liveFrag,
        buildDoc(new Y.Doc(), [
          { tag: 'heading', text: 'Title' },
          { tag: 'paragraph', text: 'Body.' },
        ]),
        baseFrag,
      );
    });
    expect(applied).toBe(0);
    // Same live block INSTANCES (ids preserved) — nothing recreated. Identity
    // (`toBe`) is asserted per block: a structural `toEqual` would also accept
    // blocks that were deleted and re-inserted with equal content.
    const after = liveFrag.toArray();
    expect(after).toHaveLength(before.length);
    after.forEach((el, i) => expect(el).toBe(before[i]));
  });
});
|
||||
|
||||
describe('cloneXmlNode', () => {
  it('preserves text marks (XmlText delta) across docs', () => {
    // Source side: one paragraph whose text is a plain run followed by a bold run.
    const sourceDoc = new Y.Doc();
    const sourceFrag = sourceDoc.getXmlFragment('default');
    const text = new Y.XmlText();
    text.insert(0, 'plain ');
    text.insert(6, 'bold', { bold: true });
    const paragraph = new Y.XmlElement('paragraph');
    paragraph.insert(0, [text]);
    sourceFrag.insert(0, [paragraph]);

    // Destination side: a different doc that receives the clone.
    const targetDoc = new Y.Doc();
    const targetFrag = targetDoc.getXmlFragment('default');
    const original = sourceFrag.get(0) as Y.XmlElement;
    targetFrag.insert(0, [cloneXmlNode(original)]);

    // The cloned text must carry the same runs and the same marks.
    const copiedParagraph = targetFrag.get(0) as Y.XmlElement;
    const copiedText = copiedParagraph.get(0) as Y.XmlText;
    expect(copiedText.toDelta()).toEqual([
      { insert: 'plain ' },
      { insert: 'bold', attributes: { bold: true } },
    ]);
  });
});
|
||||
});
|
||||
335
apps/server/src/integrations/git-sync/services/yjs-body-merge.ts
Normal file
335
apps/server/src/integrations/git-sync/services/yjs-body-merge.ts
Normal file
@@ -0,0 +1,335 @@
|
||||
import * as Y from 'yjs';
|
||||
import { getSchema } from '@tiptap/core';
|
||||
import type { Schema } from '@tiptap/pm/model';
|
||||
|
||||
import { tiptapExtensions } from '../../../collaboration/collaboration.util';
|
||||
import { diff3Plan } from './three-way-merge';
|
||||
import { buildLcsTable } from './lcs';
|
||||
|
||||
/**
|
||||
* Block-level merge of an incoming (git) page body into a LIVE Yjs document,
|
||||
* replacing the previous full-body "delete everything + re-insert" write that
|
||||
* clobbered concurrent human edits on every sync (review #5 — "do the write as a
|
||||
* merge").
|
||||
*
|
||||
* Strategy: diff the two documents at TOP-LEVEL BLOCK granularity (an LCS over a
|
||||
* canonical structural serialization of each block) and apply only the minimal
|
||||
* insert/delete operations. Blocks that are byte-identical on both sides are
|
||||
* left UNTOUCHED in the live doc — so a human editing one paragraph is unaffected
|
||||
* when git changes a different paragraph, and an unchanged re-sync is a complete
|
||||
* no-op (zero Yjs operations). Yjs then CRDT-merges the minimal ops with any
|
||||
* concurrent edits.
|
||||
*
|
||||
* Limitation (honest): `mergeXmlFragments` on its own is a 2-way merge (live vs
|
||||
* incoming). For a block that BOTH sides changed since the last sync it cannot
|
||||
* tell which is newer without a common ancestor, so the incoming (git) version
|
||||
* wins for that one block. When the last-synced base is available, use
|
||||
* `mergeXmlFragments3Way`, which keeps a block only the human changed; the common
|
||||
* cases — unchanged resync, and edits to DIFFERENT blocks — are handled losslessly.
|
||||
*/
|
||||
|
||||
/** Any Yjs XML node type this module serializes or clones. */
type XmlNode = Y.XmlElement | Y.XmlText | Y.XmlHook;
|
||||
|
||||
/**
|
||||
* Node attributes that are VOLATILE identity (not content) and so must be
|
||||
* excluded from the block comparison key.
|
||||
*
|
||||
* `id` is the per-block UniqueID the editor stamps on every heading/paragraph
|
||||
* (and transclusionSource). It exists ONLY in the live Yjs document — a body
|
||||
* arriving from git is parsed from clean markdown, which carries no block ids
|
||||
* (`markdownToProseMirror` materializes `id: null`, which the Yjs transform then
|
||||
* drops). If `id` were part of the key, an UNCHANGED live block (id "abc123")
|
||||
* would never match the SAME block coming from git (no id), so the three-way
|
||||
* merge's LCS could not anchor on it. The merge would then treat every live
|
||||
* block as deleted-and-reinserted and, when an incoming block has no matching
|
||||
* anchor (e.g. content inserted at the very TOP of the page), RE-ADD a copy of
|
||||
* it on every sync cycle — a non-convergent, unbounded duplication loop
|
||||
* (start-of-document content duplicating each push/pull cycle).
|
||||
*
|
||||
* Excluding `id` makes blocks compare by CONTENT, so an unchanged block matches
|
||||
* across the git round-trip and the reconciliation is idempotent. Block identity
|
||||
* is still preserved in the merged output: `diff3Plan` keeps the LIVE block
|
||||
* INSTANCE (with its id) for an anchor — picks are by index, not by key — so the
|
||||
* stable Yjs block (and any in-flight human edit on it) stays put. This mirrors
|
||||
* `canonicalize.ts`, which already strips the regenerated block `id` from the
|
||||
* round-trip idempotency comparison for exactly the same reason.
|
||||
*
|
||||
* Known limitation (accepted trade-off of content-based matching): two GENUINELY
|
||||
* DISTINCT blocks whose content is byte-identical now collapse to the same content
|
||||
* key, so when git deletes one of the duplicates the LCS may drop the OTHER live
|
||||
* instance instead. The visible result is identical (one copy removed, one kept),
|
||||
* but a concurrent in-flight human edit on the dropped instance could be lost.
|
||||
*/
|
||||
const VOLATILE_KEY_ATTRS: ReadonlySet<string> = new Set<string>(['id']);
|
||||
|
||||
/**
|
||||
* The editor (ProseMirror) schema, built ONCE from the same `tiptapExtensions`
|
||||
* the collaboration server uses to materialize Yjs docs. Memoized: building the
|
||||
* schema is non-trivial and the block key is computed per block per cycle.
|
||||
*
|
||||
* Why the schema (not a hardcoded denylist): the LIVE Yjs document is produced by
|
||||
* `TiptapTransformer.toYdoc(pm, 'default', tiptapExtensions)`, which STAMPS every
|
||||
* schema-default attribute onto every node and mark — `indent: 0` on every
|
||||
* paragraph/heading, `image.align: "center"`, the link mark's `internal: false`,
|
||||
* `highlight.colorName: null`, and so on for youtube/pdf/any future node. A body
|
||||
* re-imported from git comes through the engine's `markdownToProseMirror`, whose
|
||||
* schema declares those attrs with DIFFERENT (usually null) defaults; the
|
||||
* resulting null/absent element attrs are then DROPPED by `y-prosemirror`'s
|
||||
* toYdoc. So the SAME block carries materialized defaults on the live side and
|
||||
* nothing on the git side, its key diverges, the three-way merge anchors on
|
||||
* NOTHING, and the whole body is RE-APPENDED every reconcile cycle — an unbounded
|
||||
* duplication loop with no client connected.
|
||||
*
|
||||
* Deriving the defaults from the actual schema normalizes ALL such attributes
|
||||
* generally (it is not another per-attribute denylist): any attribute whose value
|
||||
* equals the schema default — or is null/undefined — is dropped from the key, on
|
||||
* BOTH element attributes and the mark attributes inside each XmlText delta, so a
|
||||
* live block compares equal to its git-round-tripped twin and an unchanged resync
|
||||
* applies zero ops. Genuinely non-default values (a real `indent: 2`, an
|
||||
* `align: "left"`, a real `link.href`, a real highlight color) are content and
|
||||
* stay in the key, so real edits still diff and land.
|
||||
*/
|
||||
let memoSchema: Schema | null = null;
let memoSchemaTried = false;

/**
 * Return the memoized editor schema, building it on the first call only.
 *
 * @returns the schema built from `tiptapExtensions`, or `null` when the build
 *   threw. A failed build is remembered too, so it is never retried.
 */
function getMergeSchema(): Schema | null {
  if (memoSchemaTried) {
    return memoSchema;
  }
  // Mark the attempt BEFORE building so a throwing build is not repeated.
  memoSchemaTried = true;
  try {
    memoSchema = getSchema(tiptapExtensions as any);
  } catch {
    // Defensive: if the schema can't be built (e.g. a degenerate extension
    // set in a unit test that stubs `tiptapExtensions`), fall back to dropping
    // only null/undefined attrs. The real server always builds it fine.
    memoSchema = null;
  }
  return memoSchema;
}
|
||||
|
||||
/**
 * Decide whether an attribute value should be left out of the comparison key.
 *
 * @param attrSpecs ProseMirror node/mark spec attr map (`{ [name]: { default } }`);
 *   may be missing for an unknown node/mark, in which case only null/undefined
 *   values are dropped.
 * @param attrName name of the attribute being examined.
 * @param value the attribute's current value.
 * @returns `true` when `value` is null/undefined, or strictly equals the
 *   declared default of `attrName`; `false` otherwise. An attr declared without
 *   a default never matches a non-null value (`undefined === value` is false).
 */
function isDefaultAttr(
  attrSpecs: Record<string, any> | undefined | null,
  attrName: string,
  value: unknown,
): boolean {
  // `== null` covers exactly null and undefined.
  if (value == null) {
    return true;
  }
  const declared = attrSpecs?.[attrName];
  if (!declared) {
    return false;
  }
  return declared.default === value;
}
|
||||
|
||||
/**
 * Strip schema-default mark attributes from every op of an XmlText delta.
 *
 * For each op that carries an `attributes` object (markName -> mark attrs):
 * - a mark whose value is null/undefined is dropped entirely;
 * - a mark stored as a primitive (e.g. `true` for an attr-less mark) is kept as-is;
 * - a mark stored as an object keeps only the attrs that are NOT the mark's
 *   schema default and not null/undefined (see `isDefaultAttr`).
 * Mark names and attr names are emitted in sorted order so the result
 * serializes identically regardless of insertion order. Ops without an
 * `attributes` object are returned untouched; all other op fields are copied
 * verbatim.
 */
function normalizeDelta(delta: any[]): any[] {
  const markTypes = getMergeSchema()?.marks;

  const normalizeOp = (op: any): any => {
    const rawMarks = op?.attributes;
    if (rawMarks == null || typeof rawMarks !== 'object') {
      return op;
    }
    const keptMarks: Record<string, unknown> = {};
    Object.keys(rawMarks)
      .sort()
      .forEach((markName) => {
        const markValue = rawMarks[markName];
        if (markValue == null) {
          return;
        }
        if (typeof markValue !== 'object') {
          // attr-less mark stored as a primitive (e.g. `true`) — keep as-is.
          keptMarks[markName] = markValue;
          return;
        }
        const markSpec = markTypes?.[markName]?.spec.attrs as
          | Record<string, any>
          | undefined;
        const markAttrs = markValue as Record<string, unknown>;
        const keptAttrs: Record<string, unknown> = {};
        for (const attrName of Object.keys(markAttrs).sort()) {
          const attrValue = markAttrs[attrName];
          if (!isDefaultAttr(markSpec, attrName, attrValue)) {
            keptAttrs[attrName] = attrValue;
          }
        }
        keptMarks[markName] = keptAttrs;
      });
    return { ...op, attributes: keptMarks };
  };

  return delta.map((op) => normalizeOp(op));
}
|
||||
|
||||
/**
 * Build a canonical, comparable plain-object form of a Yjs XML node.
 *
 * - `Y.XmlText`    -> `{ t: <normalized delta> }` (text + marks)
 * - `Y.XmlElement` -> `{ n: nodeName, a: attrs, c: children }`, where `attrs`
 *   holds the attributes in sorted key order, minus the volatile ones
 *   (`VOLATILE_KEY_ATTRS`) and minus every schema-default / null / undefined
 *   value, and `children` are serialized recursively.
 * - anything else  -> `{ u: String(node) }`
 *
 * Two blocks with the same content therefore serialize identically across the
 * git round-trip, which keeps the merge anchor-able and idempotent.
 */
export function serializeXmlNode(node: unknown): unknown {
  if (node instanceof Y.XmlText) {
    return { t: normalizeDelta(node.toDelta()) };
  }

  if (!(node instanceof Y.XmlElement)) {
    // XmlHook / unknown: fall back to a stable string so it compares by identity
    // of its serialized form (these do not occur in the Docmost block schema).
    return { u: String(node) };
  }

  const rawAttrs = node.getAttributes() as Record<string, unknown>;
  const specs = getMergeSchema()?.nodes[node.nodeName]?.spec.attrs as
    | Record<string, any>
    | undefined;

  const keptAttrs: Record<string, unknown> = {};
  Object.keys(rawAttrs)
    .sort()
    .forEach((name) => {
      const value = rawAttrs[name];
      const excluded =
        VOLATILE_KEY_ATTRS.has(name) || isDefaultAttr(specs, name, value);
      if (!excluded) {
        keptAttrs[name] = value;
      }
    });

  const children = node.toArray().map((child) => serializeXmlNode(child));
  return { n: node.nodeName, a: keptAttrs, c: children };
}
|
||||
|
||||
/** Comparison key of a node: the JSON text of its canonical serialization. */
const key = (node: unknown): string => JSON.stringify(serializeXmlNode(node));
|
||||
|
||||
/**
 * Rebuild a Yjs XML node as a fresh, unattached node that can be inserted into
 * a DIFFERENT document (Yjs types are bound to their doc, so a node cannot be
 * moved across docs — it has to be recreated).
 *
 * - `Y.XmlText`: a new text with the source delta applied (text + marks).
 * - `Y.XmlElement`: a new element with the same nodeName, every attribute
 *   copied, and every child cloned recursively.
 * - any other node type (XmlHook — does not occur in the Docmost block
 *   schema): an empty paragraph, so the merge never crashes.
 */
export function cloneXmlNode(node: XmlNode): Y.XmlElement | Y.XmlText {
  if (node instanceof Y.XmlText) {
    const textCopy = new Y.XmlText();
    const ops = node.toDelta();
    if (ops.length > 0) {
      textCopy.applyDelta(ops);
    }
    return textCopy;
  }

  if (!(node instanceof Y.XmlElement)) {
    // Best-effort fallback for unsupported node types.
    return new Y.XmlElement('paragraph');
  }

  const elementCopy = new Y.XmlElement(node.nodeName);
  const sourceAttrs = node.getAttributes() as Record<string, unknown>;
  for (const [name, value] of Object.entries(sourceAttrs)) {
    elementCopy.setAttribute(name, value as string);
  }
  const children = node.toArray().map((child) => cloneXmlNode(child as XmlNode));
  if (children.length > 0) {
    elementCopy.insert(0, children);
  }
  return elementCopy;
}
|
||||
|
||||
/** One step of a block edit script; `bi` indexes the incoming (`b`) sequence. */
type Op = { op: 'keep' } | { op: 'del' } | { op: 'ins'; bi: number };

/**
 * Compute an edit script that turns sequence `a` (live block keys) into `b`
 * (incoming block keys) as a run of keep/del/ins steps, guided by the table
 * from `buildLcsTable`.
 *
 * Equal heads are kept. Otherwise the table decides: a delete is chosen when
 * `table[i + 1][j] >= table[i][j + 1]`, an insert when it is smaller. Whatever
 * remains of `a` is deleted and whatever remains of `b` is inserted.
 */
export function diffBlocks(a: string[], b: string[]): Op[] {
  const table = buildLcsTable(a, b);
  const script: Op[] = [];
  let ai = 0;
  let bj = 0;

  while (ai < a.length && bj < b.length) {
    if (a[ai] === b[bj]) {
      script.push({ op: 'keep' });
      ai += 1;
      bj += 1;
      continue;
    }
    const preferDelete = table[ai + 1][bj] >= table[ai][bj + 1];
    if (preferDelete) {
      script.push({ op: 'del' });
      ai += 1;
    } else {
      script.push({ op: 'ins', bi: bj });
      bj += 1;
    }
  }

  // Leftovers: only one of these two loops can still have work to do.
  for (; ai < a.length; ai += 1) {
    script.push({ op: 'del' });
  }
  for (; bj < b.length; bj += 1) {
    script.push({ op: 'ins', bi: bj });
  }
  return script;
}
|
||||
|
||||
/**
 * Two-way block merge: mutate `live` in place so its top-level blocks match
 * `target`, applying only the inserts/deletes from `diffBlocks`. Blocks whose
 * keys match are left untouched. MUST be called inside a Yjs transaction.
 *
 * @param live fragment to mutate.
 * @param target fragment holding the desired block sequence; its blocks are
 *   cloned on insert, never moved.
 * @returns the number of block operations applied (0 == content already identical).
 */
export function mergeXmlFragments(
  live: Y.XmlFragment,
  target: Y.XmlFragment,
): number {
  const incoming = target.toArray();
  const script = diffBlocks(live.toArray().map(key), incoming.map(key));

  let position = 0; // index into the LIVE fragment as it is being mutated
  let changes = 0;
  for (const step of script) {
    switch (step.op) {
      case 'keep':
        position += 1;
        break;
      case 'del':
        // Remove the live block at the current position; do not advance.
        live.delete(position, 1);
        changes += 1;
        break;
      case 'ins':
        live.insert(position, [cloneXmlNode(incoming[step.bi] as XmlNode)]);
        position += 1;
        changes += 1;
        break;
    }
  }
  return changes;
}
|
||||
|
||||
/**
 * THREE-WAY block merge: reconcile `live` toward `target` using `base` (the
 * last-synced common ancestor), so a block only the human changed is KEPT and a
 * block only git changed is taken, instead of git's version always winning
 * (review #5). Conflicts (both changed the same block) resolve to git.
 *
 * How: `diff3Plan` returns the merged block order as picks from either the live
 * or the target side. Those picks are cloned into a throwaway document, and the
 * 2-way `mergeXmlFragments` then splices that sequence into `live` minimally, so
 * untouched live block instances stay put. MUST be called inside a Yjs
 * transaction.
 *
 * @returns the number of block operations applied.
 */
export function mergeXmlFragments3Way(
  live: Y.XmlFragment,
  target: Y.XmlFragment,
  base: Y.XmlFragment,
): number {
  const liveBlocks = live.toArray();
  const targetBlocks = target.toArray();

  const plan = diff3Plan(
    base.toArray().map(key),
    liveBlocks.map(key),
    targetBlocks.map(key),
  );

  // Materialize the planned sequence in a scratch doc, cloning each pick from
  // the side it came from.
  const scratchDoc = new Y.Doc();
  const scratchFrag = scratchDoc.getXmlFragment('default');
  const plannedBlocks = plan.map((pick) => {
    const pool = pick.src === 'live' ? liveBlocks : targetBlocks;
    return cloneXmlNode(pool[pick.index] as XmlNode);
  });
  if (plannedBlocks.length > 0) {
    scratchFrag.insert(0, plannedBlocks);
  }

  return mergeXmlFragments(live, scratchFrag);
}
|
||||
@@ -15,6 +15,7 @@ import { InternalLogFilter } from './common/logger/internal-log-filter';
|
||||
import { EnvironmentService } from './integrations/environment/environment.service';
|
||||
import { resolveFrameHeader } from './common/helpers';
|
||||
import { resolveTrustProxy } from './integrations/environment/trust-proxy.util';
|
||||
import { GitHttpService } from './integrations/git-sync/http/git-http.service';
|
||||
|
||||
async function bootstrap() {
|
||||
const app = await NestFactory.create<NestFastifyApplication>(
|
||||
@@ -99,6 +100,23 @@ async function bootstrap() {
|
||||
},
|
||||
);
|
||||
|
||||
// git smart-HTTP POST bodies use these media types. Register PASSTHROUGH
|
||||
// content-type parsers so Fastify does NOT buffer/parse them (it would
|
||||
// otherwise reject the unknown type with 415); the /git handler streams the
|
||||
// raw Node request (request.raw) to `git http-backend` stdin instead. A
|
||||
// passthrough parser also bypasses the bodyLimit, so large pushes are not
|
||||
// truncated (the bytes are never buffered by Fastify).
|
||||
app
|
||||
.getHttpAdapter()
|
||||
.getInstance()
|
||||
.addContentTypeParser(
|
||||
[
|
||||
'application/x-git-upload-pack-request',
|
||||
'application/x-git-receive-pack-request',
|
||||
],
|
||||
(_req, payload, done) => done(null, payload),
|
||||
);
|
||||
|
||||
app
|
||||
.getHttpAdapter()
|
||||
.getInstance()
|
||||
@@ -146,6 +164,25 @@ async function bootstrap() {
|
||||
app.useGlobalInterceptors(new TransformHttpResponseInterceptor(reflector));
|
||||
app.enableShutdownHooks();
|
||||
|
||||
// git smart-HTTP host (the /git/<spaceId>.git/... subtree). Registered as a
|
||||
// RAW Fastify route — NOT a Nest controller under the global '/api' prefix —
|
||||
// so it lives at the ROOT and a single wildcard reliably captures the whole
|
||||
// multi-segment subtree (avoiding the path-to-regexp v8 wildcard / global-
|
||||
// prefix-exclude ambiguity in NestJS v11). The handler is resolved from the
|
||||
// Nest container so all auth/authz/gating still runs. NOTE: Nest middleware
|
||||
// (DomainMiddleware) does NOT run for this raw root route — it is bound to the
|
||||
// Nest router under the global '/api' prefix — so request.raw.workspaceId is
|
||||
// NOT populated here; GitHttpService resolves the workspace itself (mirroring
|
||||
// DomainMiddleware). The Fastify wildcard '/git/*' captures the multi-segment
|
||||
// subpath; the handler re-parses req.url itself.
|
||||
const gitHttpService = app.get(GitHttpService);
|
||||
app
|
||||
.getHttpAdapter()
|
||||
.getInstance()
|
||||
.all('/git/*', async (request, reply) => {
|
||||
await gitHttpService.handle(request as any, reply as any);
|
||||
});
|
||||
|
||||
const logger = new Logger('NestApplication');
|
||||
|
||||
process.on('unhandledRejection', (reason, promise) => {
|
||||
|
||||
102
apps/server/test/git-sync-browser-e2e.cjs
Normal file
102
apps/server/test/git-sync-browser-e2e.cjs
Normal file
@@ -0,0 +1,102 @@
|
||||
#!/usr/bin/env node
|
||||
/*
|
||||
* git-sync BROWSER e2e — drives the real Docmost web UI with Playwright to
|
||||
* reproduce the exact user flow that previously caused data loss: pages created
|
||||
* in the browser start UNTITLED (all collapse to the `_` vault filename); typing
|
||||
* a title reshuffles that collision and used to TRASH another live page. This
|
||||
* test creates several pages via the UI, titles one, runs a sync, and asserts
|
||||
* NOTHING was moved to Trash.
|
||||
*
|
||||
* Setup: needs Playwright + a Chromium build. The project should add
|
||||
* `@playwright/test` as a devDep (`pnpm dlx playwright install chromium`). This
|
||||
* script resolves playwright-core + the chromium binary from env so it can run
|
||||
* against an already-installed browser:
|
||||
* PW_CORE=/path/to/node_modules/playwright-core
|
||||
* PW_CHROME=/path/to/chrome
|
||||
* and the live stand env (SERVER/SPACE_ID/EMAIL/PASSWORD/DB_CONTAINER) like the
|
||||
* shell e2e suites.
|
||||
*/
|
||||
const { execSync } = require('node:child_process');
|
||||
|
||||
// Stand configuration. Every value can be overridden from the environment; the
// fallbacks describe the local development stand.
const env = process.env;
const SERVER = env.SERVER || 'http://localhost:3000';
const WEB = env.WEB || 'http://localhost:5173';
const SPACE_ID = env.SPACE_ID || '019ef1f7-437b-7ae9-9306-809a1729f085';
const SPACE_SLUG = env.SPACE_SLUG || 'general';
const EMAIL = env.EMAIL || 'admin@test.local';
const PASSWORD = env.PASSWORD || 'Test12345!';
// Name of the docker container running the database.
const DB = env.DB_CONTAINER || 'gitmost-db';
// Where to load playwright-core from, and which Chromium binary to launch.
const PW_CORE = env.PW_CORE || '/home/claude/pw/node_modules/playwright-core';
const PW_CHROME =
  env.PW_CHROME || '/home/claude/.cache/ms-playwright/chromium-1148/chrome-linux/chrome';
|
||||
|
||||
const { chromium } = require(PW_CORE);
|
||||
/** Run one SQL statement through psql inside the DB container; returns the trimmed output. */
function psql(q) {
  const command = `docker exec ${DB} psql -U docmost -d docmost -tAc "${q}"`;
  const output = execSync(command, { encoding: 'utf8' });
  return output.trim();
}

/** Number of soft-deleted (trashed) pages in the space under test. */
function trashedCount() {
  const raw = psql(`select count(*) from pages where space_id='${SPACE_ID}' and deleted_at is not null`);
  return Number(raw);
}
|
||||
// Session cookie value captured by login() and sent with every sync() call.
let cookie = '';

/**
 * Log in through the HTTP API and remember the `authToken` cookie.
 *
 * Throws when the response carries no `authToken` cookie. Without a session
 * every later sync trigger would be sent unauthenticated, no sync cycle would
 * run, and the "nothing was trashed" assertion would pass vacuously.
 */
const login = () => {
  const out = execSync(
    `curl -s -i -X POST ${SERVER}/api/auth/login -H 'Content-Type: application/json' -d '{"email":"${EMAIL}","password":"${PASSWORD}"}'`,
    { encoding: 'utf8' });
  const match = out.match(/authToken=([^;]+)/);
  if (!match) {
    throw new Error(`API login failed for ${EMAIL}: no authToken cookie returned by ${SERVER}/api/auth/login`);
  }
  cookie = match[1];
};
|
||||
/** Trigger one git-sync cycle for the space under test; returns curl's output. */
function sync() {
  const command =
    `curl -s -b 'authToken=${cookie}' -X POST ${SERVER}/api/git-sync/trigger -H 'Content-Type: application/json' -d '{"spaceId":"${SPACE_ID}"}'`;
  return execSync(command, { encoding: 'utf8' });
}
|
||||
|
||||
// Result counters, reported and turned into the exit code at the end of the run.
let pass = 0;
let fail = 0;

/** Print a green PASS line and count it. */
function ok(m) {
  console.log(` \x1b[32mPASS\x1b[0m ${m}`);
  pass += 1;
}

/** Print a red FAIL line and count it. */
function bad(m) {
  console.log(` \x1b[31mFAIL\x1b[0m ${m}`);
  fail += 1;
}
|
||||
|
||||
// Main flow: API login, UI login, create untitled pages, retitle one, then
// assert that no page ended up in Trash. Exit code: 0 all passed, 1 a check
// failed, 2 the run itself crashed.
(async () => {
  login();
  const trashBefore = trashedCount();
  const browser = await chromium.launch({ executablePath: PW_CHROME, args: ['--no-sandbox'] });
  const page = await browser.newPage();
  try {
    // --- log in through the UI ---
    await page.goto(`${WEB}/login`, { waitUntil: 'networkidle' });
    await page.getByPlaceholder('email@example.com').fill(EMAIL);
    await page.getByPlaceholder(/password/i).fill(PASSWORD);
    await page.getByRole('button', { name: /sign in|log in|login|войти/i }).click();
    // Only report success once the app has actually left the login page; a
    // fixed sleep would print PASS even when the credentials were rejected.
    await page.waitForURL((url) => !url.pathname.startsWith('/login'), { timeout: 15000 });
    ok('logged in via the browser');

    // --- create several UNTITLED pages via the UI (the bug trigger) ---
    await page.goto(`${WEB}/s/${SPACE_SLUG}`, { waitUntil: 'networkidle' });
    await page.waitForTimeout(1200);
    for (let i = 0; i < 3; i++) {
      await page.getByRole('button', { name: 'Create page' }).first().click();
      await page.waitForTimeout(1500);
      sync(); // each create fires a real git-sync cycle
    }
    ok('created 3 untitled pages through the UI');

    // --- type a title into the page currently open (retitle == the trigger) ---
    const titleEditor = page.locator('.tiptap.ProseMirror').first();
    await titleEditor.click();
    await page.keyboard.type('Заголовок через браузер');
    await page.waitForTimeout(1500); // debounced save
    sync(); sync();
    ok('typed a title into one page');

    // --- THE assertion: nothing got trashed by the reshuffle ---
    const trashAfter = trashedCount();
    if (trashAfter === trashBefore) ok(`no page trashed by the untitled+retitle flow (trash stayed ${trashBefore})`);
    else bad(`a page was TRASHED by the browser flow (trash ${trashBefore} -> ${trashAfter}) — DATA LOSS`);

    // the titled page must still be live
    const titled = Number(psql(`select count(*) from pages where space_id='${SPACE_ID}' and title='Заголовок через браузер' and deleted_at is null`));
    if (titled === 1) ok('the titled page is live'); else bad('the titled page is not live');
  } finally {
    await browser.close();
    // cleanup: hard-delete the pages this run created (titled + the untitled ones from this run)
    psql(`delete from pages where space_id='${SPACE_ID}' and (title='Заголовок через браузер' or (title='' and created_at > now() - interval '5 minutes'))`);
    sync();
  }
  console.log(`\nRESULTS: ${pass} passed, ${fail} failed`);
  process.exit(fail === 0 ? 0 : 1);
})().catch((e) => { console.error(e); process.exit(2); });
|
||||
248
apps/server/test/git-sync-e2e-advanced.sh
Executable file
248
apps/server/test/git-sync-e2e-advanced.sh
Executable file
@@ -0,0 +1,248 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# git-sync ADVANCED end-to-end suite — authz, protocol hardening, concurrency,
|
||||
# and structural sync (rename / reparent / delete-cap), driven against a LIVE
|
||||
# stand. Companion to git-sync-e2e.sh (the basic two-way flows). These cases
|
||||
# need deeper hooks than a plain clone:
|
||||
# - the vault working repo on the host ($VAULT_DIR/<spaceId>) for ref/SHA asserts,
|
||||
# - the Redis container ($REDIS_CONTAINER) to inject a held lock (503 path),
|
||||
# - DB-created fixture users / a second space (auto-created + torn down).
|
||||
#
|
||||
# Came out of a generate->critique subagent pass on "what is NOT covered". The
|
||||
# critic verified the contracts against the code (e.g. a non-member of an
|
||||
# ENABLED space gets 403, not 404 — only a missing / sync-disabled space 404s).
|
||||
#
|
||||
# Usage: apps/server/test/git-sync-e2e-advanced.sh
|
||||
set -uo pipefail
|
||||
|
||||
# Stand endpoints and credentials; each one keeps a value already set (non-empty)
# in the environment and otherwise takes the default given here.
: "${SERVER:=http://localhost:3000}"
# By default the suite PROVISIONS its own throwaway space (never touches real
# data). Set SPACE_ID explicitly to run against an existing space instead.
: "${SPACE_ID:=}"
: "${EMAIL:=admin@test.local}"
: "${PASSWORD:=Test12345!}"
: "${DB_CONTAINER:=gitmost-db}"
: "${DB_USER:=docmost}"
: "${DB_NAME:=docmost}"
: "${REDIS_CONTAINER:=gitmost-redis}"
: "${VAULT_DIR:=/tmp/gitmost-vaults}"
LOCK_PREFIX="git-sync:lock:"

# HTTP Basic credentials of the admin, base64-encoded on a single line.
BASIC=$(printf '%s:%s' "$EMAIL" "$PASSWORD" | base64 -w0)
GIT_URL=""      # set once the space is known (after login/provisioning)
VAULT=""        # ditto
PROVISIONED=""  # the space id we created (and must delete on exit), if any
# Scratch directory for clones and the cookie jar.
WORK=$(mktemp -d /tmp/git-sync-adv.XXXXXX)
COOKIES="$WORK/cookies.txt"
# Result counters.
PASS=0
FAIL=0
# Fixture ids, filled in as fixtures get created.
READER_ID=""
OUTSIDER_ID=""
SPACE2_ID=""
|
||||
|
||||
# Print a bold section heading.
say(){
  printf '\n\033[1m== %s\033[0m\n' "$*"
}

# Print a green PASS line and count it.
ok(){
  printf ' \033[32mPASS\033[0m %s\n' "$*"
  PASS=$((PASS + 1))
}

# Print a red FAIL line and count it.
bad(){
  printf ' \033[31mFAIL\033[0m %s\n' "$*"
  FAIL=$((FAIL + 1))
}
|
||||
# Run one SQL string in the DB container; prints the result with ALL whitespace removed.
psqlq(){
  docker exec "$DB_CONTAINER" psql -U "$DB_USER" -d "$DB_NAME" -tAc "$1" 2>/dev/null \
    | tr -d '[:space:]'
}

# curl with the admin session cookie jar.
api(){
  curl -s -b "$COOKIES" "$@"
}

# git with the admin's HTTP Basic credentials attached to every request.
gitc(){
  git -c http.extraHeader="Authorization: Basic $BASIC" "$@"
}

# Print only the HTTP status code of a curl request.
code(){
  curl -s -o /dev/null -w '%{http_code}' "$@"
}

# Base64 Basic credentials for user $1 (every fixture user shares $PASSWORD).
basicfor(){
  printf '%s:%s' "$1" "$PASSWORD" | base64 -w0
}

# Trigger one sync cycle for $SPACE_ID, discarding the response.
sync_now(){
  local payload="{\"spaceId\":\"$SPACE_ID\"}"
  api -X POST "$SERVER/api/git-sync/trigger" -H 'Content-Type: application/json' -d "$payload" >/dev/null
}

# Resolve ref $1 to a SHA inside the vault working repo.
vault_sha(){
  git -C "$VAULT" rev-parse "$1" 2>/dev/null
}
|
||||
# Push `main` to origin, retrying while the server reports it is busy. The
# smart-HTTP host returns 503+Retry-After when a sync cycle holds the lock (a
# real git client retries; so do we, to dodge poll races).
# Returns 0 as soon as an attempt's output mentions neither "503" nor "busy"
# (whatever the push's own exit status), 1 after six busy attempts.
gpush(){
  local out attempt
  for attempt in 1 2 3 4 5 6; do
    out=$(gitc push origin main 2>&1)
    if echo "$out" | grep -q '503\|busy'; then
      sleep 2
      continue
    fi
    return 0
  done
  return 1
}
|
||||
|
||||
# EXIT handler: remove every fixture this suite may have created.
teardown(){
  # Hard-delete fixtures by EMAIL/NAME pattern (robust against a mid-run abort
  # that never captured an id), so the stand + the basic suite stay clean.
  local fixture_sql
  fixture_sql="delete from space_members where user_id in (select id from users where email like 'e2e-adv-%@test.local');
delete from users where email like 'e2e-adv-%@test.local';
delete from spaces where name like 'E2E-ADV-%';
delete from pages where space_id='$SPACE_ID' and title like 'E2E-ADV-%';"
  psqlq "$fixture_sql" >/dev/null

  # Drop any lock left behind for this space.
  docker exec "$REDIS_CONTAINER" redis-cli del "${LOCK_PREFIX}${SPACE_ID}" >/dev/null 2>&1

  # Delete the throwaway space we created; the delete-cap case leaves the vault
  # non-convergent, so dropping the whole space + its vault is the clean
  # teardown. (When run against a caller-supplied space, only reset the vault —
  # the fixtures above were already removed by pattern.)
  if [ -n "$PROVISIONED" ]; then
    psqlq "delete from pages where space_id='$PROVISIONED'; delete from spaces where id='$PROVISIONED';" >/dev/null
  fi
  if [ -n "$VAULT" ]; then
    rm -rf "$VAULT"
  fi
  if [ -z "$PROVISIONED" ] && [ -n "$SPACE_ID" ]; then
    sync_now
  fi
  rm -rf "$WORK"
}
trap teardown EXIT
|
||||
|
||||
# Create a workspace user that copies the admin's password hash and workspace
# (so it logs in with $PASSWORD).
#   $1  email (also used as the display name)
#   $2  "reader" adds a reader membership in $SPACE_ID; anything else (e.g.
#       "none") leaves the user a non-member
# Prints the new user id.
make_user(){
  local email="$1" role="$2" uid
  local uuid_re='[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'
  local insert_user
  insert_user="insert into users (id,email,name,password,workspace_id,created_at,updated_at,has_generated_password,is_agent)
select gen_random_uuid(),'$email','$email',password,workspace_id,now(),now(),false,false
from users where email='$EMAIL' returning id;"

  # grep the bare uuid out of the RETURNING output (psql may append a status tag).
  uid=$(docker exec "$DB_CONTAINER" psql -U "$DB_USER" -d "$DB_NAME" -tAc "$insert_user" 2>/dev/null \
    | grep -oE "$uuid_re" | head -1)

  case "$role" in
    reader)
      psqlq "insert into space_members (id,user_id,space_id,role,added_by_id,created_at,updated_at)
values (gen_random_uuid(),'$uid','$SPACE_ID','reader','$uid',now(),now());" >/dev/null
      ;;
  esac
  printf '%s' "$uid"
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
say "setup: login + fixtures"
|
||||
[ "$(code -c "$COOKIES" -X POST "$SERVER/api/auth/login" -H 'Content-Type: application/json' -d "{\"email\":\"$EMAIL\",\"password\":\"$PASSWORD\"}")" = "200" ] \
|
||||
&& ok "admin login" || { bad "admin login failed"; exit 1; }
|
||||
if [ -z "$SPACE_ID" ]; then
|
||||
slug="adv$(date +%s)$RANDOM"
|
||||
SPACE_ID=$(api -X POST "$SERVER/api/spaces/create" -H 'Content-Type: application/json' \
|
||||
-d "{\"name\":\"E2E-ADV Throwaway $slug\",\"slug\":\"$slug\"}" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
|
||||
[ -n "$SPACE_ID" ] || { bad "could not provision a test space"; exit 1; }
|
||||
PROVISIONED="$SPACE_ID"
|
||||
psqlq "update spaces set settings = coalesce(settings,'{}'::jsonb) || '{\"gitSync\":{\"enabled\":true}}'::jsonb where id='$SPACE_ID';" >/dev/null
|
||||
ok "provisioned throwaway space $SPACE_ID"
|
||||
fi
|
||||
GIT_URL="$SERVER/git/$SPACE_ID.git"
|
||||
VAULT="$VAULT_DIR/$SPACE_ID"
|
||||
sync_now # initialize the vault for the new space
|
||||
gitc clone -q "$GIT_URL" "$WORK/c" 2>/dev/null && ok "baseline clone" || { bad "baseline clone failed"; exit 1; }
|
||||
( cd "$WORK/c" && git config user.email e2e@test && git config user.name e2e )
|
||||
|
||||
# ===========================================================================
|
||||
say "protocol: unparseable / wrong-method requests are rejected (never reach git)"
|
||||
# A recognized git content-type to an UNKNOWN service subpath reaches the handler
|
||||
# and is rejected as a bad request (resolveServiceKind -> null -> 400).
|
||||
[ "$(code -X POST -H "Authorization: Basic $BASIC" -H 'Content-Type: application/x-git-upload-pack-request' "$GIT_URL/git-bogus-pack")" = "400" ] \
|
||||
&& ok "unknown service subpath -> 400" || bad "unknown service subpath not 400"
|
||||
# An UNKNOWN content-type is rejected by the global content-type allowlist (415)
|
||||
# before the git handler even runs — also a valid rejection.
|
||||
[ "$(code -X POST -H "Authorization: Basic $BASIC" -H 'Content-Type: application/x-git-bogus' "$GIT_URL/git-receive-pack")" = "415" ] \
|
||||
&& ok "unknown content-type -> 415 (global allowlist)" || bad "unknown content-type not 415"
|
||||
[ "$(code -X PUT -H "Authorization: Basic $BASIC" "$GIT_URL/git-receive-pack")" = "400" ] \
|
||||
&& ok "PUT on a pack endpoint -> 400" || bad "PUT not 400"
|
||||
[ "$(code -X DELETE -H "Authorization: Basic $BASIC" "$GIT_URL/info/refs?service=git-upload-pack")" = "400" ] \
|
||||
&& ok "DELETE on info/refs -> 400" || bad "DELETE not 400"
|
||||
|
||||
# ===========================================================================
|
||||
say "protocol: path-traversal in space-id / subpath is rejected (no escape)"
|
||||
for u in \
|
||||
"$SERVER/git/..%2f..%2f..%2fetc.git/info/refs?service=git-upload-pack" \
|
||||
"$GIT_URL/%2e%2e%2finfo/refs?service=git-upload-pack" \
|
||||
"$SERVER/git/.git/info/refs?service=git-upload-pack" ; do
|
||||
c=$(curl -s --path-as-is -o /dev/null -w '%{http_code}' -H "Authorization: Basic $BASIC" "$u")
|
||||
case "$c" in 400|404) ok "traversal '${u##*/git/}' -> $c";; *) bad "traversal '${u##*/git/}' got $c (expected 400/404)";; esac
|
||||
done
|
||||
|
||||
# ===========================================================================
|
||||
say "authz: a sync-DISABLED space is 404 (existence not revealed), not 403"
|
||||
SPACE2_ID=$(api -X POST "$SERVER/api/spaces/create" -H 'Content-Type: application/json' -d '{"name":"E2E-ADV-Space2","slug":"e2eadvspace2"}' | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
|
||||
if [ -n "$SPACE2_ID" ]; then
|
||||
[ "$(code -H "Authorization: Basic $BASIC" "$SERVER/git/$SPACE2_ID.git/info/refs?service=git-upload-pack")" = "404" ] \
|
||||
&& ok "admin member of a gitSync-disabled space -> 404" || bad "disabled space did not 404"
|
||||
# enabling it flips to 200 (proves the per-space flag is the gate)
|
||||
psqlq "update spaces set settings = coalesce(settings,'{}'::jsonb) || '{\"gitSync\":{\"enabled\":true}}'::jsonb where id='$SPACE2_ID';" >/dev/null
|
||||
[ "$(code -H "Authorization: Basic $BASIC" "$SERVER/git/$SPACE2_ID.git/info/refs?service=git-upload-pack")" = "200" ] \
|
||||
&& ok "flipping gitSync.enabled=true -> 200" || bad "enabled 2nd space did not 200"
|
||||
else
|
||||
bad "could not create a 2nd space"
|
||||
fi
|
||||
|
||||
# ===========================================================================
|
||||
say "authz: reader can FETCH (200) but is FORBIDDEN to push (403)"
|
||||
READER_ID=$(make_user "e2e-adv-reader@test.local" reader)
|
||||
RBASIC=$(basicfor "e2e-adv-reader@test.local")
|
||||
[ "$(code -H "Authorization: Basic $RBASIC" "$GIT_URL/info/refs?service=git-upload-pack")" = "200" ] \
|
||||
&& ok "reader fetch -> 200" || bad "reader fetch not 200"
|
||||
[ "$(code -H "Authorization: Basic $RBASIC" "$GIT_URL/info/refs?service=git-receive-pack")" = "403" ] \
|
||||
&& ok "reader push (receive-pack) -> 403" || bad "reader push not 403"
|
||||
|
||||
# ===========================================================================
|
||||
say "authz: a NON-member of an enabled space -> 403 (NOT 404)"
|
||||
OUTSIDER_ID=$(make_user "e2e-adv-outsider@test.local" none)
|
||||
OBASIC=$(basicfor "e2e-adv-outsider@test.local")
|
||||
c=$(code -H "Authorization: Basic $OBASIC" "$GIT_URL/info/refs?service=git-upload-pack")
|
||||
[ "$c" = "403" ] && ok "non-member fetch -> 403 (existence revealed only to members)" || bad "non-member got $c (contract is 403)"
|
||||
|
||||
# ===========================================================================
|
||||
say "concurrency: a push while the per-space lock is held -> 503 + Retry-After"
|
||||
docker exec "$REDIS_CONTAINER" redis-cli set "${LOCK_PREFIX}${SPACE_ID}" "held-by-test" PX 8000 NX >/dev/null 2>&1
|
||||
hdr=$(curl -s -D - -o /dev/null -X POST -H "Authorization: Basic $BASIC" \
|
||||
-H 'Content-Type: application/x-git-receive-pack-request' --data-binary '0000' \
|
||||
"$GIT_URL/git-receive-pack")
|
||||
st=$(printf '%s' "$hdr" | head -1 | grep -o '[0-9]\{3\}')
|
||||
ra=$(printf '%s' "$hdr" | grep -i '^Retry-After:' | tr -d '\r')
|
||||
main_before=$(vault_sha main)
|
||||
[ "$st" = "503" ] && ok "push during held lock -> 503" || bad "lock-held push got $st (expected 503)"
|
||||
[ -n "$ra" ] && ok "503 carries a $ra header" || bad "503 missing Retry-After header"
|
||||
docker exec "$REDIS_CONTAINER" redis-cli del "${LOCK_PREFIX}${SPACE_ID}" >/dev/null 2>&1
|
||||
[ "$(vault_sha main)" = "$main_before" ] && ok "receive-pack did not mutate the vault while locked" || bad "vault main changed under a held lock"
|
||||
|
||||
# ===========================================================================
|
||||
say "idempotent re-sync: nothing changes when nothing changed (no churn)"
|
||||
sync_now
|
||||
m1=$(vault_sha main); lp1=$(vault_sha refs/docmost/last-pushed)
|
||||
sync_now; sync_now
|
||||
m2=$(vault_sha main); lp2=$(vault_sha refs/docmost/last-pushed)
|
||||
[ "$m1" = "$m2" ] && [ "$lp1" = "$lp2" ] && ok "main + last-pushed SHAs stable across idle cycles" \
|
||||
|| bad "idle cycles churned refs (main $m1->$m2, last-pushed $lp1->$lp2)"
|
||||
|
||||
# (Structural rename/move on the live stand is deliberately NOT scripted here: a
|
||||
# freshly-API-created page has a meta-only body, so git's rename-similarity
|
||||
# heuristic classifies a `git mv` of it as delete+add rather than `R`, which is a
|
||||
# test-fixture artifact, not a feature bug. The rename/move classifier is covered
|
||||
# deterministically by the engine unit suite — packages/git-sync/test/
|
||||
# classify-rename-moves.test.ts and node-ops.test.ts.)
|
||||
|
||||
# ===========================================================================
|
||||
say "data-loss guard: deleting MORE than the cap is HELD, not dropped"
|
||||
# Create cap+2 sibling pages, sync, then git rm all of them in one push.
|
||||
CAP=$(api "$SERVER/api/git-sync/status" | grep -o '"maxDeletesPerCycle":[0-9]*' | grep -o '[0-9]*')
|
||||
CAP=${CAP:-5}
|
||||
N=$((CAP+2))
|
||||
ids=""
|
||||
for i in $(seq 1 $N); do
|
||||
id=$(api -X POST "$SERVER/api/pages/create" -H 'Content-Type: application/json' -d "{\"spaceId\":\"$SPACE_ID\",\"title\":\"E2E-ADV-Del-$i-$RANDOM\"}" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
|
||||
ids="$ids $id"
|
||||
done
|
||||
sync_now
|
||||
lp_before=$(vault_sha refs/docmost/last-pushed)
|
||||
rm -rf "$WORK/cd"; gitc clone -q "$GIT_URL" "$WORK/cd" 2>/dev/null
|
||||
cd "$WORK/cd"; git config user.email e2e@test; git config user.name e2e
|
||||
for id in $ids; do f=$(grep -rl "$id" --include='*.md' . | head -1); [ -n "$f" ] && git rm -q "$f"; done
|
||||
git commit -qm "rm $N pages (over cap $CAP)"
|
||||
gpush
|
||||
cd "$WORK"
|
||||
sleep 2
|
||||
trashed=$(psqlq "select count(*) from pages where space_id='$SPACE_ID' and deleted_at is not null and ($(echo $ids | sed "s/ \?\([0-9a-f-]\+\)/ or id='\1'/g; s/^ or //"));")
|
||||
lp_after=$(vault_sha refs/docmost/last-pushed)
|
||||
# An empty $trashed means the count query itself failed; treat that as a failure instead of as "zero pages trashed".
[ "${trashed:-unknown}" = "0" ] && ok "none of the $N over-cap deletes were applied (held)" || bad "${trashed:-unknown} pages trashed despite over-cap (data loss!)"
|
||||
[ "$lp_before" = "$lp_after" ] && ok "last-pushed ref did NOT advance past the delete commit (retry-safe)" || bad "last-pushed advanced over suppressed deletes ($lp_before -> $lp_after)"
|
||||
# No explicit cleanup here: these pages are titled E2E-ADV-*, so teardown hard-deletes them by pattern.
|
||||
|
||||
# ===========================================================================
|
||||
say "data-loss guard #2: untitled pages + retitle must NOT trash other pages"
|
||||
# THE bug from the browser flow: Docmost creates pages UNTITLED (title=''), which
|
||||
# all serialize to the `_` fallback name. Retitling one reshuffles the `_`
|
||||
# collision and relocates another's file; git reports the move as delete+add and
|
||||
# the push used to TRASH the relocated live page. Identity is the pageId now.
|
||||
ut_before=$(psqlq "select count(*) from pages where space_id='$SPACE_ID' and deleted_at is not null;")
|
||||
ut_ids=""
|
||||
for i in 1 2 3 4; do
|
||||
id=$(api -X POST "$SERVER/api/pages/create" -H 'Content-Type: application/json' -d "{\"spaceId\":\"$SPACE_ID\",\"title\":\"\"}" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
|
||||
ut_ids="$ut_ids $id"; sync_now
|
||||
done
|
||||
# retitle the first one (like typing a title in the editor), then sync twice
|
||||
first=$(echo $ut_ids | awk '{print $1}')
|
||||
api -X POST "$SERVER/api/pages/update" -H 'Content-Type: application/json' -d "{\"pageId\":\"$first\",\"title\":\"E2E-ADV-Titled-$RANDOM\"}" >/dev/null
|
||||
sync_now; sync_now
|
||||
ut_after=$(psqlq "select count(*) from pages where space_id='$SPACE_ID' and deleted_at is not null;")
|
||||
live_kept=$(psqlq "select count(*) from pages where space_id='$SPACE_ID' and deleted_at is null and ($(echo $ut_ids | sed "s/ \?\([0-9a-f-]\+\)/ or id='\1'/g; s/^ or //"));")
|
||||
[ "${ut_after:-9}" = "${ut_before:-0}" ] && ok "no page trashed by the untitled+retitle reshuffle (was the data-loss bug)" || bad "trashed count grew ${ut_before}->${ut_after} (page lost to the reshuffle!)"
|
||||
[ "${live_kept:-0}" = "4" ] && ok "all 4 untitled/retitled pages still LIVE" || bad "only $live_kept/4 of the untitled pages survived"
|
||||
# cleanup these via the E2E-ADV teardown (the retitled one) + hard-delete the rest
|
||||
psqlq "delete from pages where id in ($(echo $ut_ids | sed "s/ \?\([0-9a-f-]\+\)/,'\1'/g; s/^,//"));" >/dev/null
|
||||
|
||||
# ===========================================================================
|
||||
say "RESULTS: $PASS passed, $FAIL failed"
|
||||
[ "$FAIL" -eq 0 ] && exit 0 || exit 1
|
||||
221
apps/server/test/git-sync-e2e.sh
Executable file
221
apps/server/test/git-sync-e2e.sh
Executable file
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# git-sync end-to-end test suite.
|
||||
#
|
||||
# Exercises the FULL two-way sync against a LIVE gitmost server over the real
|
||||
# smart-HTTP /git remote: clone (fetch), push (git -> Docmost), Docmost -> git,
|
||||
# delete -> trash, the 3-way body merge, and the auth/authz gate. This is the
|
||||
# integration counterpart to the unit suites — it boots nothing itself; it drives
|
||||
# a running stand.
|
||||
#
|
||||
# Prerequisites (a running git-sync stand):
|
||||
# - server up at $SERVER with GIT_SYNC_ENABLED=true + GIT_SYNC_HTTP_ENABLED=true
|
||||
# and a configured GIT_SYNC_SERVICE_USER_ID;
|
||||
# - a space whose settings.gitSync.enabled = true ($SPACE_ID);
|
||||
# - an admin user ($EMAIL/$PASSWORD) who is a member of that space;
|
||||
# - the Postgres container reachable for DB assertions ($DB_CONTAINER).
|
||||
#
|
||||
# Usage: apps/server/test/git-sync-e2e.sh
|
||||
# Override any of the env vars below to point at a different stand.
|
||||
set -uo pipefail
|
||||
|
||||
SERVER="${SERVER:-http://localhost:3000}"
|
||||
# By default the suite PROVISIONS its own throwaway space (so it never touches
|
||||
# real data). Set SPACE_ID explicitly to run against an existing space instead.
|
||||
SPACE_ID="${SPACE_ID:-}"
|
||||
EMAIL="${EMAIL:-admin@test.local}"
|
||||
PASSWORD="${PASSWORD:-Test12345!}"
|
||||
DB_CONTAINER="${DB_CONTAINER:-gitmost-db}"
|
||||
DB_USER="${DB_USER:-docmost}"
|
||||
DB_NAME="${DB_NAME:-docmost}"
|
||||
|
||||
BASIC=$(printf '%s:%s' "$EMAIL" "$PASSWORD" | base64 -w0)
|
||||
GIT_URL="" # set once the space is known (after login/provisioning)
|
||||
PROVISIONED="" # the space id we created (and must delete on exit), if any
|
||||
WORK=$(mktemp -d /tmp/git-sync-e2e.XXXXXX)
|
||||
COOKIES="$WORK/cookies.txt"
|
||||
PASS=0
|
||||
FAIL=0
|
||||
|
||||
cleanup() {
  # Shared psql invocation for the two delete statements below.
  local -a run_sql=(docker exec "$DB_CONTAINER" psql -U "$DB_USER" -d "$DB_NAME" -tAc)

  if [ -n "$PROVISIONED" ]; then
    # We created this space: drop it with its pages, then its vault directory.
    "${run_sql[@]}" \
      "delete from pages where space_id='$PROVISIONED'; delete from spaces where id='$PROVISIONED';" >/dev/null 2>&1
    rm -rf "/tmp/gitmost-vaults/$PROVISIONED" 2>/dev/null
  elif [ -n "$SPACE_ID" ]; then
    # Caller-supplied space: remove only our own E2E-* pages, then trigger one
    # more sync cycle for the space.
    "${run_sql[@]}" \
      "delete from pages where space_id='$SPACE_ID' and title like 'E2E-%';" >/dev/null 2>&1
    curl -s -b "$COOKIES" -X POST "$SERVER/api/git-sync/trigger" \
      -H 'Content-Type: application/json' -d "{\"spaceId\":\"$SPACE_ID\"}" >/dev/null 2>&1
  fi

  rm -rf "$WORK"
}
|
||||
trap cleanup EXIT
|
||||
|
||||
# Print a bold section heading, preceded by a blank line.
say() {
  local heading="$*"
  printf '\n\033[1m== %s\033[0m\n' "$heading"
}
|
||||
# Report a passing check and bump the global PASS counter.
ok() {
  local msg="$*"
  printf ' \033[32mPASS\033[0m %s\n' "$msg"
  PASS=$((PASS + 1))
}
|
||||
# Report a failing check and bump the global FAIL counter.
bad() {
  local msg="$*"
  printf ' \033[31mFAIL\033[0m %s\n' "$msg"
  FAIL=$((FAIL + 1))
}
|
||||
|
||||
# Run git with the admin's HTTP Basic credentials attached as an extra header.
gitc() {
  local auth_header="Authorization: Basic $BASIC"
  git -c "http.extraHeader=$auth_header" "$@"
}
|
||||
# Push retrying on 503 — the host returns 503+Retry-After when a sync cycle holds
|
||||
# the per-space lock (a real client retries; so do we, to dodge poll races).
|
||||
# Push main to origin quietly, retrying while the server answers with the
# lock-held response (output mentions 503 / "busy"). Returns 0 on a successful
# push, git's own exit status on any other failure (so `if gpush` callers can
# actually report a failed push), and 1 once all 6 attempts were busy.
gpush() {
  local out rc
  for _ in 1 2 3 4 5 6; do
    out=$(gitc push -q origin main 2>&1); rc=$?
    [ "$rc" -eq 0 ] && return 0
    # Only the busy response is worth retrying; anything else is a real failure.
    echo "$out" | grep -q '503\|busy' || return "$rc"
    sleep 2
  done
  return 1
}
|
||||
# Run one SQL string through psql inside the DB container (tuples-only,
# unaligned output); psql's stderr is discarded.
psqlq() {
  local sql="$1"
  docker exec "$DB_CONTAINER" psql -U "$DB_USER" -d "$DB_NAME" -tAc "$sql" 2>/dev/null
}
|
||||
# Silent curl call that sends the cookies stored in $COOKIES; all arguments are
# passed through to curl unchanged.
api() {
  curl --silent --cookie "$COOKIES" "$@"
}
|
||||
|
||||
# Force one synchronous sync cycle and return when it has applied.
|
||||
# POST the git-sync trigger endpoint for $SPACE_ID and discard the response body.
sync_now() {
  local payload="{\"spaceId\":\"$SPACE_ID\"}"
  api -X POST "$SERVER/api/git-sync/trigger" \
    -H 'Content-Type: application/json' -d "$payload" >/dev/null
}
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
say "auth: login as the admin"
|
||||
code=$(curl -s -o /dev/null -w '%{http_code}' -c "$COOKIES" -X POST \
|
||||
"$SERVER/api/auth/login" -H 'Content-Type: application/json' \
|
||||
-d "{\"email\":\"$EMAIL\",\"password\":\"$PASSWORD\"}")
|
||||
[ "$code" = "200" ] && ok "login 200" || { bad "login returned $code"; exit 1; }
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
if [ -z "$SPACE_ID" ]; then
|
||||
say "setup: provision a throwaway git-sync space (never touches real data)"
|
||||
slug="e2e$(date +%s)$RANDOM"
|
||||
SPACE_ID=$(api -X POST "$SERVER/api/spaces/create" -H 'Content-Type: application/json' \
|
||||
-d "{\"name\":\"E2E Throwaway $slug\",\"slug\":\"$slug\"}" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
|
||||
if [ -n "$SPACE_ID" ]; then
|
||||
PROVISIONED="$SPACE_ID"
|
||||
psqlq "update spaces set settings = coalesce(settings,'{}'::jsonb) || '{\"gitSync\":{\"enabled\":true}}'::jsonb where id='$SPACE_ID';" >/dev/null
|
||||
ok "provisioned space $SPACE_ID"
|
||||
else
|
||||
bad "could not provision a test space"; exit 1
|
||||
fi
|
||||
fi
|
||||
GIT_URL="$SERVER/git/$SPACE_ID.git"
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
say "gate: smart-HTTP auth/authz"
|
||||
code=$(curl -s -o /dev/null -w '%{http_code}' "$GIT_URL/info/refs?service=git-upload-pack")
|
||||
[ "$code" = "401" ] && ok "no credentials -> 401" || bad "no creds expected 401, got $code"
|
||||
|
||||
code=$(curl -s -o /dev/null -w '%{http_code}' -H "Authorization: Basic $(printf '%s:wrong' "$EMAIL" | base64 -w0)" \
|
||||
"$GIT_URL/info/refs?service=git-upload-pack")
|
||||
[ "$code" = "401" ] && ok "wrong password -> 401" || bad "wrong creds expected 401, got $code"
|
||||
|
||||
code=$(curl -s -o /dev/null -w '%{http_code}' -H "Authorization: Basic $BASIC" \
|
||||
"$SERVER/git/00000000-0000-0000-0000-000000000000.git/info/refs?service=git-upload-pack")
|
||||
[ "$code" = "404" ] && ok "unknown space -> 404 (existence not revealed)" || bad "unknown space expected 404, got $code"
|
||||
|
||||
code=$(curl -s -o /dev/null -w '%{http_code}' -H "Authorization: Basic $BASIC" \
|
||||
"$GIT_URL/info/refs?service=git-upload-pack")
|
||||
[ "$code" = "200" ] && ok "valid creds + sync space -> 200" || bad "valid clone gate expected 200, got $code"
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
# A DEDICATED test page so the push/merge edits never touch a real page, and so
|
||||
# a freshly-provisioned (empty) space has content for the fetch test below.
|
||||
say "setup: create a dedicated test page (edits target only this one)"
|
||||
TEST_TITLE="E2E-SyncTarget-$RANDOM$RANDOM"
|
||||
TEST_ID=$(api -X POST "$SERVER/api/pages/create" -H 'Content-Type: application/json' \
|
||||
-d "{\"spaceId\":\"$SPACE_ID\",\"title\":\"$TEST_TITLE\"}" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
|
||||
[ -n "$TEST_ID" ] && ok "created test page $TEST_TITLE" || { bad "could not create the test page"; }
|
||||
sync_now
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
say "fetch: clone the space vault over HTTP"
|
||||
sync_now
|
||||
if gitc clone -q "$GIT_URL" "$WORK/clone" 2>/dev/null; then
|
||||
count=$(find "$WORK/clone" -maxdepth 1 -name '*.md' | wc -l)
|
||||
[ "$count" -ge 1 ] && ok "clone succeeded with $count markdown file(s)" || bad "clone has no .md files"
|
||||
else
|
||||
bad "clone failed"
|
||||
fi
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
say "push: a git edit propagates into the (dedicated) Docmost page"
|
||||
rm -rf "$WORK/cpush"; gitc clone -q "$GIT_URL" "$WORK/cpush" 2>/dev/null
|
||||
cd "$WORK/cpush" || exit 1
|
||||
git config user.email e2e@test >/dev/null; git config user.name e2e >/dev/null
|
||||
target=$(grep -rl "$TEST_ID" --include='*.md' . | head -1)
|
||||
if [ -n "$target" ]; then
|
||||
MARK="E2E-PUSH-$RANDOM$RANDOM"
|
||||
printf '\n## %s\n' "$MARK" >> "$target"
|
||||
git commit -aqm "e2e push: $MARK"
|
||||
if gpush; then
|
||||
sleep 2
|
||||
has=$(psqlq "select count(*) from pages where id='$TEST_ID' and content::text like '%$MARK%';")
|
||||
[ "${has:-0}" -ge 1 ] && ok "pushed edit reached the test page" || bad "marker $MARK not in the test page content"
|
||||
else
|
||||
bad "git push failed"
|
||||
fi
|
||||
else
|
||||
bad "test page .md not found in the clone"
|
||||
fi
|
||||
cd "$WORK" || exit 1
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
say "Docmost -> git: a page created in Docmost appears in the vault"
|
||||
NEW_TITLE="E2E-Created-$RANDOM"
|
||||
new_id=$(api -X POST "$SERVER/api/pages/create" -H 'Content-Type: application/json' \
|
||||
-d "{\"spaceId\":\"$SPACE_ID\",\"title\":\"$NEW_TITLE\"}" | grep -o '"id":"[^"]*"' | head -1 | cut -d'"' -f4)
|
||||
if [ -n "$new_id" ]; then
|
||||
sync_now
|
||||
rm -rf "$WORK/clone2"
|
||||
gitc clone -q "$GIT_URL" "$WORK/clone2" 2>/dev/null
|
||||
if find "$WORK/clone2" -name "*$NEW_TITLE*.md" | grep -q .; then
|
||||
ok "new Docmost page '$NEW_TITLE' materialized as a vault file"
|
||||
else
|
||||
bad "created page '$NEW_TITLE' did not appear in the vault"
|
||||
fi
|
||||
else
|
||||
bad "could not create a page via the API"
|
||||
fi
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
say "delete: removing a file via git soft-deletes the Docmost page"
|
||||
cd "$WORK/clone2" 2>/dev/null || cd "$WORK/clone" || exit 1
|
||||
git config user.email e2e@test >/dev/null; git config user.name e2e >/dev/null
|
||||
delfile=$(find . -maxdepth 1 -name "*$NEW_TITLE*.md" | head -1)
|
||||
if [ -n "$delfile" ]; then
|
||||
git rm -q "$delfile"
|
||||
git commit -qm "e2e delete: $NEW_TITLE"
|
||||
if gpush; then
|
||||
sleep 2
|
||||
deleted=$(psqlq "select count(*) from pages where space_id='$SPACE_ID' and title='$NEW_TITLE' and deleted_at is not null;")
|
||||
[ "${deleted:-0}" -ge 1 ] && ok "page '$NEW_TITLE' was soft-deleted (in Trash)" || bad "page '$NEW_TITLE' not soft-deleted after git rm"
|
||||
else
|
||||
bad "push (delete) failed"
|
||||
fi
|
||||
else
|
||||
bad "delete target file not found in clone"
|
||||
fi
|
||||
cd "$WORK" || exit 1
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
say "3-way merge: a git edit to one part keeps the rest of the (test) page"
|
||||
# Re-clone fresh, append a second unique line to the SAME dedicated page, push,
|
||||
# then confirm BOTH markers coexist — the body merge did not clobber the first.
|
||||
rm -rf "$WORK/cmerge"
|
||||
gitc clone -q "$GIT_URL" "$WORK/cmerge" 2>/dev/null
|
||||
cd "$WORK/cmerge" || exit 1
|
||||
git config user.email e2e@test >/dev/null; git config user.name e2e >/dev/null
|
||||
mfile=$(grep -rl "$TEST_ID" --include='*.md' . | head -1)
|
||||
if [ -n "$mfile" ]; then
|
||||
MARK2="E2E-MERGE-$RANDOM$RANDOM"
|
||||
printf '\n## %s\n' "$MARK2" >> "$mfile"
|
||||
git commit -aqm "e2e merge: $MARK2"
|
||||
if gpush; then
|
||||
sleep 2
|
||||
both=$(psqlq "select count(*) from pages where id='$TEST_ID' and content::text like '%$MARK2%' and content::text like '%E2E-PUSH-%';")
|
||||
[ "${both:-0}" -ge 1 ] && ok "new edit added without losing prior content (3-way merge)" || bad "3-way merge lost content (both markers not present)"
|
||||
else
|
||||
bad "push (merge) failed"
|
||||
fi
|
||||
else
|
||||
bad "test page .md not found in the clone"
|
||||
fi
|
||||
cd "$WORK" || exit 1
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
say "RESULTS: $PASS passed, $FAIL failed"
|
||||
[ "$FAIL" -eq 0 ] && exit 0 || exit 1
|
||||
29
apps/server/test/jest.setup.ts
Normal file
29
apps/server/test/jest.setup.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
// Jest global setup (runs before each test module loads).
|
||||
//
|
||||
// react-dom@18 (pulled in transitively via @docmost/editor-ext -> @tiptap/react
|
||||
// -> react-dom, e.g. through the math node) reads `navigator` at MODULE-INIT
|
||||
// time. The server jest config uses `testEnvironment: "node"`, which has no
|
||||
// `navigator`, so ANY spec that transitively imports the editor schema/engine
|
||||
// (e.g. the git-sync HTTP service specs, which reach the conversion engine)
|
||||
// fails to LOAD with "ReferenceError: navigator is not defined". These specs
|
||||
// never exercise the DOM — they just can't survive the import. Provide the
|
||||
// minimal browser globals those modules touch at import so the specs run.
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
const g = globalThis as any;

// Shim 1: `navigator`. Installed only when the environment has none, as a
// configurable + writable data property so a later setup step can replace it.
// Per the note above, react-dom only reads `navigator.userAgent` at init, so
// the stub is kept minimal.
if (g.navigator === undefined) {
  const navigatorStub = { userAgent: "node", platform: "node" };
  Object.defineProperty(g, "navigator", {
    value: navigatorStub,
    configurable: true,
    writable: true,
  });
}

// Shim 2: `MessageChannel`. Some react-dom scheduler builds reference it at
// init. The stub exposes two inert ports: nothing is ever delivered.
if (g.MessageChannel === undefined) {
  const makeInertPort = () => ({ postMessage() {}, close() {}, onmessage: null });
  g.MessageChannel = class {
    port1 = makeInertPort();
    port2 = makeInertPort();
  };
}
|
||||
@@ -20,7 +20,7 @@
|
||||
инструментов при точных типах. Делать инкрементально.
|
||||
- ⏳ **Унификация конвертера ProseMirror ↔ Markdown** — открыта (см. раздел
|
||||
«Расширение …» ниже); на неё опирается план git-синка
|
||||
(`docs/git-sync-plan.md`).
|
||||
(`docs/backlog/git-sync-thin-meta.md`).
|
||||
|
||||
## Суть
|
||||
|
||||
|
||||
139
docs/backlog/git-sync-thin-meta.md
Normal file
139
docs/backlog/git-sync-thin-meta.md
Normal file
@@ -0,0 +1,139 @@
|
||||
# git-sync: native-Obsidian vault format
|
||||
|
||||
Статус: **дизайн (согласован с владельцем 2026-06-24), к реализации.**
|
||||
|
||||
## Цель
|
||||
|
||||
Волт спейса должен быть **настоящим Obsidian-волтом**: владелец открывает папку в
|
||||
Obsidian (с плагином Folder Notes) и получает ровно ту же структуру страниц, не
|
||||
замечая разницы. Никаких служебных артефактов, которые бы выглядели чужеродно.
|
||||
Сторонние редакторы кладут «голые» файлы/папки — движок их **адоптирует** в
|
||||
страницы Docmost.
|
||||
|
||||
Сейчас каждый `.md` несёт жирный `<!-- docmost:meta {…} -->` блок — это уезжает.
|
||||
|
||||
## Формат
|
||||
|
||||
```
|
||||
<Space-vault>/
|
||||
Заметка.md # лист: чистый markdown + frontmatter id
|
||||
Проект/ # страница-родитель = ПАПКА
|
||||
Проект.md # folder-note: ТЕЛО самой страницы «Проект»
|
||||
Задача.md # ребёнок
|
||||
Подпроект/
|
||||
Подпроект.md # тело «Подпроект»
|
||||
...
|
||||
.obsidian/ # конфиг Obsidian — движок НЕ ТРОГАЕТ
|
||||
```
|
||||
|
||||
Каждый файл страницы:
|
||||
```
|
||||
---
|
||||
gitmost_id: 019ef6fc-2638-7ce1-9ce3-2756ce038480
|
||||
---
|
||||
<чистый markdown — тело страницы (wiki-ссылки, всё как в Obsidian)>
|
||||
```
|
||||
|
||||
- **Лист** (нет детей) → `<title>.md`.
|
||||
- **Родитель** (есть дети) → папка `<title>/`, его тело в `<title>/<title>.md`
|
||||
(folder-note по конвенции плагина LostPaul Folder Notes — заметка с именем
|
||||
папки внутри неё). Лист, у которого появился первый ребёнок, превращается из
|
||||
`<title>.md` в `<title>/<title>.md` (безопасный move по id).
|
||||
- **title** = имя файла (для папки — имя папки). **parentPageId** = ближайшая
|
||||
родительская папка (её folder-note). **spaceId** = эта репа. Всё выводимо.
|
||||
- **Идентичность** — `gitmost_id` (= Docmost pageId) во frontmatter. Невыводима,
|
||||
едет ВМЕСТЕ с файлом → переживает любой move, даже не распознанный git как
|
||||
rename. (Ключ namespaced `gitmost_id`, не голый `id`, чтобы не конфликтовать с
|
||||
пользовательскими frontmatter-полями. Имя ключа — последний пункт, ещё ожидающий подтверждения.)
|
||||
- **Коллизии имён** (2+ сиблинга с одним title): как делает сам Obsidian —
|
||||
добавляем натуральный суффикс ` 2`, ` 3`. id во frontmatter, так что имя файла
|
||||
чисто косметическое; смена суффикса — безопасный rename (идентичность по id).
|
||||
|
||||
Никакого `.gitmost/index.json` (сайдкар отвергнут: path-keyed индекс хрупок к
|
||||
rename; id во frontmatter самодостаточен). Никаких `docmost:meta`/`docmost:comments`
|
||||
блоков (комменты и так живут инлайн-марками `<span data-comment-id>` в теле).
|
||||
|
||||
## Ссылки между заметками (`[[wikilinks]]`)
|
||||
|
||||
Obsidian резолвит `[[Заметка]]` по **basename** (не по полному пути), нормализуя
|
||||
пробелы/`-`/`_`, с приоритетом короткого пути при неоднозначности.
|
||||
|
||||
- В Docmost ссылки — по pageId (mention/reference node), rename переживают.
|
||||
- В волте — обсидиановские `[[basename]]`.
|
||||
- Следствие: **reparent (смена папки) ссылку НЕ ломает** (basename тот же),
|
||||
ломает только **retitle**. Значит переписывать `[[…]]` надо только при смене
|
||||
имени страницы — узкий случай. (Obsidian сам умеет «update links on rename».)
|
||||
- Конвертер Docmost-mention ↔ `[[wikilink]]` (обе стороны) + переписывание при
|
||||
retitle — **отдельная фаза** (см. план), не блокирует формат.
|
||||
|
||||
## PULL (Docmost → vault)
|
||||
|
||||
1. Прочитать дерево спейса.
|
||||
2. Layout: лист→`<t>.md`, родитель→`<t>/<t>.md`, коллизии→` 2`/` 3`.
|
||||
3. Записать `---\ngitmost_id: …\n---\n<тело>` (чистый markdown).
|
||||
4. Переехавшие файлы — move (по id), не delete.
|
||||
5. Коммит на `docmost`, merge в `main`.
|
||||
|
||||
## PUSH (vault → Docmost)
|
||||
|
||||
1. Дифф `last-pushed..main`.
|
||||
2. Идентичность файла — из frontmatter `gitmost_id`. Родитель — из пути (folder-note
|
||||
родительской папки).
|
||||
3. Классификация:
|
||||
- есть `gitmost_id` в дереве → update/move/rename по id (страховка 5133bb34).
|
||||
- нет id (новый голый файл от Obsidian) → **adopt**: create page (title=имя,
|
||||
parent=папка), дописать `gitmost_id` во frontmatter.
|
||||
- голая папка с детьми без folder-note → создать страницу-родитель, завести
|
||||
`<folder>/<folder>.md`.
|
||||
- файл пропал, а id ещё в дереве под другим путём → move. Реально пропал →
|
||||
delete (под delete-cap).
|
||||
|
||||
## Адопция (третья сторона → Docmost)
|
||||
|
||||
- голый `.md` без frontmatter id → create page.
|
||||
- голая папка с `.md` внутри без folder-note → create страницу-родитель + folder-note.
|
||||
- `.obsidian/`, аттачменты, dot-файлы, любые не-`.md` → **игнор** (не страницы),
|
||||
лежат в гите как есть, Obsidian ими владеет. Без `.gitignore`.
|
||||
|
||||
## Без обратной совместимости
|
||||
|
||||
Старый `docmost:meta` формат НЕ поддерживаем (данные тестовые). Волт — кэш: на
|
||||
переходе `rm -rf` волты спейсов, они пересобираются из Docmost сразу в native-
|
||||
формате. `parsePageFile` не читает `docmost:meta`; файл без `gitmost_id` frontmatter
|
||||
— это голый/рукописный файл → адопция (не legacy-страница).
|
||||
|
||||
## Краевые случаи
|
||||
|
||||
- Git не хранит пустые папки → «родитель без своего файла» невозможен: тело
|
||||
родителя — это folder-note `<t>/<t>.md`, он и держит папку (плюс дети). Childless
|
||||
пустая страница → просто `<t>.md`.
|
||||
- Конфликт folder-note `Папка/Папка.md` с ребёнком title «Папка» → ребёнку суффикс.
|
||||
- Переименование папки (= rename родителя) → move всего поддерева по id, без
|
||||
delete+create; ссылки `[[…]]` на сам родитель переписать (basename сменился).
|
||||
|
||||
## План фаз (каждая — юниты движка + браузерный e2e + изолированные shell-e2e)
|
||||
|
||||
1. ✅ Формат файла: `parsePageFile`/`serializePageFile` (формат:
|
||||
`gitmost_id` frontmatter + тело). Юниты. Без смены поведения. (готово)
|
||||
2. ✅ PULL пишет native-формат (frontmatter + folder-note layout). Волты
|
||||
wipe+rebuild. (2a — folder-note layout в `buildVaultLayout`; 2b — PULL пишет
|
||||
`serializePageFile`, `readExisting` читает frontmatter.) (готово)
|
||||
3. ✅ PUSH берёт идентичность из frontmatter, title из имени файла, родителя из
|
||||
пути (`parentFolderFile` folder-note-aware). CREATE пишет `gitmost_id` обратно;
|
||||
UPDATE шлёт чистое тело (без frontmatter) на обе стороны 3-way merge. (готово)
|
||||
4. Адопция голых файлов/папок (частично в фазе 3: файл без `gitmost_id` → create).
|
||||
ВАЖНО: тут же сохранить пользовательский frontmatter (Obsidian properties) при
|
||||
адопции — `parsePageFile` сейчас срезает ведущий frontmatter даже без
|
||||
`gitmost_id`, а write-back пишет только `gitmost_id`; нужно врезать `gitmost_id`
|
||||
в существующий frontmatter и сохранять остальные поля И при write-back, И при
|
||||
следующем pull (иначе pull перезатрёт). До этого native-формат НЕ катить на
|
||||
реальный Obsidian-волт с properties.
|
||||
5. Чистка: выпилить старый `docmost:meta` формат-код целиком.
|
||||
6. Ссылки: конвертер Docmost-mention ↔ `[[wikilink]]` + переписывание при retitle.
|
||||
|
||||
## Риски
|
||||
|
||||
Смена ФОРМАТА волта на data-loss-чувствительном движке (сегодня ловили тяжёлый баг
|
||||
с трашем живых страниц). Каждая фаза — за инкрементом, с юнит-тестами движка И
|
||||
браузерным e2e (`git-sync-browser-e2e.cjs`) + изолированными shell-e2e на
|
||||
одноразовом спейсе. Без in-place миграций без бэкапа волта.
|
||||
@@ -1,534 +0,0 @@
|
||||
# Git-sync: спека реализации (встраивание docmost-sync в gitmost)
|
||||
|
||||
Статус: **спецификация, код не менялся.** Детальный план реализации фичи
|
||||
«двусторонний синк страниц Docmost ↔ локальная git-папка Markdown», встроенной
|
||||
прямо в gitmost.
|
||||
|
||||
Источник движка: `https://gitea.vvzvlad.xyz/vvzvlad/docmost-sync`
|
||||
(ветка `main`, на момент спеки HEAD `b03eb35`). Все сигнатуры ниже сверены с этим
|
||||
исходником и с текущим кодом gitmost.
|
||||
|
||||
Предыстория и обоснование архитектурных развилок — в бэклоге
|
||||
[ai-chat-tool-definitions-duplicated.md](backlog/ai-chat-tool-definitions-duplicated.md)
|
||||
(раздел про дублирование конвертера) и в исходном `SPEC.md` репозитория
|
||||
docmost-sync (нумерация §-параграфов ниже ссылается на него).
|
||||
|
||||
---
|
||||
|
||||
## 0. Зафиксированные решения
|
||||
|
||||
Из обсуждения архитектуры (выбор пользователя) и трёх суб-решений:
|
||||
|
||||
1. **Нативная in-process интеграция.** Никаких REST-к-себе и токена сервис-юзера: чтение
|
||||
через репозитории gitmost, запись тела — через collab `openDirectConnection`,
|
||||
триггеры — через `EventEmitter2` вместо поллинга `/recent`.
|
||||
2. **Встроенный NestJS-модуль** `GitSyncModule` в `apps/server/src/integrations/git-sync`
|
||||
с `@Interval`/событиями и **leader-lock на Redis** (single-writer при нескольких
|
||||
репликах).
|
||||
3. **Настройка по спейсам в UI** — флаг в `space.settings.gitSync`, секреты
|
||||
(git-remote) — через ENV/`EnvironmentService`.
|
||||
4. **Конвертер** — вендорим *чистую* часть из docmost-sync в `packages/git-sync`,
|
||||
гейт = round-trip-идемпотентность против схемы `@docmost/editor-ext`.
|
||||
5. **Vault** — **репозиторий на спейс**; `move-to-space` = кросс-репо delete+create.
|
||||
6. **Провенанс** — отдельное значение `lastUpdatedSource = 'git-sync'`.
|
||||
|
||||
Вне scope v1 (как и в SPEC): комментарии (только якоря, без тредов), права/ACL,
|
||||
вложения как отдельный поток (едут ссылками внутри контента), realtime-подписка
|
||||
на Hocuspocus (остаётся поллинг-страховка + события).
|
||||
|
||||
---
|
||||
|
||||
## 1. Архитектура верхнего уровня
|
||||
|
||||
```
|
||||
gitmost server (NestJS, один процесс)
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ GitSyncModule │
|
||||
│ │
|
||||
│ GitSyncOrchestrator ── @Interval + Redis leader-lock │
|
||||
│ │ (per enabled space: pull-cycle / push-cycle) │
|
||||
│ │ │
|
||||
│ ├── engine (vendored docmost-sync, IO инжектируется) │
|
||||
│ │ pull.ts / push.ts / reconcile / layout / stabilize │
|
||||
│ │ │
|
||||
│ ├── GitmostDataSource ── реализует подмножество │
|
||||
│ │ DocmostClient НАТИВНО: │
|
||||
│ │ reads → PageRepo / SpaceRepo (Kysely) │
|
||||
│ │ writes → CollaborationGateway.openDirectConnection│
|
||||
│ │ + PageService (create/move/delete/...) │
|
||||
│ │ │
|
||||
│ └── VaultGit ── shell-out в системный git (как есть) │
|
||||
│ │
|
||||
│ PageChangeListener ── подписка на EventName.PAGE_* → │
|
||||
│ debounce → enqueue push-cycle │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
▲ читает/пишет страницы ▼ git push/pull
|
||||
PostgreSQL (pages/spaces) data/git-sync/<spaceId>/ (vault) → remote
|
||||
```
|
||||
|
||||
Ключ интеграции: движок docmost-sync уже **полностью построен на dependency
|
||||
injection** — весь внешний IO (REST-клиент, git, файловая система) передаётся
|
||||
через узкие интерфейсы. Мы НЕ переписываем движок; мы подставляем нативные
|
||||
реализации в его DI-швы.
|
||||
|
||||
---
|
||||
|
||||
## 2. Состав вендоринга из docmost-sync
|
||||
|
||||
В новый пакет `packages/git-sync` копируем (с сохранением истории смысла —
|
||||
backport-friendly, как сделано с `packages/mcp`):
|
||||
|
||||
### 2.1. Движок (engine) — `src/engine/`
|
||||
| Файл | Что несёт | IO | Берём |
|
||||
| --- | --- | --- | --- |
|
||||
| `pull.ts` | Docmost→FS: reconcile + write + commit + merge | client+git+fs (инжектируется) | да |
|
||||
| `push.ts` | FS→Docmost: diff + classify + apply + refs | client+git+fs (инжектируется) | да |
|
||||
| `git.ts` | `VaultGit` — обёртка git shell-out | системный `git` | да, как есть |
|
||||
| `reconcile.ts` | чистый планировщик | нет | да |
|
||||
| `layout.ts` | чистый маппер дерево→пути | нет | да |
|
||||
| `sanitize.ts` | чистая санитизация имён | нет | да |
|
||||
| `stabilize.ts` | fixpoint-нормализация md (SPEC §11) | нет (lib-вызовы) | да |
|
||||
| `loop-guard.ts` | `bodyHash` (sha256) | нет | да |
|
||||
| `settings.ts` | zod-конфиг | `.env` | **адаптируем** (см. §7) |
|
||||
| `index.ts` | тонкий CLI-скаффолд | — | нет (заменяем на NestJS) |
|
||||
|
||||
### 2.2. Конвертер (чистая часть) — `src/lib/`
|
||||
Из `packages/docmost-client/src/lib/` берём **только** чистый конвертер и формат
|
||||
файла (collab/auth REST-части НЕ нужны — запись нативная):
|
||||
|
||||
| Файл | Экспорт |
|
||||
| --- | --- |
|
||||
| `markdown-converter.ts` | `convertProseMirrorToMarkdown(content): string` |
|
||||
| `collaboration.ts` (только конвертер-функция) | `markdownToProseMirror(md): Promise<doc>` ⚠️ |
|
||||
| `markdown-document.ts` | `serializeDocmostMarkdownBody`, `parseDocmostMarkdown`, `serializeDocmostMarkdown`, тип `DocmostMdMeta` |
|
||||
| `canonicalize.ts` | `canonicalizeContent(node)`, `docsCanonicallyEqual(a,b)` |
|
||||
| `docmost-schema.ts` | tiptap-схема для `markdownToProseMirror` |
|
||||
| `node-ops.ts`, `diff.ts` | трансформации/диф (нужны транзитивно) |
|
||||
|
||||
⚠️ `markdownToProseMirror` физически лежит в `collaboration.ts` docmost-client
|
||||
(строка 289) — это **чистая** функция (marked→HTML→generateJSON), не путать с
|
||||
collab/websocket write-path из того же файла, который НЕ берём.
|
||||
|
||||
> **Долг (зафиксирован в бэклоге):** это третья копия конвертера (есть в
|
||||
> docmost-sync, в `packages/mcp`, теперь в `packages/git-sync`). Конвергенция в
|
||||
> общий пакет — отдельная задача; здесь сознательно вендорим валидированную
|
||||
> копию ради сохранения идемпотентности.
|
||||
|
||||
### 2.3. НЕ берём
|
||||
`pull`/`push` CLI-обёртки, `roundtrip.ts` (харнес переносим в тесты, см. §13),
|
||||
`docmost-client` REST-клиент целиком, websocket-write часть `lib/collaboration.ts`,
|
||||
`lib/auth-utils.ts`, `Makefile`, Docker-обвязку docmost-sync.
|
||||
|
||||
---
|
||||
|
||||
## 3. Главный шов: `GitmostDataSource`
|
||||
|
||||
Движок дёргает Docmost через `Pick<DocmostClient, …>`. Мы реализуем класс,
|
||||
**структурно совместимый** с этими сигнатурами, но нативный внутри. Это
|
||||
единственный нетривиальный новый код.
|
||||
|
||||
### 3.1. Точный набор методов, которых требует движок
|
||||
|
||||
Из `pull.ts` (`ApplyPullActionsDeps.client`) и обхода дерева:
|
||||
```ts
|
||||
listSpaceTree(spaceId: string, rootPageId?: string): Promise<{ pages: PageNode[]; complete: boolean }>;
|
||||
getPageJson(pageId: string): Promise<{ id; slugId; title; parentPageId; spaceId; updatedAt; content }>;
|
||||
```
|
||||
|
||||
Из `push.ts` (`ApplyPushDeps.client`):
|
||||
```ts
|
||||
importPageMarkdown(pageId: string, fullMarkdown: string): Promise<{ updatedAt?: string; /* … */ }>;
|
||||
createPage(title: string, content: string, spaceId: string, parentPageId?: string): Promise<{ data: { id: string }; updatedAt?: string }>;
|
||||
deletePage(pageId: string): Promise<unknown>;
|
||||
movePage(pageId: string, parentPageId: string | null, position?: string): Promise<unknown>;
|
||||
renamePage(pageId: string, title: string): Promise<unknown>;
|
||||
```
|
||||
|
||||
Для непрерывного режима/детекции удалений (фаза B+, SPEC §8):
|
||||
```ts
|
||||
listRecentSince(spaceId: string | undefined, sinceIso: string | null, hardPageCap?: number): Promise<any[]>;
|
||||
listTrash(spaceId: string): Promise<any[]>;
|
||||
restorePage(pageId: string): Promise<unknown>;
|
||||
```
|
||||
|
||||
### 3.2. Маппинг на нативные сервисы gitmost
|
||||
|
||||
| Метод адаптера | Нативная реализация |
|
||||
| --- | --- |
|
||||
| `listSpaceTree(spaceId)` | `SpaceRepo.findById(spaceId, wsId)` + `PageRepo.getSpaceDescendants(spaceId, { includeContent: false })` → map в `PageNode { id, title, slugId, parentPageId, hasChildren }`. **`complete: true` всегда** (читаем БД, не пагинированный REST) → суппрессия `incomplete-fetch` из SPEC §8 нативно не срабатывает. |
|
||||
| `getPageJson(pageId)` | `PageRepo.findById(pageId, { includeContent: true })` → `{ id, slugId, title, parentPageId, spaceId, updatedAt, content }`. `content` — ProseMirror JSON в схеме `editor-ext`. |
|
||||
| `importPageMarkdown(pageId, fullMd)` | `parseDocmostMarkdown(fullMd)` → body; `await markdownToProseMirror(body)` → doc; **запись через collab** (см. §3.3). Вернуть `{ updatedAt }` свежей страницы. |
|
||||
| `createPage(title, body, spaceId, parent?)` | `PageService.create(userId, wsId, { spaceId, title, parentPageId }, provenance)` → shell; затем тело через collab (§3.3). Вернуть `{ data: { id }, updatedAt }`. |
|
||||
| `deletePage(pageId)` | `PageService.removePage(pageId, userId, wsId)` (soft-delete → Trash, обратимо). |
|
||||
| `movePage(pageId, parent, pos?)` | `PageService.movePage({ pageId, parentPageId: parent, position }, movedPage, provenance)`. **`position` обязателен** для Docmost-move — вычисляем `fractional-indexing-jittered` ключ между соседями (соседей берём из `PageRepo`). |
|
||||
| `renamePage(pageId, title)` | `PageService.update(page, { title }, user, provenance)`. |
|
||||
| `listRecentSince` | `PageRepo.getRecentPagesInSpace(spaceId, { … })`, фильтр по `updatedAt > since`. |
|
||||
| `listTrash(spaceId)` | `PageRepo` запрос с `deletedAt IS NOT NULL` по спейсу. |
|
||||
| `restorePage(pageId)` | `PageService.restore(...)`. |
|
||||
|
||||
`userId`/`wsId` берём из конфигурации спейса (сервисный аккаунт воркспейса или
|
||||
владелец спейса — см. §7). `provenance` всегда несёт `source: 'git-sync'` (§8).
|
||||
|
||||
### 3.3. Нативная запись тела (linchpin)
|
||||
|
||||
Подтверждено в коде: `CollaborationGateway.openDirectConnection(documentName, context)`
|
||||
([collaboration.gateway.ts:148](../apps/server/src/collaboration/collaboration.gateway.ts#L148-L150))
|
||||
+ паттерн `withYdocConnection`
|
||||
([collaboration.handler.ts:118-133](../apps/server/src/collaboration/collaboration.handler.ts#L118-L133)).
|
||||
Имя документа — `page.<pageId>` ([getPageId](../apps/server/src/collaboration/collaboration.util.ts#L163-L165)).
|
||||
Схему берём из `tiptapExtensions` ([collaboration.util.ts](../apps/server/src/collaboration/collaboration.util.ts)).
|
||||
|
||||
```ts
|
||||
// In-process body write — no loopback websocket, no service-user token.
|
||||
// Mirrors collaboration.handler.ts 'replace' operation exactly.
|
||||
private async writeBody(pageId: string, prosemirrorJson: JSONContent): Promise<void> {
|
||||
const conn = await this.collabGateway.openDirectConnection(
|
||||
`page.${pageId}`,
|
||||
{ actor: 'git-sync' }, // provenance flows into PersistenceExtension (see §8)
|
||||
);
|
||||
try {
|
||||
await conn.transact((doc) => {
|
||||
const fragment = doc.getXmlFragment('default');
|
||||
if (fragment.length > 0) fragment.delete(0, fragment.length);
|
||||
const next = TiptapTransformer.toYdoc(prosemirrorJson, 'default', tiptapExtensions);
|
||||
Y.applyUpdate(doc, Y.encodeStateAsUpdate(next));
|
||||
});
|
||||
} finally {
|
||||
await conn.disconnect();
|
||||
}
|
||||
// PersistenceExtension.onStoreDocument persists ydoc+content+textContent
|
||||
// consistently, stamps lastUpdatedSource, broadcasts 'page.updated'.
|
||||
}
|
||||
```
|
||||
|
||||
**Схема-совместимость (критично).** `markdownToProseMirror` производит
|
||||
ProseMirror JSON в схеме docmost-client, а `TiptapTransformer.toYdoc` валидирует
|
||||
его в схеме `editor-ext`. Аналогично на чтении `convertProseMirrorToMarkdown`
|
||||
получает `content` в схеме `editor-ext`. Эти две схемы **должны совпадать по
|
||||
именам нод/марок/атрибутов**, иначе ноды потеряются. Это и есть гейт §13.1.
|
||||
|
||||
---
|
||||
|
||||
## 4. `VaultGit` и git-бинарь
|
||||
|
||||
`VaultGit` (engine/git.ts) оставляем как есть — он шеллит в системный `git` через
|
||||
`execFile` (args-массив, без инъекций), всегда `cwd=<vaultPath>`. Константы:
|
||||
`DEFAULT_BRANCH = "main"`, `BOT_AUTHOR_NAME = "Docmost Sync"`,
|
||||
`BOT_AUTHOR_EMAIL = "docmost-sync@local"`; в push.ts: `DOCMOST_BRANCH = "docmost"`,
|
||||
`LAST_PUSHED_REF = "refs/docmost/last-pushed"`, провенанс-трейлеры
|
||||
`Docmost-Sync-Source: docmost|local`.
|
||||
|
||||
**Ops-требование:** в рантайм-образ gitmost добавить пакет `git`
|
||||
([Dockerfile](../Dockerfile)) — сейчас его там может не быть. Без бинаря
|
||||
`VaultGit.assertGitAvailable()` падает на старте цикла.
|
||||
|
||||
**Модель веток (пер-репо, SPEC §5):** `main` (правит человек/файлы) ↔ `docmost`
|
||||
(зеркало Docmost, пишет только движок) ↔ `merge-base` как базлайн;
|
||||
`refs/docmost/last-pushed` — что из `main` уже отражено в Docmost.
|
||||
|
||||
---
|
||||
|
||||
## 5. Топология vault: репозиторий на спейс
|
||||
|
||||
- Корень: `<DATA_DIR>/git-sync/<spaceId>/` — отдельный git-репо на каждый
|
||||
включённый спейс. `layout.ts` уже спейс-скоупный (корень спейса → `segments: []`).
|
||||
- Remote — пер-спейс (из конфигурации спейса/ENV). Изоляция конфликтов, блокировок
|
||||
и blast-radius.
|
||||
- `move-to-space` (страница меняет спейс) → **кросс-репо**: `delete` в исходном
|
||||
репо + `create` в целевом. Ловим по событию `PAGE_MOVED_TO_SPACE`.
|
||||
- Redis-lock ключ — `git-sync:lock:<spaceId>` (§9).
|
||||
|
||||
---
|
||||
|
||||
## 6. NestJS-модуль `GitSyncModule`
|
||||
|
||||
Структура (шаблон — `McpModule`):
|
||||
```
|
||||
apps/server/src/integrations/git-sync/
|
||||
git-sync.module.ts
|
||||
git-sync.constants.ts # QueueJob/event-имена, дефолты
|
||||
services/
|
||||
gitmost-datasource.service.ts # §3 адаптер
|
||||
git-sync.orchestrator.ts # @Interval + leader-lock + цикл по спейсам
|
||||
vault-registry.service.ts # путь vault на спейс, VaultGit-инстансы
|
||||
fractional-index.util.ts # position для move (reuse server util)
|
||||
listeners/
|
||||
page-change.listener.ts # подписка на EventName.PAGE_* + debounce
|
||||
git-sync.controller.ts # (опц.) ручной trigger/status для админа
|
||||
```
|
||||
|
||||
```ts
|
||||
@Module({
|
||||
imports: [DatabaseModule, EnvironmentModule, ScheduleModule.forRoot()],
|
||||
providers: [
|
||||
GitmostDataSourceService,
|
||||
GitSyncOrchestrator,
|
||||
VaultRegistryService,
|
||||
PageChangeListener,
|
||||
],
|
||||
})
|
||||
export class GitSyncModule {}
|
||||
```
|
||||
- Регистрируем в [app.module.ts](../apps/server/src/app.module.ts) рядом с `McpModule`.
|
||||
- Зависимости: `PageRepo`/`SpaceRepo` (через `DatabaseModule`), `PageService`,
|
||||
`CollaborationGateway` (экспортировать из `CollaborationModule`),
|
||||
`EnvironmentService`, ioredis-клиент.
|
||||
- `ScheduleModule.forRoot()` уже подключается в `TelemetryModule`; повторный вызов
|
||||
безопасен, но лучше вынести в общий модуль или убедиться, что forRoot один раз.
|
||||
|
||||
---
|
||||
|
||||
## 7. Конфигурация
|
||||
|
||||
### 7.1. Per-space (UI) — `space.settings.gitSync`
|
||||
Расширяем существующий паттерн `settings.sharing` / `settings.comments`.
|
||||
|
||||
Сервер:
|
||||
- `UpdateSpaceDto` ([update-space.dto.ts](../apps/server/src/core/space/dto/update-space.dto.ts)):
|
||||
добавить `@IsOptional() @IsBoolean() gitSyncEnabled?: boolean;` (+ опц.
|
||||
`gitSyncRemote?: string`, если решим хранить remote в БД, а не только в ENV).
|
||||
- `SpaceService.updateSpace(dto, wsId)`
|
||||
([space.service.ts:120](../apps/server/src/core/space/services/space.service.ts#L120)):
|
||||
обработать как `disablePublicSharing`/`allowViewerComments`.
|
||||
- `SpaceRepo`: добавить `updateGitSyncSettings(spaceId, wsId, prefKey, prefValue, trx?)`
|
||||
по образцу `updateSharingSettings`
|
||||
([space.repo.ts:92](../apps/server/src/database/repos/space/space.repo.ts#L92)) —
|
||||
jsonb-merge в `settings.gitSync.<key>`.
|
||||
- Гард: CASL `SpaceCaslAction.Manage / SpaceCaslSubject.Settings` (как в
|
||||
[space.controller.ts:147](../apps/server/src/core/space/space.controller.ts#L147)).
|
||||
|
||||
Клиент:
|
||||
- Тоггл в форме настроек спейса
|
||||
([edit-space-form.tsx](../apps/client/src/features/space/components/edit-space-form.tsx))
|
||||
через `useUpdateSpaceMutation()` → `updateSpace({ spaceId, gitSyncEnabled })`.
|
||||
Образец — `mcp-settings.tsx`. `readOnly` при отсутствии `Manage/Settings`.
|
||||
|
||||
Форма `space.settings.gitSync`:
|
||||
```jsonc
|
||||
{ "gitSync": { "enabled": true, "remote": "git@…", "branch": "main" } }
|
||||
```
|
||||
|
||||
### 7.2. Секреты/тюнинг (ENV) — `EnvironmentService`
|
||||
Движковый `settings.ts` (zod, читает `.env`) **заменяем** на чтение из gitmost
|
||||
`EnvironmentService`: `parseSettings(env)` оставляем как чистую функцию для тестов,
|
||||
но в проде собираем `Settings` из `EnvironmentService`-геттеров.
|
||||
|
||||
Новые переменные (объявить в
|
||||
[environment.validation.ts](../apps/server/src/integrations/environment/environment.validation.ts)
|
||||
class-validator-декораторами, геттеры — в
|
||||
[environment.service.ts](../apps/server/src/integrations/environment/environment.service.ts)):
|
||||
|
||||
| ENV | Назначение | Обяз. |
|
||||
| --- | --- | --- |
|
||||
| `GIT_SYNC_ENABLED` | глобальный мастер-выключатель | нет (default false) |
|
||||
| `GIT_SYNC_DATA_DIR` | корень vault'ов (default `<DATA_DIR>/git-sync`) | нет |
|
||||
| `GIT_SYNC_REMOTE_TEMPLATE` | шаблон remote, напр. `git@host:vault-{spaceId}.git` | нет |
|
||||
| `GIT_SYNC_SSH_KEY_PATH` / креды remote | доступ к git-remote (secret) | по ситуации |
|
||||
| `GIT_SYNC_POLL_INTERVAL_MS` | страховочный поллинг (default 15000) | нет |
|
||||
| `GIT_SYNC_DEBOUNCE_MS` | окно дебаунса событий (default 2000) | нет |
|
||||
| `GIT_SYNC_SERVICE_USER_ID` | от чьего имени писать в Docmost | да (если синк включён) |
|
||||
|
||||
> git-remote = доступ ко всей вики спейса (SPEC §12): креды только в ENV/secret
|
||||
> store, никогда в БД/коммиты. В UI — только `enabled` (+ опц. имя remote из
|
||||
> заранее разрешённого списка).
|
||||
|
||||
---
|
||||
|
||||
## 8. Провенанс и loop-guard
|
||||
|
||||
### 8.1. Значение `'git-sync'`
|
||||
Сегодня `lastUpdatedSource ∈ { 'user', 'agent' }`
|
||||
([persistence.extension.ts:132-134](../apps/server/src/collaboration/extensions/persistence.extension.ts#L132-L134)).
|
||||
Добавляем `'git-sync'`:
|
||||
- `PersistenceExtension`: `context.actor === 'git-sync'` → `lastUpdatedSource = 'git-sync'`.
|
||||
- Снапшот истории для `'git-sync'` — дебаунс (как у человека), а не немедленный
|
||||
(немедленный — только для `'agent'`,
|
||||
[persistence.extension.ts:321](../apps/server/src/collaboration/extensions/persistence.extension.ts#L321)).
|
||||
- Для `create/move/rename/delete` через `PageService` передаём
|
||||
`AuthProvenanceData` c `source: 'git-sync'` (тип уже используется для агента —
|
||||
расширить допустимые значения; точную форму подтвердить на реализации).
|
||||
- Клиент: в истории
|
||||
([history-item.tsx:128](../apps/client/src/features/page-history/components/history-item.tsx#L128))
|
||||
не показывать агентский бейдж/дип-линк для `'git-sync'`; добавить значение в
|
||||
тип [page.types.ts:23-26](../apps/client/src/features/page-history/types/page.types.ts#L23-L26)
|
||||
(опц. свой бейдж «sync»).
|
||||
|
||||
### 8.2. Подавление петли (SPEC §10)
|
||||
На pull-стороне игнорируем страницу как «свою запись», если:
|
||||
`page.lastUpdatedSource === 'git-sync'` **И** `bodyHash(exportedBody)` совпадает
|
||||
с последним запушенным (`PushedPageRecord.bodyHash` из `push.ts`). После записи в
|
||||
Docmost сохраняем `updatedAt` ответа, чтобы поллинг-страховка не утянул свою же
|
||||
запись обратно.
|
||||
|
||||
---
|
||||
|
||||
## 9. Single-writer (Redis leader-lock)
|
||||
|
||||
В кодовой базе `@Interval`-задачи (`trash-cleanup`, `telemetry`, `session-cleanup`)
|
||||
**не защищены** от мультиинстанса. Для синка добавляем явный лок.
|
||||
|
||||
- ioredis уже есть (`RedisModule` из `@nestjs-labs/nestjs-ioredis`,
|
||||
[app.module.ts](../apps/server/src/app.module.ts); прямой `RedisClient`
|
||||
используется в collab-gateway).
|
||||
- Лок на спейс: `SET git-sync:lock:<spaceId> <instanceId> NX PX <ttl>`; держим
|
||||
цикл только при успехе, продлеваем по heartbeat, освобождаем в `finally`
|
||||
(Lua-CAS на удаление по `instanceId`, чтобы не снять чужой лок).
|
||||
- TTL > максимальной длительности цикла; на краше лок истекает сам.
|
||||
|
||||
```ts
|
||||
// Acquire per-space leadership; returns false if another replica holds it.
|
||||
private async acquire(spaceId: string): Promise<boolean> {
|
||||
const ok = await this.redis.set(`git-sync:lock:${spaceId}`, this.instanceId, 'PX', LOCK_TTL_MS, 'NX');
|
||||
return ok === 'OK';
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Планировщик и событийные триггеры
|
||||
|
||||
- **События (основной триггер).** `PageChangeListener` подписывается на
|
||||
`EventName.PAGE_CREATED | PAGE_UPDATED | PAGE_MOVED | PAGE_SOFT_DELETED |
|
||||
PAGE_RESTORED | PAGE_MOVED_TO_SPACE` и job `PAGE_CONTENT_UPDATED`
|
||||
([event.contants.ts](../apps/server/src/common/events/event.contants.ts)).
|
||||
Фильтр по `spaceId` (только включённые спейсы) → дебаунс (`GIT_SYNC_DEBOUNCE_MS`)
|
||||
→ ставит pull/push-цикл спейса в очередь оркестратора.
|
||||
- Loop-guard: события от собственных записей (`source==='git-sync'` + совпавший
|
||||
хэш) пропускаем (§8.2).
|
||||
- **Поллинг-страховка.** `@Interval(GIT_SYNC_POLL_INTERVAL_MS)` в оркестраторе:
|
||||
по каждому включённому спейсу (под локом) — реконсиляция (`listRecentSince` +
|
||||
`listTrash`), ловит пропущенные события и стартовую сверку после простоя
|
||||
(SPEC §12).
|
||||
- Один цикл на спейс за раз (внутрипроцессный мьютекс на `spaceId` поверх
|
||||
Redis-лока).
|
||||
|
||||
---
|
||||
|
||||
## 11. Потоки данных (walkthroughs)
|
||||
|
||||
### 11.1. Первичный клон спейса (initial clone, SPEC §12)
|
||||
1. `VaultGit.ensureRepo()` + `ensureBranch('docmost','main')` + `checkout('docmost')`.
|
||||
2. `dataSource.listSpaceTree(spaceId)` → `{ pages, complete:true }`.
|
||||
3. `readExisting({ listTracked: () => git.listTrackedFiles('*.md'), readFile })`.
|
||||
4. `computePullActions({ pages, treeComplete:true, existing })` → план.
|
||||
5. `applyPullActions(deps, actions, vaultRoot)`: на каждую страницу
|
||||
`getPageJson` → `stabilizePageFile(content, meta)` (export→import→export
|
||||
fixpoint, SPEC §11) → запись файла; затем `stageAll` + `commit` (трейлер
|
||||
`docmost`) на `docmost`; `checkout('main')` + `merge('docmost')`.
|
||||
6. Зафиксировать max `updatedAt` как стартовый `T_last`; `git push` в remote.
|
||||
|
||||
### 11.2. Docmost → FS (pull-цикл)
|
||||
Триггер: событие/поллинг → (под локом) шаги §11.1 п.1–5 инкрементально. 3-way
|
||||
merge `docmost→main` делает git: непересекающиеся правки сливаются, реальное
|
||||
пересечение → conflict-маркеры в файле. **При конфликте push этой страницы в
|
||||
Docmost блокируется** до ручного резолва (SPEC §9; фаза D).
|
||||
|
||||
### 11.3. FS → Docmost (push-цикл)
|
||||
`runPush(deps, { dryRun })`:
|
||||
1. `git.ensureRepo` / `isMergeInProgress` (abort при merge) / `checkout('main')`.
|
||||
2. `stageAll` + `commit('local: working-tree changes')` (локально, в Docmost не шлёт).
|
||||
3. База диффа: `readRef(LAST_PUSHED_REF)` ?? `docmost`; `revParse('main')` → `pushedCommit`.
|
||||
4. `diffNameStatus(base, 'main')` → changes; префетч `metaAt(path, side)`.
|
||||
5. `computePushActions({ changes, metaAt })` → creates/updates/deletes/renamesMoves/skipped.
|
||||
6. `dryRun` → лог плана и выход (клиент НЕ создаётся).
|
||||
7. `--apply`: `makeClient(settings)` → наш `GitmostDataSource`;
|
||||
`applyPushActions`:
|
||||
- update → `importPageMarkdown(pageId, fullMd)` (collab-write, §3.3);
|
||||
- create → `createPage(...)` → записать присвоенный `pageId` обратно в meta;
|
||||
- delete → `deletePage(pageId)` (Trash);
|
||||
- rename/move → `classifyRenameMoves` → `movePage`/`renamePage`;
|
||||
- при пустых failures: `updateRef(LAST_PUSHED_REF, pushedCommit)` +
|
||||
`fastForwardBranch('docmost', pushedCommit)`.
|
||||
8. Записать `bodyHash` + `updatedAt` (loop-guard, §8.2); `git push`.
|
||||
|
||||
---
|
||||
|
||||
## 12. Фазирование
|
||||
|
||||
- **A. Каркас + односторонний pull (нативно).** `packages/git-sync` (вендоринг
|
||||
§2), `GitmostDataSource` (чтение через репозитории), `GitSyncModule`, конфиг из
|
||||
`EnvironmentService`, ручной/однократный pull-цикл на один спейс. **Гейт §13.1.**
|
||||
- **B. Push + непрерывность.** Нативная запись (§3.3), `runPush`, ветки/refs,
|
||||
loop-guard (§8), Redis-лок (§9), `@Interval` + `PageChangeListener` (§10).
|
||||
- **C. Per-space UI.** `space.settings.gitSync` (§7.1), DTO/сервис/репо/гард,
|
||||
тоггл на клиенте, скоуп оркестратора по включённым спейсам.
|
||||
- **D. Харднинг.** Conflict-gating (SPEC §9), удаления через Trash + git (§5),
|
||||
стартовая реконсиляция и `move-to-space` кросс-репо, провенанс на клиенте,
|
||||
Dockerfile `git`, полный набор тестов.
|
||||
|
||||
---
|
||||
|
||||
## 13. Тестирование
|
||||
|
||||
### 13.1. Гейт идемпотентности (блокирует фазу B)
|
||||
Перенести round-trip-харнес docmost-sync (`roundtrip.ts` + `test/fixtures/corpus`)
|
||||
в тесты `packages/git-sync`, но прогонять **против схемы `editor-ext`**:
|
||||
`content (editor-ext) → convertProseMirrorToMarkdown → markdownToProseMirror →
|
||||
TiptapTransformer.toYdoc(…, tiptapExtensions) → fromYdoc → canonicalizeContent`
|
||||
должно давать `docsCanonicallyEqual === true`. Любая потеря нод/атрибутов =
|
||||
расхождение схем → чинить `docmost-schema.ts` под `editor-ext`.
|
||||
|
||||
### 13.2. Юнит (чистая логика, переносится как есть)
|
||||
`reconcile` (planReconciliation / decideAbsenceDeletions / mass-delete guards),
|
||||
`layout` (коллизии/санитизация), `computePullActions`, `computePushActions`,
|
||||
`classifyRenameMoves`, `bodyHash`.
|
||||
|
||||
### 13.3. Интеграция (нативный адаптер)
|
||||
`GitmostDataSource` против тестовой БД: `listSpaceTree`/`getPageJson` корректно
|
||||
маппят; `createPage`/`movePage`/`deletePage`/`importPageMarkdown` пишут через
|
||||
collab и проставляют `lastUpdatedSource='git-sync'`; loop-guard не зацикливается
|
||||
(write → poll → no-op).
|
||||
|
||||
### 13.4. e2e (под локом)
|
||||
Полный pull→push round-trip на временном vault + временном спейсе: правка в
|
||||
Docmost доезжает в файл и наоборот; конфликт даёт маркеры и блокирует push.
|
||||
|
||||
---
|
||||
|
||||
## 14. Риски и открытые пункты
|
||||
|
||||
1. **Схема-совместимость конвертера** (§3.3, §13.1) — главный риск; гейт
|
||||
обязателен до фазы B.
|
||||
2. **`AuthProvenanceData`** — точную форму типа подтвердить; возможно, потребует
|
||||
расширения enum источника на сервере и в истории.
|
||||
3. **Согласованность Yjs** — писать строго через `openDirectConnection`/`transact`;
|
||||
не трогать `content`-колонку напрямую.
|
||||
4. **`position` для move** — обязателен в Docmost-move; нужен
|
||||
`fractional-indexing-jittered` между соседями (соседей брать сортировкой
|
||||
`position COLLATE "C"`).
|
||||
5. **`git` в рантайме** — добавить в Dockerfile.
|
||||
6. **`ScheduleModule.forRoot()`** — не задублировать `forRoot`.
|
||||
7. **Сервисный пользователь записи** (`GIT_SYNC_SERVICE_USER_ID`) — от чьего имени
|
||||
идут create/move (влияет на `creatorId`/права); согласовать политику.
|
||||
8. **Конфликты и удаления** — фаза D строго по SPEC §8/§9 (маркеры никогда не
|
||||
уезжают в Docmost).
|
||||
|
||||
---
|
||||
|
||||
## 15. Чек-лист изменений по файлам
|
||||
|
||||
**Новый пакет**
|
||||
- `packages/git-sync/**` — движок + чистый конвертер (§2), `package.json`
|
||||
(`@docmost/git-sync`, `workspace:*`), `tsconfig.json`.
|
||||
|
||||
**Сервер (`apps/server/src`)**
|
||||
- `integrations/git-sync/**` — модуль, оркестратор, адаптер, листенер (§6).
|
||||
- `app.module.ts` — импорт `GitSyncModule`.
|
||||
- `collaboration/collaboration.module.ts` — экспорт `CollaborationGateway`.
|
||||
- `collaboration/extensions/persistence.extension.ts` — источник `'git-sync'` (§8.1).
|
||||
- `core/space/dto/update-space.dto.ts` — `gitSyncEnabled?` (§7.1).
|
||||
- `core/space/services/space.service.ts` — обработка флага.
|
||||
- `database/repos/space/space.repo.ts` — `updateGitSyncSettings` (§7.1).
|
||||
- `integrations/environment/environment.validation.ts` + `environment.service.ts` —
|
||||
новые ENV (§7.2).
|
||||
- `Dockerfile` — пакет `git`.
|
||||
|
||||
**Клиент (`apps/client/src`)**
|
||||
- `features/space/components/edit-space-form.tsx` — тоггл git-sync.
|
||||
- `features/space/types` — поле `settings.gitSync`.
|
||||
- `features/page-history/types/page.types.ts` + `components/history-item.tsx` —
|
||||
значение `'git-sync'` в `lastUpdatedSource`.
|
||||
|
||||
**Корень**
|
||||
- `pnpm-workspace.yaml` уже покрывает `packages/*`; `apps/server/package.json` —
|
||||
зависимость `@docmost/git-sync: workspace:*`.
|
||||
59
packages/editor-ext/src/lib/details/details.test.ts
Normal file
59
packages/editor-ext/src/lib/details/details.test.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { Editor } from "@tiptap/core";
|
||||
import { Document } from "@tiptap/extension-document";
|
||||
import { Paragraph } from "@tiptap/extension-paragraph";
|
||||
import { Text } from "@tiptap/extension-text";
|
||||
import { Details } from "./details";
|
||||
import { DetailsSummary } from "./details-summary";
|
||||
import { DetailsContent } from "./details-content";
|
||||
|
||||
// The `details` node's `open` attribute must parse to a strict BOOLEAN. The old
|
||||
// `getAttribute("open")` returned "" (falsy) for `<details open>` and `null`
|
||||
// when absent, so a parsed-open details rendered without `open` and collapsed.
|
||||
// `hasAttribute` yields a real boolean, so open state survives parse → render.
|
||||
|
||||
const extensions = [
|
||||
Document,
|
||||
Paragraph,
|
||||
Text,
|
||||
Details,
|
||||
DetailsSummary,
|
||||
DetailsContent,
|
||||
];
|
||||
|
||||
/** Parse an HTML string through the schema and return the first details node. */
|
||||
function parseDetails(html: string): any {
|
||||
const editor = new Editor({ extensions, content: html });
|
||||
const json = editor.getJSON();
|
||||
const find = (n: any): any => {
|
||||
if (!n || typeof n !== "object") return undefined;
|
||||
if (n.type === "details") return n;
|
||||
if (Array.isArray(n.content)) {
|
||||
for (const c of n.content) {
|
||||
const hit = find(c);
|
||||
if (hit) return hit;
|
||||
}
|
||||
}
|
||||
return undefined;
|
||||
};
|
||||
const details = find(json);
|
||||
editor.destroy();
|
||||
return details;
|
||||
}
|
||||
|
||||
describe("details node: open attribute parses as a strict boolean", () => {
|
||||
const body =
|
||||
'<summary>S</summary><div data-type="detailsContent"><p>b</p></div>';
|
||||
|
||||
it("parses <details open> to open === true", () => {
|
||||
const details = parseDetails(`<details open>${body}</details>`);
|
||||
expect(details).toBeDefined();
|
||||
expect(details.attrs.open).toBe(true);
|
||||
});
|
||||
|
||||
it("parses <details> (no open) to open === false", () => {
|
||||
const details = parseDetails(`<details>${body}</details>`);
|
||||
expect(details).toBeDefined();
|
||||
expect(details.attrs.open).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -39,7 +39,7 @@ export const Details = Node.create<DetailsOptions>({
|
||||
return {
|
||||
open: {
|
||||
default: false,
|
||||
parseHTML: (e) => e.getAttribute("open"),
|
||||
parseHTML: (e) => e.hasAttribute("open"),
|
||||
renderHTML: (a) => (a.open ? { open: "" } : {}),
|
||||
},
|
||||
};
|
||||
|
||||
109
packages/git-sync/build/engine/client.types.d.ts
vendored
109
packages/git-sync/build/engine/client.types.d.ts
vendored
@@ -1,109 +0,0 @@
|
||||
/**
|
||||
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
|
||||
* rather than any concrete client, because the gitmost server writes NATIVELY —
|
||||
* through repositories + collab `openDirectConnection`.
|
||||
*
|
||||
* `GitSyncClient` is that interface: the native datasource (server side)
|
||||
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
|
||||
* subsets of it. The signatures below MIRROR exactly the methods the engine's
|
||||
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
|
||||
* off each result), so a REST-style client is still structurally assignable and
|
||||
* the native adapter has a precise contract.
|
||||
*/
|
||||
/**
 * Lightweight page node as produced by `listSpaceTree` (the sidebar/tree walk;
 * no body content). The engine layout (`buildVaultLayout`) consumes `PageNode`
 * from `./layout`, which only needs `id` plus the optional
 * `title`/`slugId`/`parentPageId`; this shape documents what the tree walk
 * surfaces. Real tree nodes carry more (`position`, `icon`, `hasChildren`),
 * which the index signature keeps assignable.
 */
export interface GitSyncPageNodeLite {
    /** Page identifier — the only required field. */
    id: string;
    title?: string;
    slugId?: string;
    parentPageId?: string | null;
    hasChildren?: boolean;
    /** Any additional fields `listSpaceTree` nodes carry (position, icon, …). */
    [key: string]: unknown;
}
|
||||
/**
 * The structural client contract the engine codes against. The engine never
 * needs the whole interface, only `Pick<GitSyncClient, ...>` subsets:
 *   - pull reads: `getPageJson` (+ the tree walk's `listSpaceTree`),
 *   - push writes: `importPageMarkdown` / `createPage` / `deletePage` /
 *     `movePage` / `renamePage`,
 *   - continuous (phase B+): `listRecentSince` / `listTrash` / `restorePage`.
 */
export interface GitSyncClient {
    /**
     * Full tree of page nodes for the space (or the subtree rooted at
     * `rootPageId`); nodes carry NO body content. `complete` is `false` when
     * the walk was truncated or a fetch failed — the pull side suppresses
     * absence deletions on an incomplete tree (SPEC §8). The native
     * implementation always returns `complete: true` (it reads the DB, not a
     * paginated REST endpoint).
     */
    listSpaceTree(
        spaceId: string,
        rootPageId?: string,
    ): Promise<{
        pages: GitSyncPageNodeLite[];
        complete: boolean;
    }>;

    /**
     * A single page INCLUDING its ProseMirror body. `applyPullActions` reads
     * `id`, `slugId`, `title`, `parentPageId`, `spaceId` (for the file meta)
     * and `content` (to stabilize/serialize); `updatedAt` is carried for the
     * poll-suppression loop-guard.
     */
    getPageJson(pageId: string): Promise<{
        id: string;
        slugId: string;
        title: string;
        parentPageId: string | null;
        spaceId: string;
        updatedAt: string;
        content: unknown;
    }>;

    /**
     * Merge a page's body from a self-contained markdown file (meta + body)
     * via the collab/Yjs write path (SPEC §2/§15.6) — never a raw jsonb
     * overwrite. `applyPushActions` reads only an optional `updatedAt` off the
     * result (through `extractUpdatedAt`, which tolerates extra fields).
     *
     * `baseMarkdown` is the last-synced version of the file
     * (`refs/docmost/last-pushed`): the common ancestor for a THREE-WAY merge
     * against the live doc, so concurrent human edits survive (review #5).
     * When it is omitted or `null` the merge is 2-way.
     */
    importPageMarkdown(
        pageId: string,
        fullMarkdown: string,
        baseMarkdown?: string | null,
    ): Promise<{
        updatedAt?: string;
        [key: string]: unknown;
    }>;

    /**
     * Create a page; the assigned id comes back at `data.id`
     * (`applyPushActions` reads `result.data.id` and writes it back into the
     * file's meta). An optional top-level/`data.updatedAt` feeds the
     * loop-guard.
     */
    createPage(
        title: string,
        content: string,
        spaceId: string,
        parentPageId?: string,
    ): Promise<{
        data: {
            id: string;
        };
        updatedAt?: string;
        [key: string]: unknown;
    }>;

    /** Soft-delete a page to Trash (SPEC §8). The result is not inspected. */
    deletePage(pageId: string): Promise<unknown>;

    /**
     * Reparent a page, optionally setting its fractional-index `position`.
     * The engine currently passes `position` as UNDEFINED and the native
     * implementation computes a default between siblings. The result is not
     * inspected.
     */
    movePage(
        pageId: string,
        parentPageId: string | null,
        position?: string,
    ): Promise<unknown>;

    /** Change only a page's title (body untouched). The result is not inspected. */
    renamePage(pageId: string, title: string): Promise<unknown>;

    /**
     * Pages updated since `sinceIso` (the poll-safety reconciliation, SPEC §8).
     * `spaceId` may be `undefined` (all spaces); `hardPageCap` bounds the walk.
     */
    listRecentSince(
        spaceId: string | undefined,
        sinceIso: string | null,
        hardPageCap?: number,
    ): Promise<unknown[]>;

    /** List the space's soft-deleted (trashed) pages (deletion detection). */
    listTrash(spaceId: string): Promise<unknown[]>;

    /** Restore a soft-deleted page from Trash. The result is not inspected. */
    restorePage(pageId: string): Promise<unknown>;
}
|
||||
@@ -1,13 +0,0 @@
|
||||
/**
|
||||
* The client seam. `pull.ts`/`push.ts` depend on a narrow STRUCTURAL interface
|
||||
* rather than any concrete client, because the gitmost server writes NATIVELY —
|
||||
* through repositories + collab `openDirectConnection`.
|
||||
*
|
||||
* `GitSyncClient` is that interface: the native datasource (server side)
|
||||
* implements it, and the engine only ever uses `Pick<GitSyncClient, ...>`
|
||||
* subsets of it. The signatures below MIRROR exactly the methods the engine's
|
||||
* `pull.ts`/`push.ts` actually call (arg shapes + the fields the engine reads
|
||||
* off each result), so a REST-style client is still structurally assignable and
|
||||
* the native adapter has a precise contract.
|
||||
*/
|
||||
export {};
|
||||
@@ -1 +0,0 @@
|
||||
/**
 * Run `factory` and return whatever it returns.
 *
 * If `factory` throws a `ZodError`, a summary naming the missing and invalid
 * variables is written to stderr and the process exits with code 1. Any other
 * thrown value propagates to the caller unchanged.
 *
 * @param factory - Builds (and validates) the settings object.
 * @returns The value produced by `factory`.
 */
export declare function loadSettingsOrExit<T>(factory: () => T): T;
|
||||
@@ -1,50 +0,0 @@
|
||||
import { ZodError } from 'zod';
|
||||
// Settings-loading guard: runs the factory and, if settings validation fails
// with a ZodError, prints a clear, actionable startup message naming the
// offending env var(s) and exits with code 1 instead of dumping a raw stack
// trace. Mirrors the Python new-project skeleton's load_settings_or_exit.
// Anything that is not a ZodError is rethrown untouched.
export function loadSettingsOrExit(factory) {
    try {
        return factory();
    }
    catch (err) {
        if (!(err instanceof ZodError)) {
            throw err;
        }
        // A missing required variable shows up as an `invalid_type` issue whose
        // received value was `undefined`. zod 3 exposes that as
        // `issue.received`; zod 4 dropped the field and folds it into the
        // message ("expected string, received undefined"), so both shapes are
        // checked. A present-but-invalid value either carries another code
        // (invalid_format / invalid_value) or an `invalid_type` message with a
        // non-undefined received (e.g. "received NaN" from a coerced number),
        // so it is never mistaken for a missing one.
        const receivedUndefined = /received undefined/i;
        const missingNames = new Set();
        const invalidEntries = [];
        err.issues.forEach((issue) => {
            const varName = issue.path.length > 0 ? String(issue.path[0]) : '?';
            const looksMissing = issue.code === 'invalid_type' &&
                (issue.received === 'undefined' ||
                    receivedUndefined.test(issue.message ?? ''));
            if (looksMissing) {
                // The Set de-duplicates names while keeping first-seen order.
                missingNames.add(varName);
            }
            else {
                invalidEntries.push(`${varName}: ${issue.message}`);
            }
        });
        const report = ['Configuration error in environment / .env:'];
        if (missingNames.size > 0) {
            report.push(' Missing required variable(s):', ...Array.from(missingNames, (n) => ` - ${n}`));
        }
        if (invalidEntries.length > 0) {
            report.push(' Invalid value(s):', ...invalidEntries.map((item) => ` - ${item}`));
        }
        report.push('', 'Set them in .env (see .env.example) and try again.');
        process.stderr.write(`${report.join('\n')}\n`);
        process.exit(1);
    }
}
|
||||
70
packages/git-sync/build/engine/cycle.d.ts
vendored
70
packages/git-sync/build/engine/cycle.d.ts
vendored
@@ -1,70 +0,0 @@
|
||||
import { VaultGit } from "./git.js";
|
||||
import { GitSyncClient } from "./client.types.js";
|
||||
import { Settings } from "./settings.js";
|
||||
/**
 * The filesystem primitives the cycle needs, all taking ABSOLUTE paths. They
 * are injected rather than imported so the engine stays IO-free and
 * unit-testable.
 */
export interface CycleFs {
    /** Create a directory; expected to be recursive. */
    mkdir: (absDir: string) => Promise<void>;
    readFile: (absPath: string) => Promise<string>;
    writeFile: (absPath: string, text: string) => Promise<void>;
    /** Remove a file; expected to be forced (a missing file is a no-op). */
    rm: (absPath: string) => Promise<void>;
}
|
||||
/** Everything `runCycle` needs from its caller for one space. */
export interface RunCycleDeps {
    spaceId: string;

    /** The Docmost seam: read from during pull, written to during push. */
    client: GitSyncClient;

    /** The per-space git vault (a real working repo). */
    vault: VaultGit;

    /** Engine settings; `vaultPath` roots the relPath -> absolute-path mapping. */
    settings: Settings;

    fs: CycleFs;

    log: (line: string) => void;

    /**
     * Delete-cap hook — the ONLY caller-specific policy. It receives the push
     * dry-run's planned delete count (`Number.POSITIVE_INFINITY` when the
     * dry-run itself failed, so the hook can fail safe) together with the live
     * client, and returns the client to use for the REAL apply. gitmost uses
     * it to neutralize deletes when over its cap.
     *
     * When omitted, every op is applied unmodified and NO dry-run is performed
     * (one fewer push planning pass).
     */
    resolveApplyClient?: (
        plannedDeletes: number,
        client: GitSyncClient,
    ) => GitSyncClient;
}
|
||||
/** Outcome summary of one `runCycle` invocation. */
export interface RunCycleResult {
    ran: boolean;

    /** Present when the cycle short-circuited without running pull/push. */
    skipped?: "merge-in-progress";

    /** Pull-stage counters and whether the merge reported a conflict. */
    pull?: {
        written: number;
        deleted: number;
        conflict: boolean;
    };

    /** Push-stage mode and number of failures. */
    push?: {
        mode: string;
        failures: number;
    };
}
|
||||
/**
|
||||
* Run ONE full reconcile cycle for a space: PULL (Docmost -> vault) then PUSH
|
||||
* (vault -> Docmost), under the engine's required branch choreography. This is
|
||||
* the single entry point the app drives — it owns the staging order so it can
|
||||
* never drift from the engine it ships with.
|
||||
*
|
||||
* Staging (the ⭐ data-loss-critical order, SPEC §6/§9):
|
||||
* 1. assertGitAvailable + ensureRepo (the git state store must exist).
|
||||
* 2. refuse on an unresolved merge (a prior conflicting pull); next checkout
|
||||
* would fail otherwise.
|
||||
* 3. ensureBranch('docmost','main') + checkout('docmost'). Pull writes MUST
|
||||
* land on `docmost`, not `main`: applyPullActions commits on `docmost`,
|
||||
* then checks out `main` and merges docmost -> main. Writing Docmost
|
||||
* content straight onto `main` would clobber local file edits before push
|
||||
* can diff them.
|
||||
* 4. PULL: readExisting -> listSpaceTree -> computePullActions -> apply.
|
||||
* 5. PUSH: optional dry-run to feed the delete-cap hook, then the real apply.
|
||||
*
|
||||
* Lock + cap POLICY live in the caller; this owns only the mechanics.
|
||||
*/
|
||||
export declare function runCycle(deps: RunCycleDeps): Promise<RunCycleResult>;
|
||||
@@ -1,97 +0,0 @@
|
||||
import { readExisting, computePullActions, applyPullActions } from "./pull.js";
|
||||
import { runPush } from "./push.js";
|
||||
/**
 * Execute ONE full reconcile cycle for a space: PULL (Docmost -> vault), then
 * PUSH (vault -> Docmost), following the branch choreography the engine
 * requires. This is the single entry point the app drives; keeping the staging
 * order here means it cannot drift from the engine it ships with.
 *
 * Staging (the data-loss-critical order, SPEC §6/§9):
 *   1. assertGitAvailable + ensureRepo — the git state store must exist.
 *   2. Bail out on an unresolved merge (left by a prior conflicting pull); the
 *      next checkout would fail otherwise.
 *   3. ensureBranch('docmost','main') + checkout('docmost'). Pull writes MUST
 *      land on `docmost`, not `main`: applyPullActions commits on `docmost`,
 *      then checks out `main` and merges docmost -> main. Writing Docmost
 *      content straight onto `main` would clobber local file edits before
 *      push can diff them.
 *   4. PULL: readExisting -> listSpaceTree -> computePullActions -> apply.
 *   5. PUSH: optional dry-run feeding the delete-cap hook, then the real apply.
 *
 * Lock + cap POLICY belong to the caller; this function owns only the
 * mechanics.
 */
export async function runCycle(deps) {
    const {
        spaceId,
        client: liveClient,
        vault: repo,
        settings,
        fs: disk,
        log: emit,
        resolveApplyClient: chooseApplyClient,
    } = deps;
    const root = settings.vaultPath;
    const toAbs = (relPath) => `${root}/${relPath}`;
    const readRel = (relPath) => disk.readFile(toAbs(relPath));

    // Stage 1 — git is the engine's state store: the repo and its branches
    // must exist before any tracked-file listing or diff.
    await repo.assertGitAvailable();
    await repo.ensureRepo();

    // Stage 2 — never run on top of an unresolved merge (SPEC §9).
    const midMerge = await repo.isMergeInProgress();
    if (midMerge) {
        emit("vault has an unresolved merge — resolve it (or 'git merge --abort') " +
            "and re-run (SPEC §9); skipping cycle.");
        return { ran: false, skipped: "merge-in-progress" };
    }

    // Stage 3 — pull writes go to `docmost`, so be on it BEFORE applying.
    await repo.ensureBranch("docmost", "main");
    await repo.checkout("docmost");

    // Stage 4 — PULL.
    const existing = await readExisting({
        listTracked: () => repo.listTrackedFiles("*.md"),
        readFile: readRel,
    });
    const { pages, complete } = await liveClient.listSpaceTree(spaceId);
    const pullPlan = computePullActions({
        pages,
        treeComplete: complete,
        existing,
    });
    const pulled = await applyPullActions({
        client: liveClient,
        git: repo,
        writeFile: (absPath, text) => disk.writeFile(absPath, text),
        mkdir: (absDir) => disk.mkdir(absDir),
        rm: (absPath) => disk.rm(absPath),
    }, pullPlan, root);

    // Stage 5 — PUSH.
    // NOTE(review): push runs even when the pull merge above reported a
    // conflict; presumably `runPush` copes with that state — confirm.
    const pushDepsFor = (pushClient) => ({
        settings,
        git: repo,
        makeClient: () => pushClient,
        readFile: readRel,
        writeFile: (relPath, text) => disk.writeFile(toAbs(relPath), text),
        log: emit,
    });

    // Plans the push as a DRY-RUN purely to learn the delete count. A failed
    // dry-run reports Infinity so the cap hook can fail safe.
    const countPlannedDeletes = async () => {
        try {
            const dry = await runPush(pushDepsFor(liveClient), { dryRun: true });
            return dry.planned?.deletes ?? 0;
        }
        catch (err) {
            const reason = err instanceof Error ? err.message : String(err);
            emit(`push dry-run planning failed (${reason}); deferring deletion policy to the cap hook (fail-safe).`);
            return Number.POSITIVE_INFINITY;
        }
    };

    // Only when a hook is supplied is the dry-run performed; the hook then
    // decides which client the real apply uses (e.g. one with deletes
    // neutralized when over a cap).
    const applyClient = chooseApplyClient
        ? chooseApplyClient(await countPlannedDeletes(), liveClient)
        : liveClient;

    const pushed = await runPush(pushDepsFor(applyClient), { dryRun: false });

    return {
        ran: true,
        pull: {
            written: pulled.written,
            deleted: pulled.deleted,
            conflict: pulled.merge.conflict,
        },
        push: {
            mode: pushed.mode,
            failures: pushed.failures?.length ?? 0,
        },
    };
}
|
||||
259
packages/git-sync/build/engine/git.d.ts
vendored
259
packages/git-sync/build/engine/git.d.ts
vendored
@@ -1,259 +0,0 @@
|
||||
/** Bot identity used for engine-authored vault commits (SPEC §7.3). */
|
||||
export declare const BOT_AUTHOR_NAME = "Docmost Sync";
|
||||
export declare const BOT_AUTHOR_EMAIL = "docmost-sync@local";
|
||||
/** Default branch the vault repo is initialized on. */
|
||||
export declare const DEFAULT_BRANCH = "main";
|
||||
/**
 * A single parsed row of `git diff --name-status` (SPEC §6 "FS → Docmost").
 * Rename detection (`-M`) is on, so a rename/copy (`R`/`C`) carries both a
 * source (`oldPath`) and a destination (`path`) plus git's similarity index in
 * `score` (0–100); for every other status `path` is the only path.
 */
export interface DiffEntry {
    /** Single-letter change code. */
    status: "A" | "M" | "D" | "R" | "C";
    /** New (destination) path. For A/M/D it is the only path. */
    path: string;
    /** Rename/copy similarity score (0–100) — present only for R/C. */
    score?: number;
    /** Source path — present only for R/C. */
    oldPath?: string;
}
|
||||
/** Outcome of a `merge`: a clean apply, or a stop on conflict markers. */
export interface MergeResult {
    /** True when the merge stopped on conflicts (markers left in the worktree). */
    conflict: boolean;
    /** True when the merge applied cleanly (fast-forward or clean 3-way). */
    ok: boolean;
    /** Raw combined stdout+stderr, kept for logging/diagnostics. */
    output: string;
}
|
||||
/** Identity and provenance for an engine-authored commit (SPEC §7.3). */
export interface CommitOptions {
    authorEmail: string;
    authorName: string;
    /**
     * Trailer lines appended to the commit message body, e.g.
     * `Docmost-Sync-Source: docmost`. They are the machine-readable provenance
     * the loop-guard keys on (SPEC §12, "commit-attribution").
     */
    trailers?: string[];
}
|
||||
/**
 * A git wrapper bound to a single vault path. Construct once per vault; every
 * method runs git with `cwd = vaultPath`.
 */
export declare class VaultGit {
    private readonly vaultPath;
    constructor(vaultPath: string);
    /**
     * Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
     * to system `git` for every vault operation, so a missing binary (e.g. a slim
     * container image without git) must fail fast with an actionable message
     * rather than a cryptic ENOENT deep inside the first real git call. Presence
     * check only — we do NOT gate on a specific version. Runs `git --version`
     * with NO `cwd` (the vault dir may not exist yet at preflight time).
     */
    assertGitAvailable(): Promise<void>;
    /**
     * Run a git command in the vault and return trimmed stdout. THIN wrapper over
     * the single `runRaw` primitive: throws a clear, unified Error (including
     * stderr/stdout) on a non-zero exit.
     */
    private run;
    /**
     * The ONE primitive every git invocation in this module flows through. Builds
     * the full argv (`--no-pager -c core.quotepath=false <args>`), env, cwd, and
     * maxBuffer, runs git, and NEVER throws — it returns the exit info so callers
     * can treat a non-zero exit as either an error (`run`) or a meaningful state
     * (e.g. a merge conflict, a porcelain diff that "fails" deliberately).
     *
     * - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
     *   blocks on a pager and always prints verbatim UTF-8 paths (no octal
     *   escaping/quoting). `quotepath=false` is the baseline for ALL path-
     *   printing commands (ls-files, diff --name-only, …).
     * - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
     *   vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
     * - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
     * - On a spawn/exec error we capture the error `message` too, so a failure
     *   before git could write to stderr (e.g. ENOENT) is NOT lost.
     */
    private runRaw;
    /**
     * Ensure the vault directory exists and is an initialized git repo on `main`
     * with an initial (empty) commit so branches exist. Idempotent: safe to call
     * on every run. Sets a LOCAL bot identity for the vault repo if none is set
     * (so engine commits never fall back to a global/unset identity).
     */
    ensureRepo(): Promise<void>;
    /** True if `cwd` is inside a git work-tree (the vault is initialized). */
    private isRepo;
    /** True if a LOCAL git config key is set in the vault repo. */
    private hasLocalConfig;
    /** True if the repo has at least one commit (HEAD resolves). */
    private hasAnyCommit;
    /** True if a branch with the given name exists. */
    branchExists(name: string): Promise<boolean>;
    /**
     * Create `name` from `fromBranch` if it does not already exist. No-op (and no
     * checkout) when the branch is already present.
     */
    ensureBranch(name: string, fromBranch: string): Promise<void>;
    /** Name of the currently checked-out branch. */
    currentBranch(): Promise<string>;
    /** Check out an existing branch. */
    checkout(name: string): Promise<void>;
    /** Stage everything (adds, modifications, deletions). */
    stageAll(): Promise<void>;
    /**
     * True if the vault is mid-merge (an unresolved merge from a previous run,
     * SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged
     * (conflicted) index entries (`git ls-files -u`). The pull cycle checks this
     * BEFORE any checkout so a left-over merge produces a clear, actionable
     * message instead of a raw "you need to resolve your current index first"
     * failure deep inside `checkout`. This is what makes re-runs converge
     * (resumability, SPEC §12).
     */
    isMergeInProgress(): Promise<boolean>;
    /**
     * Commit the currently STAGED changes with an explicit author/committer
     * identity and the given trailers appended to the message body (SPEC §7.3
     * provenance). Returns `true` if a commit was made, `false` if there was
     * nothing to commit (graceful no-op). The caller is expected to have staged
     * its changes first (e.g. via `stageAll`).
     */
    commit(message: string, opts: CommitOptions): Promise<boolean>;
    /**
     * Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
     * Builds the full message with appended trailers and sets author + committer
     * identity via env vars (so the committer matches the author, not the repo
     * default).
     */
    private commitRaw;
    /**
     * Merge `fromBranch` into the current branch (`git merge --no-edit`).
     * Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict
     * state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict
     * markers are left in the worktree for manual resolution by a later increment,
     * and — critically — nothing is pushed to Docmost (we never write to Docmost
     * anyway).
     *
     * NOTE(review): the "we never write to Docmost anyway" remark looks like it
     * predates the push direction that other members of this class document
     * (`diffNameStatus`, `updateRef`) — confirm and update.
     */
    merge(fromBranch: string): Promise<MergeResult>;
    /** True if the index has any unmerged (conflicted) paths. */
    private hasUnmergedPaths;
    /**
     * List tracked files on the current branch (paths relative to the vault
     * root, forward-slash separated). An optional glob (a git pathspec) narrows
     * the listing, e.g. `"*.md"`.
     *
     * The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
     * (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
     * returns non-ASCII paths octal-escaped and double-quoted (`"\320\232..."`),
     * which `src/pull.ts` `readExisting` would then parse as garbage paths,
     * breaking move/duplicate detection. We defeat that two ways at once:
     *  - `core.quotepath=false` disables the octal-escape/quoting. It is now the
     *    `runRaw` argv baseline (prepended to EVERY invocation), so we no longer
     *    pass it inline here.
     *  - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
     *    ambiguity), which we split on `\0`.
     * We read the RAW stdout (NOT the trimming `run()` helper, which would mangle
     * the NUL-delimited bytes) and split on `\0`, dropping empty entries. Paths
     * are returned verbatim — git already emits forward slashes.
     */
    listTrackedFiles(glob?: string): Promise<string[]>;
    /**
     * Diff two refs with `--name-status -M -z` and parse the NUL-delimited output
     * (SPEC §6: the FS→Docmost push direction diffs `main` against
     * `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed
     * file is reported as a single `R` row with both its old and new path instead
     * of a delete+add pair — that distinction is what lets the push planner tell a
     * move from a delete+create (SPEC §8 "Move vs delete").
     *
     * `-z` makes git emit NUL-delimited RAW UTF-8 records (the Russian wiki has
     * Cyrillic file names) with NO quoting/escaping. The record shape differs by
     * status:
     *  - A/M/D: `status\0path\0`
     *  - R/C: `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
     * We read the RAW stdout (not the trimming `run()` helper, which would mangle
     * the NUL bytes), split on `\0`, drop the trailing empty entry, and walk the
     * tokens pulling 1 or 2 path tokens per status. Paths are returned verbatim.
     */
    diffNameStatus(fromRef: string, toRef: string): Promise<DiffEntry[]>;
    /**
     * Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist.
     * `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an
     * unknown ref, so a non-zero exit maps cleanly to `null`. Used to read
     * `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push.
     */
    revParse(ref: string): Promise<string | null>;
    /**
     * Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`,
     * named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5:
     * "what from `main` is already reflected in Docmost").
     */
    readRef(ref: string): Promise<string | null>;
    /**
     * Point `ref` at `target` (`git update-ref <ref> <target>`). Used to advance
     * `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
     * (SPEC §6 step 3 / §5). `target` may be a SHA or any commit-ish git accepts.
     */
    updateRef(ref: string, target: string): Promise<void>;
    /**
     * Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward,
     * i.e. the current `branch` tip is an ancestor of `toCommit` (verified via
     * `git merge-base --is-ancestor <branch> <toCommit>`). Used to advance the
     * `docmost` mirror branch after a clean push (SPEC §6 step 3 / §10): once a
     * push succeeds, Docmost already contains the pushed `main` content, so the
     * mirror must reflect it — otherwise the NEXT pull would diff our own write
     * back and re-pull it (loop-guard).
     *
     * SAFETY — never force, never clobber divergent history:
     *  - If `branch` IS an ancestor of `toCommit`, advance it with
     *    `git update-ref refs/heads/<branch> <toCommit>`. The `docmost` branch is
     *    NOT checked out during a push (push works on `main`), so updating the ref
     *    directly is safe and avoids any working-tree touch.
     *  - If `branch` is NOT an ancestor (divergent / would-be non-fast-forward),
     *    do NOT move it — return `{ ok: false, reason: 'not-fast-forward' }` and
     *    let the caller log it. We must never overwrite a `docmost` history that
     *    has commits the push base does not contain.
     *
     * Returns `{ ok: true }` when the branch was advanced (or already at
     * `toCommit`, a degenerate fast-forward), `{ ok: false, reason }` otherwise.
     * A missing `branch` or `toCommit` also yields `{ ok: false }` with a reason.
     */
    fastForwardBranch(branch: string, toCommit: string): Promise<{
        ok: boolean;
        reason?: string;
    }>;
    /**
     * Read a file's content at a specific ref (`git show <ref>:<path>`), or `null`
     * if the path does not exist there. Used by the push direction to read the
     * PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its
     * `docmost:meta` — and therefore its `pageId` — can be recovered to translate
     * the deletion into a `delete_page` (SPEC §6/§8: only TRACKED files, i.e. ones
     * that had a pageId, are deleted in Docmost). A non-zero exit (path absent at
     * that ref) maps to `null` rather than throwing.
     */
    showFileAtRef(ref: string, path: string): Promise<string | null>;
}
|
||||
/**
|
||||
* Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
|
||||
* Used by the single `runRaw` primitive every git command flows through, so
|
||||
* these pins apply uniformly (including the `git --version` preflight).
|
||||
*
|
||||
* cwd-isolation is this module's central safety guarantee: every git command
|
||||
* MUST operate on the vault repo at `cwd: vaultPath` and nothing else. An
|
||||
* inherited `GIT_DIR` / `GIT_WORK_TREE` in `process.env` would silently
|
||||
* redirect the operation away from `cwd` (e.g. to the source repo or another
|
||||
* checkout), defeating that guarantee. So we always strip them, regardless of
|
||||
* whatever else the caller adds (author/committer identity, etc.).
|
||||
*
|
||||
* Exported for unit testing.
|
||||
*/
|
||||
export declare function vaultGitEnv(extra?: Record<string, string>): NodeJS.ProcessEnv;
|
||||
/**
|
||||
* Build a commit message body with trailer lines appended (SPEC §7.3). The
|
||||
* trailers are separated from the subject by a blank line so `git interpret-
|
||||
* trailers` / `git log --format=%(trailers)` parse them as trailers.
|
||||
* Exported for unit testing.
|
||||
*/
|
||||
export declare function buildCommitMessage(subject: string, trailers?: string[]): string;
|
||||
@@ -1,570 +0,0 @@
|
||||
/**
|
||||
* Thin async wrapper over the system `git` binary (SPEC §5: state store = git).
|
||||
*
|
||||
* IMPORTANT — VAULT-SCOPED: every operation here runs with `cwd = vaultPath`,
|
||||
* which is the vault's OWN git repository (default `data/vault`), SEPARATE from
|
||||
* the gitmost application repo. This module MUST NEVER run git against the
|
||||
* application repo. `data/` is gitignored, so a nested repo under `data/vault`
|
||||
* is safe. The pull cycle is READ-ONLY toward Docmost; this module only touches
|
||||
* the local vault git, never a git remote (push is deferred, see SPEC §7).
|
||||
*
|
||||
* Implementation notes:
|
||||
* - We shell out via `node:child_process` `execFile` (promisified), passing
|
||||
* ARGS AS AN ARRAY — no shell, so there is no command injection surface even
|
||||
* if a page title / branch name contains shell metacharacters.
|
||||
* - EVERY git invocation funnels through the single `runRaw` primitive, which
|
||||
* ALWAYS prepends `--no-pager -c core.quotepath=false` to the argv (so git
|
||||
* never blocks on a pager and always prints verbatim UTF-8 paths). There is
|
||||
* no exception — even the `git --version` preflight goes through `runRaw`.
|
||||
* - "nothing to commit" is treated as a graceful no-op, not an error.
|
||||
*/
|
||||
import { execFile } from "node:child_process";
|
||||
import { mkdir } from "node:fs/promises";
|
||||
import { promisify } from "node:util";
|
||||
/**
 * Promise-returning form of `execFile`. It resolves with `{ stdout, stderr }`
 * and rejects on a spawn failure or non-zero exit; `runRaw` is the only caller
 * and converts that rejection into a plain `{ code, stdout, stderr }` result.
 */
const execFileAsync = promisify(execFile);
|
||||
/** Bot identity (display name) used for engine-authored vault commits (SPEC §7.3). */
|
||||
export const BOT_AUTHOR_NAME = "Docmost Sync";
|
||||
/** Email half of the bot identity; also written as the vault's local `user.email`. */
export const BOT_AUTHOR_EMAIL = "docmost-sync@local";
|
||||
/** Default branch the vault repo is initialized on (`git init -b` / `checkout -B`). */
|
||||
export const DEFAULT_BRANCH = "main";
|
||||
/**
 * A git wrapper bound to a single vault path. Construct once per vault; every
 * method runs git with `cwd = vaultPath` (only the `assertGitAvailable`
 * preflight runs without a cwd, because the vault dir may not exist yet).
 */
export class VaultGit {
    /** Directory of the vault's own git repository; the cwd of every git call. */
    vaultPath;
    /**
     * @param vaultPath Directory of the vault repository (created on demand by
     *   `ensureRepo`).
     */
    constructor(vaultPath) {
        this.vaultPath = vaultPath;
    }
    /**
     * Preflight: verify a runnable `git` binary is on PATH. The daemon shells out
     * to system `git` for every vault operation, so a missing binary (e.g. a slim
     * container image without git) must fail fast with an actionable message
     * rather than a cryptic ENOENT deep inside the first real git call. Presence
     * check only — we do NOT gate on a specific version.
     *
     * @throws Error when `git --version` cannot be run or exits non-zero.
     */
    async assertGitAvailable() {
        // Goes through the single `runRaw` primitive like every other invocation.
        // `cwd: null` means "do not set a cwd" — the vault dir may not exist yet at
        // preflight time, so we must not point git at a missing directory.
        const r = await this.runRaw(["--version"], { cwd: null });
        if (r.code !== 0) {
            const detail = (r.stderr || r.stdout || "").trim();
            throw new Error("git binary not found or not runnable — install git (the vault state " +
                `store requires it). Underlying error: ${detail}`);
        }
    }
    /**
     * Run a git command in the vault and return its trimmed stdout. THIN wrapper
     * over the single `runRaw` primitive that turns a non-zero exit into a clear,
     * unified Error (including stderr/stdout).
     *
     * @param args git arguments (without the `git` binary name).
     * @param opts Optional `{ cwd, env }`, forwarded verbatim to `runRaw`.
     * @returns stdout with leading/trailing whitespace trimmed.
     * @throws Error `git <args> failed: <detail>` on a non-zero exit.
     */
    async run(args, opts) {
        const r = await this.runRaw(args, opts);
        if (r.code !== 0) {
            const detail = (r.stderr || r.stdout || "").trim();
            throw new Error(`git ${args.join(" ")} failed: ${detail}`);
        }
        return r.stdout.trim();
    }
    /**
     * The ONE primitive every git invocation in this module flows through. Builds
     * the full argv, env, cwd, and maxBuffer, runs git, and NEVER throws — it
     * returns the exit info so callers can treat a non-zero exit as either an
     * error (`run`) or a meaningful state (e.g. a merge conflict, a porcelain
     * diff that "fails" deliberately).
     *
     *  - argv: ALWAYS prepends `--no-pager -c core.quotepath=false`, so git never
     *    blocks on a pager and always prints verbatim UTF-8 paths (no octal
     *    escaping/quoting).
     *  - cwd: `opts.cwd === null` -> do NOT set cwd (the preflight, where the
     *    vault dir may not exist); otherwise `opts.cwd ?? this.vaultPath`.
     *  - env: `vaultGitEnv(opts?.env)` (cwd-isolation + caller extras).
     *
     * @param args git arguments (without the `git` binary name).
     * @param opts Optional `{ cwd, env }` overrides, see above.
     * @returns `{ code, stdout, stderr }`; `code` is 0 on success, the numeric
     *   exit code on failure, or 1 when no numeric code is available (spawn
     *   error, signal, buffer overflow).
     */
    async runRaw(args, opts) {
        const cwd = opts?.cwd === null ? undefined : (opts?.cwd ?? this.vaultPath);
        try {
            const { stdout, stderr } = await execFileAsync("git", ["--no-pager", "-c", "core.quotepath=false", ...args], {
                ...(cwd !== undefined ? { cwd } : {}),
                // Generous buffer: file listings / porcelain output on a large vault
                // can be sizable.
                maxBuffer: 64 * 1024 * 1024,
                env: vaultGitEnv(opts?.env),
            });
            return { code: 0, stdout, stderr };
        }
        catch (err) {
            const e = err;
            return {
                code: typeof e.code === "number" ? e.code : 1,
                stdout: e.stdout ?? "",
                // Preserve the error message when there is no stderr (e.g. a spawn
                // failure like ENOENT, where promisified execFile sets stderr to an
                // EMPTY STRING — so `||`, not `??`, to fall through to `message`).
                stderr: e.stderr || e.message || "",
            };
        }
    }
    /**
     * Ensure the vault directory exists and is an initialized git repo on `main`
     * with an initial (empty) commit so branches exist. Idempotent: safe to call
     * on every run. Sets a LOCAL bot identity for the vault repo if none is set
     * (so engine commits never fall back to a global/unset identity).
     *
     * @throws Error when a git step fails; config-pinning failures are rethrown
     *   with a hint about `.git/config` being unwritable or locked.
     */
    async ensureRepo() {
        await mkdir(this.vaultPath, { recursive: true });
        // `isRepo` is true only when the vault dir is the ROOT of its own work
        // tree, so a vault nested inside another checkout still gets its own
        // `git init` instead of silently adopting the enclosing repository.
        if (!(await this.isRepo())) {
            // `git init -b main` sets the initial branch on modern git; we still
            // guard the branch name below for safety on older binaries.
            await this.run(["init", "-b", DEFAULT_BRANCH]);
        }
        // Set a local identity for the vault repo if unset, so engine commits have
        // a deterministic committer even on a machine with no global git config.
        if (!(await this.hasLocalConfig("user.name"))) {
            await this.run(["config", "user.name", BOT_AUTHOR_NAME]);
        }
        if (!(await this.hasLocalConfig("user.email"))) {
            await this.run(["config", "user.email", BOT_AUTHOR_EMAIL]);
        }
        // Neutralize correctness-affecting git config in the vault's LOCAL config so
        // a user's GLOBAL/system config cannot change porcelain BEHAVIOR (not just
        // output) and corrupt the vault. Set UNCONDITIONALLY every run — idempotent
        // and cheap. These MUST be in place before any add/commit/checkout that
        // could be affected, hence they run before the initial-commit block below.
        //  - core.autocrlf=false — CRITICAL (SPEC §11): a global core.autocrlf=true
        //    would rewrite LF<->CRLF on add/checkout, breaking the byte-stable
        //    round-trip invariant.
        //  - core.safecrlf=false — avoid CRLF-related warnings/aborts on add.
        //  - commit.gpgsign=false — the headless daemon must never try to GPG-sign
        //    a commit (would fail/hang).
        //  - core.attributesFile=/dev/null — neutralize the user's GLOBAL
        //    gitattributes so a global clean/smudge filter cannot rewrite the
        //    STORED blob. POSIX-only path; a system /etc/gitattributes remains the
        //    host admin's domain (out of scope).
        // NOTE: these stay PERSISTED LOCAL config (not `-c` flags) on purpose — a
        // human running git by hand in the vault must inherit the same neutralized
        // behavior. (core.quotepath only affects OUR parsing of output and so is
        // baked into the `runRaw` argv baseline instead.)
        try {
            await this.run(["config", "core.autocrlf", "false"]);
            await this.run(["config", "core.safecrlf", "false"]);
            await this.run(["config", "commit.gpgsign", "false"]);
            await this.run(["config", "core.attributesFile", "/dev/null"]);
        }
        catch (err) {
            const detail = err instanceof Error ? err.message : String(err);
            throw new Error(`failed to pin vault git config (SPEC §11) — ensure ${this.vaultPath}` +
                "/.git/config is writable and not locked (e.g. stale config.lock): " +
                detail);
        }
        // Create the initial empty commit on `main` if the repo has no commits yet,
        // so both `main` and (later) `docmost` branches have a common base.
        if (!(await this.hasAnyCommit())) {
            // Make sure we are on the default branch before the first commit (covers
            // the older-git case where `init -b` was not honored).
            await this.run(["checkout", "-B", DEFAULT_BRANCH]);
            await this.commitRaw("init vault", {
                authorName: BOT_AUTHOR_NAME,
                authorEmail: BOT_AUTHOR_EMAIL,
                allowEmpty: true,
            });
        }
    }
    /**
     * True if the vault directory is the TOP LEVEL of a git work tree, i.e. the
     * vault is initialized as its own repository.
     *
     * `--is-inside-work-tree` alone is not enough: git discovers repositories by
     * walking UP from cwd, so an un-initialized vault directory that merely sits
     * inside another checkout (e.g. a gitignored `data/vault` under the
     * application repo) would also report `true`, and every later command would
     * then operate on that enclosing repo. `--show-cdup` prints the relative path
     * from cwd up to the work-tree root — empty exactly when cwd IS the root.
     *
     * @returns `true` only when git reports "inside a work tree" AND an empty
     *   cdup; `false` on any non-zero exit.
     */
    async isRepo() {
        const r = await this.runRaw(["rev-parse", "--is-inside-work-tree", "--show-cdup"]);
        if (r.code !== 0)
            return false;
        // One output line per flag, in argument order.
        const [inside = "", cdup = ""] = r.stdout.split("\n");
        return inside.trim() === "true" && cdup.trim() === "";
    }
    /**
     * True if a LOCAL git config key is set (to a non-empty value) in the vault
     * repo.
     *
     * @param key Config key, e.g. `user.name`.
     */
    async hasLocalConfig(key) {
        const r = await this.runRaw(["config", "--local", "--get", key]);
        return r.code === 0 && r.stdout.trim().length > 0;
    }
    /** True if the repo has at least one commit (HEAD resolves). */
    async hasAnyCommit() {
        const r = await this.runRaw(["rev-parse", "--verify", "HEAD"]);
        return r.code === 0;
    }
    /**
     * True if a branch with the given name exists (`refs/heads/<name>` resolves).
     *
     * @param name Short branch name, without the `refs/heads/` prefix.
     */
    async branchExists(name) {
        const r = await this.runRaw(["rev-parse", "--verify", `refs/heads/${name}`]);
        return r.code === 0;
    }
    /**
     * Create `name` from `fromBranch` if it does not already exist. No-op (and no
     * checkout) when the branch is already present.
     *
     * @param name       Branch to create.
     * @param fromBranch Start point handed to `git branch`.
     */
    async ensureBranch(name, fromBranch) {
        if (await this.branchExists(name))
            return;
        await this.run(["branch", name, fromBranch]);
    }
    /** Name of the currently checked-out branch (`rev-parse --abbrev-ref HEAD`). */
    async currentBranch() {
        return this.run(["rev-parse", "--abbrev-ref", "HEAD"]);
    }
    /**
     * Check out an existing branch.
     *
     * @param name Branch name.
     * @throws Error when `git checkout` exits non-zero.
     */
    async checkout(name) {
        await this.run(["checkout", name]);
    }
    /** Stage everything (adds, modifications, deletions) via `git add -A`. */
    async stageAll() {
        await this.run(["add", "-A"]);
    }
    /**
     * True if the vault is mid-merge (an unresolved merge from a previous run,
     * SPEC §9 / §12). Detected via a `MERGE_HEAD` ref OR any unmerged
     * (conflicted) index entries (`git ls-files -u`). The pull cycle checks this
     * BEFORE any checkout so a left-over merge produces a clear, actionable
     * message instead of a raw "you need to resolve your current index first"
     * failure deep inside `checkout`.
     */
    async isMergeInProgress() {
        // MERGE_HEAD exists exactly while a merge is in progress.
        const mergeHead = await this.runRaw(["rev-parse", "--verify", "--quiet", "MERGE_HEAD"]);
        if (mergeHead.code === 0 && mergeHead.stdout.trim().length > 0)
            return true;
        // Fallback / belt-and-suspenders: any unmerged index entries also mean the
        // working tree is mid-conflict and a checkout would refuse.
        const unmerged = await this.runRaw(["ls-files", "-u"]);
        return unmerged.code === 0 && unmerged.stdout.trim().length > 0;
    }
    /**
     * Commit the currently STAGED changes with an explicit author/committer
     * identity and the given trailers appended to the message body (SPEC §7.3
     * provenance). The caller is expected to have staged its changes first
     * (e.g. via `stageAll`).
     *
     * @param message Commit subject.
     * @param opts    `{ authorName, authorEmail, trailers?, allowEmpty? }`.
     * @returns `true` if a commit was made, `false` if nothing was staged
     *   (graceful no-op).
     */
    async commit(message, opts) {
        // `diff --cached --quiet` exits 0 when the index matches HEAD (nothing
        // staged), 1 when there are staged changes. Nothing staged is a no-op
        // (SPEC §11: a deterministic re-pull of unchanged pages produces identical
        // bytes, so git sees no diff and we must not error).
        const staged = await this.runRaw(["diff", "--cached", "--quiet"]);
        if (staged.code === 0)
            return false;
        await this.commitRaw(message, opts);
        return true;
    }
    /**
     * Low-level commit used by both `commit` and `ensureRepo`'s initial commit.
     * Builds the full message with appended trailers and sets author + committer
     * identity via env vars (so the committer matches the author, not the repo
     * default).
     *
     * @param message Commit subject.
     * @param opts    `{ authorName, authorEmail, trailers?, allowEmpty? }`.
     * @throws Error `git commit … failed: <detail>` on a non-zero exit.
     */
    async commitRaw(message, opts) {
        // `--no-verify` skips pre-commit/commit-msg hooks: a global core.hooksPath
        // (or any injected hook) must never interfere with engine commits in our
        // dedicated vault repo.
        const args = ["commit", "--no-verify", "-m", buildCommitMessage(message, opts.trailers)];
        if (opts.allowEmpty)
            args.push("--allow-empty");
        // `run` supplies the unified "git … failed" error on a non-zero exit.
        await this.run(args, {
            env: {
                GIT_AUTHOR_NAME: opts.authorName,
                GIT_AUTHOR_EMAIL: opts.authorEmail,
                GIT_COMMITTER_NAME: opts.authorName,
                GIT_COMMITTER_EMAIL: opts.authorEmail,
            },
        });
    }
    /**
     * Merge `fromBranch` into the current branch (`git merge --no-edit`).
     * Fast-forwards when possible; performs a real 3-way merge otherwise. Conflict
     * state is SURFACED (returned), NOT auto-resolved (SPEC §9): the conflict
     * markers are left in the worktree for manual resolution, and this method
     * itself never writes anything to Docmost.
     *
     * @param fromBranch Branch to merge into the current one.
     * @returns `{ ok, conflict, output }`: `ok` is true on exit 0; `conflict` is
     *   true when the failed merge left unmerged paths; `output` is the combined
     *   stdout + stderr.
     */
    async merge(fromBranch) {
        const r = await this.runRaw(["merge", "--no-edit", fromBranch]);
        const output = `${r.stdout}\n${r.stderr}`.trim();
        if (r.code === 0) {
            return { ok: true, conflict: false, output };
        }
        // A non-zero exit on merge most commonly means a conflict. Confirm by
        // checking for unmerged paths so we don't mislabel an unrelated failure as
        // a conflict.
        const conflict = await this.hasUnmergedPaths();
        return { ok: false, conflict, output };
    }
    /** True if the index has any unmerged (conflicted) paths. */
    async hasUnmergedPaths() {
        const r = await this.runRaw(["diff", "--name-only", "--diff-filter=U"]);
        return r.code === 0 && r.stdout.trim().length > 0;
    }
    /**
     * List tracked files on the current branch (paths relative to the vault
     * root, forward-slash separated). An optional glob (a git pathspec) narrows
     * the listing, e.g. `"*.md"`.
     *
     * The target wiki is RUSSIAN, so vault file names routinely contain Cyrillic
     * (e.g. `Колонка.md`). With git's DEFAULT `core.quotepath=true`, `ls-files`
     * returns non-ASCII paths octal-escaped and double-quoted, which would then be
     * parsed as garbage paths. We defeat that two ways at once:
     *  - `core.quotepath=false` is part of the `runRaw` argv baseline.
     *  - `-z` emits NUL-delimited RAW UTF-8 paths (no quoting, no newline
     *    ambiguity), which we split on `\0`.
     * We read the RAW stdout (NOT the trimming `run()` helper) and drop empty
     * entries. Paths are returned verbatim.
     *
     * @param glob Optional pathspec. It is passed after a `--` separator so a
     *   pattern beginning with `-` can never be parsed as an option.
     * @returns Tracked paths, in git's output order.
     * @throws Error `git ls-files failed: <detail>` on a non-zero exit.
     */
    async listTrackedFiles(glob) {
        const r = await this.runRaw(["ls-files", "-z", ...(glob ? ["--", glob] : [])]);
        if (r.code !== 0) {
            const detail = (r.stderr || r.stdout || "").trim();
            throw new Error(`git ls-files failed: ${detail}`);
        }
        return r.stdout.split("\0").filter((p) => p.length > 0);
    }
    /**
     * Diff two refs with `--name-status -M -z` and parse the NUL-delimited output
     * (SPEC §6: the FS→Docmost push direction diffs `main` against
     * `refs/docmost/last-pushed`). Rename detection is ON (`-M`), so a moved/renamed
     * file is reported as a single `R` row with both its old and new path instead
     * of a delete+add pair (SPEC §8 "Move vs delete").
     *
     * `-z` makes git emit NUL-delimited RAW UTF-8 records with NO
     * quoting/escaping. The record shape differs by status:
     *  - A/M/D: `status\0path\0`
     *  - R/C:   `Rnnn\0oldPath\0newPath\0` (nnn = similarity score, e.g. `R100`)
     *
     * @param fromRef Old side of the diff.
     * @param toRef   New side of the diff.
     * @returns One `{ status, path, oldPath?, score? }` entry per A/M/D/R/C
     *   record; other statuses are skipped, and a truncated final record ends the
     *   walk without emitting it.
     * @throws Error `git diff --name-status failed: <detail>` on a non-zero exit.
     */
    async diffNameStatus(fromRef, toRef) {
        const r = await this.runRaw([
            "diff",
            "--name-status",
            "-M",
            "-z",
            fromRef,
            toRef,
            // Trailing `--`: both arguments are revisions. Without it git refuses
            // with "ambiguous argument" when a ref name also exists as a file.
            "--",
        ]);
        if (r.code !== 0) {
            const detail = (r.stderr || r.stdout || "").trim();
            throw new Error(`git diff --name-status failed: ${detail}`);
        }
        // Tokens alternate: <status> <path...> <status> <path...> ... With `-z`,
        // each token (status code AND each path) is its own NUL-delimited field.
        const tokens = r.stdout.split("\0").filter((t) => t.length > 0);
        const entries = [];
        let i = 0;
        while (i < tokens.length) {
            const raw = tokens[i++];
            // The leading letter is the change kind; any trailing digits are the
            // similarity score (`R100`, `C075`).
            const letter = raw[0];
            if (letter === "R" || letter === "C") {
                const score = Number.parseInt(raw.slice(1), 10);
                const oldPath = tokens[i++];
                const path = tokens[i++];
                if (oldPath === undefined || path === undefined)
                    break; // malformed tail
                entries.push({
                    status: letter,
                    path,
                    oldPath,
                    ...(Number.isFinite(score) ? { score } : {}),
                });
            }
            else if (letter === "A" || letter === "M" || letter === "D") {
                const path = tokens[i++];
                if (path === undefined)
                    break; // malformed tail
                entries.push({ status: letter, path });
            }
            else {
                // Unknown/other status (e.g. T type-change, U unmerged) — consume one
                // path token defensively so the walk stays aligned, but do not emit it
                // (the push planner only handles A/M/D/R/C).
                i++;
            }
        }
        return entries;
    }
    /**
     * Resolve a ref/commit-ish to its full SHA, or `null` if it does not exist.
     * `rev-parse --verify --quiet` exits non-zero (and prints nothing) for an
     * unknown ref, so a non-zero exit maps cleanly to `null`. Used to read
     * `refs/docmost/last-pushed` (SPEC §5) — which is absent before the first push.
     *
     * @param ref Ref or commit-ish.
     * @returns The trimmed SHA, or `null` on a non-zero exit or empty output.
     */
    async revParse(ref) {
        const r = await this.runRaw(["rev-parse", "--verify", "--quiet", ref]);
        if (r.code !== 0)
            return null;
        const sha = r.stdout.trim();
        return sha.length > 0 ? sha : null;
    }
    /**
     * Read a ref to its SHA, or `null` if unset. Thin alias over `revParse`,
     * named for the push direction's marker `refs/docmost/last-pushed` (SPEC §5:
     * "what from `main` is already reflected in Docmost").
     *
     * @param ref Ref to read.
     */
    async readRef(ref) {
        return this.revParse(ref);
    }
    /**
     * Point `ref` at `target` (`git update-ref <ref> <target>`). Used to advance
     * `refs/docmost/last-pushed` to the just-pushed `main` commit after a push
     * (SPEC §6 step 3 / §5).
     *
     * @param ref    Full ref name to write.
     * @param target A SHA or any commit-ish git accepts.
     * @throws Error when `git update-ref` exits non-zero.
     */
    async updateRef(ref, target) {
        await this.run(["update-ref", ref, target]);
    }
    /**
     * Fast-forward `branch` to `toCommit` — but ONLY if it is a TRUE fast-forward,
     * i.e. the current `branch` tip is an ancestor of `toCommit` (verified via
     * `git merge-base --is-ancestor`). Used to advance the `docmost` mirror branch
     * after a clean push (SPEC §6 step 3 / §10), so the NEXT pull does not diff
     * our own write back and re-pull it (loop-guard).
     *
     * SAFETY — never force, never clobber divergent history:
     *  - If `branch` IS an ancestor of `toCommit`, the ref is advanced directly
     *    with `git update-ref refs/heads/<branch> <sha>`, which does not touch the
     *    working tree (the branch is expected NOT to be checked out during push).
     *  - Otherwise the branch is left alone and `not-fast-forward` is reported.
     *
     * @param branch   Short branch name to advance.
     * @param toCommit Commit-ish to advance it to.
     * @returns `{ ok: true }` when the branch was advanced (or already at
     *   `toCommit`), `{ ok: false, reason }` when either endpoint does not
     *   resolve or the move would not be a fast-forward.
     */
    async fastForwardBranch(branch, toCommit) {
        const branchRef = `refs/heads/${branch}`;
        // Resolve both endpoints first so a missing ref is a clean refusal, not a
        // confusing `merge-base` failure.
        const branchSha = await this.revParse(branchRef);
        if (branchSha === null) {
            return { ok: false, reason: `branch ${branch} does not exist` };
        }
        const targetSha = await this.revParse(toCommit);
        if (targetSha === null) {
            return { ok: false, reason: `target ${toCommit} does not resolve` };
        }
        // Already at the target -> a no-op fast-forward (still ok).
        if (branchSha === targetSha)
            return { ok: true };
        // `merge-base --is-ancestor A B` exits 0 iff A is an ancestor of B. Only a
        // true ancestor is a fast-forward; anything else is refused.
        const ancestor = await this.runRaw(["merge-base", "--is-ancestor", branchSha, targetSha]);
        if (ancestor.code !== 0) {
            return { ok: false, reason: "not-fast-forward" };
        }
        await this.updateRef(branchRef, targetSha);
        return { ok: true };
    }
    /**
     * Read a file's content at a specific ref (`git show <ref>:<path>`), or `null`
     * if the path does not exist there. Used by the push direction to read the
     * PRE-IMAGE of a DELETED file (e.g. at `refs/docmost/last-pushed`) so its
     * `docmost:meta` — and therefore its `pageId` — can be recovered to translate
     * the deletion into a `delete_page` (SPEC §6/§8).
     *
     * @param ref  Ref or commit-ish to read from.
     * @param path Path relative to the repo root, forward-slash separated
     *   (matching `listTrackedFiles` / diff output); passed verbatim.
     * @returns Raw, untrimmed stdout, or `null` on ANY non-zero exit (a missing
     *   path is not distinguished from other git failures).
     */
    async showFileAtRef(ref, path) {
        const r = await this.runRaw(["show", `${ref}:${path}`]);
        if (r.code !== 0)
            return null;
        return r.stdout;
    }
}
|
||||
/**
 * Build the environment for a vault git invocation (SPEC §12 cwd-isolation).
 * Used by the single `runRaw` primitive every git command flows through, so
 * these pins apply uniformly (including the `git --version` preflight).
 *
 * cwd-isolation is this module's central safety guarantee: every git command
 * MUST operate on the vault repo at `cwd: vaultPath` and nothing else. Git
 * lets several environment variables override repository discovery, and they
 * are routinely exported to child processes (e.g. when the daemon or its tests
 * are started from a git hook):
 *  - `GIT_DIR` / `GIT_WORK_TREE` redirect the repository / work tree. They are
 *    ALWAYS removed, even when the caller passes them in `extra`.
 *  - `GIT_INDEX_FILE`, `GIT_OBJECT_DIRECTORY`,
 *    `GIT_ALTERNATE_OBJECT_DIRECTORIES`, `GIT_COMMON_DIR` and `GIT_NAMESPACE`
 *    redirect the index, object store, common dir or ref namespace. INHERITED
 *    values are removed; a value the caller passes explicitly in `extra` is
 *    kept, since that is a deliberate choice rather than leaked state.
 *
 * Exported for unit testing.
 *
 * @param extra Optional variables (author/committer identity, etc.) layered
 *   on top of the inherited environment and the pinned defaults; applied last,
 *   so they can override those pins.
 * @returns A fresh environment object; `process.env` itself is never mutated.
 */
export function vaultGitEnv(extra) {
    // Work on a copy so the deletions below never touch `process.env`.
    const inherited = { ...process.env };
    for (const name of [
        "GIT_INDEX_FILE",
        "GIT_OBJECT_DIRECTORY",
        "GIT_ALTERNATE_OBJECT_DIRECTORIES",
        "GIT_COMMON_DIR",
        "GIT_NAMESPACE",
    ]) {
        delete inherited[name];
    }
    const env = {
        ...inherited,
        // Locale-independent output (defense in depth). We never parse localized
        // prose, but pinning the locale prevents a future regression where some
        // git message we DO key on is translated by an inherited LC_ALL/LANG.
        LC_ALL: "C",
        LANG: "C",
        // Never page (we already pass --no-pager, but a stray GIT_PAGER could still
        // bite) and never block on an interactive prompt (e.g. credentials) — the
        // daemon runs unattended and must not hang.
        GIT_PAGER: "cat",
        GIT_TERMINAL_PROMPT: "0",
        ...extra,
    };
    // Removed AFTER merging `extra`, so not even a caller can re-point the repo.
    delete env.GIT_DIR;
    delete env.GIT_WORK_TREE;
    return env;
}
|
||||
/**
 * Build a commit message body with trailer lines appended (SPEC §7.3). The
 * trailers are separated from the subject by a blank line so
 * `git interpret-trailers` / `git log --format=%(trailers)` parse them as
 * trailers.
 *
 * Empty or whitespace-only trailer entries are dropped: git only recognizes
 * the LAST blank-line-delimited group of lines as the trailer block, so a
 * blank entry in the middle would silently demote every trailer before it to
 * plain body text.
 *
 * Exported for unit testing.
 *
 * @param subject  The commit subject; returned unchanged when no usable
 *   trailers remain.
 * @param trailers Optional trailer lines (`Key: value`), kept in order.
 * @returns `subject`, or `subject` + blank line + the trailers joined by `\n`.
 */
export function buildCommitMessage(subject, trailers) {
    const lines = (trailers ?? []).filter((line) => line.trim().length > 0);
    if (lines.length === 0)
        return subject;
    return `${subject}\n\n${lines.join("\n")}`;
}
|
||||
44
packages/git-sync/build/engine/layout.d.ts
vendored
44
packages/git-sync/build/engine/layout.d.ts
vendored
@@ -1,44 +0,0 @@
|
||||
/**
|
||||
* Pure page-tree -> vault path mapping (SPEC §12).
|
||||
*
|
||||
* Given the flat list of page nodes for a space (as returned by
|
||||
* `listAllSpacePages`), compute for every page a deterministic, collision-free
|
||||
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
|
||||
* page's own name, no extension). This module is intentionally PURE and
|
||||
* dependency-free apart from the sanitization helpers, so the whole tree ->
|
||||
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
|
||||
* lives in each file's meta block (pageId / slugId).
|
||||
*/
|
||||
/** Flat page node as returned by `listAllSpacePages` (no content). */
|
||||
export interface PageNode {
|
||||
/** Page id: the key of the layout map and the value other nodes' `parentPageId` points at. */
id: string;
|
||||
/** Page title; presumably the source of the sanitized file/folder name — confirm against the sanitize helpers. */
title?: string;
|
||||
/** Slug id; per the layout docs it feeds the ` ~<slugId>` suffix that disambiguates colliding names. */
slugId?: string;
|
||||
/** Parent page id. Null/absent, or an id that is not in the input set, places the page at the vault root. */
parentPageId?: string | null;
|
||||
/** NOTE(review): not read by the layout code visible here; presumably mirrors the API's "has child pages" flag — confirm. */
hasChildren?: boolean;
|
||||
}
|
||||
/**
 * A page's resolved vault destination: folder path + file stem. The full
 * destination path is `[...segments, stem].join("/")`.
 */
|
||||
export interface VaultEntry {
|
||||
/** Folder path, root -> leaf (the resolved names of the page's ancestors). Empty for a root page. */
|
||||
segments: string[];
|
||||
/** The page's own file name without extension. */
|
||||
stem: string;
|
||||
}
|
||||
/**
 * Build the full vault layout for a space.
 *
 * Returns a Map keyed by pageId -> `{ segments, stem }`. The result is
 * deterministic for a given input and guarantees every full destination path
 * (`[...segments, stem].join("/")`) is unique, so no page can silently overwrite
 * another.
 *
 * Disambiguation is layered:
 *  1. Sibling collisions (same sanitized title under the same parent) are
 *     resolved with a stable ` ~<slugId>` suffix (the suffix is itself
 *     sanitized, since slugId/id is untrusted data that must never inject a
 *     path separator).
 *  2. A final full-path pass catches residual collisions that sibling-scoping
 *     cannot see — e.g. two pages whose parents are BOTH outside the input set
 *     both bucket at the root with `segments: []`.
 *
 * @param pages Flat list of page nodes for one space. Entries without an `id`
 *   are ignored, and when an id occurs more than once the first occurrence
 *   wins.
 * @returns Map from page id to its resolved `{ segments, stem }` destination.
 */
|
||||
export declare function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry>;
|
||||
@@ -1,170 +0,0 @@
|
||||
/**
|
||||
* Pure page-tree -> vault path mapping (SPEC §12).
|
||||
*
|
||||
* Given the flat list of page nodes for a space (as returned by
|
||||
* `listAllSpacePages`), compute for every page a deterministic, collision-free
|
||||
* destination: a folder path (root -> leaf ancestors) plus a file stem (the
|
||||
* page's own name, no extension). This module is intentionally PURE and
|
||||
* dependency-free apart from the sanitization helpers, so the whole tree ->
|
||||
* path logic is unit-testable without any I/O. The names are COSMETIC; identity
|
||||
* lives in each file's meta block (pageId / slugId).
|
||||
*/
|
||||
import { sanitizeTitle, disambiguate } from "./sanitize.js";
|
||||
/**
|
||||
* Build the full vault layout for a space.
|
||||
*
|
||||
* Returns a Map keyed by pageId -> `{ segments, stem }`. The result is
|
||||
* deterministic for a given input and guarantees every full destination path
|
||||
* (`[...segments, stem].join("/")`) is unique, so no page can silently overwrite
|
||||
* another.
|
||||
*
|
||||
* Disambiguation is layered:
|
||||
* 1. Sibling collisions (same sanitized title under the same parent) are
|
||||
* resolved with a stable ` ~<slugId>` suffix (the suffix is itself
|
||||
* sanitized, since slugId/id is untrusted data that must never inject a
|
||||
* path separator).
|
||||
* 2. A final full-path pass catches residual collisions that sibling-scoping
|
||||
* cannot see — e.g. two pages whose parents are BOTH outside the input set
|
||||
* both bucket at the root with `segments: []`.
|
||||
*/
|
||||
export function buildVaultLayout(pages) {
|
||||
// Index pages by id so the parent chain can be walked. Guard against
|
||||
// duplicate ids in the input (first one wins).
|
||||
const byId = new Map();
|
||||
for (const p of pages) {
|
||||
if (p && p.id && !byId.has(p.id))
|
||||
byId.set(p.id, p);
|
||||
}
|
||||
// Resolve each node's display name once, deterministically, tracking sibling
|
||||
// collisions per parent. `usedBySibling` maps a parent key -> set of names
|
||||
// already taken under that parent. The bucket key is the node's parent ONLY
|
||||
// when that parent is actually present in `byId`; otherwise (null parent, or
|
||||
// an orphan whose parent is outside the input set) the node buckets at
|
||||
// `"__root__"`. This is critical: orphans land at the vault root (see
|
||||
// `folderSegmentsFor`), so they MUST share the root bucket with real root
|
||||
// pages to be disambiguated against each other here — making `nameById` final
|
||||
// before any `segments` are computed, so no ancestor name can drift later.
|
||||
const usedBySibling = new Map();
|
||||
const nameById = new Map();
|
||||
for (const p of pages) {
|
||||
if (p && p.id && !nameById.has(p.id)) {
|
||||
const parentKey = p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
|
||||
nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
|
||||
}
|
||||
}
|
||||
// Every id we index above MUST get a resolved name; this helper returns it
|
||||
// and THROWS if it is somehow absent, rather than silently recomputing a
|
||||
// DIFFERENT, non-disambiguated name (which would desync a folder segment from
|
||||
// its target file).
|
||||
const nameOf = (id) => {
|
||||
const name = nameById.get(id);
|
||||
if (name === undefined) {
|
||||
throw new Error(`buildVaultLayout: no resolved name for page id ${id}`);
|
||||
}
|
||||
return name;
|
||||
};
|
||||
// Build the folder path for a page by walking parentPageId to the root. The
|
||||
// page's OWN name is the file stem; its ancestors become folders. A `visited`
|
||||
// guard prevents an infinite loop on a malformed parent cycle.
|
||||
const folderSegmentsFor = (node) => {
|
||||
const ancestors = [];
|
||||
const visited = new Set();
|
||||
let current = node.parentPageId
|
||||
? byId.get(node.parentPageId)
|
||||
: undefined;
|
||||
while (current && current.id && !visited.has(current.id)) {
|
||||
visited.add(current.id);
|
||||
ancestors.unshift(nameOf(current.id));
|
||||
current = current.parentPageId
|
||||
? byId.get(current.parentPageId)
|
||||
: undefined;
|
||||
}
|
||||
return ancestors;
|
||||
};
|
||||
// First pass: compute the provisional { segments, stem } for every node.
|
||||
const layout = new Map();
|
||||
for (const p of pages) {
|
||||
if (!p || !p.id || layout.has(p.id))
|
||||
continue;
|
||||
layout.set(p.id, {
|
||||
segments: folderSegmentsFor(p),
|
||||
stem: nameOf(p.id),
|
||||
});
|
||||
}
|
||||
// FOLDER-NOTE transform (native-Obsidian layout): a page WITH CHILDREN lives at
|
||||
// `<…>/<stem>/<stem>.md` — its body is the folder-note INSIDE its own folder
|
||||
// (LostPaul Folder Notes convention), and its children sit alongside it in that
|
||||
// folder. A leaf stays `<…>/<stem>.md`. Children's segments already point into
|
||||
// the parent's folder (folderSegmentsFor walks ancestor NAMES), so only the
|
||||
// parent's own file relocates here; the sibling name pass above already made
|
||||
// the parent name unique, so folder == file name stays consistent.
|
||||
for (const p of pages) {
|
||||
if (!p || !p.id)
|
||||
continue;
|
||||
const entry = layout.get(p.id);
|
||||
if (entry && p.hasChildren) {
|
||||
entry.segments = [...entry.segments, entry.stem];
|
||||
}
|
||||
}
|
||||
// Final full-path uniqueness pass — a belt-and-suspenders safety net. Note
|
||||
// that cross-bucket (orphan/root) collisions are now resolved in the name pass
|
||||
// above (orphans share the "__root__" bucket), so ancestor names are final
|
||||
// before `segments` are built and this pass should rarely/never re-stem an
|
||||
// ancestor. It only re-stems the colliding LATER leaf via the sanitized
|
||||
// slugId/id, then (if still colliding) appends the id.
|
||||
//
|
||||
// Process FOLDER-NOTES (pages with children) FIRST so a parent claims its
|
||||
// canonical `<name>/<name>.md` before a same-named CHILD — the child (a leaf)
|
||||
// is the one that disambiguates, never the folder-note.
|
||||
const usedPaths = new Set();
|
||||
const seenIds = new Set();
|
||||
const pathKey = (e) => [...e.segments, e.stem].join("/");
|
||||
const ordered = pages
|
||||
.filter((p) => Boolean(p && p.id))
|
||||
.sort((a, b) => Number(Boolean(b.hasChildren)) - Number(Boolean(a.hasChildren)));
|
||||
for (const p of ordered) {
|
||||
if (seenIds.has(p.id))
|
||||
continue;
|
||||
seenIds.add(p.id);
|
||||
const entry = layout.get(p.id);
|
||||
if (!entry)
|
||||
continue;
|
||||
if (usedPaths.has(pathKey(entry))) {
|
||||
// First attempt: disambiguate the stem with the sanitized slugId (or id).
|
||||
entry.stem = disambiguate(entry.stem, sanitizeTitle(p.slugId ?? p.id));
|
||||
if (usedPaths.has(pathKey(entry))) {
|
||||
// Still colliding: append the (sanitized) id as a last resort. The id
|
||||
// is globally unique, so this always resolves the collision.
|
||||
entry.stem = disambiguate(entry.stem, sanitizeTitle(p.id));
|
||||
}
|
||||
}
|
||||
usedPaths.add(pathKey(entry));
|
||||
}
|
||||
return layout;
|
||||
}
|
||||
/**
|
||||
* Compute a deterministic, collision-free name for a node among its SIBLINGS.
|
||||
* `usedBySibling` maps a parent key -> set of names already taken, so two
|
||||
* siblings that sanitize to the same name get a stable ` ~slugId` suffix
|
||||
* (SPEC §12). The suffix is itself passed through `sanitizeTitle`, because the
|
||||
* slugId/id is a second untrusted-data channel that must never leak a path
|
||||
* separator into the name. `parentKey` is supplied by the caller (it resolves
|
||||
* to `"__root__"` for root pages AND for orphans whose parent is outside the
|
||||
* input set, so they share one bucket). The name is COSMETIC; identity lives in
|
||||
* the meta block.
|
||||
*/
|
||||
function nameForNode(node, parentKey, usedBySibling) {
|
||||
let used = usedBySibling.get(parentKey);
|
||||
if (!used) {
|
||||
used = new Set();
|
||||
usedBySibling.set(parentKey, used);
|
||||
}
|
||||
let name = sanitizeTitle(node.title ?? "");
|
||||
if (used.has(name)) {
|
||||
// Sibling collision: disambiguate with the stable, sanitized slugId (fall
|
||||
// back to the sanitized pageId if no slugId is present).
|
||||
name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id));
|
||||
}
|
||||
used.add(name);
|
||||
return name;
|
||||
}
|
||||
13
packages/git-sync/build/engine/loop-guard.d.ts
vendored
13
packages/git-sync/build/engine/loop-guard.d.ts
vendored
@@ -1,13 +0,0 @@
|
||||
/**
 * Stable hash of a page's markdown BODY (SPEC §10, "body hash"), used to
 * recognize our own write later (loop suppression).
 *
 * The body STRING is hashed as-is (UTF-8) with SHA-256 and returned as
 * lowercase hex, so equal inputs always give equal digests. No normalization
 * is applied: the caller decides what counts as "the body" (typically the
 * self-contained markdown that was pushed) and must pass a canonical form if
 * it wants hash equality across cosmetic-only differences.
 *
 * @param markdownBody The exact string to hash.
 * @returns Lowercase hex SHA-256 digest.
 */
export declare function bodyHash(markdownBody: string): string;
|
||||
136
packages/git-sync/build/engine/pull.d.ts
vendored
136
packages/git-sync/build/engine/pull.d.ts
vendored
@@ -1,136 +0,0 @@
|
||||
import type { GitSyncClient } from "./client.types.js";
|
||||
import { type PageNode } from "./layout.js";
|
||||
import { VaultGit } from "./git.js";
|
||||
import { type MovedEntry, type DeletionDecision } from "./reconcile.js";
|
||||
/**
 * IO seam for `readExisting` (R-Pull-1, test-strategy report §5).
 *
 * The real `main` wires these to `git.listTrackedFiles("*.md")` and an
 * `fs.readFile` rooted at the vault; tests supply fakes so the parsing and
 * skip rules can be exercised without a real git repo or filesystem.
 */
export interface ReadExistingDeps {
    /** Lists the tracked .md paths (forward-slash, vault-relative). */
    listTracked: () => Promise<string[]>;
    /** Reads a tracked file's text given its forward-slash, vault-relative path. */
    readFile: (relPath: string) => Promise<string>;
}
|
||||
/**
 * Recover `{ pageId, relPath }` for every tracked .md file in the vault from
 * its `gitmost_id` frontmatter (native-Obsidian format).
 *
 * IO is injected (R-Pull-1) so this is testable with fakes. Two kinds of file
 * are skipped rather than reported:
 *   - a file whose `readFile` rejects (tracked but missing on disk, e.g. a
 *     mid-operation race) — skipped, NOT thrown; the next pull converges;
 *   - a file without `gitmost_id` frontmatter (`parsePageFile` -> id null),
 *     i.e. not an engine-tracked page yet (PUSH adopts those separately).
 *
 * @param deps Injected listing/reading functions.
 * @returns One entry per tracked file that carries a page id.
 */
export declare function readExisting(deps: ReadExistingDeps): Promise<Array<{
    pageId: string;
    relPath: string;
}>>;
|
||||
/**
 * Everything the PURE `computePullActions` needs (R-Pull-2) — data only, no
 * IO: the live tree with its completeness flag (from `listSpaceTree`) and the
 * parsed tracked files (from `readExisting`).
 */
export interface PullActionsInput {
    /** Live page nodes of the space (from `listSpaceTree`). */
    pages: PageNode[];
    /** True when the live tree fetch was COMPLETE (SPEC §8 suppression input). */
    treeComplete: boolean;
    /** Parsed tracked files as `{ pageId, relPath }` (from `readExisting`). */
    existing: Array<{
        pageId: string;
        relPath: string;
    }>;
}
|
||||
/**
 * The decisions produced by the PURE `computePullActions` (no IO): the
 * reconciliation plan together with the SPEC §8 absence-deletion decision.
 * Suppression is already folded in, so `toDelete` is the POST-suppression set
 * (empty when `deletionDecision.apply` is false).
 */
export interface PullActions {
    /** Pages to (re)write at their relPath: adds, updates and move targets. */
    toWrite: Array<{
        pageId: string;
        relPath: string;
    }>;
    /** Moves: the new path is written first; the old path is removed only after a successful write. */
    moved: MovedEntry[];
    /**
     * Absence-based paths to delete AFTER suppression. Empty when deletions
     * were suppressed this cycle, so the caller may apply it unconditionally.
     */
    toDelete: string[];
    /** Why absence deletions were or were not applied (for logging and tests). */
    deletionDecision: DeletionDecision;
    /** Number of tracked files (used in the suppression log messages). */
    existingCount: number;
    /** Number of absence deletes planned BEFORE suppression (used in the log message). */
    plannedDeleteCount: number;
}
|
||||
/**
 * PURE pull-action planner (R-Pull-2, test-strategy report §5). From the live
 * tree nodes, the completeness flag and the existing tracked files it derives
 * every decision WITHOUT any IO:
 *
 *   - the vault layout (a deterministic relPath per live page);
 *   - `planReconciliation` -> toWrite / moved / absence-based toDelete;
 *   - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch,
 *     empty-live and mass-delete guard), folded in so that `toDelete` is the
 *     POST-suppression set (empty when suppressed).
 *
 * Moves are NOT subject to the suppression: a moved page is present in `live`,
 * so removing its old path is genuine (the caller still gates that on the
 * write succeeding). Content fetches, file writes and git operations are left
 * to the thin `applyPullActions`.
 *
 * @param input Live pages, completeness flag and tracked files.
 * @returns The full set of pull decisions.
 */
export declare function computePullActions(input: PullActionsInput): PullActions;
|
||||
/**
 * IO seam for `applyPullActions` (R-Pull-2). The real `main` wires these to
 * the live client, the vault git wrapper and `node:fs/promises`; tests pass
 * fakes that RECORD calls, so the ordering and the move-on-success data-loss
 * guard can be verified without real git, filesystem or network.
 */
export interface ApplyPullActionsDeps {
    /** Read-only page fetcher. */
    client: Pick<GitSyncClient, "getPageJson">;
    /** The git operations the applier performs on the vault repo. */
    git: Pick<VaultGit, "stageAll" | "commit" | "checkout" | "merge">;
    /** Writes a file by ABSOLUTE path; the applier calls `mkdir` on the parent directory first. */
    writeFile: (absPath: string, text: string) => Promise<void>;
    /** Recursively creates a directory given its ABSOLUTE path. */
    mkdir: (absDir: string) => Promise<void>;
    /** Removes a file by ABSOLUTE path (force: a missing file is a no-op). */
    rm: (absPath: string) => Promise<void>;
}
|
||||
/** Outcome of `applyPullActions`: counters plus the commit/merge status (for the summary and tests). */
export interface ApplyResult {
    /** Pages whose write succeeded. */
    written: number;
    /** Moved pages whose old path was removed. */
    movedApplied: number;
    /** Absence-based deletions that were applied. */
    deleted: number;
    /** Pages whose fetch/stabilize/write failed. */
    failed: number;
    /** Value returned by `git.commit`. */
    committed: boolean;
    /** Result of merging the `docmost` branch, as returned by `git.merge`. */
    merge: {
        ok: boolean;
        conflict: boolean;
        output: string;
    };
}
|
||||
/**
 * THIN IO applier (R-Pull-2). Carries out the side effects of a pull in a
 * fixed order, keeping every safety guard:
 *
 *   1. For each `toWrite` entry: fetch the content (`client.getPageJson`),
 *      stabilize it (normalize-on-write fixpoint, SPEC §11), then mkdir and
 *      write. Writes run in a bounded-concurrency pool and one bad page never
 *      aborts the pull.
 *   2. Remove the OLD path of each move — ONLY when the planner marked it
 *      removable AND the new-path write SUCCEEDED (data-loss guard: a failed
 *      move-write keeps the old path so the page never vanishes).
 *   3. Apply the (post-suppression) absence deletes.
 *   4. stageAll + commit on `docmost` (subject built from the ACTUAL
 *      written/deleted counts), checkout main, merge docmost (conflicts are
 *      surfaced, SPEC §9).
 *
 * @param deps Injected client, git wrapper and filesystem functions.
 * @param actions The plan produced by `computePullActions`.
 * @param vaultRoot Root used to turn each relPath into an absolute path.
 * @returns Counters plus the commit and merge outcome.
 */
export declare function applyPullActions(deps: ApplyPullActionsDeps, actions: PullActions, vaultRoot: string): Promise<ApplyResult>;
|
||||
@@ -1,284 +0,0 @@
|
||||
/**
|
||||
* Pull cycle — Docmost -> vault (SPEC §6 "Docmost -> ФС").
|
||||
*
|
||||
* This increment turns the read-only mirror into the git-backed pull cycle:
|
||||
*
|
||||
* 1. ensureRepo(vault); refuse if a merge is in progress (SPEC §9/§12);
|
||||
* ensureBranch("docmost", "main") (SPEC §5 branches)
|
||||
* 2. checkout docmost
|
||||
* 3. fetch the live tree (listSpaceTree -> {pages, complete}) -> compute the
|
||||
* desired `live` files (relPath via the pure sanitize/disambiguation layout)
|
||||
* 4. parse `existing` tracked .md files (pageId + relPath from gitmost_id frontmatter)
|
||||
* 5. plan = planReconciliation(live, existing) (pure, SPEC §5/§8); toDelete
|
||||
* is absence-only, moves are separate
|
||||
* 6. decideAbsenceDeletions: SUPPRESS absence deletions on an incomplete tree
|
||||
* fetch (SPEC §8) and behind the mass-delete guard (defense in depth)
|
||||
* 7. write each live page in its fixpoint form (normalize-on-write, SPEC §11);
|
||||
* apply moved-old-path removals (only when the move write SUCCEEDED) and
|
||||
* absence-delete removals (only when the decision allowed them)
|
||||
* 8. stageAll + commit on `docmost` with the provenance trailer (SPEC §7.3)
|
||||
* 9. checkout main + merge docmost (conflicts are surfaced, NOT auto-resolved,
|
||||
* SPEC §9); push is deferred (SPEC §7)
|
||||
* 10. one-line summary
|
||||
*
|
||||
* DIRECTION IS Docmost -> vault ONLY. Nothing here ever writes to Docmost
|
||||
* (read-only: listSpaceTree + getPageJson). All git operations run against
|
||||
* the vault repo (`cwd = vaultPath`), never the source repo (see ./git.ts).
|
||||
*
|
||||
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
|
||||
* the gitmost server drives the engine in-process (there is no standalone CLI
|
||||
* entry point).
|
||||
*/
|
||||
import { dirname } from "node:path";
|
||||
import { sep } from "node:path";
|
||||
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
|
||||
import { buildVaultLayout } from "./layout.js";
|
||||
import { BOT_AUTHOR_NAME, BOT_AUTHOR_EMAIL, DEFAULT_BRANCH, } from "./git.js";
|
||||
import { planReconciliation, decideAbsenceDeletions, } from "./reconcile.js";
|
||||
import { stabilizePageBody } from "./stabilize.js";
|
||||
/** Engine-only mirror branch (SPEC §5): written by the engine, never by humans. */
const DOCMOST_BRANCH = "docmost";
/** Machine-readable provenance trailer the loop-guard keys on (SPEC §7.3 / §12). */
const SOURCE_TRAILER = "Docmost-Sync-Source: docmost";
/**
 * Maximum number of pages fetched/stabilized at the same time, so a large
 * space does not open thousands of simultaneous requests/conversions.
 */
const CONCURRENCY = 6;
/** Incremental progress is logged after every N completed pages. */
const PROGRESS_EVERY = 25;
|
||||
/**
 * Turn a vault-relative, forward-slash path into a path under `vaultRoot`.
 *
 * The result is `vaultRoot` and the segments of `relPath` joined with "/".
 *
 * @param vaultRoot Root directory of the vault.
 * @param relPath Forward-slash path relative to the vault.
 * @returns The "/"-joined path below `vaultRoot`.
 */
function relToAbs(vaultRoot, relPath) {
    const parts = relPath.split("/");
    parts.unshift(vaultRoot);
    return parts.join("/");
}
|
||||
/**
 * Build a vault-relative file path from a layout entry: the folder segments
 * followed by `<stem>.md`, joined with "/".
 *
 * @param segments Folder names from the vault root down to the file's folder.
 * @param stem File name without extension.
 * @returns Forward-slash relPath ending in `.md`.
 */
function segmentsToRelPath(segments, stem) {
    const fileName = `${stem}.md`;
    return segments.concat(fileName).join("/");
}
|
||||
/**
|
||||
* Read every tracked .md file in the vault and recover `{ pageId, relPath }` from
|
||||
* its `gitmost_id` frontmatter (native-Obsidian format). Files without a
|
||||
* `gitmost_id` are skipped (they are not engine-tracked pages yet — e.g. a stray
|
||||
* hand-written Obsidian file; PUSH adopts those separately).
|
||||
*
|
||||
* The IO is injected (R-Pull-1) so this is testable with fakes. Skip rules:
|
||||
* - a `readFile` rejection (tracked but missing on disk, a mid-operation race)
|
||||
* -> skipped, NOT thrown; the next pull converges;
|
||||
* - no `gitmost_id` frontmatter (`parsePageFile` -> id null) -> skipped.
|
||||
*/
|
||||
export async function readExisting(deps) {
|
||||
const tracked = await deps.listTracked();
|
||||
const existing = [];
|
||||
for (const relPath of tracked) {
|
||||
// git ls-files always emits forward-slash paths; normalize just in case.
|
||||
const rel = relPath.split(sep).join("/");
|
||||
let text;
|
||||
try {
|
||||
text = await deps.readFile(rel);
|
||||
}
|
||||
catch {
|
||||
// Tracked but missing on disk (mid-operation race) — skip; the next pull
|
||||
// converges.
|
||||
continue;
|
||||
}
|
||||
const { id } = parsePageFile(text);
|
||||
if (id)
|
||||
existing.push({ pageId: id, relPath: rel });
|
||||
}
|
||||
return existing;
|
||||
}
|
||||
/**
|
||||
* PURE pull-action planner (R-Pull-2, test-strategy report §5). Takes the live
|
||||
* tree nodes + completeness + existing tracked files and returns the full set of
|
||||
* decisions with NO IO:
|
||||
*
|
||||
* - builds the vault layout (deterministic relPath per live page),
|
||||
* - `planReconciliation` -> toWrite / moved / absence-toDelete,
|
||||
* - `decideAbsenceDeletions` -> the SPEC §8 suppression (incomplete-fetch +
|
||||
* empty-live + mass-delete guard), folded IN here so `toDelete` is the
|
||||
* POST-suppression set (empty when suppressed).
|
||||
*
|
||||
* Moves are NOT governed by the suppression: a moved page is present in `live`,
|
||||
* so its old-path removal is real (the caller still gates it on the write
|
||||
* succeeding). The expensive content fetch / file write / git ops happen in the
|
||||
* thin `applyPullActions`.
|
||||
*/
|
||||
export function computePullActions(input) {
|
||||
const { pages, treeComplete, existing } = input;
|
||||
const layout = buildVaultLayout(pages);
|
||||
const live = [];
|
||||
for (const p of pages) {
|
||||
if (!p || !p.id)
|
||||
continue;
|
||||
const entry = layout.get(p.id);
|
||||
if (!entry)
|
||||
continue;
|
||||
live.push({
|
||||
pageId: p.id,
|
||||
relPath: segmentsToRelPath(entry.segments, entry.stem),
|
||||
});
|
||||
}
|
||||
// Plan reconciliation (pure). `plan.toDelete` is ABSENCE-based only;
|
||||
// `plan.moved` carries move old-path removals separately.
|
||||
const plan = planReconciliation(live, existing);
|
||||
// Decide whether the ABSENCE-based deletions may be applied this cycle
|
||||
// (SPEC §8): incomplete-fetch suppression + empty-live + mass-delete guard.
|
||||
// Moves are NOT governed by this.
|
||||
const deletionDecision = decideAbsenceDeletions({
|
||||
treeComplete,
|
||||
liveCount: live.length,
|
||||
existingCount: existing.length,
|
||||
deleteCount: plan.toDelete.length,
|
||||
});
|
||||
return {
|
||||
toWrite: plan.toWrite,
|
||||
moved: plan.moved,
|
||||
// Fold the suppression in: a suppressed cycle deletes nothing.
|
||||
toDelete: deletionDecision.apply ? plan.toDelete : [],
|
||||
deletionDecision,
|
||||
existingCount: existing.length,
|
||||
plannedDeleteCount: plan.toDelete.length,
|
||||
};
|
||||
}
|
||||
/**
|
||||
* THIN IO applier (R-Pull-2). Performs the side effects in the EXACT current
|
||||
* order, with all the original safety guards preserved bit-for-bit:
|
||||
*
|
||||
* 1. for each `toWrite`: fetch content (`client.getPageJson`) -> stabilize
|
||||
* (normalize-on-write fixpoint, SPEC §11) -> mkdir + write. One bad page
|
||||
* never aborts the pull (bounded-concurrency pool, fault-tolerant).
|
||||
* 2. apply MOVE old-path removals — ONLY when the planner marked the old path
|
||||
* removable AND the new-path write SUCCEEDED (the ⭐ data-loss guard: a
|
||||
* failed move-write keeps the old path so the page never vanishes).
|
||||
* 3. apply (post-suppression) absence deletes.
|
||||
* 4. stageAll + commit on `docmost` (subject from ACTUAL written/deleted
|
||||
* counts) + checkout main + merge docmost (conflicts surfaced, SPEC §9).
|
||||
*
|
||||
* `vaultRoot` roots the relPath -> absolute-path conversion for the fs deps.
|
||||
*/
|
||||
export async function applyPullActions(deps, actions, vaultRoot) {
|
||||
const { client, git } = deps;
|
||||
// Emit the SPEC §8 suppression warnings (preserved from the original `main`).
|
||||
const decision = actions.deletionDecision;
|
||||
if (!decision.apply) {
|
||||
if (decision.reason === "incomplete-fetch") {
|
||||
console.warn("pull: tree fetch incomplete — deletions suppressed this cycle (SPEC §8)");
|
||||
}
|
||||
else if (decision.reason === "empty-live") {
|
||||
console.warn(`pull: live fetch returned 0 pages but ${actions.existingCount} file(s) are ` +
|
||||
`tracked — deletions suppressed this cycle (SPEC §8). Re-run when ` +
|
||||
`Docmost is reachable.`);
|
||||
}
|
||||
else {
|
||||
console.warn(`pull: plan would delete ${actions.plannedDeleteCount} of ${actions.existingCount} ` +
|
||||
`tracked file(s) (mass-delete guard) — deletions suppressed this ` +
|
||||
`cycle (SPEC §8). Verify the live Docmost tree, then re-run.`);
|
||||
}
|
||||
}
|
||||
// 1. Write each live page in its fixpoint form (normalize-on-write, SPEC §11).
|
||||
let written = 0;
|
||||
let failed = 0;
|
||||
let completed = 0;
|
||||
let nextIndex = 0;
|
||||
// pageIds whose write FAILED. A moved page whose new-path write failed must
|
||||
// NOT have its old path removed (otherwise the page vanishes entirely).
|
||||
const failedPageIds = new Set();
|
||||
const writeOne = async (w) => {
|
||||
try {
|
||||
const page = await client.getPageJson(w.pageId);
|
||||
// Native-Obsidian format: a minimal `gitmost_id` frontmatter + the fixpoint
|
||||
// markdown body. title/parent/space are DERIVED (filename / folder / repo),
|
||||
// so nothing but the pageId is persisted as meta.
|
||||
const text = serializePageFile(page.id, await stabilizePageBody(page.content));
|
||||
const abs = relToAbs(vaultRoot, w.relPath);
|
||||
await deps.mkdir(dirname(abs));
|
||||
await deps.writeFile(abs, text);
|
||||
written++;
|
||||
}
|
||||
catch (err) {
|
||||
failed++;
|
||||
failedPageIds.add(w.pageId);
|
||||
console.error(`pull: failed page ${w.pageId}:`, err instanceof Error ? err.message : String(err));
|
||||
}
|
||||
finally {
|
||||
completed++;
|
||||
if (completed % PROGRESS_EVERY === 0) {
|
||||
console.log(`pulled ${completed}/${actions.toWrite.length}`);
|
||||
}
|
||||
}
|
||||
};
|
||||
// Bounded-concurrency pool (dependency-free): a fixed set of runners each
|
||||
// take the next index until the write list is exhausted. One bad page never
|
||||
// aborts the whole pull (mirrors the fault-tolerant tree walk).
|
||||
const runner = async () => {
|
||||
while (true) {
|
||||
const i = nextIndex++;
|
||||
if (i >= actions.toWrite.length)
|
||||
return;
|
||||
await writeOne(actions.toWrite[i]);
|
||||
}
|
||||
};
|
||||
await Promise.all(Array.from({ length: Math.min(CONCURRENCY, actions.toWrite.length) || 1 }, () => runner()));
|
||||
// Helper: `rm` with force:true is a no-op if the file is already gone.
|
||||
const removePath = async (rel, what) => {
|
||||
try {
|
||||
await deps.rm(relToAbs(vaultRoot, rel));
|
||||
return true;
|
||||
}
|
||||
catch (err) {
|
||||
console.error(`pull: failed to ${what} ${rel}:`, err instanceof Error ? err.message : String(err));
|
||||
return false;
|
||||
}
|
||||
};
|
||||
// 2. Apply MOVE old-path removals. A moved page IS present in `live`, so its
|
||||
// old path is genuinely stale — NOT subject to the incomplete-fetch
|
||||
// suppression. BUT only remove the old path when (a) the planner marked it
|
||||
// removable (not reused by another live page) AND (b) the new-path write
|
||||
// actually SUCCEEDED — otherwise we would delete the only copy of a page
|
||||
// whose move-write failed (⭐ data-loss guard).
|
||||
let movedApplied = 0;
|
||||
for (const m of actions.moved) {
|
||||
if (!m.removeOldPath)
|
||||
continue;
|
||||
if (failedPageIds.has(m.pageId)) {
|
||||
console.warn(`pull: move write for ${m.pageId} failed — keeping old path ` +
|
||||
`${m.fromRelPath} (SPEC §8)`);
|
||||
continue;
|
||||
}
|
||||
if (await removePath(m.fromRelPath, "remove moved old path"))
|
||||
movedApplied++;
|
||||
}
|
||||
// 3. Apply ABSENCE-based deletions — `actions.toDelete` is ALREADY the
|
||||
// post-suppression set (empty when the decision suppressed them, SPEC §8).
|
||||
let deleted = 0;
|
||||
for (const rel of actions.toDelete) {
|
||||
if (await removePath(rel, "delete"))
|
||||
deleted++;
|
||||
}
|
||||
// 4. Stage + commit on `docmost` (only if there is something to commit).
|
||||
// Deterministic stabilized output means unchanged pages produce identical
|
||||
// bytes -> git sees no diff -> no churn (SPEC §11). The subject reflects the
|
||||
// ACTUAL work applied (pages written + files deleted), not the planned size,
|
||||
// so a run with failures does not over-report (SPEC §5 nit).
|
||||
const subject = deleted > 0
|
||||
? `docmost: sync ${written} page(s), ${deleted} deleted`
|
||||
: `docmost: sync ${written} page(s)`;
|
||||
await git.stageAll();
|
||||
const committed = await git.commit(subject, {
|
||||
authorName: BOT_AUTHOR_NAME,
|
||||
authorEmail: BOT_AUTHOR_EMAIL,
|
||||
trailers: [SOURCE_TRAILER],
|
||||
});
|
||||
// Merge docmost -> main. Conflicts are surfaced and left in git (SPEC §9);
|
||||
// we never push to Docmost. Push to a git remote is deferred (SPEC §7).
|
||||
await git.checkout(DEFAULT_BRANCH);
|
||||
const merge = await git.merge(DOCMOST_BRANCH);
|
||||
if (merge.conflict) {
|
||||
console.error("pull: merge of docmost -> main CONFLICTED. Conflict markers were left " +
|
||||
"in the vault for manual resolution (SPEC §9). Nothing is pushed to " +
|
||||
"Docmost (read-only). Resolve locally, then re-run.");
|
||||
}
|
||||
else if (!merge.ok) {
|
||||
console.error(`pull: merge of docmost -> main failed: ${merge.output}`);
|
||||
}
|
||||
console.log("pull: git push to remote is DEFERRED in this increment (SPEC §7).");
|
||||
return { written, movedApplied, deleted, failed, committed, merge };
|
||||
}
|
||||
504
packages/git-sync/build/engine/push.d.ts
vendored
504
packages/git-sync/build/engine/push.d.ts
vendored
@@ -1,504 +0,0 @@
|
||||
/**
|
||||
* Push cycle — vault -> Docmost (SPEC §6 "ФС → Docmost"), FIRST increment.
|
||||
*
|
||||
* This module mirrors the structure of `./pull.ts`: a set of VaultGit diff/ref
|
||||
* primitives (in `./git.ts`), a PURE planner (`computePushActions`) that turns
|
||||
* a git diff into a classified action set with NO IO, and a THIN injectable
|
||||
* applier (`applyPushActions`) exercised in tests via fakes only.
|
||||
*
|
||||
* Direction is vault -> Docmost. The diff is `main` against
|
||||
* `refs/docmost/last-pushed` (SPEC §6 step 2); each `A`/`M`/`D`/`R` row is
|
||||
* translated into a Docmost mutation by `pageId` identity (SPEC §4):
|
||||
* - A without pageId -> create_page (then write the assigned pageId back).
|
||||
* - A with pageId -> update (restored/copied file; the page already exists).
|
||||
* - M -> update content (collab/Yjs path, SPEC §2/§15.6).
|
||||
* - D -> delete_page (pageId recovered from the PRE-IMAGE meta).
|
||||
* - R -> rename/move (CLASSIFIED here, APPLIED in push #3).
|
||||
*
|
||||
* MOVE/RENAME APPLY (push #3) — DONE here. `classifyRenameMoves` (PURE) resolves
|
||||
* each `renamesMoves` entry into the Docmost op(s) it needs, comparing the PATH-
|
||||
* derived parent (SPEC §5: the file path is the source of truth for tree
|
||||
* position, NOT stale `meta.parentPageId`) and the meta title; `applyPushActions`
|
||||
* then calls `move_page` / `rename_page` (both for a reparent+retitle), or
|
||||
* records a NO-OP for a cosmetic local-only file-path rename.
|
||||
*
|
||||
* The client seam is the native `GitSyncClient` (`Pick<GitSyncClient, ...>`);
|
||||
* the gitmost server drives the engine in-process (there is no standalone CLI
|
||||
* entry point).
|
||||
*/
|
||||
import { type DocmostMdMeta } from "../lib/index.js";
|
||||
import type { GitSyncClient } from "./client.types.js";
|
||||
import type { DiffEntry } from "./git.js";
|
||||
import { VaultGit } from "./git.js";
|
||||
import { type Settings } from "./settings.js";
|
||||
export type { DiffEntry } from "./git.js";
|
||||
/** A new local file whose meta carries no pageId yet: the page must be CREATED in Docmost. */
export interface CreateAction {
    /** Path of the new file, relative to the vault. */
    path: string;
}
|
||||
/** A CONTENT change to a page that already exists (the file's meta carries its pageId). */
export interface UpdateAction {
    /** Id of the existing page, taken from the file's meta. */
    pageId: string;
    /** Path of the changed file, relative to the vault. */
    path: string;
}
|
||||
/** A page to soft-delete in Docmost (it goes to Trash, SPEC §8). */
export interface DeleteAction {
    /** Id of the page to delete. */
    pageId: string;
}
|
||||
/**
 * A page whose file was renamed or moved: same pageId, new path. This is the
 * UNRESOLVED form; `RenameMoveActionClassified` is the resolved one.
 */
export interface RenameMoveAction {
    /** Id of the page (unchanged by the rename/move). */
    pageId: string;
    /** Vault-relative path before the change. */
    oldPath: string;
    /** Vault-relative path after the change. */
    newPath: string;
}
|
||||
/**
|
||||
* A CLASSIFIED rename/move (push #3): a `RenameMoveAction` resolved into the
|
||||
* Docmost op(s) it actually needs. The file PATH is the source of truth for tree
|
||||
* position (SPEC §5: "the link's source of truth is the pageId, not the path" — the path is COSMETIC and
|
||||
* LOCAL, the page identity is its pageId), so we compare the RESOLVED parent of
|
||||
* the new path against the resolved parent of the old path, and the title in the
|
||||
* current meta against the title in the previous meta. Each sub-op is emitted
|
||||
* ONLY when something real changed:
|
||||
* - `move` — the resolved parent page changed (reparent in Docmost). A `null`
|
||||
* `parentPageId` means the new parent is ROOT (the file sits at the space
|
||||
* root, no enclosing folder).
|
||||
* - `rename` — the page title changed (a pure title edit in Docmost).
|
||||
* - `noop` — neither changed: a purely LOCAL file-path rename (same parent,
|
||||
* same title). The page identity is its pageId, so Docmost is NOT called.
|
||||
* `move` and `rename` are independent and may BOTH be present (reparent + retitle).
|
||||
*/
|
||||
export interface RenameMoveActionClassified {
|
||||
pageId: string;
|
||||
oldPath: string;
|
||||
newPath: string;
|
||||
/** Present iff the resolved parent changed -> `move_page` (reparent). */
|
||||
move?: {
|
||||
parentPageId: string | null;
|
||||
};
|
||||
/** Present iff the title changed -> `rename_page` (title-only). */
|
||||
rename?: {
|
||||
title: string;
|
||||
};
|
||||
/** True iff neither parent nor title changed (cosmetic local-only rename). */
|
||||
noop?: true;
|
||||
}
|
||||
/**
|
||||
* Injected resolvers for the PURE `classifyRenameMoves` (push #3). Both are PURE
|
||||
* given a path + side; the real `main` (a follow-up) wires them to the file tree
|
||||
* (`readFile` for `current`, `git.showFileAtRef` for `prev`), tests pass plain
|
||||
* lookups. SPEC §5 path-as-truth:
|
||||
* - `metaAt`: the file's synthetic native meta at that side (title from the
|
||||
* filename, pageId from the `gitmost_id` frontmatter).
|
||||
* - `resolveParentPageId`: the pageId of the page whose FILE is the parent
|
||||
* FOLDER's `.md` (one level up from the given path), or `null` for ROOT.
|
||||
*/
|
||||
export interface ClassifyRenameMovesDeps {
|
||||
metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
|
||||
resolveParentPageId: (path: string, side: MetaSide) => string | null;
|
||||
}
|
||||
/**
|
||||
* PURE classifier for the `renamesMoves` produced by `computePushActions`
|
||||
* (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
|
||||
* Docmost op(s) it needs, with NO IO (both resolvers are injected).
|
||||
*
|
||||
* SPEC §5 — the file PATH is the source of truth for tree position, NOT the
|
||||
* (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
|
||||
* `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing
|
||||
* folder, via `deps.resolveParentPageId`. The title comes from the meta.
|
||||
*
|
||||
* For each entry:
|
||||
* - `newParent = resolveParentPageId(newPath, 'current')`,
|
||||
* `oldParent = resolveParentPageId(oldPath, 'prev')`.
|
||||
* - `newTitle = metaAt(newPath,'current')?.title`,
|
||||
* `oldTitle = metaAt(oldPath,'prev')?.title`.
|
||||
* - include `move` iff `newParent !== oldParent` (a real reparent),
|
||||
* - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from
|
||||
* `oldTitle` (a real title edit; an empty/absent new title is never a rename),
|
||||
* - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename;
|
||||
* the page is its pageId, so Docmost is not touched).
|
||||
*/
|
||||
export declare function classifyRenameMoves(renamesMoves: RenameMoveAction[], deps: ClassifyRenameMovesDeps): RenameMoveActionClassified[];
|
||||
/** The classified set of push actions (PURE output of `computePushActions`). */
|
||||
export interface PushActions {
|
||||
creates: CreateAction[];
|
||||
updates: UpdateAction[];
|
||||
deletes: DeleteAction[];
|
||||
renamesMoves: RenameMoveAction[];
|
||||
/**
|
||||
* Diff rows that could NOT be classified into an action, with a reason — e.g.
|
||||
* a deleted file whose PRE-IMAGE meta carried no recoverable pageId (the
|
||||
* untracked-file guard, SPEC §8: only files that were tracked with a pageId
|
||||
* are deleted in Docmost). Carried so the caller can log them.
|
||||
*/
|
||||
skipped: {
|
||||
path: string;
|
||||
status: DiffEntry["status"];
|
||||
reason: string;
|
||||
}[];
|
||||
}
|
||||
/**
|
||||
* Which tree a `metaAt` lookup reads the file's native meta from:
|
||||
* - `current`: the current `main` tree (the live file content) — used for
|
||||
* A/M/R, where the file still exists.
|
||||
* - `prev`: the last-pushed PRE-IMAGE (e.g. `refs/docmost/last-pushed:<path>`)
|
||||
* — used for D, where the file is gone from `main` but its pageId must be
|
||||
* recovered from the version Docmost last knew (SPEC §6/§8).
|
||||
*/
|
||||
export type MetaSide = "current" | "prev";
|
||||
/** Input to the PURE planner. `metaAt` is injected (no IO inside the planner). */
|
||||
export interface PushActionsInput {
|
||||
/** Diff rows of `main` vs `refs/docmost/last-pushed` (SPEC §6 step 2). */
|
||||
changes: DiffEntry[];
|
||||
/**
|
||||
* Resolve a file's synthetic native meta at a given side, or `null` if the file is
|
||||
* absent there / has no parseable meta. PURE injection: the real `main` reads
|
||||
* the working tree (current) or `git show <last-pushed>:<path>` (prev); tests
|
||||
* pass a plain lookup.
|
||||
*/
|
||||
metaAt: (path: string, side: MetaSide) => DocmostMdMeta | null;
|
||||
/**
|
||||
* The pageIds present at ANY path in the current `main` tree (optional). When
|
||||
* given, a deleted file whose pageId still lives somewhere in the tree is NOT
|
||||
* a deletion but a MOVE — guards against trashing a live page when a layout
|
||||
* reshuffle relocated its file (possibly across two cycles, so the matching
|
||||
* add isn't in THIS diff). When omitted, only the in-diff D+A/M coalescing
|
||||
* applies.
|
||||
*/
|
||||
currentPageIds?: Set<string>;
|
||||
}
|
||||
/**
|
||||
* PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
|
||||
* action by `pageId` identity, with NO IO (the `metaAt` resolver is injected).
|
||||
*
|
||||
* Classification rules:
|
||||
* - `A` (added):
|
||||
* - current meta HAS a pageId -> UPDATE (a restored/copied file whose
|
||||
* page already exists; we push its content rather than create a dup).
|
||||
* - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a
|
||||
* brand-new local file; the page does not exist in Docmost yet).
|
||||
* - current meta has NO pageId and NO usable spaceId -> SKIP with reason
|
||||
* `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
|
||||
* (§16), and a new local file may carry only partial human meta. We
|
||||
* refuse to create rather than guess a space (SPEC §8 guard spirit).
|
||||
* - `M` (modified): current meta has a pageId -> UPDATE content. (If a modified
|
||||
* file somehow lost its pageId it is skipped — there is nothing to target.)
|
||||
* - `D` (deleted): recover the pageId from the PRE-IMAGE meta (`metaAt(path,
|
||||
* 'prev')`) -> DELETE. If no pageId can be recovered, SKIP with a reason
|
||||
* (untracked-file guard, SPEC §8: never delete an untracked page).
|
||||
* - `R` (renamed/moved): same pageId (from current meta), path changed ->
|
||||
* RENAME/MOVE. Resolution of move-vs-rename + the new parentPageId is
|
||||
* left to `classifyRenameMoves`; here we only record oldPath/newPath/
|
||||
* pageId. If the renamed file has no recoverable pageId it is SKIPPED.
|
||||
* (`C` copy is treated the same as `R` for recording purposes.)
|
||||
*/
|
||||
export declare function computePushActions(input: PushActionsInput): PushActions;
|
||||
/** The marker the push direction advances after a successful push (SPEC §5/§6). */
|
||||
export declare const LAST_PUSHED_REF = "refs/docmost/last-pushed";
|
||||
/**
|
||||
* The mirror branch fast-forwarded after a clean push (SPEC §5/§6 step 3). It
|
||||
* reflects "what Docmost currently contains"; advancing it to the pushed `main`
|
||||
* commit closes the loop so the next pull diffs empty for the pushed pages.
|
||||
*/
|
||||
export declare const DOCMOST_BRANCH = "docmost";
|
||||
/**
|
||||
* Injectable IO for `applyPushActions`. The real `main` (NEXT increment) wires
|
||||
* these to the live client, `node:fs/promises`, and the vault git wrapper; this
|
||||
* increment drives them only through FAKES in tests (no live destructive run).
|
||||
* - `client`: the create/update/delete/move/rename subset of `GitSyncClient`.
|
||||
* - `readFile`/`writeFile`: read a changed file's body / write a file back
|
||||
* (by vault-relative path; the applier does not resolve absolute paths so
|
||||
* fakes stay trivial).
|
||||
* - `git`: `updateRef` (advance `refs/docmost/last-pushed`) and
|
||||
* `fastForwardBranch` (advance the `docmost` mirror after a clean push, the
|
||||
* loop-close — SPEC §6 step 3 / §10).
|
||||
*/
|
||||
export interface ApplyPushDeps {
|
||||
client: Pick<GitSyncClient, "importPageMarkdown" | "createPage" | "deletePage" | "movePage" | "renamePage">;
|
||||
/** Read a changed file's full text by its vault-relative path. */
|
||||
readFile: (path: string) => Promise<string>;
|
||||
/** Write a file's full text by its vault-relative path. */
|
||||
writeFile: (path: string, text: string) => Promise<void>;
|
||||
/**
|
||||
* The Docmost spaceId this vault mirrors. A CREATE targets this space (the
|
||||
* native file carries no spaceId — every file in the vault belongs to it), and
|
||||
* it backs the synthetic native meta the classifier reads.
|
||||
*/
|
||||
spaceId: string;
|
||||
/**
|
||||
* `updateRef` advances `refs/docmost/last-pushed`; `fastForwardBranch` advances
|
||||
* the `docmost` mirror after a clean push. `showFileAtRef` reads a file's text
|
||||
* at a ref (used by the move/rename classifier to resolve the PREVIOUS parent
|
||||
* folder's `.md` at `refs/docmost/last-pushed`, SPEC §5 path-as-truth).
|
||||
*/
|
||||
git: Pick<VaultGit, "updateRef" | "fastForwardBranch" | "showFileAtRef">;
|
||||
}
|
||||
/** A file whose meta was rewritten with a freshly-assigned pageId (post-create). */
|
||||
export interface WrittenBackPage {
|
||||
path: string;
|
||||
pageId: string;
|
||||
}
|
||||
/**
|
||||
* The per-page push record consulted by a FUTURE poll-suppression (SPEC §10): a
|
||||
* pulled page whose body hash + `updatedAt` match a record here is OUR OWN write
|
||||
* and must not be re-pulled. PRODUCED here; CONSUMED on the pull side later.
|
||||
*/
|
||||
export interface PushedPageRecord {
|
||||
/** The Docmost pageId that was updated/created. */
|
||||
pageId: string;
|
||||
/**
|
||||
* The `updatedAt` from the create/update client result, when the result
|
||||
* exposed one. Absent when the (fake) client did not return it.
|
||||
*/
|
||||
updatedAt?: string;
|
||||
/** Stable hash of the markdown BODY that was pushed (SPEC §10 "хэш тела"). */
|
||||
bodyHash: string;
|
||||
}
|
||||
/**
|
||||
* One page whose operation FAILED during apply (SPEC §12 resumability). The bad
|
||||
* page is isolated — recorded here — and the rest of the batch still runs; the
|
||||
* refs are NOT advanced when there is any failure, so a re-run retries cleanly.
|
||||
*/
|
||||
export interface PushFailure {
|
||||
kind: "update" | "create" | "delete" | "move" | "rename";
|
||||
/** The pageId for update/delete/move/rename; absent for a never-id'd create. */
|
||||
pageId?: string;
|
||||
/** The vault-relative path for create/update/move/rename; absent for delete. */
|
||||
path?: string;
|
||||
/** The error message captured from the thrown error. */
|
||||
error: string;
|
||||
}
|
||||
/**
|
||||
* A rename/move action that resolved to a NO-OP (push #3, SPEC §5): a purely
|
||||
* LOCAL file-path rename whose resolved parent AND title are both unchanged. The
|
||||
* page identity is its pageId and the path is COSMETIC/local-only, so Docmost is
|
||||
* NOT called — the skip is recorded here (with the reason) for logging.
|
||||
*/
|
||||
export interface PushNoop {
|
||||
pageId: string;
|
||||
oldPath: string;
|
||||
newPath: string;
|
||||
/** Why no Docmost op was emitted (currently always a path-only rename). */
|
||||
reason: "path-only-rename";
|
||||
}
|
||||
/** Structured outcome of `applyPushActions` (counts + write-backs + noops). */
|
||||
export interface ApplyPushResult {
|
||||
created: number;
|
||||
updated: number;
|
||||
deleted: number;
|
||||
/** Pages reparented in Docmost via `move_page` (push #3, SPEC §5/§16). */
|
||||
moved: number;
|
||||
/** Pages retitled in Docmost via `rename_page` (push #3, SPEC §5/§6). */
|
||||
renamed: number;
|
||||
/**
|
||||
* Files whose `gitmost_id` frontmatter was written with the pageId Docmost assigned on
|
||||
* create — these now need a FOLLOW-UP commit (the meta on disk changed). The
|
||||
* commit itself is the caller's job (NEXT increment); recorded here so it is
|
||||
* not lost.
|
||||
*/
|
||||
writtenBack: WrittenBackPage[];
|
||||
/**
|
||||
* Per-page push records (pageId + optional `updatedAt` + body hash) for every
|
||||
* page successfully updated/created — the §10 loop-guard data a future
|
||||
* poll-suppression (pull side) will consult so it does not re-pull our own
|
||||
* write. Deletes are not included (no body was pushed).
|
||||
*/
|
||||
pushed: PushedPageRecord[];
|
||||
/**
|
||||
* Pages whose operation threw — isolated and recorded, the batch continued
|
||||
* (SPEC §12). Non-empty here means the refs were NOT advanced.
|
||||
*/
|
||||
failures: PushFailure[];
|
||||
/**
|
||||
* Rename/move actions that resolved to a NO-OP — a purely LOCAL file-path
|
||||
* rename (same parent, same title). NO Docmost call was made for these (SPEC
|
||||
* §5: the page is its pageId, the path is local-only). Recorded for logging.
|
||||
*/
|
||||
noops: PushNoop[];
|
||||
/** Diff rows the planner could not classify (carried through for logging). */
|
||||
skipped: PushActions["skipped"];
|
||||
/** Whether `refs/docmost/last-pushed` was advanced (only on a CLEAN push). */
|
||||
lastPushedAdvanced: boolean;
|
||||
/**
|
||||
* Result of fast-forwarding the `docmost` mirror branch after a CLEAN push
|
||||
* (the loop-close, SPEC §6 step 3 / §10). `null` when no advance was attempted
|
||||
* (no `pushedCommit`, or there were failures). `{ ok:false, reason }` when a
|
||||
* non-fast-forward was REFUSED (divergent `docmost` history is never clobbered).
|
||||
*/
|
||||
docmostFastForward: {
|
||||
ok: boolean;
|
||||
reason?: string;
|
||||
} | null;
|
||||
}
|
||||
/**
|
||||
* THIN IO applier for the push cases (create/update/delete/rename/move). Exercised
|
||||
* via FAKES in tests, and driven live by `runPush` on its `--apply` path.
|
||||
*
|
||||
* - UPDATE: read the file body, then `client.importPageMarkdown(pageId, body)`.
|
||||
* This is the collab/Yjs write path (SPEC §2/§15.6) — NEVER a raw jsonb
|
||||
* overwrite. The full self-contained markdown (meta + body) is sent as-is;
|
||||
* `importPageMarkdown` parses the meta/body itself.
|
||||
* - CREATE: derive title/spaceId/parentPageId from the file's current meta,
|
||||
* `client.createPage(...)`, take the assigned pageId from the result, and
|
||||
* write it BACK as the file's `gitmost_id` frontmatter (re-serialized via
|
||||
* `serializePageFile`, body preserved) so the file becomes
|
||||
* tracked. The write-back is recorded in `writtenBack` (a follow-up commit
|
||||
* is needed — NEXT increment).
|
||||
* - DELETE: `client.deletePage(pageId)` — soft-delete to Trash (SPEC §8).
|
||||
* - RENAME/MOVE (push #3, SPEC §5/§6/§16): classify each `renamesMoves` entry
|
||||
* with `classifyRenameMoves` (resolvers read the parent FOLDER's `.md` for
|
||||
* the parent pageId — path-as-truth — and the meta for the title), then:
|
||||
* - `move` -> `client.movePage(pageId, parentPageId, position?)` (reparent;
|
||||
* `position` is UNDEFINED for now — the client supplies a default),
|
||||
* - `rename` -> `client.renamePage(pageId, title)` (title-only),
|
||||
* - BOTH -> move (reparent) THEN rename (title), in that order,
|
||||
* - `noop` -> NO client call; recorded in `noops` (a cosmetic local-only
|
||||
* file-path rename: the page is its pageId, the path is local, SPEC §5).
|
||||
*
|
||||
* FAIL-SAFE / per-page isolation (SPEC §12 resumability). Each page's operation
|
||||
* is wrapped in its own try/catch: a single failing page is recorded in
|
||||
* `failures[]` (with its kind + pageId/path + error) and the batch CONTINUES —
|
||||
* one bad page must never block the rest. Crucially, the refs are advanced ONLY
|
||||
* when `failures.length === 0`: a PARTIAL push must NOT advance
|
||||
* `refs/docmost/last-pushed` or the `docmost` mirror, so a re-run retries the
|
||||
* whole batch cleanly (the already-applied pages are idempotent re-applies).
|
||||
*
|
||||
* LOOP-CLOSE (SPEC §6 step 3 / §10). After a fully-successful push, when a
|
||||
* `pushedCommit` is supplied:
|
||||
* - advance `refs/docmost/last-pushed` to it (what of `main` is in Docmost), AND
|
||||
* - fast-forward the `docmost` mirror branch to it via
|
||||
* `git.fastForwardBranch('docmost', pushedCommit)` — so the mirror reflects
|
||||
* what Docmost now contains and the NEXT pull diffs EMPTY for these pages
|
||||
* (it does not re-pull our own write). The ff is REFUSED (not forced) if
|
||||
* `docmost` is not an ancestor of the pushed commit; the result is surfaced
|
||||
* in `docmostFastForward`. On ANY failure, NEITHER ref is advanced.
|
||||
*
|
||||
* LOOP-GUARD DATA (SPEC §10). For every page successfully updated/created the
|
||||
* result carries a `pushed` record `{ pageId, updatedAt?, bodyHash }` — the body
|
||||
* hash of what was pushed plus the write's `updatedAt` (when the client returned
|
||||
* one). A future pull-side poll-suppression consults this so it does not re-pull
|
||||
* our own write; producing it is in scope here, consuming it is deferred.
|
||||
*
|
||||
* @param pushedCommit The `main` commit just reflected into Docmost (SHA or
|
||||
* commit-ish). When omitted, NEITHER ref is advanced (e.g. a dry plan).
|
||||
*/
|
||||
export declare function applyPushActions(deps: ApplyPushDeps, actions: PushActions, pushedCommit?: string): Promise<ApplyPushResult>;
|
||||
/**
|
||||
* SPEC §5 path-as-truth: the parent FOLDER's `.md` file for a vault-relative
|
||||
* (forward-slash) path. `buildVaultLayout` puts a page with children at
|
||||
* `<...>/Title.md` and nests its children under `<...>/Title/`, so for
|
||||
* `newPath = <dir>/Child.md` the parent page's file is `<dir>.md` (the enclosing
|
||||
* folder, one level up). A path with NO enclosing folder (`Child.md`, at the
|
||||
* space root) has no parent folder file -> `null` (the parent is ROOT).
|
||||
*/
|
||||
export declare function parentFolderFile(path: string): string | null;
|
||||
/**
|
||||
* Whether a vault path is a Docmost PAGE file (design §"Адопция"): a `.md` file
|
||||
* with NO dot-segment anywhere in its path. This excludes `.obsidian/` config,
|
||||
* `.trash/`, dotfiles (`.foo.md`), and every non-`.md` file (attachments, JSON,
|
||||
* …) — Obsidian owns those; they live in the vault but are never pages. Used to
|
||||
* screen the PUSH diff so non-page files are never created/updated/deleted in
|
||||
* Docmost (and never get a `gitmost_id` frontmatter written into them).
|
||||
*/
|
||||
export declare function isPageFile(path: string): boolean;
|
||||
/**
|
||||
* The human ("local") git identity used for engine-made commits on `main` in the
|
||||
* push direction (SPEC §7.3). The provenance is carried by the trailer (below),
|
||||
* which the loop-guard keys on; the identity is for history readability only.
|
||||
* When the vault repo already has a configured `user.name`/`user.email`, git
|
||||
* uses that for the working-tree commit; this is the fallback the daemon stamps.
|
||||
*/
|
||||
export declare const LOCAL_AUTHOR_NAME = "Local";
|
||||
export declare const LOCAL_AUTHOR_EMAIL = "local@local";
|
||||
/** The provenance trailer marking a `main`-side (human/local) commit (SPEC §7.3). */
|
||||
export declare const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
|
||||
/**
|
||||
* Injectable deps for `runPush` (mirrors `pull.ts`'s wiring; everything that
|
||||
* touches the outside world is here so tests pass fakes). `makeClient` is a
|
||||
* FACTORY, not a client — a dry-run must build NO client at all (it is never
|
||||
* called), and only `--apply` invokes it.
|
||||
*/
|
||||
export interface PushDeps {
|
||||
settings: Settings;
|
||||
git: Pick<VaultGit, "assertGitAvailable" | "ensureRepo" | "isMergeInProgress" | "checkout" | "stageAll" | "commit" | "readRef" | "revParse" | "diffNameStatus" | "showFileAtRef" | "updateRef" | "fastForwardBranch" | "listTrackedFiles">;
|
||||
/** Build a real client — called ONLY on `--apply`, never on dry-run. */
|
||||
makeClient: (settings: Settings) => ApplyPushDeps["client"];
|
||||
/** Read a file's full text by its vault-relative (forward-slash) path. */
|
||||
readFile: (path: string) => Promise<string>;
|
||||
/** Write a file's full text by its vault-relative path. */
|
||||
writeFile: (path: string, text: string) => Promise<void>;
|
||||
/** Structured logger (defaults to console in `main`; a recorder in tests). */
|
||||
log: (line: string) => void;
|
||||
}
|
||||
/** The structured outcome of a `runPush` cycle (returned + summarized). */
|
||||
export interface PushRunResult {
|
||||
/** Which path ran: `dry-run` (plan only) or `apply` (Docmost mutated). */
|
||||
mode: "dry-run" | "apply";
|
||||
/** Why the cycle stopped before planning, if it did (e.g. a left-over merge). */
|
||||
aborted?: "merge-in-progress";
|
||||
/** The diff base the plan was computed against (`last-pushed` else `docmost`). */
|
||||
base?: {
|
||||
ref: string;
|
||||
source: "last-pushed" | "docmost";
|
||||
sha: string | null;
|
||||
};
|
||||
/** The `main` commit the plan targets (the would-be pushed commit). */
|
||||
pushedCommit?: string;
|
||||
/** Planned action counts from the PURE planner (present once a plan was built). */
|
||||
planned?: {
|
||||
creates: number;
|
||||
updates: number;
|
||||
deletes: number;
|
||||
renamesMoves: number;
|
||||
skipped: number;
|
||||
};
|
||||
/** The applier's structured result — ONLY present on the `--apply` path. */
|
||||
applied?: ApplyPushResult;
|
||||
/**
|
||||
* True when `applyPushActions` REFUSED to fast-forward a divergent `docmost`
|
||||
* mirror (SPEC §5 invariant broken). Escalated (logged prominently) and folded
|
||||
* into the CLI's non-zero exit.
|
||||
*/
|
||||
divergentDocmost?: boolean;
|
||||
/** Per-page failures from the applier (empty/absent on a clean run). */
|
||||
failures?: PushFailure[];
|
||||
}
|
||||
/**
|
||||
* Run one FS->Docmost push cycle (SPEC §6 "ФС → Docmost"), DRY-RUN BY DEFAULT.
|
||||
*
|
||||
* Steps (mirrors `pull.ts`):
|
||||
* 1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
|
||||
* non-zero-ish result) if a merge is in progress — never push on top of an
|
||||
* unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
|
||||
* Docmost (SPEC §9).
|
||||
* 2. Checkout `main` (the human-facing branch the push reads from).
|
||||
* 3. Commit the human's pending working-tree changes on `main` with the
|
||||
* `local` provenance trailer (SPEC §7.3). A no-op when nothing changed.
|
||||
* 4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
|
||||
* `docmost` mirror branch (what Docmost currently has). Resolve `main`.
|
||||
* 5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)`
|
||||
* resolver (current = working tree, prev = `git show <base>:<path>`); run
|
||||
* the PURE `computePushActions`.
|
||||
* 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
|
||||
* calls, NO ref advance.
|
||||
* 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`,
|
||||
* then (a) if any pageIds were written back (creates), commit them on `main`
|
||||
* with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the
|
||||
* new commit so the recorded pageIds are persisted in what Docmost mirrors;
|
||||
* (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
|
||||
* WARNING and a non-zero-ish flag. Then log a one-line summary.
|
||||
*/
|
||||
export declare function runPush(deps: PushDeps, opts: {
|
||||
dryRun: boolean;
|
||||
}): Promise<PushRunResult>;
|
||||
/** Parsed `push` CLI flags. DRY-RUN is the default; `--apply` opts into writes. */
|
||||
export interface PushParsedArgs {
|
||||
/** True when `--apply` was passed (the ONLY path that writes to Docmost). */
|
||||
apply: boolean;
|
||||
}
|
||||
/**
|
||||
* Parse the `push` CLI flags. SAFE BY DEFAULT: without `--apply` the run is a
|
||||
* DRY-RUN (plan only). Exported so the flag handling is unit-testable.
|
||||
*/
|
||||
export declare function parseArgs(argv: string[]): PushParsedArgs;
|
||||
@@ -1,971 +0,0 @@
|
||||
import { parsePageFile, serializePageFile } from "../lib/page-file.js";
|
||||
import { DEFAULT_BRANCH } from "./git.js";
|
||||
import { bodyHash } from "./loop-guard.js";
|
||||
/**
|
||||
* PURE classifier for the `renamesMoves` produced by `computePushActions`
|
||||
* (push #3, SPEC §5/§6/§8). Resolves each `{pageId, oldPath, newPath}` into the
|
||||
* Docmost op(s) it needs, with NO IO (both resolvers are injected).
|
||||
*
|
||||
* SPEC §5 — the file PATH is the source of truth for tree position, NOT the
|
||||
* (possibly stale) `meta.parentPageId`. So the NEW parent is resolved from
|
||||
* `newPath`'s enclosing folder, and the OLD parent from `oldPath`'s enclosing
|
||||
* folder, via `deps.resolveParentPageId`. The title comes from the meta.
|
||||
*
|
||||
* For each entry:
|
||||
* - `newParent = resolveParentPageId(newPath, 'current')`,
|
||||
* `oldParent = resolveParentPageId(oldPath, 'prev')`.
|
||||
* - `newTitle = metaAt(newPath,'current')?.title`,
|
||||
* `oldTitle = metaAt(oldPath,'prev')?.title`.
|
||||
* - include `move` iff `newParent !== oldParent` (a real reparent),
|
||||
* - include `rename` iff `newTitle` is a NON-EMPTY string AND differs from
|
||||
* `oldTitle` (a real title edit; an empty/absent new title is never a rename),
|
||||
* - if NEITHER applies -> `noop: true` (a cosmetic local-only file-path rename;
|
||||
* the page is its pageId, so Docmost is not touched).
|
||||
*/
|
||||
/**
 * Turn each recorded rename/move into the Docmost operation(s) it requires.
 *
 * No IO happens here: parent pages and titles come exclusively from the two
 * injected resolvers on `deps`.
 *
 * @param renamesMoves Entries of the form `{ pageId, oldPath, newPath }`.
 * @param deps `resolveParentPageId(path, side)` and `metaAt(path, side)`
 *   lookups; `side` is `"current"` for the new path and `"prev"` for the old.
 * @returns One result per entry, carrying the three input fields plus:
 *   - `move: { parentPageId }` when the parent resolved for `newPath` is not
 *     strictly equal to the one resolved for `oldPath`;
 *   - `rename: { title }` when the current title is a non-empty string that
 *     differs from the previous title;
 *   - `noop: true` when neither of the above applies.
 */
export function classifyRenameMoves(renamesMoves, deps) {
    return renamesMoves.map(({ pageId, oldPath, newPath }) => {
        // Resolve both sides up front, parents first and then titles.
        const parentNow = deps.resolveParentPageId(newPath, "current");
        const parentBefore = deps.resolveParentPageId(oldPath, "prev");
        const titleNow = deps.metaAt(newPath, "current")?.title;
        const titleBefore = deps.metaAt(oldPath, "prev")?.title;

        // A reparent is any strict difference between the two resolved parents.
        const reparented = parentNow !== parentBefore;
        // A retitle needs a real (string, non-empty) new title that changed.
        const retitled = typeof titleNow === "string" &&
            titleNow !== "" &&
            titleNow !== titleBefore;

        return {
            pageId,
            oldPath,
            newPath,
            ...(reparented ? { move: { parentPageId: parentNow } } : {}),
            ...(retitled ? { rename: { title: titleNow } } : {}),
            // Nothing changed: the entry is flagged so no Docmost call is made.
            ...(reparented || retitled ? {} : { noop: true }),
        };
    });
}
|
||||
/**
|
||||
* PURE push planner (SPEC §4/§6/§8). Classifies each diff row into a Docmost
|
||||
* action by `pageId` identity, with NO IO (the `metaAt` resolver is injected).
|
||||
*
|
||||
* Classification rules:
|
||||
* - `A` (added):
|
||||
* - current meta HAS a pageId -> UPDATE (a restored/copied file whose
|
||||
* page already exists; we push its content rather than create a dup).
|
||||
* - current meta has NO pageId but HAS a non-empty spaceId -> CREATE (a
|
||||
* brand-new local file; the page does not exist in Docmost yet).
|
||||
* - current meta has NO pageId and NO usable spaceId -> SKIP with reason
|
||||
* `create-without-spaceId`: Docmost `create_page` REQUIRES a spaceId
|
||||
* (§16), and a new local file may carry only partial human meta. We
|
||||
* refuse to create rather than guess a space (SPEC §8 guard spirit).
|
||||
* - `M` (modified): current meta has a pageId -> UPDATE content. (If a modified
|
||||
* file somehow lost its pageId it is skipped — there is nothing to target.)
|
||||
* - `D` (deleted): recover the pageId from the PRE-IMAGE meta (`metaAt(path,
|
||||
* 'prev')`) -> DELETE. If no pageId can be recovered, SKIP with a reason
|
||||
* (untracked-file guard, SPEC §8: never delete an untracked page).
|
||||
* - `R` (renamed/moved): same pageId (from current meta), path changed ->
|
||||
* RENAME/MOVE. Resolution of move-vs-rename + the new parentPageId is
|
||||
* left to `classifyRenameMoves`; here we only record oldPath/newPath/
|
||||
* pageId. If the renamed file has no recoverable pageId it is SKIPPED.
|
||||
* (`C` copy is treated the same as `R` for recording purposes.)
|
||||
*/
|
||||
export function computePushActions(input) {
|
||||
const { metaAt, currentPageIds } = input;
|
||||
// PAGE-FILE FILTER (design §"Адопция"): only `.md` files OUTSIDE any dot-folder
|
||||
// are Docmost pages. `.obsidian/*`, attachments, and other non-page files are
|
||||
// committed to the vault (no `.gitignore`) and so appear in the diff, but they
|
||||
// are NEVER pages — Obsidian owns them. Without this filter every ADDED such
|
||||
// file would be mis-classified as a CREATE (nativeMeta always supplies a
|
||||
// spaceId, so the old `create-without-spaceId` skip no longer screens them),
|
||||
// creating junk pages in Docmost and corrupting the file with a `gitmost_id`
|
||||
// frontmatter. Filter BEFORE any classification so non-page A/M/D/R are ignored.
|
||||
const changes = input.changes.filter((c) => isPageFile(c.path));
|
||||
const actions = {
|
||||
creates: [],
|
||||
updates: [],
|
||||
deletes: [],
|
||||
renamesMoves: [],
|
||||
skipped: [],
|
||||
};
|
||||
// GHOST-MOVE coalescing (⭐ data-loss guard). git's rename detection (`-M`)
|
||||
// can miss a move when the two files are too dissimilar — which is exactly the
|
||||
// case for the tiny meta-only files a layout RESHUFFLE produces (e.g.
|
||||
// several untitled pages sharing the `_` fallback name; retitling one frees the
|
||||
// bare `_` and another page's file relocates `_ ~slug.md` -> `_.md`). git then
|
||||
// reports the move as a DELETE of the old path + an ADD of the new one. Taken
|
||||
// literally that soft-deletes a page that merely MOVED — a live page vanishing
|
||||
// into Trash. Identity is the pageId, not git's heuristic: a pageId that is
|
||||
// BOTH deleted (pre-image) and added (current) is one page that relocated, so
|
||||
// we classify it as a rename/move and NEVER as a delete.
|
||||
// A pageId can land at its new path two ways: as an ADD (the path was free) or
|
||||
// as a MODIFY (the path was occupied by ANOTHER page that left — the reshuffle
|
||||
// case, where `_.md`'s occupant changes pageId). Both are "the page survives at
|
||||
// a new path", so the surviving side is the CURRENT-meta pageId of A *and* M.
|
||||
const deletedPath = new Map();
|
||||
const survivingPath = new Map();
|
||||
for (const change of changes) {
|
||||
if (change.status === "D") {
|
||||
const pid = metaAt(change.path, "prev")?.pageId;
|
||||
if (pid)
|
||||
deletedPath.set(pid, change.path);
|
||||
}
|
||||
else if (change.status === "A" || change.status === "M") {
|
||||
const pid = metaAt(change.path, "current")?.pageId;
|
||||
if (pid)
|
||||
survivingPath.set(pid, change.path);
|
||||
}
|
||||
}
|
||||
const ghostMove = new Map();
|
||||
for (const [pid, oldPath] of deletedPath) {
|
||||
const newPath = survivingPath.get(pid);
|
||||
if (newPath && newPath !== oldPath) {
|
||||
ghostMove.set(pid, { oldPath, newPath });
|
||||
}
|
||||
}
|
||||
for (const change of changes) {
|
||||
switch (change.status) {
|
||||
case "A": {
|
||||
const meta = metaAt(change.path, "current");
|
||||
const pageId = meta?.pageId;
|
||||
if (pageId && ghostMove.has(pageId)) {
|
||||
// Half of a git-undetected move (a matching DELETE exists): record it
|
||||
// as a rename/move (like a real `R`), NOT an update — the `D` side is
|
||||
// suppressed so the page is never soft-deleted.
|
||||
actions.renamesMoves.push({
|
||||
pageId,
|
||||
oldPath: ghostMove.get(pageId).oldPath,
|
||||
newPath: change.path,
|
||||
});
|
||||
}
|
||||
else if (pageId) {
|
||||
// Added but already carries a pageId (restored/copied file): the page
|
||||
// exists in Docmost, so push content as an UPDATE — never a duplicate.
|
||||
actions.updates.push({ pageId, path: change.path });
|
||||
}
|
||||
else if (meta?.spaceId) {
|
||||
// Brand-new local file with a target space -> create the page, then
|
||||
// write the assigned pageId back into its meta (in `applyPushActions`).
|
||||
// `meta.spaceId` is truthy here, so empty-string is also rejected.
|
||||
actions.creates.push({ path: change.path });
|
||||
}
|
||||
else {
|
||||
// A create needs a spaceId (Docmost `create_page` requires it, §16). A
|
||||
// new file with partial meta and no usable spaceId is SKIPPED rather
|
||||
// than created into a guessed space (SPEC §8 guard spirit).
|
||||
actions.skipped.push({
|
||||
path: change.path,
|
||||
status: "A",
|
||||
reason: "create-without-spaceId",
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "M": {
|
||||
const meta = metaAt(change.path, "current");
|
||||
const pageId = meta?.pageId;
|
||||
if (pageId && ghostMove.has(pageId)) {
|
||||
// This path's occupant changed pageId: the previous page left and THIS
|
||||
// page relocated here (a reshuffle). Its old file was DELETED elsewhere
|
||||
// — coalesce into a rename/move so the page is never trashed.
|
||||
actions.renamesMoves.push({
|
||||
pageId,
|
||||
oldPath: ghostMove.get(pageId).oldPath,
|
||||
newPath: change.path,
|
||||
});
|
||||
}
|
||||
else if (pageId) {
|
||||
actions.updates.push({ pageId, path: change.path });
|
||||
}
|
||||
else {
|
||||
// A modified file with no pageId has no Docmost target to update.
|
||||
actions.skipped.push({
|
||||
path: change.path,
|
||||
status: "M",
|
||||
reason: "modified file has no pageId in meta",
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "D": {
|
||||
// The file is gone from `main`; recover its pageId from the PRE-IMAGE
|
||||
// (the version last pushed to Docmost) so we delete the RIGHT page.
|
||||
const prevMeta = metaAt(change.path, "prev");
|
||||
const pageId = prevMeta?.pageId;
|
||||
if (pageId && ghostMove.has(pageId)) {
|
||||
// The same pageId was re-ADDED at a new path: this is a git-undetected
|
||||
// MOVE, handled by the `A` branch above. Suppress the delete so a moved
|
||||
// page is never trashed (⭐ data-loss guard).
|
||||
actions.skipped.push({
|
||||
path: change.path,
|
||||
status: "D",
|
||||
reason: "ghost-move (re-added at a new path) — not a deletion",
|
||||
});
|
||||
}
|
||||
else if (pageId && currentPageIds?.has(pageId)) {
|
||||
// The pageId still EXISTS elsewhere in the current tree: the file moved
|
||||
// (a layout reshuffle whose matching add was in an earlier cycle, so it
|
||||
// is not in this diff). A live page must never be trashed because its
|
||||
// FILENAME changed — identity is the pageId (⭐ data-loss guard).
|
||||
actions.skipped.push({
|
||||
path: change.path,
|
||||
status: "D",
|
||||
reason: "pageId still present in the tree (moved) — not a deletion",
|
||||
});
|
||||
}
|
||||
else if (pageId) {
|
||||
actions.deletes.push({ pageId });
|
||||
}
|
||||
else {
|
||||
// Untracked-file guard (SPEC §8): a file with no recoverable pageId was
|
||||
// never a Docmost page — do NOT translate its removal into a delete.
|
||||
actions.skipped.push({
|
||||
path: change.path,
|
||||
status: "D",
|
||||
reason: "deleted file has no recoverable pageId (pre-image meta)",
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case "R":
|
||||
case "C": {
|
||||
// Same page, new path. Identity comes from the CURRENT (post-rename) meta
|
||||
// since the file still exists. RESOLUTION (move vs rename, parentPageId)
|
||||
// is deferred — record oldPath/newPath/pageId only.
|
||||
const meta = metaAt(change.path, "current");
|
||||
const pageId = meta?.pageId;
|
||||
const oldPath = change.oldPath ?? change.path;
|
||||
if (pageId) {
|
||||
actions.renamesMoves.push({
|
||||
pageId,
|
||||
oldPath,
|
||||
newPath: change.path,
|
||||
});
|
||||
}
|
||||
else {
|
||||
actions.skipped.push({
|
||||
path: change.path,
|
||||
status: change.status,
|
||||
reason: "renamed/moved file has no pageId in meta",
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
// Unreachable for A/M/D/R/C; defensive for any future status.
|
||||
actions.skipped.push({
|
||||
path: change.path,
|
||||
status: change.status,
|
||||
reason: `unhandled diff status ${change.status}`,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
return actions;
|
||||
}
|
||||
// --- thin apply (create/update/delete), fakes-only in this increment ---------
|
||||
/** The marker the push direction advances after a successful push (SPEC §5/§6). */
|
||||
// Moved with `git.updateRef` by `applyPushActions` only when a push recorded no
// failures; also the ref the `prev` side is read from via `git.showFileAtRef`.
export const LAST_PUSHED_REF = "refs/docmost/last-pushed";
|
||||
/**
|
||||
* The mirror branch fast-forwarded after a clean push (SPEC §5/§6 step 3). It
|
||||
* reflects "what Docmost currently contains"; advancing it to the pushed `main`
|
||||
* commit closes the loop so the next pull diffs empty for the pushed pages.
|
||||
*/
|
||||
// Handed to `git.fastForwardBranch` by `applyPushActions` after a failure-free
// push; `runPush` also falls back to it as the diff base when
// `refs/docmost/last-pushed` does not resolve.
export const DOCMOST_BRANCH = "docmost";
|
||||
/**
 * Apply a planned push (`actions`) to Docmost through `deps.client`, one page
 * at a time, then close the sync loop in git if (and only if) nothing failed.
 *
 * Phases, executed in this order:
 *  1. UPDATE — read the file, drop the `gitmost_id` frontmatter with
 *     `parsePageFile`, and send ONLY the body to
 *     `client.importPageMarkdown(pageId, body, baseMarkdown)`. `baseMarkdown`
 *     is the body of the same path at `refs/docmost/last-pushed` (`null` when
 *     `git.showFileAtRef` returns `null`); per the original notes it serves as
 *     the common ancestor for a 3-way merge against the live page.
 *  2. CREATE — title from the file name (`titleFromPath`), parent from the
 *     enclosing folder's folder-note (`resolveParentPageIdViaTree`, `null`
 *     mapped to `undefined`), space from `deps.spaceId`. When the result
 *     carries `data.id`, the id is written back into the file
 *     (`serializePageFile` + `deps.writeFile`) and recorded in `writtenBack`
 *     and `pushed`. A result WITHOUT an id is still counted in `created` but
 *     yields no write-back and no `pushed` record.
 *  3. DELETE — `client.deletePage(pageId)` (a soft-delete to Trash according
 *     to the original notes, SPEC §8).
 *  4. RENAME/MOVE — prefetch parent ids and metas for every entry
 *     (`newPath` at the current tree, `oldPath` at last-pushed), run the
 *     synchronous `classifyRenameMoves` over those tables, then per entry:
 *     `noop` -> recorded in `noops`, no client call; `move` ->
 *     `client.movePage(pageId, parentPageId)` (no position is passed);
 *     `rename` -> `client.renamePage(pageId, title)`. When both are present
 *     the move runs first.
 *  5. REFS — when `pushedCommit` is truthy AND `failures` is empty:
 *     `git.updateRef(LAST_PUSHED_REF, pushedCommit)` followed by
 *     `git.fastForwardBranch(DOCMOST_BRANCH, pushedCommit)`, whose result is
 *     returned as `docmostFastForward`. Otherwise neither ref is touched.
 *
 * Failure isolation: every page operation runs inside its own try/catch. A
 * throw is appended to `failures` (kind, pageId and/or path, message) and the
 * batch continues with the next page. Errors thrown by the two git calls in
 * phase 5 are NOT caught here and propagate to the caller.
 *
 * @param deps Injected effects: `client`, `git`, `readFile`, `writeFile`,
 *   `spaceId`.
 * @param actions Plan with `updates`, `creates`, `deletes`, `renamesMoves`
 *   and `skipped` (the last is passed through to the result untouched).
 * @param pushedCommit The `main` commit (SHA or commit-ish) being reflected
 *   into Docmost. When omitted/falsy, no ref is advanced.
 * @returns Counters (`created`, `updated`, `deleted`, `moved`, `renamed`),
 *   the `writtenBack`, `pushed`, `failures`, `noops` and `skipped` lists, plus
 *   `lastPushedAdvanced` and `docmostFastForward`.
 */
export async function applyPushActions(deps, actions, pushedCommit) {
  const { client, git } = deps;
  const counts = { created: 0, updated: 0, deleted: 0, moved: 0, renamed: 0 };
  const writtenBack = [];
  const pushed = [];
  const failures = [];
  const noops = [];

  // Phase 1 — updates. Only the frontmatter-free body travels to Docmost.
  for (const { pageId, path } of actions.updates) {
    try {
      const body = parsePageFile(await deps.readFile(path)).body;
      // Pre-image of the same path at last-pushed, stripped the same way so
      // the client compares body against body. `null` means "no pre-image".
      const baseFull = await git.showFileAtRef(LAST_PUSHED_REF, path);
      const baseMarkdown = baseFull === null ? null : parsePageFile(baseFull).body;
      const result = await client.importPageMarkdown(pageId, body, baseMarkdown);
      counts.updated += 1;
      // Loop-guard record: hash of the pushed body plus `updatedAt` if returned.
      pushed.push({
        pageId,
        ...extractUpdatedAt(result),
        bodyHash: bodyHash(body),
      });
    } catch (err) {
      failures.push({ kind: "update", pageId, path, error: errMessage(err) });
    }
  }

  // Phase 2 — creates. Arguments come from the PATH, not from stored meta.
  for (const { path } of actions.creates) {
    try {
      const { body } = parsePageFile(await deps.readFile(path));
      const title = titleFromPath(path);
      // A `null` parent (root / unknown) is passed to the client as `undefined`.
      const parentPageId =
        (await resolveParentPageIdViaTree(deps, path, "current")) ?? undefined;
      const result = await client.createPage(title, body, deps.spaceId, parentPageId);
      // The assigned id is expected at `result.data.id`.
      const assignedPageId = result?.data?.id;
      if (assignedPageId) {
        // Persist the id as frontmatter (body preserved) so the file is tracked.
        await deps.writeFile(path, serializePageFile(assignedPageId, body));
        writtenBack.push({ path, pageId: assignedPageId });
        pushed.push({
          pageId: assignedPageId,
          ...extractUpdatedAt(result),
          bodyHash: bodyHash(body),
        });
      }
      counts.created += 1;
    } catch (err) {
      failures.push({ kind: "create", path, error: errMessage(err) });
    }
  }

  // Phase 3 — deletes.
  for (const { pageId } of actions.deletes) {
    try {
      await client.deletePage(pageId);
      counts.deleted += 1;
    } catch (err) {
      failures.push({ kind: "delete", pageId, error: errMessage(err) });
    }
  }

  // Phase 4 — renames / moves.
  if (actions.renamesMoves.length > 0) {
    // The classifier is synchronous, so every (path, side) lookup it may make
    // is resolved up front into plain tables keyed by `path|side`.
    const keyOf = (path, side) => `${path}|${side}`;
    const parentTable = new Map();
    const metaTable = new Map();
    // pageIds whose prefetch threw: reported as failures, kept out of the
    // classifier input.
    const prefetchFailed = new Set();

    for (const { pageId, oldPath, newPath } of actions.renamesMoves) {
      try {
        parentTable.set(
          keyOf(newPath, "current"),
          await resolveParentPageIdViaTree(deps, newPath, "current"),
        );
        parentTable.set(
          keyOf(oldPath, "prev"),
          await resolveParentPageIdViaTree(deps, oldPath, "prev"),
        );
        metaTable.set(
          keyOf(newPath, "current"),
          await metaAtViaTree(deps, newPath, "current", deps.spaceId),
        );
        metaTable.set(
          keyOf(oldPath, "prev"),
          await metaAtViaTree(deps, oldPath, "prev", deps.spaceId),
        );
      } catch (err) {
        prefetchFailed.add(pageId);
        failures.push({ kind: "move", pageId, path: newPath, error: errMessage(err) });
      }
    }

    const resolvable = actions.renamesMoves.filter(
      (entry) => !prefetchFailed.has(entry.pageId),
    );
    const classified = classifyRenameMoves(resolvable, {
      metaAt: (path, side) => metaTable.get(keyOf(path, side)) ?? null,
      resolveParentPageId: (path, side) => parentTable.get(keyOf(path, side)) ?? null,
    });

    for (const op of classified) {
      if (op.noop) {
        // Path-only change: nothing to do in Docmost, just report it.
        noops.push({
          pageId: op.pageId,
          oldPath: op.oldPath,
          newPath: op.newPath,
          reason: "path-only-rename",
        });
        continue;
      }
      // Remember which call is in flight so a failure names the op that threw
      // (a successful move followed by a failing rename is a `rename` failure).
      let failingKind = op.move ? "move" : "rename";
      try {
        if (op.move) {
          failingKind = "move";
          // TODO(next): compute a fractional-index position between siblings
          // (SPEC §16). No position is passed; `parentPageId` may be `null`.
          await client.movePage(op.pageId, op.move.parentPageId);
          counts.moved += 1;
        }
        if (op.rename) {
          failingKind = "rename";
          await client.renamePage(op.pageId, op.rename.title);
          counts.renamed += 1;
        }
      } catch (err) {
        failures.push({
          kind: failingKind,
          pageId: op.pageId,
          path: op.newPath,
          error: errMessage(err),
        });
      }
    }
  }

  // Phase 5 — refs move only for a failure-free push with a commit supplied.
  let lastPushedAdvanced = false;
  let docmostFastForward = null;
  if (pushedCommit && failures.length === 0) {
    await git.updateRef(LAST_PUSHED_REF, pushedCommit);
    lastPushedAdvanced = true;
    // The fast-forward outcome is surfaced to the caller as-is.
    docmostFastForward = await git.fastForwardBranch(DOCMOST_BRANCH, pushedCommit);
  }

  return {
    ...counts,
    writtenBack,
    pushed,
    failures,
    noops,
    skipped: actions.skipped,
    lastPushedAdvanced,
    docmostFastForward,
  };
}
|
||||
/**
 * Turn any thrown value into a readable, stable message WITHOUT ever throwing
 * itself (it runs inside `catch` handlers that must keep a batch alive).
 *
 * - `Error` instances -> their `message`.
 * - Primitives (string, number, `null`, `undefined`, …) -> `String(value)`.
 * - Objects with a non-empty string `message` -> that message.
 * - Other objects -> their own `String()` form when it is informative,
 *   otherwise their JSON form, otherwise the literal `[object Object]`.
 *
 * @param err The value caught by a `catch` clause.
 * @returns A message string; never throws for ordinary values.
 */
function errMessage(err) {
  if (err instanceof Error) {
    return err.message;
  }
  if (typeof err !== "object" || err === null) {
    return String(err);
  }
  // Error-like payloads (e.g. a rejected `{ message, status }`) carry the text
  // callers actually want to see.
  if (typeof err.message === "string" && err.message !== "") {
    return err.message;
  }
  const opaque = "[object Object]";
  let text;
  try {
    // Throws a TypeError for null-prototype objects, hence the guard.
    text = String(err);
  } catch {
    text = opaque;
  }
  if (text !== opaque) {
    return text; // arrays, objects with a custom toString, …
  }
  try {
    // `JSON.stringify` may return undefined or throw on circular structures.
    return JSON.stringify(err) ?? opaque;
  } catch {
    return opaque;
  }
}
|
||||
/**
 * SPEC §5 path-as-truth: map a vault-relative, forward-slash path to the file
 * of its PARENT page, or `null` when the parent is the space root.
 *
 * The page that owns a folder is that folder's folder-note,
 * `<folder>/<folder base name>.md`. Consequently:
 *  - `Child.md` (no enclosing folder)         -> `null`
 *  - `A/Child.md` (a page inside folder `A`)  -> `A/A.md`
 *  - `A/A.md` (the folder-note of `A` itself) -> `null` (one level up = root)
 *  - `A/B/B.md` (the folder-note of `A/B`)    -> `A/A.md`
 *
 * @param path Vault-relative path using `/` separators.
 * @returns The parent page's file path, or `null` for a root-level parent.
 */
export function parentFolderFile(path) {
  // Folder-note path of a folder: `<folder>/<last segment of folder>.md`.
  const folderNoteOf = (folder) =>
    `${folder}/${folder.slice(folder.lastIndexOf("/") + 1)}.md`;

  const cut = path.lastIndexOf("/");
  if (cut === -1) {
    return null; // no enclosing folder: the parent is ROOT
  }

  const dir = path.slice(0, cut);
  const ownFolderNote = folderNoteOf(dir);
  if (path !== ownFolderNote) {
    // An ordinary entry inside `dir`: `dir`'s folder-note is its parent.
    return ownFolderNote;
  }

  // `path` IS the folder-note of `dir`, so its parent sits one level higher:
  // the grandparent folder's folder-note, or ROOT for a top-level folder.
  const up = dir.lastIndexOf("/");
  return up === -1 ? null : folderNoteOf(dir.slice(0, up));
}
|
||||
/**
 * Decide whether a vault path denotes a Docmost PAGE file (design section
 * "Adoption"): the path must end in `.md` and no `/`-separated segment may
 * start with a dot. That rules out `.obsidian/…`, `.trash/…`, dotfiles such as
 * `.foo.md`, and every non-`.md` file (attachments, JSON, …). The match on the
 * extension is case-sensitive.
 *
 * @param path Vault-relative path using `/` separators.
 * @returns `true` for a page file, `false` otherwise.
 */
export function isPageFile(path) {
  if (path.endsWith(".md") === false) {
    return false;
  }
  for (const segment of path.split("/")) {
    if (segment.startsWith(".")) {
      return false; // hidden folder or dotfile somewhere along the path
    }
  }
  return true;
}
|
||||
/**
 * Return the final segment of a forward-slash path (the folder or file base
 * name). A path without any `/` is returned unchanged; a path ending in `/`
 * yields the empty string.
 */
function baseSegment(path) {
  // `lastIndexOf` gives -1 when there is no slash, so the slice starts at 0.
  return path.slice(path.lastIndexOf("/") + 1);
}
|
||||
/**
 * Derive a page TITLE from a vault path: the base name of the file with a
 * trailing `.md` removed (a base name without that extension is returned
 * as-is). For a folder-note `<dir>/<base>.md` the result equals the folder
 * name, so the same rule covers pages that own a folder.
 *
 * @param path Vault-relative path using `/` separators.
 * @returns The title implied by the file name.
 */
function titleFromPath(path) {
  const fileName = path.slice(path.lastIndexOf("/") + 1);
  const extension = ".md";
  if (!fileName.endsWith(extension)) {
    return fileName;
  }
  return fileName.slice(0, fileName.length - extension.length);
}
|
||||
/**
 * Assemble the synthetic meta object consumed by the planner/classifier from a
 * file in the native format:
 *  - `version` is always `1`,
 *  - `title` comes from the file name (`titleFromPath`),
 *  - `spaceId` is the value supplied by the caller,
 *  - `pageId` is the `id` returned by `parsePageFile`, present only when truthy.
 *
 * No `parentPageId` is produced here; tree position is resolved from the path
 * elsewhere (SPEC §5).
 *
 * @param text Full file content.
 * @param path Vault-relative path of the file.
 * @param spaceId Space id to stamp on the meta.
 */
function nativeMeta(text, path, spaceId) {
  const pageId = parsePageFile(text).id;
  const base = { version: 1, title: titleFromPath(path), spaceId };
  // Untracked files (no id in the frontmatter) get no `pageId` key at all.
  return pageId ? { ...base, pageId } : base;
}
|
||||
/**
 * Resolve the pageId of the PARENT page of `path` by reading the parent's file
 * (SPEC §5 path-as-truth). The parent file is whatever `parentFolderFile`
 * returns for `path`; it is read from:
 *  - `current` -> `deps.readFile(parentFile)` (the working tree),
 *  - any other side (`prev`) -> `deps.git.showFileAtRef(LAST_PUSHED_REF,
 *    parentFile)` (the last-pushed pre-image).
 *
 * Resolves to `null` when the path has no parent file (root level), when the
 * read throws, or when the read yields `null`. Otherwise resolves to the `id`
 * that `parsePageFile` extracts from the parent file.
 *
 * The lookup is asynchronous; callers prefetch results into plain tables so
 * the classifier itself can stay synchronous.
 */
async function resolveParentPageIdViaTree(deps, path, side) {
  const parentFile = parentFolderFile(path);
  if (parentFile === null) {
    return null; // root-level path: the parent is ROOT
  }

  const readParent = () =>
    side === "current"
      ? deps.readFile(parentFile)
      : deps.git.showFileAtRef(LAST_PUSHED_REF, parentFile);

  let text;
  try {
    text = await readParent();
  } catch {
    // Missing or unreadable parent file on that side is treated as ROOT.
    return null;
  }

  // `null` text means the file is absent at that side; otherwise only the
  // parent's id matters.
  return text === null ? null : parsePageFile(text).id;
}
|
||||
/**
 * Load the synthetic native meta of `path` at one side for the rename/move
 * classifier. `current` reads the working tree through `deps.readFile`; any
 * other side (`prev`) reads `refs/docmost/last-pushed` through
 * `deps.git.showFileAtRef`.
 *
 * Resolves to `null` when the read throws or yields `null`; otherwise to
 * `nativeMeta(text, path, spaceId)`.
 */
async function metaAtViaTree(deps, path, side, spaceId) {
  const readSide = () =>
    side === "current"
      ? deps.readFile(path)
      : deps.git.showFileAtRef(LAST_PUSHED_REF, path);

  let text;
  try {
    text = await readSide();
  } catch {
    return null; // unreadable at that side
  }

  return text === null ? null : nativeMeta(text, path, spaceId);
}
|
||||
/**
 * Extract an `updatedAt` value from a create/update client result. The nested
 * `result.data.updatedAt` wins; the flat `result.updatedAt` is consulted only
 * when the nested one is `null`/`undefined`. The value is kept only when it is
 * a string, and the key is omitted entirely otherwise, so spreading the return
 * value never introduces an `updatedAt: undefined` property.
 *
 * @param result Whatever the client returned (may be `null`/`undefined`).
 * @returns `{ updatedAt }` or `{}`.
 */
function extractUpdatedAt(result) {
  const candidate = result?.data?.updatedAt ?? result?.updatedAt;
  if (typeof candidate !== "string") {
    return {};
  }
  return { updatedAt: candidate };
}
|
||||
// --- runnable push orchestration (`runPush`) ---------------------------------
|
||||
//
|
||||
// `runPush` is the FS->Docmost twin of `pull.ts`'s `main`: it wires the VaultGit
|
||||
// diff/ref primitives + the PURE `computePushActions` planner + the THIN
|
||||
// `applyPushActions` applier into one runnable cycle. SAFE BY DEFAULT — the
|
||||
// engine's FIRST write path to Docmost defaults to DRY-RUN (plan only, NO
|
||||
// Docmost writes, NO ref advance); an explicit `--apply` is the ONLY path that
|
||||
// builds a client and mutates Docmost.
|
||||
//
|
||||
// Every external effect is injected (`PushDeps`) so the whole orchestration is
|
||||
// driven by FAKES in tests — no live Docmost, git, fs, or network.
|
||||
/**
|
||||
* The human ("local") git identity used for engine-made commits on `main` in the
|
||||
* push direction (SPEC §7.3). The provenance is carried by the trailer (below),
|
||||
* which the loop-guard keys on; the identity is for history readability only.
|
||||
* When the vault repo already has a configured `user.name`/`user.email`, git
|
||||
* uses that for the working-tree commit; this is the fallback the daemon stamps.
|
||||
*/
|
||||
// Supplied as `authorName` for the working-tree commit `runPush` makes on main.
export const LOCAL_AUTHOR_NAME = "Local";
|
||||
// Supplied as `authorEmail` for the working-tree commit `runPush` makes on main.
export const LOCAL_AUTHOR_EMAIL = "local@local";
|
||||
/** The provenance trailer marking a `main`-side (human/local) commit (SPEC §7.3). */
|
||||
// Supplied as the single entry of `trailers` for the working-tree commit
// `runPush` makes on main.
export const LOCAL_SOURCE_TRAILER = "Docmost-Sync-Source: local";
|
||||
/**
|
||||
* Run one FS->Docmost push cycle (SPEC §6 "ФС → Docmost"), DRY-RUN BY DEFAULT.
|
||||
*
|
||||
* Steps (mirrors `pull.ts`):
|
||||
* 1. Preflight git: `assertGitAvailable` + `ensureRepo`; ABORT (clear message +
|
||||
* non-zero-ish result) if a merge is in progress — never push on top of an
|
||||
* unresolved conflict (SPEC §9/§12). Conflict markers must NEVER reach
|
||||
* Docmost (SPEC §9).
|
||||
* 2. Checkout `main` (the human-facing branch the push reads from).
|
||||
* 3. Commit the human's pending working-tree changes on `main` with the
|
||||
* `local` provenance trailer (SPEC §7.3). A no-op when nothing changed.
|
||||
* 4. Pick the diff BASE: `refs/docmost/last-pushed` if it resolves, else the
|
||||
* `docmost` mirror branch (what Docmost currently has). Resolve `main`.
|
||||
* 5. `diffNameStatus(base, main)` -> changes; build the `metaAt(path, side)`
|
||||
* resolver (current = working tree, prev = `git show <base>:<path>`); run
|
||||
* the PURE `computePushActions`.
|
||||
* 6. DRY-RUN (default): LOG the full plan and RETURN — NO client, NO Docmost
|
||||
* calls, NO ref advance.
|
||||
* 7. `--apply`: build the client, run `applyPushActions(..., pushedCommit=main)`,
|
||||
* then (a) if any pageIds were written back (creates), commit them on `main`
|
||||
* with the `local` trailer and RE-advance `refs/docmost/last-pushed` to the
|
||||
* new commit so the recorded pageIds are persisted in what Docmost mirrors;
|
||||
* (b) ESCALATE a divergent-`docmost` ff refusal (SPEC §5) with a prominent
|
||||
* WARNING and a non-zero-ish flag. Then log a one-line summary.
|
||||
*/
|
||||
export async function runPush(deps, opts) {
|
||||
const { git, settings, log } = deps;
|
||||
const dryRun = opts.dryRun;
|
||||
// 1. Preflight git. Fail fast (actionable message via main().catch) if the git
|
||||
// binary is missing — the vault state store relies on it.
|
||||
await git.assertGitAvailable();
|
||||
await git.ensureRepo();
|
||||
// 1b. Refuse to push on top of an unresolved merge (SPEC §9/§12). A previous
|
||||
// conflicting pull leaves the vault mid-merge; pushing now could leak
|
||||
// conflict markers into Docmost (SPEC §9, the cardinal invariant). Detect
|
||||
// it BEFORE any checkout/diff and stop with a clear, actionable message so
|
||||
// re-runs converge once the human resolves (or aborts) the merge.
|
||||
if (await git.isMergeInProgress()) {
|
||||
log(`push: vault has an unresolved merge at ${settings.vaultPath} — resolve ` +
|
||||
`it (or 'git merge --abort') and re-run. Nothing was pushed to Docmost ` +
|
||||
`(conflict markers must never reach Docmost, SPEC §9).`);
|
||||
return { mode: dryRun ? "dry-run" : "apply", aborted: "merge-in-progress" };
|
||||
}
|
||||
// 2. Work on `main` — the human-facing branch the push diffs FROM.
|
||||
await git.checkout(DEFAULT_BRANCH);
|
||||
// 3. Commit the human's pending working-tree changes on `main` with the `local`
|
||||
// provenance trailer (SPEC §7.3). A no-op commit when nothing changed is
|
||||
// fine (`commit` returns false). The loop-guard keys on the trailer.
|
||||
// Even on a "plan only" dry-run this commits the working tree (it is the
|
||||
// only way to diff `base..main`, acceptable §6.1 behavior) — so make that
|
||||
// LOCAL git mutation VISIBLE, never silent: a created commit is local-only
|
||||
// and nothing is sent to Docmost.
|
||||
await git.stageAll();
|
||||
const committedWorkingTree = await git.commit("local: working-tree changes", {
|
||||
authorName: LOCAL_AUTHOR_NAME,
|
||||
authorEmail: LOCAL_AUTHOR_EMAIL,
|
||||
trailers: [LOCAL_SOURCE_TRAILER],
|
||||
});
|
||||
if (committedWorkingTree) {
|
||||
const sha = await git.revParse(DEFAULT_BRANCH);
|
||||
log(`push: committed local working-tree changes on main` +
|
||||
(sha ? ` as ${sha.slice(0, 8)}` : "") +
|
||||
` (local git only — nothing sent to Docmost).`);
|
||||
}
|
||||
else {
|
||||
log("push: working tree clean (no local changes to push).");
|
||||
}
|
||||
// 4. Pick the diff BASE (SPEC §5/§6): `refs/docmost/last-pushed` if it resolves
|
||||
// (the marker of what `main` is already in Docmost), else fall back to the
|
||||
// `docmost` mirror branch (the mirror of what Docmost currently has) — which
|
||||
// is what exists before the first push ever advanced last-pushed.
|
||||
let base;
|
||||
const lastPushedSha = await git.readRef(LAST_PUSHED_REF);
|
||||
if (lastPushedSha) {
|
||||
base = { ref: LAST_PUSHED_REF, source: "last-pushed", sha: lastPushedSha };
|
||||
}
|
||||
else {
|
||||
base = {
|
||||
ref: DOCMOST_BRANCH,
|
||||
source: "docmost",
|
||||
sha: await git.revParse(DOCMOST_BRANCH),
|
||||
};
|
||||
}
|
||||
const pushedCommit = await git.revParse(DEFAULT_BRANCH);
|
||||
if (!pushedCommit) {
|
||||
// `main` has no commit — `ensureRepo` always makes an initial one, so this is
|
||||
// defensive. Nothing to diff.
|
||||
log("push: `main` has no commit to push — nothing to do.");
|
||||
return { mode: dryRun ? "dry-run" : "apply", base };
|
||||
}
|
||||
// 5. Diff the base against `main` and build the `metaAt` resolver (PURE planner
|
||||
// input). `current` reads the live working tree; `prev` reads the base ref's
|
||||
// pre-image via `git show <base>:<path>` (so a DELETE recovers its pageId).
|
||||
const changes = await git.diffNameStatus(base.ref, DEFAULT_BRANCH);
|
||||
// Synchronous resolver over PREFETCHED meta tables: `computePushActions` is
|
||||
// PURE/sync, but the file/ref reads are async — so we prefetch every (path,
|
||||
// side) the diff will ask for into a table first, then resolve from it.
|
||||
const metaTable = new Map();
|
||||
for (const change of changes) {
|
||||
// `current`: A/M/R/C still have the file on `main`. `prev`: D needs the
|
||||
// pre-image; R/C also benefit (old title). Prefetch both sides per path.
|
||||
const currentPath = change.path;
|
||||
const prevPath = change.oldPath ?? change.path;
|
||||
if (!metaTable.has(`${currentPath}|current`)) {
|
||||
metaTable.set(`${currentPath}|current`, await readMetaCurrent(deps, currentPath, settings.docmostSpaceId));
|
||||
}
|
||||
if (!metaTable.has(`${prevPath}|prev`)) {
|
||||
metaTable.set(`${prevPath}|prev`, await readMetaPrev(deps, base.ref, prevPath, settings.docmostSpaceId));
|
||||
}
|
||||
}
|
||||
const metaAt = (path, side) => metaTable.get(`${path}|${side}`) ?? null;
|
||||
// The set of pageIds that STILL EXIST somewhere in the current `main` tree.
|
||||
// Identity is the pageId, NOT the filename: a file vanishing from one path
|
||||
// while the SAME pageId lives at another path is a MOVE (often a layout
|
||||
// reshuffle of `_`-fallback names, whose two halves can even land in separate
|
||||
// cycles), never a deletion. Built only when the diff contains deletes — the
|
||||
// guard's whole job is to stop a phantom delete from trashing a live page.
|
||||
let currentPageIds;
|
||||
if (changes.some((c) => c.status === "D")) {
|
||||
currentPageIds = new Set();
|
||||
for (const relPath of await git.listTrackedFiles("*.md")) {
|
||||
const pid = (await readMetaCurrent(deps, relPath, settings.docmostSpaceId))
|
||||
?.pageId;
|
||||
if (pid)
|
||||
currentPageIds.add(pid);
|
||||
}
|
||||
}
|
||||
const actions = computePushActions({ changes, metaAt, currentPageIds });
|
||||
const planned = {
|
||||
creates: actions.creates.length,
|
||||
updates: actions.updates.length,
|
||||
deletes: actions.deletes.length,
|
||||
renamesMoves: actions.renamesMoves.length,
|
||||
skipped: actions.skipped.length,
|
||||
};
|
||||
// 6. DRY-RUN (default): log the full plan and RETURN — build NO client, make
|
||||
// ZERO Docmost calls, advance NO refs. This is the SAFE default.
|
||||
logPlan(log, base, pushedCommit, actions, planned, dryRun);
|
||||
if (dryRun) {
|
||||
return { mode: "dry-run", base, pushedCommit, planned };
|
||||
}
|
||||
// 7. --apply: build the REAL client and execute. This is the ONLY write path.
|
||||
const client = deps.makeClient(settings);
|
||||
const applied = await applyPushActions({
|
||||
client,
|
||||
// Pass the WHOLE `git` object (it satisfies the applier's
|
||||
// `Pick<VaultGit, ...>` deps surface). Passing bare method references
|
||||
// (`git.updateRef`, …) would lose their `this` binding, so on a REAL
|
||||
// `VaultGit` they would throw `this.runRaw is not a function`. Hand over
|
||||
// the object so the methods keep their receiver — exactly as `pull.ts`
|
||||
// does for `applyPullActions`.
|
||||
git,
|
||||
readFile: deps.readFile,
|
||||
writeFile: deps.writeFile,
|
||||
spaceId: settings.docmostSpaceId,
|
||||
}, actions, pushedCommit);
|
||||
// 7a. Persist freshly-assigned pageIds (creates) back into git. `applyPushActions`
|
||||
// rewrote those files on disk; commit them on `main` with the `local` trailer
|
||||
// so the new pageIds are recorded, then RE-advance `refs/docmost/last-pushed`
|
||||
// to the new commit so what Docmost mirrors and what last-pushed points at
|
||||
// stay in lock-step (the write-back commit is part of `main` now).
|
||||
// Track a divergent-`docmost` mirror across BOTH ff sites (the applier's main
|
||||
// push ff in 7b, and the write-back ff here). A divergent mirror is a §5
|
||||
// invariant breach in EITHER branch and must escalate identically (exit 1).
|
||||
let divergentDocmost = false;
|
||||
if (applied.writtenBack.length > 0) {
|
||||
await git.stageAll();
|
||||
const recorded = await git.commit("local: record created pageIds", {
|
||||
authorName: LOCAL_AUTHOR_NAME,
|
||||
authorEmail: LOCAL_AUTHOR_EMAIL,
|
||||
trailers: [LOCAL_SOURCE_TRAILER],
|
||||
});
|
||||
if (recorded) {
|
||||
const newCommit = await git.revParse(DEFAULT_BRANCH);
|
||||
// Only re-advance when the original push was CLEAN (last-pushed was already
|
||||
// advanced by the applier); a partial push left the refs untouched and a
|
||||
// re-run retries the whole batch, so we must not move them either.
|
||||
if (newCommit && applied.lastPushedAdvanced) {
|
||||
await git.updateRef(LAST_PUSHED_REF, newCommit);
|
||||
const ff = await git.fastForwardBranch(DOCMOST_BRANCH, newCommit);
|
||||
if (!ff.ok) {
|
||||
// SYMMETRIC with the main escalation (7b): a divergent mirror in the
|
||||
// write-back branch is the SAME §5 invariant breach and must escalate
|
||||
// (exit 1), not just log a soft warning.
|
||||
divergentDocmost = true;
|
||||
log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
|
||||
`fast-forwarded to the pageId write-back commit ` +
|
||||
`(${ff.reason ?? "not-fast-forward"}). The §5 invariant ('docmost' ` +
|
||||
`mirrors what Docmost contains) is broken: reconcile 'docmost' ` +
|
||||
`against the live Docmost tree before the next cycle.`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// 7b. ESCALATE a divergent-`docmost` fast-forward refusal (SPEC §5 invariant
|
||||
// broken). The applier already refused to clobber a divergent mirror; make
|
||||
// it LOUD (not silent) so the operator notices, and fold it into the exit.
|
||||
if (applied.docmostFastForward && !applied.docmostFastForward.ok) {
|
||||
divergentDocmost = true;
|
||||
log(`push: WARNING — the 'docmost' mirror branch DIVERGED and was NOT ` +
|
||||
`fast-forwarded (${applied.docmostFastForward.reason ?? "not-fast-forward"}). ` +
|
||||
`The §5 invariant ('docmost' mirrors what Docmost contains) is broken: ` +
|
||||
`reconcile 'docmost' against the live Docmost tree before the next cycle.`);
|
||||
}
|
||||
// 7c. One-line summary (mirrors pull.ts's summary line).
|
||||
log(`push complete: ${applied.created} created, ${applied.updated} updated, ` +
|
||||
`${applied.deleted} deleted, ${applied.moved} moved, ${applied.renamed} ` +
|
||||
`renamed, ${applied.noops.length} no-op(s), ${applied.skipped.length} ` +
|
||||
`skipped, ${applied.failures.length} failure(s)` +
|
||||
(divergentDocmost ? " [DIVERGENT docmost mirror]" : ""));
|
||||
return {
|
||||
mode: "apply",
|
||||
base,
|
||||
pushedCommit,
|
||||
planned,
|
||||
applied,
|
||||
divergentDocmost,
|
||||
failures: applied.failures,
|
||||
};
|
||||
}
|
||||
/**
 * Build the `current`-side meta for `path` from the live working tree.
 *
 * Reads the file through `deps.readFile`; if that read fails the path is
 * treated as absent and `null` is returned. Otherwise the text is handed to
 * `nativeMeta` together with `path` and `spaceId`.
 */
async function readMetaCurrent(deps, path, spaceId) {
    // Only the read itself is guarded: a failed read means "not on disk"
    // (e.g. a D row's path), while anything `nativeMeta` throws still propagates.
    let fileText;
    let readable = true;
    try {
        fileText = await deps.readFile(path);
    }
    catch {
        readable = false;
    }
    return readable ? nativeMeta(fileText, path, spaceId) : null;
}
|
||||
/**
 * Build the `prev`-side meta for `path` from its pre-image at `baseRef`.
 *
 * Returns `null` when `deps.git.showFileAtRef` throws or reports `null` (the
 * path is absent at the base ref); otherwise the pre-image text is handed to
 * `nativeMeta` together with `path` and `spaceId`.
 */
async function readMetaPrev(deps, baseRef, path, spaceId) {
    let preImage;
    try {
        preImage = await deps.git.showFileAtRef(baseRef, path);
    }
    catch {
        // A failed lookup is treated exactly like a missing path.
        return null;
    }
    // Strict `null` check: only an explicit "absent" result short-circuits.
    return preImage === null ? null : nativeMeta(preImage, path, spaceId);
}
|
||||
/**
 * Write the push plan to the injected logger: a header line (mode, diff base,
 * target commit), a counts line, then one line per planned create / update /
 * delete / rename-move / skipped item, in that order.
 */
function logPlan(log, base, pushedCommit, actions, planned, dryRun) {
    const modeLabel = dryRun ? "DRY-RUN — no Docmost writes" : "APPLY";
    // The base sha is optional; when present only its first 8 characters are shown.
    const baseShaSuffix = base.sha ? ` ${base.sha.slice(0, 8)}` : "";
    log(`push plan (${modeLabel}): base=${base.ref} (${base.source}${baseShaSuffix}) ` +
        `-> main ${pushedCommit.slice(0, 8)}`);
    const counts = [
        `${planned.creates} create`,
        `${planned.updates} update`,
        `${planned.deletes} delete`,
        `${planned.renamesMoves} rename/move`,
        `${planned.skipped} skipped`,
    ];
    log(`push plan counts: ${counts.join(", ")}`);
    // One entry per action list, in output order, with its line formatter.
    const sections = [
        { key: "creates", render: (c) => ` create: ${c.path}` },
        { key: "updates", render: (u) => ` update: ${u.pageId} (${u.path})` },
        { key: "deletes", render: (d) => ` delete: ${d.pageId}` },
        { key: "renamesMoves", render: (rm) => ` rename/move: ${rm.oldPath} -> ${rm.newPath} (${rm.pageId})` },
        { key: "skipped", render: (s) => ` skipped [${s.status}] ${s.path}: ${s.reason}` },
    ];
    for (const section of sections) {
        for (const item of actions[section.key]) {
            log(section.render(item));
        }
    }
}
|
||||
/**
 * Interpret the `push` command-line flags.
 *
 * The run stays a DRY-RUN (plan only) unless the exact token `--apply` is
 * present somewhere in `argv`. Exported so the flag handling can be unit-tested.
 *
 * @param argv Command-line tokens to scan.
 * @returns `{ apply }`, true only when `--apply` was passed.
 */
export function parseArgs(argv) {
    const apply = argv.indexOf("--apply") !== -1;
    return { apply };
}
|
||||
126
packages/git-sync/build/engine/reconcile.d.ts
vendored
126
packages/git-sync/build/engine/reconcile.d.ts
vendored
@@ -1,126 +0,0 @@
|
||||
/**
|
||||
* Pure reconciliation planner (SPEC §5/§6/§8).
|
||||
*
|
||||
* Given the desired live set of files (computed from the current Docmost tree)
|
||||
* and the set of files currently tracked in the vault, compute what to write,
|
||||
* what to move (old path to remove), and what to delete. Identity is `pageId`
|
||||
* (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
|
||||
* changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
|
||||
* the live tree is a DELETE.
|
||||
*
|
||||
* This module is intentionally PURE (no IO, no git) so the whole plan is
|
||||
* unit-testable. The actual file writing / git operations happen in pull.ts.
|
||||
*/
|
||||
/**
 * One page of the desired (live) set. `planReconciliation` schedules a write
 * for every such entry at `relPath` and matches it against tracked files by
 * `pageId`.
 */
export interface LiveEntry {
    /** Vault-relative destination path, forward-slash separated (e.g. `Space/Parent/Child.md`). */
    relPath: string;
    /** Identity of the page; the key used to match against tracked files. */
    pageId: string;
}
|
||||
/**
 * One file currently tracked in the vault, identified by the `pageId` parsed
 * from its meta. Several entries may carry the same `pageId`.
 */
export interface ExistingEntry {
    /** Vault-relative path (forward-slash) where the tracked file lives now. */
    relPath: string;
    /** Page identity recorded in the file's meta. */
    pageId: string;
}
|
||||
/** A scheduled write: the page `pageId` is (re)written at `relPath`. */
export interface WriteEntry {
    /** Destination path of the write. */
    relPath: string;
    /** Page being written. */
    pageId: string;
}
|
||||
/**
 * A tracked page whose path changed: it is written at `toRelPath`, and the
 * file at `fromRelPath` is the stale copy.
 */
export interface MovedEntry {
    /** Identity shared by the old and the new location. */
    pageId: string;
    /** Path where the page is tracked today. */
    fromRelPath: string;
    /** Path the live set wants the page at. */
    toRelPath: string;
    /**
     * True when `fromRelPath` may be removed. It is false when that exact path
     * is also the target of a live write (path reuse): removing it would destroy
     * the other page's data, so the caller must leave it in place. The move is
     * recorded either way.
     */
    removeOldPath: boolean;
}
|
||||
/** Result of `planReconciliation`: what to write, what to delete, what moved. */
export interface ReconciliationPlan {
    /**
     * One entry per live page, to be (re)written at its relPath. Because every
     * live page is listed whether or not it was tracked before, this single
     * list covers adds, in-place content updates and the new side of moves.
     */
    toWrite: WriteEntry[];
    /**
     * Paths to delete because the tracked pageId is ABSENT from the live set
     * (page removed/trashed). Absence-based only: the old paths of moved pages
     * are reported in `moved`, never here. Keeping the two apart lets pull.ts
     * gate absence deletions behind the incomplete-fetch suppression and the
     * mass-delete guard (SPEC §8) while still applying real moves.
     */
    toDelete: string[];
    /**
     * Tracked pages whose relPath changed. The caller writes the page at
     * `toRelPath` and removes `fromRelPath` only after that write succeeded.
     */
    moved: MovedEntry[];
}
|
||||
/**
|
||||
* Compute the reconciliation plan.
|
||||
*
|
||||
* Rules:
|
||||
* - Every `live` page is written at its relPath (covers add + update + move).
|
||||
* - A tracked pageId present in `live` whose relPath changed is `moved`; its
|
||||
* OLD relPath goes into `moved` ONLY (the caller removes it after the new
|
||||
* path is written) and is NEVER added to `toDelete`.
|
||||
* - A tracked pageId NOT present in `live` is an ABSENCE delete; its relPath
|
||||
* is added to `toDelete`.
|
||||
*
|
||||
* Notes:
|
||||
* - Safety filter (no data loss): no path that is a live TARGET path of any
|
||||
* page is ever deleted/removed (a write owns it). This applies to BOTH the
|
||||
* absence `toDelete` set AND a moved page's old-path removal — if a moved
|
||||
* page's OLD path is reused by ANOTHER live page, the move records no old
|
||||
* path to remove, because that path will be (re)written.
|
||||
* - `existing` may legitimately contain duplicate pageIds (two stray files
|
||||
* carrying the same meta pageId); each such file that is not the live target
|
||||
* path is removed (as an absence/move) so the vault converges to exactly the
|
||||
* live set.
|
||||
*/
|
||||
// Entries are matched on pageId; the returned plan keeps writes, absence
// deletions and moves in three separate lists.
export declare function planReconciliation(live: LiveEntry[], existing: ExistingEntry[]): ReconciliationPlan;
|
||||
/**
|
||||
* Below this many tracked files the mass-delete fraction guard is not applied
|
||||
* (a tiny vault where deleting "most" files is normal, e.g. 1-of-2).
|
||||
*/
|
||||
// Declared with its literal value, so the type is the number literal 4.
export declare const MASS_DELETE_MIN_EXISTING = 4;
|
||||
/** Fraction of tracked files above which a delete plan is a suspected wipe. */
|
||||
// Declared with its literal value, so the type is the number literal 0.5.
export declare const MASS_DELETE_FRACTION = 0.5;
|
||||
/**
 * Outcome of the absence-deletion gate, discriminated on `apply`: either the
 * deletions go ahead, or they are suppressed and `reason` names the guard that
 * fired.
 */
export type DeletionDecision =
    | { apply: true }
    | { apply: false; reason: "incomplete-fetch" | "empty-live" | "mass-delete" };
|
||||
/**
 * Pure gate for the ABSENCE-based deletions (`plan.toDelete`): decides whether
 * they may be applied this cycle. It packages the SPEC §8 safety invariants so
 * they can be unit-tested without live credentials or git.
 *
 * Deletions are SUPPRESSED when:
 *  - `treeComplete` is false (partial Docmost tree fetch). A page missing from
 *    a partial tree is not proof of deletion. Writes/updates/moves still happen.
 *  - the live fetch returned 0 pages while files are tracked (almost always a
 *    failed fetch, never a real "delete everything").
 *  - the plan would delete more than `MASS_DELETE_FRACTION` of a non-trivial
 *    vault (mass-deletion guard, defense in depth).
 *
 * Moves are outside this decision: a moved page IS in the live set, so its
 * old-path removal is handled separately by the caller.
 */
export declare function decideAbsenceDeletions(args: {
    /** False when the Docmost tree fetch was partial. */
    treeComplete: boolean;
    /** Number of pages in the live set. */
    liveCount: number;
    /** Number of files currently tracked in the vault. */
    existingCount: number;
    /** Number of absence deletions the plan proposes. */
    deleteCount: number;
}): DeletionDecision;
|
||||
@@ -1,117 +0,0 @@
|
||||
/**
|
||||
* Pure reconciliation planner (SPEC §5/§6/§8).
|
||||
*
|
||||
* Given the desired live set of files (computed from the current Docmost tree)
|
||||
* and the set of files currently tracked in the vault, compute what to write,
|
||||
* what to move (old path to remove), and what to delete. Identity is `pageId`
|
||||
* (the stable file<->page anchor, SPEC §4): a page that keeps its pageId but
|
||||
* changes relPath is a MOVE, not delete+add; a tracked pageId that is gone from
|
||||
* the live tree is a DELETE.
|
||||
*
|
||||
* This module is intentionally PURE (no IO, no git) so the whole plan is
|
||||
* unit-testable. The actual file writing / git operations happen in pull.ts.
|
||||
*/
|
||||
/**
 * Compute the reconciliation plan for a vault.
 *
 * Rules:
 *  - Every `live` page is written at its relPath (covers add + update + move).
 *  - A tracked pageId present in `live` whose relPath changed is reported in
 *    `moved` ONLY (the caller removes the old path after the new path is
 *    written); it is NEVER added to `toDelete`.
 *  - A tracked pageId NOT present in `live` is an ABSENCE delete; its relPath
 *    goes into `toDelete`.
 *
 * Notes:
 *  - Safety filter (no data loss): a path that is the live TARGET path of any
 *    page is never deleted/removed, because a write owns it. An absent page at
 *    such a path is not queued for deletion, and a moved page whose old path is
 *    reused gets `removeOldPath: false`.
 *  - `existing` may contain several files carrying the same pageId; each one
 *    that is not at the live target path is reported (as an absence or a move)
 *    so the vault converges to exactly the live set.
 *  - Repeated identical `existing` rows are reported once, for moves as well as
 *    for deletes, so the caller never removes the same old path twice.
 *  - If `live` lists a pageId more than once, the LAST relPath is the one
 *    tracked files are compared against (all entries are still written).
 *
 * @param live Desired pages, each `{ pageId, relPath }`.
 * @param existing Currently tracked files, each `{ pageId, relPath }`.
 * @returns `{ toWrite, toDelete, moved }`.
 */
export function planReconciliation(live, existing) {
    // Desired path per live pageId, plus every path a write will own.
    const liveByPageId = new Map();
    const liveTargetPaths = new Set();
    for (const entry of live) {
        liveByPageId.set(entry.pageId, entry.relPath);
        liveTargetPaths.add(entry.relPath);
    }
    const toWrite = live.map(({ pageId, relPath }) => ({ pageId, relPath }));
    const moved = [];
    // (pageId, old path) pairs already recorded as moves.
    const seenMoves = new Set();
    // Absence deletions only; a Set keeps each path queued once.
    const toDeleteSet = new Set();
    for (const tracked of existing) {
        const liveRel = liveByPageId.get(tracked.pageId);
        const pathIsLiveTarget = liveTargetPaths.has(tracked.relPath);
        if (liveRel === undefined) {
            // Page is gone from the live tree -> absence delete, unless a live
            // page will (re)write this very path (path reuse -> nothing is lost).
            if (!pathIsLiveTarget) {
                toDeleteSet.add(tracked.relPath);
            }
            continue;
        }
        if (liveRel === tracked.relPath) {
            // In-place content update: the write in `toWrite` already covers it.
            continue;
        }
        // Same pageId at a different path -> a MOVE. JSON encoding keeps the
        // pair unambiguous whatever characters the ids/paths contain.
        const moveKey = JSON.stringify([tracked.pageId, tracked.relPath]);
        if (seenMoves.has(moveKey)) {
            continue;
        }
        seenMoves.add(moveKey);
        moved.push({
            pageId: tracked.pageId,
            fromRelPath: tracked.relPath,
            toRelPath: liveRel,
            // A reused old path belongs to another live write; it must stay.
            removeOldPath: !pathIsLiveTarget,
        });
    }
    return { toWrite, toDelete: [...toDeleteSet], moved };
}
|
||||
/**
|
||||
* Below this many tracked files the mass-delete fraction guard is not applied
|
||||
* (a tiny vault where deleting "most" files is normal, e.g. 1-of-2).
|
||||
*/
|
||||
// Compared with `>=` against `existingCount` in `decideAbsenceDeletions`: the
// fraction guard is only evaluated once at least this many files are tracked.
export const MASS_DELETE_MIN_EXISTING = 4;
|
||||
/** Fraction of tracked files above which a delete plan is a suspected wipe. */
|
||||
// Strict threshold: `decideAbsenceDeletions` suppresses only when
// deleteCount > existingCount * MASS_DELETE_FRACTION, so deleting exactly
// half of the tracked files is still applied.
export const MASS_DELETE_FRACTION = 0.5;
|
||||
/**
 * Pure gate for the ABSENCE-based deletions (`plan.toDelete`): may they be
 * applied this cycle? Packages the SPEC §8 safety invariants so they can be
 * unit-tested without live credentials or git.
 *
 * Checks run in this order:
 *  1. Nothing tracked or nothing to delete -> apply (trivially safe).
 *  2. `treeComplete` is false -> suppress with "incomplete-fetch": a page
 *     missing from a partial tree is not proof of deletion.
 *  3. `liveCount` is 0 -> suppress with "empty-live" (almost always a failed
 *     fetch, never a real "delete everything").
 *  4. At least `MASS_DELETE_MIN_EXISTING` files tracked AND more than
 *     `MASS_DELETE_FRACTION` of them would go -> suppress with "mass-delete".
 *  5. Otherwise -> apply.
 *
 * Moves are outside this decision: a moved page IS in the live set, so its
 * old-path removal is handled separately by the caller.
 *
 * @param args `{ treeComplete, liveCount, existingCount, deleteCount }`.
 * @returns `{ apply: true }` or `{ apply: false, reason }`.
 */
export function decideAbsenceDeletions(args) {
    const suppress = (reason) => ({ apply: false, reason });
    const nothingAtStake = args.existingCount === 0 || args.deleteCount === 0;
    if (nothingAtStake) {
        return { apply: true };
    }
    if (!args.treeComplete) {
        return suppress("incomplete-fetch");
    }
    if (args.liveCount === 0) {
        return suppress("empty-live");
    }
    const vaultIsNonTrivial = args.existingCount >= MASS_DELETE_MIN_EXISTING;
    const exceedsFraction = args.deleteCount > args.existingCount * MASS_DELETE_FRACTION;
    return vaultIsNonTrivial && exceedsFraction
        ? suppress("mass-delete")
        : { apply: true };
}
|
||||
@@ -1,21 +0,0 @@
|
||||
/**
|
||||
* Pure, IO-free comparison helpers for the idempotency round-trip checks. The
|
||||
* round-trip harness that drives these lives in the package's tests, not in the
|
||||
* engine.
|
||||
*/
|
||||
/**
|
||||
* Recursively strip every `attrs.id` from a ProseMirror node tree. Block ids
|
||||
* are regenerated by `markdownToProseMirror` (SPEC §11), so they must be
|
||||
* ignored when comparing the semantic shape of two documents. Returns a NEW
|
||||
* tree; the input is not mutated.
|
||||
*/
|
||||
// Typed any -> any: arrays, plain objects and primitives are all accepted;
// values that are neither arrays nor objects come back unchanged.
export declare function stripBlockIds(node: any): any;
|
||||
/**
 * Deep-compare two values and report the first place they differ.
 *
 * @param a Left-hand value.
 * @param b Right-hand value.
 * @param path Label for the root of the comparison; nested locations are
 *   appended to it (`.key`, `[index]`, `.length`).
 * @returns `null` when no difference is found, otherwise the location of the
 *   first difference together with the two values found there.
 */
export declare function firstDivergence(a: any, b: any, path?: string): null | {
    path: string;
    a: any;
    b: any;
};
|
||||
@@ -1,70 +0,0 @@
|
||||
/**
|
||||
* Pure, IO-free comparison helpers for the idempotency round-trip checks. The
|
||||
* round-trip harness that drives these lives in the package's tests, not in the
|
||||
* engine.
|
||||
*/
|
||||
/**
 * Recursively strip every `attrs.id` from a ProseMirror node tree. Block ids
 * are regenerated by `markdownToProseMirror` (SPEC §11), so they must be
 * ignored when comparing the semantic shape of two documents.
 *
 * Only an `id` sitting directly inside an object-valued `attrs` property is
 * dropped; an `id` anywhere else is kept. Arrays are mapped element by
 * element, and non-object values are returned as-is.
 *
 * Robustness:
 *  - The copy is built with `Object.fromEntries`, so an own `__proto__` key
 *    (as produced by `JSON.parse`) is copied as ordinary data instead of
 *    silently replacing the copy's prototype.
 *  - An array-valued `attrs` is copied as an array rather than being turned
 *    into an index-keyed object.
 *
 * @param node Any value; typically a ProseMirror JSON node or an array of them.
 * @returns A NEW tree without block ids; the input is not mutated.
 */
export function stripBlockIds(node) {
    if (Array.isArray(node)) {
        return node.map((child) => stripBlockIds(child));
    }
    if (node === null || typeof node !== "object") {
        return node;
    }
    const entries = Object.keys(node).map((key) => {
        const value = node[key];
        const isAttrsBag = key === "attrs" &&
            value !== null &&
            typeof value === "object" &&
            !Array.isArray(value);
        if (isAttrsBag) {
            // Drop the `id` attr; keep (and recurse into) every other attribute.
            const { id, ...rest } = value;
            void id;
            return [key, stripBlockIds(rest)];
        }
        return [key, stripBlockIds(value)];
    });
    return Object.fromEntries(entries);
}
|
||||
/**
 * Find the first divergence between two values via a recursive deep compare.
 *
 * Comparison rules:
 *  - Identical values (`===`) are equal, and `NaN` is equal to `NaN`, so a
 *    document never diverges from an identical copy of itself.
 *  - Values of different `typeof`, a `null` against a non-null, an array
 *    against a non-array, and unequal primitives diverge at the current path.
 *  - Arrays of different length diverge at `<path>.length`; otherwise their
 *    elements are compared in order at `<path>[i]`.
 *  - Objects are compared over the union of their own enumerable keys at
 *    `<path>.<key>`; a key missing on one side compares as `undefined`.
 *
 * @param a Left-hand value.
 * @param b Right-hand value.
 * @param path Label for the current location (defaults to `"$"`).
 * @returns `null` when the values are equal, otherwise `{ path, a, b }` with
 *   the location of the first difference and the two differing values.
 */
export function firstDivergence(a, b, path = "$") {
    // NaN !== NaN, so it needs its own equality case.
    if (a === b || (Number.isNaN(a) && Number.isNaN(b))) {
        return null;
    }
    const here = { path, a, b };
    if (typeof a !== typeof b || a === null || b === null) {
        return here;
    }
    if (typeof a !== "object") {
        return here;
    }
    const aIsArray = Array.isArray(a);
    if (aIsArray !== Array.isArray(b)) {
        return here;
    }
    if (aIsArray) {
        if (a.length !== b.length) {
            return { path: `${path}.length`, a: a.length, b: b.length };
        }
        for (let i = 0; i < a.length; i++) {
            const found = firstDivergence(a[i], b[i], `${path}[${i}]`);
            if (found) {
                return found;
            }
        }
        return null;
    }
    // Union of keys, a's keys first, so the reported path is deterministic.
    const keys = new Set([...Object.keys(a), ...Object.keys(b)]);
    for (const key of keys) {
        const found = firstDivergence(a[key], b[key], `${path}.${key}`);
        if (found) {
            return found;
        }
    }
    return null;
}
|
||||
23
packages/git-sync/build/engine/sanitize.d.ts
vendored
23
packages/git-sync/build/engine/sanitize.d.ts
vendored
@@ -1,23 +0,0 @@
|
||||
/**
|
||||
* Deterministic filename strategy (SPEC §12).
|
||||
*
|
||||
* The file name is COSMETIC — the source of truth for the file<->page link is
|
||||
* `pageId` / `slugId` inside the meta block, so renaming a file is safe. These
|
||||
* functions are intentionally dependency-free and pure, so they are trivially
|
||||
* unit-testable.
|
||||
*/
|
||||
/**
|
||||
* Sanitize a page title into a safe file-name component (WITHOUT extension).
|
||||
*
|
||||
* Steps: replace forbidden / control characters with "-", collapse whitespace
|
||||
* runs to a single space, trim, cap the length, then guard against an empty
|
||||
* result, an all-dots result, or a reserved Windows device name by prefixing
|
||||
* with "_".
|
||||
*/
|
||||
// NOTE(review): the length cap, the forbidden-character set and the reserved
// device-name list are not visible in this declaration — check the
// implementation before relying on the exact output.
export declare function sanitizeTitle(title: string): string;
|
||||
/**
|
||||
* Disambiguate a sanitized name when two siblings in the same folder collapse
|
||||
* to the same name. Appends a stable suffix built from the page's `slugId`, so
|
||||
* the result stays deterministic across runs (SPEC §12: `Title ~slugId`).
|
||||
*/
|
||||
// NOTE(review): the exact separator between `name` and `slugId` is not
// visible in this declaration — confirm against the implementation.
export declare function disambiguate(name: string, slugId: string): string;
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user