Merge remote-tracking branch 'gitea/develop' into fix/review-batch-2
# Conflicts: # AGENTS.md # CHANGELOG.md # README.md # apps/server/src/collaboration/collaboration.handler.ts # apps/server/src/common/helpers/prosemirror/html-embed.spec.ts # apps/server/src/common/helpers/prosemirror/html-embed.util.ts # apps/server/src/core/ai-chat/public-share-chat.service.ts # apps/server/src/core/ai-chat/public-share-chat.spec.ts # apps/server/src/core/ai-chat/public-share-workspace-limiter.ts # apps/server/src/core/page/services/page.service.ts # apps/server/src/core/page/transclusion/transclusion.service.ts # apps/server/src/integrations/import/services/file-import-task.service.ts # apps/server/src/integrations/import/services/import.service.ts
This commit is contained in:
14
.env.example
14
.env.example
@@ -29,6 +29,11 @@ PORT=3000
|
||||
# `127.0.0.1, 10.0.0.0/8`
|
||||
# TRUST_PROXY=
|
||||
|
||||
# APP_SECRET has a DUAL role: it signs JWTs AND derives the AES-256-GCM key that
|
||||
# encrypts stored AI-provider credentials (API keys) at rest. CONSEQUENCE: if you
|
||||
# change APP_SECRET after setup, every stored AI API key becomes undecryptable —
|
||||
# you must re-enter them in AI settings — and all existing sessions/JWTs are
|
||||
# invalidated. Choose it ONCE, keep it stable, and back it up alongside your DB.
|
||||
# APP_SECRET must be at least 32 characters long. Generate one with: openssl rand -hex 32
|
||||
APP_SECRET=REPLACE_WITH_LONG_SECRET
|
||||
|
||||
@@ -139,7 +144,12 @@ MCP_DOCMOST_PASSWORD=
|
||||
#
|
||||
# Backstop: a cluster-wide, sliding-window cap per workspace (IP-independent,
|
||||
# keyed by the server-resolved workspace id) bounds the owner's bill even if the
|
||||
# per-IP limit is fully evaded. It is a COST backstop, not an access control,
|
||||
# and FAILS OPEN if Redis is unavailable. Override the hourly cap below
|
||||
# per-IP limit is fully evaded. It is a COST backstop, not an access control, and
|
||||
# FAILS CLOSED if Redis is unavailable (an optional assistant briefly going
|
||||
# offline is safer than an unbounded bill). Override the hourly cap below
|
||||
# (default: 300 calls per workspace per rolling hour).
|
||||
# SHARE_AI_WORKSPACE_MAX_PER_HOUR=300
|
||||
#
|
||||
# Per-request output-token ceiling for the anonymous assistant (default: 512).
|
||||
# Worst-case output per accepted call = agent steps (5) × this value.
|
||||
# SHARE_AI_MAX_OUTPUT_TOKENS=512
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -42,3 +42,6 @@ lerna-debug.log*
|
||||
.nx/installation
|
||||
.nx/cache
|
||||
.claude/worktrees/
|
||||
|
||||
# TypeScript incremental build artifacts
|
||||
*.tsbuildinfo
|
||||
|
||||
@@ -280,4 +280,4 @@ The git tag is the source of truth for the displayed version (UI reads `git desc
|
||||
|
||||
## Planning docs
|
||||
|
||||
`docs/*.md` hold design plans for in-progress / planned features (mobile app, offline sync, RAG improvements, streaming dictation). Arbitrary HTML embed has **shipped** (admin-gated by the `htmlEmbed` workspace toggle in Workspace settings) and is no longer a planning doc. `docs/backlog/*.md` track known issues / follow-ups (e.g. AI-chat review follow-ups). Consult the relevant plan before working on one of those areas.
|
||||
`docs/*.md` hold design plans for in-progress / planned features (mobile app, offline sync, RAG improvements, voice dictation). Arbitrary HTML embed has **shipped** — it renders inside a sandboxed iframe and, when the `htmlEmbed` workspace toggle is on, is insertable by any member (no longer admin-only); turning the toggle off hides/stops serving existing embeds on public share pages. `docs/backlog/*.md` track known issues / follow-ups (e.g. AI-chat review follow-ups). Consult the relevant plan before working on one of those areas.
|
||||
|
||||
18
CHANGELOG.md
18
CHANGELOG.md
@@ -10,6 +10,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Added
|
||||
|
||||
- Admin-only "Analytics / tracker" workspace setting: a raw HTML/JS snippet
|
||||
injected into the `<head>` of public share pages only (for analytics such as
|
||||
Google Analytics or Yandex.Metrika).
|
||||
|
||||
### Changed
|
||||
|
||||
- HTML embed blocks now render inside a sandboxed iframe (separate origin) and,
|
||||
when the workspace HTML-embed toggle is on, can be inserted by any member
|
||||
(previously admin-only). Turning the toggle off hides existing embeds and
|
||||
stops serving them on public share pages.
|
||||
- Remove the server-side role-based stripping of HTML-embed blocks from the
|
||||
write paths (collab/REST/MCP, page create/duplicate, import, transclusion
|
||||
unsync); sandboxing makes per-write gating unnecessary. The only remaining
|
||||
server-side strip is the public-share read path, which still honors the
|
||||
workspace HTML-embed toggle.
|
||||
|
||||
### Breaking Changes
|
||||
|
||||
- **MCP shared-token auth moved to its own header.** The `/mcp` shared guard
|
||||
|
||||
@@ -102,6 +102,8 @@ community feature, with no enterprise license. Open it from the page header; the
|
||||
- ✅ **AI chat** — built-in AI agent chat over your wiki content (read + write, RAG search, configurable provider, optional web access via external MCP).
|
||||
- ✅ **Voice dictation** — microphone button in the AI agent chat and the page editor; audio is transcribed server-side (Whisper / OpenAI-compatible STT) via the workspace AI provider, with an admin toggle to show/hide it.
|
||||
- ✅ **Page templates** — flag a page as a template and embed its whole content live into other pages; edits to the template propagate to every place it is inserted (whole-page transclusion on top of the existing synced blocks).
|
||||
- ✅ **Public-share AI assistant** — anonymous visitors of a shared page can ask the AI agent, scoped strictly to that share's page tree (read-only, share-scoped search), behind a workspace toggle.
|
||||
- ✅ **Footnotes** — academic-style footnotes: a numbered superscript reference inline (read it in place via a hover popover), with the note text living as a real, editable block at the bottom of the page; auto-numbered, collaboration-safe, and round-trips through Markdown export/import and the AI agent / MCP.
|
||||
|
||||
### In progress
|
||||
|
||||
@@ -110,12 +112,10 @@ community feature, with no enterprise license. Open it from the page header; the
|
||||
### Planned
|
||||
|
||||
- 🔭 **Viewer comments** — let read-only viewers leave comments.
|
||||
- 🔭 **Public-share AI assistant** — let anonymous visitors of a shared page ask the AI agent, scoped strictly to that share's page tree (read-only, share-scoped search), behind a workspace toggle. See [docs/public-share-assistant-plan.md](docs/public-share-assistant-plan.md).
|
||||
- 🔭 **Password-protected pages** — protect individual pages / shares with a password.
|
||||
- 🔭 **Windows / Linux app** — native desktop app for Windows and Linux.
|
||||
- 🔭 **Mobile app** — mobile apps (iOS first, Android to follow), reusing the existing responsive web UI and editor via a Capacitor wrapper, with offline planned for later. See [docs/mobile-app-plan.md](docs/mobile-app-plan.md).
|
||||
- 🔭 **Offline mode** — offline sync & PWA support.
|
||||
- 🔭 **Footnotes** — academic-style footnotes: a numbered superscript reference inline (read it in place via a hover popover), with the note text living as a real, editable block at the bottom of the page; auto-numbered, collaboration-safe, and round-trips through Markdown export/import and the AI agent / MCP. See [docs/footnotes-plan.md](docs/footnotes-plan.md).
|
||||
- 🔭 **Editor & UX improvements** — blocks inside tables (lists, to-do items), column layout, additional heading levels, highlight blocks, custom emoji in callouts, floating images, anchor links for page mentions, toggles (shared-page width, aside/sidebar, spellcheck, ligatures), sanitized space-tree export, and mentions in breadcrumbs.
|
||||
|
||||
## Getting started
|
||||
@@ -158,6 +158,11 @@ the existing data directory is reused as-is:
|
||||
start the new migrations apply on top of your existing schema (`CREATE EXTENSION vector` plus the
|
||||
`page_embeddings` and AI tables); watch the logs for `Migration "..." executed successfully`.
|
||||
|
||||
> ⚠️ **Never change `APP_SECRET` after setup.** It does double duty: it signs JWTs *and* derives the
|
||||
> AES-256-GCM key that encrypts stored AI-provider credentials (API keys). Rotating it makes every
|
||||
> saved AI API key undecryptable (you'd have to re-enter them in AI settings) and invalidates all
|
||||
> existing sessions. Pick it once, keep it stable, and back it up together with your database.
|
||||
|
||||
### Notes
|
||||
|
||||
- **Back up first.** Take a `pg_dump` before swapping — migrations apply in place, and the
|
||||
|
||||
12
README.ru.md
12
README.ru.md
@@ -102,6 +102,9 @@ real-time-коллаборации Docmost, поэтому запись нико
|
||||
- ✅ **Приложение для macOS** — нативное приложение для macOS ([gitmost-app](https://github.com/vvzvlad/gitmost-app)), встраивающее UI с вкладками для нескольких серверов.
|
||||
- ✅ **AI-чат** — встроенный чат с AI-агентом по содержимому вики (чтение + запись, RAG-поиск, настраиваемый провайдер, опциональный доступ в интернет через внешние MCP).
|
||||
- ✅ **Голосовая диктовка** — кнопка-микрофон в чате AI-агента и в редакторе страниц; аудио распознаётся на сервере (Whisper / OpenAI-совместимый STT) через AI-провайдер воркспейса, с тумблером админа для показа/скрытия.
|
||||
- ✅ **Шаблоны страниц** — пометить страницу шаблоном и вставлять её содержимое живой ссылкой в другие страницы; правки шаблона распространяются на все места вставки (whole-page-транслюзия поверх существующих synced-блоков).
|
||||
- ✅ **AI-ассистент на публичных шарах** — анонимный зритель расшаренной страницы может спросить AI-агента, который ищет строго по дереву этой шары (read-only, share-scoped поиск), за тумблером воркспейса.
|
||||
- ✅ **Сноски** — сноски академического вида: нумерованная ссылка-надстрочник прямо в тексте (читается на месте во всплывающем окне по наведению), а текст сноски живёт реальным редактируемым блоком внизу страницы; авто-нумерация, безопасна для совместного редактирования, переживает экспорт/импорт Markdown и доступна AI-агенту / MCP.
|
||||
|
||||
### В процессе
|
||||
|
||||
@@ -109,14 +112,11 @@ real-time-коллаборации Docmost, поэтому запись нико
|
||||
|
||||
### В планах
|
||||
|
||||
- 🔭 **Шаблоны страниц** — пометить страницу шаблоном и вставлять её содержимое живой ссылкой в другие страницы; правки шаблона распространяются на все места вставки (whole-page-транслюзия поверх существующих synced-блоков). См. [docs/page-templates-plan.md](docs/page-templates-plan.md).
|
||||
- 🔭 **Комментарии зрителей** — возможность комментировать для пользователей с доступом только на чтение.
|
||||
- 🔭 **AI-ассистент на публичных шарах** — возможность анонимному зрителю расшаренной страницы спросить AI-агента, который ищет строго по дереву этой шары (read-only, share-scoped поиск), за тумблером воркспейса. См. [docs/public-share-assistant-plan.md](docs/public-share-assistant-plan.md).
|
||||
- 🔭 **Защищённые паролем страницы** — защита отдельных страниц / шар паролем.
|
||||
- 🔭 **Приложение для Windows / Linux** — нативное десктоп-приложение для Windows и Linux.
|
||||
- 🔭 **Мобильное приложение** — мобильные приложения (iOS обязательно, Android как пойдёт) на базе существующей адаптивной веб-версии и редактора через обёртку Capacitor; оффлайн запланирован на будущее. См. [docs/mobile-app-plan.md](docs/mobile-app-plan.md).
|
||||
- 🔭 **Офлайн-режим** — офлайн-синхронизация и поддержка PWA.
|
||||
- 🔭 **Сноски** — сноски академического вида: нумерованная ссылка-надстрочник прямо в тексте (читается на месте во всплывающем окне по наведению), а текст сноски живёт реальным редактируемым блоком внизу страницы; авто-нумерация, безопасна для совместного редактирования, переживает экспорт/импорт Markdown и доступна AI-агенту / MCP. См. [docs/footnotes-plan.md](docs/footnotes-plan.md).
|
||||
- 🔭 **Улучшения редактора и UX** — блоки внутри таблиц (списки, чек-листы), колоночная вёрстка, дополнительные уровни заголовков, highlight-блоки, кастомные эмодзи в callout-ах, плавающие изображения, anchor-ссылки на упоминания страниц, тоглы (ширина шары, aside/сайдбар, spellcheck, лигатуры), санитизация экспорта дерева спейса и mentions в хлебных крошках.
|
||||
|
||||
## С чего начать
|
||||
@@ -159,6 +159,12 @@ dump/restore, существующий каталог данных переис
|
||||
новые миграции применяются поверх вашей схемы (`CREATE EXTENSION vector` плюс таблицы
|
||||
`page_embeddings` и AI-таблицы); следите в логах за строками `Migration "..." executed successfully`.
|
||||
|
||||
> ⚠️ **Никогда не меняйте `APP_SECRET` после установки.** Он выполняет двойную роль: подписывает JWT
|
||||
> *и* служит материалом для ключа AES-256-GCM, которым шифруются сохранённые ключи AI-провайдеров
|
||||
> (API-ключи). Смена секрета сделает все сохранённые AI-ключи нерасшифровываемыми (придётся вводить
|
||||
> их заново в настройках AI) и инвалидирует все текущие сессии. Задайте его один раз, держите
|
||||
> неизменным и бэкапьте вместе с базой данных.
|
||||
|
||||
|
||||
## Возможности
|
||||
|
||||
|
||||
@@ -1145,6 +1145,7 @@
|
||||
"Current context size": "Current context size",
|
||||
"AI agent": "AI agent",
|
||||
"AI agent is typing…": "AI agent is typing…",
|
||||
"{{name}} is typing…": "{{name}} is typing…",
|
||||
"Send": "Send",
|
||||
"Stop": "Stop",
|
||||
"Chat menu": "Chat menu",
|
||||
@@ -1239,5 +1240,20 @@
|
||||
"Reusable presets that shape the agent's behavior (and optionally its model). Picked when starting a new chat.": "Reusable presets that shape the agent's behavior (and optionally its model). Picked when starting a new chat.",
|
||||
"No roles configured": "No roles configured",
|
||||
"Delete role": "Delete role",
|
||||
"Are you sure you want to delete this role?": "Are you sure you want to delete this role?"
|
||||
"Are you sure you want to delete this role?": "Are you sure you want to delete this role?",
|
||||
"HTML embed": "HTML embed",
|
||||
"Edit HTML embed": "Edit HTML embed",
|
||||
"HTML embed is disabled in this workspace": "HTML embed is disabled in this workspace",
|
||||
"Click to add HTML / CSS / JS": "Click to add HTML / CSS / JS",
|
||||
"This HTML/CSS/JS runs in a sandboxed frame and cannot access the viewer's session, cookies, or API.": "This HTML/CSS/JS runs in a sandboxed frame and cannot access the viewer's session, cookies, or API.",
|
||||
"<script>...</script>": "<script>...</script>",
|
||||
"Height (px, blank = auto)": "Height (px, blank = auto)",
|
||||
"advanced": "advanced",
|
||||
"Enable HTML embed": "Enable HTML embed",
|
||||
"Allow members to insert raw HTML/CSS/JavaScript blocks. The block renders in a sandboxed frame and cannot access the viewer's session, cookies, or API. Off by default.": "Allow members to insert raw HTML/CSS/JavaScript blocks. The block renders in a sandboxed frame and cannot access the viewer's session, cookies, or API. Off by default.",
|
||||
"When enabled, any member can insert an HTML embed block. The toggle just enables or disables the block type workspace-wide.": "When enabled, any member can insert an HTML embed block. The toggle just enables or disables the block type workspace-wide.",
|
||||
"Embeds run inside a sandboxed iframe with a separate origin, so they cannot read or modify the page they are embedded in.": "Embeds run inside a sandboxed iframe with a separate origin, so they cannot read or modify the page they are embedded in.",
|
||||
"Turning this off hides existing embeds (they render as a disabled placeholder) and stops serving them on public share pages.": "Turning this off hides existing embeds (they render as a disabled placeholder) and stops serving them on public share pages.",
|
||||
"Analytics / tracker": "Analytics / tracker",
|
||||
"Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only.": "Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only."
|
||||
}
|
||||
|
||||
@@ -668,6 +668,7 @@
|
||||
"AI search": "Поиск ИИ",
|
||||
"AI Answer": "Ответ ИИ",
|
||||
"Ask AI": "Спросить ИИ",
|
||||
"{{name}} is typing…": "{{name}} печатает…",
|
||||
"AI is thinking...": "ИИ обрабатывает запрос...",
|
||||
"Thinking": "Думаю",
|
||||
"Ask a question...": "Задайте вопрос...",
|
||||
|
||||
@@ -22,6 +22,11 @@ interface MessageItemProps {
|
||||
* UUIDs/routes in the assistant's markdown don't leak as clickable links.
|
||||
*/
|
||||
neutralizeInternalLinks?: boolean;
|
||||
/**
|
||||
* Display name for the dimmed assistant label. Defaults to "AI agent" when
|
||||
* absent; the public share passes the configured identity (agent role) name.
|
||||
*/
|
||||
assistantName?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -40,6 +45,7 @@ export default function MessageItem({
|
||||
message,
|
||||
showCitations = true,
|
||||
neutralizeInternalLinks = false,
|
||||
assistantName,
|
||||
}: MessageItemProps) {
|
||||
const { t } = useTranslation();
|
||||
const isUser = message.role === "user";
|
||||
@@ -61,7 +67,7 @@ export default function MessageItem({
|
||||
return (
|
||||
<Box className={classes.messageRow}>
|
||||
<Text size="xs" c="dimmed" mb={4}>
|
||||
{t("AI agent")}
|
||||
{assistantName?.trim() || t("AI agent")}
|
||||
</Text>
|
||||
{message.parts.map((part, index) => {
|
||||
if (part.type === "text") {
|
||||
|
||||
@@ -30,6 +30,12 @@ interface MessageListProps {
|
||||
* UUIDs/routes don't leak as clickable links to anonymous readers.
|
||||
*/
|
||||
neutralizeInternalLinks?: boolean;
|
||||
/**
|
||||
* Display name for the assistant's dimmed row label and typing indicator.
|
||||
* Defaults to "AI agent" when absent. The public share passes the configured
|
||||
* identity (agent role) name; the internal chat omits it.
|
||||
*/
|
||||
assistantName?: string;
|
||||
}
|
||||
|
||||
// Distance (px) from the bottom within which the viewport still counts as
|
||||
@@ -67,6 +73,7 @@ export default function MessageList({
|
||||
emptyState,
|
||||
showCitations = true,
|
||||
neutralizeInternalLinks = false,
|
||||
assistantName,
|
||||
}: MessageListProps) {
|
||||
const { t } = useTranslation();
|
||||
const viewportRef = useRef<HTMLDivElement>(null);
|
||||
@@ -148,9 +155,10 @@ export default function MessageList({
|
||||
message={message}
|
||||
showCitations={showCitations}
|
||||
neutralizeInternalLinks={neutralizeInternalLinks}
|
||||
assistantName={assistantName}
|
||||
/>
|
||||
))}
|
||||
{typing && <TypingIndicator />}
|
||||
{typing && <TypingIndicator assistantName={assistantName} />}
|
||||
</Stack>
|
||||
</ScrollArea>
|
||||
);
|
||||
|
||||
@@ -2,22 +2,33 @@ import { Box, Group, Text } from "@mantine/core";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import classes from "@/features/ai-chat/components/ai-chat.module.css";
|
||||
|
||||
/**
|
||||
* Live "AI agent is typing…" placeholder shown while a turn is in flight but the
|
||||
* latest assistant message has no visible content yet (no rendered text/tool
|
||||
* parts). It covers the gap between sending and the first streamed token, and is
|
||||
* replaced by the real assistant message once content starts arriving.
|
||||
*
|
||||
* Mirrors the assistant row layout in MessageItem (the dimmed "AI agent" label),
|
||||
* so it reads as the assistant's bubble taking shape.
|
||||
interface TypingIndicatorProps {
|
||||
/**
|
||||
* Display name for the dimmed label and the "… is typing…" line. Defaults to
|
||||
* "AI agent" when absent; the public share passes the configured identity
|
||||
* (agent role) name.
|
||||
*/
|
||||
export default function TypingIndicator() {
|
||||
assistantName?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Live "… is typing…" placeholder shown while a turn is in flight but the latest
|
||||
* assistant message has no visible content yet (no rendered text/tool parts). It
|
||||
* covers the gap between sending and the first streamed token, and is replaced by
|
||||
* the real assistant message once content starts arriving.
|
||||
*
|
||||
* Mirrors the assistant row layout in MessageItem (the dimmed label), so it reads
|
||||
* as the assistant's bubble taking shape. The label and typing line use the
|
||||
* configured identity name when provided, otherwise the generic "AI agent".
|
||||
*/
|
||||
export default function TypingIndicator({ assistantName }: TypingIndicatorProps) {
|
||||
const { t } = useTranslation();
|
||||
const name = assistantName?.trim();
|
||||
|
||||
return (
|
||||
<Box className={classes.messageRow}>
|
||||
<Text size="xs" c="dimmed" mb={4}>
|
||||
{t("AI agent")}
|
||||
{name || t("AI agent")}
|
||||
</Text>
|
||||
<Group gap={8} align="center">
|
||||
<span className={classes.typingDots} aria-hidden="true">
|
||||
@@ -26,7 +37,7 @@ export default function TypingIndicator() {
|
||||
<span />
|
||||
</span>
|
||||
<Text size="sm" c="dimmed">
|
||||
{t("AI agent is typing…")}
|
||||
{name ? t("{{name}} is typing…", { name }) : t("AI agent is typing…")}
|
||||
</Text>
|
||||
</Group>
|
||||
</Box>
|
||||
|
||||
@@ -0,0 +1,63 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import {
|
||||
buildSandboxSrcdoc,
|
||||
canEdit,
|
||||
HTML_EMBED_HEIGHT_MESSAGE,
|
||||
shouldRender,
|
||||
} from "./html-embed-sandbox";
|
||||
|
||||
describe("buildSandboxSrcdoc", () => {
|
||||
it("embeds the user source verbatim", () => {
|
||||
const out = buildSandboxSrcdoc("<div id='x'>hello</div>");
|
||||
expect(out).toContain("<div id='x'>hello</div>");
|
||||
});
|
||||
|
||||
it("injects the height-postMessage bootstrap after the source", () => {
|
||||
const out = buildSandboxSrcdoc("<p>body</p>");
|
||||
// The bootstrap is appended AFTER the source.
|
||||
expect(out.indexOf("<p>body</p>")).toBeLessThan(
|
||||
out.indexOf(HTML_EMBED_HEIGHT_MESSAGE),
|
||||
);
|
||||
// It reports its height to the parent via postMessage with the agreed type.
|
||||
expect(out).toContain("parent.postMessage");
|
||||
expect(out).toContain(HTML_EMBED_HEIGHT_MESSAGE);
|
||||
// It observes resizes so the parent can keep the iframe sized to fit.
|
||||
expect(out).toContain("ResizeObserver");
|
||||
expect(out).toContain('addEventListener("load"');
|
||||
});
|
||||
|
||||
it("handles an empty source (still injects the bootstrap)", () => {
|
||||
const out = buildSandboxSrcdoc("");
|
||||
expect(out).toContain(HTML_EMBED_HEIGHT_MESSAGE);
|
||||
});
|
||||
});
|
||||
|
||||
describe("shouldRender (render policy)", () => {
|
||||
it("read-only renders regardless of the workspace toggle", () => {
|
||||
// isEditable=false → the server already gated the content.
|
||||
expect(shouldRender(false, false)).toBe(true);
|
||||
expect(shouldRender(false, true)).toBe(true);
|
||||
});
|
||||
|
||||
it("editable + toggle OFF does NOT render", () => {
|
||||
expect(shouldRender(true, false)).toBe(false);
|
||||
});
|
||||
|
||||
it("editable + toggle ON renders", () => {
|
||||
expect(shouldRender(true, true)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe("canEdit (edit policy)", () => {
|
||||
it("any member can edit when editable and the toggle is ON (no admin gate)", () => {
|
||||
expect(canEdit(true, true)).toBe(true);
|
||||
});
|
||||
|
||||
it("cannot edit when the toggle is OFF", () => {
|
||||
expect(canEdit(true, false)).toBe(false);
|
||||
});
|
||||
|
||||
it("cannot edit in read-only mode (no edit affordance)", () => {
|
||||
expect(canEdit(false, true)).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,100 @@
|
||||
/**
|
||||
* Pure helpers for the HTML embed node view. Kept out of the React component so
|
||||
* the sandbox srcdoc builder and the render/edit policy can be unit-tested
|
||||
* against a bare environment with no Tiptap/Mantine providers.
|
||||
*/
|
||||
|
||||
/** postMessage type the sandboxed iframe uses to report its content height. */
export const HTML_EMBED_HEIGHT_MESSAGE = "gitmost-html-embed-height";

/**
 * Build the `srcdoc` document for the sandboxed embed iframe.
 *
 * The user's `source` is emitted verbatim and a small bootstrap <script> is
 * appended after it. The iframe is expected to be rendered with a sandbox that
 * does NOT include `allow-same-origin`, so the content runs in an opaque
 * ("null") origin and cannot reach the viewer's cookies/session/API.
 *
 * The bootstrap measures the document height and posts it to the parent as
 * `{ type: HTML_EMBED_HEIGHT_MESSAGE, height }`:
 *   - once immediately (during parse, before images settle),
 *   - again on the window `load` event,
 *   - and whenever a ResizeObserver on the root element or the body fires.
 * Reports are coalesced to one per animation frame and skipped when the height
 * moved by 1px or less, which damps the measure -> resize -> measure loop.
 *
 * NOTE(review): the bootstrap is appended as plain markup, so a `source` that
 * leaves a raw-text element or comment unterminated would swallow it and
 * auto-resize would silently stop working for that embed.
 *
 * @param source Raw HTML/CSS/JS authored by the user; a falsy value is treated
 *               as an empty document.
 * @returns The full `srcdoc` string: source first, bootstrap script last.
 */
export function buildSandboxSrcdoc(source: string): string {
  const bootstrap = `
<script>
(function () {
  var lastSent = -1;
  var scheduled = false;
  function measure() {
    var doc = document.documentElement;
    var body = document.body;
    return Math.max(
      doc ? doc.scrollHeight : 0,
      body ? body.scrollHeight : 0
    );
  }
  function flush() {
    scheduled = false;
    var height = measure();
    // Only report when the height actually changed by more than 1px. This
    // damps the iframe self-measure feedback loop: content sized to the iframe
    // viewport would otherwise oscillate as the parent resizes the frame in
    // response to each report.
    if (Math.abs(height - lastSent) <= 1) return;
    lastSent = height;
    parent.postMessage(
      { type: ${JSON.stringify(HTML_EMBED_HEIGHT_MESSAGE)}, height: height },
      "*"
    );
  }
  function reportHeight() {
    if (scheduled) return;
    scheduled = true;
    if (typeof requestAnimationFrame === "function") {
      requestAnimationFrame(flush);
    } else {
      flush();
    }
  }
  window.addEventListener("load", reportHeight);
  // Report an initial height now (runs during parse, before load/images
  // settle); the load handler and ResizeObserver refine it as content changes.
  reportHeight();
  if (typeof ResizeObserver !== "undefined") {
    try {
      var ro = new ResizeObserver(reportHeight);
      ro.observe(document.documentElement);
      // Also watch the body: when user CSS pins the root element's height, the
      // root box never resizes even though the body content grows.
      if (document.body) ro.observe(document.body);
    } catch (e) {
      // ResizeObserver unavailable/failed: the load handler still reports once.
    }
  }
})();
</script>`;

  return `${source || ""}${bootstrap}`;
}
|
||||
|
||||
/**
 * Decide whether the embed should be rendered, depending on the editor mode.
 *
 * - Read-only / public-share view (`isEditable === false`): always render.
 *   The server has already decided whether to include the embed (it strips
 *   htmlEmbed from shared content when the workspace master toggle is OFF).
 *   An anonymous viewer has no workspace and therefore reads `featureEnabled`
 *   as false, so rendering must NOT be gated on it here.
 * - Editable editor (`isEditable === true`): render only when the workspace
 *   master toggle is ON, so an author sees the inert placeholder when the
 *   feature is OFF.
 *
 * @param isEditable     Whether the editor is in edit mode.
 * @param featureEnabled Whether the workspace HTML-embed toggle is ON.
 * @returns `true` when the embed should be rendered.
 */
export function shouldRender(
  isEditable: boolean,
  featureEnabled: boolean,
): boolean {
  return !isEditable || featureEnabled;
}
|
||||
|
||||
/**
 * Decide whether the edit affordance should be offered for an embed.
 *
 * Editing is only meaningful in edit mode and is offered only while the
 * workspace master toggle is ON. The block renders in a sandboxed iframe (no
 * same-origin access), so authoring is open to ANY member; there is no admin
 * requirement.
 *
 * @param isEditable     Whether the editor is in edit mode.
 * @param featureEnabled Whether the workspace HTML-embed toggle is ON.
 * @returns `true` only when both conditions hold.
 */
export function canEdit(isEditable: boolean, featureEnabled: boolean): boolean {
  return isEditable && featureEnabled;
}
|
||||
@@ -2,11 +2,18 @@
|
||||
position: relative;
|
||||
}
|
||||
|
||||
/* The container the raw source is injected into. */
|
||||
/* Fallback container used only for the empty, non-editor case. */
|
||||
.htmlEmbedContent {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* The sandboxed iframe the embed source is rendered into. */
|
||||
.htmlEmbedFrame {
|
||||
display: block;
|
||||
width: 100%;
|
||||
border: none;
|
||||
}
|
||||
|
||||
/* Edit affordance overlay, only shown while editing the document. */
|
||||
.htmlEmbedToolbar {
|
||||
position: absolute;
|
||||
|
||||
@@ -1,85 +1,118 @@
|
||||
import { NodeViewProps, NodeViewWrapper } from "@tiptap/react";
|
||||
import React, { useCallback, useEffect, useRef, useState } from "react";
|
||||
import React, {
|
||||
useCallback,
|
||||
useEffect,
|
||||
useMemo,
|
||||
useRef,
|
||||
useState,
|
||||
} from "react";
|
||||
import clsx from "clsx";
|
||||
import {
|
||||
ActionIcon,
|
||||
Button,
|
||||
Group,
|
||||
Modal,
|
||||
NumberInput,
|
||||
Text,
|
||||
Textarea,
|
||||
} from "@mantine/core";
|
||||
import { IconCode, IconEdit } from "@tabler/icons-react";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useAtomValue } from "jotai";
|
||||
import useUserRole from "@/hooks/use-user-role.tsx";
|
||||
import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
|
||||
import classes from "./html-embed-view.module.css";
|
||||
import {
|
||||
buildSandboxSrcdoc,
|
||||
canEdit as computeCanEdit,
|
||||
renderRawHtml,
|
||||
shouldExecute as computeShouldExecute,
|
||||
} from "./render-raw-html.ts";
|
||||
HTML_EMBED_HEIGHT_MESSAGE,
|
||||
shouldRender as computeShouldRender,
|
||||
} from "./html-embed-sandbox.ts";
|
||||
|
||||
// Sane bounds for the auto-resized iframe so a runaway embed cannot blow up the
// page layout, and a sensible default before the first height message arrives.
const MIN_IFRAME_HEIGHT = 40;
const MAX_IFRAME_HEIGHT = 4000;
const DEFAULT_IFRAME_HEIGHT = 150;

/**
 * Clamp a reported/configured height into the sane iframe bounds.
 *
 * NaN is not ordered, so it would pass straight through Math.min/Math.max and
 * yield a NaN height; it falls back to the default instead. Infinite values
 * still clamp to the nearest bound.
 *
 * @param h Height in pixels.
 * @returns A height within [MIN_IFRAME_HEIGHT, MAX_IFRAME_HEIGHT].
 */
const clampHeight = (h: number): number =>
  Number.isNaN(h)
    ? DEFAULT_IFRAME_HEIGHT
    : Math.min(MAX_IFRAME_HEIGHT, Math.max(MIN_IFRAME_HEIGHT, h));
|
||||
|
||||
export default function HtmlEmbedView(props: NodeViewProps) {
|
||||
const { t } = useTranslation();
|
||||
const { node, selected, updateAttributes, editor } = props;
|
||||
const { source } = node.attrs as { source: string };
|
||||
const { isAdmin } = useUserRole();
|
||||
const { source, height } = node.attrs as {
|
||||
source: string;
|
||||
height: number | null;
|
||||
};
|
||||
|
||||
// Defense in depth: only execute the raw HTML/JS when the workspace HTML embed
|
||||
// feature toggle is ON. When OFF (the default), we render a neutral disabled
|
||||
// placeholder and inject nothing — so turning the feature off neutralizes
|
||||
// existing embeds at render time as well as on the next server-side save.
|
||||
// The HTML embed renders inside a SANDBOXED iframe (no same-origin access), so
|
||||
// the workspace toggle is a feature switch, not a security gate. When OFF (the
|
||||
// default) we render a neutral placeholder in the editor and nothing else.
|
||||
const workspace = useAtomValue(workspaceAtom);
|
||||
const htmlEmbedEnabled = workspace?.settings?.htmlEmbed === true;
|
||||
|
||||
// Execution policy split by editor mode:
|
||||
// - READ-ONLY / public-share view: the SERVER already decided whether to
|
||||
// include the embed (it strips htmlEmbed from shared content when the
|
||||
// workspace toggle is OFF). An anonymous viewer has no workspace and thus
|
||||
// reads `htmlEmbedEnabled` as false, so we must NOT gate execution on it
|
||||
// here — we execute exactly the `source` the server chose to serve.
|
||||
// - EDITABLE editor (admin authoring): keep gating on the per-workspace
|
||||
// toggle so an admin sees the inert placeholder when the feature is OFF.
|
||||
const shouldExecute = computeShouldExecute(
|
||||
const shouldRender = computeShouldRender(
|
||||
editor.isEditable,
|
||||
htmlEmbedEnabled,
|
||||
);
|
||||
|
||||
const contentRef = useRef<HTMLDivElement | null>(null);
|
||||
const iframeRef = useRef<HTMLIFrameElement | null>(null);
|
||||
const [modalOpen, setModalOpen] = useState(false);
|
||||
const [draft, setDraft] = useState<string>(source || "");
|
||||
const [draftHeight, setDraftHeight] = useState<number | "">(height ?? "");
|
||||
|
||||
// (Re)render the raw source whenever it changes. This runs in BOTH the
|
||||
// editable editor and the read-only / public-share editor (same NodeView),
|
||||
// so trackers fire for readers too — that is the intended behaviour. When the
|
||||
// feature toggle is OFF we clear the container and inject/execute nothing.
|
||||
// True when the author pinned an explicit height; otherwise we auto-resize to
|
||||
// the iframe's reported content height.
|
||||
const hasFixedHeight = typeof height === "number" && Number.isFinite(height);
|
||||
|
||||
// Auto-resize height tracked in state. Seeded to the default and updated from
|
||||
// the iframe's postMessage reports (see effect below) regardless of mode, so
|
||||
// switching a fixed-height embed back to auto immediately reflects the last
|
||||
// reported content height instead of staying pinned to the old fixed value.
|
||||
const [autoHeight, setAutoHeight] = useState<number>(DEFAULT_IFRAME_HEIGHT);
|
||||
|
||||
const srcdoc = useMemo(() => buildSandboxSrcdoc(source || ""), [source]);
|
||||
|
||||
// Auto-resize: accept height messages ONLY from this iframe's own content
|
||||
// window. The sandboxed srcdoc has an opaque ("null") origin, so we cannot
|
||||
// match by event.origin — we match by event.source instead. We track the
|
||||
// reported height even while a fixed height is in effect, so toggling back to
|
||||
// auto shows the current content height with no iframe reload.
|
||||
useEffect(() => {
|
||||
if (!contentRef.current) return;
|
||||
if (shouldExecute) {
|
||||
renderRawHtml(contentRef.current, source || "");
|
||||
} else {
|
||||
contentRef.current.innerHTML = "";
|
||||
function onMessage(event: MessageEvent) {
|
||||
if (event.source !== iframeRef.current?.contentWindow) return;
|
||||
const data = event.data as { type?: string; height?: number };
|
||||
if (data?.type !== HTML_EMBED_HEIGHT_MESSAGE) return;
|
||||
const next = Number(data.height);
|
||||
if (!Number.isFinite(next)) return;
|
||||
setAutoHeight(clampHeight(next));
|
||||
}
|
||||
}, [source, shouldExecute]);
|
||||
window.addEventListener("message", onMessage);
|
||||
return () => window.removeEventListener("message", onMessage);
|
||||
}, []);
|
||||
|
||||
const effectiveHeight = hasFixedHeight ? clampHeight(height) : autoHeight;
|
||||
|
||||
const openEditor = useCallback(() => {
|
||||
setDraft(source || "");
|
||||
setDraftHeight(height ?? "");
|
||||
setModalOpen(true);
|
||||
}, [source]);
|
||||
}, [source, height]);
|
||||
|
||||
const onSave = useCallback(() => {
|
||||
if (editor.isEditable) {
|
||||
updateAttributes({ source: draft });
|
||||
updateAttributes({
|
||||
source: draft,
|
||||
height: draftHeight === "" ? null : Number(draftHeight),
|
||||
});
|
||||
}
|
||||
setModalOpen(false);
|
||||
}, [draft, editor.isEditable, updateAttributes]);
|
||||
}, [draft, draftHeight, editor.isEditable, updateAttributes]);
|
||||
|
||||
// The edit affordance is only meaningful in edit mode, is restricted to admins
|
||||
// (the server strips the node for non-admins anyway), and is offered only when
|
||||
// the workspace feature toggle is ON.
|
||||
const canEdit = computeCanEdit(editor.isEditable, isAdmin, htmlEmbedEnabled);
|
||||
// The edit affordance is only meaningful in edit mode and is offered only when
|
||||
// the workspace master toggle is ON. Any member can edit (sandboxed = safe).
|
||||
const canEdit = computeCanEdit(editor.isEditable, htmlEmbedEnabled);
|
||||
|
||||
return (
|
||||
<NodeViewWrapper
|
||||
@@ -101,12 +134,12 @@ export default function HtmlEmbedView(props: NodeViewProps) {
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!shouldExecute ? (
|
||||
{!shouldRender ? (
|
||||
// Feature disabled for this workspace AND we're in the editable editor:
|
||||
// never inject/execute the source. Show a neutral placeholder so an
|
||||
// existing embed is visibly inert for the authoring admin. Read-only /
|
||||
// share viewers never hit this branch (`shouldExecute` is always true
|
||||
// there) — they execute exactly the source the server chose to serve.
|
||||
// render a neutral placeholder so an existing embed is visibly inert for
|
||||
// the author. Read-only / share viewers never hit this branch
|
||||
// (`shouldRender` is always true there) — they render exactly the
|
||||
// source the server chose to serve.
|
||||
<div className={classes.htmlEmbedPlaceholder}>
|
||||
<IconCode size={18} />
|
||||
<Text size="sm">
|
||||
@@ -114,9 +147,18 @@ export default function HtmlEmbedView(props: NodeViewProps) {
|
||||
</Text>
|
||||
</div>
|
||||
) : source ? (
|
||||
// Raw HTML/CSS/JS rendered into the wiki origin. Scripts are re-created
|
||||
// in renderRawHtml so they execute.
|
||||
<div ref={contentRef} className={classes.htmlEmbedContent} />
|
||||
// Raw HTML/CSS/JS rendered inside a sandboxed iframe (no same-origin):
|
||||
// scripts run in an opaque origin and cannot touch the viewer's
|
||||
// session/cookies/API.
|
||||
<iframe
|
||||
ref={iframeRef}
|
||||
className={classes.htmlEmbedFrame}
|
||||
sandbox="allow-scripts allow-popups allow-forms"
|
||||
srcDoc={srcdoc}
|
||||
title={t("HTML embed")}
|
||||
referrerPolicy="no-referrer"
|
||||
style={{ height: effectiveHeight }}
|
||||
/>
|
||||
) : canEdit ? (
|
||||
<div className={classes.htmlEmbedPlaceholder} onClick={openEditor}>
|
||||
<IconCode size={18} />
|
||||
@@ -124,7 +166,7 @@ export default function HtmlEmbedView(props: NodeViewProps) {
|
||||
</div>
|
||||
) : (
|
||||
// Empty source, non-editor: render nothing visible.
|
||||
<div ref={contentRef} className={classes.htmlEmbedContent} />
|
||||
<div className={classes.htmlEmbedContent} />
|
||||
)}
|
||||
|
||||
<Modal
|
||||
@@ -135,7 +177,7 @@ export default function HtmlEmbedView(props: NodeViewProps) {
|
||||
>
|
||||
<Text size="xs" c="dimmed" mb="xs">
|
||||
{t(
|
||||
"This HTML/CSS/JS runs in the page origin for everyone who views it. Admins only.",
|
||||
"This HTML/CSS/JS runs in a sandboxed frame and cannot access the viewer's session, cookies, or API.",
|
||||
)}
|
||||
</Text>
|
||||
<Textarea
|
||||
@@ -148,6 +190,19 @@ export default function HtmlEmbedView(props: NodeViewProps) {
|
||||
styles={{ input: { fontFamily: "monospace" } }}
|
||||
data-autofocus
|
||||
/>
|
||||
<NumberInput
|
||||
mt="md"
|
||||
label={t("Height (px, blank = auto)")}
|
||||
value={draftHeight}
|
||||
onChange={(value) =>
|
||||
setDraftHeight(
|
||||
value === "" || value === null ? "" : Number(value),
|
||||
)
|
||||
}
|
||||
min={MIN_IFRAME_HEIGHT}
|
||||
max={MAX_IFRAME_HEIGHT}
|
||||
allowDecimal={false}
|
||||
/>
|
||||
<Group justify="flex-end" mt="md">
|
||||
<Button variant="default" onClick={() => setModalOpen(false)}>
|
||||
{t("Cancel")}
|
||||
|
||||
@@ -1,112 +0,0 @@
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { JSDOM } from "jsdom";
|
||||
import { renderRawHtml, shouldExecute, canEdit } from "./render-raw-html";
|
||||
|
||||
// Script execution is only observable in a jsdom built with
// `runScripts: "dangerously"`; a default jsdom leaves <script> nodes inert.
// renderRawHtml creates its replacement scripts through
// `container.ownerDocument`, so every test here hands it a container that
// lives in that script-running document. The structural and policy
// assertions use the default vitest jsdom (no runScripts).
describe("renderRawHtml (script execution against a runScripts jsdom)", () => {
  let dom: JSDOM;
  let container: HTMLElement;

  // The jsdom window viewed as a plain property bag, used to read the flags
  // that the injected scripts set on it.
  const windowProps = () => dom.window as unknown as Record<string, unknown>;

  beforeEach(() => {
    dom = new JSDOM("<!doctype html><html><body></body></html>", {
      runScripts: "dangerously",
    });
    const { document } = dom.window;
    container = document.createElement("div");
    document.body.appendChild(container);
  });

  afterEach(() => {
    dom.window.close();
  });

  it("re-creates and executes an inline <script> (observable side effect)", () => {
    const html =
      "<div>hello</div><script>window.__htmlEmbedFlag = true;</script>";
    renderRawHtml(container, html);

    // The inline script must have run inside the jsdom window...
    expect(windowProps().__htmlEmbedFlag).toBe(true);
    // ...and the surrounding markup must survive untouched.
    expect(container.querySelector("div")?.textContent).toBe("hello");
  });

  it("copies src/async/defer onto a re-created external <script src>", () => {
    const html = '<script src="https://example.com/t.js" async defer></script>';
    renderRawHtml(container, html);

    const rendered = container.querySelector("script");
    expect(rendered).not.toBeNull();
    expect(rendered?.getAttribute("src")).toBe("https://example.com/t.js");
    expect(rendered?.hasAttribute("async")).toBe(true);
    expect(rendered?.hasAttribute("defer")).toBe(true);
  });

  it("clears the container when the source is empty", () => {
    container.innerHTML = "<p>stale</p>";
    renderRawHtml(container, "");
    expect(container.innerHTML).toBe("");
  });

  it("clears prior content first on a re-render with new source", () => {
    const firstHtml =
      "<span id='first'>one</span><script>window.__htmlEmbedCount = 1;</script>";
    const secondHtml =
      "<span id='second'>two</span><script>window.__htmlEmbedCount = 2;</script>";

    renderRawHtml(container, firstHtml);
    expect(windowProps().__htmlEmbedCount).toBe(1);
    expect(container.querySelector("#first")).not.toBeNull();

    renderRawHtml(container, secondHtml);
    // Only the second render may remain; the first one is wiped.
    expect(container.querySelector("#first")).toBeNull();
    expect(container.querySelector("#second")).not.toBeNull();
    expect(windowProps().__htmlEmbedCount).toBe(2);
  });
});
|
||||
|
||||
// Policy table for shouldExecute(isEditable, featureEnabled).
describe("shouldExecute (execution policy)", () => {
  it("read-only executes regardless of the workspace toggle", () => {
    // Not editable: the server has already decided what content to serve.
    for (const featureEnabled of [false, true]) {
      expect(shouldExecute(false, featureEnabled)).toBe(true);
    }
  });

  it("editable + toggle OFF does NOT execute", () => {
    const result = shouldExecute(true, false);
    expect(result).toBe(false);
  });

  it("editable + toggle ON executes", () => {
    const result = shouldExecute(true, true);
    expect(result).toBe(true);
  });
});
|
||||
|
||||
// Policy table for canEdit(isEditable, isAdmin, featureEnabled).
describe("canEdit (edit policy)", () => {
  it("a member (non-admin) can never edit", () => {
    // Toggle ON, not an admin: denied in both editor modes.
    for (const isEditable of [true, false]) {
      expect(canEdit(isEditable, false, true)).toBe(false);
    }
  });

  it("an admin with the toggle OFF cannot edit", () => {
    const result = canEdit(true, true, false);
    expect(result).toBe(false);
  });

  it("an admin with the toggle ON in editable mode can edit", () => {
    const result = canEdit(true, true, true);
    expect(result).toBe(true);
  });

  it("an admin in read-only mode cannot edit (no edit affordance)", () => {
    const result = canEdit(false, true, true);
    expect(result).toBe(false);
  });
});
|
||||
@@ -1,73 +0,0 @@
|
||||
/**
|
||||
* Pure DOM helpers for the HTML embed node view. Kept out of the React
|
||||
* component so the script re-creation/execution mechanism and the execution/
|
||||
* edit policy can be unit-tested against a bare jsdom container with no
|
||||
* Tiptap/Mantine providers.
|
||||
*/
|
||||
|
||||
/**
 * Inject raw HTML (including <script> tags) into `container`, executing any
 * scripts.
 *
 * Assigning `innerHTML` never runs the <script> elements it parses: the HTML
 * spec marks them as "already started". To make them run, every parsed script
 * is swapped for a freshly created <script> element that carries the same
 * attributes and inline code; a programmatically created and inserted script
 * does execute.
 *
 * Ordering: a script element made with `createElement` is asynchronous by
 * default, so several external scripts could run out of document order. For
 * every script that did not ask for `async`, the flag is switched off
 * explicitly so external scripts keep their source order.
 *
 * SECURITY NOTE: `source` is executed in the document that owns `container`,
 * with that document's privileges. This is deliberate here (the original
 * design notes call it "Variant C"); callers must only pass trusted source.
 *
 * @param container Element whose content is replaced by the rendered source.
 * @param source Raw HTML/CSS/JS. An empty string only clears the container.
 */
export function renderRawHtml(container: HTMLElement, source: string): void {
  // Always drop the previous render first (re-render on source change).
  container.innerHTML = "";
  if (!source) return;

  container.innerHTML = source;

  // Use the container's own document so the helper works against any document
  // (the live page or a standalone jsdom instance in tests), not only the
  // ambient global `document`.
  const doc = container.ownerDocument;
  const scripts = Array.from(container.querySelectorAll("script"));
  for (const oldScript of scripts) {
    const newScript = doc.createElement("script");
    // Copy every attribute (src, type, async, defer, data-*, etc.).
    for (const attr of Array.from(oldScript.attributes)) {
      newScript.setAttribute(attr.name, attr.value);
    }
    // Dynamically created scripts default to async execution; turn that off
    // unless the author requested it, so document order is preserved.
    if (!oldScript.hasAttribute("async")) {
      newScript.async = false;
    }
    // Copy inline code.
    newScript.text = oldScript.textContent ?? "";
    // Replacing the node in place triggers execution.
    oldScript.parentNode?.replaceChild(newScript, oldScript);
  }
}
|
||||
|
||||
/**
 * Decide whether an HTML embed's source may be executed.
 *
 * The policy depends on the editor mode:
 *  - Read-only / public-share view: always execute. The server has already
 *    decided whether the embed is part of the served content (it strips
 *    htmlEmbed from shared content when the workspace toggle is OFF), and an
 *    anonymous viewer has no workspace, so `featureEnabled` reads as false
 *    there and must not be used as a gate.
 *  - Editable editor (admin authoring): follow the per-workspace toggle, so
 *    the admin sees the inert placeholder while the feature is OFF.
 *
 * @param isEditable Whether the editor is in editable mode.
 * @param featureEnabled Whether the workspace HTML embed toggle is ON.
 * @returns true when the source should be executed.
 */
export function shouldExecute(
  isEditable: boolean,
  featureEnabled: boolean,
): boolean {
  if (!isEditable) {
    return true;
  }
  return featureEnabled;
}
|
||||
|
||||
/**
 * Decide whether the edit affordance is offered for an HTML embed.
 *
 * All three conditions must hold: the editor is editable, the user is an
 * admin (the server strips the node for non-admins anyway), and the workspace
 * feature toggle is ON.
 *
 * @param isEditable Whether the editor is in editable mode.
 * @param isAdmin Whether the current user is a workspace admin.
 * @param featureEnabled Whether the workspace HTML embed toggle is ON.
 * @returns true only when every condition is satisfied.
 */
export function canEdit(
  isEditable: boolean,
  isAdmin: boolean,
  featureEnabled: boolean,
): boolean {
  if (!isEditable || !isAdmin) {
    return false;
  }
  return featureEnabled;
}
|
||||
@@ -623,10 +623,9 @@ const CommandGroups: SlashMenuGroupedItemsType = {
|
||||
},
|
||||
{
|
||||
title: "HTML embed",
|
||||
description: "Embed raw HTML, CSS and JavaScript (admins only).",
|
||||
description: "Embed raw HTML, CSS and JavaScript (sandboxed).",
|
||||
searchTerms: ["html", "css", "js", "javascript", "script", "tracker", "analytics", "raw", "embed"],
|
||||
icon: IconCode,
|
||||
adminOnly: true,
|
||||
requiresHtmlEmbedFeature: true,
|
||||
command: ({ editor, range }: CommandProps) => {
|
||||
editor
|
||||
@@ -795,30 +794,12 @@ const CommandGroups: SlashMenuGroupedItemsType = {
|
||||
};
|
||||
|
||||
/**
 * Report whether the persisted `currentUser` entry in localStorage describes
 * a workspace owner or admin (the same payload `currentUserAtom` stores).
 *
 * Used to hide admin-only slash items such as the raw HTML embed. This is a
 * UI gate only; the server independently strips admin-only nodes from
 * non-admin writes.
 *
 * @returns true when `user.role` is "owner" or "admin"; false when the entry
 *   is missing, is not valid JSON, or carries any other role.
 */
function isCurrentUserAdmin(): boolean {
  let role: unknown;
  try {
    const stored = localStorage.getItem("currentUser");
    // A missing/empty entry leaves `role` undefined, which is not an admin.
    role = stored ? JSON.parse(stored)?.user?.role : undefined;
  } catch {
    // Storage unavailable or malformed JSON: treat as non-admin.
    return false;
  }
  return role === "admin" || role === "owner";
}
|
||||
|
||||
/**
|
||||
* Read the workspace-level HTML embed feature toggle from the persisted
|
||||
* Read the workspace-level HTML embed master toggle from the persisted
|
||||
* `currentUser` payload (the same localStorage entry `currentUserAtom` writes,
|
||||
* carrying `workspace.settings`). ABSENT/false => OFF (the default). The slash
|
||||
* `getSuggestionItems` is a plain function (no React/atom context), so we read
|
||||
* the persisted state the same way `isCurrentUserAdmin()` does. UI gate only;
|
||||
* the server independently strips htmlEmbed from every non-allowed write.
|
||||
* the persisted state directly. UI gate only; an anonymous public-share read is
|
||||
* served already-stripped content by the server when the toggle is OFF.
|
||||
*/
|
||||
function isHtmlEmbedFeatureEnabled(): boolean {
|
||||
try {
|
||||
@@ -840,7 +821,6 @@ export const getSuggestionItems = ({
|
||||
}): SlashMenuGroupedItemsType => {
|
||||
const search = query.toLowerCase();
|
||||
const filteredGroups: SlashMenuGroupedItemsType = {};
|
||||
const isAdmin = isCurrentUserAdmin();
|
||||
const htmlEmbedFeatureEnabled = isHtmlEmbedFeatureEnabled();
|
||||
|
||||
const fuzzyMatch = (query: string, target: string) => {
|
||||
@@ -856,9 +836,7 @@ export const getSuggestionItems = ({
|
||||
for (const [group, items] of Object.entries(CommandGroups)) {
|
||||
const filteredItems = items.filter((item) => {
|
||||
if (excludeItems?.has(item.title)) return false;
|
||||
// Hide admin-only items (raw HTML embed) from non-admins.
|
||||
if (item.adminOnly && !isAdmin) return false;
|
||||
// Hide HTML-embed-gated items unless the workspace feature toggle is ON.
|
||||
// Hide the HTML embed item unless the workspace master toggle is ON.
|
||||
if (item.requiresHtmlEmbedFeature && !htmlEmbedFeatureEnabled)
|
||||
return false;
|
||||
return (
|
||||
|
||||
@@ -21,13 +21,9 @@ export type SlashMenuItemType = {
|
||||
searchTerms: string[];
|
||||
command: (props: CommandProps) => void;
|
||||
disable?: (editor: ReturnType<typeof useEditor>) => boolean;
|
||||
// When true, the item is only offered to workspace admins/owners. This is a
|
||||
// UI convenience only — the real authoring gate is enforced server-side.
|
||||
adminOnly?: boolean;
|
||||
// When true, the item is hidden unless the workspace HTML embed feature toggle
|
||||
// is ON. Combined with adminOnly, the item shows only for admins in workspaces
|
||||
// where the feature is enabled. UI gate only — the server strips htmlEmbed on
|
||||
// every write where the toggle is OFF or the user is not an admin.
|
||||
// When true, the item is hidden unless the workspace HTML embed master toggle
|
||||
// is ON. UI gate only — for anonymous public-share reads the server serves
|
||||
// already-stripped content when the toggle is OFF.
|
||||
requiresHtmlEmbedFeature?: boolean;
|
||||
};
|
||||
|
||||
|
||||
@@ -28,6 +28,8 @@ interface ShareAiWidgetProps {
|
||||
shareId: string;
|
||||
/** The page the reader currently has open (context for "this page"). */
|
||||
pageId: string;
|
||||
/** Display name of the configured assistant identity; falls back to 'AI agent' when absent. */
|
||||
assistantName?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -48,7 +50,11 @@ interface ShareAiWidgetProps {
|
||||
* links (so internal UUIDs/auth-gated routes in the answer don't leak as
|
||||
* clickable links), and a documentation-focused empty state.
|
||||
*/
|
||||
export default function ShareAiWidget({ shareId, pageId }: ShareAiWidgetProps) {
|
||||
export default function ShareAiWidget({
|
||||
shareId,
|
||||
pageId,
|
||||
assistantName,
|
||||
}: ShareAiWidgetProps) {
|
||||
const { t } = useTranslation();
|
||||
const [open, setOpen] = useState(false);
|
||||
const [input, setInput] = useState("");
|
||||
@@ -153,6 +159,7 @@ export default function ShareAiWidget({ shareId, pageId }: ShareAiWidgetProps) {
|
||||
<MessageList
|
||||
messages={messages}
|
||||
isStreaming={isStreaming}
|
||||
assistantName={assistantName}
|
||||
showCitations={false}
|
||||
// Anonymous reader: neutralize internal/relative links in the
|
||||
// assistant's markdown so internal UUIDs/auth-gated routes don't
|
||||
|
||||
@@ -45,6 +45,10 @@ export interface ISharedPage extends IShare {
|
||||
// Whether the anonymous public-share AI assistant is enabled for the
|
||||
// workspace (server-resolved). Gates the "Ask AI" widget.
|
||||
aiAssistant?: boolean;
|
||||
// Display name of the configured assistant identity (agent role name), used
|
||||
// to label the public-share chat. Null/absent when no identity is set →
|
||||
// the widget falls back to the generic "AI agent" label.
|
||||
aiAssistantName?: string | null;
|
||||
}
|
||||
|
||||
export interface IShareForPage extends IShare {
|
||||
|
||||
@@ -1,57 +1,32 @@
|
||||
import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
|
||||
import { useAtom } from "jotai";
|
||||
import { useState } from "react";
|
||||
import { updateWorkspace } from "@/features/workspace/services/workspace-service.ts";
|
||||
import { useWorkspaceSetting } from "@/features/workspace/hooks/use-workspace-setting.ts";
|
||||
import { Switch, Stack, Paper, Group, Text, List } from "@mantine/core";
|
||||
import { notifications } from "@mantine/notifications";
|
||||
import useUserRole from "@/hooks/use-user-role.tsx";
|
||||
import { useTranslation } from "react-i18next";
|
||||
|
||||
/**
|
||||
* Admin toggle for the workspace HTML embed feature.
|
||||
* Workspace master toggle that enables/disables the HTML embed block type.
|
||||
*
|
||||
* SECURITY: when ON, workspace admins/owners can embed raw HTML/CSS/JS that
|
||||
* EXECUTES in the wiki page origin for every reader (a deliberate stored-XSS
|
||||
* surface, e.g. for analytics trackers). OFF by default. The server strips
|
||||
* htmlEmbed nodes on every write where the toggle is OFF or the saver is not an
|
||||
* admin, so this switch fully enables/disables the feature workspace-wide.
|
||||
* The block renders inside a SANDBOXED iframe (no same-origin access), so it
|
||||
* cannot touch the viewer's session/cookies/API — it is a feature switch, not a
|
||||
* security gate. When ON, ANY member can insert the block. OFF by default; for
|
||||
* anonymous public-share reads the server serves already-stripped content when
|
||||
* the toggle is OFF. The toggle itself is managed by workspace admins.
|
||||
*/
|
||||
export default function HtmlEmbedSettings() {
|
||||
const { t } = useTranslation();
|
||||
const [workspace, setWorkspace] = useAtom(workspaceAtom);
|
||||
const { workspace, isLoading, save } = useWorkspaceSetting("htmlEmbed");
|
||||
const { isAdmin } = useUserRole();
|
||||
|
||||
const [checked, setChecked] = useState<boolean>(
|
||||
workspace?.settings?.htmlEmbed ?? false,
|
||||
);
|
||||
const [isLoading, setIsLoading] = useState(false);
|
||||
|
||||
async function handleToggle(value: boolean) {
|
||||
setIsLoading(true);
|
||||
const previous = checked;
|
||||
setChecked(value); // optimistic update
|
||||
try {
|
||||
const updated = await updateWorkspace({ htmlEmbed: value });
|
||||
// Force settings.htmlEmbed to the new value so the atom is consistent even
|
||||
// if the response shape omits it.
|
||||
setWorkspace({
|
||||
...updated,
|
||||
settings: {
|
||||
...updated.settings,
|
||||
htmlEmbed: value,
|
||||
},
|
||||
});
|
||||
notifications.show({ message: t("Updated successfully") });
|
||||
} catch (err) {
|
||||
console.log(err);
|
||||
setChecked(previous); // revert on failure
|
||||
notifications.show({
|
||||
message: t("Failed to update data"),
|
||||
color: "red",
|
||||
});
|
||||
} finally {
|
||||
setIsLoading(false);
|
||||
}
|
||||
const ok = await save(value);
|
||||
if (!ok) setChecked(previous); // revert on failure
|
||||
}
|
||||
|
||||
return (
|
||||
@@ -69,7 +44,7 @@ export default function HtmlEmbedSettings() {
|
||||
<Switch
|
||||
label={t("Enable HTML embed")}
|
||||
description={t(
|
||||
"Allow workspace admins to insert raw HTML/CSS/JavaScript that EXECUTES in the wiki page origin for everyone who views the page (a deliberate stored-XSS surface, e.g. for analytics trackers). Off by default.",
|
||||
"Allow members to insert raw HTML/CSS/JavaScript blocks. The block renders in a sandboxed frame and cannot access the viewer's session, cookies, or API. Off by default.",
|
||||
)}
|
||||
checked={checked}
|
||||
disabled={!isAdmin || isLoading}
|
||||
@@ -79,17 +54,17 @@ export default function HtmlEmbedSettings() {
|
||||
<List size="xs" c="dimmed" mt="md" spacing={4}>
|
||||
<List.Item>
|
||||
{t(
|
||||
"Only workspace admins/owners can insert HTML embeds. Members never can: the editor option is hidden for them and the server strips the embed on save at every write path.",
|
||||
"When enabled, any member can insert an HTML embed block. The toggle just enables or disables the block type workspace-wide.",
|
||||
)}
|
||||
</List.Item>
|
||||
<List.Item>
|
||||
{t(
|
||||
"If a non-admin edits and saves a page that contains an admin's embed, that save strips the embed (fail-closed). An admin must re-add it.",
|
||||
"Embeds run inside a sandboxed iframe with a separate origin, so they cannot read or modify the page they are embedded in.",
|
||||
)}
|
||||
</List.Item>
|
||||
<List.Item>
|
||||
{t(
|
||||
"Turning this off strips existing embeds on their next save and immediately disables execution (existing embeds render as a disabled placeholder).",
|
||||
"Turning this off hides existing embeds (they render as a disabled placeholder) and stops serving them on public share pages.",
|
||||
)}
|
||||
</List.Item>
|
||||
</List>
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
import { useState } from "react";
|
||||
import { useWorkspaceSetting } from "@/features/workspace/hooks/use-workspace-setting.ts";
|
||||
import {
|
||||
Button,
|
||||
Group,
|
||||
Paper,
|
||||
Stack,
|
||||
Text,
|
||||
Textarea,
|
||||
} from "@mantine/core";
|
||||
import useUserRole from "@/hooks/use-user-role.tsx";
|
||||
import { useTranslation } from "react-i18next";
|
||||
|
||||
/**
 * Settings panel for the analytics/tracker snippet of public share pages.
 *
 * The stored value is injected VERBATIM into the <head> of PUBLIC SHARE pages
 * only, in the page's own (same-origin) context; it is the deliberate
 * same-origin surface for analytics snippets (Google Analytics,
 * Yandex.Metrika, etc.). Admin only: the workspace settings write is
 * admin-gated server-side, and both the textarea and the Save button are
 * disabled for non-admins.
 *
 * The textarea is edit-then-save: typing only changes local state, and the
 * value is persisted through `useWorkspaceSetting("trackerHead")` on Save.
 */
export default function TrackerSettings() {
  const { t } = useTranslation();
  const { workspace, isLoading, save } = useWorkspaceSetting("trackerHead");
  const { isAdmin } = useUserRole();

  // Local draft, seeded once from the current workspace settings.
  const [snippet, setSnippet] = useState<string>(
    workspace?.settings?.trackerHead ?? "",
  );

  const heading = t("Analytics / tracker");
  const inputLocked = !isAdmin || isLoading;

  return (
    <Stack mt="sm">
      <Group justify="space-between" align="center">
        <Text fw={700} size="lg">
          {heading}
        </Text>
        <Text size="xs" c="dimmed" tt="uppercase" fw={600}>
          {t("advanced")}
        </Text>
      </Group>

      <Paper withBorder radius="md" p="lg">
        <Text size="xs" c="dimmed" mb="xs">
          {t(
            "Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only.",
          )}
        </Text>
        <Textarea
          autosize
          minRows={6}
          maxRows={20}
          aria-label={heading}
          value={snippet}
          onChange={(event) => setSnippet(event.currentTarget.value)}
          placeholder={t("<script>...</script>")}
          styles={{ input: { fontFamily: "monospace" } }}
          disabled={inputLocked}
        />
        <Group justify="flex-end" mt="md">
          <Button
            onClick={() => {
              // `save` reports success/failure itself via notifications.
              void save(snippet);
            }}
            loading={isLoading}
            disabled={!isAdmin}
          >
            {t("Save")}
          </Button>
        </Group>
      </Paper>
    </Stack>
  );
}
|
||||
@@ -0,0 +1,65 @@
|
||||
import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts";
|
||||
import { useAtom } from "jotai";
|
||||
import { useCallback, useState } from "react";
|
||||
import { updateWorkspace } from "@/features/workspace/services/workspace-service.ts";
|
||||
import { IWorkspace } from "@/features/workspace/types/workspace.types.ts";
|
||||
import { notifications } from "@mantine/notifications";
|
||||
import { useTranslation } from "react-i18next";
|
||||
|
||||
/**
 * Workspace setting keys that this hook can persist. Each key is both a
 * write-only field on the update payload and a read field under
 * `workspace.settings`, so the value type is derived from the settings shape.
 */
type WorkspaceSettingKey = "htmlEmbed" | "trackerHead";
type WorkspaceSettingValue<K extends WorkspaceSettingKey> =
  NonNullable<IWorkspace["settings"][K]>;

/**
 * Extract a displayable message from a failed request, if it carries one.
 *
 * Looks at `err.response.data.message` (the shape the caller has always
 * probed — presumably an axios-style error; confirm against the HTTP client).
 * A non-empty string is returned as is, an array of strings is joined with
 * ", ", and anything else yields `undefined` so the caller can fall back to a
 * generic message instead of handing an arbitrary value to the notification.
 */
function readServerErrorMessage(err: unknown): string | undefined {
  // Optional chaining keeps this safe for primitives, null and partial shapes.
  const message = (
    err as { response?: { data?: { message?: unknown } } } | null | undefined
  )?.response?.data?.message;

  if (typeof message === "string") {
    return message.trim() === "" ? undefined : message;
  }
  if (Array.isArray(message)) {
    const parts = message.filter(
      (part): part is string => typeof part === "string" && part !== "",
    );
    return parts.length > 0 ? parts.join(", ") : undefined;
  }
  return undefined;
}

/**
 * Shared "save a workspace setting" plumbing extracted from the individual
 * settings components. Owns the `isLoading` state and the persist-then-merge
 * flow: call `updateWorkspace`, merge the response back into the workspace
 * atom while forcing `settings[key]` to the saved value, and show a
 * success/error notification. Callers keep their own interaction model
 * (optimistic toggle, edit-then-save, etc.) on top of this.
 *
 * @param key The workspace setting this hook instance persists.
 * @returns `workspace` (current atom value), `isLoading` (true while a save
 *   is in flight) and `save(value)`, which resolves to `true` on success and
 *   `false` on failure; it never rejects.
 */
export function useWorkspaceSetting<K extends WorkspaceSettingKey>(key: K) {
  const [workspace, setWorkspace] = useAtom(workspaceAtom);
  const { t } = useTranslation();
  const [isLoading, setIsLoading] = useState(false);

  const save = useCallback(
    async (value: WorkspaceSettingValue<K>): Promise<boolean> => {
      setIsLoading(true);
      try {
        const updated = await updateWorkspace({
          [key]: value,
        } as Partial<IWorkspace>);
        // Force settings[key] to the new value so the atom is consistent even
        // if the response shape omits it.
        setWorkspace({
          ...updated,
          settings: {
            ...updated.settings,
            [key]: value,
          },
        });
        notifications.show({ message: t("Updated successfully") });
        return true;
      } catch (err: unknown) {
        console.error(`Failed to update workspace setting "${key}"`, err);
        notifications.show({
          // Prefer the server's explanation; otherwise use the generic text.
          message: readServerErrorMessage(err) ?? t("Failed to update data"),
          color: "red",
        });
        return false;
      } finally {
        setIsLoading(false);
      }
    },
    [key, setWorkspace, t],
  );

  return { workspace, isLoading, save };
}
|
||||
@@ -33,6 +33,9 @@ export interface IWorkspace {
|
||||
// Write-only field for updateWorkspace({ htmlEmbed }). Read state lives at
|
||||
// settings.htmlEmbed.
|
||||
htmlEmbed?: boolean;
|
||||
// Write-only field for updateWorkspace({ trackerHead }). Read state lives at
|
||||
// settings.trackerHead.
|
||||
trackerHead?: string;
|
||||
}
|
||||
|
||||
export interface IWorkspaceSettings {
|
||||
@@ -40,8 +43,13 @@ export interface IWorkspaceSettings {
|
||||
sharing?: IWorkspaceSharingSettings;
|
||||
api?: IWorkspaceApiSettings;
|
||||
templates?: IWorkspaceTemplateSettings;
|
||||
// Admin-only HTML embed feature toggle. ABSENT/false => OFF (default).
|
||||
// HTML embed master toggle (enables/disables the block type). The block
|
||||
// renders in a sandboxed iframe, so this is a feature switch, not a security
|
||||
// gate. ABSENT/false => OFF (default).
|
||||
htmlEmbed?: boolean;
|
||||
// Admin-only analytics/tracker snippet injected into the <head> of public
|
||||
// share pages (same-origin). ABSENT/empty => none.
|
||||
trackerHead?: string;
|
||||
}
|
||||
|
||||
export interface IWorkspaceApiSettings {
|
||||
|
||||
@@ -2,6 +2,7 @@ import SettingsTitle from "@/components/settings/settings-title.tsx";
|
||||
import WorkspaceNameForm from "@/features/workspace/components/settings/components/workspace-name-form";
|
||||
import WorkspaceIcon from "@/features/workspace/components/settings/components/workspace-icon.tsx";
|
||||
import HtmlEmbedSettings from "@/features/workspace/components/settings/components/html-embed-settings.tsx";
|
||||
import TrackerSettings from "@/features/workspace/components/settings/components/tracker-settings.tsx";
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { getAppName } from "@/lib/config.ts";
|
||||
import { Helmet } from "react-helmet-async";
|
||||
@@ -17,6 +18,7 @@ export default function WorkspaceSettings() {
|
||||
<WorkspaceIcon />
|
||||
<WorkspaceNameForm />
|
||||
<HtmlEmbedSettings />
|
||||
<TrackerSettings />
|
||||
</>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -79,7 +79,11 @@ export default function SharedPage() {
|
||||
{/* Anonymous "Ask AI" widget — only when the workspace enables the
|
||||
public-share assistant (server-resolved flag on /shares/page-info). */}
|
||||
{data?.aiAssistant && data.share?.id && data.page?.id && (
|
||||
<ShareAiWidget shareId={data.share.id} pageId={data.page.id} />
|
||||
<ShareAiWidget
|
||||
shareId={data.share.id}
|
||||
pageId={data.page.id}
|
||||
assistantName={data.aiAssistantName ?? undefined}
|
||||
/>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
|
||||
@@ -1,120 +0,0 @@
|
||||
import * as Y from 'yjs';
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import { CollaborationHandler } from './collaboration.handler';
|
||||
import { hasHtmlEmbedNode } from '../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL CollaborationHandler.updatePageContent admin gate (the
|
||||
// REST/MCP/AI content-update entrypoint, used by the page update endpoint and
|
||||
// the MCP/AI agent). updatePageContent reads `user?.role` and strips htmlEmbed
|
||||
// BEFORE handing the json to withYdocConnection. We stub only
|
||||
// withYdocConnection (which would otherwise open a real hocuspocus connection):
|
||||
// the role-extraction (`user?.role`) + strip that run upstream of it are REAL
|
||||
// production code. The 'replace' branch then runs the production
|
||||
// TiptapTransformer.toYdoc on the gated json against a real Y.Doc, which we
|
||||
// decode back to JSON and assert on. This replaces the re-implemented
|
||||
// `applyAdminGate` stand-in for this entrypoint.
|
||||
|
||||
// Builds a fresh fixture document on every call: a plain paragraph, a two-column
// layout whose left column holds a nested htmlEmbed, and a top-level htmlEmbed.
const docWithEmbed = () => {
  const paragraph = (text: string) => ({
    type: 'paragraph',
    content: [{ type: 'text', text }],
  });
  const embed = (source: string) => ({
    type: 'htmlEmbed',
    attrs: { source },
  });
  const column = (position: string, content: unknown[]) => ({
    type: 'column',
    attrs: { position },
    content,
  });

  return {
    type: 'doc',
    content: [
      paragraph('keep'),
      {
        type: 'columns',
        content: [
          column('left', [
            embed('<script>nested</script>'),
            paragraph('inner'),
          ]),
          column('right', [paragraph('r')]),
        ],
      },
      embed('<script>top</script>'),
    ],
  };
};
|
||||
|
||||
/**
 * Runs the real `updatePageContent` handler with `operation: 'replace'` against
 * a stubbed `withYdocConnection`, then decodes what ended up in the document.
 *
 * The stub does not open a connection; it simply invokes the callback the
 * handler supplies, passing a real `Y.Doc` (`captureDoc`). Whatever the handler
 * writes into that doc is read back with `TiptapTransformer.fromYdoc`.
 *
 * @param role            Role placed on the editing user (`user.role`).
 * @param featureEnabled  Value returned as `settings.htmlEmbed` by the mocked
 *                        workspace repo. Defaults to `true`.
 * @returns The JSON decoded from the `'default'` field of the captured doc.
 */
async function gatedContentFor(
  role: string | null | undefined,
  featureEnabled = true,
) {
  // Workspace settings read used by the toggle-AND-admin gate.
  const workspaceRepo = {
    findById: jest.fn(async () => ({
      id: 'ws-1',
      settings: { htmlEmbed: featureEnabled },
    })),
  };
  const handler = new CollaborationHandler(workspaceRepo as any);
  const captureDoc = new Y.Doc();

  jest
    .spyOn(handler, 'withYdocConnection')
    .mockImplementation(async (_hp, _name, _ctx, fn: any) => {
      // Hand the callback the real Y.Doc directly. Awaiting it means that if
      // the callback is (or becomes) async, its work is finished before the
      // doc is decoded below, and a failure rejects this helper instead of
      // surfacing as an unhandled rejection.
      await fn(captureDoc);
    });

  const handlers = handler.getHandlers({} as any);
  await handlers.updatePageContent('page-1', {
    prosemirrorJson: docWithEmbed(),
    operation: 'replace',
    user: { id: 'u1', role, workspaceId: 'ws-1' } as any,
  });

  return TiptapTransformer.fromYdoc(captureDoc, 'default');
}
|
||||
|
||||
describe('CollaborationHandler.updatePageContent htmlEmbed admin gate (real code)', () => {
  // True when the content written for this role/toggle still contains an
  // htmlEmbed node. Leaving `featureEnabled` undefined uses gatedContentFor's
  // own default.
  const embedSurvives = async (
    role: string | null | undefined,
    featureEnabled?: boolean,
  ) => hasHtmlEmbedNode(await gatedContentFor(role, featureEnabled));

  it('non-admin (member): every htmlEmbed (top-level + nested) stripped before the ydoc', async () => {
    const gated = await gatedContentFor('member');
    expect(hasHtmlEmbedNode(gated)).toBe(false);

    // The non-embed siblings must still be there.
    const serialized = JSON.stringify(gated);
    expect(serialized).toContain('keep');
    expect(serialized).toContain('inner');
  });

  it('unknown/empty role: fails closed (stripped)', async () => {
    const unprivilegedRoles: Array<string | null | undefined> = [
      undefined,
      null,
      'viewer',
    ];
    for (const role of unprivilegedRoles) {
      expect(await embedSurvives(role)).toBe(false);
    }
  });

  it('toggle ON + admin: htmlEmbed preserved', async () => {
    expect(await embedSurvives('admin', true)).toBe(true);
  });

  it('toggle ON + owner: htmlEmbed preserved', async () => {
    expect(await embedSurvives('owner', true)).toBe(true);
  });

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    expect(await embedSurvives('admin', false)).toBe(false);
  });

  it('toggle OFF + member: stripped', async () => {
    expect(await embedSurvives('member', false)).toBe(false);
  });
});
|
||||
@@ -8,11 +8,6 @@ import {
|
||||
import { setYjsMark, updateYjsMarkAttribute, YjsSelection } from './yjs.util';
|
||||
import * as Y from 'yjs';
|
||||
import { User } from '@docmost/db/types/entity.types';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
|
||||
export type CollabEventHandlers = ReturnType<
|
||||
CollaborationHandler['getHandlers']
|
||||
@@ -22,8 +17,6 @@ export type CollabEventHandlers = ReturnType<
|
||||
export class CollaborationHandler {
|
||||
private readonly logger = new Logger(CollaborationHandler.name);
|
||||
|
||||
constructor(private readonly workspaceRepo: WorkspaceRepo) {}
|
||||
|
||||
getHandlers(hocuspocus: Hocuspocus) {
|
||||
return {
|
||||
alterState: async (documentName: string, payload: { pageId: string }) => {
|
||||
@@ -89,30 +82,9 @@ export class CollaborationHandler {
|
||||
},
|
||||
) => {
|
||||
const { operation, user } = payload;
|
||||
let { prosemirrorJson } = payload;
|
||||
const { prosemirrorJson } = payload;
|
||||
this.logger.debug('Updating page content via yjs', documentName);
|
||||
|
||||
// SECURITY (Variant C admin gate, REST/MCP/AI write path):
|
||||
// updatePageContent is the server-side entrypoint used by the REST page
|
||||
// update endpoint and by the MCP/AI agent. Raw `htmlEmbed` nodes execute
|
||||
// arbitrary JS in every reader's browser, so a NON-admin caller must not
|
||||
// be able to persist them here. If the editing user is not a workspace
|
||||
// admin/owner, strip every htmlEmbed node before it reaches the ydoc.
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
|
||||
// feature toggle is ON and the editing user is an admin/owner. OFF
|
||||
// (default) => stripped for everyone.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(user?.workspaceId))?.settings,
|
||||
);
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: user?.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from update by user ${user?.id} on ${documentName}`,
|
||||
),
|
||||
});
|
||||
|
||||
await this.withYdocConnection(
|
||||
hocuspocus,
|
||||
documentName,
|
||||
|
||||
@@ -1,456 +0,0 @@
|
||||
import * as Y from 'yjs';
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import { PersistenceExtension } from './persistence.extension';
|
||||
import { tiptapExtensions } from '../collaboration.util';
|
||||
import {
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
HTML_EMBED_NODE_NAME,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL PersistenceExtension.onStoreDocument (the primary collab
|
||||
// WebSocket write path) against a REAL ydoc, with thin repo/db/queue mocks.
|
||||
// This replaces the prior re-implemented `applyAdminGate` stand-in for this
|
||||
// entrypoint: if the role-extraction expression (`context?.user?.role`), the
|
||||
// strip call, or the ydoc-rebuild branch is deleted/changed, these tests fail.
|
||||
|
||||
// Fixture document mixing several node types (paragraph, columns/column,
// mention, table/tableRow/tableHeader/tableCell) with two htmlEmbed nodes: one
// nested inside the left column and one at the top level.
const RICH_DOC = (() => {
  const text = (value: string) => ({ type: 'text', text: value });
  const paragraph = (...inline: unknown[]) => ({
    type: 'paragraph',
    content: inline,
  });
  const embed = (source: string) => ({
    type: HTML_EMBED_NODE_NAME,
    attrs: { source },
  });
  const column = (position: string, ...blocks: unknown[]) => ({
    type: 'column',
    attrs: { position },
    content: blocks,
  });
  // A table row holding a single cell of the given kind with one paragraph.
  const singleCellRow = (cellType: string, label: string) => ({
    type: 'tableRow',
    content: [
      {
        type: cellType,
        attrs: { colspan: 1, rowspan: 1 },
        content: [paragraph(text(label))],
      },
    ],
  });

  const aliceMention = {
    type: 'mention',
    attrs: {
      id: 'mention-1',
      label: 'Alice',
      entityType: 'user',
      entityId: 'user-123',
      creatorId: 'creator-1',
    },
  };

  return {
    type: 'doc',
    content: [
      paragraph(text('intro paragraph')),
      {
        type: 'columns',
        content: [
          column(
            'left',
            paragraph(text('left col, mentioning '), aliceMention),
            // Nested embed inside a column — must be stripped recursively.
            embed('<script>nested()</script>'),
          ),
          column('right', {
            type: 'table',
            content: [
              singleCellRow('tableHeader', 'H'),
              singleCellRow('tableCell', 'cell'),
            ],
          }),
        ],
      },
      // Top-level embed — must be stripped.
      embed('<script>top()</script>'),
      paragraph(text('outro paragraph')),
    ],
  };
})();
|
||||
|
||||
// Converts fixture JSON into a Y.Doc under the 'default' field, using the same
// tiptapExtensions list imported from collaboration.util.
function buildYdoc(json: any): Y.Doc {
  const ydoc = TiptapTransformer.toYdoc(json, 'default', tiptapExtensions);
  return ydoc;
}
|
||||
|
||||
// Tally how many nodes of each `type` appear anywhere in a ProseMirror-style
// JSON tree, descending through every `content` array. htmlEmbed nodes are
// counted under their own key like any other type; callers that want to assert
// every OTHER node type survived a strip skip that key themselves.
//
// The tally is accumulated in a Map and converted to a plain object only at the
// end. Indexing a plain `{}` directly would pick up inherited members, so a
// node whose type is e.g. 'constructor' would start from the inherited function
// instead of 0 and produce a garbage count.
//
// Non-object values (null, strings, numbers) and nodes without a truthy `type`
// contribute nothing, but an untyped node's children are still visited.
function nodeTypeCounts(json: any): Record<string, number> {
  const tally = new Map<string, number>();

  const visit = (node: any): void => {
    if (!node || typeof node !== 'object') return;
    if (node.type) {
      const key = String(node.type);
      tally.set(key, (tally.get(key) ?? 0) + 1);
    }
    if (Array.isArray(node.content)) {
      for (const child of node.content) visit(child);
    }
  };

  visit(json);
  return Object.fromEntries(tally);
}
|
||||
|
||||
/**
 * Builds a real PersistenceExtension wired to in-memory mocks, and exposes the
 * `content` value handed to `pageRepo.updatePage`.
 *
 * @param featureEnabled  Value the mocked workspace repo reports as
 *                        `settings.htmlEmbed`. Defaults to `true`.
 * @param priorContent    Content reported as currently persisted on the page.
 *                        Defaults to an empty doc.
 * @returns `ext` (the extension), `captured` (filled with `content` when
 *          `updatePage` is called) and the `pageRepo` mock.
 */
function buildExtension(featureEnabled = true, priorContent?: any) {
  const captured: { content?: any } = {};

  // Factory for an async mock that resolves to undefined.
  const asyncNoop = () => jest.fn(async () => undefined);

  // The currently-persisted content. The empty-doc default differs from any
  // new content (so the persist runs); a test may pass a prior admin embed here
  // to exercise the preserve-admin-embed branch.
  const persistedContent = priorContent ?? { type: 'doc', content: [] };

  const existingPage = {
    id: 'page-1',
    slugId: 'slug-1',
    spaceId: 'space-1',
    workspaceId: 'ws-1',
    creatorId: 'creator-1',
    contributorIds: [],
    content: persistedContent,
    createdAt: new Date(),
    lastUpdatedSource: 'user',
  };

  const pageRepo = {
    // Each lookup gets its own shallow copy of the page row.
    findById: jest.fn(async () => ({ ...existingPage })),
    updatePage: jest.fn(async (values: any) => {
      captured.content = values.content;
    }),
  };

  const pageHistoryRepo = {
    findPageLastHistory: jest.fn(async () => null),
    saveHistory: asyncNoop(),
  };

  // db.transaction().execute(cb) just runs the callback (no real DB).
  const db = {
    transaction: () => ({
      execute: (cb: any) => cb({} as any),
    }),
  };

  const noopQueue = { add: asyncNoop() } as any;
  const collabHistory = { addContributors: asyncNoop() } as any;
  const transclusionService = {
    syncPageTransclusions: asyncNoop(),
    syncPageReferences: asyncNoop(),
  } as any;

  // Workspace settings read used by the toggle-AND-admin gate.
  const workspaceRepo = {
    findById: jest.fn(async () => ({
      id: 'ws-1',
      settings: { htmlEmbed: featureEnabled },
    })),
  };

  // The same no-op queue object fills all three queue parameters.
  const ext = new PersistenceExtension(
    pageRepo as any,
    pageHistoryRepo as any,
    db as any,
    noopQueue,
    noopQueue,
    noopQueue,
    collabHistory,
    transclusionService,
    workspaceRepo as any,
  );

  return { ext, captured, pageRepo };
}
|
||||
|
||||
// Calls the real onStoreDocument for 'page-1' with the given doc and a context
// user of the given role, and returns the object that records the content
// passed to pageRepo.updatePage.
async function runStore(
  role: string | null | undefined,
  doc: Y.Doc,
  featureEnabled = true,
  priorContent?: any,
) {
  const harness = buildExtension(featureEnabled, priorContent);

  // hocuspocus augments the Y.Doc with broadcastStateless; a bare Y.Doc has
  // none, so stub it (the post-persist broadcast is not under test here).
  Object.assign(doc, { broadcastStateless: () => undefined });

  const payload = {
    documentName: 'page-1',
    document: doc,
    context: { user: { id: 'u1', role } },
  };
  await harness.ext.onStoreDocument(payload as any);

  return harness.captured;
}
|
||||
|
||||
describe('PersistenceExtension.onStoreDocument htmlEmbed admin gate (real code)', () => {
  const ADMIN_SOURCE = '<script>adminAuthored()</script>';

  // Fixture builders: every call returns new objects, so tests share nothing.
  const paragraph = (text: string) => ({
    type: 'paragraph',
    content: [{ type: 'text', text }],
  });
  const embed = (source: string) => ({
    type: HTML_EMBED_NODE_NAME,
    attrs: { source },
  });
  const docOf = (...content: unknown[]) => ({ type: 'doc', content });

  // Persisted content that already holds an admin-authored embed.
  const priorWithAdminEmbed = () =>
    docOf(paragraph('intro'), embed(ADMIN_SOURCE));

  // Stores RICH_DOC as `role` and returns what was captured for the page row.
  // Leaving `featureEnabled` undefined uses runStore's own default.
  const storeRichDoc = (
    role: string | null | undefined,
    featureEnabled?: boolean,
  ) => runStore(role, buildYdoc(RICH_DOC), featureEnabled);

  it('non-admin store: strips EVERY htmlEmbed but preserves every other node', async () => {
    const doc = buildYdoc(RICH_DOC);
    const before = TiptapTransformer.fromYdoc(doc, 'default');
    expect(hasHtmlEmbedNode(before)).toBe(true);
    const beforeCounts = nodeTypeCounts(before);

    const captured = await runStore('member', doc);

    expect(captured.content).toBeDefined();
    // htmlEmbed gone from the persisted content.
    expect(hasHtmlEmbedNode(captured.content)).toBe(false);

    // Every non-embed node type is preserved with the SAME count (guards
    // against data loss if a node were missing from tiptapExtensions and
    // dropped on the toYdoc rebuild).
    const afterCounts = nodeTypeCounts(captured.content);
    Object.keys(beforeCounts)
      .filter((type) => type !== HTML_EMBED_NODE_NAME)
      .forEach((type) => {
        expect(afterCounts[type]).toBe(beforeCounts[type]);
      });

    // The two embeds are gone.
    expect(beforeCounts[HTML_EMBED_NODE_NAME]).toBe(2);
    expect(afterCounts[HTML_EMBED_NODE_NAME]).toBeUndefined();

    // The shared ydoc fragment was also rewritten clean (re-decode it).
    const reDecoded = TiptapTransformer.fromYdoc(doc, 'default');
    expect(hasHtmlEmbedNode(reDecoded)).toBe(false);
  });

  it('toggle ON + admin store: htmlEmbed preserved in persisted content', async () => {
    const { content } = await storeRichDoc('admin', true);
    expect(content).toBeDefined();
    expect(hasHtmlEmbedNode(content)).toBe(true);
    expect(nodeTypeCounts(content)[HTML_EMBED_NODE_NAME]).toBe(2);
  });

  it('toggle ON + owner store: htmlEmbed preserved', async () => {
    const { content } = await storeRichDoc('owner', true);
    expect(hasHtmlEmbedNode(content)).toBe(true);
  });

  it('toggle OFF + admin store: stripped (feature disabled for everyone)', async () => {
    const { content } = await storeRichDoc('admin', false);
    expect(hasHtmlEmbedNode(content)).toBe(false);
  });

  it('toggle OFF + owner store: stripped', async () => {
    const { content } = await storeRichDoc('owner', false);
    expect(hasHtmlEmbedNode(content)).toBe(false);
  });

  it('toggle OFF + member store: stripped', async () => {
    const { content } = await storeRichDoc('member', false);
    expect(hasHtmlEmbedNode(content)).toBe(false);
  });

  it('unknown/empty role: fails closed (stripped)', async () => {
    const unprivilegedRoles: Array<string | null | undefined> = [
      undefined,
      null,
      'viewer',
    ];
    for (const role of unprivilegedRoles) {
      const { content } = await storeRichDoc(role);
      expect(hasHtmlEmbedNode(content)).toBe(false);
    }
  });

  it('toggle ON + non-admin store: PRESERVES an admin embed already in the persisted content through an unrelated edit', async () => {
    // A non-admin makes an UNRELATED edit (tweaks the paragraph) but the embed
    // is still present in the merged doc.
    const edited = docOf(paragraph('intro edited'), embed(ADMIN_SOURCE));

    const captured = await runStore(
      'member',
      buildYdoc(edited),
      true,
      priorWithAdminEmbed(),
    );

    expect(captured.content).toBeDefined();
    // The admin's pre-existing embed survives the non-admin store.
    expect(collectHtmlEmbedSources(captured.content)).toEqual(
      new Set([ADMIN_SOURCE]),
    );
  });

  it('toggle ON + non-admin store: strips a NEWLY-added embed while keeping the prior admin one', async () => {
    // Non-admin keeps the admin embed, makes an unrelated paragraph edit (so
    // the store is not a no-op and is persisted), and ALSO adds a brand-new
    // embed.
    const edited = docOf(
      paragraph('intro edited'),
      embed(ADMIN_SOURCE),
      embed('<script>evil()</script>'),
    );

    const captured = await runStore(
      'member',
      buildYdoc(edited),
      true,
      priorWithAdminEmbed(),
    );

    expect(captured.content).toBeDefined();
    // Only the admin-vetted source remains; the newly-introduced one is
    // stripped.
    expect(collectHtmlEmbedSources(captured.content)).toEqual(
      new Set([ADMIN_SOURCE]),
    );
  });

  it('empty-fragment ydoc (no content) does not throw and persists no embed', async () => {
    const emptyDoc = buildYdoc(docOf({ type: 'paragraph' }));
    // Non-admin path with an empty/embed-free fragment must be a no-op strip,
    // not throw.
    await expect(runStore('member', emptyDoc)).resolves.toBeDefined();
  });
});
|
||||
|
||||
// Covers the REAL early onChange guard (Gitea #26). guardHtmlEmbed is the body
// of the debounced timer, so the tests call it directly and then re-decode the
// shared ydoc: a non-admin's transient embed must be gone from the fragment,
// while embeds already present in the persisted content must survive.
describe('PersistenceExtension.guardHtmlEmbed early onChange guard (real code)', () => {
  const ADMIN_SOURCE = '<script>adminAuthored()</script>';
  const EVIL_SOURCE = '<script>evil()</script>';

  // Fixture builders: every call returns new objects.
  const paragraph = (text: string) => ({
    type: 'paragraph',
    content: [{ type: 'text', text }],
  });
  const embed = (source: string) => ({
    type: HTML_EMBED_NODE_NAME,
    attrs: { source },
  });
  const docOf = (...content: unknown[]) => ({ type: 'doc', content });

  // Decodes the live ydoc's 'default' field.
  const decode = (doc: Y.Doc) => TiptapTransformer.fromYdoc(doc, 'default');

  // Invokes the private guard on a freshly built extension.
  const runGuard = async (
    role: string | null | undefined,
    doc: Y.Doc,
    featureEnabled = true,
    priorContent?: any,
  ) => {
    const { ext } = buildExtension(featureEnabled, priorContent);
    const context = { user: { id: 'u1', role, workspaceId: 'ws-1' } };
    await (ext as any).guardHtmlEmbed('page-1', doc, context);
  };

  it('toggle ON + non-admin: strips a newly-added embed from the shared ydoc', async () => {
    // Prior persisted content has NO embed; the live doc has one a non-admin
    // just added.
    const doc = buildYdoc(docOf(paragraph('hi'), embed(EVIL_SOURCE)));
    expect(hasHtmlEmbedNode(decode(doc))).toBe(true);

    await runGuard('member', doc, true, docOf());

    // The shared ydoc fragment no longer yields any htmlEmbed.
    expect(hasHtmlEmbedNode(decode(doc))).toBe(false);
  });

  it('toggle ON + non-admin: preserves a prior admin embed, strips the new one', async () => {
    const prior = docOf(paragraph('intro'), embed(ADMIN_SOURCE));
    // Live doc keeps the admin embed AND adds a brand-new one.
    const doc = buildYdoc(
      docOf(paragraph('intro'), embed(ADMIN_SOURCE), embed(EVIL_SOURCE)),
    );

    await runGuard('member', doc, true, prior);

    // Only the admin-vetted source survives in the shared ydoc.
    expect(collectHtmlEmbedSources(decode(doc))).toEqual(
      new Set([ADMIN_SOURCE]),
    );
  });

  it('toggle OFF + non-admin: strips ALL embeds (allow-list is null)', async () => {
    // Even an embed that matches the prior content is stripped when the toggle
    // is OFF, because the OFF path passes allowed=null (strip everything) and
    // never reads the prior content for an allow-list.
    const SOURCE = '<script>any()</script>';
    const doc = buildYdoc(docOf(paragraph('hi'), embed(SOURCE)));

    await runGuard('member', doc, false, docOf(embed(SOURCE)));

    expect(hasHtmlEmbedNode(decode(doc))).toBe(false);
  });

  it('admin role: guard is a defensive no-op (embed preserved)', async () => {
    const doc = buildYdoc(docOf(embed('<script>ok()</script>')));

    await runGuard('admin', doc, true, docOf());

    expect(hasHtmlEmbedNode(decode(doc))).toBe(true);
  });

  it('no embed present: guard is a cheap no-op (loop-safe re-fire)', async () => {
    const doc = buildYdoc(docOf(paragraph('plain')));

    await runGuard('member', doc, true, docOf());

    expect(hasHtmlEmbedNode(decode(doc))).toBe(false);
  });
});
|
||||
@@ -39,16 +39,6 @@ import {
|
||||
HISTORY_INTERVAL,
|
||||
} from '../constants';
|
||||
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripDisallowedHtmlEmbedNodes,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
|
||||
@Injectable()
|
||||
export class PersistenceExtension implements Extension {
|
||||
@@ -59,21 +49,6 @@ export class PersistenceExtension implements Extension {
|
||||
// coalescing window" per document and OR it across all edits in the window,
|
||||
// so the snapshot is marked 'agent' regardless of who wrote last.
|
||||
private agentTouched: Map<string, boolean> = new Map();
|
||||
// Per-document debounce timers for the early htmlEmbed guard (Gitea #26).
|
||||
// onChange schedules a short (~300ms) debounced strip that converges the
|
||||
// shared ydoc for all connected clients well before the 10s store debounce,
|
||||
// shrinking the pre-persist broadcast window of a non-admin's transient embed.
|
||||
private htmlEmbedGuardTimers: Map<string, NodeJS.Timeout> = new Map();
|
||||
// Per-document cache of the workspace htmlEmbed toggle (Gitea #26). Populated
|
||||
// in onLoadDocument (which already loads the page + has workspace context) and
|
||||
// read in onChange to gate early-guard scheduling: when the toggle is OFF (the
|
||||
// common default) we schedule NOTHING — no timer, no fromYdoc, no DB read — and
|
||||
// rely on the onStoreDocument strip as the backstop (when OFF the embed does
|
||||
// not execute in editable mode anyway). Cleared in afterUnloadDocument.
|
||||
// STALENESS: if an admin flips the toggle ON mid-session this cache stays OFF
|
||||
// until the document is reloaded, so the early guard won't schedule — accepted,
|
||||
// the onStoreDocument backstop still strips on persist.
|
||||
private htmlEmbedToggleByDoc: Map<string, boolean> = new Map();
|
||||
|
||||
constructor(
|
||||
private readonly pageRepo: PageRepo,
|
||||
@@ -84,7 +59,6 @@ export class PersistenceExtension implements Extension {
|
||||
@InjectQueue(QueueName.NOTIFICATION_QUEUE) private notificationQueue: Queue,
|
||||
private readonly collabHistory: CollabHistoryService,
|
||||
private readonly transclusionService: TransclusionService,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async onLoadDocument(data: onLoadDocumentPayload) {
|
||||
@@ -105,23 +79,6 @@ export class PersistenceExtension implements Extension {
|
||||
return;
|
||||
}
|
||||
|
||||
// Cache the workspace htmlEmbed toggle for this document (Gitea #26). We
|
||||
// already have the page (hence its workspaceId) here, so resolve the toggle
|
||||
// once and cache it keyed by documentName. onChange reads this to decide
|
||||
// whether to schedule the early guard at all — when OFF we skip the guard
|
||||
// entirely (no timer, no fromYdoc, no DB read). Cleared in
|
||||
// afterUnloadDocument. See htmlEmbedToggleByDoc for the staleness note.
|
||||
try {
|
||||
const enabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(page.workspaceId))?.settings,
|
||||
);
|
||||
this.htmlEmbedToggleByDoc.set(documentName, enabled);
|
||||
} catch (err) {
|
||||
// Fail OFF: if the toggle can't be resolved, never schedule the early
|
||||
// guard; the onStoreDocument backstop still strips on persist.
|
||||
this.htmlEmbedToggleByDoc.set(documentName, false);
|
||||
}
|
||||
|
||||
if (page.ydoc) {
|
||||
this.logger.debug(`ydoc loaded from db: ${pageId}`);
|
||||
|
||||
@@ -155,109 +112,7 @@ export class PersistenceExtension implements Extension {
|
||||
|
||||
const pageId = getPageId(documentName);
|
||||
|
||||
let tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
|
||||
|
||||
// SECURITY (Variant C admin gate, collab WebSocket write path):
|
||||
// The persisted snapshot is the merged ydoc, which may contain an htmlEmbed
|
||||
// node inserted by ANY connected editor. htmlEmbed renders raw, unsanitized
|
||||
// JS in every reader's browser, so only workspace admins/owners may author
|
||||
// it. When the user whose store triggers this persist is not an admin, strip
|
||||
// every htmlEmbed node before it is written to the page row AND before the
|
||||
// ydoc state is re-encoded, so the node cannot be reintroduced by a
|
||||
// non-admin via the collab socket.
|
||||
// NOTE (defense-in-depth refinement, Gitea #29): the gate is keyed to the
|
||||
// storing connection's user, but it no longer blindly strips EVERY embed on
|
||||
// a non-admin store. We distinguish two cases inside the !allowed branch:
|
||||
// - Feature toggle OFF => strip ALL embeds (the feature is disabled for
|
||||
// everyone; existing embeds get cleaned up on the next save).
|
||||
// - Toggle ON but the storer is a NON-admin => strip only NEWLY-introduced
|
||||
// embeds and PRESERVE embeds already present in the currently-persisted
|
||||
// page content (admin-authored, already vetted). So a non-admin still
|
||||
// cannot ADD an embed, but an unrelated edit (e.g. a paragraph tweak) no
|
||||
// longer destroys an admin's existing embed (the prior data-loss bug).
|
||||
// The pre-existing-embed identity is the raw `attrs.source` (see
|
||||
// collectHtmlEmbedSources). A non-admin who copies an existing admin embed's
|
||||
// exact source elsewhere passes — acceptable, that HTML is already vetted.
|
||||
//
|
||||
// ACCEPTED RESIDUAL RISK (toggle-ON allow-list TOCTOU): the allow-list is a
|
||||
// best-effort snapshot read OUTSIDE the locked transaction (the prior content
|
||||
// is pre-read above, but inside executeTx the row is re-read withLock without
|
||||
// recomputing the allow-list). A concurrent admin store that changes the
|
||||
// persisted embeds between the pre-read and this write can make the preserve
|
||||
// decision use a slightly stale snapshot — worst case one embed transiently
|
||||
// kept or dropped; it converges on the next store, with no auth bypass or
|
||||
// broader data loss. The race is accepted because it only affects concurrent
|
||||
// authenticated editors on the (rare) toggle-ON non-admin path, converges on
|
||||
// the next store, and the persisted row plus every share/readonly read path
|
||||
// remain protected by the strip.
|
||||
//
|
||||
// RESIDUAL RISK (pre-persist broadcast window) — NOW MITIGATED (Gitea #26):
|
||||
// this strip runs in the debounced onStoreDocument (up to 10s), but
|
||||
// hocuspocus broadcasts each inbound Yjs update to connected clients
|
||||
// immediately, so a non-admin's transient htmlEmbed can execute in OTHER open
|
||||
// editors' browsers in the window before this persist strips it. The exposure
|
||||
// is limited to concurrent AUTHENTICATED space members who have the doc open
|
||||
// with Edit rights (semi-trusted) — anonymous public-share/readonly viewers do
|
||||
// NOT open a collab socket (ReadonlyPageEditor renders fetched,
|
||||
// already-stripped content; HocuspocusProvider is only used by the
|
||||
// authenticated editable page-editor), and the PERSISTED page row plus every
|
||||
// share/readonly read path are protected by this strip.
|
||||
// The window is now SHRUNK to sub-second by an onChange-debounced early guard
|
||||
// (~300ms) — see guardHtmlEmbed() — which runs the SAME preserve/strip gate as
|
||||
// this block and re-encodes the cleaned ydoc, converging the doc for all
|
||||
// clients long before this 10s store debounce fires. This onStoreDocument
|
||||
// strip remains the authoritative backstop for persistence. The irreducible
|
||||
// residual is only the VERY FIRST inbound broadcast before the ~300ms debounce
|
||||
// fires: hocuspocus exposes no synchronous beforeBroadcast filter to drop the
|
||||
// node before that first relay, so it cannot be eliminated entirely.
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace feature
|
||||
// toggle is ON and the storing user is an admin/owner. OFF (default) =>
|
||||
// stripped for everyone (existing embeds get cleaned up on next save).
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(context?.user?.workspaceId))?.settings,
|
||||
);
|
||||
if (!htmlEmbedAllowed(htmlEmbedEnabled, context?.user?.role)) {
|
||||
if (hasHtmlEmbedNode(tiptapJson)) {
|
||||
let strippedJson: typeof tiptapJson;
|
||||
if (htmlEmbedEnabled === false) {
|
||||
// Toggle OFF: feature disabled for everyone -> strip ALL embeds.
|
||||
strippedJson = stripHtmlEmbedNodes(tiptapJson);
|
||||
} else {
|
||||
// Toggle ON, non-admin storer: preserve embeds already present in the
|
||||
// currently-persisted (admin-vetted) page content; strip only the
|
||||
// newly-introduced ones. Pre-read the prior content — a small extra
|
||||
// query only on this rare non-admin + toggle-ON path.
|
||||
const prior = await this.pageRepo.findById(pageId, {
|
||||
includeContent: true,
|
||||
});
|
||||
const allowed = collectHtmlEmbedSources(prior?.content);
|
||||
strippedJson = stripDisallowedHtmlEmbedNodes(tiptapJson, allowed);
|
||||
}
|
||||
|
||||
// Only mutate the ydoc + log when the strip actually removed something;
|
||||
// an unnecessary ydoc rewrite would churn the doc for all clients. With
|
||||
// the toggle-ON branch a non-admin store that only touches admin-vetted
|
||||
// embeds leaves the content unchanged here.
|
||||
if (!isDeepStrictEqual(strippedJson, tiptapJson)) {
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from collab store by user ${context?.user?.id} on ${documentName}`,
|
||||
);
|
||||
tiptapJson = strippedJson;
|
||||
// Reflect the stripped content back into the shared ydoc so the node
|
||||
// is removed for all connected clients, not just the persisted row.
|
||||
const fragment = document.getXmlFragment('default');
|
||||
if (fragment.length > 0) {
|
||||
fragment.delete(0, fragment.length);
|
||||
}
|
||||
const cleanDoc = TiptapTransformer.toYdoc(
|
||||
tiptapJson,
|
||||
'default',
|
||||
tiptapExtensions,
|
||||
);
|
||||
Y.applyUpdate(document, Y.encodeStateAsUpdate(cleanDoc));
|
||||
}
|
||||
}
|
||||
}
|
||||
const tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
|
||||
|
||||
const ydocState = Buffer.from(Y.encodeStateAsUpdate(document));
|
||||
|
||||
@@ -429,168 +284,12 @@ export class PersistenceExtension implements Extension {
|
||||
if (data.context?.actor === 'agent') {
|
||||
this.agentTouched.set(documentName, true);
|
||||
}
|
||||
|
||||
// Early htmlEmbed guard scheduling (Gitea #26). Schedule the short debounced
|
||||
// guard ONLY when (a) this document's workspace toggle is cached ON and
|
||||
// (b) the changing connection's user is a NON-admin (cannot author
|
||||
// htmlEmbed). When the toggle is OFF/unknown we schedule NOTHING — no timer,
|
||||
// no fromYdoc, no DB read — killing the OFF-case overhead (the common
|
||||
// default); the onStoreDocument strip is the backstop and an OFF embed does
|
||||
// not execute in editable mode anyway. We do NO expensive work here — we only
|
||||
// (re)schedule the timer; the debounce coalesces rapid edits into a single
|
||||
// guard check.
|
||||
if (
|
||||
userId &&
|
||||
this.htmlEmbedToggleByDoc.get(documentName) === true &&
|
||||
!canAuthorHtmlEmbed(data.context?.user?.role)
|
||||
) {
|
||||
const existing = this.htmlEmbedGuardTimers.get(documentName);
|
||||
if (existing) {
|
||||
clearTimeout(existing);
|
||||
}
|
||||
const timer = setTimeout(() => {
|
||||
this.htmlEmbedGuardTimers.delete(documentName);
|
||||
void this.guardHtmlEmbed(documentName, data.document, data.context);
|
||||
}, 300);
|
||||
this.htmlEmbedGuardTimers.set(documentName, timer);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Early htmlEmbed guard (Gitea #26).
 *
 * Runs from the short timer that onChange arms after a non-admin edit and
 * removes, from the live shared ydoc, any htmlEmbed node that user may not
 * author. It is an ADDITIONAL early pass: onStoreDocument keeps its own strip
 * for what gets persisted.
 *
 * Ordering invariant (do not reorder):
 *   1. ASYNC phase — every `await` (workspace toggle lookup, persisted-content
 *      read for the allow-list) completes first.
 *   2. SYNC phase — the doc is snapshotted, stripped and rewritten in one run
 *      with no `await` in between. Nothing can interleave with a synchronous
 *      run, so an edit that arrived during the awaits is part of the snapshot
 *      and is not overwritten by the rewrite.
 *
 * Strip policy:
 *   - toggle OFF: every htmlEmbed node is removed (allow-list is `null`);
 *   - toggle ON: embeds whose source already exists in the persisted page
 *     content are kept, all other embeds are removed.
 * The allow-list is read without any lock, so it is a best-effort snapshot and
 * may be slightly stale relative to a concurrent store.
 *
 * Repeat runs: a later run on an already-cleaned doc changes nothing. Either no
 * embed is left (hasHtmlEmbedNode exit) or only allow-listed embeds are left
 * (deep-equality exit). The corrective applyUpdate is issued without an origin;
 * NOTE(review): confirm that onChange derives no userId for such an update, so
 * the `userId &&` scheduling condition does not re-arm the timer.
 *
 * Never rejects: the caller fires this with `void` from a timer, so the body is
 * wrapped in try/catch and failures are only logged.
 *
 * @param documentName collab document name; also used to derive the page id
 * @param document live shared Y.Doc to clean in place
 * @param context connection context of the user whose edit armed the timer
 */
private async guardHtmlEmbed(
  documentName: string,
  document: Y.Doc,
  context: any,
): Promise<void> {
  // The timer that called us has fired; make sure its map entry is gone.
  this.htmlEmbedGuardTimers.delete(documentName);

  try {
    // onChange only arms the timer for users who cannot author embeds. If an
    // admin/owner context reaches this point anyway, leave the doc alone.
    const role = context?.user?.role;
    if (canAuthorHtmlEmbed(role)) {
      return;
    }

    // ---- ASYNC PHASE: all awaits happen here, before the ydoc is read. ----
    const workspace = await this.workspaceRepo.findById(
      context?.user?.workspaceId,
    );
    const featureOn = isHtmlEmbedFeatureEnabled(workspace?.settings);

    // `null` means "strip every embed" (toggle OFF). Otherwise this is the set
    // of embed sources already present in the persisted page content.
    let vettedSources: Set<string> | null;
    if (featureOn === false) {
      vettedSources = null;
    } else {
      const persisted = await this.pageRepo.findById(getPageId(documentName), {
        includeContent: true,
      });
      vettedSources = collectHtmlEmbedSources(persisted?.content);
    }

    // The document may have been unloaded/destroyed while we were awaiting.
    const { isDestroyed } = document as { isDestroyed?: boolean };
    if (isDestroyed) {
      return;
    }

    // ---- SYNC PHASE: snapshot -> strip -> write back, with NO await. ----
    const snapshot = TiptapTransformer.fromYdoc(document, 'default');

    // Nothing to guard when the doc holds no embed at all.
    if (!hasHtmlEmbedNode(snapshot)) {
      return;
    }

    let cleaned: typeof snapshot;
    if (vettedSources === null) {
      cleaned = stripHtmlEmbedNodes(snapshot);
    } else {
      cleaned = stripDisallowedHtmlEmbedNodes(snapshot, vettedSources);
    }

    // The strip removed nothing (e.g. only allow-listed embeds are present):
    // skip the rewrite so the shared doc is not churned for every client.
    if (isDeepStrictEqual(cleaned, snapshot)) {
      return;
    }

    // Replace the fragment content with the cleaned document: clear what is
    // there, then merge in the state of a doc built from the cleaned JSON.
    const body = document.getXmlFragment('default');
    const currentLength = body.length;
    if (currentLength > 0) {
      body.delete(0, currentLength);
    }
    const rebuilt = TiptapTransformer.toYdoc(
      cleaned,
      'default',
      tiptapExtensions,
    );
    const rebuiltState = Y.encodeStateAsUpdate(rebuilt);
    Y.applyUpdate(document, rebuiltState);

    this.logger.warn(
      `Stripping htmlEmbed node(s) via early onChange guard by user ${context?.user?.id} on ${documentName}`,
    );
  } catch (err) {
    // Swallow after logging: this runs detached from a timer callback.
    this.logger.error(
      `Early htmlEmbed guard failed on ${documentName}`,
      err,
    );
  }
}
|
||||
|
||||
/**
 * Drops every per-document entry this extension keeps once the document has
 * been unloaded: contributors, the agent-touched flag, the cached htmlEmbed
 * toggle, and any pending early-guard timer.
 *
 * @param data unload payload; only `documentName` is read
 */
async afterUnloadDocument(data: afterUnloadDocumentPayload) {
  const { documentName } = data;

  this.contributors.delete(documentName);
  this.agentTouched.delete(documentName);

  // Forget the cached toggle so the next load resolves it again and sees a
  // toggle change made while this document was open.
  this.htmlEmbedToggleByDoc.delete(documentName);

  // Cancel a still-pending early guard so it cannot run against a document
  // that is no longer loaded.
  const pendingGuard = this.htmlEmbedGuardTimers.get(documentName);
  if (!pendingGuard) {
    return;
  }
  clearTimeout(pendingGuard);
  this.htmlEmbedGuardTimers.delete(documentName);
}
|
||||
|
||||
private consumeContributors(documentName: string): string[] {
|
||||
|
||||
@@ -3,20 +3,17 @@ import { htmlToJson } from '../../../collaboration/collaboration.util';
|
||||
import { hasHtmlEmbedNode, stripHtmlEmbedNodes } from './html-embed.util';
|
||||
|
||||
/**
|
||||
* CONTRACT (security): an attacker who controls imported markdown/HTML could try
|
||||
* to smuggle an htmlEmbed in the *serialized* DOM form —
|
||||
* CONTRACT: imported markdown/HTML can carry an htmlEmbed in the *serialized*
|
||||
* DOM form —
|
||||
* <div data-type="htmlEmbed" data-source="...">
|
||||
* — directly, bypassing the editor's `<!--html-embed:-->` comment marker.
|
||||
*
|
||||
* This exercises the REAL server import conversion path that ImportService uses
|
||||
* The block renders inside a sandboxed iframe, so this is not an XSS surface;
|
||||
* this exercises the REAL server import conversion path that ImportService uses
|
||||
* (`markdownToHtml` then `htmlToJson`; `processHTML` adds only a cheerio
|
||||
* link/iframe normalize pass which does not touch htmlEmbed divs) and asserts
|
||||
* the ACTUAL behaviour so we know whether the strip gate can be bypassed.
|
||||
*
|
||||
* FINDING (documented): the raw embed div DOES round-trip through marked +
|
||||
* htmlToJson into a real `htmlEmbed` node, so `hasHtmlEmbedNode` returns true and
|
||||
* `stripHtmlEmbedNodes` removes it. The serialized-form bypass is therefore
|
||||
* detectable and STRIPPABLE — the write-path gate covers it.
|
||||
* that such a node is DETECTED and STRIPPABLE — so the share read path's
|
||||
* master-toggle strip can remove it when the workspace toggle is OFF.
|
||||
*/
|
||||
describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTML', () => {
|
||||
it('round-trips through markdownToHtml -> htmlToJson and is DETECTED (base64 data-source)', async () => {
|
||||
@@ -38,7 +35,7 @@ describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTM
|
||||
// The div parses into a real htmlEmbed node carrying the decoded source.
|
||||
expect(hasHtmlEmbedNode(json)).toBe(true);
|
||||
|
||||
// Because it is detected, the write-path gate can strip it for non-admins.
|
||||
// Because it is detected, the share master-toggle strip can remove it.
|
||||
const stripped = stripHtmlEmbedNodes(json);
|
||||
expect(hasHtmlEmbedNode(stripped)).toBe(false);
|
||||
// Surrounding non-embed content is retained.
|
||||
|
||||
@@ -1,11 +1,6 @@
|
||||
import {
|
||||
canAuthorHtmlEmbed,
|
||||
collectHtmlEmbedSources,
|
||||
hasHtmlEmbedNode,
|
||||
htmlEmbedAllowed,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripDisallowedHtmlEmbedNodes,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
stripHtmlEmbedNodes,
|
||||
} from './html-embed.util';
|
||||
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
|
||||
@@ -96,17 +91,6 @@ describe('stripHtmlEmbedNodes', () => {
|
||||
expect(result).toEqual(doc);
|
||||
});
|
||||
|
||||
it('neutralizes a root node that is itself an htmlEmbed', () => {
  // A ProseMirror root is normally a `doc`, so this input is a defensive case:
  // whatever comes back must not contain an htmlEmbed node.
  const bareEmbed = {
    type: 'htmlEmbed',
    attrs: { source: '<script>alert(1)</script>' },
  };

  const output = stripHtmlEmbedNodes(bareEmbed);

  expect(hasHtmlEmbedNode(output)).toBe(false);
});
|
||||
|
||||
it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
|
||||
// htmlEmbed sits as a sibling of a paragraph, nested four containers deep.
|
||||
const doc = {
|
||||
@@ -172,169 +156,6 @@ describe('stripHtmlEmbedNodes', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// collectHtmlEmbedSources: gathers the `attrs.source` string of each htmlEmbed
// node in a ProseMirror JSON tree into a Set.
describe('collectHtmlEmbedSources', () => {
  // Small builder for the filler paragraphs used below.
  const textParagraph = (text: string) => ({
    type: 'paragraph',
    content: [{ type: 'text', text }],
  });

  it('collects the source of every htmlEmbed node, including nested ones', () => {
    // One embed at the top level, one inside columns > column.
    const nestedColumn = {
      type: 'column',
      content: [
        { type: 'htmlEmbed', attrs: { source: '<i>nested</i>' } },
        textParagraph('x'),
      ],
    };
    const input = {
      type: 'doc',
      content: [
        { type: 'htmlEmbed', attrs: { source: '<b>top</b>' } },
        { type: 'columns', content: [nestedColumn] },
      ],
    };

    const collected = collectHtmlEmbedSources(input);

    expect(collected).toEqual(new Set(['<b>top</b>', '<i>nested</i>']));
  });

  it('returns an empty set for a doc with no embeds', () => {
    const input = { type: 'doc', content: [textParagraph('hi')] };

    expect(collectHtmlEmbedSources(input).size).toBe(0);
  });

  it('gracefully skips embeds with absent attrs or non-string source', () => {
    const input = {
      type: 'doc',
      content: [
        // attrs missing entirely
        { type: 'htmlEmbed' },
        // attrs present, source missing
        { type: 'htmlEmbed', attrs: {} },
        // source present but not a string
        { type: 'htmlEmbed', attrs: { source: 42 } },
        // the only well-formed embed
        { type: 'htmlEmbed', attrs: { source: '<ok/>' } },
      ],
    };

    expect(collectHtmlEmbedSources(input)).toEqual(new Set(['<ok/>']));
  });

  it('returns an empty set for non-object input', () => {
    const nonObjects = [null, undefined, 'x' as any];

    for (const value of nonObjects) {
      expect(collectHtmlEmbedSources(value).size).toBe(0);
    }
  });
});
|
||||
|
||||
// stripDisallowedHtmlEmbedNodes: removes htmlEmbed nodes whose `attrs.source`
// is not in the given allow-list and keeps the ones that are.
describe('stripDisallowedHtmlEmbedNodes', () => {
  // Source that the allow-list accepts in most cases below.
  const VETTED = '<vetted/>';
  // Fresh Set per call so no case can leak state into another.
  const vettedOnly = () => new Set([VETTED]);
  const nothingAllowed = () => new Set<string>();

  it('keeps an embed whose source is allowed and removes the rest', () => {
    const input = {
      type: 'doc',
      content: [
        { type: 'htmlEmbed', attrs: { source: VETTED } },
        { type: 'htmlEmbed', attrs: { source: '<new-evil/>' } },
        { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
      ],
    };

    const output = stripDisallowedHtmlEmbedNodes(input, vettedOnly());

    expect(collectHtmlEmbedSources(output)).toEqual(vettedOnly());
    // Two survivors, in order: the allowed embed, then the paragraph.
    expect(output.content).toHaveLength(2);
    const [keptEmbed, keptParagraph] = output.content;
    expect(keptEmbed.attrs.source).toBe(VETTED);
    expect(keptParagraph.type).toBe('paragraph');
  });

  it('keeps BOTH embeds when two nodes share the same allowed source', () => {
    // Identity is the raw `attrs.source`, so a second copy of an allowed
    // source is allowed as well — both copies must survive.
    const input = {
      type: 'doc',
      content: [
        { type: 'htmlEmbed', attrs: { source: VETTED } },
        { type: 'paragraph', content: [{ type: 'text', text: 'mid' }] },
        { type: 'htmlEmbed', attrs: { source: VETTED } },
      ],
    };

    const output = stripDisallowedHtmlEmbedNodes(input, vettedOnly());

    expect(hasHtmlEmbedNode(output)).toBe(true);
    const survivingEmbeds = output.content.filter(
      (node: any) => node.type === 'htmlEmbed',
    );
    expect(survivingEmbeds).toHaveLength(2);
    expect(
      survivingEmbeds.every((node: any) => node.attrs.source === VETTED),
    ).toBe(true);
  });

  it('removes a newly-introduced embed when nothing is allowed', () => {
    const input = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source: '<new/>' } }],
    };

    const output = stripDisallowedHtmlEmbedNodes(input, nothingAllowed());

    expect(hasHtmlEmbedNode(output)).toBe(false);
  });

  it('filters nested embeds by the allow-list (e.g. inside columns)', () => {
    const input = {
      type: 'doc',
      content: [
        {
          type: 'columns',
          content: [
            {
              type: 'column',
              content: [
                { type: 'htmlEmbed', attrs: { source: VETTED } },
                { type: 'htmlEmbed', attrs: { source: '<new/>' } },
              ],
            },
          ],
        },
      ],
    };

    const output = stripDisallowedHtmlEmbedNodes(input, vettedOnly());

    // Only the allowed embed is left inside the column.
    const column = findFirstChild(output, 'column');
    expect(column.content).toHaveLength(1);
    expect(column.content[0].attrs.source).toBe(VETTED);
  });

  it('treats an embed with absent/non-string source as not allowed (stripped)', () => {
    const input = {
      type: 'doc',
      content: [
        // no attrs at all
        { type: 'htmlEmbed' },
        // attrs without a source
        { type: 'htmlEmbed', attrs: {} },
      ],
    };

    const output = stripDisallowedHtmlEmbedNodes(input, vettedOnly());

    expect(hasHtmlEmbedNode(output)).toBe(false);
  });

  it('does not mutate the input document', () => {
    const input = {
      type: 'doc',
      content: [{ type: 'htmlEmbed', attrs: { source: '<new/>' } }],
    };

    stripDisallowedHtmlEmbedNodes(input, nothingAllowed());

    // The caller's object still holds its single embed.
    expect(input.content).toHaveLength(1);
    expect(input.content[0].type).toBe('htmlEmbed');
  });

  it('neutralizes a root node that is itself a disallowed htmlEmbed', () => {
    const bareEmbed = { type: 'htmlEmbed', attrs: { source: '<new/>' } };

    const output = stripDisallowedHtmlEmbedNodes(bareEmbed, nothingAllowed());

    expect(hasHtmlEmbedNode(output)).toBe(false);
  });

  it('keeps a root node that is an allowed htmlEmbed (defensive branch)', () => {
    const bareEmbed = { type: 'htmlEmbed', attrs: { source: VETTED } };

    const output = stripDisallowedHtmlEmbedNodes(bareEmbed, vettedOnly());

    expect(collectHtmlEmbedSources(output)).toEqual(vettedOnly());
  });

  it('returns non-object input unchanged', () => {
    const output = stripDisallowedHtmlEmbedNodes(null as any, nothingAllowed());

    expect(output).toBeNull();
  });
});
|
||||
|
||||
describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
|
||||
it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
|
||||
const rootEmbed = { type: 'htmlEmbed', attrs: { source: '<script>r</script>' } };
|
||||
@@ -367,19 +188,6 @@ describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// canAuthorHtmlEmbed: role check — true for owner/admin, false for the other
// roles exercised here.
describe('canAuthorHtmlEmbed', () => {
  it('allows owner and admin', () => {
    for (const role of ['owner', 'admin']) {
      expect(canAuthorHtmlEmbed(role)).toBe(true);
    }
  });

  it('denies member and unknown/empty roles', () => {
    const deniedRoles = ['member', null, undefined, 'viewer'];

    for (const role of deniedRoles) {
      expect(canAuthorHtmlEmbed(role)).toBe(false);
    }
  });
});
|
||||
|
||||
describe('isHtmlEmbedFeatureEnabled', () => {
|
||||
it('is true only when settings.htmlEmbed === true', () => {
|
||||
expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: true })).toBe(true);
|
||||
@@ -394,165 +202,22 @@ describe('isHtmlEmbedFeatureEnabled', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// htmlEmbedAllowed(toggle, role): true only when the feature toggle is ON and
// the role is admin/owner — the four quadrants are pinned below.
describe('htmlEmbedAllowed (toggle AND admin)', () => {
  const TOGGLE_ON = true;
  const TOGGLE_OFF = false;

  it('toggle OFF + admin/owner => not allowed (feature disabled for everyone)', () => {
    for (const role of ['admin', 'owner']) {
      expect(htmlEmbedAllowed(TOGGLE_OFF, role)).toBe(false);
    }
  });

  it('toggle OFF + member => not allowed', () => {
    expect(htmlEmbedAllowed(TOGGLE_OFF, 'member')).toBe(false);
  });

  it('toggle ON + admin/owner => allowed', () => {
    for (const role of ['admin', 'owner']) {
      expect(htmlEmbedAllowed(TOGGLE_ON, role)).toBe(true);
    }
  });

  it('toggle ON + member/unknown => not allowed', () => {
    const deniedRoles = ['member', null, undefined, 'viewer'];

    for (const role of deniedRoles) {
      expect(htmlEmbedAllowed(TOGGLE_ON, role)).toBe(false);
    }
  });
});
|
||||
|
||||
// stripHtmlEmbedIfNotAllowed is the strip routine shared by the plain
// write-path call-sites (collab handler, page create/duplicate, import,
// file-import-task, transclusion-unsync). Its contract is pinned here once
// rather than being re-checked in every call-site's spec.
describe('stripHtmlEmbedIfNotAllowed (shared write-path gate)', () => {
  // Fresh fixtures per call: a doc holding one paragraph and one embed …
  const embedDoc = () => ({
    type: 'doc',
    content: [
      { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
      { type: 'htmlEmbed', attrs: { source: '<script>x()</script>' } },
    ],
  });
  // … and the same doc without the embed.
  const plainDoc = () => ({
    type: 'doc',
    content: [{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }],
  });

  it('keeps the doc unchanged when feature is ON and role is admin (allowed)', () => {
    const input = embedDoc();
    const onStrip = jest.fn();

    const output = stripHtmlEmbedIfNotAllowed(input, {
      featureEnabled: true,
      role: 'admin',
      onStrip,
    });

    // Allowed: the very same object comes back, embed intact, callback idle.
    expect(output).toBe(input);
    expect(hasHtmlEmbedNode(output)).toBe(true);
    expect(onStrip).not.toHaveBeenCalled();
  });

  it('keeps the doc unchanged for an owner when feature is ON (allowed)', () => {
    const input = embedDoc();
    const onStrip = jest.fn();

    const output = stripHtmlEmbedIfNotAllowed(input, {
      featureEnabled: true,
      role: 'owner',
      onStrip,
    });

    expect(output).toBe(input);
    expect(hasHtmlEmbedNode(output)).toBe(true);
    expect(onStrip).not.toHaveBeenCalled();
  });

  it('strips the embed when the feature is OFF (even for an admin)', () => {
    const input = embedDoc();
    const onStrip = jest.fn();

    const output = stripHtmlEmbedIfNotAllowed(input, {
      featureEnabled: false,
      role: 'admin',
      onStrip,
    });

    expect(hasHtmlEmbedNode(output)).toBe(false);
    expect(onStrip).toHaveBeenCalledTimes(1);
  });

  it('strips the embed for a non-admin when the feature is ON', () => {
    const input = embedDoc();
    const onStrip = jest.fn();

    const output = stripHtmlEmbedIfNotAllowed(input, {
      featureEnabled: true,
      role: 'member',
      onStrip,
    });

    expect(hasHtmlEmbedNode(output)).toBe(false);
    expect(onStrip).toHaveBeenCalledTimes(1);
  });

  it('strips the embed for a null/undefined role when the feature is ON', () => {
    const missingRoles = [null, undefined];

    for (const role of missingRoles) {
      const onStrip = jest.fn();

      const output = stripHtmlEmbedIfNotAllowed(embedDoc(), {
        featureEnabled: true,
        role,
        onStrip,
      });

      expect(hasHtmlEmbedNode(output)).toBe(false);
      expect(onStrip).toHaveBeenCalledTimes(1);
    }
  });

  it('returns input unchanged and does NOT call onStrip when no embed is present', () => {
    const input = plainDoc();
    const onStrip = jest.fn();

    // The caller is not allowed (feature OFF), yet the doc has no embed to drop.
    const output = stripHtmlEmbedIfNotAllowed(input, {
      featureEnabled: false,
      role: 'member',
      onStrip,
    });

    expect(output).toBe(input);
    expect(onStrip).not.toHaveBeenCalled();
  });

  it('calls onStrip exactly once per strip', () => {
    const onStrip = jest.fn();

    stripHtmlEmbedIfNotAllowed(embedDoc(), {
      featureEnabled: false,
      role: 'member',
      onStrip,
    });

    expect(onStrip).toHaveBeenCalledTimes(1);
  });

  it('works without an onStrip callback (optional)', () => {
    const output = stripHtmlEmbedIfNotAllowed(embedDoc(), {
      featureEnabled: false,
      role: 'member',
    });

    expect(hasHtmlEmbedNode(output)).toBe(false);
  });
});
|
||||
|
||||
// NOTE: a previous revision of this file re-implemented the write-path admin
|
||||
// gate as a local `applyAdminGate` stand-in and asserted against THAT. A
|
||||
// deleted/misplaced real guard would have kept those green. The stand-in is
|
||||
// removed. The collab store, REST/MCP update, and transclusion-unsync paths are
|
||||
// now tested against their REAL code in:
|
||||
// - collaboration/extensions/persistence.extension.html-embed.spec.ts
|
||||
// - collaboration/collaboration.handler.html-embed.spec.ts
|
||||
// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
|
||||
// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup)
|
||||
// - integrations/import/services/import-html-embed-identity.spec.ts (import)
|
||||
// The htmlEmbed node renders inside a sandboxed iframe, so the per-write role
|
||||
// gate has been removed. `stripHtmlEmbedNodes` + `isHtmlEmbedFeatureEnabled`
|
||||
// remain ONLY to honor the workspace master toggle on the anonymous public-share
|
||||
// read path — tested against the real share code in:
|
||||
// - core/share/share-html-embed.spec.ts
|
||||
//
|
||||
// The case below stays here because it asserts a REAL parse path
|
||||
// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a
|
||||
// re-implemented gate.
|
||||
describe('htmlEmbed smuggled via the markdown/html <!--html-embed--> form (real parse + real helpers)', () => {
|
||||
it('the parsed node is detected and stripped by the real helpers', () => {
|
||||
// The markdown/html create formats decode to the same htmlEmbed node, so the
|
||||
// gate (run on the parsed JSON) covers them identically.
|
||||
const source = '<script>steal()</script>';
|
||||
// The case below asserts that the REAL parse path (htmlToJson, the markdown/html
|
||||
// form) produces an htmlEmbed node the master-toggle strip can detect & remove.
|
||||
describe('htmlEmbed via the markdown/html form (real parse + real strip helper)', () => {
|
||||
it('the parsed node is detected and stripped by the real helper', () => {
|
||||
const source = '<script>track()</script>';
|
||||
const encoded = encodeHtmlEmbedSource(source);
|
||||
const html = `<div data-type="htmlEmbed" data-source="${encoded}"></div>`;
|
||||
const parsed = htmlToJson(html);
|
||||
expect(hasHtmlEmbedNode(parsed)).toBe(true);
|
||||
|
||||
// A non-admin role gates to strip via the real helpers.
|
||||
expect(canAuthorHtmlEmbed('member')).toBe(false);
|
||||
const stripped = stripHtmlEmbedNodes(parsed);
|
||||
expect(hasHtmlEmbedNode(stripped)).toBe(false);
|
||||
});
|
||||
|
||||
@@ -5,12 +5,12 @@ export const HTML_EMBED_NODE_NAME = 'htmlEmbed';
|
||||
/**
|
||||
* Recursively remove every `htmlEmbed` node from a ProseMirror JSON document.
|
||||
*
|
||||
* SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin
|
||||
* (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to
|
||||
* author it. This helper is the server-side enforcement primitive: every WRITE
|
||||
* path that may persist content from a NON-admin caller must run the incoming
|
||||
* document through this function so a non-admin cannot smuggle the node in via
|
||||
* the collab socket, the REST/MCP/AI content-update path, paste, or import.
|
||||
* The `htmlEmbed` node renders inside a SANDBOXED iframe (no `allow-same-origin`)
|
||||
* on the client, so its content cannot touch the viewer's session/cookies/API —
|
||||
* it is NOT a stored-XSS surface. This helper is retained ONLY to honor the
|
||||
* workspace master toggle (`settings.htmlEmbed`) on the anonymous public-share
|
||||
* read path: an anonymous viewer cannot read the workspace toggle, so the server
|
||||
* strips the block when the toggle is OFF before serving shared content.
|
||||
*
|
||||
* Returns a NEW document; the input is not mutated. If the input is not a valid
|
||||
* doc object it is returned unchanged (callers persist what they were given).
|
||||
@@ -22,15 +22,6 @@ export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
|
||||
|
||||
const node = pmJson as unknown as JSONContent;
|
||||
|
||||
// Defensive root-type check: if the ROOT node is itself an htmlEmbed, the
|
||||
// children-filtering below could never drop it, so a bare htmlEmbed would be
|
||||
// returned as-is. This branch is unreachable in normal use (the PM document
|
||||
// root is always a `doc`) and exists only to make the helper total — a bare
|
||||
// htmlEmbed can never be returned by this function.
|
||||
if (node.type === HTML_EMBED_NODE_NAME) {
|
||||
return { type: 'doc', content: [] } as unknown as T;
|
||||
}
|
||||
|
||||
if (Array.isArray(node.content)) {
|
||||
const filtered: JSONContent[] = [];
|
||||
for (const child of node.content) {
|
||||
@@ -48,111 +39,12 @@ export function stripHtmlEmbedNodes<T = JSONContent>(pmJson: T): T {
|
||||
return { ...node } as unknown as T;
|
||||
}
|
||||
|
||||
/**
 * Collect a stable identity for every `htmlEmbed` node found anywhere in a
 * ProseMirror JSON tree.
 *
 * The identity of an embed is its `attrs.source` string (the raw HTML it
 * renders), so two embeds with exactly the same source collapse into a single
 * entry. The collab persist path uses the result to know which embeds already
 * exist in the currently-persisted page content.
 *
 * Nodes with no `attrs`, or whose `source` is absent or not a string, are
 * skipped and contribute nothing to the result.
 *
 * The tree is walked with an explicit stack instead of recursion so that a
 * pathologically deep document cannot overflow the call stack. Nodes are still
 * visited in document (pre-order) order, so the returned Set iterates in
 * first-occurrence order.
 *
 * @param pmJson - The document (or any value); non-object input yields an empty set.
 * @returns A new Set containing every distinct embed source string.
 */
export function collectHtmlEmbedSources(pmJson: unknown): Set<string> {
  const sources = new Set<string>();
  const pending: unknown[] = [pmJson];

  while (pending.length > 0) {
    const current = pending.pop();
    if (!current || typeof current !== 'object') {
      continue;
    }

    const node = current as JSONContent;
    if (node.type === HTML_EMBED_NODE_NAME) {
      const source = (node.attrs as Record<string, unknown> | undefined)?.source;
      if (typeof source === 'string') {
        sources.add(source);
      }
    }

    if (Array.isArray(node.content)) {
      // Push children in reverse so they are popped (visited) in document order.
      for (let i = node.content.length - 1; i >= 0; i -= 1) {
        pending.push(node.content[i]);
      }
    }
  }

  return sources;
}
|
||||
|
||||
/**
 * Allow-list variant of {@link stripHtmlEmbedNodes}: every `htmlEmbed` node
 * whose `attrs.source` is a member of `allowedSources` is kept, all other
 * `htmlEmbed` nodes are removed.
 *
 * Intended for the collab persist path when the feature toggle is ON but the
 * storing user is not an admin: `allowedSources` holds the sources already
 * present in the persisted page content, so such a user cannot add a new embed
 * and also cannot destroy an existing one with an unrelated edit.
 *
 * Identity is the raw source string, so copying an existing embed's exact
 * source to a new location passes the check; no new HTML is introduced that
 * way.
 *
 * @param pmJson - Document (or node) to filter. Non-object input is returned as-is.
 * @param allowedSources - Embed sources that may remain in the output.
 * @returns A NEW document; the input is never mutated.
 */
export function stripDisallowedHtmlEmbedNodes<T = JSONContent>(
  pmJson: T,
  allowedSources: Set<string>,
): T {
  if (pmJson === null || pmJson === undefined || typeof pmJson !== 'object') {
    return pmJson;
  }

  // True when the embed's source is a string that appears in the allow-list.
  const isKeptEmbed = (embed: JSONContent): boolean => {
    const src = (embed.attrs as Record<string, unknown> | undefined)?.source;
    return typeof src === 'string' && allowedSources.has(src);
  };

  const root = pmJson as unknown as JSONContent;

  // Defensive root handling: the child filter below can never drop the root
  // itself, so a bare, disallowed htmlEmbed root is replaced by an empty doc.
  if (root.type === HTML_EMBED_NODE_NAME) {
    const replacement = isKeptEmbed(root)
      ? { ...root }
      : { type: 'doc', content: [] };
    return replacement as unknown as T;
  }

  if (!Array.isArray(root.content)) {
    return { ...root } as unknown as T;
  }

  const survivors: JSONContent[] = [];
  const children = root.content;
  for (let index = 0; index < children.length; index += 1) {
    const child = children[index];
    const isEmbed = Boolean(child) && child.type === HTML_EMBED_NODE_NAME;
    if (!isEmbed) {
      // Descend so embeds nested in columns/callouts obey the same allow-list.
      survivors.push(stripDisallowedHtmlEmbedNodes(child, allowedSources));
    } else if (isKeptEmbed(child)) {
      survivors.push({ ...child });
    }
    // A disallowed embed child is simply not copied over.
  }

  return { ...root, content: survivors } as unknown as T;
}
|
||||
|
||||
/**
|
||||
* Returns true if the document contains at least one `htmlEmbed` node anywhere
|
||||
* in its tree. Useful to decide whether a strip pass actually changed anything
|
||||
* (e.g. for logging a rejected non-admin embed attempt).
|
||||
* in its tree. Useful to decide whether a strip pass on the share read path
|
||||
* actually changed anything. After the write-path role gate removal this is no
|
||||
* longer called by production code; it is retained as a test-only assertion
|
||||
* helper (and a detection primitive should a future read path need it).
|
||||
*/
|
||||
export function hasHtmlEmbedNode(pmJson: unknown): boolean {
|
||||
if (!pmJson || typeof pmJson !== 'object') {
|
||||
@@ -169,62 +61,9 @@ export function hasHtmlEmbedNode(pmJson: unknown): boolean {
|
||||
}
|
||||
|
||||
/**
 * Decide whether a workspace role is trusted to author `htmlEmbed` nodes.
 *
 * Only `'owner'` and `'admin'` qualify. Every other value, including
 * `'member'`, unknown role strings, `null` and `undefined`, is rejected. Kept
 * as a single function so all write paths share one definition of "trusted".
 *
 * @param role - The caller's workspace role, if known.
 * @returns `true` for owners and admins, `false` otherwise.
 */
export function canAuthorHtmlEmbed(role: string | null | undefined): boolean {
  switch (role) {
    case 'owner':
    case 'admin':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Combined write-path predicate for the htmlEmbed feature.
 *
 * An embed may stay in a document only when BOTH conditions hold:
 *   1. the workspace feature toggle is ON (`featureEnabled` is exactly `true`,
 *      as resolved from `workspace.settings?.htmlEmbed === true`), and
 *   2. the acting user's role passes {@link canAuthorHtmlEmbed}.
 *
 * With the toggle OFF (the default) the answer is `false` for everyone,
 * admins included.
 *
 * @param featureEnabled - Workspace toggle value for the write being performed.
 * @param role - Workspace role of the authoring/saving user.
 * @returns `true` only when the toggle is on and the role is trusted.
 */
export function htmlEmbedAllowed(
  featureEnabled: boolean,
  role: string | null | undefined,
): boolean {
  // Strict comparison on purpose: anything other than the boolean `true`
  // counts as the feature being disabled.
  if (featureEnabled !== true) {
    return false;
  }
  return canAuthorHtmlEmbed(role);
}
|
||||
|
||||
/**
 * Return `json` with its htmlEmbed nodes removed unless the
 * (feature-enabled AND role-allowed) gate passes.
 *
 * The caller resolves `featureEnabled` and `role` itself, because they can
 * differ per call-site; this helper owns only the decision and the strip:
 *   - gate passes           -> `json` is returned untouched;
 *   - gate fails, no embed  -> `json` is returned untouched, `onStrip` is not called;
 *   - gate fails, has embed -> `onStrip` (if supplied) is invoked once, then the
 *                              stripped document is returned.
 *
 * Call-sites that need custom strip behaviour keep their own code instead of
 * using this helper.
 *
 * @param json - The document to gate.
 * @param opts - `featureEnabled` / `role` feed the gate; `onStrip` is an
 *   optional callback fired just before embeds are actually removed.
 * @returns The original or the stripped document.
 */
export function stripHtmlEmbedIfNotAllowed<T>(
  json: T,
  opts: { featureEnabled: boolean; role: string | null | undefined; onStrip?: () => void },
): T {
  const gatePasses = htmlEmbedAllowed(opts.featureEnabled, opts.role);
  // Only scan the document when the gate has already failed.
  const mustStrip = !gatePasses && hasHtmlEmbedNode(json);

  if (!mustStrip) {
    return json;
  }

  opts.onStrip?.();
  return stripHtmlEmbedNodes(json);
}
|
||||
|
||||
/**
|
||||
* Read the workspace-level htmlEmbed feature toggle from a workspace's settings
|
||||
* jsonb. ABSENT/non-true => OFF (the default). Kept here so every server write
|
||||
* path resolves the toggle the same way.
|
||||
* Read the workspace-level htmlEmbed master toggle from a workspace's settings
|
||||
* jsonb. ABSENT/non-true => OFF (the default). Kept here so the share read path
|
||||
* resolves the toggle the same way it is persisted.
|
||||
*/
|
||||
export function isHtmlEmbedFeatureEnabled(
|
||||
settings: unknown | null | undefined,
|
||||
|
||||
@@ -65,21 +65,19 @@ export const MAX_SHARE_MESSAGES = 30;
|
||||
export const MAX_SHARE_MESSAGE_CHARS = 8000;
|
||||
|
||||
/**
|
||||
* Default per-request output cap for the anonymous share assistant. Bounds the
|
||||
* tokens a single anonymous request can generate; worst case = steps x this.
|
||||
*/
|
||||
export const SHARE_AI_MAX_OUTPUT_TOKENS = 512;
|
||||
|
||||
/**
|
||||
* Read the per-request output cap from the environment (overridable seam),
|
||||
* falling back to the sane default. A non-positive / unparseable value uses the
|
||||
* default. Mirrors resolveShareAiWorkspaceMax().
|
||||
* Per-request output-token ceiling for the anonymous assistant. `streamText`
|
||||
* runs up to `stepCountIs(5)` steps, so the worst-case output of one accepted
|
||||
* request is bounded by (steps × this). The per-workspace cap bounds the COUNT
|
||||
* of calls; this bounds the SIZE of each, so a single anonymous call cannot run
|
||||
* up the provider bill even if the per-IP throttle is evaded. Env-overridable
|
||||
* seam; a non-positive or unparseable value falls back to the default.
|
||||
*/
|
||||
export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;

/**
 * Resolve the per-request output-token ceiling for the anonymous share
 * assistant from the `SHARE_AI_MAX_OUTPUT_TOKENS` environment variable.
 *
 * The raw value is parsed as a number and floored to a whole token count. The
 * default ({@link SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT}) is returned whenever the
 * variable is unset, unparseable, not finite, or would yield a cap below 1.
 * That last case includes fractional values such as `0.5`, which are positive
 * but floor to zero.
 *
 * @returns A positive integer token cap.
 */
export function resolveShareAiMaxOutputTokens(): number {
  const parsed = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
  if (!Number.isFinite(parsed)) {
    return SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
  }

  // Floor BEFORE the positivity check so a value in (0, 1) cannot slip through
  // and become a cap of zero tokens.
  const tokens = Math.floor(parsed);
  return tokens >= 1 ? tokens : SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
}
|
||||
|
||||
/**
|
||||
@@ -225,8 +223,8 @@ export class PublicShareChatService {
|
||||
tools,
|
||||
// Bound the agent loop for anonymous callers.
|
||||
stopWhen: stepCountIs(5),
|
||||
// Bounds per-request output so one anonymous request can't run up the
|
||||
// provider bill; worst case = steps x this.
|
||||
// Cap per-request output so one anonymous call cannot run up the provider
|
||||
// bill even if the per-IP throttle is evaded; worst case = steps × this.
|
||||
maxOutputTokens: resolveShareAiMaxOutputTokens(),
|
||||
abortSignal: signal,
|
||||
onError: ({ error }) => {
|
||||
|
||||
@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
|
||||
import {
|
||||
PublicShareChatService,
|
||||
filterShareTranscript,
|
||||
resolveShareAiMaxOutputTokens,
|
||||
SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
|
||||
} from './public-share-chat.service';
|
||||
import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
|
||||
import {
|
||||
@@ -400,6 +402,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
|
||||
});
|
||||
});
|
||||
|
||||
// Covers the env-driven resolution of the anonymous assistant's per-request
// output cap: unset -> default, valid -> floored value, invalid -> default.
describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
  const envKey = 'SHARE_AI_MAX_OUTPUT_TOKENS';
  // Captured once when the suite is defined so each test can be undone.
  const savedValue = process.env[envKey];

  // Put the variable back exactly as it was (including "not set at all").
  afterEach(() => {
    if (savedValue !== undefined) {
      process.env[envKey] = savedValue;
      return;
    }
    delete process.env[envKey];
  });

  it('falls back to the default when unset', () => {
    delete process.env[envKey];
    const resolved = resolveShareAiMaxOutputTokens();
    expect(resolved).toBe(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT);
    expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
  });

  it('uses (and floors) a valid positive value from the env', () => {
    process.env[envKey] = '1024.9';
    expect(resolveShareAiMaxOutputTokens()).toBe(1024);
  });

  it('falls back to the default for zero, a negative, or a non-numeric value', () => {
    // Same three inputs, checked one after another in the same order.
    for (const rejected of ['0', '-5', 'not-a-number']) {
      process.env[envKey] = rejected;
      expect(resolveShareAiMaxOutputTokens()).toBe(
        SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
      );
    }
  });
});
|
||||
|
||||
describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
|
||||
it('allows up to the cap within a window, then 429s (returns false)', async () => {
|
||||
const limiter = makeLimiter(3, 60_000, () => 1_000);
|
||||
@@ -482,9 +522,11 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
|
||||
});
|
||||
|
||||
it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
|
||||
// FAIL CLOSED (#62): if Redis is down we cannot prove the workspace is under
|
||||
// its cap, so DENY (the controller 429s) rather than admit an unmetered,
|
||||
// billable anonymous call. The feature is optional, so denial is harmless.
|
||||
// The per-workspace cap is the COST backstop for an OPTIONAL anonymous
|
||||
// assistant. If Redis is unavailable we cannot prove the workspace is under
|
||||
// its cap, so we DENY (controller 429s) rather than admit an unmetered,
|
||||
// billable call — a brief Redis blip disabling the assistant is safer than
|
||||
// an unbounded provider bill.
|
||||
const failingRedis = {
|
||||
eval: () => Promise.reject(new Error('redis down')),
|
||||
} as unknown as import('ioredis').Redis;
|
||||
|
||||
@@ -99,11 +99,11 @@ export class PublicShareWorkspaceLimiter {
|
||||
/**
|
||||
* Account one call for `key`. Returns true if it is within the cap (allowed),
|
||||
* false if the cap over the trailing window is exceeded (caller must 429).
|
||||
* On a Redis failure we FAIL CLOSED (return false): if Redis is down we cannot
|
||||
* prove the workspace is under its cap, so we DENY rather than admit an
|
||||
* unmetered, billable anonymous call. The feature is optional, so the
|
||||
* temporary denial is harmless. (Operators wanting a tighter steady-state cap
|
||||
* can lower the default via SHARE_AI_WORKSPACE_MAX_PER_HOUR, e.g. =100.)
|
||||
* On a Redis failure we FAIL CLOSED (return false): this cap is the COST
|
||||
* backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
|
||||
* cannot prove the workspace is under its cap and therefore DENY rather than
|
||||
* admit an unmetered, billable anonymous call. A transient Redis blip briefly
|
||||
* disabling the assistant is preferable to an unbounded provider bill.
|
||||
*/
|
||||
async tryConsume(key: string): Promise<boolean> {
|
||||
const t = this.now();
|
||||
@@ -122,9 +122,11 @@ export class PublicShareWorkspaceLimiter {
|
||||
);
|
||||
return admitted === 1;
|
||||
} catch (err) {
|
||||
// FAIL CLOSED: if Redis is down we cannot prove the workspace is under its
|
||||
// cap, so DENY (controller 429s) rather than admit an unmetered, billable
|
||||
// anonymous call. The feature is optional, so denial is harmless.
|
||||
// FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
|
||||
// under its cap, so we DENY (the controller 429s) rather than admit an
|
||||
// unmetered, billable anonymous call. The assistant is optional, so a
|
||||
// transient Redis blip briefly disabling it is the safer failure mode than
|
||||
// an unbounded provider bill.
|
||||
this.logger.error(
|
||||
`share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
|
||||
err as Error,
|
||||
|
||||
@@ -10,7 +10,6 @@ describe('PageController', () => {
|
||||
controller = new PageController(
|
||||
{} as any, // pageService
|
||||
{} as any, // pageRepo
|
||||
{} as any, // workspaceRepo
|
||||
{} as any, // pageHistoryService
|
||||
{} as any, // spaceAbility
|
||||
{} as any, // pageAccessService
|
||||
|
||||
@@ -39,11 +39,6 @@ import {
|
||||
} from '../casl/interfaces/space-ability.type';
|
||||
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedNodes,
|
||||
} from '../../common/helpers/prosemirror/html-embed.util';
|
||||
import { RecentPageDto } from './dto/recent-page.dto';
|
||||
import { CreatedByUserDto } from './dto/created-by-user.dto';
|
||||
import { DuplicatePageDto } from './dto/duplicate-page.dto';
|
||||
@@ -68,7 +63,6 @@ export class PageController {
|
||||
constructor(
|
||||
private readonly pageService: PageService,
|
||||
private readonly pageRepo: PageRepo,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private readonly pageHistoryService: PageHistoryService,
|
||||
private readonly spaceAbility: SpaceAbilityFactory,
|
||||
private readonly pageAccessService: PageAccessService,
|
||||
@@ -98,18 +92,6 @@ export class PageController {
|
||||
|
||||
const permissions = { canEdit, hasRestriction };
|
||||
|
||||
if (page.content) {
|
||||
const workspace = await this.workspaceRepo.findById(page.workspaceId);
|
||||
if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
|
||||
// Kill-switch: when the workspace feature is OFF, never serve raw
|
||||
// htmlEmbed nodes on the read path (mirrors the public-share strip),
|
||||
// so disabling the feature is an immediate, total kill-switch and not
|
||||
// dependent on the page being re-saved. Admin-authored content only.
|
||||
// Fail-closed: a missing workspace resolves to OFF and is stripped.
|
||||
page.content = stripHtmlEmbedNodes(page.content) as any;
|
||||
}
|
||||
}
|
||||
|
||||
if (dto.format && dto.format !== 'json' && page.content) {
|
||||
const contentOutput =
|
||||
dto.format === 'markdown'
|
||||
@@ -255,9 +237,6 @@ export class PageController {
|
||||
user.id,
|
||||
workspace.id,
|
||||
createPageDto,
|
||||
// Pass the caller's workspace role so create() can enforce the htmlEmbed
|
||||
// admin gate (non-admins cannot author raw-JS embeds).
|
||||
user.role,
|
||||
provenance,
|
||||
);
|
||||
|
||||
@@ -554,16 +533,6 @@ export class PageController {
|
||||
|
||||
await this.pageAccessService.validateCanView(page, user);
|
||||
|
||||
if (history.content) {
|
||||
const workspace = await this.workspaceRepo.findById(page.workspaceId);
|
||||
if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
|
||||
// Kill-switch: history snapshots are an authenticated read path too, so
|
||||
// strip htmlEmbed when the workspace feature is OFF (same as /info and
|
||||
// the public-share path). Fail-closed on a missing workspace.
|
||||
history.content = stripHtmlEmbedNodes(history.content) as any;
|
||||
}
|
||||
}
|
||||
|
||||
return history;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,240 +0,0 @@
|
||||
// Exercises the REAL PageService htmlEmbed admin gate on its two non-collab
|
||||
// write paths: PageService.create() and PageService.duplicatePage(). Both build
|
||||
// content/textContent/ydoc directly and persist, bypassing the collab
|
||||
// onStoreDocument strip, so each must run the incoming document through the
|
||||
// toggle-AND-admin gate (`htmlEmbedAllowed(featureEnabled, role)` -> if not
|
||||
// allowed, `stripHtmlEmbedNodes`) BEFORE persisting.
|
||||
//
|
||||
// This spec constructs the REAL PageService with every constructor dep mocked,
|
||||
// feeds content containing an `htmlEmbed`, calls the real method, and asserts on
|
||||
// the PERSISTED content (captured at the repo insert / db insert boundary) that
|
||||
// the embed was actually stripped (member/unknown role) or preserved
|
||||
// (admin/owner + toggle ON). Mirrors the GOOD pattern in
|
||||
// transclusion/spec/transclusion-unsync-html-embed.spec.ts.
|
||||
//
|
||||
// page.service.ts pulls in the collaboration gateway (a transitive ESM chain
|
||||
// `lib0/decoding.js` that jest's transformIgnorePatterns does not transpile), so
|
||||
// that single module is mocked away — it is never used on the create/duplicate
|
||||
// gate paths.
|
||||
jest.mock('../../../collaboration/collaboration.gateway', () => ({
|
||||
CollaborationGateway: class {},
|
||||
}));
|
||||
|
||||
import { PageService } from './page.service';
|
||||
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Fixed identifiers shared by every test in this spec.
const WS = 'ws-1';
const SPACE = 'space-1';
const USER = 'u1';

// Factory for a fresh two-node document: an ordinary paragraph followed by an
// htmlEmbed. A new object graph is built on every call so tests cannot leak
// mutations into each other.
const docWithEmbed = () => {
  const paragraph = {
    type: 'paragraph',
    content: [{ type: 'text', text: 'body' }],
  };
  const embed = {
    type: 'htmlEmbed',
    attrs: { source: '<script>alert(1)</script>' },
  };
  return { type: 'doc', content: [paragraph, embed] };
};
|
||||
|
||||
// Smallest kysely look-alike that the create/duplicate paths can run against.
//  - Every select-style builder method returns the same chain object; its
//    terminal calls resolve to "no rows".
//  - Rows passed to `insertInto('pages').values(...)` (single row or array) are
//    appended to `capturedInserts` so tests can inspect what would be persisted.
//    Inserts into any other table are accepted but not recorded.
//  - `transaction().execute(cb)` runs `cb` with this same stub as the
//    transaction, so inserts made inside a transaction are captured too.
function buildDb(capturedInserts: any[]) {
  const selectChain: any = {
    execute: async () => [],
    executeTakeFirst: async () => undefined,
  };
  for (const builderMethod of ['select', 'selectAll', 'where', 'orderBy', 'limit']) {
    selectChain[builderMethod] = () => selectChain;
  }

  const recordRows = (table: string, rows: any) => {
    if (table !== 'pages') {
      return;
    }
    const batch = Array.isArray(rows) ? rows : [rows];
    batch.forEach((row) => {
      capturedInserts.push(row);
    });
  };

  const db: any = {};
  db.selectFrom = () => selectChain;
  db.insertInto = (table: string) => ({
    values: (rows: any) => {
      recordRows(table, rows);
      return { execute: async () => undefined };
    },
  });
  db.transaction = () => ({
    execute: async (cb: any) => cb(db),
  });
  return db;
}
|
||||
|
||||
// Assemble the REAL PageService with each of its 13 constructor dependencies
// replaced by a test double.
//   - `featureEnabled` becomes `settings.htmlEmbed` on the workspace returned by
//     the workspaceRepo double (the toggle the gate reads).
//   - `capturedInserts` receives every row persisted through
//     pageRepo.insertPage or the db stub's pages insert.
//   - `rootPage` (duplicatePage only) is the single page the repo reports as
//     the source tree and the only page id reported as accessible.
function buildService(opts: {
  featureEnabled: boolean;
  capturedInserts: any[];
  rootPage?: any; // for duplicatePage
}) {
  const { featureEnabled, capturedInserts } = opts;
  const resolvesVoid = () => jest.fn(async () => undefined);

  const db = buildDb(capturedInserts);

  const workspaceRepo: any = {
    findById: jest.fn(async () => {
      const settings = { htmlEmbed: featureEnabled };
      return { id: WS, settings };
    }),
  };

  const pageRepo: any = {
    // No parent page exists in the create tests.
    findById: jest.fn(async () => null),
    // create() persists through here; record the row for later inspection.
    insertPage: jest.fn(async (row: any) => {
      capturedInserts.push(row);
      return { id: 'new-page', slugId: 'slug-1', ...row };
    }),
    getPageAndDescendants: jest.fn(async () =>
      opts.rootPage ? [opts.rootPage] : [],
    ),
  };

  const pagePermissionRepo: any = {
    // duplicatePage filters by accessibility; grant only the root page.
    filterAccessiblePageIds: jest.fn(async () => {
      if (!opts.rootPage) {
        return [];
      }
      return [opts.rootPage.id];
    }),
  };

  const attachmentRepo: any = { findByIds: jest.fn(async () => []) };
  const storageService: any = { copy: resolvesVoid() };
  // One shared queue double stands in for all three queues.
  const noopQueue: any = { add: resolvesVoid() };
  const eventEmitter: any = { emit: jest.fn() };
  const collaborationGateway: any = {};
  const watcherService: any = {};

  // duplicatePage triggers these bulk inserts after persisting; they play no
  // part in the gate, so they simply resolve.
  const transclusionService: any = {
    insertTransclusionsForPages: resolvesVoid(),
    insertReferencesForPages: resolvesVoid(),
    insertTemplateReferencesForPages: resolvesVoid(),
  };

  return new PageService(
    pageRepo,
    pagePermissionRepo,
    attachmentRepo,
    db,
    storageService,
    noopQueue, // attachmentQueue
    noopQueue, // aiQueue
    noopQueue, // generalQueue
    eventEmitter,
    collaborationGateway,
    watcherService,
    transclusionService,
    workspaceRepo,
  );
}
|
||||
|
||||
// Drives PageService.create() with a document that contains an htmlEmbed and
// checks, at the insertPage boundary, whether the embed was kept or stripped
// for each (toggle, role) combination.
describe('PageService.create htmlEmbed admin gate (real code)', () => {
  // Calls create() once and yields the `content` of the single persisted row.
  async function persistedContent(
    featureEnabled: boolean,
    callerRole: string | null | undefined,
  ) {
    const capturedInserts: any[] = [];
    const service = buildService({ featureEnabled, capturedInserts });

    // The 'json' format hands the PM-JSON object straight to the parser, so
    // the document is passed as an object rather than a string.
    const dto = {
      spaceId: SPACE,
      title: 'p',
      content: docWithEmbed(),
      format: 'json' as any,
    } as any;

    await service.create(USER, WS, dto, callerRole);

    expect(capturedInserts).toHaveLength(1);
    const [row] = capturedInserts;
    return row.content;
  }

  it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
    const content = await persistedContent(true, 'member');
    expect(hasHtmlEmbedNode(content)).toBe(false);
    // The paragraph text must still be there.
    expect(JSON.stringify(content)).toContain('body');
  });

  it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
    const content = await persistedContent(true, 'admin');
    expect(hasHtmlEmbedNode(content)).toBe(true);
  });

  it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
    const content = await persistedContent(true, 'owner');
    expect(hasHtmlEmbedNode(content)).toBe(true);
  });

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    const content = await persistedContent(false, 'admin');
    expect(hasHtmlEmbedNode(content)).toBe(false);
  });

  it('unknown/empty role: fails closed (stripped)', async () => {
    const untrustedRoles = [undefined, null, 'viewer'] as const;
    for (const role of untrustedRoles) {
      const content = await persistedContent(true, role);
      expect(hasHtmlEmbedNode(content)).toBe(false);
    }
  });
});
|
||||
|
||||
// Drives PageService.duplicatePage() with a source page that contains an
// htmlEmbed and checks, at the pages insert boundary, whether the copy kept or
// lost the embed for each (toggle, role) combination.
describe('PageService.duplicatePage htmlEmbed admin gate (real code)', () => {
  // Duplicates one embed-bearing source page and yields the `content` of the
  // first captured row that has content.
  async function persistedContent(
    featureEnabled: boolean,
    role: string | null | undefined,
  ) {
    const capturedInserts: any[] = [];
    const rootPage: any = {
      id: 'src-page',
      slugId: 'src-slug',
      title: 'Source',
      icon: null,
      position: 'a0',
      spaceId: SPACE,
      workspaceId: WS,
      parentPageId: null,
      content: docWithEmbed(),
    };
    const authUser: any = { id: USER, workspaceId: WS, role };

    const service = buildService({ featureEnabled, capturedInserts, rootPage });
    await service.duplicatePage(rootPage, undefined, authUser);

    // Only rows that carry content are page copies; one source -> one copy.
    const pageRows = capturedInserts.filter((captured) => captured.content);
    expect(pageRows.length).toBeGreaterThanOrEqual(1);
    return pageRows[0].content;
  }

  it('toggle ON + member: persisted copy has htmlEmbed stripped', async () => {
    const content = await persistedContent(true, 'member');
    expect(hasHtmlEmbedNode(content)).toBe(false);
    expect(JSON.stringify(content)).toContain('body');
  });

  it('toggle ON + admin: persisted copy keeps the htmlEmbed', async () => {
    const content = await persistedContent(true, 'admin');
    expect(hasHtmlEmbedNode(content)).toBe(true);
  });

  it('toggle ON + owner: persisted copy keeps the htmlEmbed', async () => {
    const content = await persistedContent(true, 'owner');
    expect(hasHtmlEmbedNode(content)).toBe(true);
  });

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    const content = await persistedContent(false, 'admin');
    expect(hasHtmlEmbedNode(content)).toBe(false);
  });

  it('unknown/empty role: fails closed (stripped)', async () => {
    const untrustedRoles = [undefined, null, 'viewer'] as const;
    for (const role of untrustedRoles) {
      const content = await persistedContent(true, role);
      expect(hasHtmlEmbedNode(content)).toBe(false);
    }
  });
});
|
||||
@@ -20,7 +20,6 @@ describe('PageService', () => {
|
||||
{} as any, // collaborationGateway
|
||||
{} as any, // watcherService
|
||||
{} as any, // transclusionService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
});
|
||||
|
||||
|
||||
@@ -31,11 +31,6 @@ import {
|
||||
isAttachmentNode,
|
||||
removeMarkTypeFromDoc,
|
||||
} from '../../../common/helpers/prosemirror/utils';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import {
|
||||
htmlToJson,
|
||||
jsonToNode,
|
||||
@@ -81,7 +76,6 @@ export class PageService {
|
||||
private collaborationGateway: CollaborationGateway,
|
||||
private readonly watcherService: WatcherService,
|
||||
private readonly transclusionService: TransclusionService,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async findById(
|
||||
@@ -101,10 +95,6 @@ export class PageService {
|
||||
userId: string,
|
||||
workspaceId: string,
|
||||
createPageDto: CreatePageDto,
|
||||
// Workspace role of the caller. Used to enforce the htmlEmbed admin gate on
|
||||
// the create write path (see below). Optional/typed loosely so unknown or
|
||||
// missing roles fall through to the non-admin (strip) branch by default.
|
||||
callerRole?: string | null,
|
||||
// Optional agent-edit provenance (from the signed access claim). When the
|
||||
// actor is 'agent', stamp the page's source marker so a freshly created page
|
||||
// shows it was created by the AI agent (§14 N2) — create goes through REST,
|
||||
@@ -135,35 +125,11 @@ export class PageService {
|
||||
let ydoc = undefined;
|
||||
|
||||
if (createPageDto?.content && createPageDto?.format) {
|
||||
let prosemirrorJson = await this.parseProsemirrorContent(
|
||||
const prosemirrorJson = await this.parseProsemirrorContent(
|
||||
createPageDto.content,
|
||||
createPageDto.format,
|
||||
);
|
||||
|
||||
// SECURITY (Variant C admin gate, plain page-create write path):
|
||||
// create() builds content/textContent/ydoc directly and persists them via
|
||||
// insertPage, bypassing the collab onStoreDocument strip. htmlEmbed renders
|
||||
// raw, unsanitized JS in readers' browsers, so only workspace admins/owners
|
||||
// may author it. The create controller requires only space Edit, so a
|
||||
// regular member could otherwise POST a doc (json, or the markdown/html
|
||||
// <!--html-embed:BASE64--> forms that parse to the same node) containing an
|
||||
// htmlEmbed and store XSS for every reader. Strip every htmlEmbed node when
|
||||
// the caller is not an admin, BEFORE deriving textContent/ydoc/insert.
|
||||
// The gate is toggle-AND-admin: htmlEmbed survives only when the workspace
|
||||
// feature toggle is ON and the caller is an admin/owner. OFF (default) =>
|
||||
// stripped for everyone. Cheap settings read keyed to the workspace.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(workspaceId))?.settings,
|
||||
);
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: callerRole,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from page creation by user ${userId} (space ${createPageDto.spaceId})`,
|
||||
),
|
||||
});
|
||||
|
||||
content = prosemirrorJson;
|
||||
textContent = jsonToText(prosemirrorJson);
|
||||
ydoc = createYdocFromJson(prosemirrorJson);
|
||||
@@ -653,12 +619,6 @@ export class PageService {
|
||||
|
||||
const attachmentMap = new Map<string, ICopyPageAttachment>();
|
||||
|
||||
// Resolve the htmlEmbed toggle ONCE for the workspace; the per-page gate
|
||||
// below is toggle-AND-admin (OFF default => stripped for everyone).
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(rootPage.workspaceId))?.settings,
|
||||
);
|
||||
|
||||
const insertablePages: InsertablePage[] = await Promise.all(
|
||||
pages.map(async (page) => {
|
||||
const pageContent = getProsemirrorContent(page.content);
|
||||
@@ -769,24 +729,7 @@ export class PageService {
|
||||
}
|
||||
});
|
||||
|
||||
let prosemirrorJson = prosemirrorDoc.toJSON();
|
||||
|
||||
// SECURITY (Variant C admin gate, duplication write path):
|
||||
// Duplication builds the ydoc directly and bypasses the collab
|
||||
// onStoreDocument strip. htmlEmbed renders raw, unsanitized JS in
|
||||
// readers' browsers, so only workspace admins/owners may author it. A
|
||||
// non-admin with space Edit could otherwise duplicate an admin page
|
||||
// that contains an embed into a new page authored by them. Strip every
|
||||
// htmlEmbed node from each duplicated page when the duplicating user is
|
||||
// not an admin, BEFORE computing textContent/ydoc/insert.
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: authUser.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from page duplication by user ${authUser.id} (source page ${page.id})`,
|
||||
),
|
||||
});
|
||||
const prosemirrorJson = prosemirrorDoc.toJSON();
|
||||
|
||||
// Add "Copy of " prefix to the root page title only for duplicates in same space
|
||||
let title = page.title;
|
||||
|
||||
@@ -68,7 +68,6 @@ describe('TransclusionService — template access core (real filter)', () => {
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
return { service, db, pageRepo, spaceMemberRepo, pagePermissionRepo };
|
||||
@@ -227,7 +226,6 @@ describe('TransclusionService.filterViewerAccessiblePageIds — AND ordering (co
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
return { service, filterAccessiblePageIds };
|
||||
@@ -324,7 +322,6 @@ describe('TransclusionService.syncPageTemplateReferences — workspace scoping',
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
return {
|
||||
@@ -471,7 +468,6 @@ describe('TransclusionService.insertTemplateReferencesForPages — per-workspace
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
return { service, insertMany };
|
||||
}
|
||||
|
||||
@@ -41,7 +41,6 @@ describe('TransclusionService.lookupTemplate — anti-leak catch branch', () =>
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
// Stub the access decision; we are testing the content-prep stage, not access.
|
||||
@@ -158,7 +157,6 @@ describe('TransclusionService.lookupTemplate — soft-deleted source via real fi
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
{} as any,
|
||||
);
|
||||
|
||||
const { items } = await service.lookupTemplate(['deleted-src'], 'u1', 'w1');
|
||||
|
||||
@@ -35,7 +35,6 @@ describe('TransclusionService.lookupTemplate (access mapping)', () => {
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
|
||||
jest
|
||||
|
||||
@@ -57,7 +57,6 @@ function buildService(opts: {
|
||||
{} as any, // attachmentRepo
|
||||
{} as any, // storageService
|
||||
{} as any, // pageAccessService
|
||||
{} as any, // workspaceRepo
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -1,145 +0,0 @@
|
||||
import { TransclusionService } from '../transclusion.service';
|
||||
import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate.
|
||||
// unsync returns a source snapshot the client materializes into the reference
|
||||
// page; a non-admin must never receive an embed payload to re-persist. The gate
|
||||
// reads `user.role` and strips before returning. All repos / access checks are
|
||||
// mocked so the REAL gate logic runs end-to-end. Complements the existing
|
||||
// transclusion specs (rewriteAttachmentsForUnsync, controller).
|
||||
|
||||
const WS = 'ws-1';
|
||||
const REF_PAGE = 'ref-1';
|
||||
const SRC_PAGE = 'src-1';
|
||||
const TX_ID = 'tx-1';
|
||||
|
||||
const sourceContentWithEmbed = () => ({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{ type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] },
|
||||
{ type: 'htmlEmbed', attrs: { source: '<script>steal()</script>' } },
|
||||
],
|
||||
});
|
||||
|
||||
function buildService(featureEnabled = true) {
|
||||
const pageRepo = {
|
||||
findById: jest.fn(async (id: string) => ({
|
||||
id,
|
||||
workspaceId: WS,
|
||||
spaceId: 'space-1',
|
||||
deletedAt: null,
|
||||
})),
|
||||
};
|
||||
const pageTransclusionsRepo = {
|
||||
findByPageAndTransclusion: jest.fn(async () => ({
|
||||
content: sourceContentWithEmbed(),
|
||||
})),
|
||||
};
|
||||
const pageTransclusionReferencesRepo = {
|
||||
deleteOne: jest.fn(async () => undefined),
|
||||
};
|
||||
const attachmentRepo = { findByIds: jest.fn(async () => []) };
|
||||
const storageService = { copy: jest.fn(async () => undefined) };
|
||||
const pageAccessService = {
|
||||
validateCanEdit: jest.fn(async () => undefined),
|
||||
validateCanView: jest.fn(async () => undefined),
|
||||
};
|
||||
// Workspace settings read used by the toggle-AND-admin gate.
|
||||
const workspaceRepo = {
|
||||
findById: jest.fn(async () => ({
|
||||
id: WS,
|
||||
settings: { htmlEmbed: featureEnabled },
|
||||
})),
|
||||
};
|
||||
|
||||
const service = new TransclusionService(
|
||||
{} as any, // db (unused on this path)
|
||||
pageTransclusionsRepo as any,
|
||||
pageTransclusionReferencesRepo as any,
|
||||
{} as any, // pageTemplateReferencesRepo (unused on this path)
|
||||
pageRepo as any,
|
||||
{} as any, // pagePermissionRepo (unused)
|
||||
{} as any, // spaceMemberRepo (unused)
|
||||
attachmentRepo as any,
|
||||
storageService as any,
|
||||
pageAccessService as any,
|
||||
workspaceRepo as any,
|
||||
);
|
||||
return service;
|
||||
}
|
||||
|
||||
function userWithRole(role: string | null | undefined) {
|
||||
return { id: 'u1', workspaceId: WS, role } as any;
|
||||
}
|
||||
|
||||
describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => {
|
||||
it('non-admin (member): returned content has htmlEmbed stripped', async () => {
|
||||
const service = buildService();
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('member'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
// Non-embed content is preserved.
|
||||
expect(JSON.stringify(content)).toContain('snapshot body');
|
||||
});
|
||||
|
||||
it('unknown/empty role: fails closed (stripped)', async () => {
|
||||
for (const role of [undefined, null, 'viewer'] as const) {
|
||||
const service = buildService();
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole(role),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it('toggle ON + admin: returned content keeps the htmlEmbed', async () => {
|
||||
const service = buildService(true);
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('admin'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle ON + owner: returned content keeps the htmlEmbed', async () => {
|
||||
const service = buildService(true);
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('owner'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(true);
|
||||
});
|
||||
|
||||
it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
|
||||
const service = buildService(false);
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('admin'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
});
|
||||
|
||||
it('toggle OFF + member: stripped', async () => {
|
||||
const service = buildService(false);
|
||||
const { content } = await service.unsyncReference(
|
||||
REF_PAGE,
|
||||
SRC_PAGE,
|
||||
TX_ID,
|
||||
userWithRole('member'),
|
||||
);
|
||||
expect(hasHtmlEmbedNode(content)).toBe(false);
|
||||
});
|
||||
});
|
||||
@@ -33,11 +33,6 @@ import {
|
||||
import { jsonToNode } from '../../../collaboration/collaboration.util';
|
||||
import { Page, User } from '@docmost/db/types/entity.types';
|
||||
import { PageAccessService } from '../page-access/page-access.service';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
|
||||
type ReferencingPageInfo = {
|
||||
id: string;
|
||||
@@ -63,7 +58,6 @@ export class TransclusionService {
|
||||
private readonly attachmentRepo: AttachmentRepo,
|
||||
private readonly storageService: StorageService,
|
||||
private readonly pageAccessService: PageAccessService,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async syncPageTransclusions(
|
||||
@@ -773,26 +767,6 @@ export class TransclusionService {
|
||||
transclusionId,
|
||||
);
|
||||
|
||||
// SECURITY (Variant C admin gate, transclusion unsync write path):
|
||||
// The returned content is a source snapshot that the client materializes
|
||||
// into the reference page via insertContentAt. The snapshot keeps any
|
||||
// htmlEmbed verbatim, and unsync requires only space Edit/View. If the
|
||||
// requesting user is not a workspace admin/owner, strip htmlEmbed nodes so a
|
||||
// non-admin can never receive an embed payload to re-persist (the collab
|
||||
// strip on the subsequent save is debounced/race-prone and must not be the
|
||||
// only guard). Admin behavior is unchanged.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(user.workspaceId))?.settings,
|
||||
);
|
||||
content = stripHtmlEmbedIfNotAllowed(content, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: user.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from transclusion unsync by user ${user.id} (reference page ${referencePageId}, source page ${sourcePageId})`,
|
||||
),
|
||||
});
|
||||
|
||||
return { content };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
import { ShareService } from './share.service';
|
||||
import { hasHtmlEmbedNode } from '../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Exercises the REAL ShareService server-authoritative htmlEmbed kill-switch for
|
||||
// shared content. An anonymous public-share viewer cannot read the per-workspace
|
||||
// htmlEmbed toggle, so the SERVER must decide what to serve: when the toggle is
|
||||
// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are kept so
|
||||
// the read-only client executes them. All repos / token service are mocked so the
|
||||
// real prepareContentForShare logic runs end-to-end via getSharedPage.
|
||||
// Exercises the REAL ShareService server-authoritative htmlEmbed master toggle
|
||||
// for shared content. The block renders inside a sandboxed iframe (harmless), so
|
||||
// this is NOT an XSS guard — it is the master-toggle enforcement for anonymous
|
||||
// shares: an anonymous public-share viewer cannot read the per-workspace
|
||||
// htmlEmbed toggle, so the SERVER must decide what to serve. When the toggle is
|
||||
// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are served
|
||||
// and rendered in their sandboxed frame. All repos / token service are mocked so
|
||||
// the real prepareContentForShare logic runs end-to-end via getSharedPage.
|
||||
|
||||
const WS = 'ws-1';
|
||||
const PAGE = 'page-1';
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Controller, Get, Param, Req, Res } from '@nestjs/common';
|
||||
import { Controller, Get, Logger, Param, Req, Res } from '@nestjs/common';
|
||||
import { ShareService } from './share.service';
|
||||
import { FastifyReply, FastifyRequest } from 'fastify';
|
||||
import { join } from 'path';
|
||||
@@ -11,6 +11,8 @@ import { htmlEscape } from '../../common/helpers/html-escaper';
|
||||
|
||||
@Controller('share')
|
||||
export class ShareSeoController {
|
||||
private readonly logger = new Logger(ShareSeoController.name);
|
||||
|
||||
constructor(
|
||||
private readonly shareService: ShareService,
|
||||
private workspaceRepo: WorkspaceRepo,
|
||||
@@ -84,10 +86,34 @@ export class ShareSeoController {
|
||||
.join('\n ');
|
||||
|
||||
const html = fs.readFileSync(indexFilePath, 'utf8');
|
||||
const transformedHtml = html
|
||||
let transformedHtml = html
|
||||
.replace(/<title>[\s\S]*?<\/title>/i, `<title>${metaTitle}</title>`)
|
||||
.replace(metaTagVar, metaTags);
|
||||
|
||||
// Deliberate same-origin tracker surface: this is the ONE place where an
|
||||
// admin-authored analytics/tracker snippet (settings.trackerHead) is
|
||||
// injected verbatim into the page origin. It is admin-only (writable only
|
||||
// via the admin-gated workspace settings) and applies to PUBLIC SHARE
|
||||
// pages only. It is trusted content, so it is NOT escaped. The htmlEmbed
|
||||
// block itself is sandboxed and is the safe surface for everyone else.
|
||||
const trackerHead = (workspace?.settings as any)?.trackerHead;
|
||||
if (typeof trackerHead === 'string' && trackerHead.trim().length > 0) {
|
||||
if (transformedHtml.includes('</head>')) {
|
||||
// Function replacer: the snippet is admin-authored trusted content and
|
||||
// must be injected verbatim. A string replacement would interpret `$&`,
|
||||
// `$'`, `` $` `` and `$$` inside it as substitution patterns and mangle
|
||||
// the tracker; a function return value is inserted literally.
|
||||
transformedHtml = transformedHtml.replace(
|
||||
'</head>',
|
||||
() => `${trackerHead}\n</head>`,
|
||||
);
|
||||
} else {
|
||||
this.logger.warn(
|
||||
'trackerHead is configured but no </head> marker was found in the share index HTML; tracker snippet was not injected.',
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
res.type('text/html').send(transformedHtml);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -87,9 +87,16 @@ export class ShareController {
|
||||
workspace.id,
|
||||
);
|
||||
|
||||
// Resolve the identity name only when the assistant is enabled, so the
|
||||
// anonymous widget can label messages with the configured persona name.
|
||||
const aiAssistantName = aiAssistant
|
||||
? await this.aiSettings.resolvePublicShareAssistantName(workspace.id)
|
||||
: null;
|
||||
|
||||
return {
|
||||
...shareData,
|
||||
aiAssistant,
|
||||
aiAssistantName,
|
||||
features: this.licenseCheckService.resolveFeatures(
|
||||
workspace.licenseKey,
|
||||
workspace.plan,
|
||||
|
||||
@@ -524,12 +524,14 @@ export class ShareService {
|
||||
* not leak structure (existence, location, count, resolved state, or
|
||||
* comment ids) to public viewers.
|
||||
*
|
||||
* 3. Strip `htmlEmbed` nodes when the workspace feature toggle is OFF. This
|
||||
* makes the toggle a SERVER-AUTHORITATIVE kill-switch for shared content:
|
||||
* when OFF the embed is never served to the anonymous viewer (who can't
|
||||
* read the per-workspace toggle), when ON the embed is served so the
|
||||
* read-only client executes it. `htmlEmbedEnabled` is resolved fail-closed
|
||||
* by the callers (missing workspace => OFF => strip).
|
||||
* 3. Strip `htmlEmbed` nodes when the workspace master toggle is OFF. The
|
||||
* block renders inside a sandboxed iframe on the client (harmless, no
|
||||
* same-origin access), so this is NOT an XSS guard — it is the
|
||||
* SERVER-AUTHORITATIVE enforcement of the workspace master toggle for
|
||||
* anonymous shares: an anonymous viewer cannot read the per-workspace
|
||||
* toggle, so when OFF the block is never served, and when ON it is served
|
||||
* and rendered in its sandboxed frame. `htmlEmbedEnabled` is resolved
|
||||
* fail-closed by the callers (missing workspace => OFF => strip).
|
||||
*
|
||||
* Both share-content paths — the host page (`updatePublicAttachments`) and
|
||||
* the share-scoped transclusion lookup (`lookupTransclusionForShare`) —
|
||||
@@ -544,8 +546,9 @@ export class ShareService {
|
||||
): Promise<Node | null> {
|
||||
let pmJson = getProsemirrorContent(content);
|
||||
|
||||
// Kill-switch: when the workspace toggle is OFF, never serve htmlEmbed
|
||||
// nodes to public viewers. Strip before tokenizing/serializing.
|
||||
// Master-toggle enforcement: when the workspace toggle is OFF, never serve
|
||||
// htmlEmbed nodes to anonymous public viewers (who cannot read the toggle).
|
||||
// Strip before tokenizing/serializing.
|
||||
if (!htmlEmbedEnabled) {
|
||||
pmJson = stripHtmlEmbedNodes(pmJson);
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@ import {
|
||||
IsBoolean,
|
||||
IsInt,
|
||||
IsOptional,
|
||||
IsString,
|
||||
MaxLength,
|
||||
Min,
|
||||
} from 'class-validator';
|
||||
|
||||
@@ -53,12 +55,22 @@ export class UpdateWorkspaceDto extends PartialType(CreateWorkspaceDto) {
|
||||
@IsBoolean()
|
||||
aiDictation: boolean;
|
||||
|
||||
// Workspace feature toggle for the admin-only HTML embed feature. Persisted at
|
||||
// settings.htmlEmbed. ABSENT/false => OFF (default).
|
||||
// Workspace master toggle that enables/disables the HTML embed block type.
|
||||
// Persisted at settings.htmlEmbed. ABSENT/false => OFF (default). The block
|
||||
// itself renders in a sandboxed iframe, so this is a feature switch, not a
|
||||
// security gate.
|
||||
@IsOptional()
|
||||
@IsBoolean()
|
||||
htmlEmbed: boolean;
|
||||
|
||||
// Admin-only analytics/tracker snippet (raw HTML/JS) injected verbatim into
|
||||
// the <head> of PUBLIC SHARE pages only (same-origin). Persisted at
|
||||
// settings.trackerHead. Admin-authored trusted content.
|
||||
@IsOptional()
|
||||
@IsString()
|
||||
@MaxLength(20000)
|
||||
trackerHead?: string;
|
||||
|
||||
@IsOptional()
|
||||
@IsBoolean()
|
||||
aiPublicShareAssistant: boolean;
|
||||
|
||||
@@ -108,4 +108,38 @@ describe('WorkspaceService.update — htmlEmbed toggle persistence (real code)',
|
||||
expect(logged.changes.before.htmlEmbed).toBe(false);
|
||||
expect(logged.changes.after.htmlEmbed).toBe(true);
|
||||
});
|
||||
|
||||
it('persists trackerHead via updateSetting with the trackerHead key', async () => {
|
||||
const { service, updateSetting } = buildService({});
|
||||
|
||||
await service.update('w1', { trackerHead: '<script>ga()</script>' } as any);
|
||||
|
||||
expect(updateSetting).toHaveBeenCalledWith(
|
||||
'w1',
|
||||
'trackerHead',
|
||||
'<script>ga()</script>',
|
||||
expect.anything(),
|
||||
);
|
||||
});
|
||||
|
||||
it('does NOT call updateSetting when trackerHead is undefined in the dto', async () => {
|
||||
const { service, updateSetting } = buildService({});
|
||||
|
||||
await service.update('w1', { name: 'New name' } as any);
|
||||
|
||||
expect(updateSetting).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('audits the trackerHead change (before/after) when the value changes', async () => {
|
||||
const { service, auditService } = buildService({
|
||||
settingsBefore: { trackerHead: '' },
|
||||
});
|
||||
|
||||
await service.update('w1', { trackerHead: '<script>m()</script>' } as any);
|
||||
|
||||
expect(auditService.log).toHaveBeenCalledTimes(1);
|
||||
const logged = auditService.log.mock.calls[0][0];
|
||||
expect(logged.changes.before.trackerHead).toBe('');
|
||||
expect(logged.changes.after.trackerHead).toBe('<script>m()</script>');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -525,6 +525,22 @@ export class WorkspaceService {
|
||||
);
|
||||
}
|
||||
|
||||
if (typeof updateWorkspaceDto.trackerHead !== 'undefined') {
|
||||
// Admin-only analytics/tracker snippet injected into the <head> of
|
||||
// public share pages (same-origin). Persisted at settings.trackerHead.
|
||||
const prev = (settingsBefore as any)?.trackerHead ?? '';
|
||||
if (prev !== updateWorkspaceDto.trackerHead) {
|
||||
before.trackerHead = prev;
|
||||
after.trackerHead = updateWorkspaceDto.trackerHead;
|
||||
}
|
||||
await this.workspaceRepo.updateSetting(
|
||||
workspaceId,
|
||||
'trackerHead',
|
||||
updateWorkspaceDto.trackerHead,
|
||||
trx,
|
||||
);
|
||||
}
|
||||
|
||||
if (typeof updateWorkspaceDto.aiPublicShareAssistant !== 'undefined') {
|
||||
const prev = settingsBefore?.ai?.publicShareAssistant ?? false;
|
||||
if (prev !== updateWorkspaceDto.aiPublicShareAssistant) {
|
||||
@@ -549,6 +565,7 @@ export class WorkspaceService {
|
||||
delete updateWorkspaceDto.aiChat;
|
||||
delete updateWorkspaceDto.aiDictation;
|
||||
delete updateWorkspaceDto.htmlEmbed;
|
||||
delete updateWorkspaceDto.trackerHead;
|
||||
delete updateWorkspaceDto.aiPublicShareAssistant;
|
||||
|
||||
await this.workspaceRepo.updateWorkspace(
|
||||
|
||||
@@ -3,6 +3,7 @@ import { InjectQueue } from '@nestjs/bullmq';
|
||||
import { Queue } from 'bullmq';
|
||||
import { QueueName, QueueJob } from '../queue/constants';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo';
|
||||
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
|
||||
import { PageEmbeddingRepo } from '@docmost/db/repos/ai-chat/page-embedding.repo';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
@@ -49,6 +50,7 @@ export interface UpdateAiSettingsInput {
|
||||
export class AiSettingsService {
|
||||
constructor(
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private readonly aiAgentRoleRepo: AiAgentRoleRepo,
|
||||
private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
|
||||
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
|
||||
private readonly pageRepo: PageRepo,
|
||||
@@ -110,6 +112,26 @@ export class AiSettingsService {
|
||||
return settings?.ai?.publicShareAssistant === true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the display name of the agent role acting as the public-share
|
||||
* assistant's identity, so the anonymous widget can label messages with the
|
||||
* persona name instead of the generic "AI agent". Returns null when no role
|
||||
* is configured, or the referenced role is missing/disabled (built-in persona
|
||||
* → the client falls back to "AI agent"). Mirrors the role resolution in
|
||||
* PublicShareChatService.resolveShareRole.
|
||||
*/
|
||||
async resolvePublicShareAssistantName(
|
||||
workspaceId: string,
|
||||
): Promise<string | null> {
|
||||
const resolved = await this.resolve(workspaceId);
|
||||
const roleId = resolved?.publicShareAssistantRoleId;
|
||||
if (!roleId) return null;
|
||||
const role = await this.aiAgentRoleRepo.findById(roleId, workspaceId);
|
||||
if (!role || !role.enabled) return null;
|
||||
const name = role.name?.trim();
|
||||
return name ? name : null;
|
||||
}
|
||||
|
||||
/** Read the stored non-secret provider settings for a workspace. */
|
||||
private async readProvider(
|
||||
workspaceId: string,
|
||||
|
||||
@@ -20,12 +20,6 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
||||
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
|
||||
import { markdownToHtml } from '@docmost/editor-ext';
|
||||
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
|
||||
import {
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { formatImportHtml } from '../utils/import-formatter';
|
||||
import {
|
||||
buildAttachmentCandidates,
|
||||
@@ -59,8 +53,6 @@ export class FileImportTaskService {
|
||||
private readonly backlinkRepo: BacklinkRepo,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
private readonly importAttachmentService: ImportAttachmentService,
|
||||
private readonly userRepo: UserRepo,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
private eventEmitter: EventEmitter2,
|
||||
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
|
||||
) {}
|
||||
@@ -157,25 +149,6 @@ export class FileImportTaskService {
|
||||
.where('id', '=', fileTask.spaceId)
|
||||
.executeTakeFirst();
|
||||
|
||||
// SECURITY (Variant C admin gate, zip/multi-file import write path):
|
||||
// An imported .html/.md file can carry an htmlEmbed marker (the node's
|
||||
// serialized form), which would execute raw, unsanitized JS in readers'
|
||||
// browsers. Only workspace admins/owners may author it. Resolve the
|
||||
// importer's role ONCE here; each page's prosemirror JSON is run through the
|
||||
// strip below before textContent/ydoc/insert when the importer is not an
|
||||
// admin, so a non-admin cannot smuggle the node in via a zip import (which
|
||||
// requires only space Edit).
|
||||
const importingUser = await this.userRepo.findById(
|
||||
fileTask.creatorId,
|
||||
fileTask.workspaceId,
|
||||
);
|
||||
// Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed
|
||||
// survives only when the workspace feature toggle is ON and the importer is
|
||||
// an admin/owner. OFF (default) => stripped for everyone.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(fileTask.workspaceId))?.settings,
|
||||
);
|
||||
|
||||
const pagesMap = new Map<string, ImportPageNode>();
|
||||
|
||||
for (const absPath of allFiles) {
|
||||
@@ -523,22 +496,9 @@ export class FileImportTaskService {
|
||||
await this.importService.processHTML(html),
|
||||
);
|
||||
|
||||
let { title, prosemirrorJson } =
|
||||
const { title, prosemirrorJson } =
|
||||
this.importService.extractTitleAndRemoveHeading(pmState);
|
||||
|
||||
// SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
|
||||
// imported by a non-admin BEFORE computing textContent/ydoc/insert.
|
||||
// Gate (featureEnabled AND admin) is resolved once above and recomputed
|
||||
// by the helper from the same htmlEmbedEnabled + importer role.
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: importingUser?.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
|
||||
),
|
||||
});
|
||||
|
||||
const insertablePage: InsertablePage = {
|
||||
id: page.id,
|
||||
slugId: page.slugId,
|
||||
|
||||
@@ -1,266 +0,0 @@
|
||||
// Exercises the REAL htmlEmbed admin gate on the two import write paths:
|
||||
//
|
||||
// (1) ImportService.importPage() — single .html/.md upload
|
||||
// (2) FileImportTaskService.processGenericImport() — zip / multi-file import
|
||||
//
|
||||
// Both build content/textContent/ydoc directly and persist (bypassing the
|
||||
// collab onStoreDocument strip), so each must run the imported document through
|
||||
// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read
|
||||
// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed,
|
||||
// `stripHtmlEmbedNodes` BEFORE persisting.
|
||||
//
|
||||
// This spec constructs the REAL services with deps mocked, feeds an imported
|
||||
// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed
|
||||
// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED
|
||||
// content (captured at the insert boundary) is stripped for a non-admin /
|
||||
// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD
|
||||
// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts.
|
||||
//
|
||||
// Three modules are mocked away because they pull transitive ESM deps that
|
||||
// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the
|
||||
// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via
|
||||
// import-attachment). None of them participate in the gate decision:
// - collaboration.gateway: replaced with an empty `CollaborationGateway` class.
|
||||
// - import-formatter: contextless HTML cleanup + link rewriting; replaced with
|
||||
// faithful passthroughs (the embed div has no href/iframe, so the real
|
||||
// normalizer would leave it untouched anyway).
|
||||
// - import-attachment: attachment rewriting; passthrough returns html as-is.
|
||||
// Module stubs for the three imports described in the header comment above.
// Each factory returns only the minimal surface this spec needs.
jest.mock('../../../collaboration/collaboration.gateway', () => {
  return { CollaborationGateway: class {} };
});

jest.mock('../utils/import-formatter', () => {
  return {
    // No-op stand-in for the HTML normalizer.
    normalizeImportHtml() {},
    // Echoes the incoming html untouched, with no backlinks and no page icon.
    async formatImportHtml(opts: any) {
      return { html: opts.html, backlinks: [], pageIcon: undefined };
    },
  };
});

jest.mock('./import-attachment.service', () => {
  return { ImportAttachmentService: class {} };
});
|
||||
|
||||
import { promises as fs } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { ImportService } from './import.service';
|
||||
import { FileImportTaskService } from './file-import-task.service';
|
||||
import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
|
||||
// Identifiers shared by every scenario in this spec.
const WS = 'ws-1';
const SPACE = 'space-1';
const USER = 'importer-1';

// Import payload: one paragraph followed by a serialized htmlEmbed node. The
// REAL htmlToJson parses `<div data-type="htmlEmbed" data-source="BASE64">`
// into an htmlEmbed PM node; the base64 value decodes to `<script>x</script>`.
const HTML_WITH_EMBED = [
  '<p>imported body</p>',
  '<div data-type="htmlEmbed" data-source="PHNjcmlwdD54PC9zY3JpcHQ+"></div>',
].join('');
|
||||
|
||||
/**
 * Builds a stand-in workspace repository.
 *
 * @param featureEnabled value reported as `settings.htmlEmbed`
 * @returns an object whose mocked `findById` always resolves to workspace
 *   `WS` carrying that setting (a fresh object on every call)
 */
function workspaceRepoFor(featureEnabled: boolean) {
  const findById = jest.fn(async () => {
    return { id: WS, settings: { htmlEmbed: featureEnabled } };
  });
  return { findById };
}
|
||||
|
||||
/**
 * Builds a stand-in user repository.
 *
 * @param user record that the mocked `findById` resolves to; pass `undefined`
 *   to model a missing importer (the fail-closed case)
 * @returns an object exposing the mocked `findById`
 */
function userRepoFor(user: { role?: string } | undefined) {
  const findById = jest.fn(async () => user);
  return { findById };
}
|
||||
|
||||
describe('ImportService.importPage htmlEmbed admin gate (real code)', () => {
  /**
   * Runs `importPage` for a single uploaded `doc.html` holding
   * `HTML_WITH_EMBED` and returns the `content` of the row handed to
   * `pageRepo.insertPage`.
   *
   * @param featureEnabled workspace htmlEmbed toggle reported by the repo stub
   * @param user importer record returned by the user repo stub
   */
  async function persistedContent(
    featureEnabled: boolean,
    user: { role?: string } | undefined,
  ) {
    const insertedRows: any[] = [];
    const insertPage = jest.fn(async (row: any) => {
      insertedRows.push(row);
      return { id: 'p1', slugId: 's1', ...row };
    });
    const pageRepo: any = { insertPage };

    // db is only used for getNewPagePosition (a select chain): every builder
    // step returns the same stub and the final lookup yields nothing.
    const query: any = {};
    for (const step of ['select', 'where', 'orderBy', 'limit']) {
      query[step] = () => query;
    }
    query.executeTakeFirst = async () => undefined;
    const db: any = { selectFrom: () => query };

    const storageService: any = { putBuffer: jest.fn() }; // unused on this path
    const fileTaskQueue: any = { add: jest.fn() }; // unused

    const service = new ImportService(
      pageRepo,
      userRepoFor(user) as any,
      storageService,
      db,
      fileTaskQueue,
      workspaceRepoFor(featureEnabled) as any,
    );

    const upload: any = {
      filename: 'doc.html',
      async toBuffer() {
        return Buffer.from(HTML_WITH_EMBED, 'utf-8');
      },
    };
    await service.importPage(Promise.resolve(upload), USER, SPACE, WS);

    expect(insertedRows).toHaveLength(1);
    const [row] = insertedRows;
    return row.content;
  }

  it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
    const stored = await persistedContent(true, { role: 'member' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
    expect(JSON.stringify(stored)).toContain('imported body');
  });

  it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => {
    const stored = await persistedContent(true, undefined);
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });

  // Privileged roles keep the embed while the toggle is ON.
  for (const role of ['admin', 'owner']) {
    it(`toggle ON + ${role}: persisted content keeps the htmlEmbed`, async () => {
      const stored = await persistedContent(true, { role });
      expect(hasHtmlEmbedNode(stored)).toBe(true);
    });
  }

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    const stored = await persistedContent(false, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });
});
|
||||
|
||||
describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => {
  let extractDir: string;

  // Each test gets a real temp dir with a single `page.html` carrying the
  // embed; the method under test reads it from disk.
  beforeEach(async () => {
    const prefix = path.join(os.tmpdir(), 'html-embed-import-');
    extractDir = await fs.mkdtemp(prefix);
    const pageFile = path.join(extractDir, 'page.html');
    await fs.writeFile(pageFile, HTML_WITH_EMBED);
  });

  afterEach(async () => {
    await fs.rm(extractDir, { recursive: true, force: true });
  });

  /**
   * Runs `processGenericImport` over the temp dir and returns the `content`
   * of the first row captured at `trx.insertInto('pages').values(...)`.
   *
   * @param featureEnabled workspace htmlEmbed toggle reported by the repo stub
   * @param user importer record returned by the user repo stub
   */
  async function persistedContent(
    featureEnabled: boolean,
    user: { role?: string } | undefined,
  ) {
    const pageRows: any[] = [];

    // Transaction stub: records rows inserted into `pages`, ignores the rest.
    const trx: any = {
      insertInto(table: string) {
        return {
          values(row: any) {
            if (table === 'pages') {
              pageRows.push(row);
            }
            return { execute: async () => undefined };
          },
        };
      },
    };

    // spaces lookup at the top of processGenericImport
    const spaceLookup = {
      select: () => ({
        where: () => ({ executeTakeFirst: async () => ({ slug: 'sp' }) }),
      }),
    };
    const db: any = {
      selectFrom: () => spaceLookup,
      // executeTx -> db.transaction().execute(cb)
      transaction() {
        return {
          async execute(cb: any) {
            return cb(trx);
          },
        };
      },
    };

    // The import service stub delegates the gate-relevant helpers to a REAL
    // ImportService (built with empty deps) so the embed parse and strip path
    // runs for real.
    const realImport = new ImportService(
      {} as any,
      {} as any,
      {} as any,
      {} as any,
      {} as any,
      {} as any,
    );
    const importService: any = {
      processHTML(html: string) {
        return realImport.processHTML(html);
      },
      extractTitleAndRemoveHeading(s: any) {
        return realImport.extractTitleAndRemoveHeading(s);
      },
      createYdoc(j: any) {
        return realImport.createYdoc(j);
      },
    };

    // passthrough: no attachment rewriting, return html unchanged
    const processAttachments = jest.fn(async (opts: any) => opts.html);
    const importAttachmentService: any = { processAttachments };

    const storageService: any = { putBuffer: jest.fn() };
    const pageService: any = { nextPagePosition: jest.fn(async () => 'a0') }; // position only
    const backlinkRepo: any = { insertBacklink: jest.fn() };
    const eventEmitter: any = { emit: jest.fn() };
    const auditService: any = { logBatchWithContext: jest.fn() };

    const service = new FileImportTaskService(
      storageService,
      importService,
      pageService,
      backlinkRepo,
      db,
      importAttachmentService,
      userRepoFor(user) as any,
      workspaceRepoFor(featureEnabled) as any,
      eventEmitter,
      auditService,
    );

    const fileTask: any = {
      id: 'task-1',
      creatorId: USER,
      workspaceId: WS,
      spaceId: SPACE,
      source: 'generic',
    };

    await service.processGenericImport({ extractDir, fileTask });

    expect(pageRows.length).toBeGreaterThanOrEqual(1);
    const [firstRow] = pageRows;
    return firstRow.content;
  }

  it('toggle ON + member: persisted page has htmlEmbed stripped', async () => {
    const stored = await persistedContent(true, { role: 'member' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
    expect(JSON.stringify(stored)).toContain('imported body');
  });

  it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => {
    const stored = await persistedContent(true, undefined);
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });

  // Privileged roles keep the embed while the toggle is ON.
  for (const role of ['admin', 'owner']) {
    it(`toggle ON + ${role}: persisted page keeps the htmlEmbed`, async () => {
      const stored = await persistedContent(true, { role });
      expect(hasHtmlEmbedNode(stored)).toBe(true);
    });
  }

  it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
    const stored = await persistedContent(false, { role: 'admin' });
    expect(hasHtmlEmbedNode(stored)).toBe(false);
  });
});
|
||||
@@ -1,12 +1,5 @@
|
||||
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { UserRepo } from '@docmost/db/repos/user/user.repo';
|
||||
import {
|
||||
hasHtmlEmbedNode,
|
||||
isHtmlEmbedFeatureEnabled,
|
||||
stripHtmlEmbedIfNotAllowed,
|
||||
} from '../../../common/helpers/prosemirror/html-embed.util';
|
||||
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
|
||||
import { MultipartFile } from '@fastify/multipart';
|
||||
import * as path from 'path';
|
||||
import {
|
||||
@@ -44,12 +37,10 @@ export class ImportService {
|
||||
|
||||
constructor(
|
||||
private readonly pageRepo: PageRepo,
|
||||
private readonly userRepo: UserRepo,
|
||||
private readonly storageService: StorageService,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
@InjectQueue(QueueName.FILE_TASK_QUEUE)
|
||||
private readonly fileTaskQueue: Queue,
|
||||
private readonly workspaceRepo: WorkspaceRepo,
|
||||
) {}
|
||||
|
||||
async importPage(
|
||||
@@ -94,32 +85,7 @@ export class ImportService {
|
||||
|
||||
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
|
||||
const title = extracted.title;
|
||||
let prosemirrorJson = extracted.prosemirrorJson;
|
||||
|
||||
// SECURITY (Variant C admin gate, import write path):
|
||||
// An imported .html/.md file can carry an htmlEmbed marker (the node's
|
||||
// serialized form), which would execute raw JS in readers' browsers. Only
|
||||
// workspace admins/owners may author it, so strip htmlEmbed nodes from
|
||||
// imports performed by a non-admin user.
|
||||
// Outer has-check first so the user/workspace lookups below run only when an
|
||||
// embed is actually present (the common case carries none).
|
||||
if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
|
||||
const importingUser = await this.userRepo.findById(userId, workspaceId);
|
||||
// Toggle-AND-admin gate: htmlEmbed survives only when the workspace
|
||||
// feature toggle is ON and the importer is an admin/owner. OFF (default)
|
||||
// => stripped for everyone.
|
||||
const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
|
||||
(await this.workspaceRepo.findById(workspaceId))?.settings,
|
||||
);
|
||||
prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
|
||||
featureEnabled: htmlEmbedEnabled,
|
||||
role: importingUser?.role,
|
||||
onStrip: () =>
|
||||
this.logger.warn(
|
||||
`Stripping htmlEmbed node(s) from import by user ${userId}`,
|
||||
),
|
||||
});
|
||||
}
|
||||
const prosemirrorJson = extracted.prosemirrorJson;
|
||||
|
||||
const pageTitle = title || fileName;
|
||||
|
||||
|
||||
@@ -60,6 +60,36 @@ agent-claim, `docmost-client.loader.ts:159` — `getCollabToken`; см. план
|
||||
встроенный агент получал устаревшую подсказку. Это и есть материализованный
|
||||
parity-баг.
|
||||
|
||||
## Расширение: дублируются не только описания инструментов — ещё и конвертер (PM ↔ Markdown)
|
||||
|
||||
Зафиксировано при планировании встраивания git-синка (`docmost-sync` → gitmost,
|
||||
нативная in-process интеграция). Та же болезнь «несколько рукописных копий одного
|
||||
кода» теперь касается слоя конвертации ProseMirror ↔ Markdown и его lib, а не
|
||||
только метаданных инструментов.
|
||||
|
||||
- **Копия в gitmost** — `packages/mcp/src/lib/`: `markdown-converter.ts` (~885
|
||||
строк), `markdown-document.ts` (~136), `node-ops.ts`, `diff.ts`,
|
||||
`docmost-schema.ts`. Канонизатора (`canonicalize.ts`) здесь НЕТ.
|
||||
- **Копия в docmost-sync** — `packages/docmost-client/src/lib/`: тот же набор +
|
||||
`canonicalize.ts` (~11 КБ, держит идемпотентность round-trip, SPEC §11) +
|
||||
`markdown-document.ts` с режимом «тело + якоря, без тредов комментов»
|
||||
(`includeCommentThreads:false`, на ~20 строк больше).
|
||||
- **Третья копия (планируется)** — план git-синка вендорит чистую часть
|
||||
конвертера в новый `packages/git-sync` (collab-файл не нужен: запись идёт
|
||||
нативно через `openDirectConnection` + `@docmost/editor-ext`).
|
||||
|
||||
Копии уже молча разъехались (docmost-sync vs `packages/mcp`): `collaboration.ts`
|
||||
~329 изменённых строк, `node-ops.ts` ~53, `markdown-converter.ts` ~24,
|
||||
`markdown-document.ts` ~20. Отдельно: `docmost-schema.ts` в lib дублирует
|
||||
**реальную** схему сервера `@docmost/editor-ext` (её использует collab/persistence)
|
||||
— расхождение схем = риск битой конвертации нод.
|
||||
|
||||
Вывод: тот же фикс-вектор (единый источник правды), что и для инструментов, стоит
|
||||
распространить на конвертер — общий пакет конвертации, потребляемый `mcp`,
|
||||
`git-sync` и (в идеале) сервером. До конвергенции git-sync держит вендоренную
|
||||
копию валидированного конвертера с гейтом round-trip против схемы `editor-ext`
|
||||
(осознанный долг «третья копия сейчас, объединяем позже»).
|
||||
|
||||
## Фикс
|
||||
|
||||
**Единый реестр спеков (полное устранение дублирования).** Вынести в
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
# /pages/import отдаёт 400 «Error processing file content» (регресс)
|
||||
|
||||
Статус: **диагностируемость починена** (fix #1 применён); корневая причина **не
|
||||
подтверждена** — на текущем коде локально баг воспроизвести не удалось.
|
||||
Ниже — что удалось выяснить, главный подозреваемый и что проверить дальше.
|
||||
|
||||
## Симптом
|
||||
|
||||
На задеплоенном инстансе эндпоинт `POST /pages/import` отдаёт
|
||||
`400 BadRequest` с телом «Error processing file content». Раньше работал —
|
||||
похоже на регресс после редеплоя гитмоста.
|
||||
|
||||
Через этот эндпоинт грузит контент MCP-инструмент `create_page` (это
|
||||
единственный эндпоинт, принимающий контент при создании страницы —
|
||||
см. комментарий в `packages/mcp/src/client.ts:961`).
|
||||
|
||||
Что при этом **исправно** (важно для локализации):
|
||||
- `POST /pages/create` — создание пустой страницы.
|
||||
- `update_page_json` — запись контента через realtime-коллаборацию (Yjs).
|
||||
|
||||
## Где именно бросается ошибка
|
||||
|
||||
`apps/server/src/integrations/import/services/import.service.ts:93-97` —
|
||||
`try/catch` вокруг обработки контента (состояние до fix #1, см. «Направления фикса»):
|
||||
|
||||
```ts
|
||||
} catch (err) {
|
||||
const message = 'Error processing file content';
|
||||
this.logger.error(message, err); // реальная причина логируется ТОЛЬКО в логи
|
||||
throw new BadRequestException(message); // наружу уходит generic-строка
|
||||
}
|
||||
```
|
||||
|
||||
Реальный текст ошибки/стек **проглатывается** (наружу — generic-строка), что
|
||||
нарушает конвенцию проекта (см. CLAUDE.md, «Errors must never be swallowed»).
|
||||
Поэтому по ответу 400 причину не видно — её надо читать в логах сервера
|
||||
(`logger.error(message, err)` пишет полный err) ИЛИ воспроизвести локально.
|
||||
|
||||
## Цепочка обработки для .md (что внутри try)
|
||||
|
||||
`importPage` → `processMarkdown(fileContent)`:
|
||||
1. `markdownToHtml` (`packages/editor-ext/.../marked.utils.ts`) — marked, чистый JS, без DOM.
|
||||
2. `processHTML`: cheerio `load` → `normalizeImportHtml` (`utils/import-formatter.ts`) — чистый JS.
|
||||
3. `htmlToJson` (`apps/server/src/collaboration/collaboration.util.ts:118`) →
|
||||
`generateJSON(html, tiptapExtensions)`.
|
||||
|
||||
## Ключевая зацепка: путь импорта зависит от happy-dom, рабочие пути — нет
|
||||
|
||||
`generateJSON` (`apps/server/src/common/helpers/prosemirror/html/generateJSON.ts`)
|
||||
парсит HTML через **happy-dom**: `new Window()` + `new localWindow.DOMParser()` +
|
||||
`parseFromString(...)`, затем `PMDOMParser.fromSchema(schema).parse(doc.body)`.
|
||||
|
||||
А исправные пути DOM-парсер НЕ используют:
|
||||
- `/pages/create` — пустая страница, контент не парсится.
|
||||
- `update_page_json` — пишет готовый ProseMirror-JSON в Yjs
|
||||
(`TiptapTransformer.toYdoc`), без HTML→DOM.
|
||||
|
||||
То есть единственное, что есть в сломанном пути и отсутствует в рабочих, —
|
||||
**серверный парсинг HTML через happy-dom**.
|
||||
|
||||
## Главный подозреваемый: бамп happy-dom (14 → 20)
|
||||
|
||||
- Изначально было `"happy-dom": "^14.12.3"`.
|
||||
- Сейчас запинено `"happy-dom": "20.8.9"` в `apps/server/package.json:83`
|
||||
(+ override в корневом `package.json`).
|
||||
- Пин на `20.8.9` пришёл в коммите `17da7629 "overrides"`
|
||||
(Philipinho, 2026-03-28), где `20.8.4` → `20.8.9`.
|
||||
- Скачок 14 → 20 — это 6 мажоров; у happy-dom между мажорами ломающие
|
||||
изменения в API `Window`/`DOMParser` и в поведении парсинга HTML. Очень
|
||||
вероятно, что `generateJSON` ломается на новом happy-dom.
|
||||
|
||||
Версия в node_modules подтверждена: `happy-dom@20.8.9` (симлинк свежий).
|
||||
|
||||
## Второстепенный подозреваемый
|
||||
|
||||
`getSchema(tiptapExtensions)` / `PMDOMParser.parse(...)` могут спотыкаться на
|
||||
`parseHTML`-правилах недавно добавленных нод (synced blocks/transclusion,
|
||||
page break, indent, columns, status — все они в `tiptapExtensions`). Но
|
||||
`getSchema` используется и в рабочем пути (`createYdoc`/`update_page_json`),
|
||||
поэтому сам по себе билд схемы скорее всего цел — под подозрением именно
|
||||
DOM-парс-ветка, уникальная для импорта.
|
||||
|
||||
## Направления фикса
|
||||
|
||||
1. **Диагностируемость — ✅ СДЕЛАНО (по конвенции проекта).** В catch-блоках
|
||||
`import.service.ts` (обработка контента + вставка страницы) реальная
|
||||
причина теперь прокидывается наружу: `BadRequestException` несёт
|
||||
`${err.name}: ${err.message}`, а в лог пишется полный `err` со стеком.
|
||||
Раньше наружу уходила generic-строка "Error processing file content".
|
||||
Теперь при повторе 400 на проде реальный reason будет виден прямо в теле
|
||||
ответа — без необходимости лезть в логи.
|
||||
2. **Корневой фикс — ⏳ НЕ ПОДТВЕРЖДЁН.** Гипотеза happy-dom 14→20 **не
|
||||
подтвердилась** при локальном воспроизведении на текущем коде (см. ниже).
|
||||
Применять блайнд-даунгрейд happy-dom нельзя — нужен реальный stack из
|
||||
логов/ответа после повторения.
|
||||
|
||||
## Локальное воспроизведение (выполнено)
|
||||
|
||||
На текущем `main` (happy-dom 20.8.9) вся цепочка импорта `.md` отработала
|
||||
без ошибок через `tsx` (импорты прямо из source, не из dist):
|
||||
|
||||
- `markdownToHtml` → cheerio `load` → `normalizeImportHtml` → `generateJSON`
|
||||
с полным набором из 44 `tiptapExtensions` — **OK** для:
|
||||
- базового markdown (заголовки, bold/italic, списки, таблицы, code-block,
|
||||
blockquote)
|
||||
- edge-cases: пустой контент, whitespace, HTML-сущности, вложенные списки,
|
||||
task-list, emoji, кириллица, спецсимволы в code, ссылки, изображения, hr
|
||||
- API happy-dom 20.8.9, используемые в `generateJSON`, существуют и работают:
|
||||
`new Window()`, `new localWindow.DOMParser()`, `parseFromString('…',
|
||||
'text/html')`, `happyDOM.abort()` (async), `happyDOM.close()` (async).
|
||||
- Блок `finally` в `generateJSON` вызывает `abort()/close()` без `await` и без
|
||||
`try/catch`, но эти методы не бросают синхронно и не перезаписывают
|
||||
результат — **не является** причиной 400 (проверено отдельным тестом).
|
||||
- Все `parseHTML`-правила расширений (status, transclusion, page-break,
|
||||
columns, subpages и т.д.) участвуют в успешном тесте — ни одно не падает.
|
||||
|
||||
Вывод: на текущем коде баг **не воспроизводится**. Вероятные объяснения —
|
||||
контент-специфичный кейс, которого нет в тестах; разница между source и
|
||||
собранным `dist`; либо временное состояние задеплоенного инстанса. После
|
||||
применения fix #1 повторный 400 покажет реальный reason — по нему и искать
|
||||
корень.
|
||||
534
docs/git-sync-plan.md
Normal file
534
docs/git-sync-plan.md
Normal file
@@ -0,0 +1,534 @@
|
||||
# Git-sync: спека реализации (встраивание docmost-sync в gitmost)
|
||||
|
||||
Статус: **спецификация, код не менялся.** Детальный план реализации фичи
|
||||
«двусторонний синк страниц Docmost ↔ локальная git-папка Markdown», встроенной
|
||||
прямо в gitmost.
|
||||
|
||||
Источник движка: `https://gitea.vvzvlad.xyz/vvzvlad/docmost-sync`
|
||||
(ветка `main`, на момент спеки HEAD `b03eb35`). Все сигнатуры ниже сверены с этим
|
||||
исходником и с текущим кодом gitmost.
|
||||
|
||||
Предыстория и обоснование архитектурных развилок — в бэклоге
|
||||
[ai-chat-tool-definitions-duplicated.md](backlog/ai-chat-tool-definitions-duplicated.md)
|
||||
(раздел про дублирование конвертера) и в исходном `SPEC.md` репозитория
|
||||
docmost-sync (нумерация §-параграфов ниже ссылается на него).
|
||||
|
||||
---
|
||||
|
||||
## 0. Зафиксированные решения
|
||||
|
||||
Из обсуждения архитектуры (выбор пользователя) и трёх суб-решений:
|
||||
|
||||
1. **Нативная in-process интеграция.** Никаких REST-к-себе и сервис-юзера: чтение
|
||||
через репозитории gitmost, запись тела — через collab `openDirectConnection`,
|
||||
триггеры — через `EventEmitter2` вместо поллинга `/recent`.
|
||||
2. **Встроенный NestJS-модуль** `GitSyncModule` в `apps/server/src/integrations/git-sync`
|
||||
с `@Interval`/событиями и **leader-lock на Redis** (single-writer при нескольких
|
||||
репликах).
|
||||
3. **Настройка по спейсам в UI** — флаг в `space.settings.gitSync`, секреты
|
||||
(git-remote) — через ENV/`EnvironmentService`.
|
||||
4. **Конвертер** — вендорим *чистую* часть из docmost-sync в `packages/git-sync`,
|
||||
гейт = round-trip-идемпотентность против схемы `@docmost/editor-ext`.
|
||||
5. **Vault** — **репозиторий на спейс**; `move-to-space` = кросс-репо delete+create.
|
||||
6. **Провенанс** — отдельное значение `lastUpdatedSource = 'git-sync'`.
|
||||
|
||||
Вне scope v1 (как и в SPEC): комментарии (только якоря, без тредов), права/ACL,
|
||||
вложения как отдельный поток (едут ссылками внутри контента), realtime-подписка
|
||||
на Hocuspocus (остаётся поллинг-страховка + события).
|
||||
|
||||
---
|
||||
|
||||
## 1. Архитектура верхнего уровня
|
||||
|
||||
```
|
||||
gitmost server (NestJS, один процесс)
|
||||
┌─────────────────────────────────────────────────────────────┐
|
||||
│ GitSyncModule │
|
||||
│ │
|
||||
│ GitSyncOrchestrator ── @Interval + Redis leader-lock │
|
||||
│ │ (per enabled space: pull-cycle / push-cycle) │
|
||||
│ │ │
|
||||
│ ├── engine (vendored docmost-sync, IO инжектируется) │
|
||||
│ │ pull.ts / push.ts / reconcile / layout / stabilize │
|
||||
│ │ │
|
||||
│ ├── GitmostDataSource ── реализует подмножество │
|
||||
│ │ DocmostClient НАТИВНО: │
|
||||
│ │ reads → PageRepo / SpaceRepo (Kysely) │
|
||||
│ │ writes → CollaborationGateway.openDirectConnection│
|
||||
│ │ + PageService (create/move/delete/...) │
|
||||
│ │ │
|
||||
│ └── VaultGit ── shell-out в системный git (как есть) │
|
||||
│ │
|
||||
│ PageChangeListener ── подписка на EventName.PAGE_* → │
|
||||
│ debounce → enqueue push-cycle │
|
||||
└─────────────────────────────────────────────────────────────┘
|
||||
▲ читает/пишет страницы ▼ git push/pull
|
||||
PostgreSQL (pages/spaces) data/git-sync/<spaceId>/ (vault) → remote
|
||||
```
|
||||
|
||||
Ключ интеграции: движок docmost-sync уже **полностью построен на dependency
|
||||
injection** — весь внешний IO (REST-клиент, git, файловая система) передаётся
|
||||
через узкие интерфейсы. Мы НЕ переписываем движок; мы подставляем нативные
|
||||
реализации в его DI-швы.
|
||||
|
||||
---
|
||||
|
||||
## 2. Состав вендоринга из docmost-sync
|
||||
|
||||
В новый пакет `packages/git-sync` копируем (с сохранением истории смысла —
|
||||
backport-friendly, как сделано с `packages/mcp`):
|
||||
|
||||
### 2.1. Движок (engine) — `src/engine/`
|
||||
| Файл | Что несёт | IO | Берём |
|
||||
| --- | --- | --- | --- |
|
||||
| `pull.ts` | Docmost→FS: reconcile + write + commit + merge | client+git+fs (инжектируется) | да |
|
||||
| `push.ts` | FS→Docmost: diff + classify + apply + refs | client+git+fs (инжектируется) | да |
|
||||
| `git.ts` | `VaultGit` — обёртка git shell-out | системный `git` | да, как есть |
|
||||
| `reconcile.ts` | чистый планировщик | нет | да |
|
||||
| `layout.ts` | чистый маппер дерево→пути | нет | да |
|
||||
| `sanitize.ts` | чистая санитизация имён | нет | да |
|
||||
| `stabilize.ts` | fixpoint-нормализация md (SPEC §11) | нет (lib-вызовы) | да |
|
||||
| `loop-guard.ts` | `bodyHash` (sha256) | нет | да |
|
||||
| `settings.ts` | zod-конфиг | `.env` | **адаптируем** (см. §7) |
|
||||
| `index.ts` | тонкий CLI-скаффолд | — | нет (заменяем на NestJS) |
|
||||
|
||||
### 2.2. Конвертер (чистая часть) — `src/lib/`
|
||||
Из `packages/docmost-client/src/lib/` берём **только** чистый конвертер и формат
|
||||
файла (collab/auth REST-части НЕ нужны — запись нативная):
|
||||
|
||||
| Файл | Экспорт |
|
||||
| --- | --- |
|
||||
| `markdown-converter.ts` | `convertProseMirrorToMarkdown(content): string` |
|
||||
| `collaboration.ts` (только конвертер-функция) | `markdownToProseMirror(md): Promise<doc>` ⚠️ |
|
||||
| `markdown-document.ts` | `serializeDocmostMarkdownBody`, `parseDocmostMarkdown`, `serializeDocmostMarkdown`, тип `DocmostMdMeta` |
|
||||
| `canonicalize.ts` | `canonicalizeContent(node)`, `docsCanonicallyEqual(a,b)` |
|
||||
| `docmost-schema.ts` | tiptap-схема для `markdownToProseMirror` |
|
||||
| `node-ops.ts`, `diff.ts` | трансформации/диф (нужны транзитивно) |
|
||||
|
||||
⚠️ `markdownToProseMirror` физически лежит в `collaboration.ts` docmost-client
|
||||
(строка 289) — это **чистая** функция (marked→HTML→generateJSON), не путать с
|
||||
collab/websocket write-path из того же файла, который НЕ берём.
|
||||
|
||||
> **Долг (зафиксирован в бэклоге):** это третья копия конвертера (есть в
|
||||
> docmost-sync, в `packages/mcp`, теперь в `packages/git-sync`). Конвергенция в
|
||||
> общий пакет — отдельная задача; здесь сознательно вендорим валидированную
|
||||
> копию ради сохранения идемпотентности.
|
||||
|
||||
### 2.3. НЕ берём
|
||||
`pull`/`push` CLI-обёртки, `roundtrip.ts` (харнес переносим в тесты, см. §13),
|
||||
`docmost-client` REST-клиент целиком, websocket-write-часть `lib/collaboration.ts` (чистую функцию `markdownToProseMirror` из него берём — см. §2.2),
|
||||
`lib/auth-utils.ts`, `Makefile`, Docker-обвязку docmost-sync.
|
||||
|
||||
---
|
||||
|
||||
## 3. Главный шов: `GitmostDataSource`
|
||||
|
||||
Движок дёргает Docmost через `Pick<DocmostClient, …>`. Мы реализуем класс,
|
||||
**структурно совместимый** с этими сигнатурами, но нативный внутри. Это
|
||||
единственный нетривиальный новый код.
|
||||
|
||||
### 3.1. Точный набор методов, которых требует движок
|
||||
|
||||
Из `pull.ts` (`ApplyPullActionsDeps.client`) и обхода дерева:
|
||||
```ts
|
||||
listSpaceTree(spaceId: string, rootPageId?: string): Promise<{ pages: PageNode[]; complete: boolean }>;
|
||||
getPageJson(pageId: string): Promise<{ id; slugId; title; parentPageId; spaceId; updatedAt; content }>;
|
||||
```
|
||||
|
||||
Из `push.ts` (`ApplyPushDeps.client`):
|
||||
```ts
|
||||
importPageMarkdown(pageId: string, fullMarkdown: string): Promise<{ updatedAt?: string; /* … */ }>;
|
||||
createPage(title: string, content: string, spaceId: string, parentPageId?: string): Promise<{ data: { id: string }; updatedAt?: string }>;
|
||||
deletePage(pageId: string): Promise<unknown>;
|
||||
movePage(pageId: string, parentPageId: string | null, position?: string): Promise<unknown>;
|
||||
renamePage(pageId: string, title: string): Promise<unknown>;
|
||||
```
|
||||
|
||||
Для непрерывного режима/детекции удалений (фаза B+, SPEC §8):
|
||||
```ts
|
||||
listRecentSince(spaceId: string | undefined, sinceIso: string | null, hardPageCap?: number): Promise<any[]>;
|
||||
listTrash(spaceId: string): Promise<any[]>;
|
||||
restorePage(pageId: string): Promise<unknown>;
|
||||
```
|
||||
|
||||
### 3.2. Маппинг на нативные сервисы gitmost
|
||||
|
||||
| Метод адаптера | Нативная реализация |
|
||||
| --- | --- |
|
||||
| `listSpaceTree(spaceId)` | `SpaceRepo.findById(spaceId, wsId)` + `PageRepo.getSpaceDescendants(spaceId, { includeContent: false })` → map в `PageNode { id, title, slugId, parentPageId, hasChildren }`. **`complete: true` всегда** (читаем БД, не пагинированный REST) → суппрессия `incomplete-fetch` из SPEC §8 нативно не срабатывает. |
|
||||
| `getPageJson(pageId)` | `PageRepo.findById(pageId, { includeContent: true })` → `{ id, slugId, title, parentPageId, spaceId, updatedAt, content }`. `content` — ProseMirror JSON в схеме `editor-ext`. |
|
||||
| `importPageMarkdown(pageId, fullMd)` | `parseDocmostMarkdown(fullMd)` → body; `await markdownToProseMirror(body)` → doc; **запись через collab** (см. §3.3). Вернуть `{ updatedAt }` свежей страницы. |
|
||||
| `createPage(title, body, spaceId, parent?)` | `PageService.create(userId, wsId, { spaceId, title, parentPageId }, provenance)` → shell; затем тело через collab (§3.3). Вернуть `{ data: { id }, updatedAt }`. |
|
||||
| `deletePage(pageId)` | `PageService.removePage(pageId, userId, wsId)` (soft-delete → Trash, обратимо). |
|
||||
| `movePage(pageId, parent, pos?)` | `PageService.movePage({ pageId, parentPageId: parent, position }, movedPage, provenance)`. **`position` обязателен** для Docmost-move — вычисляем `fractional-indexing-jittered` ключ между соседями (соседей берём из `PageRepo`). |
|
||||
| `renamePage(pageId, title)` | `PageService.update(page, { title }, user, provenance)`. |
|
||||
| `listRecentSince` | `PageRepo.getRecentPagesInSpace(spaceId, { … })`, фильтр по `updatedAt > since`. |
|
||||
| `listTrash(spaceId)` | `PageRepo` запрос с `deletedAt IS NOT NULL` по спейсу. |
|
||||
| `restorePage(pageId)` | `PageService.restore(...)`. |
|
||||
|
||||
`userId`/`wsId` берём из конфигурации спейса (сервисный аккаунт воркспейса или
|
||||
владелец спейса — см. §7). `provenance` всегда несёт `source: 'git-sync'` (§8).
|
||||
|
||||
### 3.3. Нативная запись тела (linchpin)
|
||||
|
||||
Подтверждено в коде: `CollaborationGateway.openDirectConnection(documentName, context)`
|
||||
([collaboration.gateway.ts:148](../apps/server/src/collaboration/collaboration.gateway.ts#L148-L150))
|
||||
+ паттерн `withYdocConnection`
|
||||
([collaboration.handler.ts:118-133](../apps/server/src/collaboration/collaboration.handler.ts#L118-L133)).
|
||||
Имя документа — `page.<pageId>` ([getPageId](../apps/server/src/collaboration/collaboration.util.ts#L163-L165)).
|
||||
Схему берём из `tiptapExtensions` ([collaboration.util.ts](../apps/server/src/collaboration/collaboration.util.ts)).
|
||||
|
||||
```ts
|
||||
// In-process body write — no loopback websocket, no service-user token.
|
||||
// Mirrors collaboration.handler.ts 'replace' operation exactly.
|
||||
private async writeBody(pageId: string, prosemirrorJson: JSONContent): Promise<void> {
|
||||
const conn = await this.collabGateway.openDirectConnection(
|
||||
`page.${pageId}`,
|
||||
{ actor: 'git-sync' }, // provenance flows into PersistenceExtension (see §8)
|
||||
);
|
||||
try {
|
||||
await conn.transact((doc) => {
|
||||
const fragment = doc.getXmlFragment('default');
|
||||
if (fragment.length > 0) fragment.delete(0, fragment.length);
|
||||
const next = TiptapTransformer.toYdoc(prosemirrorJson, 'default', tiptapExtensions);
|
||||
Y.applyUpdate(doc, Y.encodeStateAsUpdate(next));
|
||||
});
|
||||
} finally {
|
||||
await conn.disconnect();
|
||||
}
|
||||
// PersistenceExtension.onStoreDocument persists ydoc+content+textContent
|
||||
// consistently, stamps lastUpdatedSource, broadcasts 'page.updated'.
|
||||
}
|
||||
```
|
||||
|
||||
**Схема-совместимость (критично).** `markdownToProseMirror` производит
|
||||
ProseMirror JSON в схеме docmost-client, а `TiptapTransformer.toYdoc` валидирует
|
||||
его в схеме `editor-ext`. Аналогично на чтении `convertProseMirrorToMarkdown`
|
||||
получает `content` в схеме `editor-ext`. Эти две схемы **должны совпадать по
|
||||
именам нод/марок/атрибутов**, иначе ноды потеряются. Это и есть гейт §13.1.
|
||||
|
||||
---
|
||||
|
||||
## 4. `VaultGit` и git-бинарь
|
||||
|
||||
`VaultGit` (engine/git.ts) оставляем как есть — он шеллит в системный `git` через
|
||||
`execFile` (args-массив, без инъекций), всегда `cwd=<vaultPath>`. Константы:
|
||||
`DEFAULT_BRANCH = "main"`, `BOT_AUTHOR_NAME = "Docmost Sync"`,
|
||||
`BOT_AUTHOR_EMAIL = "docmost-sync@local"`; в push.ts: `DOCMOST_BRANCH = "docmost"`,
|
||||
`LAST_PUSHED_REF = "refs/docmost/last-pushed"`, провенанс-трейлеры
|
||||
`Docmost-Sync-Source: docmost|local`.
|
||||
|
||||
**Ops-требование:** в рантайм-образ gitmost добавить пакет `git`
|
||||
([Dockerfile](../Dockerfile)) — сейчас его там может не быть. Без бинаря
|
||||
`VaultGit.assertGitAvailable()` падает на старте цикла.
|
||||
|
||||
**Модель веток (пер-репо, SPEC §5):** `main` (правит человек/файлы) ↔ `docmost`
|
||||
(зеркало Docmost, пишет только движок) ↔ `merge-base` как бейзлайн (baseline);
|
||||
`refs/docmost/last-pushed` — что из `main` уже отражено в Docmost.
|
||||
|
||||
---
|
||||
|
||||
## 5. Топология vault: репозиторий на спейс
|
||||
|
||||
- Корень: `<DATA_DIR>/git-sync/<spaceId>/` — отдельный git-репо на каждый
|
||||
включённый спейс. `layout.ts` уже спейс-скоупный (корень спейса → `segments: []`).
|
||||
- Remote — пер-спейс (из конфигурации спейса/ENV). Изоляция конфликтов, блокировок
|
||||
и blast-radius.
|
||||
- `move-to-space` (страница меняет спейс) → **кросс-репо**: `delete` в исходном
|
||||
репо + `create` в целевом. Ловим по событию `PAGE_MOVED_TO_SPACE`.
|
||||
- Redis-lock ключ — `git-sync:lock:<spaceId>` (§9).
|
||||
|
||||
---
|
||||
|
||||
## 6. NestJS-модуль `GitSyncModule`
|
||||
|
||||
Структура (шаблон — `McpModule`):
|
||||
```
|
||||
apps/server/src/integrations/git-sync/
|
||||
git-sync.module.ts
|
||||
git-sync.constants.ts # QueueJob/event-имена, дефолты
|
||||
services/
|
||||
gitmost-datasource.service.ts # §3 адаптер
|
||||
git-sync.orchestrator.ts # @Interval + leader-lock + цикл по спейсам
|
||||
vault-registry.service.ts # путь vault на спейс, VaultGit-инстансы
|
||||
fractional-index.util.ts # position для move (reuse server util)
|
||||
listeners/
|
||||
page-change.listener.ts # подписка на EventName.PAGE_* + debounce
|
||||
git-sync.controller.ts # (опц.) ручной trigger/status для админа
|
||||
```
|
||||
|
||||
```ts
|
||||
@Module({
|
||||
imports: [DatabaseModule, EnvironmentModule, ScheduleModule.forRoot()],
|
||||
providers: [
|
||||
GitmostDataSourceService,
|
||||
GitSyncOrchestrator,
|
||||
VaultRegistryService,
|
||||
PageChangeListener,
|
||||
],
|
||||
})
|
||||
export class GitSyncModule {}
|
||||
```
|
||||
- Регистрируем в [app.module.ts](../apps/server/src/app.module.ts) рядом с `McpModule`.
|
||||
- Зависимости: `PageRepo`/`SpaceRepo` (через `DatabaseModule`), `PageService`,
|
||||
`CollaborationGateway` (экспортировать из `CollaborationModule`),
|
||||
`EnvironmentService`, ioredis-клиент.
|
||||
- `ScheduleModule.forRoot()` уже подключается в `TelemetryModule`; повторный вызов
|
||||
безопасен, но лучше вынести в общий модуль или убедиться, что forRoot один раз.
|
||||
|
||||
---
|
||||
|
||||
## 7. Конфигурация
|
||||
|
||||
### 7.1. Per-space (UI) — `space.settings.gitSync`
|
||||
Расширяем существующий паттерн `settings.sharing` / `settings.comments`.
|
||||
|
||||
Сервер:
|
||||
- `UpdateSpaceDto` ([update-space.dto.ts](../apps/server/src/core/space/dto/update-space.dto.ts)):
|
||||
добавить `@IsOptional() @IsBoolean() gitSyncEnabled?: boolean;` (+ опц.
|
||||
`gitSyncRemote?: string`, если решим хранить remote в БД, а не только в ENV).
|
||||
- `SpaceService.updateSpace(dto, wsId)`
|
||||
([space.service.ts:120](../apps/server/src/core/space/services/space.service.ts#L120)):
|
||||
обработать как `disablePublicSharing`/`allowViewerComments`.
|
||||
- `SpaceRepo`: добавить `updateGitSyncSettings(spaceId, wsId, prefKey, prefValue, trx?)`
|
||||
по образцу `updateSharingSettings`
|
||||
([space.repo.ts:92](../apps/server/src/database/repos/space/space.repo.ts#L92)) —
|
||||
jsonb-merge в `settings.gitSync.<key>`.
|
||||
- Гард: CASL `SpaceCaslAction.Manage / SpaceCaslSubject.Settings` (как в
|
||||
[space.controller.ts:147](../apps/server/src/core/space/space.controller.ts#L147)).
|
||||
|
||||
Клиент:
|
||||
- Тоггл в форме настроек спейса
|
||||
([edit-space-form.tsx](../apps/client/src/features/space/components/edit-space-form.tsx))
|
||||
через `useUpdateSpaceMutation()` → `updateSpace({ spaceId, gitSyncEnabled })`.
|
||||
Образец — `mcp-settings.tsx`. `readOnly` при отсутствии `Manage/Settings`.
|
||||
|
||||
Форма `space.settings.gitSync`:
|
||||
```jsonc
|
||||
{ "gitSync": { "enabled": true, "remote": "git@…", "branch": "main" } }
|
||||
```
|
||||
|
||||
### 7.2. Секреты/тюнинг (ENV) — `EnvironmentService`
|
||||
Движковый `settings.ts` (zod, читает `.env`) **заменяем** на чтение из gitmost
|
||||
`EnvironmentService`: `parseSettings(env)` оставляем как чистую функцию для тестов,
|
||||
но в проде собираем `Settings` из `EnvironmentService`-геттеров.
|
||||
|
||||
Новые переменные (объявить в
|
||||
[environment.validation.ts](../apps/server/src/integrations/environment/environment.validation.ts)
|
||||
class-validator-декораторами, геттеры — в
|
||||
[environment.service.ts](../apps/server/src/integrations/environment/environment.service.ts)):
|
||||
|
||||
| ENV | Назначение | Обяз. |
|
||||
| --- | --- | --- |
|
||||
| `GIT_SYNC_ENABLED` | глобальный мастер-выключатель | нет (default false) |
|
||||
| `GIT_SYNC_DATA_DIR` | корень vault'ов (default `<DATA_DIR>/git-sync`) | нет |
|
||||
| `GIT_SYNC_REMOTE_TEMPLATE` | шаблон remote, напр. `git@host:vault-{spaceId}.git` | нет |
|
||||
| `GIT_SYNC_SSH_KEY_PATH` / креды remote | доступ к git-remote (secret) | по ситуации |
|
||||
| `GIT_SYNC_POLL_INTERVAL_MS` | страховочный поллинг (default 15000) | нет |
|
||||
| `GIT_SYNC_DEBOUNCE_MS` | окно дебаунса событий (default 2000) | нет |
|
||||
| `GIT_SYNC_SERVICE_USER_ID` | от чьего имени писать в Docmost | да (если синк включён) |
|
||||
|
||||
> git-remote = доступ ко всей вики спейса (SPEC §12): креды только в ENV/secret
|
||||
> store, никогда в БД/коммиты. В UI — только `enabled` (+ опц. имя remote из
|
||||
> заранее разрешённого списка).
|
||||
|
||||
---
|
||||
|
||||
## 8. Провенанс и loop-guard
|
||||
|
||||
### 8.1. Значение `'git-sync'`
|
||||
Сегодня `lastUpdatedSource ∈ { 'user', 'agent' }`
|
||||
([persistence.extension.ts:132-134](../apps/server/src/collaboration/extensions/persistence.extension.ts#L132-L134)).
|
||||
Добавляем `'git-sync'`:
|
||||
- `PersistenceExtension`: `context.actor === 'git-sync'` → `lastUpdatedSource = 'git-sync'`.
|
||||
- Снапшот истории для `'git-sync'` — дебаунс (как у человека), а не немедленный
|
||||
(немедленный — только для `'agent'`,
|
||||
[persistence.extension.ts:321](../apps/server/src/collaboration/extensions/persistence.extension.ts#L321)).
|
||||
- Для `create/move/rename/delete` через `PageService` передаём
|
||||
`AuthProvenanceData` c `source: 'git-sync'` (тип уже используется для агента —
|
||||
расширить допустимые значения; точную форму подтвердить на реализации).
|
||||
- Клиент: в истории
|
||||
([history-item.tsx:128](../apps/client/src/features/page-history/components/history-item.tsx#L128))
|
||||
не показывать агентский бейдж/дип-линк для `'git-sync'`; добавить значение в
|
||||
тип [page.types.ts:23-26](../apps/client/src/features/page-history/types/page.types.ts#L23-L26)
|
||||
(опц. свой бейдж «sync»).
|
||||
|
||||
### 8.2. Подавление петли (SPEC §10)
|
||||
На pull-стороне игнорируем страницу как «свою запись», если:
|
||||
`page.lastUpdatedSource === 'git-sync'` **И** `bodyHash(exportedBody)` совпадает
|
||||
с последним запушенным (`PushedPageRecord.bodyHash` из `push.ts`). После записи в
|
||||
Docmost сохраняем `updatedAt` ответа, чтобы поллинг-страховка не утянул свою же
|
||||
запись обратно.
|
||||
|
||||
---
|
||||
|
||||
## 9. Single-writer (Redis leader-lock)
|
||||
|
||||
В кодовой базе `@Interval`-задачи (`trash-cleanup`, `telemetry`, `session-cleanup`)
|
||||
**не защищены** от мультиинстанса. Для синка добавляем явный лок.
|
||||
|
||||
- ioredis уже есть (`RedisModule` из `@nestjs-labs/nestjs-ioredis`,
|
||||
[app.module.ts](../apps/server/src/app.module.ts); прямой `RedisClient`
|
||||
используется в collab-gateway).
|
||||
- Лок на спейс: `SET git-sync:lock:<spaceId> <instanceId> NX PX <ttl>`; держим
|
||||
цикл только при успехе, продлеваем по heartbeat, освобождаем в `finally`
|
||||
(Lua-CAS на удаление по `instanceId`, чтобы не снять чужой лок).
|
||||
- TTL > максимальной длительности цикла; на краше лок истекает сам.
|
||||
|
||||
```ts
|
||||
// Acquire per-space leadership; returns false if another replica holds it.
|
||||
private async acquire(spaceId: string): Promise<boolean> {
|
||||
const ok = await this.redis.set(`git-sync:lock:${spaceId}`, this.instanceId, 'PX', LOCK_TTL_MS, 'NX');
|
||||
return ok === 'OK';
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 10. Планировщик и событийные триггеры
|
||||
|
||||
- **События (основной триггер).** `PageChangeListener` подписывается на
|
||||
`EventName.PAGE_CREATED | PAGE_UPDATED | PAGE_MOVED | PAGE_SOFT_DELETED |
|
||||
PAGE_RESTORED | PAGE_MOVED_TO_SPACE` и job `PAGE_CONTENT_UPDATED`
|
||||
([event.contants.ts](../apps/server/src/common/events/event.contants.ts)).
|
||||
Фильтр по `spaceId` (только включённые спейсы) → дебаунс (`GIT_SYNC_DEBOUNCE_MS`)
|
||||
→ ставит pull/push-цикл спейса в очередь оркестратора.
|
||||
- Loop-guard: события от собственных записей (`source==='git-sync'` + совпавший
|
||||
хэш) пропускаем (§8.2).
|
||||
- **Поллинг-страховка.** `@Interval(GIT_SYNC_POLL_INTERVAL_MS)` в оркестраторе:
|
||||
по каждому включённому спейсу (под локом) — реконсиляция (`listRecentSince` +
|
||||
`listTrash`), ловит пропущенные события и стартовую сверку после простоя
|
||||
(SPEC §12).
|
||||
- Один цикл на спейс за раз (внутри-процессный мьютекс на `spaceId` поверх
|
||||
Redis-лока).
|
||||
|
||||
---
|
||||
|
||||
## 11. Потоки данных (walkthroughs)
|
||||
|
||||
### 11.1. Первичный клон спейса (initial clone, SPEC §12)
|
||||
1. `VaultGit.ensureRepo()` + `ensureBranch('docmost','main')` + `checkout('docmost')`.
|
||||
2. `dataSource.listSpaceTree(spaceId)` → `{ pages, complete:true }`.
|
||||
3. `readExisting({ listTracked: () => git.listTrackedFiles('*.md'), readFile })`.
|
||||
4. `computePullActions({ pages, treeComplete:true, existing })` → план.
|
||||
5. `applyPullActions(deps, actions, vaultRoot)`: на каждую страницу
|
||||
`getPageJson` → `stabilizePageFile(content, meta)` (export→import→export
|
||||
fixpoint, SPEC §11) → запись файла; затем `stageAll` + `commit` (трейлер
|
||||
`docmost`) на `docmost`; `checkout('main')` + `merge('docmost')`.
|
||||
6. Зафиксировать max `updatedAt` как стартовый `T_last`; `git push` в remote.
|
||||
|
||||
### 11.2. Docmost → FS (pull-цикл)
|
||||
Триггер: событие/поллинг → (под локом) шаги §11.1 п.1–5 инкрементально. 3-way
|
||||
merge `docmost→main` делает git: непересекающиеся правки сливаются, реальное
|
||||
пересечение → conflict-маркеры в файле. **При конфликте push этой страницы в
|
||||
Docmost блокируется** до ручного резолва (SPEC §9; фаза D).
|
||||
|
||||
### 11.3. FS → Docmost (push-цикл)
|
||||
`runPush(deps, { dryRun })`:
|
||||
1. `git.ensureRepo` / `isMergeInProgress` (abort при merge) / `checkout('main')`.
|
||||
2. `stageAll` + `commit('local: working-tree changes')` (локально, в Docmost не шлёт).
|
||||
3. База диффа: `readRef(LAST_PUSHED_REF)` ?? `docmost`; `revParse('main')` → `pushedCommit`.
|
||||
4. `diffNameStatus(base, 'main')` → changes; префетч `metaAt(path, side)`.
|
||||
5. `computePushActions({ changes, metaAt })` → creates/updates/deletes/renamesMoves/skipped.
|
||||
6. `dryRun` → лог плана и выход (клиент НЕ создаётся).
|
||||
7. `--apply`: `makeClient(settings)` → наш `GitmostDataSource`;
|
||||
`applyPushActions`:
|
||||
- update → `importPageMarkdown(pageId, fullMd)` (collab-write, §3.3);
|
||||
- create → `createPage(...)` → записать присвоенный `pageId` обратно в meta;
|
||||
- delete → `deletePage(pageId)` (Trash);
|
||||
- rename/move → `classifyRenameMoves` → `movePage`/`renamePage`;
|
||||
- при пустых failures: `updateRef(LAST_PUSHED_REF, pushedCommit)` +
|
||||
`fastForwardBranch('docmost', pushedCommit)`.
|
||||
8. Записать `bodyHash` + `updatedAt` (loop-guard, §8.2); `git push`.
|
||||
|
||||
---
|
||||
|
||||
## 12. Фазирование
|
||||
|
||||
- **A. Каркас + односторонний pull (нативно).** `packages/git-sync` (вендоринг
|
||||
§2), `GitmostDataSource` (чтение через репозитории), `GitSyncModule`, конфиг из
|
||||
`EnvironmentService`, ручной/однократный pull-цикл на один спейс. **Гейт §13.1.**
|
||||
- **B. Push + непрерывность.** Нативная запись (§3.3), `runPush`, ветки/refs,
|
||||
loop-guard (§8), Redis-лок (§9), `@Interval` + `PageChangeListener` (§10).
|
||||
- **C. Per-space UI.** `space.settings.gitSync` (§7.1), DTO/сервис/репо/гард,
|
||||
тоггл на клиенте, скоуп оркестратора по включённым спейсам.
|
||||
- **D. Харднинг.** Conflict-gating (SPEC §9), удаления через Trash + git (§5),
|
||||
стартовая реконсиляция и `move-to-space` кросс-репо, провенанс на клиенте,
|
||||
Dockerfile `git`, полный набор тестов.
|
||||
|
||||
---
|
||||
|
||||
## 13. Тестирование
|
||||
|
||||
### 13.1. Гейт идемпотентности (блокирует фазу B)
|
||||
Перенести round-trip-харнес docmost-sync (`roundtrip.ts` + `test/fixtures/corpus`)
|
||||
в тесты `packages/git-sync`, но прогонять **против схемы `editor-ext`**:
|
||||
`content (editor-ext) → convertProseMirrorToMarkdown → markdownToProseMirror →
|
||||
TiptapTransformer.toYdoc(…, tiptapExtensions) → fromYdoc → canonicalizeContent`
|
||||
должно давать `docsCanonicallyEqual === true`. Любая потеря нод/атрибутов =
|
||||
расхождение схем → чинить `docmost-schema.ts` под `editor-ext`.
|
||||
|
||||
### 13.2. Юнит (чистая логика, переносится как есть)
|
||||
`reconcile` (planReconciliation / decideAbsenceDeletions / mass-delete guards),
|
||||
`layout` (коллизии/санитизация), `computePullActions`, `computePushActions`,
|
||||
`classifyRenameMoves`, `bodyHash`.
|
||||
|
||||
### 13.3. Интеграция (нативный адаптер)
|
||||
`GitmostDataSource` против тестовой БД: `listSpaceTree`/`getPageJson` корректно
|
||||
маппят; `createPage`/`movePage`/`deletePage`/`importPageMarkdown` пишут через
|
||||
collab и проставляют `lastUpdatedSource='git-sync'`; loop-guard не зацикливается
|
||||
(write → poll → no-op).
|
||||
|
||||
### 13.4. e2e (под локом)
|
||||
Полный pull→push round-trip на временном vault + временном спейсе: правка в
|
||||
Docmost доезжает в файл и наоборот; конфликт даёт маркеры и блокирует push.
|
||||
|
||||
---
|
||||
|
||||
## 14. Риски и открытые пункты
|
||||
|
||||
1. **Схема-совместимость конвертера** (§3.3, §13.1) — главный риск; гейт
|
||||
обязателен до фазы B.
|
||||
2. **`AuthProvenanceData`** — точную форму типа подтвердить; возможно, потребует
|
||||
расширения enum источника на сервере и в истории.
|
||||
3. **Согласованность Yjs** — писать строго через `openDirectConnection`/`transact`;
|
||||
не трогать `content`-колонку напрямую.
|
||||
4. **`position` для move** — обязателен в Docmost-move; нужен
|
||||
`fractional-indexing-jittered` между соседями (соседей брать сортировкой
|
||||
`position COLLATE "C"`).
|
||||
5. **`git` в рантайме** — добавить в Dockerfile.
|
||||
6. **`ScheduleModule.forRoot()`** — не задублировать `forRoot`.
|
||||
7. **Сервисный пользователь записи** (`GIT_SYNC_SERVICE_USER_ID`) — от чьего имени
|
||||
идут create/move (влияет на `creatorId`/права); согласовать политику.
|
||||
8. **Конфликты и удаления** — фаза D строго по SPEC §8/§9 (маркеры никогда не
|
||||
уезжают в Docmost).
|
||||
|
||||
---
|
||||
|
||||
## 15. Чек-лист изменений по файлам
|
||||
|
||||
**Новый пакет**
|
||||
- `packages/git-sync/**` — движок + чистый конвертер (§2), `package.json`
|
||||
(`@docmost/git-sync`, `workspace:*`), `tsconfig.json`.
|
||||
|
||||
**Сервер (`apps/server/src`)**
|
||||
- `integrations/git-sync/**` — модуль, оркестратор, адаптер, листенер (§6).
|
||||
- `app.module.ts` — импорт `GitSyncModule`.
|
||||
- `collaboration/collaboration.module.ts` — экспорт `CollaborationGateway`.
|
||||
- `collaboration/extensions/persistence.extension.ts` — источник `'git-sync'` (§8.1).
|
||||
- `core/space/dto/update-space.dto.ts` — `gitSyncEnabled?` (§7.1).
|
||||
- `core/space/services/space.service.ts` — обработка флага.
|
||||
- `database/repos/space/space.repo.ts` — `updateGitSyncSettings` (§7.1).
|
||||
- `integrations/environment/environment.validation.ts` + `environment.service.ts` —
|
||||
новые ENV (§7.2).
|
||||
- `Dockerfile` — пакет `git`.
|
||||
|
||||
**Клиент (`apps/client/src`)**
|
||||
- `features/space/components/edit-space-form.tsx` — тоггл git-sync.
|
||||
- `features/space/types` — поле `settings.gitSync`.
|
||||
- `features/page-history/types/page.types.ts` + `components/history-item.tsx` —
|
||||
значение `'git-sync'` в `lastUpdatedSource`.
|
||||
|
||||
**Корень**
|
||||
- `pnpm-workspace.yaml` уже покрывает `packages/*`; `apps/server/package.json` —
|
||||
зависимость `@docmost/git-sync: workspace:*`.
|
||||
@@ -1,145 +0,0 @@
|
||||
# Улучшение качества RAG-поиска агента — план по итерациям
|
||||
|
||||
> Статус: живой документ. Итерация 1 **реализована** (см. ниже). Остальное —
|
||||
> бэклог на следующие итерации, отсортированный по «качество / усилие».
|
||||
> Контекст: gitmost — форк Docmost. Семантический поиск агента: per-workspace
|
||||
> эмбеддинги в `page_embeddings` (pgvector, dimension-agnostic колонка, seq-scan
|
||||
> с `<=>`), индексация через BullMQ (`reindexPage` / `reindexWorkspace`).
|
||||
> Активная embedding-модель деплоя: OpenAI `text-embedding-3-large` (3072d).
|
||||
|
||||
## Как сверялось с реальным кодом
|
||||
|
||||
Внешнее предложение по улучшению RAG было сверено с кодовой базой. Точные факты
|
||||
на момент итерации 1:
|
||||
|
||||
- Хранилище: [page_embeddings](../apps/server/src/database/migrations/20260617T120000-page-embeddings.ts),
|
||||
колонка `embedding` сделана dimension-agnostic в
|
||||
[20260617T140000](../apps/server/src/database/migrations/20260617T140000-page-embeddings-dimension-agnostic.ts);
|
||||
`model_name` / `model_dimensions` хранятся по строке.
|
||||
- Полнотекстовые индексы **уже существуют** (предложение ошибочно утверждало
|
||||
обратное): `pages_tsv_idx` на `pages.tsv` и `attachments_tsv_idx`. Конфигурация —
|
||||
`to_tsvector('english', f_unaccent(...))` + `setweight`
|
||||
([тут](../apps/server/src/database/migrations/20250729T213756-add-unaccent-pg_trm-update-tsvector..ts)).
|
||||
- Чанкинг: `RecursiveCharacterTextSplitter` 1000/200, без префиксов.
|
||||
- Префиксы `query:` / `passage:` **не нужны**: они требуются для e5/bge/gte/Qwen3,
|
||||
а деплой на OpenAI `text-embedding-3-large` (этот пункт предложения неприменим).
|
||||
- Вложения (`attachment_id` в схеме есть) **не индексируются** — индексатор всегда
|
||||
пишет `attachmentId: null`.
|
||||
|
||||
---
|
||||
|
||||
## Итерация 1 — РЕАЛИЗОВАНО
|
||||
|
||||
Три «низковисящих фрукта»:
|
||||
|
||||
### 1. Хлебные крошки заголовков в чанках
|
||||
Файл: [embedding-indexer.service.ts](../apps/server/src/core/ai-chat/embedding/embedding-indexer.service.ts).
|
||||
Каждый чанк префиксуется путём заголовков `«Заголовок страницы > H1 > H2»` перед
|
||||
эмбеддингом. Крошки строятся обходом **ProseMirror JSON** (`heading`-ноды с
|
||||
`attrs.level`), а не markdown-текста — поэтому `#` внутри fenced-код-блока (типичный
|
||||
bash-сниппет в WirenBoard-вики) **никогда** не принимается за заголовок. Деградация
|
||||
к старому plain-text чанкингу при отсутствии/сбое `content`. Префикс попадает и в
|
||||
эмбеддинг, и в `content` (а значит — в лексический индекс `fts` и в сниппет агента).
|
||||
|
||||
### 2. Гибридный поиск (RRF), слияние двух инструментов в один
|
||||
- Миграция [20260618T150000-page-embeddings-fts.ts](../apps/server/src/database/migrations/20260618T150000-page-embeddings-fts.ts):
|
||||
генерируемая колонка `fts tsvector GENERATED ALWAYS AS (to_tsvector('english',
|
||||
f_unaccent(content))) STORED` + GIN-индекс. Конфиг совпадает с `pages.tsv` (та же
|
||||
обработка unaccent/Cyrillic); `f_unaccent` IMMUTABLE → триггер не нужен.
|
||||
- Репозиторий: метод `hybridSearch` в
|
||||
[page-embedding.repo.ts](../apps/server/src/database/repos/ai-chat/page-embedding.repo.ts) —
|
||||
один SQL-запрос, два CTE (cosine + `websearch_to_tsquery`), слияние Reciprocal Rank
|
||||
Fusion через FULL OUTER JOIN на уровне чанков. `k=60` (дефолт Cormack 2009 /
|
||||
ES / OpenSearch / Weaviate), равные веса 1.0/1.0. RRF сливает **ранги**, поэтому
|
||||
несовместимость шкал BM25 и косинуса не требует нормализации. Dimension-фильтр —
|
||||
только на семантической стороне.
|
||||
- Инструменты: `semanticSearch` удалён, `searchPages` стал единым гибридным
|
||||
инструментом ([ai-chat-tools.service.ts](../apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts)).
|
||||
Контроль доступа сохранён 1-в-1 (scope по доступным спейсам + пост-фильтр прав
|
||||
страниц). Если эмбеддинги не настроены / эмбеддинг упал / нет доступных спейсов /
|
||||
гибрид пуст → graceful fallback на прежний REST-полнотекст (CASL-enforced).
|
||||
|
||||
### 3. Переписывание запроса + описания инструментов
|
||||
- Описание `searchPages` теперь явно просит агента переформулировать вопрос в
|
||||
сфокусированный поисковый запрос и переискивать при слабой выдаче (это переживает
|
||||
кастомный admin-промпт, т.к. лежит в описании инструмента).
|
||||
- Одна строка-подсказка добавлена в `DEFAULT_PROMPT`
|
||||
([ai-chat.prompt.ts](../apps/server/src/core/ai-chat/ai-chat.prompt.ts)).
|
||||
|
||||
> ВАЖНО после деплоя: чтобы крошки и `fts` появились у существующих страниц, нужна
|
||||
> **переиндексация корпуса** (кнопка «Reindex now» / `WORKSPACE_CREATE_EMBEDDINGS`).
|
||||
> Миграция заполнит `fts` у текущих строк автоматически, но крошки добавляются только
|
||||
> при переиндексации (она же перезапишет `content`).
|
||||
|
||||
### Известные нюансы текущей реализации (осознанные компромиссы)
|
||||
- Гибрид покрывает только проиндексированные чанки. Свежесозданная страница
|
||||
становится искомой после отработки её BullMQ-`reindexPage`. Пока эмбеддинги не
|
||||
настроены — работает только REST-fallback (полнотекст уровня страницы по `pages.tsv`).
|
||||
- Если **весь** пул кандидатов гибрида (до 200 чанков) оказался из закрытых для
|
||||
пользователя страниц, инструмент вернёт пусто, а не уйдёт в keyword-fallback.
|
||||
Узкий кейс; возможное улучшение — fallback и при пустом результате пост-фильтра.
|
||||
- `fts` использует конфиг `english` (как и `pages.tsv`) — без русской стеммизации.
|
||||
Для русской вики это консистентно с текущим поиском; переход на `simple`/`russian`
|
||||
конфиг — отдельная задача с переиндексацией.
|
||||
- `candidates` (=clamp(limit×5, 50, 200)) служит и per-CTE лимитом, и финальным
|
||||
лимитом слияния; веса RRF равные. Тюнится после появления оценочного харнесса.
|
||||
|
||||
---
|
||||
|
||||
## Бэклог следующих итераций (по приоритету «качество / усилие»)
|
||||
|
||||
### A. Реранкер (cross-encoder) — наибольший ROI после гибрида
|
||||
Вставить между over-fetch гибрида и дедупом: брать топ-50–100 кандидатов от
|
||||
`hybridSearch`, реранкать, оставлять топ-5–10. Ожидаемый прирост precision/MRR
|
||||
+10–25 %. Точка вставки уже готова — это шаг между `hybridSearch(... candidates)` и
|
||||
циклом дедупа в `searchPages`.
|
||||
- Хостовый старт (раз уже на OpenAI-инфраструктуре): **Cohere Rerank** или
|
||||
**Voyage `rerank-2.5`** — провайдер по аналогии с текущим pluggable embedding-конфигом.
|
||||
- Self-hosted (под Ollama-этос): **BGE-reranker-v2-m3** через HF Text Embeddings
|
||||
Inference (`/rerank`), либо FlashRank (ONNX/CPU, ~15–30 мс).
|
||||
- Диагностика: если реранк не двигает метрики — узкое место в recall (чанкинг/гибрид),
|
||||
а не в ранжировании.
|
||||
|
||||
### B. Индексация вложений — закрыть пробел покрытия
|
||||
Схема уже готова (`attachment_id`). Добавить в BullMQ-flow шаг извлечения текста из
|
||||
PDF/документов (PyMuPDF для цифровых PDF; OCR для сканов; для таблиц — markdown через
|
||||
LLM-парсер) и вливать его в тот же путь чанк→эмбеддинг→`fts`, помечая `attachment_id`.
|
||||
Структура извлечённых данных важнее голой точности OCR.
|
||||
|
||||
### C. Тюнинг гибрида и оценочный харнесс
|
||||
- Золотой датасет 30–100 примеров (вопрос → нужная страница/чанк) + Ragas/DeepEval
|
||||
(Recall@k, MRR/nDCG, context precision/recall, faithfulness). Прогон до/после
|
||||
каждого изменения. **Эта предпосылка пропущена в итерации 1 осознанно** — без неё все
|
||||
нижеследующие тюнинги делаются «на глаз».
|
||||
- После харнесса: тюнить веса RRF (старт 1.0/1.0), `k` (старт 60), число `candidates`.
|
||||
- Эксперимент: чанки ~512 симв. против 1000 (предложение указывает на рост precision).
|
||||
|
||||
### D. Contextual Retrieval (Anthropic), если крошек мало
|
||||
Один LLM-вызов на чанк добавляет предложение-контекст. Снижение провалов выдачи
|
||||
на 35–49 %. Ложится в BullMQ-`reindexPage`; на сотнях страниц с prompt caching — копейки.
|
||||
Применять, только если хлебных крошек окажется недостаточно против потери контекста.
|
||||
|
||||
### E. ParadeDB `pg_search` (настоящий BM25), если лексика станет узким местом
|
||||
Нативный `ts_rank` использует только TF и длину документа, без IDF. `pg_search`
|
||||
(Rust/Tantivy) даёт честный BM25-индекс. Не drop-in (свои операторы вместо `@@`) —
|
||||
это изменение кода, а не флаг. На сотнях страниц нативного `tsvector` хватает; брать
|
||||
только если качество лексического ранжирования упрётся в потолок.
|
||||
|
||||
### F. Прочее
|
||||
- **Префиксы query/passage** — НЕ нужны на OpenAI. Внедрять только при переходе на
|
||||
e5/bge/gte/Qwen3 (тогда индексатор ставит `passage:`, запрос — `query:`; BGE-v1.5,
|
||||
наоборот, префиксов НЕ должна получать). Зафиксировано как ловушка на будущее.
|
||||
- **Апгрейд embedding-модели** — уже на `text-embedding-3-large` (топ среди закрытых).
|
||||
Matryoshka (обрезка размерности) — запас на будущее; dimension-agnostic колонка
|
||||
делает миграцию тривиальной (цена — переэмбеддинг корпуса).
|
||||
- **HyDE и широкий multi-query/RAG-Fusion** — НЕ рекомендуются как дефолт: в свежих
|
||||
бенчмарках уступали и добавляют задержку/галлюцинации.
|
||||
|
||||
## Оговорки
|
||||
- Все внешние числа (62→84 % precision, +17 % Recall@5, −35…49 % провалов, +10–25 %
|
||||
от реранка) получены на ДРУГИХ корпусах (SEC-отчёты, финтекст, право, медицина).
|
||||
На этой вики величины будут иными — поэтому пункт C (свой датасет) обязателен перед
|
||||
тонким тюнингом. Внешние числа — направление, не гарантия величины.
|
||||
- Часть источников предложения — вендорский маркетинг (Cohere, Voyage, ParadeDB);
|
||||
направление подтверждается независимыми (T2-RAGBench, оценка Anthropic), но величины
|
||||
у вендоров могут быть завышены.
|
||||
@@ -7,8 +7,10 @@ export interface HtmlEmbedOptions {
|
||||
}
|
||||
|
||||
/**
 * Attributes carried by an `htmlEmbed` node.
 */
export interface HtmlEmbedAttributes {
  /** Raw HTML/CSS/JS string rendered inside a sandboxed iframe by the NodeView. */
  source?: string;
  /** Fixed iframe height in pixels. null/absent => auto-resize via postMessage. */
  height?: number | null;
}
|
||||
|
||||
declare module "@tiptap/core" {
|
||||
@@ -98,6 +100,21 @@ export const HtmlEmbed = Node.create<HtmlEmbedOptions>({
|
||||
"data-source": encodeHtmlEmbedSource(attributes.source || ""),
|
||||
}),
|
||||
},
|
||||
// Fixed iframe height in px. null/absent => auto-resize on the client.
|
||||
height: {
|
||||
default: null,
|
||||
parseHTML: (el) => {
|
||||
const v = el.getAttribute("data-height");
|
||||
if (!v) return null;
|
||||
const n = parseInt(v, 10);
|
||||
// A non-numeric data-height (e.g. crafted/corrupted import) must not
|
||||
// become NaN: NaN is typeof "number" and would disable auto-resize and
|
||||
// yield an unclamped iframe height downstream. Treat it as auto (null).
|
||||
return Number.isFinite(n) ? n : null;
|
||||
},
|
||||
renderHTML: (attrs: HtmlEmbedAttributes) =>
|
||||
attrs.height ? { "data-height": String(attrs.height) } : {},
|
||||
},
|
||||
};
|
||||
},
|
||||
|
||||
|
||||
@@ -797,6 +797,60 @@ const Embed = Node.create({
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Docmost raw HTML embed. Block atom; the client renders `source` inside a
 * sandboxed iframe. The MCP server never renders it — it only needs the
 * schema to accept and carry the node so a fromYdoc -> transform -> toYdoc
 * round-trip does not throw "Unknown node type: htmlEmbed". Mirrors the
 * @docmost/editor-ext node name, attribute keys and flags; keep in sync when
 * the editor-ext htmlEmbed schema changes.
 *
 * NOTE: unlike the canonical editor-ext node, `data-source` here is mapped as
 * plain text rather than base64-encoded. That is intentional: the MCP write
 * path carries the node through Yjs (fromYdoc -> toYdoc) on its JSON `source`
 * attribute and never invokes parseHTML/renderHTML, and htmlEmbed is not
 * produced from the markdown/HTML (generateJSON) path. If a future HTML path
 * for htmlEmbed is added here, this mapping must adopt editor-ext's base64
 * encode/decode to avoid double-encoding `source`.
 */
const HtmlEmbed = Node.create({
  name: "htmlEmbed",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,

  addAttributes() {
    return {
      // Raw embed markup, stored as plain text on `data-source` (see NOTE above).
      source: {
        default: "",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-source") ?? "",
        renderHTML: (attrs: Record<string, any>) => ({
          "data-source": attrs.source ?? "",
        }),
      },
      // Fixed height; null means "no fixed height".
      height: {
        default: null,
        parseHTML: (el: HTMLElement) => {
          const v = el.getAttribute("data-height");
          if (!v) return null;
          const n = parseInt(v, 10);
          // A non-numeric data-height parses to NaN; map it to null rather
          // than carrying NaN on the node.
          return Number.isFinite(n) ? n : null;
        },
        renderHTML: (attrs: Record<string, any>) =>
          attrs.height != null ? { "data-height": String(attrs.height) } : {},
      },
    };
  },

  parseHTML() {
    return [{ tag: 'div[data-type="htmlEmbed"]' }];
  },

  renderHTML({ HTMLAttributes }) {
    // No content hole (`0`) here: the node declares no `content`, so it is a
    // leaf, and ProseMirror's DOMSerializer throws "Content hole not allowed
    // in a leaf node spec" if a leaf's DOM spec contains one.
    return ["div", { "data-type": "htmlEmbed", ...HTMLAttributes }];
  },
});
|
||||
|
||||
/** Shared attribute set for drawio/excalidraw diagram nodes. */
|
||||
const diagramAttributes = () => ({
|
||||
src: {
|
||||
@@ -1158,6 +1212,7 @@ export const docmostExtensions = [
|
||||
Video,
|
||||
Youtube,
|
||||
Embed,
|
||||
HtmlEmbed,
|
||||
Drawio,
|
||||
Excalidraw,
|
||||
Columns,
|
||||
|
||||
Reference in New Issue
Block a user