* — directly, bypassing the editor's `` comment marker.
*
- * This exercises the REAL server import conversion path that ImportService uses
+ * The block renders inside a sandboxed iframe, so it is not an XSS surface.
+ * This spec exercises the REAL server import conversion path that ImportService uses
* (`markdownToHtml` then `htmlToJson`; `processHTML` adds only a cheerio
* link/iframe normalize pass which does not touch htmlEmbed divs) and asserts
- * the ACTUAL behaviour so we know whether the strip gate can be bypassed.
- *
- * FINDING (documented): the raw embed div DOES round-trip through marked +
- * htmlToJson into a real `htmlEmbed` node, so `hasHtmlEmbedNode` returns true and
- * `stripHtmlEmbedNodes` removes it. The serialized-form bypass is therefore
- * detectable and STRIPPABLE — the write-path gate covers it.
+ * that such a node is DETECTED and STRIPPABLE — so the share read path's
+ * master-toggle strip can remove it when the workspace toggle is OFF.
*/
describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTML', () => {
it('round-trips through markdownToHtml -> htmlToJson and is DETECTED (base64 data-source)', async () => {
@@ -38,7 +35,7 @@ describe('htmlEmbed smuggled via the raw serialized div in imported markdown/HTM
// The div parses into a real htmlEmbed node carrying the decoded source.
expect(hasHtmlEmbedNode(json)).toBe(true);
- // Because it is detected, the write-path gate can strip it for non-admins.
+ // Because it is detected, the share master-toggle strip can remove it.
const stripped = stripHtmlEmbedNodes(json);
expect(hasHtmlEmbedNode(stripped)).toBe(false);
// Surrounding non-embed content is retained.
diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
index f54850d3..58a7cb64 100644
--- a/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
+++ b/apps/server/src/common/helpers/prosemirror/html-embed.spec.ts
@@ -1,11 +1,6 @@
import {
- canAuthorHtmlEmbed,
- collectHtmlEmbedSources,
hasHtmlEmbedNode,
- htmlEmbedAllowed,
isHtmlEmbedFeatureEnabled,
- stripDisallowedHtmlEmbedNodes,
- stripHtmlEmbedIfNotAllowed,
stripHtmlEmbedNodes,
} from './html-embed.util';
import { htmlToJson, jsonToHtml } from '../../../collaboration/collaboration.util';
@@ -96,17 +91,6 @@ describe('stripHtmlEmbedNodes', () => {
expect(result).toEqual(doc);
});
- it('neutralizes a root node that is itself an htmlEmbed', () => {
- // Defensive: the PM root is always a `doc`, so this is unreachable in normal
- // use, but the helper must still never return a bare htmlEmbed.
- const root = {
- type: 'htmlEmbed',
- attrs: { source: '' },
- };
- const result = stripHtmlEmbedNodes(root);
- expect(hasHtmlEmbedNode(result)).toBe(false);
- });
-
it('strips a deeply nested htmlEmbed (3+ levels: callout > column > paragraph-sibling)', () => {
// htmlEmbed sits as a sibling of a paragraph, nested four containers deep.
const doc = {
@@ -172,169 +156,6 @@ describe('stripHtmlEmbedNodes', () => {
});
});
-describe('collectHtmlEmbedSources', () => {
- it('collects the source of every htmlEmbed node, including nested ones', () => {
- const doc = {
- type: 'doc',
- content: [
- { type: 'htmlEmbed', attrs: { source: '
top' } },
- {
- type: 'columns',
- content: [
- {
- type: 'column',
- content: [
- { type: 'htmlEmbed', attrs: { source: '
nested' } },
- { type: 'paragraph', content: [{ type: 'text', text: 'x' }] },
- ],
- },
- ],
- },
- ],
- };
- const sources = collectHtmlEmbedSources(doc);
- expect(sources).toEqual(new Set(['
top', '
nested']));
- });
-
- it('returns an empty set for a doc with no embeds', () => {
- const doc = {
- type: 'doc',
- content: [{ type: 'paragraph', content: [{ type: 'text', text: 'hi' }] }],
- };
- expect(collectHtmlEmbedSources(doc).size).toBe(0);
- });
-
- it('gracefully skips embeds with absent attrs or non-string source', () => {
- const doc = {
- type: 'doc',
- content: [
- { type: 'htmlEmbed' }, // no attrs
- { type: 'htmlEmbed', attrs: {} }, // no source
- { type: 'htmlEmbed', attrs: { source: 42 } }, // non-string
- { type: 'htmlEmbed', attrs: { source: '
' } },
- ],
- };
- expect(collectHtmlEmbedSources(doc)).toEqual(new Set(['
']));
- });
-
- it('returns an empty set for non-object input', () => {
- expect(collectHtmlEmbedSources(null).size).toBe(0);
- expect(collectHtmlEmbedSources(undefined).size).toBe(0);
- expect(collectHtmlEmbedSources('x' as any).size).toBe(0);
- });
-});
-
-describe('stripDisallowedHtmlEmbedNodes', () => {
- it('keeps an embed whose source is allowed and removes the rest', () => {
- const doc = {
- type: 'doc',
- content: [
- { type: 'htmlEmbed', attrs: { source: '
' } },
- { type: 'htmlEmbed', attrs: { source: '
' } },
- { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
- ],
- };
- const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['
']));
- expect(collectHtmlEmbedSources(result)).toEqual(new Set(['
']));
- // The allowed embed and the paragraph survive; the new embed is gone.
- expect(result.content).toHaveLength(2);
- expect(result.content[0].attrs.source).toBe('
');
- expect(result.content[1].type).toBe('paragraph');
- });
-
- it('keeps BOTH embeds when two nodes share the same allowed source', () => {
- // Source-identity semantics: identity is the raw `attrs.source`, so a
- // non-admin who duplicates an existing admin-vetted source keeps both copies.
- // This is intended — the raw HTML is already vetted, so a duplicate is safe.
- const doc = {
- type: 'doc',
- content: [
- { type: 'htmlEmbed', attrs: { source: '
' } },
- { type: 'paragraph', content: [{ type: 'text', text: 'mid' }] },
- { type: 'htmlEmbed', attrs: { source: '
' } },
- ],
- };
- const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['
']));
- expect(hasHtmlEmbedNode(result)).toBe(true);
- const embeds = result.content.filter(
- (n: any) => n.type === 'htmlEmbed',
- );
- expect(embeds).toHaveLength(2);
- expect(embeds.every((n: any) => n.attrs.source === '
')).toBe(true);
- });
-
- it('removes a newly-introduced embed when nothing is allowed', () => {
- const doc = {
- type: 'doc',
- content: [{ type: 'htmlEmbed', attrs: { source: '
' } }],
- };
- const result = stripDisallowedHtmlEmbedNodes(doc, new Set());
- expect(hasHtmlEmbedNode(result)).toBe(false);
- });
-
- it('filters nested embeds by the allow-list (e.g. inside columns)', () => {
- const doc = {
- type: 'doc',
- content: [
- {
- type: 'columns',
- content: [
- {
- type: 'column',
- content: [
- { type: 'htmlEmbed', attrs: { source: '
' } },
- { type: 'htmlEmbed', attrs: { source: '
' } },
- ],
- },
- ],
- },
- ],
- };
- const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['
']));
- const col = findFirstChild(result, 'column');
- expect(col.content).toHaveLength(1);
- expect(col.content[0].attrs.source).toBe('
');
- });
-
- it('treats an embed with absent/non-string source as not allowed (stripped)', () => {
- const doc = {
- type: 'doc',
- content: [
- { type: 'htmlEmbed' },
- { type: 'htmlEmbed', attrs: {} },
- ],
- };
- const result = stripDisallowedHtmlEmbedNodes(doc, new Set(['
']));
- expect(hasHtmlEmbedNode(result)).toBe(false);
- });
-
- it('does not mutate the input document', () => {
- const doc = {
- type: 'doc',
- content: [{ type: 'htmlEmbed', attrs: { source: '
' } }],
- };
- stripDisallowedHtmlEmbedNodes(doc, new Set());
- expect(doc.content).toHaveLength(1);
- expect(doc.content[0].type).toBe('htmlEmbed');
- });
-
- it('neutralizes a root node that is itself a disallowed htmlEmbed', () => {
- const root = { type: 'htmlEmbed', attrs: { source: '
' } };
- const result = stripDisallowedHtmlEmbedNodes(root, new Set());
- expect(hasHtmlEmbedNode(result)).toBe(false);
- });
-
- it('keeps a root node that is an allowed htmlEmbed (defensive branch)', () => {
- const root = { type: 'htmlEmbed', attrs: { source: '
' } };
- const result = stripDisallowedHtmlEmbedNodes(root, new Set(['
']));
- expect(collectHtmlEmbedSources(result)).toEqual(new Set(['
']));
- });
-
- it('returns non-object input unchanged', () => {
- expect(stripDisallowedHtmlEmbedNodes(null as any, new Set())).toBeNull();
- });
-});
-
describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
it('returns true when the ROOT node itself is an htmlEmbed (not only a child)', () => {
const rootEmbed = { type: 'htmlEmbed', attrs: { source: '' } };
@@ -367,19 +188,6 @@ describe('hasHtmlEmbedNode (root/odd-shape detection)', () => {
});
});
-describe('canAuthorHtmlEmbed', () => {
- it('allows owner and admin', () => {
- expect(canAuthorHtmlEmbed('owner')).toBe(true);
- expect(canAuthorHtmlEmbed('admin')).toBe(true);
- });
- it('denies member and unknown/empty roles', () => {
- expect(canAuthorHtmlEmbed('member')).toBe(false);
- expect(canAuthorHtmlEmbed(null)).toBe(false);
- expect(canAuthorHtmlEmbed(undefined)).toBe(false);
- expect(canAuthorHtmlEmbed('viewer')).toBe(false);
- });
-});
-
describe('isHtmlEmbedFeatureEnabled', () => {
it('is true only when settings.htmlEmbed === true', () => {
expect(isHtmlEmbedFeatureEnabled({ htmlEmbed: true })).toBe(true);
@@ -394,165 +202,22 @@ describe('isHtmlEmbedFeatureEnabled', () => {
});
});
-describe('htmlEmbedAllowed (toggle AND admin)', () => {
- it('toggle OFF + admin/owner => not allowed (feature disabled for everyone)', () => {
- expect(htmlEmbedAllowed(false, 'admin')).toBe(false);
- expect(htmlEmbedAllowed(false, 'owner')).toBe(false);
- });
- it('toggle OFF + member => not allowed', () => {
- expect(htmlEmbedAllowed(false, 'member')).toBe(false);
- });
- it('toggle ON + admin/owner => allowed', () => {
- expect(htmlEmbedAllowed(true, 'admin')).toBe(true);
- expect(htmlEmbedAllowed(true, 'owner')).toBe(true);
- });
- it('toggle ON + member/unknown => not allowed', () => {
- expect(htmlEmbedAllowed(true, 'member')).toBe(false);
- expect(htmlEmbedAllowed(true, null)).toBe(false);
- expect(htmlEmbedAllowed(true, undefined)).toBe(false);
- expect(htmlEmbedAllowed(true, 'viewer')).toBe(false);
- });
-});
-
-// The shared write-path strip ritual extracted from the 5 plain call-sites
-// (collab handler, page create/duplicate, import, file-import-task,
-// transclusion-unsync). Tested here once instead of being re-verified in each
-// call-site's spec.
-describe('stripHtmlEmbedIfNotAllowed (shared write-path gate)', () => {
- const docWithEmbed = () => ({
- type: 'doc',
- content: [
- { type: 'paragraph', content: [{ type: 'text', text: 'keep' }] },
- { type: 'htmlEmbed', attrs: { source: '' } },
- ],
- });
- const docWithoutEmbed = () => ({
- type: 'doc',
- content: [{ type: 'paragraph', content: [{ type: 'text', text: 'keep' }] }],
- });
-
- it('keeps the doc unchanged when feature is ON and role is admin (allowed)', () => {
- const json = docWithEmbed();
- const onStrip = jest.fn();
- const result = stripHtmlEmbedIfNotAllowed(json, {
- featureEnabled: true,
- role: 'admin',
- onStrip,
- });
- // Allowed => same reference returned, embed preserved, no side-effect.
- expect(result).toBe(json);
- expect(hasHtmlEmbedNode(result)).toBe(true);
- expect(onStrip).not.toHaveBeenCalled();
- });
-
- it('keeps the doc unchanged for an owner when feature is ON (allowed)', () => {
- const json = docWithEmbed();
- const onStrip = jest.fn();
- const result = stripHtmlEmbedIfNotAllowed(json, {
- featureEnabled: true,
- role: 'owner',
- onStrip,
- });
- expect(result).toBe(json);
- expect(hasHtmlEmbedNode(result)).toBe(true);
- expect(onStrip).not.toHaveBeenCalled();
- });
-
- it('strips the embed when the feature is OFF (even for an admin)', () => {
- const json = docWithEmbed();
- const onStrip = jest.fn();
- const result = stripHtmlEmbedIfNotAllowed(json, {
- featureEnabled: false,
- role: 'admin',
- onStrip,
- });
- expect(hasHtmlEmbedNode(result)).toBe(false);
- expect(onStrip).toHaveBeenCalledTimes(1);
- });
-
- it('strips the embed for a non-admin when the feature is ON', () => {
- const json = docWithEmbed();
- const onStrip = jest.fn();
- const result = stripHtmlEmbedIfNotAllowed(json, {
- featureEnabled: true,
- role: 'member',
- onStrip,
- });
- expect(hasHtmlEmbedNode(result)).toBe(false);
- expect(onStrip).toHaveBeenCalledTimes(1);
- });
-
- it('strips the embed for a null/undefined role when the feature is ON', () => {
- for (const role of [null, undefined]) {
- const onStrip = jest.fn();
- const result = stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
- featureEnabled: true,
- role,
- onStrip,
- });
- expect(hasHtmlEmbedNode(result)).toBe(false);
- expect(onStrip).toHaveBeenCalledTimes(1);
- }
- });
-
- it('returns input unchanged and does NOT call onStrip when no embed is present', () => {
- const json = docWithoutEmbed();
- const onStrip = jest.fn();
- // Not allowed (feature OFF), but there is nothing to strip.
- const result = stripHtmlEmbedIfNotAllowed(json, {
- featureEnabled: false,
- role: 'member',
- onStrip,
- });
- expect(result).toBe(json);
- expect(onStrip).not.toHaveBeenCalled();
- });
-
- it('calls onStrip exactly once per strip', () => {
- const onStrip = jest.fn();
- stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
- featureEnabled: false,
- role: 'member',
- onStrip,
- });
- expect(onStrip).toHaveBeenCalledTimes(1);
- });
-
- it('works without an onStrip callback (optional)', () => {
- const result = stripHtmlEmbedIfNotAllowed(docWithEmbed(), {
- featureEnabled: false,
- role: 'member',
- });
- expect(hasHtmlEmbedNode(result)).toBe(false);
- });
-});
-
-// NOTE: a previous revision of this file re-implemented the write-path admin
-// gate as a local `applyAdminGate` stand-in and asserted against THAT. A
-// deleted/misplaced real guard would have kept those green. The stand-in is
-// removed. The collab store, REST/MCP update, and transclusion-unsync paths are
-// now tested against their REAL code in:
-// - collaboration/extensions/persistence.extension.html-embed.spec.ts
-// - collaboration/collaboration.handler.html-embed.spec.ts
-// - core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
-// - core/page/services/page-service-html-embed-identity.spec.ts (create/dup)
-// - integrations/import/services/import-html-embed-identity.spec.ts (import)
+// The htmlEmbed node renders inside a sandboxed iframe, so the per-write role
+// gate has been removed. `stripHtmlEmbedNodes` + `isHtmlEmbedFeatureEnabled`
+// remain ONLY to honor the workspace master toggle on the anonymous public-share
+// read path — tested against the real share code in:
+// - core/share/share-html-embed.spec.ts
//
-// The case below stays here because it asserts a REAL parse path
-// (htmlToJson, the markdown/html create format) feeding the REAL helpers — not a
-// re-implemented gate.
-describe('htmlEmbed smuggled via the markdown/html form (real parse + real helpers)', () => {
- it('the parsed node is detected and stripped by the real helpers', () => {
- // The markdown/html create formats decode to the same htmlEmbed node, so the
- // gate (run on the parsed JSON) covers them identically.
- const source = '';
+// The case below asserts that the REAL parse path (htmlToJson, the markdown/html
+// form) produces an htmlEmbed node the master-toggle strip can detect & remove.
+describe('htmlEmbed via the markdown/html form (real parse + real strip helper)', () => {
+ it('the parsed node is detected and stripped by the real helper', () => {
+ const source = '';
const encoded = encodeHtmlEmbedSource(source);
const html = `
`;
const parsed = htmlToJson(html);
expect(hasHtmlEmbedNode(parsed)).toBe(true);
- // A non-admin role gates to strip via the real helpers.
- expect(canAuthorHtmlEmbed('member')).toBe(false);
const stripped = stripHtmlEmbedNodes(parsed);
expect(hasHtmlEmbedNode(stripped)).toBe(false);
});
diff --git a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts
index e25d4139..8b1054e8 100644
--- a/apps/server/src/common/helpers/prosemirror/html-embed.util.ts
+++ b/apps/server/src/common/helpers/prosemirror/html-embed.util.ts
@@ -5,12 +5,12 @@ export const HTML_EMBED_NODE_NAME = 'htmlEmbed';
/**
* Recursively remove every `htmlEmbed` node from a ProseMirror JSON document.
*
- * SECURITY: `htmlEmbed` renders raw, unsanitized HTML/CSS/JS in the wiki origin
- * (stored-XSS by design, Variant C). Only workspace admins/owners are allowed to
- * author it. This helper is the server-side enforcement primitive: every WRITE
- * path that may persist content from a NON-admin caller must run the incoming
- * document through this function so a non-admin cannot smuggle the node in via
- * the collab socket, the REST/MCP/AI content-update path, paste, or import.
+ * The `htmlEmbed` node renders inside a SANDBOXED iframe (no `allow-same-origin`)
+ * on the client, so its content cannot touch the viewer's session/cookies/API —
+ * it is NOT a stored-XSS surface. This helper is retained ONLY to honor the
+ * workspace master toggle (`settings.htmlEmbed`) on the anonymous public-share
+ * read path: an anonymous viewer cannot read the workspace toggle, so the server
+ * strips the block when the toggle is OFF before serving shared content.
*
* Returns a NEW document; the input is not mutated. If the input is not a valid
* doc object it is returned unchanged (callers persist what they were given).
@@ -22,15 +22,6 @@ export function stripHtmlEmbedNodes
(pmJson: T): T {
const node = pmJson as unknown as JSONContent;
- // Defensive root-type check: if the ROOT node is itself an htmlEmbed, the
- // children-filtering below could never drop it, so a bare htmlEmbed would be
- // returned as-is. This branch is unreachable in normal use (the PM document
- // root is always a `doc`) and exists only to make the helper total — a bare
- // htmlEmbed can never be returned by this function.
- if (node.type === HTML_EMBED_NODE_NAME) {
- return { type: 'doc', content: [] } as unknown as T;
- }
-
if (Array.isArray(node.content)) {
const filtered: JSONContent[] = [];
for (const child of node.content) {
@@ -48,111 +39,12 @@ export function stripHtmlEmbedNodes(pmJson: T): T {
return { ...node } as unknown as T;
}
-/**
- * Walk the document and collect a stable identity for every `htmlEmbed` node.
- *
- * The identity is the node's `attrs.source` string — the raw HTML the embed
- * renders. Two embeds that render the exact same HTML are treated as the same
- * identity. Used by the collab persist path to know which embeds are ALREADY
- * present in the currently-persisted (admin-vetted) page content, so a later
- * non-admin store can strip only NEWLY-introduced embeds while preserving the
- * pre-existing admin-authored ones.
- *
- * Absent attrs or a non-string/absent `source` are skipped gracefully (such a
- * node contributes no identity to the set).
- */
-export function collectHtmlEmbedSources(pmJson: unknown): Set {
- const sources = new Set();
-
- const walk = (node: unknown): void => {
- if (!node || typeof node !== 'object') {
- return;
- }
- const n = node as JSONContent;
- if (n.type === HTML_EMBED_NODE_NAME) {
- const source = (n.attrs as Record | undefined)?.source;
- if (typeof source === 'string') {
- sources.add(source);
- }
- }
- if (Array.isArray(n.content)) {
- for (const child of n.content) {
- walk(child);
- }
- }
- };
-
- walk(pmJson);
- return sources;
-}
-
-/**
- * Like {@link stripHtmlEmbedNodes}, but KEEP any `htmlEmbed` node whose
- * `attrs.source` is in `allowedSources`; remove the rest.
- *
- * Used on the collab persist path when the feature toggle is ON but the storing
- * user is a NON-admin: `allowedSources` is the set of embed sources already
- * present in the currently-persisted page content (admin-authored, already
- * vetted). A non-admin therefore cannot ADD a new embed, but their unrelated
- * edit also cannot destroy an admin's existing one.
- *
- * NOTE: identity is the raw source string, so a non-admin who COPIES an existing
- * admin embed's exact source into a NEW location passes this check. That is
- * acceptable — the source is already admin-vetted content present in the doc; no
- * new untrusted HTML is introduced.
- *
- * Returns a NEW document; the input is not mutated. Same defensive root-type
- * check pattern as {@link stripHtmlEmbedNodes}.
- */
-export function stripDisallowedHtmlEmbedNodes(
- pmJson: T,
- allowedSources: Set,
-): T {
- if (!pmJson || typeof pmJson !== 'object') {
- return pmJson;
- }
-
- const node = pmJson as unknown as JSONContent;
-
- // Defensive root-type check (mirrors stripHtmlEmbedNodes): if the ROOT node is
- // itself an htmlEmbed and its source is NOT allowed, the children-filtering
- // below could never drop it, so neutralize it here. Unreachable in normal use
- // (the PM document root is always a `doc`).
- if (node.type === HTML_EMBED_NODE_NAME) {
- const source = (node.attrs as Record | undefined)?.source;
- if (typeof source === 'string' && allowedSources.has(source)) {
- return { ...node } as unknown as T;
- }
- return { type: 'doc', content: [] } as unknown as T;
- }
-
- if (Array.isArray(node.content)) {
- const filtered: JSONContent[] = [];
- for (const child of node.content) {
- // Drop a disallowed htmlEmbed child (newly introduced); keep an allowed
- // one (already present in the persisted, admin-vetted content).
- if (child && child.type === HTML_EMBED_NODE_NAME) {
- const source = (child.attrs as Record | undefined)
- ?.source;
- if (typeof source === 'string' && allowedSources.has(source)) {
- filtered.push({ ...child });
- }
- continue;
- }
- // Recurse so nested htmlEmbed nodes (e.g. inside columns/callouts) are
- // also filtered by the same allow-list.
- filtered.push(stripDisallowedHtmlEmbedNodes(child, allowedSources));
- }
- return { ...node, content: filtered } as unknown as T;
- }
-
- return { ...node } as unknown as T;
-}
-
/**
* Returns true if the document contains at least one `htmlEmbed` node anywhere
- * in its tree. Useful to decide whether a strip pass actually changed anything
- * (e.g. for logging a rejected non-admin embed attempt).
+ * in its tree. It could be used to decide whether a strip pass actually changed
+ * anything, but since the write-path role gate was removed no production code
+ * calls it; it is retained as a test-only assertion helper (and as a detection
+ * primitive should a future read path need it).
*/
export function hasHtmlEmbedNode(pmJson: unknown): boolean {
if (!pmJson || typeof pmJson !== 'object') {
@@ -169,62 +61,9 @@ export function hasHtmlEmbedNode(pmJson: unknown): boolean {
}
/**
- * Map the workspace user role to whether it may author `htmlEmbed` nodes.
- * Owners and admins are trusted; everyone else (member, and any unknown role)
- * is not. Kept here so every write path shares one definition of "trusted".
- */
-export function canAuthorHtmlEmbed(role: string | null | undefined): boolean {
- return role === 'owner' || role === 'admin';
-}
-
-/**
- * Combined write-path gate for the htmlEmbed feature.
- *
- * htmlEmbed is allowed in a document only when the workspace feature toggle is
- * ON and the authoring/saving user is a workspace admin/owner. OFF (default) =>
- * stripped for EVERYONE, including admins (the feature is disabled).
- *
- * `featureEnabled` is read from the workspace settings for the relevant write
- * (`workspace.settings?.htmlEmbed === true`). Every WRITE path that may persist
- * htmlEmbed content must gate on this combined predicate, so that turning the
- * toggle OFF strips existing embeds on the next save and prevents new ones from
- * being persisted regardless of role.
- */
-export function htmlEmbedAllowed(
- featureEnabled: boolean,
- role: string | null | undefined,
-): boolean {
- return featureEnabled === true && canAuthorHtmlEmbed(role);
-}
-
-/**
- * Strip htmlEmbed nodes unless the (feature-enabled AND role-allowed) gate
- * passes. Returns the possibly-stripped doc. The caller resolves featureEnabled
- * (from workspace settings) and role (actor) itself — those legitimately differ
- * per call-site (e.g. share path uses role=null) — this helper owns only the
- * has-check + AND + strip + optional onStrip callback.
- *
- * Centralizes the 4-step write-path ritual (resolve role -> resolve
- * featureEnabled -> htmlEmbedAllowed AND -> stripHtmlEmbedNodes) so the plain
- * strip-all call-sites share one tested decision. Sites with CUSTOM strip logic
- * (e.g. the collab persist path's preserve-admin variant) keep their own code.
- */
-export function stripHtmlEmbedIfNotAllowed(
- json: T,
- opts: { featureEnabled: boolean; role: string | null | undefined; onStrip?: () => void },
-): T {
- if (htmlEmbedAllowed(opts.featureEnabled, opts.role)) return json;
- if (hasHtmlEmbedNode(json)) {
- opts.onStrip?.();
- return stripHtmlEmbedNodes(json);
- }
- return json;
-}
-
-/**
- * Read the workspace-level htmlEmbed feature toggle from a workspace's settings
- * jsonb. ABSENT/non-true => OFF (the default). Kept here so every server write
- * path resolves the toggle the same way.
+ * Read the workspace-level htmlEmbed master toggle from a workspace's settings
+ * jsonb. ABSENT/non-true => OFF (the default). Kept here so the share read path
+ * resolves the toggle the same way it is persisted.
*/
export function isHtmlEmbedFeatureEnabled(
settings: unknown | null | undefined,
diff --git a/apps/server/src/core/ai-chat/public-share-chat.service.ts b/apps/server/src/core/ai-chat/public-share-chat.service.ts
index 2844b33c..a5251739 100644
--- a/apps/server/src/core/ai-chat/public-share-chat.service.ts
+++ b/apps/server/src/core/ai-chat/public-share-chat.service.ts
@@ -65,21 +65,19 @@ export const MAX_SHARE_MESSAGES = 30;
export const MAX_SHARE_MESSAGE_CHARS = 8000;
/**
- * Default per-request output cap for the anonymous share assistant. Bounds the
- * tokens a single anonymous request can generate; worst case = steps x this.
- */
-export const SHARE_AI_MAX_OUTPUT_TOKENS = 512;
-
-/**
- * Read the per-request output cap from the environment (overridable seam),
- * falling back to the sane default. A non-positive / unparseable value uses the
- * default. Mirrors resolveShareAiWorkspaceMax().
+ * Per-request output-token ceiling for the anonymous assistant. `streamText`
+ * stops after at most 5 steps (`stopWhen: stepCountIs(5)`), so one accepted
+ * request emits at most (steps × this). The per-workspace cap bounds the COUNT
+ * of calls; this bounds the SIZE of each, so a single anonymous call cannot run
+ * up the provider bill even if the per-IP throttle is evaded. Env-overridable
+ * seam; a non-positive or unparseable value falls back to the default.
*/
+export const SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT = 512;
export function resolveShareAiMaxOutputTokens(): number {
const raw = Number(process.env.SHARE_AI_MAX_OUTPUT_TOKENS);
return Number.isFinite(raw) && raw > 0
? Math.floor(raw)
- : SHARE_AI_MAX_OUTPUT_TOKENS;
+ : SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT;
}
/**
@@ -225,8 +223,8 @@ export class PublicShareChatService {
tools,
// Bound the agent loop for anonymous callers.
stopWhen: stepCountIs(5),
- // Bounds per-request output so one anonymous request can't run up the
- // provider bill; worst case = steps x this.
+ // Cap per-request output so one anonymous call cannot run up the provider
+ // bill even if the per-IP throttle is evaded; worst case = steps × this.
maxOutputTokens: resolveShareAiMaxOutputTokens(),
abortSignal: signal,
onError: ({ error }) => {
diff --git a/apps/server/src/core/ai-chat/public-share-chat.spec.ts b/apps/server/src/core/ai-chat/public-share-chat.spec.ts
index a1ef621c..e459ef31 100644
--- a/apps/server/src/core/ai-chat/public-share-chat.spec.ts
+++ b/apps/server/src/core/ai-chat/public-share-chat.spec.ts
@@ -5,6 +5,8 @@ import { buildShareSystemPrompt } from './public-share-chat.prompt';
import {
PublicShareChatService,
filterShareTranscript,
+ resolveShareAiMaxOutputTokens,
+ SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
} from './public-share-chat.service';
import { PublicShareChatToolsService } from './tools/public-share-chat-tools.service';
import {
@@ -400,6 +402,44 @@ describe('resolveShareAiWorkspaceMax (env-overridable per-workspace cap)', () =>
});
});
+describe('resolveShareAiMaxOutputTokens (env-overridable per-request output cap)', () => {
+ const ENV = 'SHARE_AI_MAX_OUTPUT_TOKENS';
+ const original = process.env[ENV];
+
+ afterEach(() => {
+ if (original === undefined) delete process.env[ENV];
+ else process.env[ENV] = original;
+ });
+
+ it('falls back to the default when unset', () => {
+ delete process.env[ENV];
+ expect(resolveShareAiMaxOutputTokens()).toBe(
+ SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
+ );
+ expect(SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT).toBe(512);
+ });
+
+ it('uses (and floors) a valid positive value from the env', () => {
+ process.env[ENV] = '1024.9';
+ expect(resolveShareAiMaxOutputTokens()).toBe(1024);
+ });
+
+ it('falls back to the default for zero, a negative, or a non-numeric value', () => {
+ process.env[ENV] = '0';
+ expect(resolveShareAiMaxOutputTokens()).toBe(
+ SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
+ );
+ process.env[ENV] = '-5';
+ expect(resolveShareAiMaxOutputTokens()).toBe(
+ SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
+ );
+ process.env[ENV] = 'not-a-number';
+ expect(resolveShareAiMaxOutputTokens()).toBe(
+ SHARE_AI_MAX_OUTPUT_TOKENS_DEFAULT,
+ );
+ });
+});
+
describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace cap)', () => {
it('allows up to the cap within a window, then 429s (returns false)', async () => {
const limiter = makeLimiter(3, 60_000, () => 1_000);
@@ -482,9 +522,11 @@ describe('PublicShareWorkspaceLimiter (cluster-wide sliding-window per-workspace
});
it('FAILS CLOSED (returns false) when the Redis eval rejects', async () => {
- // FAIL CLOSED (#62): if Redis is down we cannot prove the workspace is under
- // its cap, so DENY (the controller 429s) rather than admit an unmetered,
- // billable anonymous call. The feature is optional, so denial is harmless.
+ // The per-workspace cap is the COST backstop for an OPTIONAL anonymous
+ // assistant. If Redis is unavailable we cannot prove the workspace is under
+ // its cap, so we DENY (controller 429s) rather than admit an unmetered,
+ // billable call — a brief Redis blip disabling the assistant is safer than
+ // an unbounded provider bill.
const failingRedis = {
eval: () => Promise.reject(new Error('redis down')),
} as unknown as import('ioredis').Redis;
diff --git a/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts b/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
index 83a1079d..bcc40c5a 100644
--- a/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
+++ b/apps/server/src/core/ai-chat/public-share-workspace-limiter.ts
@@ -99,11 +99,11 @@ export class PublicShareWorkspaceLimiter {
/**
* Account one call for `key`. Returns true if it is within the cap (allowed),
* false if the cap over the trailing window is exceeded (caller must 429).
- * On a Redis failure we FAIL CLOSED (return false): if Redis is down we cannot
- * prove the workspace is under its cap, so we DENY rather than admit an
- * unmetered, billable anonymous call. The feature is optional, so the
- * temporary denial is harmless. (Operators wanting a tighter steady-state cap
- * can lower the default via SHARE_AI_WORKSPACE_MAX_PER_HOUR, e.g. =100.)
+ * On a Redis failure we FAIL CLOSED (return false): this cap is the COST
+ * backstop for an OPTIONAL anonymous assistant, so when Redis is unavailable we
+ * cannot prove the workspace is under its cap and therefore DENY rather than
+ * admit an unmetered, billable anonymous call. A transient Redis blip briefly
+ * disabling the assistant is preferable to an unbounded provider bill.
*/
async tryConsume(key: string): Promise {
const t = this.now();
@@ -122,9 +122,11 @@ export class PublicShareWorkspaceLimiter {
);
return admitted === 1;
} catch (err) {
- // FAIL CLOSED: if Redis is down we cannot prove the workspace is under its
- // cap, so DENY (controller 429s) rather than admit an unmetered, billable
- // anonymous call. The feature is optional, so denial is harmless.
+ // FAIL CLOSED: when Redis is unavailable we cannot prove the workspace is
+ // under its cap, so we DENY (the controller 429s) rather than admit an
+ // unmetered, billable anonymous call. The assistant is optional, so a
+ // transient Redis blip briefly disabling it is a safer failure mode than
+ // an unbounded provider bill.
this.logger.error(
`share-ai workspace limiter Redis failure for key "${key}"; failing closed`,
err as Error,
diff --git a/apps/server/src/core/page/page.controller.spec.ts b/apps/server/src/core/page/page.controller.spec.ts
index e369d51e..23f25ed4 100644
--- a/apps/server/src/core/page/page.controller.spec.ts
+++ b/apps/server/src/core/page/page.controller.spec.ts
@@ -10,7 +10,6 @@ describe('PageController', () => {
controller = new PageController(
{} as any, // pageService
{} as any, // pageRepo
- {} as any, // workspaceRepo
{} as any, // pageHistoryService
{} as any, // spaceAbility
{} as any, // pageAccessService
diff --git a/apps/server/src/core/page/page.controller.ts b/apps/server/src/core/page/page.controller.ts
index 28ec083e..fd5c866e 100644
--- a/apps/server/src/core/page/page.controller.ts
+++ b/apps/server/src/core/page/page.controller.ts
@@ -39,11 +39,6 @@ import {
} from '../casl/interfaces/space-ability.type';
import SpaceAbilityFactory from '../casl/abilities/space-ability.factory';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
-import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
-import {
- isHtmlEmbedFeatureEnabled,
- stripHtmlEmbedNodes,
-} from '../../common/helpers/prosemirror/html-embed.util';
import { RecentPageDto } from './dto/recent-page.dto';
import { CreatedByUserDto } from './dto/created-by-user.dto';
import { DuplicatePageDto } from './dto/duplicate-page.dto';
@@ -68,7 +63,6 @@ export class PageController {
constructor(
private readonly pageService: PageService,
private readonly pageRepo: PageRepo,
- private readonly workspaceRepo: WorkspaceRepo,
private readonly pageHistoryService: PageHistoryService,
private readonly spaceAbility: SpaceAbilityFactory,
private readonly pageAccessService: PageAccessService,
@@ -98,18 +92,6 @@ export class PageController {
const permissions = { canEdit, hasRestriction };
- if (page.content) {
- const workspace = await this.workspaceRepo.findById(page.workspaceId);
- if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
- // Kill-switch: when the workspace feature is OFF, never serve raw
- // htmlEmbed nodes on the read path (mirrors the public-share strip),
- // so disabling the feature is an immediate, total kill-switch and not
- // dependent on the page being re-saved. Admin-authored content only.
- // Fail-closed: a missing workspace resolves to OFF and is stripped.
- page.content = stripHtmlEmbedNodes(page.content) as any;
- }
- }
-
if (dto.format && dto.format !== 'json' && page.content) {
const contentOutput =
dto.format === 'markdown'
@@ -255,9 +237,6 @@ export class PageController {
user.id,
workspace.id,
createPageDto,
- // Pass the caller's workspace role so create() can enforce the htmlEmbed
- // admin gate (non-admins cannot author raw-JS embeds).
- user.role,
provenance,
);
@@ -554,16 +533,6 @@ export class PageController {
await this.pageAccessService.validateCanView(page, user);
- if (history.content) {
- const workspace = await this.workspaceRepo.findById(page.workspaceId);
- if (!isHtmlEmbedFeatureEnabled(workspace?.settings)) {
- // Kill-switch: history snapshots are an authenticated read path too, so
- // strip htmlEmbed when the workspace feature is OFF (same as /info and
- // the public-share path). Fail-closed on a missing workspace.
- history.content = stripHtmlEmbedNodes(history.content) as any;
- }
- }
-
return history;
}
diff --git a/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts b/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts
deleted file mode 100644
index bc1b8254..00000000
--- a/apps/server/src/core/page/services/page-service-html-embed-identity.spec.ts
+++ /dev/null
@@ -1,240 +0,0 @@
-// Exercises the REAL PageService htmlEmbed admin gate on its two non-collab
-// write paths: PageService.create() and PageService.duplicatePage(). Both build
-// content/textContent/ydoc directly and persist, bypassing the collab
-// onStoreDocument strip, so each must run the incoming document through the
-// toggle-AND-admin gate (`htmlEmbedAllowed(featureEnabled, role)` -> if not
-// allowed, `stripHtmlEmbedNodes`) BEFORE persisting.
-//
-// This spec constructs the REAL PageService with every constructor dep mocked,
-// feeds content containing an `htmlEmbed`, calls the real method, and asserts on
-// the PERSISTED content (captured at the repo insert / db insert boundary) that
-// the embed was actually stripped (member/unknown role) or preserved
-// (admin/owner + toggle ON). Mirrors the GOOD pattern in
-// transclusion/spec/transclusion-unsync-html-embed.spec.ts.
-//
-// page.service.ts pulls in the collaboration gateway (a transitive ESM chain
-// `lib0/decoding.js` that jest's transformIgnorePatterns does not transpile), so
-// that single module is mocked away — it is never used on the create/duplicate
-// gate paths.
-jest.mock('../../../collaboration/collaboration.gateway', () => ({
- CollaborationGateway: class {},
-}));
-
-import { PageService } from './page.service';
-import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
-
-const WS = 'ws-1';
-const SPACE = 'space-1';
-const USER = 'u1';
-
-const docWithEmbed = () => ({
- type: 'doc',
- content: [
- { type: 'paragraph', content: [{ type: 'text', text: 'body' }] },
- { type: 'htmlEmbed', attrs: { source: '' } },
- ],
-});
-
-// Minimal chainable kysely stub. `nextPagePosition` (used by create) and
-// duplicatePage's bulk insert go through `this.db`; only the calls those paths
-// make need to resolve. `capturedInserts` collects every page row handed to
-// `insertInto('pages').values(...)` so we can assert on the persisted content.
-function buildDb(capturedInserts: any[]) {
- const selectChain: any = {
- select: () => selectChain,
- selectAll: () => selectChain,
- where: () => selectChain,
- orderBy: () => selectChain,
- limit: () => selectChain,
- execute: async () => [],
- executeTakeFirst: async () => undefined,
- };
- const db: any = {
- selectFrom: () => selectChain,
- insertInto: (table: string) => ({
- values: (rows: any) => {
- if (table === 'pages') {
- for (const row of Array.isArray(rows) ? rows : [rows]) {
- capturedInserts.push(row);
- }
- }
- return { execute: async () => undefined };
- },
- }),
- // executeTx -> db.transaction().execute(cb): run the callback with `db`
- // itself acting as the transaction so any in-tx inserts are captured too.
- transaction: () => ({ execute: async (cb: any) => cb(db) }),
- };
- return db;
-}
-
-// Build the REAL PageService with all 13 constructor deps mocked. `featureEnabled`
-// drives the workspace toggle the gate reads via workspaceRepo.findById.
-function buildService(opts: {
- featureEnabled: boolean;
- capturedInserts: any[];
- rootPage?: any; // for duplicatePage
-}) {
- const { featureEnabled, capturedInserts } = opts;
-
- const pageRepo: any = {
- findById: jest.fn(async () => null), // no parent page in create tests
- // create() persists here; capture the row so we can inspect content.
- insertPage: jest.fn(async (row: any) => {
- capturedInserts.push(row);
- return { id: 'new-page', slugId: 'slug-1', ...row };
- }),
- getPageAndDescendants: jest.fn(async () => [opts.rootPage].filter(Boolean)),
- };
-
- const pagePermissionRepo: any = {
- // duplicatePage filters accessible pages; grant the root so it is copied.
- filterAccessiblePageIds: jest.fn(async () =>
- opts.rootPage ? [opts.rootPage.id] : [],
- ),
- };
-
- const workspaceRepo: any = {
- findById: jest.fn(async () => ({
- id: WS,
- settings: { htmlEmbed: featureEnabled },
- })),
- };
-
- const attachmentRepo: any = { findByIds: jest.fn(async () => []) };
- const storageService: any = { copy: jest.fn(async () => undefined) };
- const noopQueue: any = { add: jest.fn(async () => undefined) };
- const eventEmitter: any = { emit: jest.fn() };
- const collaborationGateway: any = {};
- const watcherService: any = {};
- // duplicatePage fires transclusion bulk inserts after persisting; they are
- // best-effort (wrapped in try/catch) and irrelevant to the gate.
- const transclusionService: any = {
- insertTransclusionsForPages: jest.fn(async () => undefined),
- insertReferencesForPages: jest.fn(async () => undefined),
- insertTemplateReferencesForPages: jest.fn(async () => undefined),
- };
-
- const db = buildDb(capturedInserts);
-
- const service = new PageService(
- pageRepo,
- pagePermissionRepo,
- attachmentRepo,
- db,
- storageService,
- noopQueue, // attachmentQueue
- noopQueue, // aiQueue
- noopQueue, // generalQueue
- eventEmitter,
- collaborationGateway,
- watcherService,
- transclusionService,
- workspaceRepo,
- );
- return service;
-}
-
-describe('PageService.create htmlEmbed admin gate (real code)', () => {
- // Run create() and return the content actually persisted via insertPage.
- async function persistedContent(
- featureEnabled: boolean,
- callerRole: string | null | undefined,
- ) {
- const capturedInserts: any[] = [];
- const service = buildService({ featureEnabled, capturedInserts });
- await service.create(
- USER,
- WS,
- {
- spaceId: SPACE,
- title: 'p',
- // 'json' format is used as-is by parseProsemirrorContent (passed to the
- // real jsonToNode schema validation), so hand it the PM-JSON object.
- content: docWithEmbed(),
- format: 'json' as any,
- } as any,
- callerRole,
- );
- expect(capturedInserts).toHaveLength(1);
- return capturedInserts[0].content;
- }
-
- it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
- const content = await persistedContent(true, 'member');
- expect(hasHtmlEmbedNode(content)).toBe(false);
- // Non-embed content survives.
- expect(JSON.stringify(content)).toContain('body');
- });
-
- it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true);
- });
-
- it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true);
- });
-
- it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false);
- });
-
- it('unknown/empty role: fails closed (stripped)', async () => {
- for (const role of [undefined, null, 'viewer'] as const) {
- expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false);
- }
- });
-});
-
-describe('PageService.duplicatePage htmlEmbed admin gate (real code)', () => {
- // Duplicate a single source page that contains an embed and return the content
- // persisted for the copy (captured at db.insertInto('pages').values(...)).
- async function persistedContent(
- featureEnabled: boolean,
- role: string | null | undefined,
- ) {
- const rootPage: any = {
- id: 'src-page',
- slugId: 'src-slug',
- title: 'Source',
- icon: null,
- position: 'a0',
- spaceId: SPACE,
- workspaceId: WS,
- parentPageId: null,
- content: docWithEmbed(),
- };
- const capturedInserts: any[] = [];
- const service = buildService({ featureEnabled, capturedInserts, rootPage });
- const authUser: any = { id: USER, workspaceId: WS, role };
- await service.duplicatePage(rootPage, undefined, authUser);
- // The bulk insert is the page persist boundary; one source page -> one copy.
- const pageRows = capturedInserts.filter((r) => r.content);
- expect(pageRows.length).toBeGreaterThanOrEqual(1);
- return pageRows[0].content;
- }
-
- it('toggle ON + member: persisted copy has htmlEmbed stripped', async () => {
- const content = await persistedContent(true, 'member');
- expect(hasHtmlEmbedNode(content)).toBe(false);
- expect(JSON.stringify(content)).toContain('body');
- });
-
- it('toggle ON + admin: persisted copy keeps the htmlEmbed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, 'admin'))).toBe(true);
- });
-
- it('toggle ON + owner: persisted copy keeps the htmlEmbed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, 'owner'))).toBe(true);
- });
-
- it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(false, 'admin'))).toBe(false);
- });
-
- it('unknown/empty role: fails closed (stripped)', async () => {
- for (const role of [undefined, null, 'viewer'] as const) {
- expect(hasHtmlEmbedNode(await persistedContent(true, role))).toBe(false);
- }
- });
-});
diff --git a/apps/server/src/core/page/services/page.service.spec.ts b/apps/server/src/core/page/services/page.service.spec.ts
index c0d7dbc9..ec3a39db 100644
--- a/apps/server/src/core/page/services/page.service.spec.ts
+++ b/apps/server/src/core/page/services/page.service.spec.ts
@@ -20,7 +20,6 @@ describe('PageService', () => {
{} as any, // collaborationGateway
{} as any, // watcherService
{} as any, // transclusionService
- {} as any, // workspaceRepo
);
});
diff --git a/apps/server/src/core/page/services/page.service.ts b/apps/server/src/core/page/services/page.service.ts
index 3dff5a8a..6ea00188 100644
--- a/apps/server/src/core/page/services/page.service.ts
+++ b/apps/server/src/core/page/services/page.service.ts
@@ -31,11 +31,6 @@ import {
isAttachmentNode,
removeMarkTypeFromDoc,
} from '../../../common/helpers/prosemirror/utils';
-import {
- isHtmlEmbedFeatureEnabled,
- stripHtmlEmbedIfNotAllowed,
-} from '../../../common/helpers/prosemirror/html-embed.util';
-import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import {
htmlToJson,
jsonToNode,
@@ -81,7 +76,6 @@ export class PageService {
private collaborationGateway: CollaborationGateway,
private readonly watcherService: WatcherService,
private readonly transclusionService: TransclusionService,
- private readonly workspaceRepo: WorkspaceRepo,
) {}
async findById(
@@ -101,10 +95,6 @@ export class PageService {
userId: string,
workspaceId: string,
createPageDto: CreatePageDto,
- // Workspace role of the caller. Used to enforce the htmlEmbed admin gate on
- // the create write path (see below). Optional/typed loosely so unknown or
- // missing roles fall through to the non-admin (strip) branch by default.
- callerRole?: string | null,
// Optional agent-edit provenance (from the signed access claim). When the
// actor is 'agent', stamp the page's source marker so a freshly created page
// shows it was created by the AI agent (§14 N2) — create goes through REST,
@@ -135,35 +125,11 @@ export class PageService {
let ydoc = undefined;
if (createPageDto?.content && createPageDto?.format) {
- let prosemirrorJson = await this.parseProsemirrorContent(
+ const prosemirrorJson = await this.parseProsemirrorContent(
createPageDto.content,
createPageDto.format,
);
- // SECURITY (Variant C admin gate, plain page-create write path):
- // create() builds content/textContent/ydoc directly and persists them via
- // insertPage, bypassing the collab onStoreDocument strip. htmlEmbed renders
- // raw, unsanitized JS in readers' browsers, so only workspace admins/owners
- // may author it. The create controller requires only space Edit, so a
- // regular member could otherwise POST a doc (json, or the markdown/html
- // forms that parse to the same node) containing an
- // htmlEmbed and store XSS for every reader. Strip every htmlEmbed node when
- // the caller is not an admin, BEFORE deriving textContent/ydoc/insert.
- // The gate is toggle-AND-admin: htmlEmbed survives only when the workspace
- // feature toggle is ON and the caller is an admin/owner. OFF (default) =>
- // stripped for everyone. Cheap settings read keyed to the workspace.
- const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
- (await this.workspaceRepo.findById(workspaceId))?.settings,
- );
- prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
- featureEnabled: htmlEmbedEnabled,
- role: callerRole,
- onStrip: () =>
- this.logger.warn(
- `Stripping htmlEmbed node(s) from page creation by user ${userId} (space ${createPageDto.spaceId})`,
- ),
- });
-
content = prosemirrorJson;
textContent = jsonToText(prosemirrorJson);
ydoc = createYdocFromJson(prosemirrorJson);
@@ -653,12 +619,6 @@ export class PageService {
const attachmentMap = new Map();
- // Resolve the htmlEmbed toggle ONCE for the workspace; the per-page gate
- // below is toggle-AND-admin (OFF default => stripped for everyone).
- const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
- (await this.workspaceRepo.findById(rootPage.workspaceId))?.settings,
- );
-
const insertablePages: InsertablePage[] = await Promise.all(
pages.map(async (page) => {
const pageContent = getProsemirrorContent(page.content);
@@ -769,24 +729,7 @@ export class PageService {
}
});
- let prosemirrorJson = prosemirrorDoc.toJSON();
-
- // SECURITY (Variant C admin gate, duplication write path):
- // Duplication builds the ydoc directly and bypasses the collab
- // onStoreDocument strip. htmlEmbed renders raw, unsanitized JS in
- // readers' browsers, so only workspace admins/owners may author it. A
- // non-admin with space Edit could otherwise duplicate an admin page
- // that contains an embed into a new page authored by them. Strip every
- // htmlEmbed node from each duplicated page when the duplicating user is
- // not an admin, BEFORE computing textContent/ydoc/insert.
- prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
- featureEnabled: htmlEmbedEnabled,
- role: authUser.role,
- onStrip: () =>
- this.logger.warn(
- `Stripping htmlEmbed node(s) from page duplication by user ${authUser.id} (source page ${page.id})`,
- ),
- });
+ const prosemirrorJson = prosemirrorDoc.toJSON();
// Add "Copy of " prefix to the root page title only for duplicates in same space
let title = page.title;
diff --git a/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts
index 6d73dd8b..286ad275 100644
--- a/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts
+++ b/apps/server/src/core/page/transclusion/spec/page-template-access.spec.ts
@@ -68,7 +68,6 @@ describe('TransclusionService — template access core (real filter)', () => {
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
- {} as any, // workspaceRepo
);
return { service, db, pageRepo, spaceMemberRepo, pagePermissionRepo };
@@ -227,7 +226,6 @@ describe('TransclusionService.filterViewerAccessiblePageIds — AND ordering (co
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
- {} as any, // workspaceRepo
);
return { service, filterAccessiblePageIds };
@@ -324,7 +322,6 @@ describe('TransclusionService.syncPageTemplateReferences — workspace scoping',
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
- {} as any, // workspaceRepo
);
return {
@@ -471,7 +468,6 @@ describe('TransclusionService.insertTemplateReferencesForPages — per-workspace
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
- {} as any, // workspaceRepo
);
return { service, insertMany };
}
diff --git a/apps/server/src/core/page/transclusion/spec/page-template-lookup-edge.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-lookup-edge.spec.ts
index 59dec763..0d8433bd 100644
--- a/apps/server/src/core/page/transclusion/spec/page-template-lookup-edge.spec.ts
+++ b/apps/server/src/core/page/transclusion/spec/page-template-lookup-edge.spec.ts
@@ -41,7 +41,6 @@ describe('TransclusionService.lookupTemplate — anti-leak catch branch', () =>
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
- {} as any, // workspaceRepo
);
// Stub the access decision; we are testing the content-prep stage, not access.
@@ -158,7 +157,6 @@ describe('TransclusionService.lookupTemplate — soft-deleted source via real fi
{} as any,
{} as any,
{} as any,
- {} as any,
);
const { items } = await service.lookupTemplate(['deleted-src'], 'u1', 'w1');
diff --git a/apps/server/src/core/page/transclusion/spec/page-template-lookup.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-lookup.spec.ts
index fbcd9486..8a8718b2 100644
--- a/apps/server/src/core/page/transclusion/spec/page-template-lookup.spec.ts
+++ b/apps/server/src/core/page/transclusion/spec/page-template-lookup.spec.ts
@@ -35,7 +35,6 @@ describe('TransclusionService.lookupTemplate (access mapping)', () => {
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
- {} as any, // workspaceRepo
);
jest
diff --git a/apps/server/src/core/page/transclusion/spec/page-template-references-sync.spec.ts b/apps/server/src/core/page/transclusion/spec/page-template-references-sync.spec.ts
index 4afad554..b8ce5c7d 100644
--- a/apps/server/src/core/page/transclusion/spec/page-template-references-sync.spec.ts
+++ b/apps/server/src/core/page/transclusion/spec/page-template-references-sync.spec.ts
@@ -57,7 +57,6 @@ function buildService(opts: {
{} as any, // attachmentRepo
{} as any, // storageService
{} as any, // pageAccessService
- {} as any, // workspaceRepo
);
}
diff --git a/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts b/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
deleted file mode 100644
index 4d149369..00000000
--- a/apps/server/src/core/page/transclusion/spec/transclusion-unsync-html-embed.spec.ts
+++ /dev/null
@@ -1,145 +0,0 @@
-import { TransclusionService } from '../transclusion.service';
-import { hasHtmlEmbedNode } from '../../../../common/helpers/prosemirror/html-embed.util';
-
-// Exercises the REAL TransclusionService.unsyncReference htmlEmbed admin gate.
-// unsync returns a source snapshot the client materializes into the reference
-// page; a non-admin must never receive an embed payload to re-persist. The gate
-// reads `user.role` and strips before returning. All repos / access checks are
-// mocked so the REAL gate logic runs end-to-end. Complements the existing
-// transclusion specs (rewriteAttachmentsForUnsync, controller).
-
-const WS = 'ws-1';
-const REF_PAGE = 'ref-1';
-const SRC_PAGE = 'src-1';
-const TX_ID = 'tx-1';
-
-const sourceContentWithEmbed = () => ({
- type: 'doc',
- content: [
- { type: 'paragraph', content: [{ type: 'text', text: 'snapshot body' }] },
- { type: 'htmlEmbed', attrs: { source: '' } },
- ],
-});
-
-function buildService(featureEnabled = true) {
- const pageRepo = {
- findById: jest.fn(async (id: string) => ({
- id,
- workspaceId: WS,
- spaceId: 'space-1',
- deletedAt: null,
- })),
- };
- const pageTransclusionsRepo = {
- findByPageAndTransclusion: jest.fn(async () => ({
- content: sourceContentWithEmbed(),
- })),
- };
- const pageTransclusionReferencesRepo = {
- deleteOne: jest.fn(async () => undefined),
- };
- const attachmentRepo = { findByIds: jest.fn(async () => []) };
- const storageService = { copy: jest.fn(async () => undefined) };
- const pageAccessService = {
- validateCanEdit: jest.fn(async () => undefined),
- validateCanView: jest.fn(async () => undefined),
- };
- // Workspace settings read used by the toggle-AND-admin gate.
- const workspaceRepo = {
- findById: jest.fn(async () => ({
- id: WS,
- settings: { htmlEmbed: featureEnabled },
- })),
- };
-
- const service = new TransclusionService(
- {} as any, // db (unused on this path)
- pageTransclusionsRepo as any,
- pageTransclusionReferencesRepo as any,
- {} as any, // pageTemplateReferencesRepo (unused on this path)
- pageRepo as any,
- {} as any, // pagePermissionRepo (unused)
- {} as any, // spaceMemberRepo (unused)
- attachmentRepo as any,
- storageService as any,
- pageAccessService as any,
- workspaceRepo as any,
- );
- return service;
-}
-
-function userWithRole(role: string | null | undefined) {
- return { id: 'u1', workspaceId: WS, role } as any;
-}
-
-describe('TransclusionService.unsyncReference htmlEmbed admin gate (real code)', () => {
- it('non-admin (member): returned content has htmlEmbed stripped', async () => {
- const service = buildService();
- const { content } = await service.unsyncReference(
- REF_PAGE,
- SRC_PAGE,
- TX_ID,
- userWithRole('member'),
- );
- expect(hasHtmlEmbedNode(content)).toBe(false);
- // Non-embed content is preserved.
- expect(JSON.stringify(content)).toContain('snapshot body');
- });
-
- it('unknown/empty role: fails closed (stripped)', async () => {
- for (const role of [undefined, null, 'viewer'] as const) {
- const service = buildService();
- const { content } = await service.unsyncReference(
- REF_PAGE,
- SRC_PAGE,
- TX_ID,
- userWithRole(role),
- );
- expect(hasHtmlEmbedNode(content)).toBe(false);
- }
- });
-
- it('toggle ON + admin: returned content keeps the htmlEmbed', async () => {
- const service = buildService(true);
- const { content } = await service.unsyncReference(
- REF_PAGE,
- SRC_PAGE,
- TX_ID,
- userWithRole('admin'),
- );
- expect(hasHtmlEmbedNode(content)).toBe(true);
- });
-
- it('toggle ON + owner: returned content keeps the htmlEmbed', async () => {
- const service = buildService(true);
- const { content } = await service.unsyncReference(
- REF_PAGE,
- SRC_PAGE,
- TX_ID,
- userWithRole('owner'),
- );
- expect(hasHtmlEmbedNode(content)).toBe(true);
- });
-
- it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
- const service = buildService(false);
- const { content } = await service.unsyncReference(
- REF_PAGE,
- SRC_PAGE,
- TX_ID,
- userWithRole('admin'),
- );
- expect(hasHtmlEmbedNode(content)).toBe(false);
- });
-
- it('toggle OFF + member: stripped', async () => {
- const service = buildService(false);
- const { content } = await service.unsyncReference(
- REF_PAGE,
- SRC_PAGE,
- TX_ID,
- userWithRole('member'),
- );
- expect(hasHtmlEmbedNode(content)).toBe(false);
- });
-});
diff --git a/apps/server/src/core/page/transclusion/transclusion.service.ts b/apps/server/src/core/page/transclusion/transclusion.service.ts
index ebee79f9..b72ee1fa 100644
--- a/apps/server/src/core/page/transclusion/transclusion.service.ts
+++ b/apps/server/src/core/page/transclusion/transclusion.service.ts
@@ -33,11 +33,6 @@ import {
import { jsonToNode } from '../../../collaboration/collaboration.util';
import { Page, User } from '@docmost/db/types/entity.types';
import { PageAccessService } from '../page-access/page-access.service';
-import {
- isHtmlEmbedFeatureEnabled,
- stripHtmlEmbedIfNotAllowed,
-} from '../../../common/helpers/prosemirror/html-embed.util';
-import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
type ReferencingPageInfo = {
id: string;
@@ -63,7 +58,6 @@ export class TransclusionService {
private readonly attachmentRepo: AttachmentRepo,
private readonly storageService: StorageService,
private readonly pageAccessService: PageAccessService,
- private readonly workspaceRepo: WorkspaceRepo,
) {}
async syncPageTransclusions(
@@ -773,26 +767,6 @@ export class TransclusionService {
transclusionId,
);
- // SECURITY (Variant C admin gate, transclusion unsync write path):
- // The returned content is a source snapshot that the client materializes
- // into the reference page via insertContentAt. The snapshot keeps any
- // htmlEmbed verbatim, and unsync requires only space Edit/View. If the
- // requesting user is not a workspace admin/owner, strip htmlEmbed nodes so a
- // non-admin can never receive an embed payload to re-persist (the collab
- // strip on the subsequent save is debounced/race-prone and must not be the
- // only guard). Admin behavior is unchanged.
- const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
- (await this.workspaceRepo.findById(user.workspaceId))?.settings,
- );
- content = stripHtmlEmbedIfNotAllowed(content, {
- featureEnabled: htmlEmbedEnabled,
- role: user.role,
- onStrip: () =>
- this.logger.warn(
- `Stripping htmlEmbed node(s) from transclusion unsync by user ${user.id} (reference page ${referencePageId}, source page ${sourcePageId})`,
- ),
- });
-
return { content };
}
}
diff --git a/apps/server/src/core/share/share-html-embed.spec.ts b/apps/server/src/core/share/share-html-embed.spec.ts
index 162ba4ae..49f98c55 100644
--- a/apps/server/src/core/share/share-html-embed.spec.ts
+++ b/apps/server/src/core/share/share-html-embed.spec.ts
@@ -1,12 +1,14 @@
import { ShareService } from './share.service';
import { hasHtmlEmbedNode } from '../../common/helpers/prosemirror/html-embed.util';
-// Exercises the REAL ShareService server-authoritative htmlEmbed kill-switch for
-// shared content. An anonymous public-share viewer cannot read the per-workspace
-// htmlEmbed toggle, so the SERVER must decide what to serve: when the toggle is
-// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are kept so
-// the read-only client executes them. All repos / token service are mocked so the
-// real prepareContentForShare logic runs end-to-end via getSharedPage.
+// Exercises the REAL ShareService server-authoritative htmlEmbed master toggle
+// for shared content. The block renders inside a sandboxed iframe (harmless), so
+// this is NOT an XSS guard — it is the master-toggle enforcement for anonymous
+// shares: an anonymous public-share viewer cannot read the per-workspace
+// htmlEmbed toggle, so the SERVER must decide what to serve. When the toggle is
+// OFF, htmlEmbed nodes are stripped from the shared doc; when ON they are served
+// and rendered in their sandboxed frame. All repos / token service are mocked so
+// the real prepareContentForShare logic runs end-to-end via getSharedPage.
const WS = 'ws-1';
const PAGE = 'page-1';
diff --git a/apps/server/src/core/share/share-seo.controller.ts b/apps/server/src/core/share/share-seo.controller.ts
index 51967ada..1c443dcc 100644
--- a/apps/server/src/core/share/share-seo.controller.ts
+++ b/apps/server/src/core/share/share-seo.controller.ts
@@ -1,4 +1,4 @@
-import { Controller, Get, Param, Req, Res } from '@nestjs/common';
+import { Controller, Get, Logger, Param, Req, Res } from '@nestjs/common';
import { ShareService } from './share.service';
import { FastifyReply, FastifyRequest } from 'fastify';
import { join } from 'path';
@@ -11,6 +11,8 @@ import { htmlEscape } from '../../common/helpers/html-escaper';
@Controller('share')
export class ShareSeoController {
+ private readonly logger = new Logger(ShareSeoController.name);
+
constructor(
private readonly shareService: ShareService,
private workspaceRepo: WorkspaceRepo,
@@ -84,10 +86,34 @@ export class ShareSeoController {
.join('\n ');
const html = fs.readFileSync(indexFilePath, 'utf8');
- const transformedHtml = html
+ let transformedHtml = html
.replace(/[\s\S]*?<\/title>/i, `${metaTitle}`)
.replace(metaTagVar, metaTags);
+ // Deliberate same-origin tracker surface: this is the ONE place where an
+ // admin-authored analytics/tracker snippet (settings.trackerHead) is
+ // injected verbatim into the page origin. It is admin-only (writable only
+ // via the admin-gated workspace settings) and applies to PUBLIC SHARE
+ // pages only. It is trusted content, so it is NOT escaped. The htmlEmbed
+ // block itself is sandboxed and is the safe surface for everyone else.
+ const trackerHead = (workspace?.settings as any)?.trackerHead;
+ if (typeof trackerHead === 'string' && trackerHead.trim().length > 0) {
+ if (transformedHtml.includes('')) {
+ // Function replacer: the snippet is admin-authored trusted content and
+ // must be injected verbatim. A string replacement would interpret `$&`,
+ // `$'`, `` $` `` and `$$` inside it as substitution patterns and mangle
+ // the tracker; a function return value is inserted literally.
+ transformedHtml = transformedHtml.replace(
+ '',
+ () => `${trackerHead}\n`,
+ );
+ } else {
+ this.logger.warn(
+ 'trackerHead is configured but no marker was found in the share index HTML; tracker snippet was not injected.',
+ );
+ }
+ }
+
res.type('text/html').send(transformedHtml);
}
}
diff --git a/apps/server/src/core/share/share.controller.ts b/apps/server/src/core/share/share.controller.ts
index b77e2a37..cdcb41da 100644
--- a/apps/server/src/core/share/share.controller.ts
+++ b/apps/server/src/core/share/share.controller.ts
@@ -87,9 +87,16 @@ export class ShareController {
workspace.id,
);
+ // Resolve the identity name only when the assistant is enabled, so the
+ // anonymous widget can label messages with the configured persona name.
+ const aiAssistantName = aiAssistant
+ ? await this.aiSettings.resolvePublicShareAssistantName(workspace.id)
+ : null;
+
return {
...shareData,
aiAssistant,
+ aiAssistantName,
features: this.licenseCheckService.resolveFeatures(
workspace.licenseKey,
workspace.plan,
diff --git a/apps/server/src/core/share/share.service.ts b/apps/server/src/core/share/share.service.ts
index 9de364ef..bd367f2a 100644
--- a/apps/server/src/core/share/share.service.ts
+++ b/apps/server/src/core/share/share.service.ts
@@ -524,12 +524,14 @@ export class ShareService {
* not leak structure (existence, location, count, resolved state, or
* comment ids) to public viewers.
*
- * 3. Strip `htmlEmbed` nodes when the workspace feature toggle is OFF. This
- * makes the toggle a SERVER-AUTHORITATIVE kill-switch for shared content:
- * when OFF the embed is never served to the anonymous viewer (who can't
- * read the per-workspace toggle), when ON the embed is served so the
- * read-only client executes it. `htmlEmbedEnabled` is resolved fail-closed
- * by the callers (missing workspace => OFF => strip).
+ * 3. Strip `htmlEmbed` nodes when the workspace master toggle is OFF. The
+ * block renders inside a sandboxed iframe on the client (harmless, no
+ * same-origin access), so this is NOT an XSS guard — it is the
+ * SERVER-AUTHORITATIVE enforcement of the workspace master toggle for
+ * anonymous shares: an anonymous viewer cannot read the per-workspace
+ * toggle, so when OFF the block is never served, and when ON it is served
+ * and rendered in its sandboxed frame. `htmlEmbedEnabled` is resolved
+ * fail-closed by the callers (missing workspace => OFF => strip).
*
* Both share-content paths — the host page (`updatePublicAttachments`) and
* the share-scoped transclusion lookup (`lookupTransclusionForShare`) —
@@ -544,8 +546,9 @@ export class ShareService {
): Promise {
let pmJson = getProsemirrorContent(content);
- // Kill-switch: when the workspace toggle is OFF, never serve htmlEmbed
- // nodes to public viewers. Strip before tokenizing/serializing.
+ // Master-toggle enforcement: when the workspace toggle is OFF, never serve
+ // htmlEmbed nodes to anonymous public viewers (who cannot read the toggle).
+ // Strip before tokenizing/serializing.
if (!htmlEmbedEnabled) {
pmJson = stripHtmlEmbedNodes(pmJson);
}
diff --git a/apps/server/src/core/workspace/dto/update-workspace.dto.ts b/apps/server/src/core/workspace/dto/update-workspace.dto.ts
index 1beb7ece..404593d6 100644
--- a/apps/server/src/core/workspace/dto/update-workspace.dto.ts
+++ b/apps/server/src/core/workspace/dto/update-workspace.dto.ts
@@ -5,6 +5,8 @@ import {
IsBoolean,
IsInt,
IsOptional,
+ IsString,
+ MaxLength,
Min,
} from 'class-validator';
@@ -53,12 +55,22 @@ export class UpdateWorkspaceDto extends PartialType(CreateWorkspaceDto) {
@IsBoolean()
aiDictation: boolean;
- // Workspace feature toggle for the admin-only HTML embed feature. Persisted at
- // settings.htmlEmbed. ABSENT/false => OFF (default).
+ // Workspace master toggle that enables/disables the HTML embed block type.
+ // Persisted at settings.htmlEmbed. ABSENT/false => OFF (default). The block
+ // itself renders in a sandboxed iframe, so this is a feature switch, not a
+ // security gate.
@IsOptional()
@IsBoolean()
htmlEmbed: boolean;
+ // Admin-only analytics/tracker snippet (raw HTML/JS) injected verbatim into
+ // the <head> of PUBLIC SHARE pages only (same-origin). Persisted at
+ // settings.trackerHead. Admin-authored trusted content.
+ @IsOptional()
+ @IsString()
+ @MaxLength(20000)
+ trackerHead?: string;
+
@IsOptional()
@IsBoolean()
aiPublicShareAssistant: boolean;
diff --git a/apps/server/src/core/workspace/services/workspace-html-embed.spec.ts b/apps/server/src/core/workspace/services/workspace-html-embed.spec.ts
index fda0f5fa..fbab1f6f 100644
--- a/apps/server/src/core/workspace/services/workspace-html-embed.spec.ts
+++ b/apps/server/src/core/workspace/services/workspace-html-embed.spec.ts
@@ -108,4 +108,38 @@ describe('WorkspaceService.update — htmlEmbed toggle persistence (real code)',
expect(logged.changes.before.htmlEmbed).toBe(false);
expect(logged.changes.after.htmlEmbed).toBe(true);
});
+
+ it('persists trackerHead via updateSetting with the trackerHead key', async () => {
+ const { service, updateSetting } = buildService({});
+
+ await service.update('w1', { trackerHead: '' } as any);
+
+ expect(updateSetting).toHaveBeenCalledWith(
+ 'w1',
+ 'trackerHead',
+ '',
+ expect.anything(),
+ );
+ });
+
+ it('does NOT call updateSetting when trackerHead is undefined in the dto', async () => {
+ const { service, updateSetting } = buildService({});
+
+ await service.update('w1', { name: 'New name' } as any);
+
+ expect(updateSetting).not.toHaveBeenCalled();
+ });
+
+ it('audits the trackerHead change (before/after) when the value changes', async () => {
+ const { service, auditService } = buildService({
+ settingsBefore: { trackerHead: '' },
+ });
+
+ await service.update('w1', { trackerHead: '' } as any);
+
+ expect(auditService.log).toHaveBeenCalledTimes(1);
+ const logged = auditService.log.mock.calls[0][0];
+ expect(logged.changes.before.trackerHead).toBe('');
+ expect(logged.changes.after.trackerHead).toBe('');
+ });
});
diff --git a/apps/server/src/core/workspace/services/workspace.service.ts b/apps/server/src/core/workspace/services/workspace.service.ts
index deead2b8..bb564e79 100644
--- a/apps/server/src/core/workspace/services/workspace.service.ts
+++ b/apps/server/src/core/workspace/services/workspace.service.ts
@@ -525,6 +525,22 @@ export class WorkspaceService {
);
}
+ if (typeof updateWorkspaceDto.trackerHead !== 'undefined') {
+ // Admin-only analytics/tracker snippet injected into the <head> of
+ // public share pages (same-origin). Persisted at settings.trackerHead.
+ const prev = (settingsBefore as any)?.trackerHead ?? '';
+ if (prev !== updateWorkspaceDto.trackerHead) {
+ before.trackerHead = prev;
+ after.trackerHead = updateWorkspaceDto.trackerHead;
+ }
+ await this.workspaceRepo.updateSetting(
+ workspaceId,
+ 'trackerHead',
+ updateWorkspaceDto.trackerHead,
+ trx,
+ );
+ }
+
if (typeof updateWorkspaceDto.aiPublicShareAssistant !== 'undefined') {
const prev = settingsBefore?.ai?.publicShareAssistant ?? false;
if (prev !== updateWorkspaceDto.aiPublicShareAssistant) {
@@ -549,6 +565,7 @@ export class WorkspaceService {
delete updateWorkspaceDto.aiChat;
delete updateWorkspaceDto.aiDictation;
delete updateWorkspaceDto.htmlEmbed;
+ delete updateWorkspaceDto.trackerHead;
delete updateWorkspaceDto.aiPublicShareAssistant;
await this.workspaceRepo.updateWorkspace(
diff --git a/apps/server/src/integrations/ai/ai-settings.service.ts b/apps/server/src/integrations/ai/ai-settings.service.ts
index c5df5f45..6dafe127 100644
--- a/apps/server/src/integrations/ai/ai-settings.service.ts
+++ b/apps/server/src/integrations/ai/ai-settings.service.ts
@@ -3,6 +3,7 @@ import { InjectQueue } from '@nestjs/bullmq';
import { Queue } from 'bullmq';
import { QueueName, QueueJob } from '../queue/constants';
import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
+import { AiAgentRoleRepo } from '@docmost/db/repos/ai-agent-roles/ai-agent-roles.repo';
import { AiProviderCredentialsRepo } from '@docmost/db/repos/ai-chat/ai-provider-credentials.repo';
import { PageEmbeddingRepo } from '@docmost/db/repos/ai-chat/page-embedding.repo';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
@@ -49,6 +50,7 @@ export interface UpdateAiSettingsInput {
export class AiSettingsService {
constructor(
private readonly workspaceRepo: WorkspaceRepo,
+ private readonly aiAgentRoleRepo: AiAgentRoleRepo,
private readonly aiProviderCredentialsRepo: AiProviderCredentialsRepo,
private readonly pageEmbeddingRepo: PageEmbeddingRepo,
private readonly pageRepo: PageRepo,
@@ -110,6 +112,26 @@ export class AiSettingsService {
return settings?.ai?.publicShareAssistant === true;
}
+ /**
+ * Resolve the display name of the agent role acting as the public-share
+ * assistant's identity, so the anonymous widget can label messages with the
+ * persona name instead of the generic "AI agent". Returns null when no role
+ * is configured, or the referenced role is missing/disabled (built-in persona
+ * → the client falls back to "AI agent"). Mirrors the role resolution in
+ * PublicShareChatService.resolveShareRole.
+ */
+ async resolvePublicShareAssistantName(
+ workspaceId: string,
+ ): Promise {
+ const resolved = await this.resolve(workspaceId);
+ const roleId = resolved?.publicShareAssistantRoleId;
+ if (!roleId) return null;
+ const role = await this.aiAgentRoleRepo.findById(roleId, workspaceId);
+ if (!role || !role.enabled) return null;
+ const name = role.name?.trim();
+ return name ? name : null;
+ }
+
/** Read the stored non-secret provider settings for a workspace. */
private async readProvider(
workspaceId: string,
diff --git a/apps/server/src/integrations/import/services/file-import-task.service.ts b/apps/server/src/integrations/import/services/file-import-task.service.ts
index aa39a085..218c75ca 100644
--- a/apps/server/src/integrations/import/services/file-import-task.service.ts
+++ b/apps/server/src/integrations/import/services/file-import-task.service.ts
@@ -20,12 +20,6 @@ import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
import { markdownToHtml } from '@docmost/editor-ext';
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
-import {
- isHtmlEmbedFeatureEnabled,
- stripHtmlEmbedIfNotAllowed,
-} from '../../../common/helpers/prosemirror/html-embed.util';
-import { UserRepo } from '@docmost/db/repos/user/user.repo';
-import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { formatImportHtml } from '../utils/import-formatter';
import {
buildAttachmentCandidates,
@@ -59,8 +53,6 @@ export class FileImportTaskService {
private readonly backlinkRepo: BacklinkRepo,
@InjectKysely() private readonly db: KyselyDB,
private readonly importAttachmentService: ImportAttachmentService,
- private readonly userRepo: UserRepo,
- private readonly workspaceRepo: WorkspaceRepo,
private eventEmitter: EventEmitter2,
@Inject(AUDIT_SERVICE) private readonly auditService: IAuditService,
) {}
@@ -157,25 +149,6 @@ export class FileImportTaskService {
.where('id', '=', fileTask.spaceId)
.executeTakeFirst();
- // SECURITY (Variant C admin gate, zip/multi-file import write path):
- // An imported .html/.md file can carry an htmlEmbed marker (the node's
- // serialized form), which would execute raw, unsanitized JS in readers'
- // browsers. Only workspace admins/owners may author it. Resolve the
- // importer's role ONCE here; each page's prosemirror JSON is run through the
- // strip below before textContent/ydoc/insert when the importer is not an
- // admin, so a non-admin cannot smuggle the node in via a zip import (which
- // requires only space Edit).
- const importingUser = await this.userRepo.findById(
- fileTask.creatorId,
- fileTask.workspaceId,
- );
- // Toggle-AND-admin gate, resolved ONCE for the whole import: htmlEmbed
- // survives only when the workspace feature toggle is ON and the importer is
- // an admin/owner. OFF (default) => stripped for everyone.
- const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
- (await this.workspaceRepo.findById(fileTask.workspaceId))?.settings,
- );
-
const pagesMap = new Map();
for (const absPath of allFiles) {
@@ -523,22 +496,9 @@ export class FileImportTaskService {
await this.importService.processHTML(html),
);
- let { title, prosemirrorJson } =
+ const { title, prosemirrorJson } =
this.importService.extractTitleAndRemoveHeading(pmState);
- // SECURITY (Variant C admin gate): strip htmlEmbed nodes from pages
- // imported by a non-admin BEFORE computing textContent/ydoc/insert.
- // Gate (featureEnabled AND admin) is resolved once above and recomputed
- // by the helper from the same htmlEmbedEnabled + importer role.
- prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
- featureEnabled: htmlEmbedEnabled,
- role: importingUser?.role,
- onStrip: () =>
- this.logger.warn(
- `Stripping htmlEmbed node(s) from non-admin import by user ${fileTask.creatorId} (page ${page.id}, file ${filePath})`,
- ),
- });
-
const insertablePage: InsertablePage = {
id: page.id,
slugId: page.slugId,
diff --git a/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts b/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts
deleted file mode 100644
index d2902be0..00000000
--- a/apps/server/src/integrations/import/services/import-html-embed-identity.spec.ts
+++ /dev/null
@@ -1,266 +0,0 @@
-// Exercises the REAL htmlEmbed admin gate on the two import write paths:
-//
-// (1) ImportService.importPage() — single .html/.md upload
-// (2) FileImportTaskService.processGenericImport() — zip / multi-file import
-//
-// Both build content/textContent/ydoc directly and persist (bypassing the
-// collab onStoreDocument strip), so each must run the imported document through
-// the toggle-AND-admin gate: resolve the importer via userRepo.findById, read
-// the workspace toggle, then `htmlEmbedAllowed(enabled, role)` -> if not allowed,
-// `stripHtmlEmbedNodes` BEFORE persisting.
-//
-// This spec constructs the REAL services with deps mocked, feeds an imported
-// HTML document that contains an `htmlEmbed` div (parsed into a real htmlEmbed
-// node by the REAL htmlToJson), runs the real method, and asserts the PERSISTED
-// content (captured at the insert boundary) is stripped for a non-admin /
-// missing user and preserved for admin/owner + toggle ON. Mirrors the GOOD
-// pattern in transclusion/spec/transclusion-unsync-html-embed.spec.ts.
-//
-// Three modules are mocked away because they pull transitive ESM deps that
-// jest's transformIgnorePatterns does not transpile (`lib0/decoding.js` via the
-// collab gateway, `@sindresorhus/slugify` via import-formatter, `p-limit` via
-// import-attachment). None of them participate in the gate decision:
-// - import-formatter: contextless HTML cleanup + link rewriting; replaced with
-// faithful passthroughs (the embed div has no href/iframe, so the real
-// normalizer would leave it untouched anyway).
-// - import-attachment: attachment rewriting; passthrough returns html as-is.
-jest.mock('../../../collaboration/collaboration.gateway', () => ({
- CollaborationGateway: class {},
-}));
-jest.mock('../utils/import-formatter', () => ({
- normalizeImportHtml: () => {},
- formatImportHtml: async (opts: any) => ({
- html: opts.html,
- backlinks: [],
- pageIcon: undefined,
- }),
-}));
-jest.mock('./import-attachment.service', () => ({
- ImportAttachmentService: class {},
-}));
-
-import { promises as fs } from 'node:fs';
-import * as os from 'node:os';
-import * as path from 'node:path';
-import { ImportService } from './import.service';
-import { FileImportTaskService } from './file-import-task.service';
-import { hasHtmlEmbedNode } from '../../../common/helpers/prosemirror/html-embed.util';
-
-const WS = 'ws-1';
-const SPACE = 'space-1';
-const USER = 'importer-1';
-
-// HTML carrying the serialized htmlEmbed node. The REAL htmlToJson parses
-// `` into an htmlEmbed PM node
-// (base64 below decodes to ``).
-const HTML_WITH_EMBED =
- '
imported body
' +
- '
';
-
-function workspaceRepoFor(featureEnabled: boolean) {
- return {
- findById: jest.fn(async () => ({
- id: WS,
- settings: { htmlEmbed: featureEnabled },
- })),
- };
-}
-
-// userRepo.findById resolves the importer's role (or undefined for a missing
-// user -> fail closed).
-function userRepoFor(user: { role?: string } | undefined) {
- return { findById: jest.fn(async () => user) };
-}
-
-describe('ImportService.importPage htmlEmbed admin gate (real code)', () => {
- // Run importPage with a single uploaded .html and return the persisted content
- // captured at pageRepo.insertPage.
- async function persistedContent(
- featureEnabled: boolean,
- user: { role?: string } | undefined,
- ) {
- const captured: any[] = [];
- const pageRepo: any = {
- insertPage: jest.fn(async (row: any) => {
- captured.push(row);
- return { id: 'p1', slugId: 's1', ...row };
- }),
- };
- // db is only used for getNewPagePosition (a select chain).
- const selectChain: any = {
- select: () => selectChain,
- where: () => selectChain,
- orderBy: () => selectChain,
- limit: () => selectChain,
- executeTakeFirst: async () => undefined,
- };
- const db: any = { selectFrom: () => selectChain };
-
- const service = new ImportService(
- pageRepo,
- userRepoFor(user) as any,
- { putBuffer: jest.fn() } as any, // storageService (unused on this path)
- db,
- { add: jest.fn() } as any, // fileTaskQueue (unused)
- workspaceRepoFor(featureEnabled) as any,
- );
-
- const file: any = {
- filename: 'doc.html',
- toBuffer: async () => Buffer.from(HTML_WITH_EMBED, 'utf-8'),
- };
- await service.importPage(Promise.resolve(file), USER, SPACE, WS);
- expect(captured).toHaveLength(1);
- return captured[0].content;
- }
-
- it('toggle ON + member: persisted content has htmlEmbed stripped', async () => {
- const content = await persistedContent(true, { role: 'member' });
- expect(hasHtmlEmbedNode(content)).toBe(false);
- expect(JSON.stringify(content)).toContain('imported body');
- });
-
- it('toggle ON + missing user (findById -> undefined): fails closed (stripped)', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe(
- false,
- );
- });
-
- it('toggle ON + admin: persisted content keeps the htmlEmbed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe(
- true,
- );
- });
-
- it('toggle ON + owner: persisted content keeps the htmlEmbed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe(
- true,
- );
- });
-
- it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
- expect(
- hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })),
- ).toBe(false);
- });
-});
-
-describe('FileImportTaskService.processGenericImport htmlEmbed admin gate (real code)', () => {
- let extractDir: string;
-
- beforeEach(async () => {
- // Real temp dir holding a single .html page that carries the embed; the
- // method reads it from disk via fs.readFile.
- extractDir = await fs.mkdtemp(path.join(os.tmpdir(), 'html-embed-import-'));
- await fs.writeFile(path.join(extractDir, 'page.html'), HTML_WITH_EMBED);
- });
-
- afterEach(async () => {
- await fs.rm(extractDir, { recursive: true, force: true });
- });
-
- // Run processGenericImport over the temp dir and return the content persisted
- // for the imported page (captured at trx.insertInto('pages').values(...)).
- async function persistedContent(
- featureEnabled: boolean,
- user: { role?: string } | undefined,
- ) {
- const captured: any[] = [];
- const trxInsertChain = (table: string) => ({
- values: (row: any) => {
- if (table === 'pages') captured.push(row);
- return { execute: async () => undefined };
- },
- });
- const trx: any = { insertInto: trxInsertChain };
- const db: any = {
- // spaces lookup at the top of processGenericImport
- selectFrom: () => ({
- select: () => ({
- where: () => ({ executeTakeFirst: async () => ({ slug: 'sp' }) }),
- }),
- }),
- // executeTx -> db.transaction().execute(cb)
- transaction: () => ({ execute: async (cb: any) => cb(trx) }),
- };
-
- // importService stub: only the real, gate-relevant helpers are used. We give
- // it the REAL implementations by delegating to a real ImportService for
- // processHTML/extractTitleAndRemoveHeading/createYdoc so the embed parse and
- // strip path runs for real.
- const realImport = new ImportService(
- {} as any,
- {} as any,
- {} as any,
- {} as any,
- {} as any,
- {} as any,
- );
- const importService: any = {
- processHTML: (html: string) => realImport.processHTML(html),
- extractTitleAndRemoveHeading: (s: any) =>
- realImport.extractTitleAndRemoveHeading(s),
- createYdoc: (j: any) => realImport.createYdoc(j),
- };
-
- const importAttachmentService: any = {
- // passthrough: no attachment rewriting, return html unchanged
- processAttachments: jest.fn(async (opts: any) => opts.html),
- };
-
- const service = new FileImportTaskService(
- { putBuffer: jest.fn() } as any, // storageService
- importService,
- { nextPagePosition: jest.fn(async () => 'a0') } as any, // pageService (position only)
- { insertBacklink: jest.fn() } as any, // backlinkRepo
- db,
- importAttachmentService,
- userRepoFor(user) as any,
- workspaceRepoFor(featureEnabled) as any,
- { emit: jest.fn() } as any, // eventEmitter
- { logBatchWithContext: jest.fn() } as any, // auditService
- );
-
- const fileTask: any = {
- id: 'task-1',
- creatorId: USER,
- workspaceId: WS,
- spaceId: SPACE,
- source: 'generic',
- };
-
- await service.processGenericImport({ extractDir, fileTask });
- expect(captured.length).toBeGreaterThanOrEqual(1);
- return captured[0].content;
- }
-
- it('toggle ON + member: persisted page has htmlEmbed stripped', async () => {
- const content = await persistedContent(true, { role: 'member' });
- expect(hasHtmlEmbedNode(content)).toBe(false);
- expect(JSON.stringify(content)).toContain('imported body');
- });
-
- it('toggle ON + missing user (creatorId resolves to undefined): fails closed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, undefined))).toBe(
- false,
- );
- });
-
- it('toggle ON + admin: persisted page keeps the htmlEmbed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'admin' }))).toBe(
- true,
- );
- });
-
- it('toggle ON + owner: persisted page keeps the htmlEmbed', async () => {
- expect(hasHtmlEmbedNode(await persistedContent(true, { role: 'owner' }))).toBe(
- true,
- );
- });
-
- it('toggle OFF + admin: stripped (feature disabled for everyone)', async () => {
- expect(
- hasHtmlEmbedNode(await persistedContent(false, { role: 'admin' })),
- ).toBe(false);
- });
-});
diff --git a/apps/server/src/integrations/import/services/import.service.ts b/apps/server/src/integrations/import/services/import.service.ts
index cf602b77..19bffe8d 100644
--- a/apps/server/src/integrations/import/services/import.service.ts
+++ b/apps/server/src/integrations/import/services/import.service.ts
@@ -1,12 +1,5 @@
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
import { PageRepo } from '@docmost/db/repos/page/page.repo';
-import { UserRepo } from '@docmost/db/repos/user/user.repo';
-import {
- hasHtmlEmbedNode,
- isHtmlEmbedFeatureEnabled,
- stripHtmlEmbedIfNotAllowed,
-} from '../../../common/helpers/prosemirror/html-embed.util';
-import { WorkspaceRepo } from '@docmost/db/repos/workspace/workspace.repo';
import { MultipartFile } from '@fastify/multipart';
import * as path from 'path';
import {
@@ -44,12 +37,10 @@ export class ImportService {
constructor(
private readonly pageRepo: PageRepo,
- private readonly userRepo: UserRepo,
private readonly storageService: StorageService,
@InjectKysely() private readonly db: KyselyDB,
@InjectQueue(QueueName.FILE_TASK_QUEUE)
private readonly fileTaskQueue: Queue,
- private readonly workspaceRepo: WorkspaceRepo,
) {}
async importPage(
@@ -94,32 +85,7 @@ export class ImportService {
const extracted = this.extractTitleAndRemoveHeading(prosemirrorState);
const title = extracted.title;
- let prosemirrorJson = extracted.prosemirrorJson;
-
- // SECURITY (Variant C admin gate, import write path):
- // An imported .html/.md file can carry an htmlEmbed marker (the node's
- // serialized form), which would execute raw JS in readers' browsers. Only
- // workspace admins/owners may author it, so strip htmlEmbed nodes from
- // imports performed by a non-admin user.
- // Outer has-check first so the user/workspace lookups below run only when an
- // embed is actually present (the common case carries none).
- if (prosemirrorJson && hasHtmlEmbedNode(prosemirrorJson)) {
- const importingUser = await this.userRepo.findById(userId, workspaceId);
- // Toggle-AND-admin gate: htmlEmbed survives only when the workspace
- // feature toggle is ON and the importer is an admin/owner. OFF (default)
- // => stripped for everyone.
- const htmlEmbedEnabled = isHtmlEmbedFeatureEnabled(
- (await this.workspaceRepo.findById(workspaceId))?.settings,
- );
- prosemirrorJson = stripHtmlEmbedIfNotAllowed(prosemirrorJson, {
- featureEnabled: htmlEmbedEnabled,
- role: importingUser?.role,
- onStrip: () =>
- this.logger.warn(
- `Stripping htmlEmbed node(s) from import by user ${userId}`,
- ),
- });
- }
+ const prosemirrorJson = extracted.prosemirrorJson;
const pageTitle = title || fileName;
diff --git a/docs/backlog/ai-chat-tool-definitions-duplicated.md b/docs/backlog/ai-chat-tool-definitions-duplicated.md
index 48be329d..2b78dd9b 100644
--- a/docs/backlog/ai-chat-tool-definitions-duplicated.md
+++ b/docs/backlog/ai-chat-tool-definitions-duplicated.md
@@ -60,6 +60,36 @@ agent-claim, `docmost-client.loader.ts:159` — `getCollabToken`; см. план
встроенный агент получал устаревшую подсказку. Это и есть материализованный
parity-баг.
+## Расширение: дублируются не только описания инструментов — ещё и конвертер (PM ↔ Markdown)
+
+Зафиксировано при планировании встраивания git-синка (`docmost-sync` → gitmost,
+нативная in-process интеграция). Та же болезнь «несколько рукописных копий одного
+кода» теперь касается слоя конвертации ProseMirror ↔ Markdown и его lib, а не
+только метаданных инструментов.
+
+- **Копия в gitmost** — `packages/mcp/src/lib/`: `markdown-converter.ts` (~885
+ строк), `markdown-document.ts` (~136), `node-ops.ts`, `diff.ts`,
+ `docmost-schema.ts`. Канонизатора (`canonicalize.ts`) здесь НЕТ.
+- **Копия в docmost-sync** — `packages/docmost-client/src/lib/`: тот же набор +
+ `canonicalize.ts` (~11 КБ, держит идемпотентность round-trip, SPEC §11) +
+ `markdown-document.ts` с режимом «тело + якоря, без тредов комментов»
+ (`includeCommentThreads:false`, на ~20 строк больше).
+- **Третья копия (планируется)** — план git-синка вендорит чистую часть
+ конвертера в новый `packages/git-sync` (collab-файл не нужен: запись идёт
+ нативно через `openDirectConnection` + `@docmost/editor-ext`).
+
+Копии уже молча разъехались (docmost-sync vs `packages/mcp`): `collaboration.ts`
+~329 изменённых строк, `node-ops.ts` ~53, `markdown-converter.ts` ~24,
+`markdown-document.ts` ~20. Отдельно: `docmost-schema.ts` в lib дублирует
+**реальную** схему сервера `@docmost/editor-ext` (её использует collab/persistence)
+— расхождение схем = риск битой конвертации нод.
+
+Вывод: тот же фикс-вектор (единый источник правды), что и для инструментов, стоит
+распространить на конвертер — общий пакет конвертации, потребляемый `mcp`,
+`git-sync` и (в идеале) сервером. До конвергенции git-sync держит вендоренную
+копию валидированного конвертера с гейтом round-trip против схемы `editor-ext`
+(осознанный долг «третья копия сейчас, объединяем позже»).
+
## Фикс
Единый реестр спеков (полное устранение дублирования).** Вынести в
diff --git a/docs/backlog/pages-import-broken-400.md b/docs/backlog/pages-import-broken-400.md
deleted file mode 100644
index f3975af0..00000000
--- a/docs/backlog/pages-import-broken-400.md
+++ /dev/null
@@ -1,121 +0,0 @@
-# /pages/import отдаёт 400 «Error processing file content» (регресс)
-
-Статус: **диагностируемость починена** (fix #1 применён); корневая причина **не
-подтверждена** — на текущем коде локально баг воспроизвести не удалось.
-Ниже — что удалось выяснить, главный подозреваемый и что проверить дальше.
-
-## Симптом
-
-На задеплоенном инстансе эндпоинт `POST /pages/import` отдаёт
-`400 BadRequest` с телом «Error processing file content». Раньше работал —
-похоже на регресс после редеплоя гитмоста.
-
-Через этот эндпоинт грузит контент MCP-инструмент `create_page` (это
-единственный эндпоинт, принимающий контент при создании страницы —
-см. комментарий в `packages/mcp/src/client.ts:961`).
-
-Что при этом **исправно** (важно для локализации):
-- `POST /pages/create` — создание пустой страницы.
-- `update_page_json` — запись контента через realtime-коллаборацию (Yjs).
-
-## Где именно бросается ошибка
-
-`apps/server/src/integrations/import/services/import.service.ts:93-97` —
-`try/catch` вокруг обработки контента:
-
-```ts
-} catch (err) {
- const message = 'Error processing file content';
- this.logger.error(message, err); // реальная причина логируется ТОЛЬКО в логи
- throw new BadRequestException(message); // наружу уходит generic-строка
-}
-```
-
-Реальный текст ошибки/стек **проглатывается** (наружу — generic-строка), что
-нарушает конвенцию проекта (см. CLAUDE.md, «Errors must never be swallowed»).
-Поэтому по ответу 400 причину не видно — её надо читать в логах сервера
-(`logger.error(message, err)` пишет полный err) ИЛИ воспроизвести локально.
-
-## Цепочка обработки для .md (что внутри try)
-
-`importPage` → `processMarkdown(fileContent)`:
-1. `markdownToHtml` (`packages/editor-ext/.../marked.utils.ts`) — marked, чистый JS, без DOM.
-2. `processHTML`: cheerio `load` → `normalizeImportHtml` (`utils/import-formatter.ts`) — чистый JS.
-3. `htmlToJson` (`apps/server/src/collaboration/collaboration.util.ts:118`) →
- `generateJSON(html, tiptapExtensions)`.
-
-## Ключевая зацепка: путь импорта зависит от happy-dom, рабочие пути — нет
-
-`generateJSON` (`apps/server/src/common/helpers/prosemirror/html/generateJSON.ts`)
-парсит HTML через **happy-dom**: `new Window()` + `new localWindow.DOMParser()` +
-`parseFromString(...)`, затем `PMDOMParser.fromSchema(schema).parse(doc.body)`.
-
-А исправные пути DOM-парсер НЕ используют:
-- `/pages/create` — пустая страница, контент не парсится.
-- `update_page_json` — пишет готовый ProseMirror-JSON в Yjs
- (`TiptapTransformer.toYdoc`), без HTML→DOM.
-
-То есть единственное, что есть в сломанном пути и отсутствует в рабочих, —
-**серверный парсинг HTML через happy-dom**.
-
-## Главный подозреваемый: бамп happy-dom (14 → 20)
-
-- Изначально было `"happy-dom": "^14.12.3"`.
-- Сейчас запинено `"happy-dom": "20.8.9"` в `apps/server/package.json:83`
- (+ override в корневом `package.json`).
-- Пин на `20.8.9` пришёл в коммите `17da7629 "overrides"`
- (Philipinho, 2026-03-28), где `20.8.4` → `20.8.9`.
-- Скачок 14 → 20 — это 6 мажоров; у happy-dom между мажорами ломающие
- изменения в API `Window`/`DOMParser` и в поведении парсинга HTML. Очень
- вероятно, что `generateJSON` ломается на новом happy-dom.
-
-Версия в node_modules подтверждена: `happy-dom@20.8.9` (симлинк свежий).
-
-## Второстепенный подозреваемый
-
-`getSchema(tiptapExtensions)` / `PMDOMParser.parse(...)` могут спотыкаться на
-`parseHTML`-правилах недавно добавленных нод (synced blocks/transclusion,
-page break, indent, columns, status — все они в `tiptapExtensions`). Но
-`getSchema` используется и в рабочем пути (`createYdoc`/`update_page_json`),
-поэтому сам по себе билд схемы скорее всего цел — под подозрением именно
-DOM-парс-ветка, уникальная для импорта.
-
-## Направления фикса
-
-1. **Диагностируемость — ✅ СДЕЛАНО (по конвенции проекта).** В catch-блоках
- `import.service.ts` (обработка контента + вставка страницы) реальная
- причина теперь прокидывается наружу: `BadRequestException` несёт
- `${err.name}: ${err.message}`, а в лог пишется полный `err` со стеком.
- Раньше наружу уходила generic-строка "Error processing file content".
- Теперь при повторе 400 на проде реальный reason будет виден прямо в теле
- ответа — без необходимости лезть в логи.
-2. **Корневой фикс — ⏳ НЕ ПОДТВЕРЖДЁН.** Гипотеза happy-dom 14→20 **не
- подтвердилась** при локальном воспроизведении на текущем коде (см. ниже).
- Применять блайнд-даунгрейд happy-dom нельзя — нужен реальный stack из
- логов/ответа после повторения.
-
-## Локальное воспроизведение (выполнено)
-
-На текущем `main` (happy-dom 20.8.9) вся цепочка импорта `.md` отработала
-без ошибок через `tsx` (импорты прямо из source, не из dist):
-
-- `markdownToHtml` → cheerio `load` → `normalizeImportHtml` → `generateJSON`
- с полным набором из 44 `tiptapExtensions` — **OK** для:
- - базового markdown (заголовки, bold/italic, списки, таблицы, code-block,
- blockquote)
- - edge-cases: пустой контент, whitespace, HTML-сущности, вложенные списки,
- task-list, emoji, кириллица, спецсимволы в code, ссылки, изображения, hr
-- API happy-dom 20.8.9, используемые в `generateJSON`, существуют и работают:
- `new Window()`, `new localWindow.DOMParser()`, `parseFromString('…',
- 'text/html')`, `happyDOM.abort()` (async), `happyDOM.close()` (async).
-- Блок `finally` в `generateJSON` вызывает `abort()/close()` без `await` и без
- `try/catch`, но эти методы не бросают синхронно и не перезаписывают
- результат — **не является** причиной 400 (проверено отдельным тестом).
-- Все `parseHTML`-правила расширений (status, transclusion, page-break,
- columns, subpages и т.д.) участвуют в успешном тесте — ни одно не падает.
-
-Вывод: на текущем коде баг **не воспроизводится**. Вероятные объяснения —
-контент-специфичный кейс, которого нет в тестах; разница между source и
-собранным `dist`; либо временное состояние задеплоенного инстанса. После
-применения fix #1 повторный 400 покажет реальный reason — по нему и искать
-корень.
diff --git a/docs/git-sync-plan.md b/docs/git-sync-plan.md
new file mode 100644
index 00000000..4e3a5f11
--- /dev/null
+++ b/docs/git-sync-plan.md
@@ -0,0 +1,534 @@
+# Git-sync: спека реализации (встраивание docmost-sync в gitmost)
+
+Статус: **спецификация, код не менялся.** Детальный план реализации фичи
+«двусторонний синк страниц Docmost ↔ локальная git-папка Markdown», встроенной
+прямо в gitmost.
+
+Источник движка: `https://gitea.vvzvlad.xyz/vvzvlad/docmost-sync`
+(ветка `main`, на момент спеки HEAD `b03eb35`). Все сигнатуры ниже сверены с этим
+исходником и с текущим кодом gitmost.
+
+Предыстория и обоснование архитектурных развилок — в бэклоге
+[ai-chat-tool-definitions-duplicated.md](backlog/ai-chat-tool-definitions-duplicated.md)
+(раздел про дублирование конвертера) и в исходном `SPEC.md` репозитория
+docmost-sync (нумерация §-параграфов ниже ссылается на него).
+
+---
+
+## 0. Зафиксированные решения
+
+Из обсуждения архитектуры (выбор пользователя) и трёх суб-решений:
+
+1. **Нативная in-process интеграция.** Никаких REST-к-себе и сервис-юзера: чтение
+ через репозитории gitmost, запись тела — через collab `openDirectConnection`,
+ триггеры — через `EventEmitter2` вместо поллинга `/recent`.
+2. **Встроенный NestJS-модуль** `GitSyncModule` в `apps/server/src/integrations/git-sync`
+ с `@Interval`/событиями и **leader-lock на Redis** (single-writer при нескольких
+ репликах).
+3. **Настройка по спейсам в UI** — флаг в `space.settings.gitSync`, секреты
+ (git-remote) — через ENV/`EnvironmentService`.
+4. **Конвертер** — вендорим *чистую* часть из docmost-sync в `packages/git-sync`,
+ гейт = round-trip-идемпотентность против схемы `@docmost/editor-ext`.
+5. **Vault** — **репозиторий на спейс**; `move-to-space` = кросс-репо delete+create.
+6. **Провенанс** — отдельное значение `lastUpdatedSource = 'git-sync'`.
+
+Вне scope v1 (как и в SPEC): комментарии (только якоря, без тредов), права/ACL,
+вложения как отдельный поток (едут ссылками внутри контента), realtime-подписка
+на Hocuspocus (остаётся поллинг-страховка + события).
+
+---
+
+## 1. Архитектура верхнего уровня
+
+```
+ gitmost server (NestJS, один процесс)
+ ┌─────────────────────────────────────────────────────────────┐
+ │ GitSyncModule │
+ │ │
+ │ GitSyncOrchestrator ── @Interval + Redis leader-lock │
+ │ │ (per enabled space: pull-cycle / push-cycle) │
+ │ │ │
+ │ ├── engine (vendored docmost-sync, IO инжектируется) │
+ │ │ pull.ts / push.ts / reconcile / layout / stabilize │
+ │ │ │
+ │ ├── GitmostDataSource ── реализует подмножество │
+ │ │ DocmostClient НАТИВНО: │
+ │ │ reads → PageRepo / SpaceRepo (Kysely) │
+ │ │ writes → CollaborationGateway.openDirectConnection│
+ │ │ + PageService (create/move/delete/...) │
+ │ │ │
+ │ └── VaultGit ── shell-out в системный git (как есть) │
+ │ │
+ │ PageChangeListener ── подписка на EventName.PAGE_* → │
+ │ debounce → enqueue push-cycle │
+ └─────────────────────────────────────────────────────────────┘
+ ▲ читает/пишет страницы ▼ git push/pull
+ PostgreSQL (pages/spaces) data/git-sync/
+ <spaceId>/ (vault) → remote
+```
+
+Ключ интеграции: движок docmost-sync уже **полностью построен на dependency
+injection** — весь внешний IO (REST-клиент, git, файловая система) передаётся
+через узкие интерфейсы. Мы НЕ переписываем движок; мы подставляем нативные
+реализации в его DI-швы.
+
+---
+
+## 2. Состав вендоринга из docmost-sync
+
+В новый пакет `packages/git-sync` копируем (с сохранением истории смысла —
+backport-friendly, как сделано с `packages/mcp`):
+
+### 2.1. Движок (engine) — `src/engine/`
+| Файл | Что несёт | IO | Берём |
+| --- | --- | --- | --- |
+| `pull.ts` | Docmost→FS: reconcile + write + commit + merge | client+git+fs (инжектируется) | да |
+| `push.ts` | FS→Docmost: diff + classify + apply + refs | client+git+fs (инжектируется) | да |
+| `git.ts` | `VaultGit` — обёртка git shell-out | системный `git` | да, как есть |
+| `reconcile.ts` | чистый планировщик | нет | да |
+| `layout.ts` | чистый маппер дерево→пути | нет | да |
+| `sanitize.ts` | чистая санитизация имён | нет | да |
+| `stabilize.ts` | fixpoint-нормализация md (SPEC §11) | нет (lib-вызовы) | да |
+| `loop-guard.ts` | `bodyHash` (sha256) | нет | да |
+| `settings.ts` | zod-конфиг | `.env` | **адаптируем** (см. §7) |
+| `index.ts` | тонкий CLI-скаффолд | — | нет (заменяем на NestJS) |
+
+### 2.2. Конвертер (чистая часть) — `src/lib/`
+Из `packages/docmost-client/src/lib/` берём **только** чистый конвертер и формат
+файла (collab/auth REST-части НЕ нужны — запись нативная):
+
+| Файл | Экспорт |
+| --- | --- |
+| `markdown-converter.ts` | `convertProseMirrorToMarkdown(content): string` |
+| `collaboration.ts` (только конвертер-функция) | `markdownToProseMirror(md): Promise` ⚠️ |
+| `markdown-document.ts` | `serializeDocmostMarkdownBody`, `parseDocmostMarkdown`, `serializeDocmostMarkdown`, тип `DocmostMdMeta` |
+| `canonicalize.ts` | `canonicalizeContent(node)`, `docsCanonicallyEqual(a,b)` |
+| `docmost-schema.ts` | tiptap-схема для `markdownToProseMirror` |
+| `node-ops.ts`, `diff.ts` | трансформации/диф (нужны транзитивно) |
+
+⚠️ `markdownToProseMirror` физически лежит в `collaboration.ts` docmost-client
+(строка 289) — это **чистая** функция (marked→HTML→generateJSON), не путать с
+collab/websocket write-path из того же файла, который НЕ берём.
+
+> **Долг (зафиксирован в бэклоге):** это третья копия конвертера (есть в
+> docmost-sync, в `packages/mcp`, теперь в `packages/git-sync`). Конвергенция в
+> общий пакет — отдельная задача; здесь сознательно вендорим валидированную
+> копию ради сохранения идемпотентности.
+
+### 2.3. НЕ берём
+`pull`/`push` CLI-обёртки, `roundtrip.ts` (харнес переносим в тесты, см. §13),
+`docmost-client` REST-клиент целиком, `lib/collaboration.ts` (websocket-write),
+`lib/auth-utils.ts`, `Makefile`, Docker-обвязку docmost-sync.
+
+---
+
+## 3. Главный шов: `GitmostDataSource`
+
+Движок дёргает Docmost через `Pick<DocmostClient, …>`. Мы реализуем класс,
+**структурно совместимый** с этими сигнатурами, но нативный внутри. Это
+единственный нетривиальный новый код.
+
+### 3.1. Точный набор методов, которых требует движок
+
+Из `pull.ts` (`ApplyPullActionsDeps.client`) и обхода дерева:
+```ts
+listSpaceTree(spaceId: string, rootPageId?: string): Promise<{ pages: PageNode[]; complete: boolean }>;
+getPageJson(pageId: string): Promise<{ id; slugId; title; parentPageId; spaceId; updatedAt; content }>;
+```
+
+Из `push.ts` (`ApplyPushDeps.client`):
+```ts
+importPageMarkdown(pageId: string, fullMarkdown: string): Promise<{ updatedAt?: string; /* … */ }>;
+createPage(title: string, content: string, spaceId: string, parentPageId?: string): Promise<{ data: { id: string }; updatedAt?: string }>;
+deletePage(pageId: string): Promise;
+movePage(pageId: string, parentPageId: string | null, position?: string): Promise;
+renamePage(pageId: string, title: string): Promise;
+```
+
+Для непрерывного режима/детекции удалений (фаза B+, SPEC §8):
+```ts
+listRecentSince(spaceId: string | undefined, sinceIso: string | null, hardPageCap?: number): Promise;
+listTrash(spaceId: string): Promise;
+restorePage(pageId: string): Promise;
+```
+
+### 3.2. Маппинг на нативные сервисы gitmost
+
+| Метод адаптера | Нативная реализация |
+| --- | --- |
+| `listSpaceTree(spaceId)` | `SpaceRepo.findById(spaceId, wsId)` + `PageRepo.getSpaceDescendants(spaceId, { includeContent: false })` → map в `PageNode { id, title, slugId, parentPageId, hasChildren }`. **`complete: true` всегда** (читаем БД, не пагинированный REST) → суппрессия `incomplete-fetch` из SPEC §8 нативно не срабатывает. |
+| `getPageJson(pageId)` | `PageRepo.findById(pageId, { includeContent: true })` → `{ id, slugId, title, parentPageId, spaceId, updatedAt, content }`. `content` — ProseMirror JSON в схеме `editor-ext`. |
+| `importPageMarkdown(pageId, fullMd)` | `parseDocmostMarkdown(fullMd)` → body; `await markdownToProseMirror(body)` → doc; **запись через collab** (см. §3.3). Вернуть `{ updatedAt }` свежей страницы. |
+| `createPage(title, body, spaceId, parent?)` | `PageService.create(userId, wsId, { spaceId, title, parentPageId }, provenance)` → shell; затем тело через collab (§3.3). Вернуть `{ data: { id }, updatedAt }`. |
+| `deletePage(pageId)` | `PageService.removePage(pageId, userId, wsId)` (soft-delete → Trash, обратимо). |
+| `movePage(pageId, parent, pos?)` | `PageService.movePage({ pageId, parentPageId: parent, position }, movedPage, provenance)`. **`position` обязателен** для Docmost-move — вычисляем `fractional-indexing-jittered` ключ между соседями (соседей берём из `PageRepo`). |
+| `renamePage(pageId, title)` | `PageService.update(page, { title }, user, provenance)`. |
+| `listRecentSince` | `PageRepo.getRecentPagesInSpace(spaceId, { … })`, фильтр по `updatedAt > since`. |
+| `listTrash(spaceId)` | `PageRepo` запрос с `deletedAt IS NOT NULL` по спейсу. |
+| `restorePage(pageId)` | `PageService.restore(...)`. |
+
+`userId`/`wsId` берём из конфигурации спейса (сервисный аккаунт воркспейса или
+владелец спейса — см. §7). `provenance` всегда несёт `source: 'git-sync'` (§8).
+
+### 3.3. Нативная запись тела (linchpin)
+
+Подтверждено в коде: `CollaborationGateway.openDirectConnection(documentName, context)`
+([collaboration.gateway.ts:148](../apps/server/src/collaboration/collaboration.gateway.ts#L148-L150))
++ паттерн `withYdocConnection`
+([collaboration.handler.ts:118-133](../apps/server/src/collaboration/collaboration.handler.ts#L118-L133)).
+Имя документа — `page.<pageId>` ([getPageId](../apps/server/src/collaboration/collaboration.util.ts#L163-L165)).
+Схему берём из `tiptapExtensions` ([collaboration.util.ts](../apps/server/src/collaboration/collaboration.util.ts)).
+
+```ts
+// In-process body write — no loopback websocket, no service-user token.
+// Mirrors collaboration.handler.ts 'replace' operation exactly.
+private async writeBody(pageId: string, prosemirrorJson: JSONContent): Promise<void> {
+ const conn = await this.collabGateway.openDirectConnection(
+ `page.${pageId}`,
+ { actor: 'git-sync' }, // provenance flows into PersistenceExtension (see §8)
+ );
+ try {
+ await conn.transact((doc) => {
+ const fragment = doc.getXmlFragment('default');
+ if (fragment.length > 0) fragment.delete(0, fragment.length);
+ const next = TiptapTransformer.toYdoc(prosemirrorJson, 'default', tiptapExtensions);
+ Y.applyUpdate(doc, Y.encodeStateAsUpdate(next));
+ });
+ } finally {
+ await conn.disconnect();
+ }
+ // PersistenceExtension.onStoreDocument persists ydoc+content+textContent
+ // consistently, stamps lastUpdatedSource, broadcasts 'page.updated'.
+}
+```
+
+**Схема-совместимость (критично).** `markdownToProseMirror` производит
+ProseMirror JSON в схеме docmost-client, а `TiptapTransformer.toYdoc` валидирует
+его в схеме `editor-ext`. Аналогично на чтении `convertProseMirrorToMarkdown`
+получает `content` в схеме `editor-ext`. Эти две схемы **должны совпадать по
+именам нод/марок/атрибутов**, иначе ноды потеряются. Это и есть гейт §13.1.
+
+---
+
+## 4. `VaultGit` и git-бинарь
+
+`VaultGit` (engine/git.ts) оставляем как есть — он шеллит в системный `git` через
+`execFile` (args-массив, без инъекций), всегда `cwd=<vaultRoot>`. Константы:
+`DEFAULT_BRANCH = "main"`, `BOT_AUTHOR_NAME = "Docmost Sync"`,
+`BOT_AUTHOR_EMAIL = "docmost-sync@local"`; в push.ts: `DOCMOST_BRANCH = "docmost"`,
+`LAST_PUSHED_REF = "refs/docmost/last-pushed"`, провенанс-трейлеры
+`Docmost-Sync-Source: docmost|local`.
+
+**Ops-требование:** в рантайм-образ gitmost добавить пакет `git`
+([Dockerfile](../Dockerfile)) — сейчас его там может не быть. Без бинаря
+`VaultGit.assertGitAvailable()` падает на старте цикла.
+
+**Модель веток (пер-репо, SPEC §5):** `main` (правит человек/файлы) ↔ `docmost`
+(зеркало Docmost, пишет только движок) ↔ `merge-base` как базлайн;
+`refs/docmost/last-pushed` — что из `main` уже отражено в Docmost.
+
+---
+
+## 5. Топология vault: репозиторий на спейс
+
+- Корень: `<dataDir>/git-sync/<spaceId>/` — отдельный git-репо на каждый
+ включённый спейс. `layout.ts` уже спейс-скоупный (корень спейса → `segments: []`).
+- Remote — пер-спейс (из конфигурации спейса/ENV). Изоляция конфликтов, блокировок
+ и blast-radius.
+- `move-to-space` (страница меняет спейс) → **кросс-репо**: `delete` в исходном
+ репо + `create` в целевом. Ловим по событию `PAGE_MOVED_TO_SPACE`.
+- Redis-lock ключ — `git-sync:lock:<spaceId>` (§9).
+
+---
+
+## 6. NestJS-модуль `GitSyncModule`
+
+Структура (шаблон — `McpModule`):
+```
+apps/server/src/integrations/git-sync/
+ git-sync.module.ts
+ git-sync.constants.ts # QueueJob/event-имена, дефолты
+ services/
+ gitmost-datasource.service.ts # §3 адаптер
+ git-sync.orchestrator.ts # @Interval + leader-lock + цикл по спейсам
+ vault-registry.service.ts # путь vault на спейс, VaultGit-инстансы
+ fractional-index.util.ts # position для move (reuse server util)
+ listeners/
+ page-change.listener.ts # подписка на EventName.PAGE_* + debounce
+ git-sync.controller.ts # (опц.) ручной trigger/status для админа
+```
+
+```ts
+@Module({
+ imports: [DatabaseModule, EnvironmentModule, ScheduleModule.forRoot()],
+ providers: [
+ GitmostDataSourceService,
+ GitSyncOrchestrator,
+ VaultRegistryService,
+ PageChangeListener,
+ ],
+})
+export class GitSyncModule {}
+```
+- Регистрируем в [app.module.ts](../apps/server/src/app.module.ts) рядом с `McpModule`.
+- Зависимости: `PageRepo`/`SpaceRepo` (через `DatabaseModule`), `PageService`,
+ `CollaborationGateway` (экспортировать из `CollaborationModule`),
+ `EnvironmentService`, ioredis-клиент.
+- `ScheduleModule.forRoot()` уже подключается в `TelemetryModule`; повторный вызов
+ безопасен, но лучше вынести в общий модуль или убедиться, что `forRoot` вызывается один раз.
+
+---
+
+## 7. Конфигурация
+
+### 7.1. Per-space (UI) — `space.settings.gitSync`
+Расширяем существующий паттерн `settings.sharing` / `settings.comments`.
+
+Сервер:
+- `UpdateSpaceDto` ([update-space.dto.ts](../apps/server/src/core/space/dto/update-space.dto.ts)):
+ добавить `@IsOptional() @IsBoolean() gitSyncEnabled?: boolean;` (+ опц.
+ `gitSyncRemote?: string`, если решим хранить remote в БД, а не только в ENV).
+- `SpaceService.updateSpace(dto, wsId)`
+ ([space.service.ts:120](../apps/server/src/core/space/services/space.service.ts#L120)):
+ обработать как `disablePublicSharing`/`allowViewerComments`.
+- `SpaceRepo`: добавить `updateGitSyncSettings(spaceId, wsId, prefKey, prefValue, trx?)`
+ по образцу `updateSharingSettings`
+ ([space.repo.ts:92](../apps/server/src/database/repos/space/space.repo.ts#L92)) —
+ jsonb-merge в `settings.gitSync.<prefKey>`.
+- Гард: CASL `SpaceCaslAction.Manage / SpaceCaslSubject.Settings` (как в
+ [space.controller.ts:147](../apps/server/src/core/space/space.controller.ts#L147)).
+
+Клиент:
+- Тоггл в форме настроек спейса
+ ([edit-space-form.tsx](../apps/client/src/features/space/components/edit-space-form.tsx))
+ через `useUpdateSpaceMutation()` → `updateSpace({ spaceId, gitSyncEnabled })`.
+ Образец — `mcp-settings.tsx`. `readOnly` при отсутствии `Manage/Settings`.
+
+Форма `space.settings.gitSync`:
+```jsonc
+{ "gitSync": { "enabled": true, "remote": "git@…", "branch": "main" } }
+```
+
+### 7.2. Секреты/тюнинг (ENV) — `EnvironmentService`
+Движковый `settings.ts` (zod, читает `.env`) **заменяем** на чтение из gitmost
+`EnvironmentService`: `parseSettings(env)` оставляем как чистую функцию для тестов,
+но в проде собираем `Settings` из `EnvironmentService`-геттеров.
+
+Новые переменные (объявить в
+[environment.validation.ts](../apps/server/src/integrations/environment/environment.validation.ts)
+class-validator-декораторами, геттеры — в
+[environment.service.ts](../apps/server/src/integrations/environment/environment.service.ts)):
+
+| ENV | Назначение | Обяз. |
+| --- | --- | --- |
+| `GIT_SYNC_ENABLED` | глобальный мастер-выключатель | нет (default false) |
+| `GIT_SYNC_DATA_DIR` | корень vault'ов (default `<dataDir>/git-sync`) | нет |
+| `GIT_SYNC_REMOTE_TEMPLATE` | шаблон remote, напр. `git@host:vault-{spaceId}.git` | нет |
+| `GIT_SYNC_SSH_KEY_PATH` / креды remote | доступ к git-remote (secret) | по ситуации |
+| `GIT_SYNC_POLL_INTERVAL_MS` | страховочный поллинг (default 15000) | нет |
+| `GIT_SYNC_DEBOUNCE_MS` | окно дебаунса событий (default 2000) | нет |
+| `GIT_SYNC_SERVICE_USER_ID` | от чьего имени писать в Docmost | да (если синк включён) |
+
+> git-remote = доступ ко всей вики спейса (SPEC §12): креды только в ENV/secret
+> store, никогда в БД/коммиты. В UI — только `enabled` (+ опц. имя remote из
+> заранее разрешённого списка).
+
+---
+
+## 8. Провенанс и loop-guard
+
+### 8.1. Значение `'git-sync'`
+Сегодня `lastUpdatedSource ∈ { 'user', 'agent' }`
+([persistence.extension.ts:132-134](../apps/server/src/collaboration/extensions/persistence.extension.ts#L132-L134)).
+Добавляем `'git-sync'`:
+- `PersistenceExtension`: `context.actor === 'git-sync'` → `lastUpdatedSource = 'git-sync'`.
+- Снапшот истории для `'git-sync'` — дебаунс (как у человека), а не немедленный
+ (немедленный — только для `'agent'`,
+ [persistence.extension.ts:321](../apps/server/src/collaboration/extensions/persistence.extension.ts#L321)).
+- Для `create/move/rename/delete` через `PageService` передаём
+ `AuthProvenanceData` c `source: 'git-sync'` (тип уже используется для агента —
+ расширить допустимые значения; точную форму подтвердить на реализации).
+- Клиент: в истории
+ ([history-item.tsx:128](../apps/client/src/features/page-history/components/history-item.tsx#L128))
+ не показывать агентский бейдж/дип-линк для `'git-sync'`; добавить значение в
+ тип [page.types.ts:23-26](../apps/client/src/features/page-history/types/page.types.ts#L23-L26)
+ (опц. свой бейдж «sync»).
+
+### 8.2. Подавление петли (SPEC §10)
+На pull-стороне игнорируем страницу как «свою запись», если:
+`page.lastUpdatedSource === 'git-sync'` **И** `bodyHash(exportedBody)` совпадает
+с последним запушенным (`PushedPageRecord.bodyHash` из `push.ts`). После записи в
+Docmost сохраняем `updatedAt` ответа, чтобы поллинг-страховка не утянул свою же
+запись обратно.
+
+---
+
+## 9. Single-writer (Redis leader-lock)
+
+В кодовой базе `@Interval`-задачи (`trash-cleanup`, `telemetry`, `session-cleanup`)
+**не защищены** от мультиинстанса. Для синка добавляем явный лок.
+
+- ioredis уже есть (`RedisModule` из `@nestjs-labs/nestjs-ioredis`,
+ [app.module.ts](../apps/server/src/app.module.ts); прямой `RedisClient`
+ используется в collab-gateway).
+- Лок на спейс: `SET git-sync:lock:<spaceId> <instanceId> NX PX <ttlMs>`; держим
+ цикл только при успехе, продлеваем по heartbeat, освобождаем в `finally`
+ (Lua-CAS на удаление по `instanceId`, чтобы не снять чужой лок).
+- TTL > максимальной длительности цикла; на краше лок истекает сам.
+
+```ts
+// Acquire per-space leadership; returns false if another replica holds it.
+private async acquire(spaceId: string): Promise<boolean> {
+ const ok = await this.redis.set(`git-sync:lock:${spaceId}`, this.instanceId, 'PX', LOCK_TTL_MS, 'NX');
+ return ok === 'OK';
+}
+```
+
+---
+
+## 10. Планировщик и событийные триггеры
+
+- **События (основной триггер).** `PageChangeListener` подписывается на
+ `EventName.PAGE_CREATED | PAGE_UPDATED | PAGE_MOVED | PAGE_SOFT_DELETED |
+ PAGE_RESTORED | PAGE_MOVED_TO_SPACE` и job `PAGE_CONTENT_UPDATED`
+ ([event.contants.ts](../apps/server/src/common/events/event.contants.ts)).
+ Фильтр по `spaceId` (только включённые спейсы) → дебаунс (`GIT_SYNC_DEBOUNCE_MS`)
+ → ставит pull/push-цикл спейса в очередь оркестратора.
+ - Loop-guard: события от собственных записей (`source==='git-sync'` + совпавший
+ хэш) пропускаем (§8.2).
+- **Поллинг-страховка.** `@Interval(GIT_SYNC_POLL_INTERVAL_MS)` в оркестраторе:
+ по каждому включённому спейсу (под локом) — реконсиляция (`listRecentSince` +
+ `listTrash`), ловит пропущенные события и стартовую сверку после простоя
+ (SPEC §12).
+- Один цикл на спейс за раз (внутри-процессный мьютекс на `spaceId` поверх
+ Redis-лока).
+
+---
+
+## 11. Потоки данных (walkthroughs)
+
+### 11.1. Первичный клон спейса (initial clone, SPEC §12)
+1. `VaultGit.ensureRepo()` + `ensureBranch('docmost','main')` + `checkout('docmost')`.
+2. `dataSource.listSpaceTree(spaceId)` → `{ pages, complete:true }`.
+3. `readExisting({ listTracked: () => git.listTrackedFiles('*.md'), readFile })`.
+4. `computePullActions({ pages, treeComplete:true, existing })` → план.
+5. `applyPullActions(deps, actions, vaultRoot)`: на каждую страницу
+ `getPageJson` → `stabilizePageFile(content, meta)` (export→import→export
+ fixpoint, SPEC §11) → запись файла; затем `stageAll` + `commit` (трейлер
+ `docmost`) на `docmost`; `checkout('main')` + `merge('docmost')`.
+6. Зафиксировать max `updatedAt` как стартовый `T_last`; `git push` в remote.
+
+### 11.2. Docmost → FS (pull-цикл)
+Триггер: событие/поллинг → (под локом) шаги §11.1 п.1–5 инкрементально. 3-way
+merge `docmost→main` делает git: непересекающиеся правки сливаются, реальное
+пересечение → conflict-маркеры в файле. **При конфликте push этой страницы в
+Docmost блокируется** до ручного резолва (SPEC §9; фаза D).
+
+### 11.3. FS → Docmost (push-цикл)
+`runPush(deps, { dryRun })`:
+1. `git.ensureRepo` / `isMergeInProgress` (abort при merge) / `checkout('main')`.
+2. `stageAll` + `commit('local: working-tree changes')` (локально, в Docmost не шлёт).
+3. База диффа: `readRef(LAST_PUSHED_REF)` ?? `docmost`; `revParse('main')` → `pushedCommit`.
+4. `diffNameStatus(base, 'main')` → changes; префетч `metaAt(path, side)`.
+5. `computePushActions({ changes, metaAt })` → creates/updates/deletes/renamesMoves/skipped.
+6. `dryRun` → лог плана и выход (клиент НЕ создаётся).
+7. `--apply`: `makeClient(settings)` → наш `GitmostDataSource`;
+ `applyPushActions`:
+ - update → `importPageMarkdown(pageId, fullMd)` (collab-write, §3.3);
+ - create → `createPage(...)` → записать присвоенный `pageId` обратно в meta;
+ - delete → `deletePage(pageId)` (Trash);
+ - rename/move → `classifyRenameMoves` → `movePage`/`renamePage`;
+ - при пустых failures: `updateRef(LAST_PUSHED_REF, pushedCommit)` +
+ `fastForwardBranch('docmost', pushedCommit)`.
+8. Записать `bodyHash` + `updatedAt` (loop-guard, §8.2); `git push`.
+
+---
+
+## 12. Фазирование
+
+- **A. Каркас + односторонний pull (нативно).** `packages/git-sync` (вендоринг
+ §2), `GitmostDataSource` (чтение через репозитории), `GitSyncModule`, конфиг из
+ `EnvironmentService`, ручной/однократный pull-цикл на один спейс. **Гейт §13.1.**
+- **B. Push + непрерывность.** Нативная запись (§3.3), `runPush`, ветки/refs,
+ loop-guard (§8), Redis-лок (§9), `@Interval` + `PageChangeListener` (§10).
+- **C. Per-space UI.** `space.settings.gitSync` (§7.1), DTO/сервис/репо/гард,
+ тоггл на клиенте, скоуп оркестратора по включённым спейсам.
+- **D. Харднинг.** Conflict-gating (SPEC §9), удаления через Trash + git (§5),
+ стартовая реконсиляция и `move-to-space` кросс-репо, провенанс на клиенте,
+ Dockerfile `git`, полный набор тестов.
+
+---
+
+## 13. Тестирование
+
+### 13.1. Гейт идемпотентности (блокирует фазу B)
+Перенести round-trip-харнес docmost-sync (`roundtrip.ts` + `test/fixtures/corpus`)
+в тесты `packages/git-sync`, но прогонять **против схемы `editor-ext`**:
+`content (editor-ext) → convertProseMirrorToMarkdown → markdownToProseMirror →
+TiptapTransformer.toYdoc(…, tiptapExtensions) → fromYdoc → canonicalizeContent`
+должно давать `docsCanonicallyEqual === true`. Любая потеря нод/атрибутов =
+расхождение схем → чинить `docmost-schema.ts` под `editor-ext`.
+
+### 13.2. Юнит (чистая логика, переносится как есть)
+`reconcile` (planReconciliation / decideAbsenceDeletions / mass-delete guards),
+`layout` (коллизии/санитизация), `computePullActions`, `computePushActions`,
+`classifyRenameMoves`, `bodyHash`.
+
+### 13.3. Интеграция (нативный адаптер)
+`GitmostDataSource` против тестовой БД: `listSpaceTree`/`getPageJson` корректно
+маппят; `createPage`/`movePage`/`deletePage`/`importPageMarkdown` пишут через
+collab и проставляют `lastUpdatedSource='git-sync'`; loop-guard не зацикливается
+(write → poll → no-op).
+
+### 13.4. e2e (под локом)
+Полный pull→push round-trip на временном vault + временном спейсе: правка в
+Docmost доезжает в файл и наоборот; конфликт даёт маркеры и блокирует push.
+
+---
+
+## 14. Риски и открытые пункты
+
+1. **Схема-совместимость конвертера** (§3.3, §13.1) — главный риск; гейт
+ обязателен до фазы B.
+2. **`AuthProvenanceData`** — точную форму типа подтвердить; возможно, потребует
+ расширения enum источника на сервере и в истории.
+3. **Согласованность Yjs** — писать строго через `openDirectConnection`/`transact`;
+ не трогать `content`-колонку напрямую.
+4. **`position` для move** — обязателен в Docmost-move; нужен
+ `fractional-indexing-jittered` между соседями (соседей брать сортировкой
+ `position COLLATE "C"`).
+5. **`git` в рантайме** — добавить в Dockerfile.
+6. **`ScheduleModule.forRoot()`** — не задублировать `forRoot`.
+7. **Сервисный пользователь записи** (`GIT_SYNC_SERVICE_USER_ID`) — от чьего имени
+ идут create/move (влияет на `creatorId`/права); согласовать политику.
+8. **Конфликты и удаления** — фаза D строго по SPEC §8/§9 (маркеры никогда не
+ уезжают в Docmost).
+
+---
+
+## 15. Чек-лист изменений по файлам
+
+**Новый пакет**
+- `packages/git-sync/**` — движок + чистый конвертер (§2), `package.json`
+ (`@docmost/git-sync`, `workspace:*`), `tsconfig.json`.
+
+**Сервер (`apps/server/src`)**
+- `integrations/git-sync/**` — модуль, оркестратор, адаптер, листенер (§6).
+- `app.module.ts` — импорт `GitSyncModule`.
+- `collaboration/collaboration.module.ts` — экспорт `CollaborationGateway`.
+- `collaboration/extensions/persistence.extension.ts` — источник `'git-sync'` (§8.1).
+- `core/space/dto/update-space.dto.ts` — `gitSyncEnabled?` (§7.1).
+- `core/space/services/space.service.ts` — обработка флага.
+- `database/repos/space/space.repo.ts` — `updateGitSyncSettings` (§7.1).
+- `integrations/environment/environment.validation.ts` + `environment.service.ts` —
+ новые ENV (§7.2).
+- `Dockerfile` — пакет `git`.
+
+**Клиент (`apps/client/src`)**
+- `features/space/components/edit-space-form.tsx` — тоггл git-sync.
+- `features/space/types` — поле `settings.gitSync`.
+- `features/page-history/types/page.types.ts` + `components/history-item.tsx` —
+ значение `'git-sync'` в `lastUpdatedSource`.
+
+**Корень**
+- `pnpm-workspace.yaml` уже покрывает `packages/*`; `apps/server/package.json` —
+ зависимость `@docmost/git-sync: workspace:*`.
diff --git a/docs/rag-improvements-plan.md b/docs/rag-improvements-plan.md
deleted file mode 100644
index fbbb51df..00000000
--- a/docs/rag-improvements-plan.md
+++ /dev/null
@@ -1,145 +0,0 @@
-# Улучшение качества RAG-поиска агента — план по итерациям
-
-> Статус: живой документ. Итерация 1 **реализована** (см. ниже). Остальное —
-> бэклог на следующие итерации, отсортированный по «качество / усилие».
-> Контекст: gitmost — форк Docmost. Семантический поиск агента: per-workspace
-> эмбеддинги в `page_embeddings` (pgvector, dimension-agnostic колонка, seq-scan
-> с `<=>`), индексация через BullMQ (`reindexPage` / `reindexWorkspace`).
-> Активная embedding-модель деплоя: OpenAI `text-embedding-3-large` (3072d).
-
-## Как сверялось с реальным кодом
-
-Внешнее предложение по улучшению RAG было сверено с кодовой базой. Точные факты
-на момент итерации 1:
-
-- Хранилище: [page_embeddings](../apps/server/src/database/migrations/20260617T120000-page-embeddings.ts),
- колонка `embedding` сделана dimension-agnostic в
- [20260617T140000](../apps/server/src/database/migrations/20260617T140000-page-embeddings-dimension-agnostic.ts);
- `model_name` / `model_dimensions` хранятся по строке.
-- Полнотекстовые индексы **уже существуют** (предложение ошибочно утверждало
- обратное): `pages_tsv_idx` на `pages.tsv` и `attachments_tsv_idx`. Конфигурация —
- `to_tsvector('english', f_unaccent(...))` + `setweight`
- ([тут](../apps/server/src/database/migrations/20250729T213756-add-unaccent-pg_trm-update-tsvector..ts)).
-- Чанкинг: `RecursiveCharacterTextSplitter` 1000/200, без префиксов.
-- Префиксы `query:` / `passage:` **не нужны**: они требуются для e5/bge/gte/Qwen3,
- а деплой на OpenAI `text-embedding-3-large` (этот пункт предложения неприменим).
-- Вложения (`attachment_id` в схеме есть) **не индексируются** — индексатор всегда
- пишет `attachmentId: null`.
-
----
-
-## Итерация 1 — РЕАЛИЗОВАНО
-
-Три «низковисящих фрукта»:
-
-### 1. Хлебные крошки заголовков в чанках
-Файл: [embedding-indexer.service.ts](../apps/server/src/core/ai-chat/embedding/embedding-indexer.service.ts).
-Каждый чанк префиксуется путём заголовков `«Заголовок страницы > H1 > H2»` перед
-эмбеддингом. Крошки строятся обходом **ProseMirror JSON** (`heading`-ноды с
-`attrs.level`), а не markdown-текста — поэтому `#` внутри fenced-код-блока (типичный
-bash-сниппет в WirenBoard-вики) **никогда** не принимается за заголовок. Деградация
-к старому plain-text чанкингу при отсутствии/сбое `content`. Префикс попадает и в
-эмбеддинг, и в `content` (а значит — в лексический индекс `fts` и в сниппет агента).
-
-### 2. Гибридный поиск (RRF), слияние двух инструментов в один
-- Миграция [20260618T150000-page-embeddings-fts.ts](../apps/server/src/database/migrations/20260618T150000-page-embeddings-fts.ts):
- генерируемая колонка `fts tsvector GENERATED ALWAYS AS (to_tsvector('english',
- f_unaccent(content))) STORED` + GIN-индекс. Конфиг совпадает с `pages.tsv` (та же
- обработка unaccent/Cyrillic); `f_unaccent` IMMUTABLE → триггер не нужен.
-- Репозиторий: метод `hybridSearch` в
- [page-embedding.repo.ts](../apps/server/src/database/repos/ai-chat/page-embedding.repo.ts) —
- один SQL-запрос, два CTE (cosine + `websearch_to_tsquery`), слияние Reciprocal Rank
- Fusion через FULL OUTER JOIN на уровне чанков. `k=60` (дефолт Cormack 2009 /
- ES / OpenSearch / Weaviate), равные веса 1.0/1.0. RRF сливает **ранги**, поэтому
- несовместимость шкал BM25 и косинуса не требует нормализации. Dimension-фильтр —
- только на семантической стороне.
-- Инструменты: `semanticSearch` удалён, `searchPages` стал единым гибридным
- инструментом ([ai-chat-tools.service.ts](../apps/server/src/core/ai-chat/tools/ai-chat-tools.service.ts)).
- Контроль доступа сохранён 1-в-1 (scope по доступным спейсам + пост-фильтр прав
- страниц). Если эмбеддинги не настроены / эмбеддинг упал / нет доступных спейсов /
- гибрид пуст → graceful fallback на прежний REST-полнотекст (CASL-enforced).
-
-### 3. Переписывание запроса + описания инструментов
-- Описание `searchPages` теперь явно просит агента переформулировать вопрос в
- сфокусированный поисковый запрос и переискивать при слабой выдаче (это переживает
- кастомный admin-промпт, т.к. лежит в описании инструмента).
-- Одна строка-подсказка добавлена в `DEFAULT_PROMPT`
- ([ai-chat.prompt.ts](../apps/server/src/core/ai-chat/ai-chat.prompt.ts)).
-
-> ВАЖНО после деплоя: чтобы крошки и `fts` появились у существующих страниц, нужна
-> **переиндексация корпуса** (кнопка «Reindex now» / `WORKSPACE_CREATE_EMBEDDINGS`).
-> Миграция заполнит `fts` у текущих строк автоматически, но крошки добавляются только
-> при переиндексации (она же перезапишет `content`).
-
-### Известные нюансы текущей реализации (осознанные компромиссы)
-- Гибрид покрывает только проиндексированные чанки. Свежесозданная страница
- становится искомой после отработки её BullMQ-`reindexPage`. Пока эмбеддинги не
- настроены — работает только REST-fallback (полнотекст уровня страницы по `pages.tsv`).
-- Если **весь** пул кандидатов гибрида (до 200 чанков) оказался из закрытых для
- пользователя страниц, инструмент вернёт пусто, а не уйдёт в keyword-fallback.
- Узкий кейс; возможное улучшение — fallback и при пустом результате пост-фильтра.
-- `fts` использует конфиг `english` (как и `pages.tsv`) — без русской стеммизации.
- Для русской вики это консистентно с текущим поиском; переход на `simple`/`russian`
- конфиг — отдельная задача с переиндексацией.
-- `candidates` (=clamp(limit×5, 50, 200)) служит и per-CTE лимитом, и финальным
- лимитом слияния; веса RRF равные. Тюнится после появления оценочного харнесса.
-
----
-
-## Бэклог следующих итераций (по приоритету «качество / усилие»)
-
-### A. Реранкер (cross-encoder) — наибольший ROI после гибрида
-Вставить между over-fetch гибрида и дедупом: брать топ-50–100 кандидатов от
-`hybridSearch`, реранкать, оставлять топ-5–10. Ожидаемый прирост precision/MRR
-+10–25 %. Точка вставки уже готова — это шаг между `hybridSearch(... candidates)` и
-циклом дедупа в `searchPages`.
-- Хостовый старт (раз уже на OpenAI-инфраструктуре): **Cohere Rerank** или
- **Voyage `rerank-2.5`** — провайдер по аналогии с текущим pluggable embedding-конфигом.
-- Self-hosted (под Ollama-этос): **BGE-reranker-v2-m3** через HF Text Embeddings
- Inference (`/rerank`), либо FlashRank (ONNX/CPU, ~15–30 мс).
-- Диагностика: если реранк не двигает метрики — узкое место в recall (чанкинг/гибрид),
- а не в ранжировании.
-
-### B. Индексация вложений — закрыть пробел покрытия
-Схема уже готова (`attachment_id`). Добавить в BullMQ-flow шаг извлечения текста из
-PDF/документов (PyMuPDF для цифровых PDF; OCR для сканов; для таблиц — markdown через
-LLM-парсер) и вливать его в тот же путь чанк→эмбеддинг→`fts`, помечая `attachment_id`.
-Структура извлечённых данных важнее голой точности OCR.
-
-### C. Тюнинг гибрида и оценочный харнесс
-- Золотой датасет 30–100 примеров (вопрос → нужная страница/чанк) + Ragas/DeepEval
- (Recall@k, MRR/nDCG, context precision/recall, faithfulness). Прогон до/после
- каждого изменения. **Прерогатива пропущена в итерации 1 осознанно** — без неё все
- нижеследующие тюнинги делаются «на глаз».
-- После харнесса: тюнить веса RRF (старт 1.0/1.0), `k` (старт 60), число `candidates`.
-- Эксперимент: чанки ~512 симв. против 1000 (предложение указывает на рост precision).
-
-### D. Contextual Retrieval (Anthropic), если крошек мало
-Один LLM-вызов на чанк добавляет предложение-контекст. Снижение провалов выдачи
-на 35–49 %. Ложится в BullMQ-`reindexPage`; на сотнях страниц с prompt caching — копейки.
-Применять, только если хлебных крошек окажется недостаточно против потери контекста.
-
-### E. ParadeDB `pg_search` (настоящий BM25), если лексика станет узким местом
-Нативный `ts_rank` использует только TF и длину документа, без IDF. `pg_search`
-(Rust/Tantivy) даёт честный BM25-индекс. Не drop-in (свои операторы вместо `@@`) —
-это изменение кода, а не флаг. На сотнях страниц нативного `tsvector` хватает; брать
-только если качество лексического ранжирования упрётся в потолок.
-
-### F. Прочее
-- **Префиксы query/passage** — НЕ нужны на OpenAI. Внедрять только при переходе на
- e5/bge/gte/Qwen3 (тогда индексатор ставит `passage:`, запрос — `query:`; BGE-v1.5,
- наоборот, префиксов НЕ должна получать). Зафиксировано как ловушка на будущее.
-- **Апгрейд embedding-модели** — уже на `text-embedding-3-large` (топ среди закрытых).
- Matryoshka (обрезка размерности) — запас на будущее; dimension-agnostic колонка
- делает миграцию тривиальной (цена — переэмбеддинг корпуса).
-- **HyDE и широкий multi-query/RAG-Fusion** — НЕ рекомендуются как дефолт: в свежих
- бенчмарках уступали и добавляют задержку/галлюцинации.
-
-## Оговорки
-- Все внешние числа (62→84 % precision, +17 % Recall@5, −35…49 % провалов, +10–25 %
- от реранка) получены на ДРУГИХ корпусах (SEC-отчёты, финтекст, право, медицина).
- На этой вики величины будут иными — поэтому пункт C (свой датасет) обязателен перед
- тонким тюнингом. Внешние числа — направление, не гарантия величины.
-- Часть источников предложения — вендорский маркетинг (Cohere, Voyage, ParadeDB);
- направление подтверждается независимыми (T2-RAGBench, оценка Anthropic), но величины
- у вендоров могут быть завышены.
diff --git a/packages/editor-ext/src/lib/html-embed/html-embed.ts b/packages/editor-ext/src/lib/html-embed/html-embed.ts
index 2a47eb43..baa396e1 100644
--- a/packages/editor-ext/src/lib/html-embed/html-embed.ts
+++ b/packages/editor-ext/src/lib/html-embed/html-embed.ts
@@ -7,8 +7,10 @@ export interface HtmlEmbedOptions {
}
export interface HtmlEmbedAttributes {
- // Raw HTML/CSS/JS string that is injected verbatim into the wiki origin.
+ // Raw HTML/CSS/JS string rendered inside a sandboxed iframe by the NodeView.
source?: string;
+ // Fixed iframe height in pixels. null/absent => auto-resize via postMessage.
+ height?: number | null;
}
declare module "@tiptap/core" {
@@ -98,6 +100,21 @@ export const HtmlEmbed = Node.create({
"data-source": encodeHtmlEmbedSource(attributes.source || ""),
}),
},
+ // Fixed iframe height in px. null/absent => auto-resize on the client.
+ height: {
+ default: null,
+ parseHTML: (el) => {
+ const v = el.getAttribute("data-height");
+ if (!v) return null;
+ const n = parseInt(v, 10);
+ // A non-numeric data-height (e.g. crafted/corrupted import) must not
+ // become NaN: NaN is typeof "number" and would disable auto-resize and
+ // yield an unclamped iframe height downstream. Treat it as auto (null).
+ return Number.isFinite(n) ? n : null;
+ },
+ renderHTML: (attrs: HtmlEmbedAttributes) =>
+ attrs.height ? { "data-height": String(attrs.height) } : {},
+ },
};
},
diff --git a/packages/mcp/src/lib/docmost-schema.ts b/packages/mcp/src/lib/docmost-schema.ts
index 3d8d25d7..63bef5c2 100644
--- a/packages/mcp/src/lib/docmost-schema.ts
+++ b/packages/mcp/src/lib/docmost-schema.ts
@@ -797,6 +797,60 @@ const Embed = Node.create({
},
});
+/**
+ * Docmost raw HTML embed. Block atom; the client renders `source` inside a
+ * sandboxed iframe. The MCP server never renders it — it only needs the
+ * schema to accept and carry the node so a fromYdoc -> transform -> toYdoc
+ * round-trip does not throw "Unknown node type: htmlEmbed". Mirrors the
+ * @docmost/editor-ext node name, attribute keys and flags; keep in sync when
+ * the editor-ext htmlEmbed schema changes.
+ *
+ * NOTE: unlike the canonical editor-ext node, `data-source` here is mapped as
+ * plain text rather than base64-encoded. That is intentional: the MCP write
+ * path carries the node through Yjs (fromYdoc -> toYdoc) on its JSON `source`
+ * attribute and never invokes parseHTML/renderHTML, and htmlEmbed is not
+ * produced from the markdown/HTML (generateJSON) path. If a future HTML path
+ * for htmlEmbed is added here, this mapping must adopt editor-ext's base64
+ * encode/decode to avoid double-encoding `source`.
+ */
+const HtmlEmbed = Node.create({
+ name: "htmlEmbed",
+ group: "block",
+ inline: false,
+ isolating: true,
+ atom: true,
+ defining: true,
+ draggable: true,
+ addAttributes() {
+ return {
+ source: {
+ default: "",
+ parseHTML: (el: HTMLElement) => el.getAttribute("data-source") ?? "",
+ renderHTML: (attrs: Record<string, unknown>) => ({
+ "data-source": attrs.source ?? "",
+ }),
+ },
+ height: {
+ default: null,
+ parseHTML: (el: HTMLElement) => {
+ const v = el.getAttribute("data-height");
+ if (!v) return null;
+ const n = parseInt(v, 10);
+ return Number.isFinite(n) ? n : null;
+ },
+ renderHTML: (attrs: Record<string, unknown>) =>
+ attrs.height != null ? { "data-height": String(attrs.height) } : {},
+ },
+ };
+ },
+ parseHTML() {
+ return [{ tag: 'div[data-type="htmlEmbed"]' }];
+ },
+ renderHTML({ HTMLAttributes }) {
+ return ["div", { "data-type": "htmlEmbed", ...HTMLAttributes }, 0];
+ },
+});
+
/** Shared attribute set for drawio/excalidraw diagram nodes. */
const diagramAttributes = () => ({
src: {
@@ -1158,6 +1212,7 @@ export const docmostExtensions = [
Video,
Youtube,
Embed,
+ HtmlEmbed,
Drawio,
Excalidraw,
Columns,