Bug #1 (push 503 starvation): an external receive-pack that briefly overlapped a poll cycle immediately 503'd because the per-space single-writer lock was held. Add a BOUNDED retry-acquire on the PUSH path only (SpaceLockService .withSpaceLock acquireRetry: capped exponential backoff up to ~5s); a transient overlap now waits and succeeds, a genuinely stuck cycle still 503s after the bound. The poll cycle passes no retry (immediate skip). Push result stays deterministic: the receive-pack only runs once the lock is held, so a 503 never leaves a half-applied ref. Bug #2 (concurrent-edit marker leak + silent same-block loss): - Marker leak (a): the push UPDATE path stripped markers for the body sent to Docmost but left raw <<<<<<</>>>>>>> committed on the published `main` vault forever (autoMergeConflicts ON). Now the cleaned body is written back to the vault file + recorded in writtenBack so runPush commits it on `main` and the vault converges to clean bytes. - Marker leak (b): pin merge.conflictStyle=merge in ensureRepo and teach stripConflictMarkers/hasConflictMarkers about the diff3 `|||||||` base section (drop the marker AND the stale base region) so diff3/zdiff3 conflicts can never leak `|||||||` + base content into a page. Also scrub the 3-way merge BASE markdown. - Silent same-block loss: the block 3-way merge still resolves same-block conflicts deterministically to git, but it is no longer silent: diff3Plan now reports a conflict count (mergeXmlFragments3WayWithStats), gitSyncWriteBody logs it, and the persistence boundary-snapshot now fires for git-sync writes over a non-git-sync baseline so the human's pre-merge content is preserved in page history (recoverable). Full both-preserved persisted-conflict UI remains the deferred redesign. Tests: space-lock bounded-retry (success/stuck/poll-immediate); push vault-clean + diff3 ||||||| strip; ensureRepo conflictStyle pin; diff3Plan/3-way conflict counts; persistence git-sync boundary snapshot. 
Server tsc clean; git-sync vitest + server collaboration/git-sync jest all green. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
515 lines
19 KiB
TypeScript
515 lines
19 KiB
TypeScript
import {
|
|
afterUnloadDocumentPayload,
|
|
Extension,
|
|
onChangePayload,
|
|
onDisconnectPayload,
|
|
onLoadDocumentPayload,
|
|
onStoreDocumentPayload,
|
|
} from '@hocuspocus/server';
|
|
import * as Y from 'yjs';
|
|
import { Injectable, Logger } from '@nestjs/common';
|
|
import { TiptapTransformer } from '@hocuspocus/transformer';
|
|
import {
|
|
getPageId,
|
|
isEmptyParagraphDoc,
|
|
jsonToText,
|
|
tiptapExtensions,
|
|
} from '../collaboration.util';
|
|
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
|
import { PageHistoryRepo } from '@docmost/db/repos/page/page-history.repo';
|
|
import { InjectKysely } from 'nestjs-kysely';
|
|
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
|
import { executeTx } from '@docmost/db/utils';
|
|
import { InjectQueue } from '@nestjs/bullmq';
|
|
import { QueueJob, QueueName } from '../../integrations/queue/constants';
|
|
import { ProvenanceSource } from '../../core/auth/dto/jwt-payload';
|
|
import { Queue } from 'bullmq';
|
|
import {
|
|
extractMentions,
|
|
extractUserMentions,
|
|
} from '../../common/helpers/prosemirror/utils';
|
|
import { isDeepStrictEqual } from 'node:util';
|
|
import {
|
|
IPageHistoryJob,
|
|
IPageMentionNotificationJob,
|
|
} from '../../integrations/queue/constants/queue.interface';
|
|
import { Page } from '@docmost/db/types/entity.types';
|
|
import { CollabHistoryService } from '../services/collab-history.service';
|
|
import {
|
|
HISTORY_FAST_INTERVAL,
|
|
HISTORY_FAST_THRESHOLD,
|
|
HISTORY_INTERVAL,
|
|
} from '../constants';
|
|
import { TransclusionService } from '../../core/page/transclusion/transclusion.service';
|
|
|
|
/**
 * Decide which provenance tag a coalesced store should carry.
 *
 * Precedence, highest first:
 *   1. explicit `'agent'` actor on the current write
 *   2. explicit `'git-sync'` actor on the current write
 *   3. sticky agent marker (an agent edit landed earlier in the window)
 *   4. plain human `'user'`
 *
 * The explicit git-sync actor deliberately outranks the sticky marker: the
 * PageChangeListener loop-guard keys on `lastUpdatedSource === 'git-sync'`, so
 * masking it would make git-sync re-export its own writes (#14). Kept pure so
 * the §15 H2 marker logic can be unit-tested in isolation.
 *
 * @param stickyTouched true when an agent edit was recorded in the current
 *   coalescing window.
 * @param contextActor actor attached to the current write, if any.
 * @returns the provenance source to persist for this write.
 */
export function resolveSource(
  stickyTouched: boolean,
  contextActor?: string,
): ProvenanceSource {
  switch (contextActor) {
    // An explicit actor is authoritative for THIS write.
    case 'agent':
      return 'agent';
    case 'git-sync':
      return 'git-sync';
    default:
      // No explicit machine actor: fall back to the sticky marker.
      return stickyTouched ? 'agent' : 'user';
  }
}
|
|
|
|
/**
 * Derive the BullMQ job id and delay for a page-history snapshot job.
 *
 * Pure on purpose: the timing arithmetic is data-loss sensitive, so `now` is
 * injected (callers pass `Date.now()`) to keep it deterministic under test.
 *
 * - Agent edits get delay 0 and the source-keyed id `${page.id}-agent`. The
 *   delay MUST stay 0: the worker re-reads the page row when it runs, so any
 *   delay risks snapshotting content a later human edit already overwrote
 *   (mis-tagged snapshot). The `-agent` suffix stops the job from coalescing
 *   with the human job, whose id is the bare `page.id`.
 * - Every other source is debounced by page age so rapid edits coalesce into
 *   one snapshot: pages younger than `HISTORY_FAST_THRESHOLD` use
 *   `HISTORY_FAST_INTERVAL`, older pages use `HISTORY_INTERVAL`.
 *
 * BullMQ forbids ':' in custom job ids (Redis key separator), hence '-'.
 * `page.id` is a UUID, so `${page.id}-agent` cannot collide with a human job.
 *
 * @param page the page's id and creation timestamp.
 * @param source provenance source of the write being snapshotted.
 * @param now current epoch time in milliseconds.
 * @returns the job id and delay (ms) to hand to the history queue.
 */
export function computeHistoryJob(
  page: Pick<Page, 'id' | 'createdAt'>,
  source: string,
  now: number,
): { jobId: string; delay: number } {
  // Agent snapshots are never debounced and never share the human job id.
  if (source === 'agent') {
    return { jobId: `${page.id}-agent`, delay: 0 };
  }

  const ageMs = now - new Date(page.createdAt).getTime();
  const debounceMs =
    ageMs < HISTORY_FAST_THRESHOLD ? HISTORY_FAST_INTERVAL : HISTORY_INTERVAL;

  return { jobId: page.id, delay: debounceMs };
}
|
|
|
|
/**
 * Hocuspocus extension that bridges collaborative Y.Docs and the page table:
 * it hydrates a document from the database on load, writes it back on store
 * (with provenance, contributors and boundary history snapshots), and fans the
 * successful write out to the transclusion, notification, AI and history
 * pipelines.
 */
@Injectable()
export class PersistenceExtension implements Extension {
  private readonly logger = new Logger(PersistenceExtension.name);

  // User ids that edited each document since its last store.
  private contributors: Map<string, Set<string>> = new Map();

  // Sticky agent-edit marker (§15 H2): a coalesced snapshot may mix human and
  // agent edits. We accumulate "an agent touched this document during the
  // coalescing window" per document and OR it across all edits in the window,
  // so the snapshot is marked 'agent' regardless of who wrote last.
  private agentTouched: Map<string, boolean> = new Map();

  constructor(
    private readonly pageRepo: PageRepo,
    private readonly pageHistoryRepo: PageHistoryRepo,
    @InjectKysely() private readonly db: KyselyDB,
    @InjectQueue(QueueName.AI_QUEUE) private aiQueue: Queue,
    @InjectQueue(QueueName.HISTORY_QUEUE) private historyQueue: Queue,
    @InjectQueue(QueueName.NOTIFICATION_QUEUE) private notificationQueue: Queue,
    private readonly collabHistory: CollabHistoryService,
    private readonly transclusionService: TransclusionService,
  ) {}

  /**
   * Provide the initial Y.Doc for a document that Hocuspocus has not populated.
   *
   * Resolution order: persisted ydoc bytes, then the page's JSON content
   * converted to a Y.Doc, then a fresh empty Y.Doc. Returns nothing when the
   * in-memory document already has content or the page row does not exist.
   */
  async onLoadDocument(data: onLoadDocumentPayload) {
    const { documentName, document } = data;
    const pageId = getPageId(documentName);

    // Already populated in memory: nothing to hydrate.
    if (!document.isEmpty('default')) {
      return;
    }

    const page = await this.pageRepo.findById(pageId, {
      includeContent: true,
      includeYdoc: true,
    });

    if (!page) {
      this.logger.warn(`page not found: ${pageId}`);
      return;
    }

    if (page.ydoc) {
      this.logger.debug(`ydoc loaded from db: ${pageId}`);

      const doc = new Y.Doc();
      Y.applyUpdate(doc, new Uint8Array(page.ydoc));
      return doc;
    }

    // No ydoc state in the db: convert the JSON in page.content to a Y.Doc.
    if (page.content) {
      this.logger.debug(`converting json to ydoc: ${pageId}`);

      return TiptapTransformer.toYdoc(page.content, 'default', tiptapExtensions);
    }

    this.logger.debug(`creating fresh ydoc: ${pageId}`);
    return new Y.Doc();
  }

  /**
   * LOSS-ON-FAST-CLOSE FIX (QA #119). When the LAST editor disconnects, FLUSH
   * any pending (debounced) store to the DB IMMEDIATELY instead of waiting out
   * the up-to-10s `debounce` window.
   *
   * The collab server runs with `unloadImmediately: false`
   * (collaboration.gateway), so on a last-client disconnect Hocuspocus does NOT
   * flush the debounced onStoreDocument — it relies on the timer firing later.
   * A quick edit-then-close (closing the tab within the debounce window,
   * ~3-18s) therefore left the edit only in the soon-to-be-unloaded in-memory
   * Y.Doc; meanwhile git-sync mirrored the STALE/empty DB body to the vault
   * (the reported "59-byte frontmatter-only" data loss). Running the
   * already-scheduled store now closes that window.
   *
   * Gated tightly so it never adds a redundant write: only on the LAST
   * disconnect (`clientsCount === 0`), only for a fully-loaded doc, and only
   * when a store is actually pending (`isDebounced`). `executeNow` runs the
   * SAME payload Hocuspocus scheduled (preserving the edit's context/actor)
   * and clears the timer. A failing flush is logged, never rethrown.
   */
  async onDisconnect(data: onDisconnectPayload) {
    const { instance, document, documentName, clientsCount } = data;

    if (clientsCount > 0) return;
    if (!document || document.isLoading) return;

    const debounceId = `onStoreDocument-${documentName}`;
    if (!instance?.debouncer?.isDebounced(debounceId)) return;

    try {
      await instance.debouncer.executeNow(debounceId);
    } catch (err) {
      this.logger.error(
        `onDisconnect flush failed for ${documentName}: ` +
          (err instanceof Error ? err.message : String(err)),
      );
    }
  }

  /**
   * Persist the document to the page row and, only if a write actually
   * happened, run the post-store side effects (client broadcast, transclusion
   * sync, contributor history, mention notifications, AI queue, history job).
   *
   * The write is skipped when the page row is missing or when the serialized
   * content equals what is already stored.
   */
  async onStoreDocument(data: onStoreDocumentPayload) {
    const { documentName, document, context } = data;
    const pageId = getPageId(documentName);

    const tiptapJson = TiptapTransformer.fromYdoc(document, 'default');
    const ydocState = Buffer.from(Y.encodeStateAsUpdate(document));

    // Plain-text extraction is best-effort; a failure stores null text.
    let textContent = null;
    try {
      textContent = jsonToText(tiptapJson);
    } catch (err) {
      this.logger.warn(
        `jsonToText failed for ${pageId}: ` +
          (err instanceof Error ? err.message : String(err)),
      );
    }

    let page: Page = null;
    const editingUserIds = this.consumeContributors(documentName);

    // Sticky agent marker: 'agent' if any agent edit landed in this window, OR
    // if the current writer is the agent (covers a store with no prior onChange
    // agent event in the same window). §15 H2.
    // Provenance precedence: agent > git-sync > user (see resolveSource). A
    // 'git-sync' store is NOT given an immediate history snapshot — it is
    // debounced like a human edit (a git-sync write is a block-level merge into
    // the live doc, so it reads like an incremental human edit, not a bulk
    // import that would warrant its own immediate snapshot).
    const lastUpdatedSource = resolveSource(
      this.consumeAgentTouched(documentName),
      context?.actor,
    );

    // Persist with a small bounded retry. The in-memory Y.Doc is the ONLY copy
    // of the latest edit until this hook returns: hocuspocus destroys/unloads
    // the doc right after onStoreDocument resolves (see storeDocumentHooks'
    // finally -> unloadDocument). If a transient DB error (deadlock,
    // serialization failure, dropped connection) is merely logged and
    // swallowed, the function resolves "successfully", the doc is unloaded, and
    // the edit is lost silently (#206 persist-1). Retrying here re-attempts the
    // write while we still hold the doc; on total failure we clear `page` so
    // the post-store side effects (badge broadcast, history snapshot) never
    // report a save that didn't happen.
    const MAX_STORE_ATTEMPTS = 3;
    for (let attempt = 1; attempt <= MAX_STORE_ATTEMPTS; attempt++) {
      try {
        await executeTx(this.db, async (trx) => {
          page = await this.pageRepo.findById(pageId, {
            withLock: true,
            includeContent: true,
            trx,
          });

          if (!page) {
            this.logger.error(`Page with id ${pageId} not found`);
            return;
          }

          // Nothing changed: clear `page` so no side effects fire.
          if (isDeepStrictEqual(tiptapJson, page.content)) {
            page = null;
            return;
          }

          // Merge existing contributors, this window's editors and the
          // creator. Best-effort: on failure contributorIds stays undefined.
          let contributorIds = undefined;
          try {
            const existingContributors = page.contributorIds || [];
            contributorIds = Array.from(
              new Set([
                ...existingContributors,
                ...editingUserIds,
                page.creatorId,
              ]),
            );
          } catch (err) {
            this.logger.debug(
              `Contributors error for ${pageId}: ` +
                (err instanceof Error ? err.message : String(err)),
            );
          }

          // Approach A — boundary snapshot before a MACHINE write overwrites a
          // human (or other-source) baseline. When this store is from a machine
          // source — the AGENT or GIT-SYNC — and the page's currently persisted
          // state was authored by a DIFFERENT source, pin that prior state as
          // its own history version BEFORE the machine write overwrites it.
          // `page` still holds the OLD content/provenance here, so
          // saveHistory(page) captures the pre-write state. The machine's new
          // content is snapshotted later by the debounced PAGE_HISTORY job.
          //
          // For GIT-SYNC this is the OBSERVABLE-LOSS guard (SPEC §9 conflict
          // contract): a git-sync body write is a block-level 3-way merge whose
          // same-block rule is "git wins". Without this pin, a concurrent human
          // edit to a block git also changed would be overwritten with NO
          // trace. Pinning the pre-merge state here means the human's content
          // is always RECOVERABLE via page history rather than silently lost —
          // git still wins the live doc deterministically, but nothing is
          // destroyed.
          //
          // Skip if the prior state was already authored by THIS machine source
          // (boundary already pinned on the transition into it), if the page is
          // effectively empty, or if the latest existing snapshot already
          // equals the prior state (avoid duplicates).
          const isMachineWrite =
            lastUpdatedSource === 'agent' || lastUpdatedSource === 'git-sync';
          if (isMachineWrite && page.lastUpdatedSource !== lastUpdatedSource) {
            const lastHistory = await this.pageHistoryRepo.findPageLastHistory(
              pageId,
              { includeContent: true, trx },
            );
            const humanBaselineMissing =
              !lastHistory ||
              !isDeepStrictEqual(lastHistory.content, page.content);
            if (
              !isEmptyParagraphDoc(page.content as any) &&
              humanBaselineMissing
            ) {
              await this.pageHistoryRepo.saveHistory(page, {
                contributorIds: page.contributorIds ?? undefined,
                trx,
              });
            }
          }

          await this.pageRepo.updatePage(
            {
              content: tiptapJson,
              textContent: textContent,
              ydoc: ydocState,
              // Guarded like every other context access in this method: an
              // unguarded `context.user.id` would throw on each retry and the
              // edit would be dropped. NOTE(review): when no user is attached
              // this passes undefined — presumably the repo leaves the column
              // untouched in that case; confirm against PageRepo.updatePage.
              lastUpdatedById: context?.user?.id,
              // Human stays the responsible author; these annotate the source.
              lastUpdatedSource,
              lastUpdatedAiChatId: context?.aiChatId ?? null,
              contributorIds: contributorIds,
            },
            pageId,
            trx,
          );

          this.logger.debug(`Page updated: ${pageId} - SlugId: ${page.slugId}`);
        });
        break;
      } catch (err) {
        this.logger.error(
          `Failed to update page ${pageId} (attempt ${attempt}/${MAX_STORE_ATTEMPTS})`,
          err,
        );
        // The write failed and rolled back; clear the partially-assigned `page`
        // so the post-store success branch below is skipped (no false "saved"
        // broadcast / history snapshot for content that was never persisted).
        page = null;
        if (attempt < MAX_STORE_ATTEMPTS) {
          // Linear backoff: 50ms, then 100ms.
          await new Promise((resolve) => setTimeout(resolve, attempt * 50));
        }
      }
    }

    // No row written (missing page, unchanged content, or every attempt
    // failed): skip all side effects.
    if (!page) return;

    document.broadcastStateless(
      JSON.stringify({
        type: 'page.updated',
        updatedAt: new Date().toISOString(),
        // Provenance for a future live badge; 'user' for human edits.
        source: lastUpdatedSource,
        lastUpdatedById: context?.user?.id,
        lastUpdatedBy: context?.user
          ? {
              id: context.user?.id,
              name: context.user?.name,
              avatarUrl: context.user?.avatarUrl,
            }
          : undefined,
      }),
    );

    await this.syncTransclusion(pageId, page.workspaceId, tiptapJson);

    await this.collabHistory.addContributors(pageId, editingUserIds);

    // `page` is the pre-write row, so page.content yields the OLD mentions.
    const mentions = extractMentions(tiptapJson);
    const userMentions = extractUserMentions(mentions);
    const oldMentions = page.content ? extractMentions(page.content) : [];
    const oldMentionedUserIds = extractUserMentions(oldMentions).map(
      (m) => m.entityId,
    );

    if (userMentions.length > 0) {
      await this.notificationQueue.add(QueueJob.PAGE_MENTION_NOTIFICATION, {
        userMentions: userMentions.map((m) => ({
          userId: m.entityId,
          mentionId: m.id,
          creatorId: m.creatorId,
        })),
        oldMentionedUserIds,
        pageId,
        spaceId: page.spaceId,
        workspaceId: page.workspaceId,
      } as IPageMentionNotificationJob);
    }

    await this.aiQueue.add(QueueJob.PAGE_CONTENT_UPDATED, {
      pageIds: [pageId],
      workspaceId: page.workspaceId,
    });

    await this.enqueuePageHistory(page, lastUpdatedSource);
  }

  /**
   * Record who touched the document since the last store. Changes without a
   * user id on the context are ignored entirely (including the agent marker).
   */
  async onChange(data: onChangePayload) {
    const documentName = data.documentName;
    const userId = data.context?.user?.id;

    if (!userId) return;

    let editors = this.contributors.get(documentName);
    if (!editors) {
      editors = new Set();
      this.contributors.set(documentName, editors);
    }
    editors.add(userId);

    // Sticky agent marker: once an agent connection touches the document in the
    // coalescing window, keep it marked until the next snapshot consumes it.
    if (data.context?.actor === 'agent') {
      this.agentTouched.set(documentName, true);
    }
  }

  /** Drop all per-document bookkeeping once the document is unloaded. */
  async afterUnloadDocument(data: afterUnloadDocumentPayload) {
    const documentName = data.documentName;
    this.contributors.delete(documentName);
    this.agentTouched.delete(documentName);
  }

  /** Read and clear the set of user ids that edited this document. */
  private consumeContributors(documentName: string): string[] {
    const contributorSet = this.contributors.get(documentName);
    if (!contributorSet) return [];

    this.contributors.delete(documentName);
    return [...contributorSet];
  }

  /** Read and clear the sticky agent-touched flag for this coalescing window. */
  private consumeAgentTouched(documentName: string): boolean {
    const touched = this.agentTouched.get(documentName) ?? false;
    this.agentTouched.delete(documentName);
    return touched;
  }

  /**
   * Queue a PAGE_HISTORY snapshot job for the page.
   *
   * Job id + delay arithmetic lives in the pure `computeHistoryJob` (see its
   * doc comment for the agent-delay-0 / age-based-debounce invariants).
   */
  private async enqueuePageHistory(
    page: Page,
    lastUpdatedSource: string,
  ): Promise<void> {
    const { jobId, delay } = computeHistoryJob(
      page,
      lastUpdatedSource,
      Date.now(),
    );

    await this.historyQueue.add(
      QueueJob.PAGE_HISTORY,
      { pageId: page.id } as IPageHistoryJob,
      { jobId, delay },
    );
  }

  /**
   * Refresh `page_transclusions` and `page_transclusion_references` to match
   * the page's current content. Runs outside the page-write transaction and
   * isolates each call so a failure here cannot affect the page save itself.
   * The diff is idempotent — the next save converges if a round drops anything.
   *
   * The three sync steps run sequentially in a fixed order; each failure is
   * logged with its own message and does not stop the remaining steps.
   */
  private async syncTransclusion(
    pageId: string,
    workspaceId: string,
    tiptapJson: unknown,
  ): Promise<void> {
    const steps: ReadonlyArray<readonly [string, () => unknown]> = [
      [
        'Failed to sync transclusions for page',
        () =>
          this.transclusionService.syncPageTransclusions(
            pageId,
            workspaceId,
            tiptapJson,
          ),
      ],
      [
        'Failed to sync transclusion references for page',
        () =>
          this.transclusionService.syncPageReferences(
            pageId,
            workspaceId,
            tiptapJson,
          ),
      ],
      [
        'Failed to sync page template references for page',
        () =>
          this.transclusionService.syncPageTemplateReferences(
            pageId,
            workspaceId,
            tiptapJson,
          ),
      ],
    ];

    for (const [failureMessage, runStep] of steps) {
      try {
        await runStep();
      } catch (err) {
        this.logger.error({ err, pageId }, failureMessage);
      }
    }
  }
}
|