A 10-agent red-team pass on the two-way Docmost<->git sync surfaced 16 ranked findings (9 others triaged out as already-defended). Wrote a reproduction test per finding (each asserts the CORRECT behavior, so it fails on the bug), then fixed the production code so every repro goes green. All confirmed bugs: Round-trip data loss (markdown-converter.ts + docmost-schema.ts mirror): - #1 editor-ext node types silently dropped on export — ported the 8 missing canon nodes (footnoteReference/footnotesList/footnoteDefinition, htmlEmbed, status, pageEmbed, transclusionSource/Reference) into the git-sync schema mirror and added converter cases that emit their schema-matching HTML instead of flattening unknown nodes to '' (this was the critical data-loss flagged in review #1679: footnotes/htmlEmbed lost on sync). Snapshot surface updated. - #2 top-level image lost width/height/align/attachmentId — now emits an HTML <img> (like video/diagrams) when it carries layout attrs; bare images stay . Image node parses width/height as strings so they re-import. - #3 code block containing a ``` fence corrupted on round-trip — outer fence is now widened to (longest-inner-backtick-run + 1). - #16 deep nesting threw RangeError (page never synced) — added a depth guard (MAX_NODE_DEPTH=400) so the converter never overflows the stack. Push/layout/cycle (engine): - #4 disambiguation ' ~slugId' suffix corrupted Docmost titles + order-dependent layout — deterministic, order-independent sibling disambiguation; suffix is stripped from a path-derived title ONLY when the new name is exactly the old title plus the suffix (never a genuine retitle ending in ' ~token'). - #6 retry-adopt by (parent,title) clobbered the wrong duplicate-title sibling — ambiguous (parent,title) is no longer adopted (falls back to fresh create). - #12 a new child under a new parent was created at ROOT — creates are ordered parent-before-child with an in-memory created-id map for parent resolution. - #13 git conflict markers could reach Docmost — bodies are scanned and the marker lines stripped (a '=======' line is only treated as a conflict separator inside a <<<<<<< ... >>>>>>> block, so setext headings are safe). - #15 a divergent `docmost` mirror was escalated by runPush but dropped by runCycle — RunCycleResult now forwards divergentDocmost to the orchestrator. Server (merge / lock / provenance): - #9 3-way merge lost a human's block edit when git inserted an adjacent block — finer-grained diff3 region merge (via lcs) preserves non-overlapping human edits; genuine same-block conflicts still resolve git-wins. - #10 single-writer race — module-static liveLocks closes the same-process TOCTOU window, and a heartbeat refresh that cannot confirm the lock now aborts the cycle at its next write checkpoint (cooperative AbortSignal threaded through runCycle). Cross-process fencing tokens remain a follow-up. - #14 sticky-agent provenance overrode an explicit actor='git-sync' write, blinding the listener loop-guard — resolveSource now lets an explicit actor win over the sticky-agent fallback (explicit agent still wins). Verified: git-sync vitest 617 pass (+1 expected-fail), server unit jest 1541 pass, server tsc clean. A review pass over the fixes caught and corrected a title-suffix over-strip, an inert abort signal, a document-wide conflict-marker strip, and two leaf-atom content-holes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
209 lines
8.2 KiB
TypeScript
209 lines
8.2 KiB
TypeScript
// Stub collaboration.util so importing the extension does not drag in the
|
|
// editor-ext -> @tiptap/react -> react-dom graph (unloadable under jest's node
|
|
// env, same coupling the gitmost-datasource / mcp specs document). The
|
|
// extension only calls getPageId, jsonToText and isEmptyParagraphDoc from it on
|
|
// the store path; tiptapExtensions is unused by onStoreDocument.
|
|
jest.mock('../collaboration.util', () => ({
|
|
tiptapExtensions: [],
|
|
getPageId: (name: string) => name.replace(/^page\./, ''),
|
|
jsonToText: () => 'text',
|
|
isEmptyParagraphDoc: () => false,
|
|
// The post-write mention extraction walks the doc via jsonToNode().descendants;
|
|
// return a node-like stub with no descendants so no mentions are produced
|
|
// (mention handling is out of scope here — we only assert provenance).
|
|
jsonToNode: () => ({ descendants: () => undefined }),
|
|
}));
|
|
|
|
// Control the Yjs<->JSON bridge: fromYdoc returns the "incoming" doc the writer
|
|
// is storing. We keep it distinct from the page's persisted content so the
|
|
// no-op guard (isDeepStrictEqual) never short-circuits the write.
|
|
const INCOMING_JSON = { type: 'doc', content: [{ type: 'paragraph' }, { t: 1 }] };
|
|
jest.mock('@hocuspocus/transformer', () => ({
|
|
TiptapTransformer: {
|
|
fromYdoc: jest.fn(() => INCOMING_JSON),
|
|
toYdoc: jest.fn(),
|
|
},
|
|
}));
|
|
|
|
// Run the executeTx callback inline with a passthrough trx.
|
|
jest.mock('@docmost/db/utils', () => ({
|
|
executeTx: jest.fn(async (_db: any, cb: any) => cb({} as any)),
|
|
}));
|
|
|
|
import * as Y from 'yjs';
|
|
import { PersistenceExtension } from './persistence.extension';
|
|
import {
|
|
onChangePayload,
|
|
onStoreDocumentPayload,
|
|
} from '@hocuspocus/server';
|
|
|
|
/**
|
|
* Provenance-precedence coverage for PersistenceExtension.onStoreDocument
|
|
* (test-strategy Module 4 / item #2): the contract `agent > git-sync > user`,
|
|
* plus the negative that a git-sync store does NOT pin a boundary history
|
|
* snapshot. We drive the precedence through the real public method (onChange to
|
|
* arm the sticky agent marker, then onStoreDocument), mocking the repos / db /
|
|
* Yjs bridge so no real database or collab server is needed. The store's
|
|
* persisted `lastUpdatedSource` and the saveHistory call are the observable
|
|
* outputs.
|
|
*/
|
|
describe('PersistenceExtension.onStoreDocument — provenance precedence (#2)', () => {
|
|
const DOCUMENT_NAME = 'page.page-1';
|
|
const PAGE_ID = 'page-1';
|
|
|
|
// `page.content` differs from INCOMING_JSON so the write is never skipped.
|
|
const persistedPage = (overrides?: { lastUpdatedSource?: string }) => ({
|
|
id: PAGE_ID,
|
|
slugId: 'slug-1',
|
|
spaceId: 'space-1',
|
|
workspaceId: 'ws-1',
|
|
creatorId: 'creator-1',
|
|
contributorIds: ['creator-1'],
|
|
content: { type: 'doc', content: [{ type: 'paragraph', content: [] }] },
|
|
lastUpdatedSource: overrides?.lastUpdatedSource ?? 'user',
|
|
createdAt: new Date(),
|
|
});
|
|
|
|
const build = (pageOverrides?: { lastUpdatedSource?: string }) => {
|
|
const pageRepo = {
|
|
findById: jest.fn().mockResolvedValue(persistedPage(pageOverrides)),
|
|
updatePage: jest.fn().mockResolvedValue({ numUpdatedRows: 1n }),
|
|
};
|
|
const pageHistoryRepo = {
|
|
// No prior snapshot -> humanBaselineMissing is true, so the ONLY thing
|
|
// gating the boundary snapshot in these tests is the source precedence.
|
|
findPageLastHistory: jest.fn().mockResolvedValue(null),
|
|
saveHistory: jest.fn().mockResolvedValue(undefined),
|
|
};
|
|
const aiQueue = { add: jest.fn().mockResolvedValue(undefined) };
|
|
const historyQueue = { add: jest.fn().mockResolvedValue(undefined) };
|
|
const notificationQueue = { add: jest.fn().mockResolvedValue(undefined) };
|
|
const collabHistory = {
|
|
addContributors: jest.fn().mockResolvedValue(undefined),
|
|
};
|
|
const transclusionService = {
|
|
syncPageTransclusions: jest.fn().mockResolvedValue(undefined),
|
|
syncPageReferences: jest.fn().mockResolvedValue(undefined),
|
|
syncPageTemplateReferences: jest.fn().mockResolvedValue(undefined),
|
|
};
|
|
|
|
const ext = new PersistenceExtension(
|
|
pageRepo as any,
|
|
pageHistoryRepo as any,
|
|
{} as any, // db
|
|
aiQueue as any,
|
|
historyQueue as any,
|
|
notificationQueue as any,
|
|
collabHistory as any,
|
|
transclusionService as any,
|
|
);
|
|
|
|
return { ext, pageRepo, pageHistoryRepo, historyQueue };
|
|
};
|
|
|
|
// A real Y.Doc is required for Y.encodeStateAsUpdate(document); broadcastStateless
|
|
// is a no-op spy. The fromYdoc bridge is mocked, so the doc's contents are
|
|
// irrelevant to the JSON path.
|
|
const makeStorePayload = (context: any): onStoreDocumentPayload =>
|
|
({
|
|
documentName: DOCUMENT_NAME,
|
|
document: Object.assign(new Y.Doc(), {
|
|
broadcastStateless: jest.fn(),
|
|
}),
|
|
context,
|
|
}) as any;
|
|
|
|
const makeChangePayload = (actor: string): onChangePayload =>
|
|
({
|
|
documentName: DOCUMENT_NAME,
|
|
context: { user: { id: 'user-1' }, actor },
|
|
}) as any;
|
|
|
|
const sourceOf = (pageRepo: { updatePage: jest.Mock }) =>
|
|
pageRepo.updatePage.mock.calls[0][0].lastUpdatedSource;
|
|
|
|
it("tags 'user' for a plain write (no agent touch, no git-sync actor)", async () => {
|
|
const { ext, pageRepo } = build();
|
|
|
|
await ext.onStoreDocument(
|
|
makeStorePayload({ user: { id: 'user-1' }, actor: 'user' }),
|
|
);
|
|
|
|
expect(sourceOf(pageRepo)).toBe('user');
|
|
});
|
|
|
|
it("tags 'git-sync' when the writer's actor is 'git-sync' and no agent touched the window", async () => {
|
|
const { ext, pageRepo } = build();
|
|
|
|
await ext.onStoreDocument(
|
|
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
|
|
);
|
|
|
|
expect(sourceOf(pageRepo)).toBe('git-sync');
|
|
});
|
|
|
|
it("keeps 'git-sync' for an explicit git-sync store even with a sticky agent marker (#14 loop-guard)", async () => {
|
|
const { ext, pageRepo } = build();
|
|
|
|
// An agent edit landed earlier in the coalescing window (sticky marker),
|
|
// then a git-sync writer performs the store. Red-team finding #14: an
|
|
// EXPLICIT current-write actor is authoritative for THIS write, so the
|
|
// store must stay 'git-sync' — otherwise the PageChangeListener loop-guard
|
|
// (keyed on lastUpdatedSource === 'git-sync') fails to recognize git-sync's
|
|
// own write and re-exports it. Explicit 'agent' still wins (see below); the
|
|
// sticky marker only promotes a plain human writer to 'agent'.
|
|
await ext.onChange(makeChangePayload('agent'));
|
|
await ext.onStoreDocument(
|
|
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
|
|
);
|
|
|
|
expect(sourceOf(pageRepo)).toBe('git-sync');
|
|
});
|
|
|
|
it("tags 'agent' when the storing writer itself is the agent (no prior onChange)", async () => {
|
|
const { ext, pageRepo } = build();
|
|
|
|
await ext.onStoreDocument(
|
|
makeStorePayload({ user: { id: 'agent-user' }, actor: 'agent' }),
|
|
);
|
|
|
|
expect(sourceOf(pageRepo)).toBe('agent');
|
|
});
|
|
|
|
// --- negative: a git-sync store must NOT pin a boundary history snapshot ----
|
|
// The boundary-snapshot branch only fires when the resolved source is 'agent'
|
|
// AND the prior persisted source is not 'agent'. A git-sync store resolves to
|
|
// 'git-sync', so saveHistory must NOT be called.
|
|
it('does NOT write a boundary history snapshot for a git-sync store', async () => {
|
|
const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });
|
|
|
|
await ext.onStoreDocument(
|
|
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
|
|
);
|
|
|
|
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
|
});
|
|
|
|
it('DOES pin a boundary snapshot for an agent store over a prior human state (control)', async () => {
|
|
// Confirms the negative above is meaningful: under the SAME mocks, an agent
|
|
// store over a 'user' baseline DOES trigger the boundary snapshot.
|
|
const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });
|
|
|
|
await ext.onStoreDocument(
|
|
makeStorePayload({ user: { id: 'agent-user' }, actor: 'agent' }),
|
|
);
|
|
|
|
expect(pageHistoryRepo.saveHistory).toHaveBeenCalledTimes(1);
|
|
});
|
|
|
|
it('does NOT pin a boundary snapshot for a plain user store', async () => {
|
|
const { ext, pageHistoryRepo } = build({ lastUpdatedSource: 'user' });
|
|
|
|
await ext.onStoreDocument(
|
|
makeStorePayload({ user: { id: 'user-1' }, actor: 'user' }),
|
|
);
|
|
|
|
expect(pageHistoryRepo.saveHistory).not.toHaveBeenCalled();
|
|
});
|
|
});
|