feat(git-sync): three-way body merge using the last-synced base (no edit loss)

Upgrades the 2-way body merge to a real diff3 three-way merge (review #5), so a
block ONLY the human changed is KEPT when git changed a DIFFERENT block — the
2-way merge would revert it to git's stale version.

Engine: the push update loop reads the last-synced pre-image
(`git.showFileAtRef(refs/docmost/last-pushed, path)`) and passes it as the
optional `baseMarkdown` to `client.importPageMarkdown` (the common ancestor).

Server: gitmost-datasource converts base+incoming, and writeBody runs a block-
level diff3 (new three-way-merge.ts `diff3Plan`): live-only change -> keep live,
git-only change -> take git, both-changed -> git wins (conflict policy), inserts/
deletes from either side preserved. Without a base (createPage) it falls back to
the 2-way merge. Crash-safety unchanged (docs built before the connection opens).

Tests: three-way-merge.spec.ts (14 — every diff3 case incl. the cross-block
preservation and conflict policy), yjs-body-merge 3-way (real Y.Docs: human's
block instance preserved while git's block is applied), plus an engine test that
the base is forwarded from showFileAtRef. Existing push assertions updated for the
new base arg. git-sync 589 pass; server merge/datasource/gate 62 pass; typecheck
clean.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
claude code agent 227
2026-06-23 15:47:56 +03:00
parent 5c178e95c9
commit 0197a0db52
10 changed files with 429 additions and 27 deletions

View File

@@ -14,7 +14,7 @@ import { KyselyDB } from '@docmost/db/types/kysely.types';
import { PageService } from '../../../core/page/services/page.service';
import { CollaborationGateway } from '../../../collaboration/collaboration.gateway';
import { tiptapExtensions } from '../../../collaboration/collaboration.util';
import { mergeXmlFragments } from './yjs-body-merge';
import { mergeXmlFragments, mergeXmlFragments3Way } from './yjs-body-merge';
import { AuthProvenanceData } from '../../../common/decorators/auth-provenance.decorator';
/**
@@ -74,8 +74,8 @@ export class GitmostDataSourceService {
listSpaceTree: (spaceId, rootPageId) =>
this.listSpaceTree(ctx, spaceId, rootPageId),
getPageJson: (pageId) => this.getPageJson(ctx, pageId),
importPageMarkdown: (pageId, fullMarkdown) =>
this.importPageMarkdown(ctx, pageId, fullMarkdown),
importPageMarkdown: (pageId, fullMarkdown, baseMarkdown) =>
this.importPageMarkdown(ctx, pageId, fullMarkdown, baseMarkdown),
createPage: (title, content, spaceId, parentPageId) =>
this.createPage(ctx, title, content, spaceId, parentPageId),
deletePage: (pageId) => this.deletePage(ctx, pageId),
@@ -164,18 +164,29 @@ export class GitmostDataSourceService {
// --- writes (push) --------------------------------------------------------
/**
* Replace a page's body from a self-contained markdown file: parse the meta+
* body envelope, convert the body to ProseMirror, then write it through collab
* (§3.3). Returns the fresh page's `updatedAt` for the loop-guard.
* Merge a page's body from a self-contained markdown file: parse the meta+body
* envelope, convert the body to ProseMirror, then merge it through collab
* (§3.3). When `baseMarkdown` (the last-synced version of the file) is given,
* the body write is a THREE-WAY merge against the live doc so concurrent human
* edits survive (review #5); without it, a 2-way merge. Returns the fresh
* page's `updatedAt` for the loop-guard.
*/
private async importPageMarkdown(
ctx: GitSyncBindContext,
pageId: string,
fullMarkdown: string,
baseMarkdown?: string | null,
): Promise<{ updatedAt?: string }> {
const { body } = parseDocmostMarkdown(fullMarkdown);
const doc = await markdownToProseMirror(body);
await this.writeBody(pageId, doc, ctx.userId);
let baseDoc: unknown;
if (baseMarkdown != null) {
const { body: baseBody } = parseDocmostMarkdown(baseMarkdown);
baseDoc = await markdownToProseMirror(baseBody);
}
await this.writeBody(pageId, doc, ctx.userId, baseDoc);
const page = await this.pageRepo.findById(pageId);
return {
@@ -378,18 +389,23 @@ export class GitmostDataSourceService {
pageId: string,
prosemirrorJson: unknown,
userId: string,
baseProsemirrorJson?: unknown,
): Promise<void> {
const documentName = `page.${pageId}`;
// Build the incoming Yjs doc BEFORE opening the connection / touching the
// live doc. If the transform throws (a malformed/unsupported doc) we must NOT
// have mutated the live body — otherwise a conversion failure could leave the
// page empty (review #5 — crash-safe conversion).
// Build the incoming (and base) Yjs docs BEFORE opening the connection /
// touching the live doc. If a transform throws (a malformed/unsupported doc)
// we must NOT have mutated the live body — otherwise a conversion failure
// could leave the page empty (review #5 — crash-safe conversion).
const targetDoc = TiptapTransformer.toYdoc(
prosemirrorJson,
'default',
tiptapExtensions,
);
const baseDoc =
baseProsemirrorJson != null
? TiptapTransformer.toYdoc(baseProsemirrorJson, 'default', tiptapExtensions)
: null;
const conn = await this.collabGateway.openDirectConnection(documentName, {
actor: 'git-sync',
@@ -400,15 +416,24 @@ export class GitmostDataSourceService {
});
try {
await conn.transact((doc) => {
const liveFrag = doc.getXmlFragment('default');
const targetFrag = targetDoc.getXmlFragment('default');
// Block-level MERGE rather than a full-body replace (review #5): diff the
// live body against the incoming git body and apply only the blocks that
// actually changed. Blocks a human is concurrently editing — anything git
// did not change — are left untouched, and an unchanged resync is a 0-op
// write. Yjs CRDT-merges the minimal ops with live edits.
mergeXmlFragments(
doc.getXmlFragment('default'),
targetDoc.getXmlFragment('default'),
);
// actually changed; concurrently-edited blocks are left untouched and an
// unchanged resync is a 0-op write. With a `base` (the last-synced
// version) do a THREE-WAY merge so a block ONLY the human changed is kept
// and a block ONLY git changed is taken (conflicts -> git). Without a base
// (e.g. createPage), fall back to the 2-way merge.
if (baseDoc) {
mergeXmlFragments3Way(
liveFrag,
targetFrag,
baseDoc.getXmlFragment('default'),
);
} else {
mergeXmlFragments(liveFrag, targetFrag);
}
});
} finally {
await conn.disconnect();

View File

@@ -0,0 +1,112 @@
import { diff3Plan, type Pick } from './three-way-merge';
// Materialize a plan into the merged key sequence for assertion.
function apply(plan: Pick[], live: string[], target: string[]): string[] {
return plan.map((p) => (p.src === 'live' ? live[p.index] : target[p.index]));
}
const merge = (o: string[], a: string[], b: string[]): string[] =>
apply(diff3Plan(o, a, b), a, b);
describe('diff3Plan (block-level three-way merge)', () => {
it('identical on all three sides -> unchanged (all from live)', () => {
const plan = diff3Plan(['1', '2', '3'], ['1', '2', '3'], ['1', '2', '3']);
expect(plan.every((p) => p.src === 'live')).toBe(true);
expect(apply(plan, ['1', '2', '3'], ['1', '2', '3'])).toEqual(['1', '2', '3']);
});
it('git changed a block the human did not -> takes git', () => {
expect(merge(['1', '2', '3'], ['1', '2', '3'], ['1', '9', '3'])).toEqual([
'1',
'9',
'3',
]);
});
it('human changed a block git did not -> KEEPS the human edit (the core 3-way win)', () => {
expect(merge(['1', '2', '3'], ['1', 'H', '3'], ['1', '2', '3'])).toEqual([
'1',
'H',
'3',
]);
});
it('human and git changed DIFFERENT blocks -> both preserved', () => {
// human rewrote block 1, git rewrote block 3.
expect(merge(['1', '2', '3'], ['H', '2', '3'], ['1', '2', 'G'])).toEqual([
'H',
'2',
'G',
]);
});
it('human inserted a block AND git changed a different block -> both preserved', () => {
expect(
merge(['1', '2', '3'], ['1', '1.5', '2', '3'], ['1', '2', 'G']),
).toEqual(['1', '1.5', '2', 'G']);
});
it('both changed the SAME block -> conflict resolves to git', () => {
expect(merge(['1', '2', '3'], ['1', 'H', '3'], ['1', 'G', '3'])).toEqual([
'1',
'G',
'3',
]);
});
it('both made the SAME edit -> that edit (no duplication)', () => {
expect(merge(['1', '2', '3'], ['1', 'X', '3'], ['1', 'X', '3'])).toEqual([
'1',
'X',
'3',
]);
});
it('human deleted a block git left alone -> deletion preserved', () => {
expect(merge(['1', '2', '3'], ['1', '3'], ['1', '2', '3'])).toEqual([
'1',
'3',
]);
});
it('git deleted a block the human left alone -> deletion applied', () => {
expect(merge(['1', '2', '3'], ['1', '2', '3'], ['1', '3'])).toEqual([
'1',
'3',
]);
});
it('both deleted the same block -> gone (no conflict)', () => {
expect(merge(['1', '2', '3'], ['1', '3'], ['1', '3'])).toEqual(['1', '3']);
});
it('git appended a trailing block -> appended', () => {
expect(merge(['1', '2'], ['1', '2'], ['1', '2', '3'])).toEqual([
'1',
'2',
'3',
]);
});
it('human appended a trailing block git did not -> kept', () => {
expect(merge(['1', '2'], ['1', '2', '3'], ['1', '2'])).toEqual([
'1',
'2',
'3',
]);
});
it('empty base, git provides content (brand-new page body) -> git content', () => {
expect(merge([], [], ['1', '2'])).toEqual(['1', '2']);
});
it('git changed block 1, human edited block 3, far apart -> both kept', () => {
expect(
merge(
['a', 'b', 'c', 'd', 'e'],
['a', 'b', 'c', 'd', 'E'],
['A', 'b', 'c', 'd', 'e'],
),
).toEqual(['A', 'b', 'c', 'd', 'E']);
});
});

View File

@@ -0,0 +1,128 @@
/**
* Pure block-level THREE-WAY merge planner (diff3) over arrays of opaque block
* keys. Used by the git-sync body write to merge an incoming git body into the
* live page using the last-synced version as the common ancestor (review #5):
*
* - a block only the human changed (live != base, git == base) -> keep LIVE
* - a block only git changed (git != base, live == base) -> take GIT
* - a block both sides changed (a real conflict) -> GIT wins
* - inserts/deletes from either side are preserved when unambiguous
*
* Content-agnostic: it works on string keys and returns the merged block order as
* picks ({ src: 'live'|'target', index }) — the caller (the Yjs applier)
* materializes them — so the whole algorithm is unit-testable on plain arrays.
*
* Algorithm: anchor on base blocks present (unchanged) in BOTH live and target
* (their LCS-with-base intersection). Between consecutive anchors lies one region
* the human and/or git rewrote; resolve each region three-way. Stable anchor
* blocks are emitted from LIVE so the applier keeps the existing Yjs block
* instances (and the human's in-flight edits) in place.
*/
/** Matched index pairs of the longest common subsequence of `a` and `b`. */
function lcsPairs(a: string[], b: string[]): Array<[number, number]> {
const n = a.length;
const m = b.length;
const dp: number[][] = Array.from({ length: n + 1 }, () =>
new Array(m + 1).fill(0),
);
for (let i = n - 1; i >= 0; i--) {
for (let j = m - 1; j >= 0; j--) {
dp[i][j] =
a[i] === b[j]
? dp[i + 1][j + 1] + 1
: Math.max(dp[i + 1][j], dp[i][j + 1]);
}
}
const pairs: Array<[number, number]> = [];
let i = 0;
let j = 0;
while (i < n && j < m) {
if (a[i] === b[j]) {
pairs.push([i, j]);
i++;
j++;
} else if (dp[i + 1][j] >= dp[i][j + 1]) {
i++;
} else {
j++;
}
}
return pairs;
}
/** o-index -> matched index in the other side (only for LCS-matched blocks). */
function matchMap(pairs: Array<[number, number]>): Map<number, number> {
const m = new Map<number, number>();
for (const [o, x] of pairs) m.set(o, x);
return m;
}
const keysEqual = (x: string[], y: string[]): boolean =>
x.length === y.length && x.every((v, k) => v === y[k]);
/**
* Resolve one region (the live slice, target slice, and base slice that occupy
* the same span between two anchors). 'target' (git) wins ties and conflicts.
*/
function decideRegion(
aRegion: string[],
bRegion: string[],
oRegion: string[],
): 'live' | 'target' {
if (keysEqual(aRegion, bRegion)) return 'target'; // same edit on both sides
if (keysEqual(aRegion, oRegion)) return 'target'; // live unchanged -> git's edit
if (keysEqual(bRegion, oRegion)) return 'live'; // git unchanged -> human's edit
return 'target'; // genuine conflict -> git wins
}
export interface Pick {
src: 'live' | 'target';
index: number;
}
/**
* Three-way merge of base `o`, live `a`, target `b` (arrays of block keys).
* Returns the merged block order as picks from live/target.
*/
export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
const oToA = matchMap(lcsPairs(o, a));
const oToB = matchMap(lcsPairs(o, b));
const res: Pick[] = [];
let oi = 0;
let ai = 0;
let bi = 0;
for (;;) {
// Next anchor: a base block present (unchanged) in BOTH live and target.
let anchor = oi;
while (anchor < o.length && !(oToA.has(anchor) && oToB.has(anchor))) {
anchor++;
}
const aEnd = anchor < o.length ? (oToA.get(anchor) as number) : a.length;
const bEnd = anchor < o.length ? (oToB.get(anchor) as number) : b.length;
// Resolve the region [oi,anchor) that one or both sides rewrote/inserted.
const take = decideRegion(
a.slice(ai, aEnd),
b.slice(bi, bEnd),
o.slice(oi, anchor),
);
if (take === 'live') {
for (let k = ai; k < aEnd; k++) res.push({ src: 'live', index: k });
} else {
for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k });
}
if (anchor >= o.length) break;
// Emit the stable anchor block from LIVE, then advance past it on all sides.
res.push({ src: 'live', index: aEnd });
ai = aEnd + 1;
bi = bEnd + 1;
oi = anchor + 1;
}
return res;
}

View File

@@ -2,6 +2,7 @@ import * as Y from 'yjs';
import {
mergeXmlFragments,
mergeXmlFragments3Way,
cloneXmlNode,
diffBlocks,
} from './yjs-body-merge';
@@ -142,6 +143,57 @@ describe('yjs-body-merge', () => {
});
});
describe('mergeXmlFragments3Way', () => {
it('keeps a human edit to one block while applying a git change to another (3-way)', () => {
// base (last synced): [a, b, c]. Human edited block 0 in the live doc; git
// changed block 2 in the incoming file. 3-way must keep BOTH — the 2-way
// merge would instead revert the human's block 0 to git's stale version.
const base = new Y.Doc();
const live = new Y.Doc();
const target = new Y.Doc();
const baseFrag = buildFragment(base, ['a', 'b', 'c']);
const liveFrag = buildFragment(live, ['HUMAN', 'b', 'c']);
const targetFrag = buildFragment(target, ['a', 'b', 'GIT']);
const humanBlock = liveFrag.get(0); // the human's live instance
live.transact(() =>
mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag),
);
// Human's block preserved as the SAME instance; git's change applied.
expect(liveFrag.get(0)).toBe(humanBlock);
expect(texts(liveFrag)).toEqual(['HUMAN', 'b', 'GIT']);
});
it('a block both sides changed resolves to git (conflict policy)', () => {
const base = new Y.Doc();
const live = new Y.Doc();
const target = new Y.Doc();
const baseFrag = buildFragment(base, ['a', 'b', 'c']);
const liveFrag = buildFragment(live, ['a', 'HUMAN', 'c']);
const targetFrag = buildFragment(target, ['a', 'GIT', 'c']);
live.transact(() =>
mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag),
);
expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']);
});
it('git change with no concurrent human edit (live == base) applies cleanly', () => {
const base = new Y.Doc();
const live = new Y.Doc();
const target = new Y.Doc();
const baseFrag = buildFragment(base, ['a', 'b']);
const liveFrag = buildFragment(live, ['a', 'b']);
const targetFrag = buildFragment(target, ['a', 'B2']);
live.transact(() =>
mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag),
);
expect(texts(liveFrag)).toEqual(['a', 'B2']);
});
});
describe('cloneXmlNode', () => {
it('preserves text marks (XmlText delta) across docs', () => {
const src = new Y.Doc();

View File

@@ -1,5 +1,7 @@
import * as Y from 'yjs';
import { diff3Plan } from './three-way-merge';
/**
* Block-level merge of an incoming (git) page body into a LIVE Yjs document,
* replacing the previous full-body "delete everything + re-insert" write that
@@ -158,3 +160,42 @@ export function mergeXmlFragments(
}
return applied;
}
/**
* THREE-WAY block merge: reconcile `live` toward `target` using `base` (the
* last-synced common ancestor) so a block only the human changed is KEPT and a
* block only git changed is taken — instead of git's version always winning
* (review #5). Conflicts (both changed the same block) resolve to git.
*
* Implementation: diff3Plan computes the merged block ORDER (picks from live or
* target); we materialize that as a virtual target fragment and reuse the 2-way
* `mergeXmlFragments` to splice it into `live` minimally (so untouched live block
* instances — and their in-flight edits — stay put). MUST be called inside a Yjs
* transaction. Returns the number of block operations applied.
*/
export function mergeXmlFragments3Way(
live: Y.XmlFragment,
target: Y.XmlFragment,
base: Y.XmlFragment,
): number {
const liveKids = live.toArray();
const targetKids = target.toArray();
const liveKeys = liveKids.map(key);
const targetKeys = targetKids.map(key);
const baseKeys = base.toArray().map(key);
const plan = diff3Plan(baseKeys, liveKeys, targetKeys);
// Build the merged block sequence in a throwaway doc, cloning from whichever
// side each pick came from, then 2-way merge it back into the live fragment.
const merged = new Y.Doc();
const mergedFrag = merged.getXmlFragment('default');
const nodes = plan.map((p) =>
cloneXmlNode(
(p.src === 'live' ? liveKids[p.index] : targetKids[p.index]) as XmlNode,
),
);
if (nodes.length) mergedFrag.insert(0, nodes);
return mergeXmlFragments(live, mergedFrag);
}

View File

@@ -73,14 +73,19 @@ export interface GitSyncClient {
// --- writes (push) --------------------------------------------------------
/**
* Replace a page's body from a self-contained markdown file (meta + body).
* The collab/Yjs write path (SPEC §2/§15.6) — never a raw jsonb overwrite.
* Merge a page's body from a self-contained markdown file (meta + body). The
* collab/Yjs write path (SPEC §2/§15.6) — never a raw jsonb overwrite.
* `applyPushActions` reads only an optional `updatedAt` off the result
* (via `extractUpdatedAt`, tolerant of extra fields).
*
* `baseMarkdown` is the last-synced version of the file (`refs/docmost/
* last-pushed`), the common ancestor for a THREE-WAY merge against the live
* doc so concurrent human edits survive (review #5). Optional/null -> 2-way.
*/
importPageMarkdown(
pageId: string,
fullMarkdown: string,
baseMarkdown?: string | null,
): Promise<{ updatedAt?: string; [key: string]: unknown }>;
/**

View File

@@ -552,7 +552,18 @@ export async function applyPushActions(
for (const u of actions.updates) {
try {
const fullMarkdown = await deps.readFile(u.path);
const result = await client.importPageMarkdown(u.pageId, fullMarkdown);
// The last-synced version of this file (pre-image) is the common ancestor
// for a 3-way merge against the live page, so concurrent human edits are
// not clobbered (review #5). Null when the file is new at last-pushed.
const baseMarkdown = await deps.git.showFileAtRef(
LAST_PUSHED_REF,
u.path,
);
const result = await client.importPageMarkdown(
u.pageId,
fullMarkdown,
baseMarkdown,
);
updated++;
// §10 loop-guard data: hash the body we pushed + capture `updatedAt`.
pushed.push({

View File

@@ -143,12 +143,36 @@ describe('applyPushActions — update (collab path, SPEC §2/§15.6)', () => {
expect(res.updated).toBe(1);
// The collab/Yjs write path is used — NOT a raw jsonb overwrite.
expect(client.importPageMarkdown).toHaveBeenCalledTimes(1);
expect(client.importPageMarkdown).toHaveBeenCalledWith('p-1', fileBody);
expect(client.importPageMarkdown).toHaveBeenCalledWith('p-1', fileBody, null);
// No raw-overwrite path exists on the injected client surface at all.
expect((client as any).updatePageJson).toBeUndefined();
expect(client.createPage).not.toHaveBeenCalled();
expect(client.deletePage).not.toHaveBeenCalled();
});
it('forwards the last-pushed base body (3-way merge ancestor) when present', async () => {
const baseBody =
'<!-- docmost:meta\n{"version":1,"pageId":"p-1"}\n-->\n\nbase body\n';
const fileBody =
'<!-- docmost:meta\n{"version":1,"pageId":"p-1"}\n-->\n\nupdated body\n';
const client = makeClient();
// The pre-image (refs/docmost/last-pushed) carries the base version.
const { git } = makeGit({ prevTree: { 'Doc.md': baseBody } });
const fs = makeFs({ 'Doc.md': fileBody });
await applyPushActions(
deps(client, git, fs),
actions({ updates: [{ pageId: 'p-1', path: 'Doc.md' }] }),
);
// importPageMarkdown receives the base so the server can 3-way merge it.
expect(client.importPageMarkdown).toHaveBeenCalledWith(
'p-1',
fileBody,
baseBody,
);
expect(git.showFileAtRef).toHaveBeenCalledWith(LAST_PUSHED_REF, 'Doc.md');
});
});
describe('applyPushActions — create (assigned pageId written back to meta)', () => {
@@ -542,8 +566,8 @@ describe('applyPushActions — per-page error isolation + refs gated on success
// The 1st and 3rd were applied; the 2nd threw.
expect(res.updated).toBe(2);
expect(client.importPageMarkdown).toHaveBeenCalledTimes(3);
expect(client.importPageMarkdown).toHaveBeenNthCalledWith(1, 'p-a', 'a body');
expect(client.importPageMarkdown).toHaveBeenNthCalledWith(3, 'p-c', 'c body');
expect(client.importPageMarkdown).toHaveBeenNthCalledWith(1, 'p-a', 'a body', null);
expect(client.importPageMarkdown).toHaveBeenNthCalledWith(3, 'p-c', 'c body', null);
// The failure is recorded with kind/pageId/path/error.
expect(res.failures).toEqual([
@@ -649,7 +673,7 @@ describe('applyPushActions — mixed batch + skipped passthrough', () => {
expect(res.writtenBack).toEqual([{ path: 'N.md', pageId: 'created-1' }]);
expect(res.skipped).toEqual(skipped);
expect(updateRefCalls).toEqual([{ ref: LAST_PUSHED_REF, target: 'sha-9' }]);
expect(client.importPageMarkdown).toHaveBeenCalledWith('u-1', updFile);
expect(client.importPageMarkdown).toHaveBeenCalledWith('u-1', updFile, null);
expect(client.deletePage).toHaveBeenCalledWith('d-1');
});
});

View File

@@ -300,7 +300,11 @@ describe('applyPushActions (push.ts) — move prefetch isolation', () => {
// The update and the delete in the SAME batch still applied.
expect(res.updated).toBe(1);
expect(res.deleted).toBe(1);
expect(client.importPageMarkdown).toHaveBeenCalledWith('u1', store['Up.md']);
expect(client.importPageMarkdown).toHaveBeenCalledWith(
'u1',
store['Up.md'],
null,
);
expect(client.deletePage).toHaveBeenCalledWith('d1');
// The broken move was ISOLATED: no movePage/renamePage call, recorded as a

View File

@@ -312,7 +312,7 @@ describe('runPush — --apply is the ONLY write path', () => {
const res = await runPush(deps, { dryRun: false });
expect(client.importPageMarkdown).toHaveBeenCalledWith('p-9', file);
expect(client.importPageMarkdown).toHaveBeenCalledWith('p-9', file, null);
expect(res.applied?.updated).toBe(1);
});
});