fix(git-sync): address PR #119 review #4 — symlink guard, dead-code cull, changelog + warnings/suggestions
Blocking (review id 2514): - [security] Forbid symlinks in vaults. ensureServable now sets core.symlinks=false in each vault's local git config (a pushed symlink is checked out as a plain file, never a real link), and the engine cycle wraps every read/write/mkdir in an lstat/realpath guard (new path-guard.ts) that refuses a path that is — or traverses — a symlink, or whose realpath escapes the vault root. Prevents a writer from publishing /etc/passwd or the server .env, or writing outside the vault. Adds unit tests (path-guard.test.ts) + a read-guard integration test (cycle.test.ts) + real lstat/realpath in the roundtrip integration test. - [simplification] Delete dead lib/diff.ts + test/diff.test.ts and drop the now-unused @fellow/prosemirror-recreate-transform dependency. - [documentation] Add a CHANGELOG [Unreleased] → Added entry for git-sync. Warnings: - [test-coverage] Cover the CREATE-branch conflict-markers guard (a new .md with markers and no gitmost_id is recorded as a create failure, never created). Suggestions: - [stability] Bound each `git config` in ensureServable with a timeout. - [authz] Trigger endpoint resolves spaceId workspace-scoped and 404s a foreign space before any vault directory is created. - [stability] Attribute git-initiated moves to the service account (lastUpdatedById), via an optional actor param on PageService.movePage. - [documentation] Document the per-space autoMergeConflicts toggle in AGENTS.md. - [test-coverage] Cover the unterminated `:::` callout fence fallback. - [simplification] Move test-only roundtrip-helpers.ts out of src/ into test/. Architecture: - Move the Yjs/ProseMirror merge primitives (yjs-body-merge, three-way-merge, lcs + specs) into collaboration/merge/, breaking the collaboration → integrations/git-sync dependency cycle this PR introduced. - Port the schema-surface drift gate to packages/mcp (the mcp schema mirror had none); pins 52 entries. 
Deferred (with rationale in the review thread): the incremental-pull perf warning (correctness-neutral; needs a high-water-mark design + its own tests on the data-loss-critical path) and the redis-sync rolling-deploy mixed-version edge (the deficient behavior is in already-released old-instance code; the new code is correct on both sides; impact is a transient rollout-window artifact). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
import * as Y from 'yjs';
|
||||
|
||||
import { mergeXmlFragments3Way } from './yjs-body-merge';
|
||||
import { mergeXmlFragments3Way } from '../../../collaboration/merge/yjs-body-merge';
|
||||
|
||||
/**
|
||||
* Convergence repro for the git-ingest "silent revert" data-loss bug.
|
||||
|
||||
@@ -5,7 +5,14 @@ import {
|
||||
OnModuleInit,
|
||||
} from '@nestjs/common';
|
||||
import { SchedulerRegistry } from '@nestjs/schedule';
|
||||
import { mkdir, readFile, rm, writeFile } from 'node:fs/promises';
|
||||
import {
|
||||
lstat,
|
||||
mkdir,
|
||||
readFile,
|
||||
realpath,
|
||||
rm,
|
||||
writeFile,
|
||||
} from 'node:fs/promises';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||
import { sql } from 'kysely';
|
||||
@@ -303,11 +310,31 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
|
||||
vault,
|
||||
settings,
|
||||
// ABSOLUTE-path fs primitives the engine cycle injects (it stays IO-free).
|
||||
// `lstat`/`realpath` back the engine's symlink guard: both MUST yield
|
||||
// `null` on ENOENT (a not-yet-created file is the normal write case) so the
|
||||
// guard can tell "absent" (safe to create) from "is a symlink" (refuse).
|
||||
// `lstat` does NOT follow the final link; `realpath` resolves it.
|
||||
fs: {
|
||||
readFile: (absPath) => readFile(absPath, 'utf8'),
|
||||
writeFile: (absPath, text) => writeFile(absPath, text, 'utf8'),
|
||||
mkdir: (absDir) => mkdir(absDir, { recursive: true }).then(() => undefined),
|
||||
rm: (absPath) => rm(absPath, { force: true }),
|
||||
lstat: (absPath) =>
|
||||
lstat(absPath).then(
|
||||
(st) => ({ isSymbolicLink: st.isSymbolicLink() }),
|
||||
(err: NodeJS.ErrnoException) => {
|
||||
if (err && err.code === 'ENOENT') return null;
|
||||
throw err;
|
||||
},
|
||||
),
|
||||
realpath: (absPath) =>
|
||||
realpath(absPath).then(
|
||||
(p) => p,
|
||||
(err: NodeJS.ErrnoException) => {
|
||||
if (err && err.code === 'ENOENT') return null;
|
||||
throw err;
|
||||
},
|
||||
),
|
||||
},
|
||||
// Every cycle logs its full push plan + per-action lines + completion
|
||||
// counts (created/updated/deleted/skipped/failures) through this `log`, so
|
||||
|
||||
@@ -362,13 +362,17 @@ describe('GitmostDataSourceService', () => {
|
||||
await service.bind(CTX).movePage('p1', 'parent-1');
|
||||
|
||||
expect(mocks.pageService.movePage).toHaveBeenCalledTimes(1);
|
||||
const [dto, page, provenance] = mocks.pageService.movePage.mock.calls[0];
|
||||
const [dto, page, provenance, actorUserId] =
|
||||
mocks.pageService.movePage.mock.calls[0];
|
||||
expect(dto.pageId).toBe('p1');
|
||||
expect(dto.parentPageId).toBe('parent-1');
|
||||
expect(typeof dto.position).toBe('string');
|
||||
expect(dto.position.length).toBeGreaterThan(0);
|
||||
expect(page).toEqual({ id: 'p1', spaceId: 'space-1' });
|
||||
expect(provenance).toEqual({ actor: 'git-sync', aiChatId: null });
|
||||
// The git-initiated move is attributed to the service user (lastUpdatedById
|
||||
// parity with create/delete/rename).
|
||||
expect(actorUserId).toBe('svc-user');
|
||||
});
|
||||
|
||||
it('passes through an explicit position unchanged', async () => {
|
||||
|
||||
@@ -76,7 +76,7 @@ export class GitmostDataSourceService {
|
||||
this.createPage(ctx, title, content, spaceId, parentPageId),
|
||||
deletePage: (pageId) => this.deletePage(ctx, pageId),
|
||||
movePage: (pageId, parentPageId, position) =>
|
||||
this.movePage(pageId, parentPageId, position),
|
||||
this.movePage(ctx, pageId, parentPageId, position),
|
||||
renamePage: (pageId, title) => this.renamePage(ctx, pageId, title),
|
||||
listRecentSince: (spaceId, sinceIso, hardPageCap) =>
|
||||
this.listRecentSince(spaceId, sinceIso, hardPageCap),
|
||||
@@ -252,6 +252,7 @@ export class GitmostDataSourceService {
|
||||
* §3.2 / §14.4).
|
||||
*/
|
||||
private async movePage(
|
||||
ctx: GitSyncBindContext,
|
||||
pageId: string,
|
||||
parentPageId: string | null,
|
||||
position?: string,
|
||||
@@ -268,6 +269,10 @@ export class GitmostDataSourceService {
|
||||
{ pageId, parentPageId: parentPageId ?? null, position: resolvedPosition },
|
||||
page,
|
||||
GIT_SYNC_PROVENANCE,
|
||||
// Attribute the git-initiated move to the service user (lastUpdatedById),
|
||||
// matching create/delete/rename — the contract is "git-operations are
|
||||
// attributed to the service account".
|
||||
ctx.userId,
|
||||
);
|
||||
return { id: pageId };
|
||||
}
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
/**
 * Backward-filled LCS length table for sequences `a` and `b`: `dp[i][j]` is the
 * length of the longest common subsequence of the suffixes `a[i:]` and `b[j:]`.
 * O(n*m) time/space — fine for page block counts.
 *
 * Shared by the two-way block diff (`yjs-body-merge.diffBlocks`) and the
 * three-way merge planner (`three-way-merge.lcsPairs`) so the (identical) table
 * construction lives in ONE place; each caller does its own traceback over the
 * returned table.
 *
 * Elements are compared with `===`, so any element type with meaningful strict
 * equality works (block keys are strings today). Neither input is mutated.
 *
 * @param a first sequence (table rows)
 * @param b second sequence (table columns)
 * @returns an `(a.length + 1) x (b.length + 1)` table; the last row and the
 *   last column are all zeros (LCS against an empty suffix)
 */
export function buildLcsTable<T = string>(
  a: readonly T[],
  b: readonly T[],
): number[][] {
  const n = a.length;
  const m = b.length;
  // Explicitly typed rows: `new Array(m + 1)` alone would be inferred as any[].
  const dp: number[][] = Array.from({ length: n + 1 }, () =>
    new Array<number>(m + 1).fill(0),
  );
  // Fill bottom-up / right-to-left so every cell only reads cells already set.
  for (let i = n - 1; i >= 0; i--) {
    const row = dp[i];
    const below = dp[i + 1];
    for (let j = m - 1; j >= 0; j--) {
      row[j] =
        a[i] === b[j] ? below[j + 1] + 1 : Math.max(below[j], row[j + 1]);
    }
  }
  return dp;
}
|
||||
@@ -1,20 +0,0 @@
|
||||
import { diff3Plan, type Pick } from './three-way-merge';
|
||||
|
||||
// Materialize a plan into the merged key sequence for assertion.
|
||||
function apply(plan: Pick[], live: string[], target: string[]): string[] {
|
||||
return plan.map((p) => (p.src === 'live' ? live[p.index] : target[p.index]));
|
||||
}
|
||||
|
||||
const merge = (o: string[], a: string[], b: string[]): string[] =>
|
||||
apply(diff3Plan(o, a, b), a, b);
|
||||
|
||||
describe('diff3Plan red-team #9 (human edit + adjacent git insert)', () => {
|
||||
it('keeps human block-2 edit AND applies git insert of 2.5', () => {
|
||||
// base: 1 2 3
|
||||
// live: 1 H 3 (human rewrote block 2)
|
||||
// target: 1 2 2.5 3 (git inserted 2.5 after block 2)
|
||||
expect(
|
||||
merge(['1', '2', '3'], ['1', 'H', '3'], ['1', '2', '2.5', '3']),
|
||||
).toEqual(['1', 'H', '2.5', '3']);
|
||||
});
|
||||
});
|
||||
@@ -1,112 +0,0 @@
|
||||
import { diff3Plan, type Pick } from './three-way-merge';
|
||||
|
||||
// Materialize a plan into the merged key sequence for assertion.
|
||||
function apply(plan: Pick[], live: string[], target: string[]): string[] {
|
||||
return plan.map((p) => (p.src === 'live' ? live[p.index] : target[p.index]));
|
||||
}
|
||||
|
||||
const merge = (o: string[], a: string[], b: string[]): string[] =>
|
||||
apply(diff3Plan(o, a, b), a, b);
|
||||
|
||||
describe('diff3Plan (block-level three-way merge)', () => {
|
||||
it('identical on all three sides -> unchanged (all from live)', () => {
|
||||
const plan = diff3Plan(['1', '2', '3'], ['1', '2', '3'], ['1', '2', '3']);
|
||||
expect(plan.every((p) => p.src === 'live')).toBe(true);
|
||||
expect(apply(plan, ['1', '2', '3'], ['1', '2', '3'])).toEqual(['1', '2', '3']);
|
||||
});
|
||||
|
||||
it('git changed a block the human did not -> takes git', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', '2', '3'], ['1', '9', '3'])).toEqual([
|
||||
'1',
|
||||
'9',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human changed a block git did not -> KEEPS the human edit (the core 3-way win)', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', 'H', '3'], ['1', '2', '3'])).toEqual([
|
||||
'1',
|
||||
'H',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human and git changed DIFFERENT blocks -> both preserved', () => {
|
||||
// human rewrote block 1, git rewrote block 3.
|
||||
expect(merge(['1', '2', '3'], ['H', '2', '3'], ['1', '2', 'G'])).toEqual([
|
||||
'H',
|
||||
'2',
|
||||
'G',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human inserted a block AND git changed a different block -> both preserved', () => {
|
||||
expect(
|
||||
merge(['1', '2', '3'], ['1', '1.5', '2', '3'], ['1', '2', 'G']),
|
||||
).toEqual(['1', '1.5', '2', 'G']);
|
||||
});
|
||||
|
||||
it('both changed the SAME block -> conflict resolves to git', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', 'H', '3'], ['1', 'G', '3'])).toEqual([
|
||||
'1',
|
||||
'G',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('both made the SAME edit -> that edit (no duplication)', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', 'X', '3'], ['1', 'X', '3'])).toEqual([
|
||||
'1',
|
||||
'X',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human deleted a block git left alone -> deletion preserved', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', '3'], ['1', '2', '3'])).toEqual([
|
||||
'1',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('git deleted a block the human left alone -> deletion applied', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', '2', '3'], ['1', '3'])).toEqual([
|
||||
'1',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('both deleted the same block -> gone (no conflict)', () => {
|
||||
expect(merge(['1', '2', '3'], ['1', '3'], ['1', '3'])).toEqual(['1', '3']);
|
||||
});
|
||||
|
||||
it('git appended a trailing block -> appended', () => {
|
||||
expect(merge(['1', '2'], ['1', '2'], ['1', '2', '3'])).toEqual([
|
||||
'1',
|
||||
'2',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('human appended a trailing block git did not -> kept', () => {
|
||||
expect(merge(['1', '2'], ['1', '2', '3'], ['1', '2'])).toEqual([
|
||||
'1',
|
||||
'2',
|
||||
'3',
|
||||
]);
|
||||
});
|
||||
|
||||
it('empty base, git provides content (brand-new page body) -> git content', () => {
|
||||
expect(merge([], [], ['1', '2'])).toEqual(['1', '2']);
|
||||
});
|
||||
|
||||
it('git changed block 1, human edited block 3, far apart -> both kept', () => {
|
||||
expect(
|
||||
merge(
|
||||
['a', 'b', 'c', 'd', 'e'],
|
||||
['a', 'b', 'c', 'd', 'E'],
|
||||
['A', 'b', 'c', 'd', 'e'],
|
||||
),
|
||||
).toEqual(['A', 'b', 'c', 'd', 'E']);
|
||||
});
|
||||
});
|
||||
@@ -1,239 +0,0 @@
|
||||
/**
|
||||
* Pure block-level THREE-WAY merge planner (diff3) over arrays of opaque block
|
||||
* keys. Used by the git-sync body write to merge an incoming git body into the
|
||||
* live page using the last-synced version as the common ancestor (review #5):
|
||||
*
|
||||
* - a block only the human changed (live != base, git == base) -> keep LIVE
|
||||
* - a block only git changed (git != base, live == base) -> take GIT
|
||||
* - a block both sides changed (a real conflict) -> GIT wins
|
||||
* - inserts/deletes from either side are preserved when unambiguous
|
||||
*
|
||||
* Content-agnostic: it works on string keys and returns the merged block order as
|
||||
* picks ({ src: 'live'|'target', index }) — the caller (the Yjs applier)
|
||||
* materializes them — so the whole algorithm is unit-testable on plain arrays.
|
||||
*
|
||||
* Algorithm: anchor on base blocks present (unchanged) in BOTH live and target
|
||||
* (their LCS-with-base intersection). Between consecutive anchors lies one region
|
||||
* the human and/or git rewrote; resolve each region three-way. Stable anchor
|
||||
* blocks are emitted from LIVE so the applier keeps the existing Yjs block
|
||||
* instances (and the human's in-flight edits) in place.
|
||||
*
|
||||
* LOCATION (deferred): this and its `lcs.ts` sibling are pure, framework-free and
|
||||
* could conceptually live in `packages/git-sync` (the engine). They are kept in
|
||||
* the server integration on purpose: `packages/git-sync` is a VENDORED engine
|
||||
* (pinned upstream, manually re-synced), so adding first-party files there
|
||||
* complicates the re-sync story, and the only consumer today is the server. Move
|
||||
* them into the engine only once the vendoring re-sync story is settled.
|
||||
*/
|
||||
|
||||
import { buildLcsTable } from './lcs';
|
||||
|
||||
/**
 * Walk the LCS table of `a` and `b` from the top-left corner and collect the
 * `[indexInA, indexInB]` pair of every element that belongs to the longest
 * common subsequence. When skipping either side is equally good, `a` is skipped.
 */
function lcsPairs(a: string[], b: string[]): Array<[number, number]> {
  const table = buildLcsTable(a, b);
  const matched: Array<[number, number]> = [];
  let ai = 0;
  let bi = 0;
  while (ai < a.length && bi < b.length) {
    if (a[ai] === b[bi]) {
      matched.push([ai, bi]);
      ai += 1;
      bi += 1;
      continue;
    }
    // Follow whichever neighbour still holds the longer remaining LCS.
    if (table[ai + 1][bi] >= table[ai][bi + 1]) {
      ai += 1;
    } else {
      bi += 1;
    }
  }
  return matched;
}
|
||||
|
||||
/**
 * o-index -> matched index in the other side (only for LCS-matched blocks).
 *
 * Builds a fresh `Map` straight from the `[oIndex, otherIndex]` pairs; if an
 * o-index appears more than once, the last pair wins. The input is not mutated.
 */
function matchMap(
  pairs: ReadonlyArray<readonly [number, number]>,
): Map<number, number> {
  return new Map<number, number>(pairs);
}
|
||||
|
||||
/**
 * A single edit one side applied to `base` inside a region. The base blocks in
 * the half-open span `[oStart, oEnd)` are replaced by the side's own blocks
 * listed in `content`.
 */
interface Hunk {
  /** First base index covered by the edit (inclusive). */
  oStart: number;
  /** End of the covered base span (exclusive); equals `oStart` for a pure insert. */
  oEnd: number;
  /** Region-local indices of the side's replacement blocks; empty for a pure delete. */
  content: number[];
}
|
||||
|
||||
/**
 * Express `side` as a list of non-overlapping edits against `o`. The LCS of the
 * two sequences gives the blocks that were kept; every gap between two
 * consecutive kept blocks (plus the gap after the last one) becomes one hunk,
 * unless that gap is empty on both sides.
 */
function buildHunks(o: string[], side: string[]): Hunk[] {
  // Kept [oIdx, sideIdx] pairs, closed off by an end-of-input sentinel so the
  // trailing gap is handled by the same loop iteration as every other gap.
  const boundaries: Array<[number, number]> = [
    ...lcsPairs(o, side),
    [o.length, side.length],
  ];
  const result: Hunk[] = [];
  let oFrom = 0;
  let sFrom = 0;
  for (const [oTo, sTo] of boundaries) {
    const removesBase = oTo > oFrom;
    const addsBlocks = sTo > sFrom;
    if (removesBase || addsBlocks) {
      result.push({
        oStart: oFrom,
        oEnd: oTo,
        content: Array.from({ length: sTo - sFrom }, (_, k) => sFrom + k),
      });
    }
    // Resume right after the kept block that closed this gap.
    oFrom = oTo + 1;
    sFrom = sTo + 1;
  }
  return result;
}
|
||||
|
||||
/**
 * Decide whether hunk `a` (one side) and hunk `b` (the other side) touch the
 * same part of base.
 *
 * - two pure inserts collide only when they sit at the same gap;
 * - a pure insert collides with a replace/delete only when its gap lies
 *   strictly inside the other hunk's base span (a shared boundary is fine);
 * - two replace/delete hunks collide when their half-open spans intersect.
 */
function hunksOverlap(a: Hunk, b: Hunk): boolean {
  const isPureInsert = (h: Hunk): boolean => h.oStart === h.oEnd;
  const strictlyInside = (gap: number, span: Hunk): boolean =>
    span.oStart < gap && gap < span.oEnd;

  if (isPureInsert(a)) {
    return isPureInsert(b)
      ? a.oStart === b.oStart
      : strictlyInside(a.oStart, b);
  }
  if (isPureInsert(b)) {
    return strictlyInside(b.oStart, a);
  }
  return Math.max(a.oStart, b.oStart) < Math.min(a.oEnd, b.oEnd);
}
|
||||
|
||||
/** One block of a merged region, addressed relative to that region's slices. */
interface LocalPick {
  /** Which side supplies the block. */
  src: 'live' | 'target';
  /** Index of the block inside the region-local slice of `src`. */
  local: number;
}
|
||||
|
||||
/**
 * Three-way merge of a single inter-anchor region at block granularity.
 *
 * Both sides are diffed against the region's base `o`. If any human (live) hunk
 * overlaps any git (target) hunk, the region is a genuine conflict and `null`
 * is returned so the caller can apply its all-or-nothing git-wins rule.
 * Otherwise the hunks are disjoint and are interleaved in base order: blocks
 * neither side touched are emitted from LIVE, and each hunk contributes the
 * blocks of the side that made it.
 *
 * @param o base blocks of the region
 * @param a live (human) blocks of the region
 * @param b target (git) blocks of the region
 * @returns region-local picks in merged order, or `null` on conflict
 */
function tryMergeRegion(
  o: string[],
  a: string[],
  b: string[],
): LocalPick[] | null {
  const liveHunks = buildHunks(o, a);
  const gitHunks = buildHunks(o, b);

  // A single overlapping pair is enough to give up on the fine-grained merge.
  const conflicting = liveHunks.some((lh) =>
    gitHunks.some((gh) => hunksOverlap(lh, gh)),
  );
  if (conflicting) return null;

  // base index -> live index, used for the blocks both sides left alone.
  const baseToLive = matchMap(lcsPairs(o, a));

  const picks: LocalPick[] = [];
  let liveCursor = 0;
  let gitCursor = 0;
  let basePos = 0;
  while (
    basePos < o.length ||
    liveCursor < liveHunks.length ||
    gitCursor < gitHunks.length
  ) {
    const lh = liveCursor < liveHunks.length ? liveHunks[liveCursor] : null;
    const gh = gitCursor < gitHunks.length ? gitHunks[gitCursor] : null;
    const stop = Math.min(
      lh ? lh.oStart : o.length,
      gh ? gh.oStart : o.length,
    );

    // Untouched base blocks up to the next hunk come from LIVE.
    for (; basePos < stop; basePos++) {
      picks.push({ src: 'live', local: baseToLive.get(basePos) as number });
    }
    if (lh === null && gh === null) break;

    // Pick the hunk that starts here. If both do, they are disjoint, so the
    // pure insert (oEnd === basePos) goes first, ahead of the hunk that
    // consumes the base block at basePos.
    const liveHere = lh !== null && lh.oStart === basePos;
    const gitHere = gh !== null && gh.oStart === basePos;
    const takeLive =
      liveHere && gitHere ? (lh as Hunk).oEnd === basePos : liveHere;

    const chosen = (takeLive ? lh : gh) as Hunk;
    const src: LocalPick['src'] = takeLive ? 'live' : 'target';
    for (const idx of chosen.content) {
      picks.push({ src, local: idx });
    }
    basePos = chosen.oEnd;
    if (takeLive) {
      liveCursor += 1;
    } else {
      gitCursor += 1;
    }
  }
  return picks;
}
|
||||
|
||||
/** One block of the merged body, addressed in the full live or target array. */
export interface Pick {
  /** Which side supplies the block. */
  src: 'live' | 'target';
  /** Index of the block in the full `src` array handed to `diff3Plan`. */
  index: number;
}
|
||||
|
||||
/**
 * Block-level three-way merge of base `o`, live `a` and target `b`, all given
 * as arrays of block keys.
 *
 * Base blocks that the LCS alignment matches in BOTH live and target act as
 * anchors. The stretch before each anchor (and after the last one) is merged on
 * its own: first with the fine-grained region merge, and, when that reports a
 * conflict, by taking the whole stretch from target (git wins). Anchors
 * themselves are always emitted from live.
 *
 * @returns the merged block order as picks from live/target
 */
export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
  const baseToLive = matchMap(lcsPairs(o, a));
  const baseToGit = matchMap(lcsPairs(o, b));
  const isAnchor = (k: number): boolean =>
    baseToLive.has(k) && baseToGit.has(k);

  const plan: Pick[] = [];
  let baseFrom = 0;
  let liveFrom = 0;
  let gitFrom = 0;

  let finished = false;
  while (!finished) {
    // Locate the next base block that survives unchanged on both sides.
    let anchor = baseFrom;
    while (anchor < o.length && !isAnchor(anchor)) {
      anchor += 1;
    }
    const pastEnd = anchor >= o.length;
    const liveTo = pastEnd ? a.length : (baseToLive.get(anchor) as number);
    const gitTo = pastEnd ? b.length : (baseToGit.get(anchor) as number);

    // Merge the stretch [baseFrom, anchor). A null result is a same-block
    // conflict, in which case the whole stretch is taken from target.
    const region = tryMergeRegion(
      o.slice(baseFrom, anchor),
      a.slice(liveFrom, liveTo),
      b.slice(gitFrom, gitTo),
    );
    if (region === null) {
      for (let k = gitFrom; k < gitTo; k++) {
        plan.push({ src: 'target', index: k });
      }
    } else {
      for (const pick of region) {
        // Translate region-local indices back into full-array indices.
        const offset = pick.src === 'live' ? liveFrom : gitFrom;
        plan.push({ src: pick.src, index: offset + pick.local });
      }
    }

    if (pastEnd) {
      finished = true;
    } else {
      // The anchor itself comes from LIVE; then step past it on all sides.
      plan.push({ src: 'live', index: liveTo });
      liveFrom = liveTo + 1;
      gitFrom = gitTo + 1;
      baseFrom = anchor + 1;
    }
  }

  return plan;
}
|
||||
@@ -16,8 +16,8 @@ jest.mock('node:fs/promises', () => ({
|
||||
|
||||
// ensureServable shells out via `promisify(execFile)`; mock execFile with a
|
||||
// callback-style fn so promisify resolves. Each `git config <key> <value>` call
|
||||
// is recorded so the four config writes (incl. the security-critical
|
||||
// receive.denyNonFastForwards=true) can be asserted.
|
||||
// is recorded so the config writes (incl. the security-critical
|
||||
// receive.denyNonFastForwards=true and core.symlinks=false) can be asserted.
|
||||
jest.mock('node:child_process', () => ({
|
||||
execFile: jest.fn((_cmd: string, _args: string[], _opts: any, cb: any) =>
|
||||
cb(null, { stdout: '', stderr: '' }),
|
||||
@@ -54,6 +54,7 @@ void loadGitSync;
|
||||
function build(dataDir: string): { service: VaultRegistryService } {
|
||||
const env = {
|
||||
getGitSyncDataDir: jest.fn(() => dataDir),
|
||||
getGitSyncBackendTimeoutMs: jest.fn(() => 120000),
|
||||
};
|
||||
const service = new VaultRegistryService(env as any);
|
||||
return { service };
|
||||
@@ -96,7 +97,7 @@ describe('VaultRegistryService', () => {
|
||||
});
|
||||
|
||||
describe('ensureServable', () => {
|
||||
it('ensures the repo then writes the four force-push-protection git configs', async () => {
|
||||
it('ensures the repo then writes the force-push-protection + symlink-guard git configs', async () => {
|
||||
const { service } = build('/vaults');
|
||||
|
||||
const path = await service.ensureServable('space-1');
|
||||
@@ -117,12 +118,18 @@ describe('VaultRegistryService', () => {
|
||||
['receive.denyNonFastForwards', 'true'],
|
||||
['http.receivepack', 'true'],
|
||||
['http.uploadpack', 'true'],
|
||||
// Security-critical (PR #119 review): a pushed symlink is checked out as
|
||||
// a plain file, never a real link, so it cannot be followed to leak/
|
||||
// overwrite a file outside the vault.
|
||||
['core.symlinks', 'false'],
|
||||
]);
|
||||
|
||||
// Every config write targets THIS vault's cwd.
|
||||
// Every config write targets THIS vault's cwd and is time-bounded so a
|
||||
// wedged git cannot hang the request path.
|
||||
for (const [cmd, args, opts] of execFileMock.mock.calls) {
|
||||
if (cmd === 'git' && args[0] === 'config') {
|
||||
expect(opts.cwd).toBe('/vaults/space-1');
|
||||
expect(opts.timeout).toBe(120000);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
@@ -62,10 +62,20 @@ export class VaultRegistryService {
|
||||
* rewrite the engine's history on `main`.
|
||||
* - http.receivepack=true / http.uploadpack=true — explicitly allow the
|
||||
* receive/upload services over HTTP.
|
||||
* - core.symlinks=false — SECURITY (PR #119 review). A writer could push a
|
||||
* `.md` entry that is a SYMLINK (e.g. `leak.md -> /etc/passwd` or
|
||||
* `-> .env`); with symlinks enabled `updateInstead` would materialize a
|
||||
* real link in the working tree, and the next push cycle would follow it
|
||||
* and PUBLISH the target's contents as a Docmost page (server-file
|
||||
* disclosure), or use a symlinked directory to write OUTSIDE the vault on
|
||||
* pull. With `core.symlinks=false` git checks out such a blob as a PLAIN
|
||||
* FILE containing the link text, never a real link, defusing the primitive
|
||||
* at the git layer. (The engine's per-access lstat/realpath guard is the
|
||||
* second layer — see path-guard.ts.)
|
||||
*
|
||||
* All four are set idempotently (plain `git config` overwrites the local
|
||||
* value). Returns the absolute vault path. Idempotent and safe to call before
|
||||
* every request.
|
||||
* All are set idempotently (plain `git config` overwrites the local value).
|
||||
* Returns the absolute vault path. Idempotent and safe to call before every
|
||||
* request.
|
||||
*/
|
||||
async ensureServable(spaceId: string): Promise<string> {
|
||||
const { vaultGitEnv } = await loadGitSync();
|
||||
@@ -81,13 +91,21 @@ export class VaultRegistryService {
|
||||
['receive.denyNonFastForwards', 'true'],
|
||||
['http.receivepack', 'true'],
|
||||
['http.uploadpack', 'true'],
|
||||
['core.symlinks', 'false'],
|
||||
];
|
||||
// Bound each `git config` (review suggestion): this runs in the request path
|
||||
// BEFORE the watchdog, so a wedged git (a stale `.git/config.lock`) would
|
||||
// otherwise hang the request indefinitely. Mirror the engine's GIT_EXEC
|
||||
// bound via the configured backend timeout.
|
||||
const timeout = this.environmentService.getGitSyncBackendTimeoutMs();
|
||||
for (const [key, value] of configs) {
|
||||
await execFileAsync('git', ['config', key, value], {
|
||||
cwd: path,
|
||||
// Use the engine's cwd-isolated env (strips GIT_DIR / GIT_WORK_TREE) so
|
||||
// the config is written to THIS vault's local config, nothing else.
|
||||
env: vaultGitEnv(),
|
||||
timeout,
|
||||
maxBuffer: 10 * 1024 * 1024,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -1,161 +0,0 @@
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import * as Y from 'yjs';
|
||||
import {
|
||||
markdownToProseMirror,
|
||||
convertProseMirrorToMarkdown,
|
||||
} from '@docmost/git-sync';
|
||||
|
||||
import { tiptapExtensions } from '../../../collaboration/collaboration.util';
|
||||
import { mergeXmlFragments, mergeXmlFragments3Way } from './yjs-body-merge';
|
||||
|
||||
/**
|
||||
* Regression for the QA #119 callout findings (body-duplication re-verify +
|
||||
* "callout strips the whole body"). These reproduce the ACTUAL live merge path:
|
||||
*
|
||||
* live = TiptapTransformer.toYdoc(editor JSON, tiptapExtensions) (the
|
||||
* collaboration server's materialization — schema defaults stamped)
|
||||
* git = toYdoc(markdownToProseMirror(convertProseMirrorToMarkdown(editor)))
|
||||
* (the engine round-trip the push side feeds into writePageBody)
|
||||
*
|
||||
* A page containing a callout (with a neighbouring heading + paragraphs) must:
|
||||
* - merge with ZERO ops on an unchanged resync (no duplication — bug #1), and
|
||||
* - NEVER lose blocks / collapse to empty (no strip — bug #2),
|
||||
* across repeated cycles, for every editor-canonical callout type.
|
||||
*/
|
||||
|
||||
const toYdoc = (content: unknown[]) =>
|
||||
TiptapTransformer.toYdoc(
|
||||
{ type: 'doc', content },
|
||||
'default',
|
||||
tiptapExtensions as any,
|
||||
);
|
||||
|
||||
const blockTypes = (f: Y.XmlFragment) =>
|
||||
f.toArray().map((n: any) => n.nodeName);
|
||||
|
||||
function editorPage(calloutType: string) {
|
||||
return [
|
||||
{
|
||||
type: 'heading',
|
||||
attrs: { id: 'h1', level: 1 },
|
||||
content: [{ type: 'text', text: 'Title here' }],
|
||||
},
|
||||
{
|
||||
type: 'paragraph',
|
||||
attrs: { id: 'p1' },
|
||||
content: [{ type: 'text', text: 'Para before callout' }],
|
||||
},
|
||||
{
|
||||
type: 'callout',
|
||||
attrs: { type: calloutType },
|
||||
content: [
|
||||
{
|
||||
type: 'paragraph',
|
||||
attrs: { id: 'pc' },
|
||||
content: [{ type: 'text', text: 'Inside the callout' }],
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'paragraph',
|
||||
attrs: { id: 'p2' },
|
||||
content: [{ type: 'text', text: 'Para after callout' }],
|
||||
},
|
||||
];
|
||||
}
|
||||
|
||||
async function gitRoundTrip(content: unknown[]): Promise<any[]> {
|
||||
const md = await convertProseMirrorToMarkdown({ type: 'doc', content });
|
||||
const json = await markdownToProseMirror(md);
|
||||
return json.content;
|
||||
}
|
||||
|
||||
describe('git-sync callout merge is idempotent + non-destructive (QA #119)', () => {
|
||||
for (const type of ['info', 'note', 'warning', 'danger', 'success', 'default']) {
|
||||
it(`callout(${type}) resyncs with 0 ops and never strips the body`, async () => {
|
||||
const editor = editorPage(type);
|
||||
const gitContent = await gitRoundTrip(editor);
|
||||
|
||||
const liveDoc = toYdoc(editor);
|
||||
const live = liveDoc.getXmlFragment('default');
|
||||
const before = live.toArray().length;
|
||||
expect(before).toBe(4);
|
||||
|
||||
// 2-way: live vs the git round-trip -> no-op (no dup, no strip).
|
||||
let applied = -1;
|
||||
liveDoc.transact(() => {
|
||||
applied = mergeXmlFragments(live, toYdoc(gitContent).getXmlFragment('default'));
|
||||
});
|
||||
expect(applied).toBe(0);
|
||||
expect(live.toArray().length).toBe(before);
|
||||
|
||||
// 3-way across 4 cycles with base == git (the steady-state) -> stable.
|
||||
for (let cycle = 0; cycle < 4; cycle++) {
|
||||
let a = -1;
|
||||
liveDoc.transact(() => {
|
||||
a = mergeXmlFragments3Way(
|
||||
live,
|
||||
toYdoc(gitContent).getXmlFragment('default'),
|
||||
toYdoc(gitContent).getXmlFragment('default'),
|
||||
);
|
||||
});
|
||||
expect(a).toBe(0);
|
||||
expect(live.toArray().length).toBe(before);
|
||||
expect(blockTypes(live)).toEqual([
|
||||
'heading',
|
||||
'paragraph',
|
||||
'callout',
|
||||
'paragraph',
|
||||
]);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
it('3-way with a stale base (callout JUST added) keeps the callout + neighbours', async () => {
  // Scenario: the merge base is the git round-trip of the page as it looked
  // BEFORE the callout existed, while the live doc and the incoming git body
  // both already contain it. Nothing may be dropped by the merge.
  const withoutCallout = [
    { type: 'heading', attrs: { id: 'h1', level: 1 }, content: [{ type: 'text', text: 'Title here' }] },
    { type: 'paragraph', attrs: { id: 'p1' }, content: [{ type: 'text', text: 'Para before callout' }] },
    { type: 'paragraph', attrs: { id: 'p2' }, content: [{ type: 'text', text: 'Para after callout' }] },
  ];
  const withCallout = editorPage('info');

  // Round-trip the stale version first, then the current one.
  const staleBase = await gitRoundTrip(withoutCallout);
  const fromGit = await gitRoundTrip(withCallout);

  const liveDoc = toYdoc(withCallout);
  const liveFragment = liveDoc.getXmlFragment('default');
  liveDoc.transact(() => {
    const incoming = toYdoc(fromGit).getXmlFragment('default');
    const base = toYdoc(staleBase).getXmlFragment('default');
    mergeXmlFragments3Way(liveFragment, incoming, base);
  });

  // The whole body must survive: not emptied, not reduced to a lone paragraph.
  const expectedOrder = ['heading', 'paragraph', 'callout', 'paragraph'];
  expect(blockTypes(liveFragment)).toEqual(expectedOrder);
});
|
||||
});
|
||||
|
||||
describe('git-sync callout type fidelity (QA "callout type -> [!info]")', () => {
  // Callout types expected to come back from the round-trip unchanged.
  const PRESERVED_TYPES = ['info', 'note', 'warning', 'danger', 'success', 'default'];

  /**
   * Builds an editor page with a callout of `type`, pushes it through the git
   * round-trip, and returns the `type` attr of the first callout that comes back.
   */
  const roundTrippedCalloutType = async (type: string) => {
    const roundTripped = await gitRoundTrip(editorPage(type));
    const callout = roundTripped.find((block: any) => block.type === 'callout');
    return callout?.attrs?.type;
  };

  PRESERVED_TYPES.forEach((type) => {
    it(`preserves callout type "${type}" across the engine round-trip`, async () => {
      expect(await roundTrippedCalloutType(type)).toBe(type);
    });
  });

  it('flattens a genuinely unknown callout type to info', async () => {
    // 'tip' is not an editor-canonical type.
    expect(await roundTrippedCalloutType('tip')).toBe('info');
  });
});
|
||||
@@ -1,198 +0,0 @@
|
||||
import * as Y from 'yjs';
|
||||
|
||||
import { mergeXmlFragments, mergeXmlFragments3Way } from './yjs-body-merge';
|
||||
|
||||
/**
|
||||
* Regression for the HIGH-severity runaway whole-body duplication: a page body
|
||||
* was RE-APPENDED in full on every git-sync reconcile cycle, unbounded, with NO
|
||||
* client connected.
|
||||
*
|
||||
* ROOT CAUSE (confirmed in-process against the real failing page): the LIVE Yjs
|
||||
* document materializes the editor-schema default `indent: 0` on every
|
||||
* paragraph/heading (and on the paragraph inside every list item, callout, and
|
||||
* table cell), but a body re-imported from git — parsed from clean markdown —
|
||||
* carries NO indent attribute. So every live block's comparison key differed from
|
||||
* the same block coming back from git; the three-way merge could anchor on
|
||||
* NOTHING, and the trailing unit that git's export already contained (but the
|
||||
* merge could not match against the byte-identical live tail) was re-appended
|
||||
* each cycle. Each grown export then diverged from the last-pushed base by one
|
||||
* more unit — a self-sustaining loop.
|
||||
*
|
||||
* The fix normalizes the materialized default (`indent: 0`) out of the block key
|
||||
* (the schema-derived `serializeXmlNode` normalization in yjs-body-merge.ts drops
|
||||
* every attr equal to its ProseMirror-schema default; `indent: 0` is one such),
|
||||
* so a live block compares equal to its git-round-tripped twin and the resync is
|
||||
* a true no-op. The sibling `yjs-body-merge.schema-defaults.spec.ts` covers the
|
||||
* rest of the bug class (image.align, link mark internal, …).
|
||||
*
|
||||
* These tests model that EXACTLY at the Yjs level: a LIVE fragment whose blocks
|
||||
* carry `indent: 0` + block ids, versus a git-derived fragment of the SAME
|
||||
* content with neither — for a body built from BYTE-IDENTICAL units that each
|
||||
* contain a heading, a paragraph, a callout, and a table with empty cells (the
|
||||
* trigger). RED before the fix (the merge applies > 0 ops and the body grows),
|
||||
* GREEN after (0 ops, no growth).
|
||||
*/
|
||||
|
||||
type Attrs = Record<string, string | number>;

/**
 * Creates a detached `Y.XmlElement` named `name`, copies every entry of
 * `attrs` onto it, and inserts `children` (when there are any) at index 0.
 */
function el(
  name: string,
  attrs: Attrs,
  children: (Y.XmlElement | Y.XmlText)[],
) {
  const element = new Y.XmlElement(name);
  Object.entries(attrs).forEach(([key, value]) => {
    // Numeric values are passed through untouched; the cast only satisfies
    // the string-typed setAttribute signature.
    element.setAttribute(key, value as string);
  });
  if (children.length > 0) {
    element.insert(0, children);
  }
  return element;
}
|
||||
|
||||
/** Creates a detached `Y.XmlText`; a non-empty `s` is inserted at offset 0. */
function text(s: string): Y.XmlText {
  const node = new Y.XmlText();
  if (s !== '') {
    node.insert(0, s);
  }
  return node;
}
|
||||
|
||||
/**
 * Builds one content unit: heading, paragraph, callout, and a 2x2 table whose
 * cells each hold an empty paragraph.
 *
 * When `live` is true the heading and every paragraph carry `indent: 0`, and
 * the heading, the body paragraph and the callout's paragraph carry an `id`
 * suffixed with `n`. When `live` is false none of those attributes are set.
 * The visible text is the same either way; `headingText` overrides only the
 * heading's text.
 */
function unit(
  live: boolean,
  n: number,
  headingText = 'Big Heading',
): Y.XmlElement[] {
  const indentAttrs: Attrs = live ? { indent: 0 } : {};
  const idAttrs = (prefix: string): Attrs =>
    live ? { id: `${prefix}${n}` } : {};

  const paragraph = (attrs: Attrs, content: string) =>
    el('paragraph', { ...attrs, ...indentAttrs }, [text(content)]);
  const emptyCell = (nodeName: string) =>
    el(nodeName, { colspan: 1, rowspan: 1 }, [paragraph({}, '')]);
  const twoCellRow = (cellName: string) =>
    el('tableRow', {}, [emptyCell(cellName), emptyCell(cellName)]);

  const heading = el('heading', { ...idAttrs('h'), level: 1, ...indentAttrs }, [
    text(headingText),
  ]);
  const body = paragraph(idAttrs('p'), 'Para with the same words');
  const callout = el('callout', { type: 'info' }, [
    paragraph(idAttrs('c'), 'CalloutText here'),
  ]);
  const table = el('table', {}, [
    twoCellRow('tableHeader'),
    twoCellRow('tableCell'),
  ]);

  return [heading, body, callout, table];
}
|
||||
|
||||
/**
 * Creates a fresh `Y.Doc`, flattens `units` one level, and inserts the
 * resulting blocks (if any) at the start of the doc's 'default' XML fragment.
 * Returns both the doc and the fragment.
 */
function fragmentOf(units: Y.XmlElement[][]): {
  doc: Y.Doc;
  frag: Y.XmlFragment;
} {
  const doc = new Y.Doc();
  const frag = doc.getXmlFragment('default');
  const topLevelBlocks = ([] as Y.XmlElement[]).concat(...units);
  if (topLevelBlocks.length > 0) {
    frag.insert(0, topLevelBlocks);
  }
  return { doc, frag };
}
|
||||
|
||||
/** Number of direct children currently held by `frag`. */
const blockCount = (frag: Y.XmlFragment): number => {
  const topLevel = frag.toArray();
  return topLevel.length;
};
|
||||
|
||||
describe('git-sync reconcile import is idempotent (no whole-body duplication)', () => {
  const UNITS = 3;

  // LIVE side: UNITS editor-stamped units (indent:0 + ids on the blocks).
  const liveBody = () =>
    fragmentOf(Array.from({ length: UNITS }, (_, index) => unit(true, index)));

  // GIT side (export -> re-import): `count` units of the same content, with
  // no indent / ids.
  const gitBody = (count: number): Y.XmlFragment =>
    fragmentOf(Array.from({ length: count }, (_, index) => unit(false, index)))
      .frag;

  // Runs `merge` inside a single transaction on `doc` and returns the value it
  // produced (-1 if `merge` never ran).
  const opsApplied = (doc: Y.Doc, merge: () => number): number => {
    let count = -1;
    doc.transact(() => {
      count = merge();
    });
    return count;
  };

  it('3-way: identical content, live carries indent:0, base stale-by-one -> 0 ops, no growth', () => {
    const { doc: liveDoc, frag: live } = liveBody();
    const incoming = gitBody(UNITS);
    // BASE = last-pushed file, lagging by ONE unit (the realistic divergence
    // that drives the trailing insert-vs-insert).
    const base = gitBody(UNITS - 1);

    const sizeBefore = blockCount(live);
    const applied = opsApplied(liveDoc, () =>
      mergeXmlFragments3Way(live, incoming, base),
    );

    expect(applied).toBe(0);
    expect(blockCount(live)).toBe(sizeBefore);
  });

  it('3-way is a fixpoint across repeated cycles (does not grow)', () => {
    const { doc: liveDoc, frag: live } = liveBody();
    const sizeBefore = blockCount(live);

    // Every cycle merges freshly built incoming/base fragments.
    for (let cycle = 0; cycle < 5; cycle++) {
      const applied = opsApplied(liveDoc, () =>
        mergeXmlFragments3Way(live, gitBody(UNITS), gitBody(UNITS - 1)),
      );
      expect(applied).toBe(0);
      expect(blockCount(live)).toBe(sizeBefore);
    }
  });

  it('2-way: identical content, live carries indent:0 -> 0 ops, no growth', () => {
    const { doc: liveDoc, frag: live } = liveBody();
    const incoming = gitBody(UNITS);

    const sizeBefore = blockCount(live);
    const applied = opsApplied(liveDoc, () => mergeXmlFragments(live, incoming));

    expect(applied).toBe(0);
    expect(blockCount(live)).toBe(sizeBefore);
  });

  it('does NOT regress real edits: a git change to one block still lands', () => {
    const { doc: liveDoc, frag: live } = liveBody();
    const base = gitBody(UNITS);
    // git edits the heading text of the LAST unit only.
    const lastUnit = UNITS - 1;
    const incoming = fragmentOf(
      Array.from({ length: UNITS }, (_, index) =>
        unit(false, index, index === lastUnit ? 'EDITED Heading' : 'Big Heading'),
      ),
    ).frag;

    const sizeBefore = blockCount(live);
    liveDoc.transact(() => {
      mergeXmlFragments3Way(live, incoming, base);
    });

    // Collect the text of every top-level heading in the merged live body.
    const headingTexts: string[] = [];
    for (const block of live.toArray()) {
      const element = block as Y.XmlElement;
      if (element.nodeName !== 'heading') continue;
      headingTexts.push(
        element
          .toArray()
          .map((child) => (child as Y.XmlText).toString())
          .join(''),
      );
    }

    // The edit landed, and the top-level block count did not change.
    expect(headingTexts).toContain('EDITED Heading');
    expect(blockCount(live)).toBe(sizeBefore);
  });
});
|
||||
-316
@@ -1,316 +0,0 @@
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import * as Y from 'yjs';
|
||||
|
||||
import { tiptapExtensions } from '../../../collaboration/collaboration.util';
|
||||
import { mergeXmlFragments, mergeXmlFragments3Way } from './yjs-body-merge';
|
||||
|
||||
/**
|
||||
* Regression for the BUG CLASS behind the runaway whole-body duplication: the
|
||||
* point-fix (7a7b840e) only normalized `indent: 0`, but the SAME divergence
|
||||
* recurs for every attribute whose editor-ext (server) schema default the live
|
||||
* Yjs doc MATERIALIZES while the git round-trip — which comes through the engine
|
||||
* schema (different, usually null, defaults) plus `y-prosemirror`'s null-attr
|
||||
* dropping — does NOT carry. Confirmed triggers beyond `indent`:
|
||||
*
|
||||
* - `image.align` : editor-ext default "center" (materialized) vs engine
|
||||
* default null (dropped) -> element-attr divergence.
|
||||
* - link mark `internal`: editor-ext default false (materialized) vs engine
|
||||
* default null -> MARK-attr divergence (the prior denylist
|
||||
* could not reach marks at all — they are serialized raw in
|
||||
* the XmlText delta).
|
||||
*
|
||||
* `highlight.colorName` is normalized too (defense-in-depth); it is NOT a strong
|
||||
* real-world trigger because BOTH schemas default it to null, but the schema-
|
||||
* derived normalization handles it for free and stays idempotent.
|
||||
*
|
||||
* The fix derives the defaults from the ACTUAL ProseMirror schema (getSchema of
|
||||
* the server tiptapExtensions) and drops any element- OR mark-attribute equal to
|
||||
* its schema default (or null/undefined) from the block comparison key — so a
|
||||
* live block compares equal to its git-round-tripped twin and an unchanged
|
||||
* resync applies 0 ops. RED before the fix (keys diverge -> ops > 0 / growth),
|
||||
* GREEN after.
|
||||
*/
|
||||
|
||||
type Attrs = Record<string, unknown>;

/**
 * Creates a detached `Y.XmlElement` named `name`, copies every entry of
 * `attrs` onto it, and inserts `children` (when there are any) at index 0.
 */
function el(
  name: string,
  attrs: Attrs,
  children: (Y.XmlElement | Y.XmlText)[],
): Y.XmlElement {
  const element = new Y.XmlElement(name);
  Object.entries(attrs).forEach(([key, value]) => {
    // Values are stored as given; the cast only satisfies the string-typed
    // setAttribute signature.
    element.setAttribute(key, value as string);
  });
  if (children.length > 0) {
    element.insert(0, children);
  }
  return element;
}
|
||||
|
||||
/**
 * Creates a detached `Y.XmlText` holding `s`, inserted at offset 0 with
 * `marks` as its formatting attributes (how the live Yjs doc stores marks).
 */
function markedText(s: string, marks: Record<string, unknown>): Y.XmlText {
  const node = new Y.XmlText();
  node.insert(0, s, marks);
  return node;
}
|
||||
|
||||
/**
 * Builds one rich unit: a paragraph whose text carries a link mark, a
 * top-level image, and a paragraph whose text carries a highlight mark.
 *
 * With `live` true the unit carries what the editor materializes: paragraph
 * ids suffixed with `n`, `indent: 0`, `align: "center"` on the image,
 * `class`/`title: null` and `internal: false` on the link mark, and
 * `colorName: null` on the highlight mark. With `live` false it is the
 * git-side shape: none of those, except `internal: null` on the link mark.
 */
function richUnit(live: boolean, n: number): Y.XmlElement[] {
  const indentAttrs: Attrs = live ? { indent: 0 } : {};
  const idAttrs = (prefix: string): Attrs =>
    live ? { id: `${prefix}${n}` } : {};

  // Link attrs present on both sides.
  const sharedLinkAttrs = {
    href: 'https://example.com',
    target: '_blank',
    rel: 'noopener noreferrer nofollow',
  };
  const linkAttrs = live
    ? // editor-ext default `internal: false`, materialized
      { ...sharedLinkAttrs, class: null, title: null, internal: false }
    : // engine default
      { ...sharedLinkAttrs, internal: null };

  const highlightAttrs = live
    ? { color: '#ffd43b', colorName: null }
    : { color: '#ffd43b' };

  const imageSrc = 'https://img.example.com/a.png';
  const imageAttrs: Attrs = live
    ? { src: imageSrc, align: 'center' } // materialized
    : { src: imageSrc }; // align:null dropped on the git side

  const linkParagraph = el('paragraph', { ...idAttrs('lp'), ...indentAttrs }, [
    markedText('click here', { link: linkAttrs }),
  ]);
  const image = el('image', imageAttrs, []);
  const highlightParagraph = el(
    'paragraph',
    { ...idAttrs('hp'), ...indentAttrs },
    [markedText('hot', { highlight: highlightAttrs })],
  );

  return [linkParagraph, image, highlightParagraph];
}
|
||||
|
||||
/**
 * Creates a fresh `Y.Doc`, flattens `units` one level, and inserts the
 * resulting blocks (if any) at the start of the doc's 'default' XML fragment.
 * Returns both the doc and the fragment.
 */
function fragmentOf(units: Y.XmlElement[][]): {
  doc: Y.Doc;
  frag: Y.XmlFragment;
} {
  const doc = new Y.Doc();
  const frag = doc.getXmlFragment('default');
  const topLevelBlocks = ([] as Y.XmlElement[]).concat(...units);
  if (topLevelBlocks.length > 0) {
    frag.insert(0, topLevelBlocks);
  }
  return { doc, frag };
}
|
||||
|
||||
/** Number of direct children currently held by `frag`. */
const blockCount = (frag: Y.XmlFragment): number => {
  const topLevel = frag.toArray();
  return topLevel.length;
};
|
||||
|
||||
describe('git-sync reconcile is idempotent for schema-default attrs (image/link/highlight)', () => {
  const UNITS = 3;

  // LIVE side: UNITS rich units carrying the materialized defaults.
  const liveBody = () =>
    fragmentOf(
      Array.from({ length: UNITS }, (_, index) => richUnit(true, index)),
    );

  // GIT side: `count` rich units of the same content, without those defaults.
  const gitBody = (count: number): Y.XmlFragment =>
    fragmentOf(
      Array.from({ length: count }, (_, index) => richUnit(false, index)),
    ).frag;

  // Runs `merge` inside a single transaction on `doc` and returns the value it
  // produced (-1 if `merge` never ran).
  const opsApplied = (doc: Y.Doc, merge: () => number): number => {
    let count = -1;
    doc.transact(() => {
      count = merge();
    });
    return count;
  };

  it('3-way: live carries image.align/link.internal/indent defaults, base stale-by-one -> 0 ops', () => {
    const { doc: liveDoc, frag: live } = liveBody();
    const incoming = gitBody(UNITS);
    const base = gitBody(UNITS - 1);

    const sizeBefore = blockCount(live);
    const applied = opsApplied(liveDoc, () =>
      mergeXmlFragments3Way(live, incoming, base),
    );

    expect(applied).toBe(0);
    expect(blockCount(live)).toBe(sizeBefore);
  });

  it('2-way: live carries the materialized defaults -> 0 ops, no growth', () => {
    const { doc: liveDoc, frag: live } = liveBody();
    const incoming = gitBody(UNITS);

    const sizeBefore = blockCount(live);
    const applied = opsApplied(liveDoc, () => mergeXmlFragments(live, incoming));

    expect(applied).toBe(0);
    expect(blockCount(live)).toBe(sizeBefore);
  });

  it('is a fixpoint across repeated cycles (does not grow)', () => {
    const { doc: liveDoc, frag: live } = liveBody();
    const sizeBefore = blockCount(live);

    // Every cycle merges freshly built incoming/base fragments.
    for (let cycle = 0; cycle < 5; cycle++) {
      const applied = opsApplied(liveDoc, () =>
        mergeXmlFragments3Way(live, gitBody(UNITS), gitBody(UNITS - 1)),
      );
      expect(applied).toBe(0);
      expect(blockCount(live)).toBe(sizeBefore);
    }
  });

  it('does NOT regress a genuine non-default value (a real link.href / image.align:left still diffs)', () => {
    const { doc: liveDoc, frag: live } = fragmentOf([richUnit(true, 0)]);
    const base = fragmentOf([richUnit(false, 0)]).frag;

    // git genuinely changes the image alignment to a NON-default value; the
    // image is the second block of a rich unit.
    const editedUnit = richUnit(false, 0);
    const gitImage = editedUnit[1] as Y.XmlElement;
    gitImage.setAttribute('align', 'left');
    const incoming = fragmentOf([editedUnit]).frag;

    liveDoc.transact(() => {
      mergeXmlFragments3Way(live, incoming, base);
    });

    const liveImage = live
      .toArray()
      .find((block) => (block as Y.XmlElement).nodeName === 'image') as Y.XmlElement;
    expect(liveImage.getAttribute('align')).toBe('left');
  });
});
|
||||
|
||||
/**
 * End-to-end check through the real server transformer. The live doc is built
 * from JSON that omits the default attrs and is passed through
 * `TiptapTransformer.toYdoc` with the server `tiptapExtensions`; the git-side
 * doc is built from engine-style JSON (explicit nulls for the default attrs,
 * no block ids). Merging one into the other unchanged must apply 0 ops.
 */
describe('git-sync reconcile is idempotent through the real toYdoc materialization', () => {
  type Mark = { type: string; attrs: Record<string, unknown> };

  // A text node carrying exactly one mark.
  const markedRun = (text: string, mark: Mark) => ({
    type: 'text',
    text,
    marks: [mark],
  });

  const IMAGE_SRC = 'https://img.example.com/a.png';

  // Editor-side JSON: block ids present, default attrs omitted.
  const liveContent = [
    {
      type: 'paragraph',
      attrs: { id: 'p1' },
      content: [
        markedRun('click here', {
          type: 'link',
          attrs: { href: 'https://example.com' },
        }),
      ],
    },
    { type: 'image', attrs: { src: IMAGE_SRC } },
    {
      type: 'paragraph',
      attrs: { id: 'p2' },
      content: [
        markedRun('hot', { type: 'highlight', attrs: { color: '#ffd43b' } }),
      ],
    },
  ];

  // git/engine-style JSON: explicit nulls for the engine-default attrs, no ids.
  const gitContent = [
    {
      type: 'paragraph',
      content: [
        markedRun('click here', {
          type: 'link',
          attrs: {
            href: 'https://example.com',
            target: '_blank',
            rel: 'noopener noreferrer nofollow',
            class: null,
            title: null,
            internal: null,
          },
        }),
      ],
    },
    { type: 'image', attrs: { src: IMAGE_SRC, align: null } },
    {
      type: 'paragraph',
      content: [
        markedRun('hot', {
          type: 'highlight',
          attrs: { color: '#ffd43b', colorName: null },
        }),
      ],
    },
  ];

  // Wraps `content` in a doc node and converts it with the server extensions.
  const toYdoc = (content: unknown[]) =>
    TiptapTransformer.toYdoc(
      { type: 'doc', content },
      'default',
      tiptapExtensions as any,
    );

  it('3-way: materialized-default live vs engine-style git, base stale-by-one -> 0 ops', () => {
    const liveDoc = toYdoc(liveContent);
    const targetDoc = toYdoc(gitContent);
    // Base lags the git body by its last block.
    const baseDoc = toYdoc(gitContent.slice(0, gitContent.length - 1));

    const live = liveDoc.getXmlFragment('default');
    const sizeBefore = live.toArray().length;

    let applied = -1;
    liveDoc.transact(() => {
      const incoming = targetDoc.getXmlFragment('default');
      const base = baseDoc.getXmlFragment('default');
      applied = mergeXmlFragments3Way(live, incoming, base);
    });

    expect(applied).toBe(0);
    expect(live.toArray().length).toBe(sizeBefore);
  });

  it('2-way: materialized-default live vs engine-style git -> 0 ops', () => {
    const liveDoc = toYdoc(liveContent);
    const targetDoc = toYdoc(gitContent);

    const live = liveDoc.getXmlFragment('default');
    const sizeBefore = live.toArray().length;

    let applied = -1;
    liveDoc.transact(() => {
      const incoming = targetDoc.getXmlFragment('default');
      applied = mergeXmlFragments(live, incoming);
    });

    expect(applied).toBe(0);
    expect(live.toArray().length).toBe(sizeBefore);
  });
});
|
||||
@@ -1,338 +0,0 @@
|
||||
import * as Y from 'yjs';
|
||||
|
||||
import {
|
||||
mergeXmlFragments,
|
||||
mergeXmlFragments3Way,
|
||||
cloneXmlNode,
|
||||
diffBlocks,
|
||||
} from './yjs-body-merge';
|
||||
|
||||
// Fills `doc`'s 'default' Y.XmlFragment with one <paragraph> per entry of
// `paragraphs` and returns the fragment. Each paragraph holds a single XmlText
// child containing the entry's text (left empty for an empty string).
function buildFragment(doc: Y.Doc, paragraphs: string[]): Y.XmlFragment {
  const frag = doc.getXmlFragment('default');
  const blocks: Y.XmlElement[] = [];
  for (const content of paragraphs) {
    const textNode = new Y.XmlText();
    if (content !== '') {
      textNode.insert(0, content);
    }
    const paragraph = new Y.XmlElement('paragraph');
    paragraph.insert(0, [textNode]);
    blocks.push(paragraph);
  }
  if (blocks.length > 0) {
    frag.insert(0, blocks);
  }
  return frag;
}
|
||||
|
||||
// Returns, for each top-level block of `frag`, the concatenated string form of
// its children.
function texts(frag: Y.XmlFragment): string[] {
  const result: string[] = [];
  for (const block of frag.toArray()) {
    const pieces = (block as Y.XmlElement)
      .toArray()
      .map((child) => (child as Y.XmlText).toString());
    result.push(pieces.join(''));
  }
  return result;
}
|
||||
|
||||
describe('yjs-body-merge', () => {
|
||||
describe('diffBlocks (LCS edit script)', () => {
  // Keeps only the entries of `ops` whose `op` field equals `kind`.
  const opsOfKind = <T extends { op: string }>(
    ops: readonly T[],
    kind: string,
  ): T[] => ops.filter((entry) => entry.op === kind);

  it('identical sequences produce only keeps (no edits)', () => {
    const sequence = ['a', 'b', 'c'];
    const ops = diffBlocks(sequence, ['a', 'b', 'c']);
    const onlyKeeps = ops.every((entry) => entry.op === 'keep');
    expect(onlyKeeps).toBe(true);
  });

  it('a single changed middle element is one del + one ins', () => {
    const ops = diffBlocks(['a', 'b', 'c'], ['a', 'B', 'c']);
    expect(opsOfKind(ops, 'del')).toHaveLength(1);
    expect(opsOfKind(ops, 'ins')).toHaveLength(1);
    expect(opsOfKind(ops, 'keep')).toHaveLength(2);
  });
});
|
||||
|
||||
describe('mergeXmlFragments', () => {
  type Blocks = ReturnType<Y.XmlFragment['toArray']>;

  // Builds a live and a target fragment, each in its own doc, from paragraph
  // texts.
  const setup = (liveTexts: string[], targetTexts: string[]) => {
    const live = new Y.Doc();
    const target = new Y.Doc();
    const liveFrag = buildFragment(live, liveTexts);
    const targetFrag = buildFragment(target, targetTexts);
    return { live, liveFrag, targetFrag };
  };

  // Runs the 2-way merge inside one transaction on `doc`; returns the op count
  // reported by mergeXmlFragments (-1 if the callback never ran).
  const merge = (
    doc: Y.Doc,
    liveFrag: Y.XmlFragment,
    targetFrag: Y.XmlFragment,
  ): number => {
    let applied = -1;
    doc.transact(() => {
      applied = mergeXmlFragments(liveFrag, targetFrag);
    });
    return applied;
  };

  // Asserts that `frag` holds exactly the SAME object instances as `expected`,
  // in the same order. `toEqual` would only compare structure, so a block that
  // was deleted and recreated could still pass; `toBe` checks identity.
  const expectSameInstances = (frag: Y.XmlFragment, expected: Blocks) => {
    const actual = frag.toArray();
    expect(actual).toHaveLength(expected.length);
    actual.forEach((block, index) => {
      expect(block).toBe(expected[index]);
    });
  };

  it('identical content is a complete no-op (0 ops) — never clobbers an unchanged resync', () => {
    const { live, liveFrag, targetFrag } = setup(
      ['one', 'two', 'three'],
      ['one', 'two', 'three'],
    );

    // Capture block identities to prove they are left untouched.
    const before = liveFrag.toArray();
    const applied = merge(live, liveFrag, targetFrag);

    expect(applied).toBe(0);
    // Same Y.XmlElement instances — nothing was deleted/recreated.
    expectSameInstances(liveFrag, before);
    expect(texts(liveFrag)).toEqual(['one', 'two', 'three']);
  });

  it('a human edit to one block survives a git change to a DIFFERENT block', () => {
    // Live: the human has the doc open; block 0 holds their edit. Git changed
    // only block 2. The merge must touch ONLY block 2 and leave block 0 (and
    // its in-flight edit) exactly as-is.
    const { live, liveFrag, targetFrag } = setup(
      ['HUMAN EDIT', 'shared', 'old tail'],
      ['HUMAN EDIT', 'shared', 'new tail from git'],
    );

    const block0Before = liveFrag.get(0); // the human's block instance
    const block1Before = liveFrag.get(1);

    const applied = merge(live, liveFrag, targetFrag);

    // Only block 2 was replaced: one del + one ins.
    expect(applied).toBe(2);
    // The human's block and the shared block are the SAME instances (untouched).
    expect(liveFrag.get(0)).toBe(block0Before);
    expect(liveFrag.get(1)).toBe(block1Before);
    // Block 2 now carries git's content.
    expect(texts(liveFrag)).toEqual([
      'HUMAN EDIT',
      'shared',
      'new tail from git',
    ]);
  });

  it('appends a new trailing block without disturbing existing ones', () => {
    const { live, liveFrag, targetFrag } = setup(['a', 'b'], ['a', 'b', 'c']);
    const a = liveFrag.get(0);
    const b = liveFrag.get(1);

    const applied = merge(live, liveFrag, targetFrag);

    expect(applied).toBe(1); // single insert
    expect(liveFrag.get(0)).toBe(a);
    expect(liveFrag.get(1)).toBe(b);
    expect(texts(liveFrag)).toEqual(['a', 'b', 'c']);
  });

  it('deletes a removed block, keeping its neighbours', () => {
    const { live, liveFrag, targetFrag } = setup(['a', 'b', 'c'], ['a', 'c']);
    const a = liveFrag.get(0);

    const applied = merge(live, liveFrag, targetFrag);

    expect(applied).toBe(1); // single delete
    expect(liveFrag.get(0)).toBe(a);
    expect(texts(liveFrag)).toEqual(['a', 'c']);
  });

  it('a fully different body is replaced (and stays valid)', () => {
    const { live, liveFrag, targetFrag } = setup(['x', 'y'], ['p', 'q', 'r']);
    live.transact(() => mergeXmlFragments(liveFrag, targetFrag));
    expect(texts(liveFrag)).toEqual(['p', 'q', 'r']);
  });
});
|
||||
|
||||
describe('mergeXmlFragments3Way', () => {
  // Builds base / live / target fragments, each in its own doc, from paragraph
  // texts. Docs are created in the order base, live, target.
  const scenario = (
    baseTexts: string[],
    liveTexts: string[],
    targetTexts: string[],
  ) => {
    const baseDoc = new Y.Doc();
    const liveDoc = new Y.Doc();
    const targetDoc = new Y.Doc();
    return {
      liveDoc,
      baseFrag: buildFragment(baseDoc, baseTexts),
      liveFrag: buildFragment(liveDoc, liveTexts),
      targetFrag: buildFragment(targetDoc, targetTexts),
    };
  };

  it('keeps a human edit to one block while applying a git change to another (3-way)', () => {
    // base (last synced): [a, b, c]. Human edited block 0 in the live doc; git
    // changed block 2 in the incoming file. 3-way must keep BOTH — the 2-way
    // merge would instead revert the human's block 0 to git's stale version.
    const { liveDoc, baseFrag, liveFrag, targetFrag } = scenario(
      ['a', 'b', 'c'],
      ['HUMAN', 'b', 'c'],
      ['a', 'b', 'GIT'],
    );

    const humanBlock = liveFrag.get(0); // the human's live instance
    liveDoc.transact(() => mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag));

    // Human's block preserved as the SAME instance; git's change applied.
    expect(liveFrag.get(0)).toBe(humanBlock);
    expect(texts(liveFrag)).toEqual(['HUMAN', 'b', 'GIT']);
  });

  it('a block both sides changed resolves to git (conflict policy)', () => {
    const { liveDoc, baseFrag, liveFrag, targetFrag } = scenario(
      ['a', 'b', 'c'],
      ['a', 'HUMAN', 'c'],
      ['a', 'GIT', 'c'],
    );

    liveDoc.transact(() => mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag));

    expect(texts(liveFrag)).toEqual(['a', 'GIT', 'c']);
  });

  it('git change with no concurrent human edit (live == base) applies cleanly', () => {
    const { liveDoc, baseFrag, liveFrag, targetFrag } = scenario(
      ['a', 'b'],
      ['a', 'b'],
      ['a', 'B2'],
    );

    liveDoc.transact(() => mergeXmlFragments3Way(liveFrag, targetFrag, baseFrag));

    expect(texts(liveFrag)).toEqual(['a', 'B2']);
  });
});
|
||||
|
||||
// Regression: start-of-document content duplicating on every two-way sync.
|
||||
//
|
||||
// The LIVE Docmost doc stamps a per-block UniqueID on every heading/paragraph;
|
||||
// a body arriving FROM git is parsed from clean markdown and carries NO block
|
||||
// ids. If the merge comparison key includes that `id`, an unchanged live block
|
||||
// never matches the SAME block coming from git, so the three-way merge cannot
|
||||
// anchor on it — and an incoming block with no anchor (content inserted at the
|
||||
// TOP of the page) is RE-ADDED on every cycle, an unbounded duplication loop.
|
||||
// These tests model that exact id-asymmetry and assert the reconciliation is
|
||||
// IDEMPOTENT (no block growth). They are RED before excluding `id` from the
|
||||
// key in `serializeXmlNode`.
|
||||
describe('idempotent reconciliation with live block ids (start-of-doc dup)', () => {
|
||||
// Build a fragment from block specs. `id` is set only when provided, mirroring
|
||||
// the live doc (ids present) vs a git-parsed body (ids absent).
|
||||
type Spec = { tag: 'heading' | 'paragraph'; text: string; id?: string };
|
||||
function buildDoc(doc: Y.Doc, specs: Spec[]): Y.XmlFragment {
|
||||
const frag = doc.getXmlFragment('default');
|
||||
const blocks = specs.map((s) => {
|
||||
const el = new Y.XmlElement(s.tag);
|
||||
if (s.id) el.setAttribute('id', s.id);
|
||||
if (s.tag === 'heading') el.setAttribute('level', '2');
|
||||
const t = new Y.XmlText();
|
||||
if (s.text) t.insert(0, s.text);
|
||||
el.insert(0, [t]);
|
||||
return el;
|
||||
});
|
||||
if (blocks.length) frag.insert(0, blocks);
|
||||
return frag;
|
||||
}
|
||||
const textsOf = (frag: Y.XmlFragment): string[] =>
|
||||
frag.toArray().map((el) =>
|
||||
(el as Y.XmlElement)
|
||||
.toArray()
|
||||
.map((c) => (c as Y.XmlText).toString())
|
||||
.join(''),
|
||||
);
|
||||
|
||||
it('re-merging the SAME git body does NOT re-add the top block (idempotent)', () => {
  // Common ancestor (last-synced body parsed from git markdown): no block ids.
  const baseDoc = new Y.Doc();
  const baseFrag = buildDoc(baseDoc, [
    { tag: 'heading', text: 'Title' },
    { tag: 'paragraph', text: 'Some paragraph.' },
    { tag: 'paragraph', text: 'End block.' },
  ]);

  // Live editor document: identical content, but each block has a UniqueID.
  const live = new Y.Doc();
  const liveFrag = buildDoc(live, [
    { tag: 'heading', text: 'Title', id: 'ida' },
    { tag: 'paragraph', text: 'Some paragraph.', id: 'idb' },
    { tag: 'paragraph', text: 'End block.', id: 'idc' },
  ]);

  // Incoming git body: a new heading was inserted at the very TOP. Built fresh
  // for every sync so each cycle receives its own target fragment.
  const buildTarget = (): Y.XmlFragment =>
    buildDoc(new Y.Doc(), [
      { tag: 'heading', text: 'TOPDUP' },
      { tag: 'heading', text: 'Title' },
      { tag: 'paragraph', text: 'Some paragraph.' },
      { tag: 'paragraph', text: 'End block.' },
    ]);

  // One reconcile cycle: merge the git body into live against the same base.
  const syncOnce = (): void => {
    live.transact(() => {
      mergeXmlFragments3Way(liveFrag, buildTarget(), baseFrag);
    });
  };

  const expected = ['TOPDUP', 'Title', 'Some paragraph.', 'End block.'];

  // First sync: the top block is added exactly once.
  syncOnce();
  expect(textsOf(liveFrag)).toEqual(expected);

  // A later sync of the SAME git body against the SAME base must be a NO-OP,
  // not a second copy of the top block. Before the fix this re-added 'TOPDUP',
  // growing the doc on every cycle.
  syncOnce();
  expect(textsOf(liveFrag)).toEqual(expected);
  expect(textsOf(liveFrag).filter((t) => t === 'TOPDUP')).toHaveLength(1);
});
|
||||
|
||||
it('an unchanged git body (live ids, none in git) is a complete no-op', () => {
  // base == git body (no pending git change); live is the same content with
  // ids. With `id` in the key the whole body looks rewritten; the merge must
  // still leave live byte-identical (block instances untouched).
  const base = new Y.Doc();
  const baseFrag = buildDoc(base, [
    { tag: 'heading', text: 'Title' },
    { tag: 'paragraph', text: 'Body.' },
  ]);
  const live = new Y.Doc();
  const liveFrag = buildDoc(live, [
    { tag: 'heading', text: 'Title', id: 'ida' },
    { tag: 'paragraph', text: 'Body.', id: 'idb' },
  ]);
  const before = liveFrag.toArray();
  let applied = -1;
  live.transact(() => {
    applied = mergeXmlFragments3Way(
      liveFrag,
      buildDoc(new Y.Doc(), [
        { tag: 'heading', text: 'Title' },
        { tag: 'paragraph', text: 'Body.' },
      ]),
      baseFrag,
    );
  });
  expect(applied).toBe(0);

  // Same live block INSTANCES — nothing recreated. `toEqual` only compares
  // structure, so a delete + re-insert of an equal-looking block could slip
  // through; `toBe` pins reference identity for every block.
  const after = liveFrag.toArray();
  expect(after).toHaveLength(before.length);
  after.forEach((node, i) => expect(node).toBe(before[i]));

  // The per-block ids stamped on the live side survive the merge.
  expect(
    after.map((node) => (node as Y.XmlElement).getAttribute('id')),
  ).toEqual(['ida', 'idb']);
});
|
||||
});
|
||||
|
||||
describe('cloneXmlNode', () => {
  it('preserves text marks (XmlText delta) across docs', () => {
    // Source document: a single paragraph whose text is an unmarked "plain "
    // run followed by a bold "bold" run.
    const sourceDoc = new Y.Doc();
    const sourceFrag = sourceDoc.getXmlFragment('default');
    const paragraph = new Y.XmlElement('paragraph');
    const richText = new Y.XmlText();
    richText.insert(0, 'plain ');
    richText.insert(6, 'bold', { bold: true });
    paragraph.insert(0, [richText]);
    sourceFrag.insert(0, [paragraph]);

    // Clone the paragraph into a second, unrelated document.
    const targetDoc = new Y.Doc();
    const targetFrag = targetDoc.getXmlFragment('default');
    const copy = cloneXmlNode(sourceFrag.get(0) as Y.XmlElement);
    targetFrag.insert(0, [copy]);

    // The cloned text must carry the same runs and the same mark attributes.
    const clonedParagraph = targetFrag.get(0) as Y.XmlElement;
    const clonedText = clonedParagraph.get(0) as Y.XmlText;
    expect(clonedText.toDelta()).toEqual([
      { insert: 'plain ' },
      { insert: 'bold', attributes: { bold: true } },
    ]);
  });
});
|
||||
});
|
||||
@@ -1,335 +0,0 @@
|
||||
import * as Y from 'yjs';
|
||||
import { getSchema } from '@tiptap/core';
|
||||
import type { Schema } from '@tiptap/pm/model';
|
||||
|
||||
import { tiptapExtensions } from '../../../collaboration/collaboration.util';
|
||||
import { diff3Plan } from './three-way-merge';
|
||||
import { buildLcsTable } from './lcs';
|
||||
|
||||
/**
|
||||
* Block-level merge of an incoming (git) page body into a LIVE Yjs document,
|
||||
* replacing the previous full-body "delete everything + re-insert" write that
|
||||
* clobbered concurrent human edits on every sync (review #5 — "do the write as a
|
||||
* merge").
|
||||
*
|
||||
* Strategy: diff the two documents at TOP-LEVEL BLOCK granularity (an LCS over a
|
||||
* canonical structural serialization of each block) and apply only the minimal
|
||||
* insert/delete operations. Blocks that are byte-identical on both sides are
|
||||
* left UNTOUCHED in the live doc — so a human editing one paragraph is unaffected
|
||||
* when git changes a different paragraph, and an unchanged re-sync is a complete
|
||||
* no-op (zero Yjs operations). Yjs then CRDT-merges the minimal ops with any
|
||||
* concurrent edits.
|
||||
*
|
||||
* Limitation (honest): `mergeXmlFragments` is a 2-way merge (live vs incoming).
* For a block that BOTH sides changed since the last sync it cannot tell which is
* newer without a common ancestor, so the incoming (git) version wins for that
* one block; the common cases — unchanged resync, and edits to DIFFERENT blocks —
* are handled losslessly. When the last-synced base is available, use
* `mergeXmlFragments3Way` instead: it keeps a block only the human changed, takes
* a block only git changed, and resolves a block both changed to git.
|
||||
*/
|
||||
|
||||
/** Union of the Yjs XML node classes accepted by `cloneXmlNode`. */
type XmlNode = Y.XmlElement | Y.XmlText | Y.XmlHook;
|
||||
|
||||
/**
|
||||
* Node attributes that are VOLATILE identity (not content) and so must be
|
||||
* excluded from the block comparison key.
|
||||
*
|
||||
* `id` is the per-block UniqueID the editor stamps on every heading/paragraph
|
||||
* (and transclusionSource). It exists ONLY in the live Yjs document — a body
|
||||
* arriving from git is parsed from clean markdown, which carries no block ids
|
||||
* (`markdownToProseMirror` materializes `id: null`, which the Yjs transform then
|
||||
* drops). If `id` were part of the key, an UNCHANGED live block (id "abc123")
|
||||
* would never match the SAME block coming from git (no id), so the three-way
|
||||
* merge's LCS could not anchor on it. The merge would then treat every live
|
||||
* block as deleted-and-reinserted and, when an incoming block has no matching
|
||||
* anchor (e.g. content inserted at the very TOP of the page), RE-ADD a copy of
|
||||
* it on every sync cycle — a non-convergent, unbounded duplication loop
|
||||
* (start-of-document content duplicating each push/pull cycle).
|
||||
*
|
||||
* Excluding `id` makes blocks compare by CONTENT, so an unchanged block matches
|
||||
* across the git round-trip and the reconciliation is idempotent. Block identity
|
||||
* is still preserved in the merged output: `diff3Plan` keeps the LIVE block
|
||||
* INSTANCE (with its id) for an anchor — picks are by index, not by key — so the
|
||||
* stable Yjs block (and any in-flight human edit on it) stays put. This mirrors
|
||||
* `canonicalize.ts`, which already strips the regenerated block `id` from the
|
||||
* round-trip idempotency comparison for exactly the same reason.
|
||||
*
|
||||
* Known limitation (accepted trade-off of content-based matching): two GENUINELY
|
||||
* DISTINCT blocks whose content is byte-identical now collapse to the same content
|
||||
* key, so when git deletes one of the duplicates the LCS may drop the OTHER live
|
||||
* instance instead. The visible result is identical (one copy removed, one kept),
|
||||
* but a concurrent in-flight human edit on the dropped instance could be lost.
|
||||
*/
|
||||
const VOLATILE_KEY_ATTRS: ReadonlySet<string> = new Set<string>(['id']);
|
||||
|
||||
/**
 * Lazily builds — at most once per process — the ProseMirror schema derived
 * from `tiptapExtensions`, the same extension set the collaboration server uses
 * to materialize Yjs documents. The result is memoized because the block key is
 * computed per block per cycle and building a schema is not free.
 *
 * Why a schema rather than a hand-maintained denylist: the LIVE Yjs document is
 * produced by `TiptapTransformer.toYdoc(pm, 'default', tiptapExtensions)`, which
 * stamps every schema-default attribute onto every node and mark (`indent: 0`,
 * `image.align: "center"`, the link mark's `internal: false`,
 * `highlight.colorName: null`, ...). A body re-imported from git goes through
 * `markdownToProseMirror`, whose schema declares those attrs with different
 * (usually null) defaults, and null/absent attrs are dropped by
 * `y-prosemirror`. The same block would therefore carry defaults on the live
 * side and nothing on the git side; its key would diverge, the three-way merge
 * would anchor on nothing, and the whole body would be re-appended on every
 * reconcile cycle.
 *
 * Reading the defaults from the real schema normalizes all such attributes at
 * once: any attribute equal to its schema default (or null/undefined) is left
 * out of the key, on element attributes and on mark attributes alike, while
 * genuinely non-default values (a real `indent: 2`, `align: "left"`,
 * `link.href`, highlight color) remain content and still diff.
 *
 * @returns the memoized schema, or `null` when it could not be built — callers
 * then only drop null/undefined attributes.
 */
let memoSchema: Schema | null = null;
let memoSchemaTried = false;
function getMergeSchema(): Schema | null {
  // Fast path: a previous call already attempted the build (success or not).
  if (memoSchemaTried) return memoSchema;

  // Flip the flag first so a failing build is never retried.
  memoSchemaTried = true;
  try {
    memoSchema = getSchema(tiptapExtensions as any);
  } catch {
    // Deliberate best-effort: if the schema can't be built (e.g. a degenerate
    // extension set in a unit test that stubs `tiptapExtensions`), fall back to
    // dropping only null/undefined attrs. The real server always builds it fine.
    memoSchema = null;
  }
  return memoSchema;
}
|
||||
|
||||
/**
 * Structural equality for attribute values: primitives compare with `===`,
 * arrays element-wise in order, and plain objects key-by-key regardless of key
 * order. Attribute values are JSON-like, so no cycle handling is needed.
 */
function attrValuesEqual(a: unknown, b: unknown): boolean {
  if (a === b) return true;
  if (
    typeof a !== 'object' ||
    typeof b !== 'object' ||
    a === null ||
    b === null
  ) {
    return false;
  }
  const aIsArray = Array.isArray(a);
  if (aIsArray !== Array.isArray(b)) return false;
  if (aIsArray) {
    const xs = a as unknown[];
    const ys = b as unknown[];
    return (
      xs.length === ys.length && xs.every((x, i) => attrValuesEqual(x, ys[i]))
    );
  }
  const left = a as Record<string, unknown>;
  const right = b as Record<string, unknown>;
  const leftKeys = Object.keys(left);
  if (leftKeys.length !== Object.keys(right).length) return false;
  return leftKeys.every(
    (k) =>
      Object.prototype.hasOwnProperty.call(right, k) &&
      attrValuesEqual(left[k], right[k]),
  );
}

/**
 * True if `value` is the schema default for `attrName` of `attrSpecs`, or is
 * null/undefined (which a git round-trip drops). Such attributes are excluded
 * from the comparison key.
 *
 * The default is compared STRUCTURALLY, not by reference: an object- or
 * array-valued default is a different instance from the copy stored on the
 * node, so `===` alone would never recognise it and that default would stay in
 * the key.
 *
 * @param attrSpecs ProseMirror node/mark spec attr map (`{ [name]: { default } }`);
 *   a missing map (unknown node/mark) only drops null/undefined.
 * @param attrName name of the attribute being tested.
 * @param value the attribute's current value.
 * @returns whether the attribute should be left out of the key. A non-null
 *   value for an attr declared without a default is never dropped
 *   (`spec.default` is then `undefined`, which equals no non-null value).
 */
function isDefaultAttr(
  attrSpecs: Record<string, any> | undefined | null,
  attrName: string,
  value: unknown,
): boolean {
  if (value === null || value === undefined) return true;
  const spec = attrSpecs?.[attrName];
  return !!spec && attrValuesEqual(spec.default, value);
}
|
||||
|
||||
/**
 * Returns a copy of an XmlText delta in which every op's mark attributes are
 * reduced to the values that are real content.
 *
 * For each op carrying an `attributes` object (markName -> mark attrs):
 *  - a mark whose value is null/undefined is omitted;
 *  - a mark stored as a primitive (e.g. `true` for an attr-less mark) is kept
 *    unchanged;
 *  - a mark stored as an object keeps only the attrs that `isDefaultAttr`
 *    rejects, i.e. those that are neither null/undefined nor equal to the
 *    mark's schema default.
 *
 * Mark names and attr names are emitted in sorted order so the result
 * serializes identically regardless of insertion order. Ops without an
 * `attributes` object are returned as-is; the inserted text is never touched.
 * This is what lets a live link mark with materialized `internal: false` /
 * `target: "_blank"` compare equal to its git round-tripped twin.
 */
function normalizeDelta(delta: any[]): any[] {
  const schema = getMergeSchema();

  // Strip default / null attrs from one object-valued mark.
  const stripDefaults = (
    markName: string,
    markAttrs: Record<string, unknown>,
  ): Record<string, unknown> => {
    const specAttrs = schema?.marks[markName]?.spec.attrs as
      | Record<string, any>
      | undefined;
    const kept: Record<string, unknown> = {};
    for (const attrName of Object.keys(markAttrs).sort()) {
      const attrValue = markAttrs[attrName];
      if (!isDefaultAttr(specAttrs, attrName, attrValue)) {
        kept[attrName] = attrValue;
      }
    }
    return kept;
  };

  return delta.map((op) => {
    const hasMarks =
      op && op.attributes != null && typeof op.attributes === 'object';
    if (!hasMarks) return op;

    const marks: Record<string, unknown> = {};
    for (const markName of Object.keys(op.attributes).sort()) {
      const markVal = op.attributes[markName];
      if (markVal == null) continue;
      marks[markName] =
        typeof markVal === 'object'
          ? stripDefaults(markName, markVal as Record<string, unknown>)
          : markVal; // attr-less mark stored as a primitive — keep as-is
    }
    return { ...op, attributes: marks };
  });
}
|
||||
|
||||
/**
 * Produces a canonical, comparable plain-object form of a Yjs XML node.
 *
 *  - `Y.XmlText`    -> `{ t: <normalized delta> }` (text + marks)
 *  - `Y.XmlElement` -> `{ n: nodeName, a: attrs, c: children }`, where `attrs`
 *    has its keys sorted and omits the volatile block `id`
 *    (`VOLATILE_KEY_ATTRS`) as well as every attribute `isDefaultAttr` flags
 *    (schema default or null/undefined); `children` are serialized recursively.
 *  - anything else  -> `{ u: String(node) }`
 *
 * Because ids and defaults are dropped at every level, a block compares equal by
 * CONTENT across the git round-trip, which keeps the merge anchor-able and
 * idempotent.
 */
export function serializeXmlNode(node: unknown): unknown {
  if (node instanceof Y.XmlText) {
    const normalized = normalizeDelta(node.toDelta());
    return { t: normalized };
  }

  if (node instanceof Y.XmlElement) {
    const rawAttrs = node.getAttributes() as Record<string, unknown>;
    const specAttrs = getMergeSchema()?.nodes[node.nodeName]?.spec.attrs as
      | Record<string, any>
      | undefined;

    // Keep only content-bearing attributes, inserted in sorted key order.
    const contentAttrs: Record<string, unknown> = {};
    Object.keys(rawAttrs)
      .sort()
      .filter(
        (name) =>
          !VOLATILE_KEY_ATTRS.has(name) &&
          !isDefaultAttr(specAttrs, name, rawAttrs[name]),
      )
      .forEach((name) => {
        contentAttrs[name] = rawAttrs[name];
      });

    const children = node.toArray().map((child) => serializeXmlNode(child));
    return { n: node.nodeName, a: contentAttrs, c: children };
  }

  // XmlHook / unknown: fall back to a stable string so it compares by its
  // serialized form (these do not occur in the Docmost block schema).
  return { u: String(node) };
}
|
||||
|
||||
/** Comparison key of a node: the JSON text of its `serializeXmlNode` form. */
const key = (node: unknown): string => JSON.stringify(serializeXmlNode(node));
|
||||
|
||||
/**
 * Rebuilds a Yjs XML node as a brand-new, unattached node so it can be inserted
 * into ANOTHER document (Yjs types are bound to their doc, so a node cannot be
 * moved across docs — it has to be recreated).
 *
 *  - `Y.XmlText`: a new text node with the source delta re-applied, which
 *    carries over both text and marks.
 *  - `Y.XmlElement`: a new element with the same nodeName, every attribute
 *    copied, and every child cloned recursively.
 *  - anything else (XmlHook — does not occur in the Docmost block schema): an
 *    empty `paragraph`, so the merge never crashes.
 */
export function cloneXmlNode(node: XmlNode): Y.XmlElement | Y.XmlText {
  if (node instanceof Y.XmlText) {
    const textCopy = new Y.XmlText();
    const ops = node.toDelta();
    if (ops.length > 0) textCopy.applyDelta(ops);
    return textCopy;
  }

  if (!(node instanceof Y.XmlElement)) {
    // Best-effort placeholder for unsupported node kinds.
    return new Y.XmlElement('paragraph');
  }

  const elementCopy = new Y.XmlElement(node.nodeName);
  const sourceAttrs = node.getAttributes() as Record<string, unknown>;
  for (const [name, value] of Object.entries(sourceAttrs)) {
    elementCopy.setAttribute(name, value as string);
  }

  const children = node
    .toArray()
    .map((child) => cloneXmlNode(child as XmlNode));
  if (children.length > 0) elementCopy.insert(0, children);
  return elementCopy;
}
|
||||
|
||||
/**
 * One step of the edit script produced by `diffBlocks`: `keep` leaves the
 * current block of `a` in place, `del` removes it, and `ins` inserts the block
 * at index `bi` of `b`.
 */
type Op = { op: 'keep' } | { op: 'del' } | { op: 'ins'; bi: number };
|
||||
|
||||
/**
 * Builds the keep/del/ins edit script that turns `a` (live block keys) into `b`
 * (incoming block keys), walking both sequences front to back and consulting
 * the table from `buildLcsTable` whenever the current keys differ.
 *
 * NOTE(review): the lookups `table[i + 1][j]` / `table[i][j + 1]` presume the
 * table is indexed by remaining suffix — confirm against `buildLcsTable`.
 *
 * @param a keys of the sequence being edited.
 * @param b keys of the desired sequence.
 * @returns ops in application order; every `ins` carries the index into `b`.
 */
export function diffBlocks(a: string[], b: string[]): Op[] {
  const table = buildLcsTable(a, b);
  const script: Op[] = [];
  let ai = 0;
  let bi = 0;

  while (ai < a.length || bi < b.length) {
    const liveLeft = ai < a.length;
    const incomingLeft = bi < b.length;

    if (liveLeft && incomingLeft && a[ai] === b[bi]) {
      // Same key on both sides: the block stays.
      script.push({ op: 'keep' });
      ai += 1;
      bi += 1;
    } else if (
      liveLeft &&
      (!incomingLeft || table[ai + 1][bi] >= table[ai][bi + 1])
    ) {
      // Either `b` is exhausted or the table favours skipping the live block.
      script.push({ op: 'del' });
      ai += 1;
    } else {
      // Either `a` is exhausted or the table favours taking the incoming block.
      script.push({ op: 'ins', bi });
      bi += 1;
    }
  }

  return script;
}
|
||||
|
||||
/**
 * Two-way block merge: edits `live` in place until its top-level blocks match
 * those of `target`, applying only the inserts and deletes from the
 * `diffBlocks` script. Blocks whose keys match are left untouched. MUST be
 * called inside a Yjs transaction.
 *
 * @param live fragment to mutate.
 * @param target fragment holding the desired block sequence; inserted blocks
 *   are cloned from it, never moved.
 * @returns how many block inserts + deletes were applied (0 == content already
 *   identical).
 */
export function mergeXmlFragments(
  live: Y.XmlFragment,
  target: Y.XmlFragment,
): number {
  const incoming = target.toArray();
  const script = diffBlocks(live.toArray().map(key), incoming.map(key));

  let position = 0; // index into the LIVE fragment as it is being mutated
  let changes = 0;
  for (const step of script) {
    switch (step.op) {
      case 'keep':
        position += 1;
        break;
      case 'del':
        // Remove the block at the cursor; the next block slides into its slot,
        // so the cursor does not advance.
        live.delete(position, 1);
        changes += 1;
        break;
      case 'ins':
        live.insert(position, [cloneXmlNode(incoming[step.bi] as XmlNode)]);
        position += 1;
        changes += 1;
        break;
    }
  }
  return changes;
}
|
||||
|
||||
/**
 * THREE-WAY block merge: reconciles `live` toward `target` using `base` (the
 * last-synced common ancestor), so a block only the human changed is KEPT and a
 * block only git changed is taken — rather than git's version always winning
 * (review #5). Conflicts (both changed the same block) resolve to git.
 *
 * How: `diff3Plan` yields the merged block ORDER as picks from either live or
 * target. Those picks are cloned into a throwaway fragment, which is then
 * spliced into `live` by the two-way `mergeXmlFragments`, so untouched live
 * block instances — and any in-flight edits on them — stay put. MUST be called
 * inside a Yjs transaction.
 *
 * @returns the number of block operations applied to `live`.
 */
export function mergeXmlFragments3Way(
  live: Y.XmlFragment,
  target: Y.XmlFragment,
  base: Y.XmlFragment,
): number {
  const liveBlocks = live.toArray();
  const gitBlocks = target.toArray();

  const plan = diff3Plan(
    base.toArray().map(key),
    liveBlocks.map(key),
    gitBlocks.map(key),
  );

  // Materialize the planned sequence in a scratch document, cloning each pick
  // from the side it came from.
  const scratchDoc = new Y.Doc();
  const scratchFrag = scratchDoc.getXmlFragment('default');
  const resolved: Array<Y.XmlElement | Y.XmlText> = [];
  for (const pick of plan) {
    const pool = pick.src === 'live' ? liveBlocks : gitBlocks;
    resolved.push(cloneXmlNode(pool[pick.index] as XmlNode));
  }
  if (resolved.length > 0) scratchFrag.insert(0, resolved);

  // Splice the planned result back into the live fragment with minimal ops.
  return mergeXmlFragments(live, scratchFrag);
}
|
||||
Reference in New Issue
Block a user