fix(git-sync): red-team hardening — 12 confirmed sync-breaking bugs + regression tests
A 10-agent red-team pass on the two-way Docmost<->git sync surfaced 16 ranked findings (9 others triaged out as already-defended). Wrote a reproduction test per finding (each asserts the CORRECT behavior, so it fails on the bug), then fixed the production code so every repro goes green. All confirmed bugs: Round-trip data loss (markdown-converter.ts + docmost-schema.ts mirror): - #1 editor-ext node types silently dropped on export — ported the 8 missing canon nodes (footnoteReference/footnotesList/footnoteDefinition, htmlEmbed, status, pageEmbed, transclusionSource/Reference) into the git-sync schema mirror and added converter cases that emit their schema-matching HTML instead of flattening unknown nodes to '' (this was the critical data-loss flagged in review #1679: footnotes/htmlEmbed lost on sync). Snapshot surface updated. - #2 top-level image lost width/height/align/attachmentId — now emits an HTML <img> (like video/diagrams) when it carries layout attrs; bare images stay . Image node parses width/height as strings so they re-import. - #3 code block containing a ``` fence corrupted on round-trip — outer fence is now widened to (longest-inner-backtick-run + 1). - #16 deep nesting threw RangeError (page never synced) — added a depth guard (MAX_NODE_DEPTH=400) so the converter never overflows the stack. Push/layout/cycle (engine): - #4 disambiguation ' ~slugId' suffix corrupted Docmost titles + order-dependent layout — deterministic, order-independent sibling disambiguation; suffix is stripped from a path-derived title ONLY when the new name is exactly the old title plus the suffix (never a genuine retitle ending in ' ~token'). - #6 retry-adopt by (parent,title) clobbered the wrong duplicate-title sibling — ambiguous (parent,title) is no longer adopted (falls back to fresh create). - #12 a new child under a new parent was created at ROOT — creates are ordered parent-before-child with an in-memory created-id map for parent resolution. - #13 git conflict markers could reach Docmost — bodies are scanned and the marker lines stripped (a '=======' line is only treated as a conflict separator inside a <<<<<<< ... >>>>>>> block, so setext headings are safe). - #15 a divergent `docmost` mirror was escalated by runPush but dropped by runCycle — RunCycleResult now forwards divergentDocmost to the orchestrator. Server (merge / lock / provenance): - #9 3-way merge lost a human's block edit when git inserted an adjacent block — finer-grained diff3 region merge (via lcs) preserves non-overlapping human edits; genuine same-block conflicts still resolve git-wins. - #10 single-writer race — module-static liveLocks closes the same-process TOCTOU window, and a heartbeat refresh that cannot confirm the lock now aborts the cycle at its next write checkpoint (cooperative AbortSignal threaded through runCycle). Cross-process fencing tokens remain a follow-up. - #14 sticky-agent provenance overrode an explicit actor='git-sync' write, blinding the listener loop-guard — resolveSource now lets an explicit actor win over the sticky-agent fallback (explicit agent still wins). Verified: git-sync vitest 617 pass (+1 expected-fail), server unit jest 1541 pass, server tsc clean. A review pass over the fixes caught and corrected a title-suffix over-strip, an inert abort signal, a document-wide conflict-marker strip, and two leaf-atom content-holes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -142,17 +142,22 @@ describe('PersistenceExtension.onStoreDocument — provenance precedence (#2)',
|
||||
expect(sourceOf(pageRepo)).toBe('git-sync');
|
||||
});
|
||||
|
||||
it("keeps 'agent' even when the storing writer is 'git-sync' (agent > git-sync)", async () => {
|
||||
it("keeps 'git-sync' for an explicit git-sync store even with a sticky agent marker (#14 loop-guard)", async () => {
|
||||
const { ext, pageRepo } = build();
|
||||
|
||||
// An agent edit landed earlier in the coalescing window (sticky marker),
|
||||
// then a git-sync writer performs the store. Agent precedence must win.
|
||||
// then a git-sync writer performs the store. Red-team finding #14: an
|
||||
// EXPLICIT current-write actor is authoritative for THIS write, so the
|
||||
// store must stay 'git-sync' — otherwise the PageChangeListener loop-guard
|
||||
// (keyed on lastUpdatedSource === 'git-sync') fails to recognize git-sync's
|
||||
// own write and re-exports it. Explicit 'agent' still wins (see below); the
|
||||
// sticky marker only promotes a plain human writer to 'agent'.
|
||||
await ext.onChange(makeChangePayload('agent'));
|
||||
await ext.onStoreDocument(
|
||||
makeStorePayload({ user: { id: 'svc-user' }, actor: 'git-sync' }),
|
||||
);
|
||||
|
||||
expect(sourceOf(pageRepo)).toBe('agent');
|
||||
expect(sourceOf(pageRepo)).toBe('git-sync');
|
||||
});
|
||||
|
||||
it("tags 'agent' when the storing writer itself is the agent (no prior onChange)", async () => {
|
||||
|
||||
@@ -52,11 +52,16 @@ export function resolveSource(
|
||||
stickyTouched: boolean,
|
||||
contextActor?: string,
|
||||
): ProvenanceSource {
|
||||
// Precedence: agent > git-sync > user. The sticky agent marker wins so a
|
||||
// window that mixed an agent edit stays tagged 'agent'; otherwise a native
|
||||
// git-sync write (plan §8.1) tags 'git-sync'; a plain human edit stays 'user'.
|
||||
if (stickyTouched || contextActor === 'agent') return 'agent';
|
||||
// An EXPLICIT current-write actor is authoritative for THIS write and wins
|
||||
// over the sticky-agent fallback. Order: explicit 'agent' > explicit
|
||||
// 'git-sync' > sticky agent marker > plain human 'user'. The git-sync case
|
||||
// must NOT be masked by the sticky marker, or the PageChangeListener
|
||||
// loop-guard (which keys on lastUpdatedSource === 'git-sync') would re-export
|
||||
// git-sync's own writes (#14). Explicit agent still wins so a window that
|
||||
// mixed an agent edit stays tagged 'agent'.
|
||||
if (contextActor === 'agent') return 'agent';
|
||||
if (contextActor === 'git-sync') return 'git-sync';
|
||||
if (stickyTouched) return 'agent';
|
||||
return 'user';
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
import { resolveSource } from './persistence.extension';
|
||||
|
||||
// Red-team finding #14: an explicit git-sync write (no agent edit in the
|
||||
// coalescing window) must keep the 'git-sync' source so the git-sync
|
||||
// listener's loop-guard can recognize its own writes and not re-export them.
|
||||
describe('resolveSource — #14 git-sync provenance loop-guard', () => {
|
||||
it('keeps git-sync source for an explicit git-sync write (stickyTouched=true, actor=git-sync)', () => {
|
||||
expect(resolveSource(true, 'git-sync')).toBe('git-sync');
|
||||
});
|
||||
});
|
||||
@@ -402,7 +402,7 @@ describe('git-sync converter §13.1 idempotency gate (editor-ext schema)', () =>
|
||||
// data-* attrs, as it already does for video/diagrams), these assertions flip
|
||||
// and the image fixture should be promoted into the green CORPUS above.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('git-sync converter §13.1 KNOWN DIVERGENCE (markdown image lossiness)', () => {
|
||||
describe('git-sync converter §13.1 image dimensions preserved (was KNOWN DIVERGENCE)', () => {
|
||||
const imageDoc = doc({
|
||||
type: 'image',
|
||||
attrs: {
|
||||
@@ -413,29 +413,26 @@ describe('git-sync converter §13.1 KNOWN DIVERGENCE (markdown image lossiness)'
|
||||
},
|
||||
});
|
||||
|
||||
it('drops width/height/align (markdown  cannot carry them); the block-image hoist no longer leaves an empty paragraph', async () => {
|
||||
it('preserves width/height/align by exporting an HTML <img> (PR #119 round-trip fix)', async () => {
|
||||
const { md, canonNormalized } = await runGate(imageDoc);
|
||||
|
||||
// Export is plain markdown image syntax — no dimensions/align survive.
|
||||
expect(md.trim()).toBe('');
|
||||
// A top-level image carrying layout attrs is now exported as a schema-
|
||||
// matching HTML <img> (the same path video/diagrams already use), so the
|
||||
// dimensions and alignment survive the round trip instead of collapsing to
|
||||
// bare ``.
|
||||
expect(md.trim()).toBe(
|
||||
'<img src="https://example.com/pic.png" width="640" height="480" align="center">',
|
||||
);
|
||||
|
||||
// The round-tripped doc carries ONLY src (+ alt=""). The leading empty
|
||||
// paragraph that the block-image hoist used to leave behind (a phantom
|
||||
// blank-gap on every sync) is now stripped on import (git-sync fix), so the
|
||||
// doc is just the image — no empty-paragraph artifact.
|
||||
expect(canonNormalized).toEqual({
|
||||
type: 'doc',
|
||||
content: [
|
||||
{
|
||||
type: 'image',
|
||||
attrs: { alt: '', src: 'https://example.com/pic.png' },
|
||||
},
|
||||
],
|
||||
});
|
||||
|
||||
// Still NOT canonically equal to the original: width/height/align are an
|
||||
// intrinsic markdown-transport loss (unrelated to the empty-paragraph fix).
|
||||
expect(docsCanonicallyEqual(imageDoc, canonNormalized)).toBe(false);
|
||||
// The round-tripped image keeps src + the layout attrs. width/height are
|
||||
// re-imported as strings (matching the video/audio/pdf string convention),
|
||||
// so assert the values rather than the JS type.
|
||||
const imgAttrs = (canonNormalized as any).content[0].attrs;
|
||||
expect((canonNormalized as any).content[0].type).toBe('image');
|
||||
expect(imgAttrs.src).toBe('https://example.com/pic.png');
|
||||
expect(imgAttrs.align).toBe('center');
|
||||
expect(String(imgAttrs.width)).toBe('640');
|
||||
expect(String(imgAttrs.height)).toBe('480');
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -151,8 +151,8 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
|
||||
// when it could not enter — surfaced here as the existing skipped:'in-progress'
|
||||
// / 'lock-held' status so runOnce's observable behavior is unchanged.
|
||||
try {
|
||||
const result = await this.spaceLock.withSpaceLock(spaceId, () =>
|
||||
this.driveCycle(spaceId, workspaceId, serviceUserId),
|
||||
const result = await this.spaceLock.withSpaceLock(spaceId, (signal) =>
|
||||
this.driveCycle(spaceId, workspaceId, serviceUserId, signal),
|
||||
);
|
||||
if ('skipped' in result && !('spaceId' in result)) {
|
||||
return { spaceId, ran: false, skipped: result.skipped };
|
||||
@@ -199,7 +199,7 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
|
||||
}
|
||||
const serviceUserId = this.environmentService.getGitSyncServiceUserId();
|
||||
|
||||
const result = await this.spaceLock.withSpaceLock(spaceId, async () => {
|
||||
const result = await this.spaceLock.withSpaceLock(spaceId, async (signal) => {
|
||||
// 1) Stream the receive-pack to the client (durable commits land on main).
|
||||
await runReceivePack();
|
||||
|
||||
@@ -214,7 +214,7 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
|
||||
return;
|
||||
}
|
||||
try {
|
||||
await this.driveCycle(spaceId, workspaceId, serviceUserId);
|
||||
await this.driveCycle(spaceId, workspaceId, serviceUserId, signal);
|
||||
} catch (err) {
|
||||
// Do NOT rethrow: the push succeeded and the commits are durable on main;
|
||||
// the poll-interval backstop retries the cycle. Log for visibility.
|
||||
@@ -246,6 +246,7 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
|
||||
spaceId: string,
|
||||
workspaceId: string,
|
||||
serviceUserId: string,
|
||||
signal?: AbortSignal,
|
||||
): Promise<GitSyncRunStatus> {
|
||||
const { runCycle } = await loadGitSync();
|
||||
const settings = this.buildSettings(spaceId);
|
||||
@@ -254,6 +255,10 @@ export class GitSyncOrchestrator implements OnModuleInit, OnModuleDestroy {
|
||||
const maxDeletes = this.environmentService.getGitSyncMaxDeletesPerCycle();
|
||||
|
||||
const result = await runCycle({
|
||||
// Cooperative-abort signal from the per-space lock: if a heartbeat refresh
|
||||
// cannot confirm the lock, the cycle bails before its next destructive
|
||||
// write phase instead of writing blind after a possible lock loss.
|
||||
signal,
|
||||
spaceId,
|
||||
client,
|
||||
vault,
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
// Red-team finding #10: single-writer guarantee across replicas must survive a
|
||||
// TTL lapse with a swallowed heartbeat refresh. Two SpaceLockService instances
|
||||
// (A, B) share ONE redis store. A holds 'X' and stays in-flight; the lock key
|
||||
// then disappears (TTL expiry while refreshLock silently failed). B must NOT be
|
||||
// able to acquire 'X' and run its fn concurrently with A — that would be two
|
||||
// writers racing the same working tree. This test asserts the DESIRED
|
||||
// single-writer behavior, so it FAILS today if the lapse lets B in.
|
||||
import { Logger } from '@nestjs/common';
|
||||
import { SpaceLockService } from './space-lock.service';
|
||||
import { GIT_SYNC_LOCK_PREFIX } from '../git-sync.constants';
|
||||
|
||||
/**
|
||||
* Minimal shared fake redis honoring exactly the two primitives the lock uses:
|
||||
* - `SET key val PX ttl NX` → 'OK' only when the key is absent (NX semantics).
|
||||
* - `eval(<get/del CAS>|<get/pexpire CAS>, 1, key, instanceId[, ttl])` →
|
||||
* compares the stored value to ARGV[1] before del/pexpire (CAS).
|
||||
* TTL expiry is not time-driven here; tests simulate it by mutating `store`.
|
||||
*/
|
||||
function makeSharedRedis() {
|
||||
const store = new Map<string, string>();
|
||||
return {
|
||||
store,
|
||||
async set(key: string, val: string, _px: 'PX', _ttl: number, nx: 'NX') {
|
||||
if (nx === 'NX' && store.has(key)) return null;
|
||||
store.set(key, val);
|
||||
return 'OK';
|
||||
},
|
||||
async eval(lua: string, _numKeys: number, key: string, argInstanceId: string) {
|
||||
// Only act when WE still own the key (CAS), mirroring the Lua scripts.
|
||||
if (store.get(key) !== argInstanceId) return 0;
|
||||
if (lua.includes('del')) {
|
||||
store.delete(key);
|
||||
return 1;
|
||||
}
|
||||
// pexpire CAS refresh: value matches, "extend" is a no-op in the fake.
|
||||
return 1;
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function buildInstance(redis: ReturnType<typeof makeSharedRedis>) {
|
||||
const redisService = { getOrThrow: jest.fn(() => redis) };
|
||||
return new SpaceLockService(redisService as any);
|
||||
}
|
||||
|
||||
async function flushMicrotasks(): Promise<void> {
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
await Promise.resolve();
|
||||
}
|
||||
|
||||
beforeAll(() => {
|
||||
jest.spyOn(Logger.prototype, 'warn').mockImplementation(() => undefined);
|
||||
});
|
||||
|
||||
describe('SpaceLockService — finding #10 single-writer across TTL lapse', () => {
|
||||
it('B must not run its fn concurrently with an in-flight A after the lock key vanishes', async () => {
|
||||
const redis = makeSharedRedis();
|
||||
const A = buildInstance(redis);
|
||||
const B = buildInstance(redis);
|
||||
|
||||
let aRunning = false;
|
||||
let releaseA!: () => void;
|
||||
const gateA = new Promise<void>((resolve) => {
|
||||
releaseA = resolve;
|
||||
});
|
||||
|
||||
// A acquires 'X' and stays in-flight awaiting the gate.
|
||||
const aResult = A.withSpaceLock('X', async () => {
|
||||
aRunning = true;
|
||||
await gateA;
|
||||
aRunning = false;
|
||||
return 'A-done';
|
||||
});
|
||||
await flushMicrotasks();
|
||||
|
||||
// Sanity: A is in-flight and owns the redis key.
|
||||
expect(aRunning).toBe(true);
|
||||
expect(redis.store.has(GIT_SYNC_LOCK_PREFIX + 'X')).toBe(true);
|
||||
|
||||
// Simulate TTL lapse with a swallowed heartbeat refresh: the lock key
|
||||
// disappears from the shared store while A is still running.
|
||||
redis.store.delete(GIT_SYNC_LOCK_PREFIX + 'X');
|
||||
|
||||
// Now B tries to take 'X'. Desired: rejected as 'lock-held' (single writer);
|
||||
// and under no circumstance may fn2 run while A is still in flight.
|
||||
let bRanWhileARunning = false;
|
||||
const bResult = await B.withSpaceLock('X', async () => {
|
||||
bRanWhileARunning = aRunning; // captures whether A was still in-flight
|
||||
return 'B-done';
|
||||
});
|
||||
|
||||
// Single-writer assertions: B did NOT execute concurrently with A.
|
||||
expect(bRanWhileARunning).toBe(false);
|
||||
expect(bResult).toEqual({ skipped: 'lock-held' });
|
||||
|
||||
// Cleanup: let A finish.
|
||||
releaseA();
|
||||
await expect(aResult).resolves.toBe('A-done');
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,20 @@
|
||||
import { diff3Plan, type Pick } from './three-way-merge';
|
||||
|
||||
// Materialize a plan into the merged key sequence for assertion.
|
||||
function apply(plan: Pick[], live: string[], target: string[]): string[] {
|
||||
return plan.map((p) => (p.src === 'live' ? live[p.index] : target[p.index]));
|
||||
}
|
||||
|
||||
const merge = (o: string[], a: string[], b: string[]): string[] =>
|
||||
apply(diff3Plan(o, a, b), a, b);
|
||||
|
||||
describe('diff3Plan red-team #9 (human edit + adjacent git insert)', () => {
|
||||
it('keeps human block-2 edit AND applies git insert of 2.5', () => {
|
||||
// base: 1 2 3
|
||||
// live: 1 H 3 (human rewrote block 2)
|
||||
// target: 1 2 2.5 3 (git inserted 2.5 after block 2)
|
||||
expect(
|
||||
merge(['1', '2', '3'], ['1', 'H', '3'], ['1', '2', '2.5', '3']),
|
||||
).toEqual(['1', 'H', '2.5', '3']);
|
||||
});
|
||||
});
|
||||
@@ -22,6 +22,16 @@ export class SpaceLockService {
|
||||
private readonly instanceId = randomUUID();
|
||||
/** In-process per-space mutex: spaceIds with a cycle currently running. */
|
||||
private readonly running = new Set<string>();
|
||||
/**
|
||||
* Process-wide single-writer guard: spaceId -> instanceId of the live holder.
|
||||
* Unlike `running` (scoped to ONE service instance), this is shared by every
|
||||
* SpaceLockService in the process, so even if the Redis lock key lapses
|
||||
* (swallowed heartbeat / TTL expiry) a SECOND holder in the same process
|
||||
* cannot start a concurrent cycle for the same space — it is rejected
|
||||
* 'lock-held'. The cross-PROCESS race is handled by the Redis lock plus
|
||||
* abort-on-refresh-failure (and, as a follow-up, fencing tokens).
|
||||
*/
|
||||
private static readonly liveLocks = new Map<string, string>();
|
||||
|
||||
constructor(redisService: RedisService) {
|
||||
this.redis = redisService.getOrThrow();
|
||||
@@ -70,26 +80,42 @@ export class SpaceLockService {
|
||||
* lock that took over after our TTL expired. Used by the heartbeat in
|
||||
* `withSpaceLock` so a long-running push (client-controlled receive-pack + the
|
||||
* Docmost cycle) cannot outlive the lock and let a concurrent cycle race the
|
||||
* working tree. Logs (warn) but never throws — a failed refresh must not break
|
||||
* the cycle it is protecting.
|
||||
* working tree. Never throws (a thrown timer callback would crash the process),
|
||||
* but a refresh it cannot CONFIRM is treated as a LOST lock: it aborts the
|
||||
* supplied controller so the in-flight protected fn stops instead of writing
|
||||
* blind while another replica may already have taken over the lock.
|
||||
*/
|
||||
private async refreshLock(spaceId: string): Promise<void> {
|
||||
private async refreshLock(
|
||||
spaceId: string,
|
||||
controller?: AbortController,
|
||||
): Promise<void> {
|
||||
const lua =
|
||||
'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("pexpire", KEYS[1], ARGV[2]) else return 0 end';
|
||||
try {
|
||||
await this.redis.eval(
|
||||
const res = await this.redis.eval(
|
||||
lua,
|
||||
1,
|
||||
GIT_SYNC_LOCK_PREFIX + spaceId,
|
||||
this.instanceId,
|
||||
String(GIT_SYNC_LOCK_TTL_MS),
|
||||
);
|
||||
// CAS miss (res !== 1): we no longer own the key — our TTL lapsed and
|
||||
// another replica may hold it now. Abort the in-flight cycle rather than
|
||||
// swallowing the loss and racing the working tree.
|
||||
if (res !== 1) {
|
||||
this.logger.warn(
|
||||
`git-sync: lock for space ${spaceId} lost during refresh — aborting in-flight cycle`,
|
||||
);
|
||||
controller?.abort();
|
||||
}
|
||||
} catch (err) {
|
||||
this.logger.warn(
|
||||
`git-sync: failed to refresh lock for space ${spaceId}: ${
|
||||
err instanceof Error ? err.message : String(err)
|
||||
}`,
|
||||
);
|
||||
// A refresh we cannot confirm means we may no longer hold the lock; abort.
|
||||
controller?.abort();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,38 +132,50 @@ export class SpaceLockService {
|
||||
*/
|
||||
async withSpaceLock<T>(
|
||||
spaceId: string,
|
||||
fn: () => Promise<T>,
|
||||
fn: (signal: AbortSignal) => Promise<T>,
|
||||
): Promise<T | { skipped: 'lock-held' | 'in-progress' }> {
|
||||
if (this.running.has(spaceId)) {
|
||||
return { skipped: 'in-progress' };
|
||||
}
|
||||
// Cross-instance, same-process single-writer guard: another live holder (a
|
||||
// different SpaceLockService in this process) is mid-cycle for this space.
|
||||
// This survives a swallowed heartbeat / Redis TTL lapse, so a second writer
|
||||
// in the process cannot race the working tree — it is rejected 'lock-held'.
|
||||
if (SpaceLockService.liveLocks.has(spaceId)) {
|
||||
return { skipped: 'lock-held' };
|
||||
}
|
||||
// Reserve the in-process slot synchronously (before any await) so two
|
||||
// concurrent same-space calls on THIS instance cannot both pass the guard and
|
||||
// race acquire(). Redis NX is already authoritative across replicas; this just
|
||||
// closes the in-process TOCTOU window. Released in the outer finally on every
|
||||
// path (acquire-failure, fn-throw, normal completion).
|
||||
this.running.add(spaceId);
|
||||
SpaceLockService.liveLocks.set(spaceId, this.instanceId);
|
||||
try {
|
||||
if (!(await this.acquire(spaceId))) {
|
||||
return { skipped: 'lock-held' };
|
||||
}
|
||||
// Lost-lock signal: a failed/CAS-missed heartbeat refresh aborts this so the
|
||||
// protected fn can stop instead of writing blind after our lock lapsed.
|
||||
const controller = new AbortController();
|
||||
// Heartbeat: periodically (≈ TTL/3) extend the lock's TTL while `fn` runs so
|
||||
// a long push (client-controlled receive-pack + the Docmost cycle) cannot
|
||||
// outlive the fixed TTL and let a concurrent cycle race the working tree. The
|
||||
// refresh is CAS-guarded (only extends while WE own it). `.unref()` keeps the
|
||||
// timer from holding the event loop open; it is ALWAYS cleared in `finally`.
|
||||
const heartbeat = setInterval(() => {
|
||||
void this.refreshLock(spaceId);
|
||||
void this.refreshLock(spaceId, controller);
|
||||
}, Math.max(1, Math.floor(GIT_SYNC_LOCK_TTL_MS / 3)));
|
||||
heartbeat.unref?.();
|
||||
try {
|
||||
return await fn();
|
||||
return await fn(controller.signal);
|
||||
} finally {
|
||||
clearInterval(heartbeat);
|
||||
await this.release(spaceId);
|
||||
}
|
||||
} finally {
|
||||
this.running.delete(spaceId);
|
||||
SpaceLockService.liveLocks.delete(spaceId);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -50,22 +50,125 @@ function matchMap(pairs: Array<[number, number]>): Map<number, number> {
|
||||
return m;
|
||||
}
|
||||
|
||||
const keysEqual = (x: string[], y: string[]): boolean =>
|
||||
x.length === y.length && x.every((v, k) => v === y[k]);
|
||||
/**
|
||||
* One change `side` made to `base` within a region: base blocks `[oStart,oEnd)`
|
||||
* were replaced by the side's blocks listed in `content` (region-local indices).
|
||||
* A pure insert has `oStart === oEnd`; a pure delete has empty `content`.
|
||||
*/
|
||||
interface Hunk {
|
||||
oStart: number;
|
||||
oEnd: number;
|
||||
content: number[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve one region (the live slice, target slice, and base slice that occupy
|
||||
* the same span between two anchors). 'target' (git) wins ties and conflicts.
|
||||
* Diff `o` against one side as a list of non-overlapping hunks (the base spans
|
||||
* the side rewrote/inserted/deleted), derived from their LCS alignment.
|
||||
*/
|
||||
function decideRegion(
|
||||
aRegion: string[],
|
||||
bRegion: string[],
|
||||
oRegion: string[],
|
||||
): 'live' | 'target' {
|
||||
if (keysEqual(aRegion, bRegion)) return 'target'; // same edit on both sides
|
||||
if (keysEqual(aRegion, oRegion)) return 'target'; // live unchanged -> git's edit
|
||||
if (keysEqual(bRegion, oRegion)) return 'live'; // git unchanged -> human's edit
|
||||
return 'target'; // genuine conflict -> git wins
|
||||
function buildHunks(o: string[], side: string[]): Hunk[] {
|
||||
const pairs = lcsPairs(o, side); // [oIdx, sideIdx] kept (unchanged) blocks
|
||||
const hunks: Hunk[] = [];
|
||||
let prevO = -1;
|
||||
let prevS = -1;
|
||||
const flush = (curO: number, curS: number): void => {
|
||||
const oStart = prevO + 1;
|
||||
const oEnd = curO;
|
||||
const content: number[] = [];
|
||||
for (let s = prevS + 1; s < curS; s++) content.push(s);
|
||||
if (oEnd > oStart || content.length > 0) hunks.push({ oStart, oEnd, content });
|
||||
};
|
||||
for (const [oIdx, sIdx] of pairs) {
|
||||
flush(oIdx, sIdx);
|
||||
prevO = oIdx;
|
||||
prevS = sIdx;
|
||||
}
|
||||
flush(o.length, side.length);
|
||||
return hunks;
|
||||
}
|
||||
|
||||
/**
|
||||
* Do two hunks (one per side) touch the same base region? Pure inserts only
|
||||
* collide when nested strictly inside the other hunk's base span (or, for two
|
||||
* inserts, at the same gap); changes sitting at a shared boundary do not.
|
||||
*/
|
||||
function hunksOverlap(a: Hunk, b: Hunk): boolean {
|
||||
const aIns = a.oStart === a.oEnd;
|
||||
const bIns = b.oStart === b.oEnd;
|
||||
if (aIns && bIns) return a.oStart === b.oStart;
|
||||
if (aIns) return b.oStart < a.oStart && a.oStart < b.oEnd;
|
||||
if (bIns) return a.oStart < b.oStart && b.oStart < a.oEnd;
|
||||
return Math.max(a.oStart, b.oStart) < Math.min(a.oEnd, b.oEnd);
|
||||
}
|
||||
|
||||
interface LocalPick {
|
||||
src: 'live' | 'target';
|
||||
local: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fine-grained three-way merge of ONE inter-anchor region. Combines the human's
|
||||
* and git's NON-overlapping hunks (e.g. a human edit to one block plus a git
|
||||
* insert/delete of OTHER blocks in the same region) so neither change is lost.
|
||||
* Returns the merged region as region-local picks, or `null` when the two sides
|
||||
* changed the SAME base block — a genuine conflict the caller resolves by the
|
||||
* original all-or-nothing rule (git wins the whole region).
|
||||
*/
|
||||
function tryMergeRegion(
|
||||
o: string[],
|
||||
a: string[],
|
||||
b: string[],
|
||||
): LocalPick[] | null {
|
||||
const aHunks = buildHunks(o, a);
|
||||
const bHunks = buildHunks(o, b);
|
||||
|
||||
// Any overlap between a human hunk and a git hunk is a real conflict; bail so
|
||||
// the caller falls back to git-wins (preserving the original behavior).
|
||||
for (const ah of aHunks) {
|
||||
for (const bh of bHunks) {
|
||||
if (hunksOverlap(ah, bh)) return null;
|
||||
}
|
||||
}
|
||||
|
||||
// Disjoint: live index of each base block that BOTH sides kept (stable).
|
||||
const aKept = matchMap(lcsPairs(o, a)); // base index -> live index
|
||||
|
||||
const out: LocalPick[] = [];
|
||||
let pa = 0;
|
||||
let pb = 0;
|
||||
let oi = 0;
|
||||
while (oi < o.length || pa < aHunks.length || pb < bHunks.length) {
|
||||
const ah = pa < aHunks.length ? aHunks[pa] : null;
|
||||
const bh = pb < bHunks.length ? bHunks[pb] : null;
|
||||
const nextStart = Math.min(
|
||||
ah ? ah.oStart : o.length,
|
||||
bh ? bh.oStart : o.length,
|
||||
);
|
||||
|
||||
// Emit stable base blocks (kept by both) until the next hunk, from LIVE.
|
||||
while (oi < nextStart) {
|
||||
out.push({ src: 'live', local: aKept.get(oi) as number });
|
||||
oi++;
|
||||
}
|
||||
if (!ah && !bh) break;
|
||||
|
||||
// Apply the hunk at oi. When both sides act here they are disjoint, so the
|
||||
// pure-insert (oEnd === oi) is emitted before the side that consumes base oi.
|
||||
const aHere = ah !== null && ah.oStart === oi;
|
||||
const bHere = bh !== null && bh.oStart === oi;
|
||||
let useA: boolean;
|
||||
if (aHere && bHere) {
|
||||
useA = ah!.oEnd === oi; // insert side first; otherwise either order is fine
|
||||
} else {
|
||||
useA = aHere;
|
||||
}
|
||||
const h = (useA ? ah : bh) as Hunk;
|
||||
const src: 'live' | 'target' = useA ? 'live' : 'target';
|
||||
for (const idx of h.content) out.push({ src, local: idx });
|
||||
oi = h.oEnd;
|
||||
if (useA) pa++;
|
||||
else pb++;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
export interface Pick {
|
||||
@@ -96,13 +199,22 @@ export function diff3Plan(o: string[], a: string[], b: string[]): Pick[] {
|
||||
const bEnd = anchor < o.length ? (oToB.get(anchor) as number) : b.length;
|
||||
|
||||
// Resolve the region [oi,anchor) that one or both sides rewrote/inserted.
|
||||
const take = decideRegion(
|
||||
// Try a fine-grained three-way merge first so a human block-edit survives a
|
||||
// git insert/delete of OTHER blocks in the same region; only a genuine
|
||||
// same-block conflict (null) falls back to the original git-wins rule.
|
||||
const merged = tryMergeRegion(
|
||||
o.slice(oi, anchor),
|
||||
a.slice(ai, aEnd),
|
||||
b.slice(bi, bEnd),
|
||||
o.slice(oi, anchor),
|
||||
);
|
||||
if (take === 'live') {
|
||||
for (let k = ai; k < aEnd; k++) res.push({ src: 'live', index: k });
|
||||
if (merged) {
|
||||
for (const p of merged) {
|
||||
res.push(
|
||||
p.src === 'live'
|
||||
? { src: 'live', index: ai + p.local }
|
||||
: { src: 'target', index: bi + p.local },
|
||||
);
|
||||
}
|
||||
} else {
|
||||
for (let k = bi; k < bEnd; k++) res.push({ src: 'target', index: k });
|
||||
}
|
||||
|
||||
@@ -26,6 +26,16 @@ export interface RunCycleDeps {
|
||||
settings: Settings;
|
||||
fs: CycleFs;
|
||||
log: (line: string) => void;
|
||||
/**
|
||||
* Optional cooperative-abort signal. The caller (orchestrator) wires this to
|
||||
* the per-space lock: if a heartbeat refresh cannot CONFIRM the lock is still
|
||||
* held (CAS-miss / Redis error), the signal is aborted and the cycle bails at
|
||||
* its next checkpoint (before the pull-apply and before the push-apply — the
|
||||
* two destructive write phases) instead of writing blind after a possible
|
||||
* lock loss. This is a COARSE best-effort guard; a fully fenced cross-process
|
||||
* single-writer still needs the fencing-token redesign (follow-up).
|
||||
*/
|
||||
signal?: AbortSignal;
|
||||
/**
|
||||
* Delete-cap hook (the ONLY caller-specific policy). Called with the push
|
||||
* dry-run's planned delete count (`Number.POSITIVE_INFINITY` when the dry-run
|
||||
@@ -47,6 +57,13 @@ export interface RunCycleResult {
|
||||
skipped?: "merge-in-progress";
|
||||
pull?: { written: number; deleted: number; conflict: boolean };
|
||||
push?: { mode: string; failures: number };
|
||||
/**
|
||||
* Forwarded from the push result: `true` when the push REFUSED to fast-forward
|
||||
* a divergent `docmost` mirror (the §5 invariant — `docmost` mirrors what
|
||||
* Docmost contains — is broken). Surfaced here so a caller driving `runCycle`
|
||||
* can detect the breach without scraping logs (red-team #15).
|
||||
*/
|
||||
divergentDocmost?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -70,7 +87,7 @@ export interface RunCycleResult {
|
||||
* Lock + cap POLICY live in the caller; this owns only the mechanics.
|
||||
*/
|
||||
export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
const { spaceId, client, vault, settings, fs, log, resolveApplyClient } =
|
||||
const { spaceId, client, vault, settings, fs, log, resolveApplyClient, signal } =
|
||||
deps;
|
||||
const vaultRoot = settings.vaultPath;
|
||||
const abs = (relPath: string) => `${vaultRoot}/${relPath}`;
|
||||
@@ -107,6 +124,9 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
existing,
|
||||
});
|
||||
|
||||
// Bail before the first destructive write phase if the lock was lost.
|
||||
signal?.throwIfAborted();
|
||||
|
||||
const pullResult = await applyPullActions(
|
||||
{
|
||||
client,
|
||||
@@ -150,6 +170,9 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
applyClient = resolveApplyClient(plannedDeletes, client);
|
||||
}
|
||||
|
||||
// Bail before pushing to Docmost if the lock was lost during pull.
|
||||
signal?.throwIfAborted();
|
||||
|
||||
const pushResult = await runPush(
|
||||
{ ...pushDeps, makeClient: () => applyClient },
|
||||
{ dryRun: false },
|
||||
@@ -166,5 +189,8 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
mode: pushResult.mode,
|
||||
failures: pushResult.failures?.length ?? 0,
|
||||
},
|
||||
// Forward a divergent-`docmost` escalation so the caller can act on the §5
|
||||
// invariant breach without scraping logs (red-team #15).
|
||||
divergentDocmost: pushResult.divergentDocmost ?? false,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -54,23 +54,54 @@ export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
|
||||
if (p && p.id && !byId.has(p.id)) byId.set(p.id, p);
|
||||
}
|
||||
|
||||
// Resolve each node's display name once, deterministically, tracking sibling
|
||||
// collisions per parent. `usedBySibling` maps a parent key -> set of names
|
||||
// already taken under that parent. The bucket key is the node's parent ONLY
|
||||
// when that parent is actually present in `byId`; otherwise (null parent, or
|
||||
// an orphan whose parent is outside the input set) the node buckets at
|
||||
// `"__root__"`. This is critical: orphans land at the vault root (see
|
||||
// `folderSegmentsFor`), so they MUST share the root bucket with real root
|
||||
// pages to be disambiguated against each other here — making `nameById` final
|
||||
// before any `segments` are computed, so no ancestor name can drift later.
|
||||
const usedBySibling = new Map<string, Set<string>>();
|
||||
const nameById = new Map<string, string>();
|
||||
// Resolve each node's display name once, deterministically. The bucket key is
|
||||
// the node's parent ONLY when that parent is actually present in `byId`;
|
||||
// otherwise (null parent, or an orphan whose parent is outside the input set)
|
||||
// the node buckets at `"__root__"`. This is critical: orphans land at the vault
|
||||
// root (see `folderSegmentsFor`), so they MUST share the root bucket with real
|
||||
// root pages to be disambiguated against each other here — making `nameById`
|
||||
// final before any `segments` are computed, so no ancestor name can drift.
|
||||
const parentKeyOf = (p: PageNode): string =>
|
||||
p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
|
||||
// Group nodes by (parentKey, sanitized base title) so sibling collisions are
|
||||
// resolved by a STABLE rule that does NOT depend on input array order. Dedupe
|
||||
// ids (first occurrence wins, matching `byId`).
|
||||
const siblingGroups = new Map<string, PageNode[]>();
|
||||
const namedIds = new Set<string>();
|
||||
for (const p of pages) {
|
||||
if (p && p.id && !nameById.has(p.id)) {
|
||||
const parentKey =
|
||||
p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
|
||||
nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
|
||||
if (!p || !p.id || namedIds.has(p.id)) continue;
|
||||
namedIds.add(p.id);
|
||||
const key = `${parentKeyOf(p)}\u0000${sanitizeTitle(p.title ?? "")}`;
|
||||
const bucket = siblingGroups.get(key);
|
||||
if (bucket) bucket.push(p);
|
||||
else siblingGroups.set(key, [p]);
|
||||
}
|
||||
// Assign each node its display name. Within a colliding group, sort the
|
||||
// siblings by their stable disambiguation key (`slugId` else `id`) and let the
|
||||
// FIRST keep the bare sanitized title; every OTHER gets the ` ~<slugId>`
|
||||
// suffix. This makes `nameById` a pure function of the page SET — reordering
|
||||
// the input never moves the suffix onto a different page (red-team #4a). The
|
||||
// suffix is itself sanitized (the slugId/id is untrusted and must never inject
|
||||
// a path separator).
|
||||
const nameById = new Map<string, string>();
|
||||
const disambKeyOf = (p: PageNode): string => p.slugId ?? p.id;
|
||||
for (const bucket of siblingGroups.values()) {
|
||||
const base = sanitizeTitle(bucket[0].title ?? "");
|
||||
if (bucket.length === 1) {
|
||||
nameById.set(bucket[0].id, base);
|
||||
continue;
|
||||
}
|
||||
const sorted = [...bucket].sort((a, b) => {
|
||||
const ka = disambKeyOf(a);
|
||||
const kb = disambKeyOf(b);
|
||||
return ka < kb ? -1 : ka > kb ? 1 : 0;
|
||||
});
|
||||
sorted.forEach((p, i) => {
|
||||
nameById.set(
|
||||
p.id,
|
||||
i === 0 ? base : disambiguate(base, sanitizeTitle(disambKeyOf(p))),
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
// Every id we index above MUST get a resolved name; this helper returns it
|
||||
@@ -169,34 +200,3 @@ export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
|
||||
return layout;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a deterministic, collision-free name for a node among its SIBLINGS.
|
||||
* `usedBySibling` maps a parent key -> set of names already taken, so two
|
||||
* siblings that sanitize to the same name get a stable ` ~slugId` suffix
|
||||
* (SPEC §12). The suffix is itself passed through `sanitizeTitle`, because the
|
||||
* slugId/id is a second untrusted-data channel that must never leak a path
|
||||
* separator into the name. `parentKey` is supplied by the caller (it resolves
|
||||
* to `"__root__"` for root pages AND for orphans whose parent is outside the
|
||||
* input set, so they share one bucket). The name is COSMETIC; identity lives in
|
||||
* the meta block.
|
||||
*/
|
||||
function nameForNode(
|
||||
node: PageNode,
|
||||
parentKey: string,
|
||||
usedBySibling: Map<string, Set<string>>,
|
||||
): string {
|
||||
let used = usedBySibling.get(parentKey);
|
||||
if (!used) {
|
||||
used = new Set<string>();
|
||||
usedBySibling.set(parentKey, used);
|
||||
}
|
||||
|
||||
let name = sanitizeTitle(node.title ?? "");
|
||||
if (used.has(name)) {
|
||||
// Sibling collision: disambiguate with the stable, sanitized slugId (fall
|
||||
// back to the sanitized pageId if no slugId is present).
|
||||
name = disambiguate(name, sanitizeTitle(node.slugId ?? node.id));
|
||||
}
|
||||
used.add(name);
|
||||
return name;
|
||||
}
|
||||
|
||||
@@ -133,8 +133,27 @@ export function classifyRenameMoves(
|
||||
return renamesMoves.map((rm) => {
|
||||
const newParent = deps.resolveParentPageId(rm.newPath, "current");
|
||||
const oldParent = deps.resolveParentPageId(rm.oldPath, "prev");
|
||||
const newTitle = deps.metaAt(rm.newPath, "current")?.title;
|
||||
const oldTitle = deps.metaAt(rm.oldPath, "prev")?.title;
|
||||
// Strip the cosmetic ` ~<slugId>` disambiguation suffix before comparing
|
||||
// titles: it is a LOCAL filesystem artifact (`buildVaultLayout` appends it to
|
||||
// a colliding sibling's stem), NOT part of the page's real title. A pure
|
||||
// disambiguation file-rename ('Report.md' -> 'Report ~a1.md') must therefore
|
||||
// NOT be pushed to Docmost as a title change (red-team #4b), and any title we
|
||||
// DO push must carry the real title ('Report'), never the suffixed form.
|
||||
const rawNewTitle = deps.metaAt(rm.newPath, "current")?.title;
|
||||
const rawOldTitle = deps.metaAt(rm.oldPath, "prev")?.title;
|
||||
// A PURE disambiguation rename only APPENDS a cosmetic ` ~<suffix>` to the
|
||||
// SAME title (layout.ts), so the real Docmost title is unchanged. Strip the
|
||||
// suffix ONLY when the new name is exactly the old title plus that suffix —
|
||||
// never blindly strip a genuine retitle whose new title legitimately ends in
|
||||
// ` ~token` (e.g. "Budget ~draft" -> "Budget ~final"), which would corrupt
|
||||
// the title in Docmost / drop a real rename (review finding).
|
||||
const isCosmeticDisambiguation =
|
||||
typeof rawNewTitle === "string" &&
|
||||
typeof rawOldTitle === "string" &&
|
||||
rawNewTitle !== rawOldTitle &&
|
||||
stripDisambiguationSuffix(rawNewTitle) === rawOldTitle;
|
||||
const newTitle = isCosmeticDisambiguation ? rawOldTitle : rawNewTitle;
|
||||
const oldTitle = rawOldTitle;
|
||||
|
||||
const out: RenameMoveActionClassified = {
|
||||
pageId: rm.pageId,
|
||||
@@ -646,7 +665,11 @@ export async function applyPushActions(
|
||||
// Push the CLEAN body only (no `gitmost_id` frontmatter): the frontmatter
|
||||
// is engine metadata, never page content. The server converts the markdown
|
||||
// it receives verbatim, so stripping here keeps the id out of Docmost.
|
||||
const body = parsePageFile(await deps.readFile(u.path)).body;
|
||||
// Also strip any git conflict markers — they must NEVER reach Docmost
|
||||
// (SPEC §9, red-team #13); content on both sides is preserved.
|
||||
const body = stripConflictMarkers(
|
||||
parsePageFile(await deps.readFile(u.path)).body,
|
||||
);
|
||||
// The last-synced version of this file (pre-image) is the common ancestor
|
||||
// for a 3-way merge against the live page, so concurrent human edits are
|
||||
// not clobbered (review #5). Null when the file is new at last-pushed. Its
|
||||
@@ -689,6 +712,10 @@ export async function applyPushActions(
|
||||
// folder, so (parentPageId, title) identifies the page; a match means a prior
|
||||
// cycle already created it, so we ADOPT instead of duplicating.
|
||||
let liveByParentTitle: Map<string, string> | null = null;
|
||||
// A (parentPageId, title) that more than ONE live page shares is AMBIGUOUS:
|
||||
// adopting one of them would silently overwrite an arbitrary, possibly-unrelated
|
||||
// sibling (red-team #6). Such keys are recorded here and EXCLUDED from adoption.
|
||||
const ambiguousAdoptKeys = new Set<string>();
|
||||
if (actions.creates.length > 0) {
|
||||
const live = await client.listSpaceTree(deps.spaceId);
|
||||
// Only trust a COMPLETE tree for retry-adopt: a truncated tree could miss an
|
||||
@@ -699,32 +726,56 @@ export async function applyPushActions(
|
||||
liveByParentTitle = new Map();
|
||||
for (const n of live.pages) {
|
||||
const key = `${n.parentPageId ?? " root"} ${n.title ?? ""}`;
|
||||
// Keep the FIRST node for a key (the layout makes this unique in practice).
|
||||
if (!liveByParentTitle.has(key)) liveByParentTitle.set(key, n.id);
|
||||
// First node claims the key; a SECOND match marks it ambiguous so neither
|
||||
// is ever adopted-over (the create falls back to a fresh createPage).
|
||||
if (liveByParentTitle.has(key)) ambiguousAdoptKeys.add(key);
|
||||
else liveByParentTitle.set(key, n.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const c of actions.creates) {
|
||||
// Order creates PARENT-before-CHILD (red-team #12): a child whose parent is
|
||||
// ALSO a fresh create must run AFTER its parent so the parent's just-assigned
|
||||
// pageId is available to parent it (otherwise it is placed at the space ROOT).
|
||||
const orderedCreates = orderCreatesParentFirst(actions.creates);
|
||||
// Track pageIds assigned (or adopted) to each create's PATH in THIS batch, so a
|
||||
// child can resolve its freshly-created parent's id without depending on the
|
||||
// on-disk write-back being observable yet (red-team #12).
|
||||
const createdIdByPath = new Map<string, string>();
|
||||
for (const c of orderedCreates) {
|
||||
try {
|
||||
const text = await deps.readFile(c.path);
|
||||
const { body } = parsePageFile(text);
|
||||
// Conflict markers must never reach Docmost (SPEC §9, red-team #13); strip
|
||||
// them from the create body too, preserving both sides' content.
|
||||
const body = stripConflictMarkers(parsePageFile(text).body);
|
||||
// Derive create args from the PATH (native-Obsidian, SPEC §5): title from
|
||||
// the filename, parent from the enclosing folder's folder-note, space from
|
||||
// the run (the vault's space). `parentPageId: null` -> created at ROOT.
|
||||
const title = titleFromPath(c.path);
|
||||
// Resolve the parent from the PATH (SPEC §5). Prefer an id assigned to the
|
||||
// parent's folder-note EARLIER in this same batch — a freshly-created parent
|
||||
// whose on-disk write-back may not be observable yet (red-team #12; creates
|
||||
// are ordered parent-before-child so the parent already ran).
|
||||
const parentFile = parentFolderFile(c.path);
|
||||
const parentPageId =
|
||||
(await resolveParentPageIdViaTree(deps, c.path, "current")) ?? undefined;
|
||||
(parentFile !== null ? createdIdByPath.get(parentFile) : undefined) ??
|
||||
(await resolveParentPageIdViaTree(deps, c.path, "current")) ??
|
||||
undefined;
|
||||
// Retry-adopt (#1 idempotency): a prior cycle already created this page in
|
||||
// Docmost but failed to persist the pageId back to the file, so it was
|
||||
// re-seen as a create. Adopt the existing page instead of duplicating it:
|
||||
// write the id back (file becomes tracked) and push the body as an UPDATE
|
||||
// (idempotent — targets by pageId). Do NOT call createPage again.
|
||||
// (idempotent — targets by pageId). Do NOT call createPage again. SKIP
|
||||
// adoption when the (parent, title) is AMBIGUOUS — adopting an arbitrary
|
||||
// duplicate-title sibling would silently overwrite it (red-team #6).
|
||||
const adoptKey = `${parentPageId ?? " root"} ${title}`;
|
||||
const existingId = liveByParentTitle?.get(adoptKey);
|
||||
const existingId = ambiguousAdoptKeys.has(adoptKey)
|
||||
? undefined
|
||||
: liveByParentTitle?.get(adoptKey);
|
||||
if (existingId) {
|
||||
const rewritten = serializePageFile(existingId, body);
|
||||
await deps.writeFile(c.path, rewritten);
|
||||
writtenBack.push({ path: c.path, pageId: existingId });
|
||||
createdIdByPath.set(c.path, existingId);
|
||||
const adopted = await client.importPageMarkdown(existingId, body, null);
|
||||
pushed.push({
|
||||
pageId: existingId,
|
||||
@@ -749,6 +800,7 @@ export async function applyPushActions(
|
||||
const rewritten = serializePageFile(assignedPageId, body);
|
||||
await deps.writeFile(c.path, rewritten);
|
||||
writtenBack.push({ path: c.path, pageId: assignedPageId });
|
||||
createdIdByPath.set(c.path, assignedPageId);
|
||||
// §10 loop-guard data for the created page (hash the pushed BODY).
|
||||
pushed.push({
|
||||
pageId: assignedPageId,
|
||||
@@ -942,6 +994,35 @@ export function parentFolderFile(path: string): string | null {
|
||||
return folderNote;
|
||||
}
|
||||
|
||||
/**
|
||||
* Order CREATE actions so a create whose parent folder-note is ALSO being created
|
||||
* appears AFTER its parent (red-team #12). A child created before its fresh parent
|
||||
* cannot resolve the parent's pageId and would be placed at the space ROOT.
|
||||
* Topological over the `parentFolderFile` relation, restricted to paths within the
|
||||
* create set; an `inProgress` guard makes a malformed parent cycle safe.
|
||||
*/
|
||||
export function orderCreatesParentFirst(creates: CreateAction[]): CreateAction[] {
|
||||
const byPath = new Map<string, CreateAction>();
|
||||
for (const c of creates) byPath.set(c.path, c);
|
||||
const ordered: CreateAction[] = [];
|
||||
const visited = new Set<string>();
|
||||
const inProgress = new Set<string>();
|
||||
const visit = (c: CreateAction): void => {
|
||||
if (visited.has(c.path) || inProgress.has(c.path)) return;
|
||||
inProgress.add(c.path);
|
||||
const parent = parentFolderFile(c.path);
|
||||
if (parent !== null && parent !== c.path) {
|
||||
const parentCreate = byPath.get(parent);
|
||||
if (parentCreate) visit(parentCreate);
|
||||
}
|
||||
inProgress.delete(c.path);
|
||||
visited.add(c.path);
|
||||
ordered.push(c);
|
||||
};
|
||||
for (const c of creates) visit(c);
|
||||
return ordered;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether a vault path is a Docmost PAGE file (design §"Adoption"): a `.md` file
|
||||
* with NO dot-segment anywhere in its path. This excludes `.obsidian/` config,
|
||||
@@ -955,6 +1036,51 @@ export function isPageFile(path: string): boolean {
|
||||
return !path.split("/").some((seg) => seg.startsWith("."));
|
||||
}
|
||||
|
||||
/**
|
||||
* Git conflict-marker scan + strip (SPEC §9 — conflict markers must NEVER reach
|
||||
* Docmost). A body is treated as conflicted only when it carries BOTH a begin
|
||||
* (`<<<<<<<`) and an end (`>>>>>>>`) marker line, so a legitimate Markdown setext
|
||||
* heading underline (`=======`) is not mistaken for a conflict. When conflicted,
|
||||
* the three marker line types are removed while BOTH sides' content is preserved
|
||||
* (no data loss): the marker SYNTAX never reaches Docmost, but the human's content
|
||||
* does — where the conflict is visible and fixable rather than silently dropped.
|
||||
*/
|
||||
const CONFLICT_BEGIN_RE = /^<{7}/m;
|
||||
const CONFLICT_END_RE = /^>{7}/m;
|
||||
const CONFLICT_BEGIN_LINE_RE = /^<{7}/;
|
||||
const CONFLICT_SEP_LINE_RE = /^={7}/;
|
||||
const CONFLICT_END_LINE_RE = /^>{7}/;
|
||||
|
||||
export function hasConflictMarkers(body: string): boolean {
|
||||
return CONFLICT_BEGIN_RE.test(body) && CONFLICT_END_RE.test(body);
|
||||
}
|
||||
|
||||
function stripConflictMarkers(body: string): string {
|
||||
if (!hasConflictMarkers(body)) return body;
|
||||
// Remove ONLY the three marker line types, and treat a `=======` line as a
|
||||
// conflict separator ONLY when we are between a `<<<<<<<` begin and a `>>>>>>>`
|
||||
// end — so a legitimate Markdown setext heading underline (`=======`) outside a
|
||||
// conflict block is preserved (review finding). Both conflict sides' content is
|
||||
// kept; only the marker SYNTAX is dropped.
|
||||
let inBlock = false;
|
||||
const out: string[] = [];
|
||||
for (const line of body.split("\n")) {
|
||||
if (CONFLICT_BEGIN_LINE_RE.test(line)) {
|
||||
inBlock = true;
|
||||
continue;
|
||||
}
|
||||
if (CONFLICT_END_LINE_RE.test(line)) {
|
||||
inBlock = false;
|
||||
continue;
|
||||
}
|
||||
if (inBlock && CONFLICT_SEP_LINE_RE.test(line)) {
|
||||
continue;
|
||||
}
|
||||
out.push(line);
|
||||
}
|
||||
return out.join("\n");
|
||||
}
|
||||
|
||||
/** The last path segment of a forward-slash path (the folder/file base name). */
|
||||
function baseSegment(path: string): string {
|
||||
const slash = path.lastIndexOf("/");
|
||||
@@ -974,6 +1100,20 @@ function titleFromPath(path: string): string {
|
||||
return base.endsWith(".md") ? base.slice(0, -3) : base;
|
||||
}
|
||||
|
||||
/**
|
||||
* The exact ` ~<slugId>` disambiguation suffix `buildVaultLayout`/`disambiguate`
|
||||
* append to a colliding sibling's file stem (layout.ts): a single trailing
|
||||
* ` ~<one path component>` (no slash, no further `~`). It is a COSMETIC, local
|
||||
* filesystem artifact — never part of the page's real Docmost title — so it is
|
||||
* stripped before a path-derived title is compared/pushed (red-team #4b).
|
||||
*/
|
||||
const DISAMBIGUATION_SUFFIX_RE = / ~[^/~]+$/;
|
||||
|
||||
/** Remove a single trailing ` ~<slugId>` disambiguation suffix, if present. */
|
||||
function stripDisambiguationSuffix(title: string): string {
|
||||
return title.replace(DISAMBIGUATION_SUFFIX_RE, "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Build the synthetic `DocmostMdMeta` the planner/classifier consume, from the
|
||||
* NATIVE format: `pageId` from the `gitmost_id` frontmatter, `title` from the
|
||||
|
||||
@@ -204,11 +204,38 @@ const DocmostAttributes = Extension.create({
|
||||
types: ["image"],
|
||||
attributes: {
|
||||
align: { default: null },
|
||||
attachmentId: { default: null },
|
||||
aspectRatio: { default: null },
|
||||
// imageToHtml emits these Docmost-specific image attrs as data-*; map
|
||||
// them back explicitly so a top-level image (or one inside a column)
|
||||
// round-trips them. Without a parseHTML the default reads the bare
|
||||
// attribute name (e.g. getAttribute("attachmentId") -> null) and the
|
||||
// value — including the attachmentId that links the image to its
|
||||
// stored file — is silently dropped on every round-trip (data loss).
|
||||
attachmentId: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) =>
|
||||
el.getAttribute("data-attachment-id"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.attachmentId
|
||||
? { "data-attachment-id": attrs.attachmentId }
|
||||
: {},
|
||||
},
|
||||
aspectRatio: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) =>
|
||||
el.getAttribute("data-aspect-ratio"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.aspectRatio != null
|
||||
? { "data-aspect-ratio": attrs.aspectRatio }
|
||||
: {},
|
||||
},
|
||||
height: { default: null },
|
||||
placeholder: { default: null },
|
||||
size: { default: null },
|
||||
size: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-size"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.size != null ? { "data-size": attrs.size } : {},
|
||||
},
|
||||
width: { default: null },
|
||||
},
|
||||
},
|
||||
@@ -1030,6 +1057,300 @@ const PageBreak = Node.create({
|
||||
},
|
||||
});
|
||||
|
||||
/**
|
||||
* Footnote feature (mirror of @docmost/editor-ext footnote, matching the MCP
|
||||
* schema mirror). Three nodes connected by `id`:
|
||||
* - FootnoteReference: inline atom marker in the body (<sup data-footnote-ref>);
|
||||
* - FootnotesList: a single bottom container (<section data-footnotes>);
|
||||
* - FootnoteDefinition: one editable note keyed by id (<div data-footnote-def>).
|
||||
* The visible number is not stored; it is derived from reference order. The
|
||||
* <sup> parse rule uses priority 100 so it beats the Superscript mark's <sup>
|
||||
* rule (otherwise an empty reference parses as an empty superscript and drops).
|
||||
*/
|
||||
const FootnoteReference = Node.create({
|
||||
name: "footnoteReference",
|
||||
priority: 101,
|
||||
group: "inline",
|
||||
inline: true,
|
||||
atom: true,
|
||||
selectable: true,
|
||||
draggable: false,
|
||||
addAttributes() {
|
||||
return {
|
||||
id: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-id"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.id ? { "data-id": attrs.id } : {},
|
||||
},
|
||||
};
|
||||
},
|
||||
parseHTML() {
|
||||
return [{ tag: "sup[data-footnote-ref]", priority: 100 }];
|
||||
},
|
||||
renderHTML({ HTMLAttributes }) {
|
||||
return ["sup", { "data-footnote-ref": "", ...HTMLAttributes }];
|
||||
},
|
||||
});
|
||||
|
||||
const FootnotesList = Node.create({
|
||||
name: "footnotesList",
|
||||
group: "block",
|
||||
content: "footnoteDefinition+",
|
||||
isolating: true,
|
||||
selectable: false,
|
||||
defining: true,
|
||||
parseHTML() {
|
||||
return [{ tag: "section[data-footnotes]" }];
|
||||
},
|
||||
renderHTML({ HTMLAttributes }) {
|
||||
return ["section", { "data-footnotes": "", ...HTMLAttributes }, 0];
|
||||
},
|
||||
});
|
||||
|
||||
const FootnoteDefinition = Node.create({
|
||||
name: "footnoteDefinition",
|
||||
content: "paragraph+",
|
||||
defining: true,
|
||||
isolating: true,
|
||||
selectable: false,
|
||||
addAttributes() {
|
||||
return {
|
||||
id: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-id"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.id ? { "data-id": attrs.id } : {},
|
||||
},
|
||||
};
|
||||
},
|
||||
parseHTML() {
|
||||
return [{ tag: "div[data-footnote-def]" }];
|
||||
},
|
||||
renderHTML({ HTMLAttributes }) {
|
||||
return ["div", { "data-footnote-def": "", ...HTMLAttributes }, 0];
|
||||
},
|
||||
});
|
||||
|
||||
/**
|
||||
* Encode/decode the htmlEmbed `source` (arbitrary HTML/CSS/JS) to/from base64
|
||||
* for the `data-source` attribute. Ported from @docmost/editor-ext so the
|
||||
* markdown-converter HTML path (generateJSON via parseHTML) round-trips the
|
||||
* raw source losslessly and keeps it inert while it sits in the attribute.
|
||||
* `encodeURIComponent`/`decodeURIComponent` wrap btoa/atob so UTF-8 survives.
|
||||
*/
|
||||
export function encodeHtmlEmbedSource(source: string): string {
|
||||
if (!source) return "";
|
||||
try {
|
||||
if (typeof btoa === "function") {
|
||||
return btoa(encodeURIComponent(source));
|
||||
}
|
||||
return Buffer.from(encodeURIComponent(source), "utf-8").toString("base64");
|
||||
} catch {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
export function decodeHtmlEmbedSource(encoded: string): string {
|
||||
if (!encoded) return "";
|
||||
try {
|
||||
if (typeof atob === "function") {
|
||||
return decodeURIComponent(atob(encoded));
|
||||
}
|
||||
return decodeURIComponent(Buffer.from(encoded, "base64").toString("utf-8"));
|
||||
} catch {
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Docmost raw HTML embed. Block atom; the client renders `source` inside a
|
||||
* sandboxed iframe. Mirrors the @docmost/editor-ext node — `source` rides the
|
||||
* `data-source` attribute base64-encoded (this is an HTML/generateJSON path, so
|
||||
* it MUST use base64 to avoid double-encoding / injection).
|
||||
*/
|
||||
const HtmlEmbed = Node.create({
|
||||
name: "htmlEmbed",
|
||||
group: "block",
|
||||
inline: false,
|
||||
isolating: true,
|
||||
atom: true,
|
||||
defining: true,
|
||||
draggable: true,
|
||||
addAttributes() {
|
||||
return {
|
||||
source: {
|
||||
default: "",
|
||||
parseHTML: (el: HTMLElement) =>
|
||||
decodeHtmlEmbedSource(el.getAttribute("data-source") || ""),
|
||||
renderHTML: (attrs: Record<string, any>) => ({
|
||||
"data-source": encodeHtmlEmbedSource(attrs.source || ""),
|
||||
}),
|
||||
},
|
||||
height: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => {
|
||||
const v = el.getAttribute("data-height");
|
||||
if (!v) return null;
|
||||
const n = parseInt(v, 10);
|
||||
return Number.isFinite(n) ? n : null;
|
||||
},
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.height != null ? { "data-height": String(attrs.height) } : {},
|
||||
},
|
||||
};
|
||||
},
|
||||
parseHTML() {
|
||||
return [{ tag: 'div[data-type="htmlEmbed"]' }];
|
||||
},
|
||||
renderHTML({ HTMLAttributes }) {
|
||||
return ["div", { "data-type": "htmlEmbed", ...HTMLAttributes }];
|
||||
},
|
||||
});
|
||||
|
||||
/**
|
||||
* Inline status pill. Mirrors @docmost/editor-ext status: the label rides in
|
||||
* the element's TEXT content (not an attribute) and the color in data-color.
|
||||
*/
|
||||
const Status = Node.create({
|
||||
name: "status",
|
||||
group: "inline",
|
||||
inline: true,
|
||||
atom: true,
|
||||
selectable: true,
|
||||
draggable: true,
|
||||
addAttributes() {
|
||||
return {
|
||||
text: {
|
||||
default: "",
|
||||
parseHTML: (el: HTMLElement) => el.textContent || "",
|
||||
},
|
||||
color: {
|
||||
default: "gray",
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-color") || "gray",
|
||||
renderHTML: (attrs: Record<string, any>) => ({
|
||||
"data-color": attrs.color ?? "gray",
|
||||
}),
|
||||
},
|
||||
};
|
||||
},
|
||||
parseHTML() {
|
||||
return [{ tag: 'span[data-type="status"]' }];
|
||||
},
|
||||
renderHTML({ HTMLAttributes }) {
|
||||
return [
|
||||
"span",
|
||||
{ "data-type": "status", "data-color": HTMLAttributes["data-color"] },
|
||||
`${HTMLAttributes.text ?? ""}`,
|
||||
];
|
||||
},
|
||||
});
|
||||
|
||||
/**
|
||||
* Whole-page live embed. Holds only a `sourcePageId` reference. Mirrors
|
||||
* @docmost/editor-ext pageEmbed. Block atom.
|
||||
*/
|
||||
const PageEmbed = Node.create({
|
||||
name: "pageEmbed",
|
||||
group: "block",
|
||||
atom: true,
|
||||
isolating: true,
|
||||
selectable: true,
|
||||
draggable: true,
|
||||
addAttributes() {
|
||||
return {
|
||||
sourcePageId: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-source-page-id"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.sourcePageId
|
||||
? { "data-source-page-id": attrs.sourcePageId }
|
||||
: {},
|
||||
},
|
||||
};
|
||||
},
|
||||
parseHTML() {
|
||||
return [{ tag: 'div[data-type="pageEmbed"]' }];
|
||||
},
|
||||
renderHTML({ HTMLAttributes }) {
|
||||
return ["div", { "data-type": "pageEmbed", ...HTMLAttributes }];
|
||||
},
|
||||
});
|
||||
|
||||
/**
|
||||
* Block node types allowed inside a `transclusionSource` (mirrors
|
||||
* @docmost/editor-ext transclusion constants). Excludes transclusion nodes
|
||||
* (no nesting) and child-only nodes.
|
||||
*/
|
||||
const TRANSCLUSION_SOURCE_CONTENT_EXPRESSION =
|
||||
"(paragraph | heading | blockquote | codeBlock | horizontalRule | bulletList" +
|
||||
" | orderedList | taskList | image | video | audio | attachment | callout" +
|
||||
" | details | embed | mathBlock | table | drawio | excalidraw | pdf" +
|
||||
" | subpages | columns | youtube)+";
|
||||
|
||||
/** Sync-source block: editable content shared into transclusion references. */
|
||||
const TransclusionSource = Node.create({
|
||||
name: "transclusionSource",
|
||||
group: "block",
|
||||
content: TRANSCLUSION_SOURCE_CONTENT_EXPRESSION,
|
||||
defining: true,
|
||||
isolating: true,
|
||||
addAttributes() {
|
||||
return {
|
||||
id: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-id"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.id ? { "data-id": attrs.id } : {},
|
||||
},
|
||||
};
|
||||
},
|
||||
parseHTML() {
|
||||
return [{ tag: 'div[data-type="transclusionSource"]' }];
|
||||
},
|
||||
renderHTML({ HTMLAttributes }) {
|
||||
return ["div", { "data-type": "transclusionSource", ...HTMLAttributes }, 0];
|
||||
},
|
||||
});
|
||||
|
||||
/** Live reference to a transcluded block/page. Block atom. */
|
||||
const TransclusionReference = Node.create({
|
||||
name: "transclusionReference",
|
||||
group: "block",
|
||||
atom: true,
|
||||
selectable: true,
|
||||
draggable: false,
|
||||
addAttributes() {
|
||||
return {
|
||||
sourcePageId: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-source-page-id"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.sourcePageId
|
||||
? { "data-source-page-id": attrs.sourcePageId }
|
||||
: {},
|
||||
},
|
||||
transclusionId: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-transclusion-id"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.transclusionId
|
||||
? { "data-transclusion-id": attrs.transclusionId }
|
||||
: {},
|
||||
},
|
||||
};
|
||||
},
|
||||
parseHTML() {
|
||||
return [{ tag: 'div[data-type="transclusionReference"]' }];
|
||||
},
|
||||
renderHTML({ HTMLAttributes }) {
|
||||
return [
|
||||
"div",
|
||||
{ "data-type": "transclusionReference", ...HTMLAttributes },
|
||||
];
|
||||
},
|
||||
});
|
||||
|
||||
/**
|
||||
* Full extension list. Image is block-level (matches Docmost); the
|
||||
* ProseMirror DOM parser hoists <img> found inside <p> automatically.
|
||||
@@ -1041,7 +1362,29 @@ export const docmostExtensions = [
|
||||
heading: {},
|
||||
link: { openOnClick: false },
|
||||
}),
|
||||
Image.configure({ inline: false }),
|
||||
// Preserve image width/height as the AUTHORED string. Without an explicit
|
||||
// parseHTML the stock Image node attribute falls back to tiptap core's
|
||||
// `fromString`, which coerces a numeric width like "320" into the number 320
|
||||
// — changing the stored type on every markdown round-trip (Docmost stores
|
||||
// these as strings, e.g. "320" or "50%", matching how video/audio/pdf are
|
||||
// handled in this mirror). The node attribute is applied AFTER the global
|
||||
// DocmostAttributes one, so the fix must live on the Image node itself.
|
||||
Image.extend({
|
||||
addAttributes() {
|
||||
const parent = (this.parent?.() ?? {}) as Record<string, any>;
|
||||
return {
|
||||
...parent,
|
||||
width: {
|
||||
...parent.width,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("width"),
|
||||
},
|
||||
height: {
|
||||
...parent.height,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("height"),
|
||||
},
|
||||
};
|
||||
},
|
||||
}).configure({ inline: false }),
|
||||
TaskList,
|
||||
TaskItem.configure({ nested: true }),
|
||||
// Highlight stores its color unescaped and Docmost interpolates it into
|
||||
@@ -1094,5 +1437,13 @@ export const docmostExtensions = [
|
||||
Audio,
|
||||
Pdf,
|
||||
PageBreak,
|
||||
FootnoteReference,
|
||||
FootnotesList,
|
||||
FootnoteDefinition,
|
||||
HtmlEmbed,
|
||||
Status,
|
||||
PageEmbed,
|
||||
TransclusionSource,
|
||||
TransclusionReference,
|
||||
DocmostAttributes,
|
||||
];
|
||||
|
||||
@@ -1,3 +1,18 @@
|
||||
import { encodeHtmlEmbedSource } from "./docmost-schema.js";
|
||||
|
||||
/**
|
||||
* Hard cap on processNode recursion depth (see the depth guard below).
|
||||
*
|
||||
* Chosen well above any realistic document (the deepest legitimate nesting the
|
||||
* editor can produce is far shallower) yet far below the point where the
|
||||
* converter's own call stack overflows. The heaviest shape (deeply nested
|
||||
* lists) costs ~5 JS frames per level and the runtime stack holds ~10k frames,
|
||||
* so the measured overflow is around level ~650 (deeply nested lists); 400
|
||||
* leaves a comfortable margin while still rendering pathological-but-bounded
|
||||
* docs in full (the 200-level stress fixture reaches depth ~204).
|
||||
*/
|
||||
const MAX_NODE_DEPTH = 400;
|
||||
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
@@ -43,7 +58,34 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
.replace(/\(/g, "%28")
|
||||
.replace(/\)/g, "%29");
|
||||
|
||||
// Recursion depth guard. processNode is mutually recursive (directly and via
|
||||
// processListItem/processTaskItem/blockToHtml), and a pathologically nested
|
||||
// document (e.g. tens of thousands of nested blockquotes) would otherwise
|
||||
// overflow the call stack and throw a RangeError, which would abort the sync
|
||||
// and prevent the page from ever being written. We track the live nesting
|
||||
// depth in a closure counter (the wrapper below) so we NEVER throw: past the
|
||||
// limit we stop recursing and emit the node's own text (or nothing) instead.
|
||||
// Normal documents never approach MAX_NODE_DEPTH, so their output is byte-
|
||||
// identical. NOTE: the wrapper signature is (node) only — several callers use
|
||||
// `.map(processNode)`, which would otherwise pass the array index as a second
|
||||
// argument; the wrapper ignores extra arguments so that is harmless.
|
||||
let nodeDepth = 0;
|
||||
const processNode = (node: any): string => {
|
||||
if (nodeDepth >= MAX_NODE_DEPTH) {
|
||||
// Bail out of deeper recursion without throwing. A text node still has
|
||||
// its own content worth keeping; a container at the limit collapses to
|
||||
// "" (its already-too-deep subtree is dropped) rather than overflowing.
|
||||
return typeof node?.text === "string" ? node.text : "";
|
||||
}
|
||||
nodeDepth++;
|
||||
try {
|
||||
return processNodeInner(node);
|
||||
} finally {
|
||||
nodeDepth--;
|
||||
}
|
||||
};
|
||||
|
||||
const processNodeInner = (node: any): string => {
|
||||
const type = node.type;
|
||||
const nodeContent = node.content || [];
|
||||
|
||||
@@ -182,7 +224,16 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
.map(processNode)
|
||||
.join("")
|
||||
.replace(/\n+$/, "");
|
||||
return "```" + language + "\n" + code + "\n```";
|
||||
// CommonMark: an inner ``` run inside the code would prematurely close
|
||||
// a 3-backtick fence (corrupting the block on re-import). Use an outer
|
||||
// fence one backtick longer than the longest backtick run in the code
|
||||
// (minimum 3) so the inner fence is always content.
|
||||
const longestBacktickRun = (code.match(/`+/g) || []).reduce(
|
||||
(max: number, run: string) => Math.max(max, run.length),
|
||||
0,
|
||||
);
|
||||
const fence = "`".repeat(Math.max(3, longestBacktickRun + 1));
|
||||
return fence + language + "\n" + code + "\n" + fence;
|
||||
|
||||
case "bulletList":
|
||||
return nodeContent
|
||||
@@ -228,16 +279,35 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
// a bare "\n" would be reimported as a soft break and lost.
|
||||
return " \n";
|
||||
|
||||
case "image":
|
||||
const imgAlt = node.attrs?.alt || "";
|
||||
case "image": {
|
||||
const imgAttrs = node.attrs || {};
|
||||
// A top-level image with layout/identity attrs beyond src/alt cannot be
|
||||
// expressed by markdown `` — width/height/align/size/
|
||||
// attachmentId/aspectRatio would be silently dropped on export and lost
|
||||
// on re-import. Emit the SAME schema-matching <img> used inside columns
|
||||
// (imageToHtml) so those attrs survive the round-trip. A bare image
|
||||
// (only src/alt, optionally a title — which has no schema attr) keeps
|
||||
// the lighter markdown form so existing image round-trip tests hold.
|
||||
const hasLayoutAttrs =
|
||||
imgAttrs.width != null ||
|
||||
imgAttrs.height != null ||
|
||||
imgAttrs.align ||
|
||||
imgAttrs.size != null ||
|
||||
imgAttrs.attachmentId ||
|
||||
imgAttrs.aspectRatio != null;
|
||||
if (hasLayoutAttrs) {
|
||||
return imageToHtml(node);
|
||||
}
|
||||
const imgAlt = imgAttrs.alt || "";
|
||||
// Neutralize characters that could break out of the markdown image
|
||||
// URL: spaces/newlines and parentheses would terminate the (...) target
|
||||
// and let a stored src inject following markdown/HTML. Percent-encode
|
||||
// them so the URL stays a single inert token.
|
||||
const imgSrc = encodeMdUrl(node.attrs?.src);
|
||||
const imgSrc = encodeMdUrl(imgAttrs.src);
|
||||
// No "caption" attribute exists in the Docmost image schema, so we do
|
||||
// not emit one (the previous caption branch was dead).
|
||||
return ``;
|
||||
}
|
||||
|
||||
case "video": {
|
||||
// Emit the schema-matching <video> element so generateJSON rebuilds the
|
||||
@@ -581,6 +651,83 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
case "subpages":
|
||||
return "{{SUBPAGES}}";
|
||||
|
||||
case "status": {
|
||||
// Inline status pill. The schema reads the label from the element's
|
||||
// TEXT content and the color from data-color, so emit both; without a
|
||||
// case this inline atom fell through to `default` and collapsed to "".
|
||||
const attrs = node.attrs || {};
|
||||
const statusColor = attrs.color || "gray";
|
||||
return `<span data-type="status" data-color="${escapeAttr(statusColor)}">${escapeHtmlText(attrs.text ?? "")}</span>`;
|
||||
}
|
||||
|
||||
case "htmlEmbed": {
|
||||
// Block atom; the schema reads the raw source from a base64-encoded
|
||||
// data-source attribute (and an optional fixed height from data-height).
|
||||
// Encode with the shared helper so it decodes symmetrically on import.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="htmlEmbed"`,
|
||||
`data-source="${escapeAttr(encodeHtmlEmbedSource(attrs.source ?? ""))}"`,
|
||||
];
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "footnoteReference": {
|
||||
// Inline atom marker. The schema reads its id from data-id on a
|
||||
// sup[data-footnote-ref]; the visible number is derived, not stored.
|
||||
const attrs = node.attrs || {};
|
||||
const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : "";
|
||||
return `<sup data-footnote-ref${idAttr}></sup>`;
|
||||
}
|
||||
|
||||
case "footnotesList": {
|
||||
// Bottom container of footnote definitions (section[data-footnotes]).
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<section data-footnotes>${inner}</section>`;
|
||||
}
|
||||
|
||||
case "footnoteDefinition": {
|
||||
// One footnote note keyed by id (div[data-footnote-def]).
|
||||
const attrs = node.attrs || {};
|
||||
const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : "";
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<div data-footnote-def${idAttr}>${inner}</div>`;
|
||||
}
|
||||
|
||||
case "pageEmbed": {
|
||||
// Whole-page live embed; the schema reads data-source-page-id.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="pageEmbed"`];
|
||||
if (attrs.sourcePageId)
|
||||
parts.push(`data-source-page-id="${escapeAttr(attrs.sourcePageId)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "transclusionReference": {
|
||||
// Live reference to a transcluded block/page. Block atom; the schema
|
||||
// reads data-source-page-id and data-transclusion-id.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="transclusionReference"`];
|
||||
if (attrs.sourcePageId)
|
||||
parts.push(`data-source-page-id="${escapeAttr(attrs.sourcePageId)}"`);
|
||||
if (attrs.transclusionId)
|
||||
parts.push(
|
||||
`data-transclusion-id="${escapeAttr(attrs.transclusionId)}"`,
|
||||
);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "transclusionSource": {
|
||||
// Sync-source container; the schema reads data-id and re-parses its
|
||||
// block children, so render them as schema-matching HTML.
|
||||
const attrs = node.attrs || {};
|
||||
const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : "";
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<div data-type="transclusionSource"${idAttr}>${inner}</div>`;
|
||||
}
|
||||
|
||||
default:
|
||||
// Fallback: process children
|
||||
return nodeContent.map(processNode).join("");
|
||||
@@ -782,6 +929,12 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
case "attachment":
|
||||
case "drawio":
|
||||
case "excalidraw":
|
||||
case "htmlEmbed":
|
||||
case "footnotesList":
|
||||
case "footnoteDefinition":
|
||||
case "pageEmbed":
|
||||
case "transclusionSource":
|
||||
case "transclusionReference":
|
||||
return processNode(block);
|
||||
default:
|
||||
// Any still-unhandled block type: NEVER fall back to markdown inside a
|
||||
|
||||
@@ -419,33 +419,33 @@ describe('converter gap coverage — emission branches (specs 1–11)', () => {
|
||||
});
|
||||
|
||||
describe('converter gap coverage — documented round-trip data loss (specs 12–14)', () => {
|
||||
// 12. A 3-backtick fence inside a codeBlock body is NOT lengthened: the inner
|
||||
// fence prematurely terminates the block, splitting it into three nodes.
|
||||
it('a triple-backtick fence inside a codeBlock body is lossy (fence collision)', async () => {
|
||||
// 12. A 3-backtick fence inside a codeBlock body is now lengthened: the outer
|
||||
// fence widens to (longest inner run + 1) backticks per CommonMark, so the
|
||||
// inner ``` is treated as content and the block survives as ONE node.
|
||||
it('a triple-backtick fence inside a codeBlock body round-trips via a widened fence', async () => {
|
||||
const d = doc({
|
||||
type: 'codeBlock',
|
||||
attrs: { language: 'js' },
|
||||
content: [{ type: 'text', text: '```\ninner\n```' }],
|
||||
});
|
||||
const md1 = convertProseMirrorToMarkdown(d);
|
||||
expect(md1).toBe('```js\n```\ninner\n```\n```');
|
||||
// Outer fence widened to 4 backticks; the inner 3-backtick fence is content.
|
||||
expect(md1).toBe('````js\n```\ninner\n```\n````');
|
||||
|
||||
const doc2 = await markdownToProseMirror(md1);
|
||||
// The inner fence split the block into THREE top-level nodes.
|
||||
// The block survives as a SINGLE code block (no premature split).
|
||||
const top = doc2.content || [];
|
||||
expect(top).toHaveLength(3);
|
||||
expect(top).toHaveLength(1);
|
||||
expect(top[0].type).toBe('codeBlock');
|
||||
expect(top[0].attrs?.language).toBe('js');
|
||||
expect(top[0].content?.[0]).toMatchObject({ type: 'text', text: '\n' });
|
||||
expect(top[1].type).toBe('paragraph');
|
||||
expect(top[1].content?.[0]).toMatchObject({ type: 'text', text: 'inner' });
|
||||
expect(top[2].type).toBe('codeBlock');
|
||||
expect(top[2].attrs?.language).toBeNull();
|
||||
expect(top[2].content?.[0]).toMatchObject({ type: 'text', text: '\n' });
|
||||
expect(top[0].content?.[0]?.text).toContain('```\ninner\n```');
|
||||
|
||||
const md2 = convertProseMirrorToMarkdown(doc2);
|
||||
expect(md2).not.toBe(md1); // not byte-stable
|
||||
expect(docsCanonicallyEqual(d, doc2)).toBe(false); // documented data loss
|
||||
expect(md2).toBe(md1); // byte-stable
|
||||
// Canonically the re-imported code text gains a single trailing newline
|
||||
// (marked re-adds it; the exporter strips it back, hence byte stability).
|
||||
// The fence is no longer lossy: the inner fence and content fully survive.
|
||||
expect(docsCanonicallyEqual(d, doc2)).toBe(false);
|
||||
});
|
||||
|
||||
// 13. A leading ordered-list marker in paragraph text is NOT escaped, so a
|
||||
|
||||
159
packages/git-sync/test/redteam-apply-push.test.ts
Normal file
159
packages/git-sync/test/redteam-apply-push.test.ts
Normal file
@@ -0,0 +1,159 @@
|
||||
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { applyPushActions } from '../src/engine/push';
|
||||
import type { ApplyPushDeps, PushActions } from '../src/engine/push';
|
||||
|
||||
const SPACE_ID = 'sp-test';
|
||||
|
||||
/** A recording client fake; listSpaceTree/createPage configurable per test. */
|
||||
function makeClient() {
|
||||
return {
|
||||
listSpaceTree: vi.fn(async () => ({
|
||||
pages: [] as { id: string; parentPageId?: string | null; title?: string }[],
|
||||
complete: true,
|
||||
})),
|
||||
importPageMarkdown: vi.fn(async () => ({ success: true })),
|
||||
createPage: vi.fn(
|
||||
async (
|
||||
title: string,
|
||||
_content: string,
|
||||
_spaceId: string,
|
||||
_parentPageId?: string,
|
||||
) => ({ data: { id: 'assigned-id', title }, success: true }),
|
||||
),
|
||||
deletePage: vi.fn(async () => ({ success: true })),
|
||||
movePage: vi.fn(async () => ({ success: true })),
|
||||
renamePage: vi.fn(async () => ({ success: true })),
|
||||
};
|
||||
}
|
||||
|
||||
function makeGit() {
|
||||
return {
|
||||
updateRef: vi.fn(async () => {}),
|
||||
fastForwardBranch: vi.fn(async () => ({ ok: true })),
|
||||
showFileAtRef: vi.fn(async () => null),
|
||||
};
|
||||
}
|
||||
|
||||
/** A recording fs fake over a path->text store (writes are read back). */
|
||||
function makeFs(initial: Record<string, string> = {}) {
|
||||
const store: Record<string, string> = { ...initial };
|
||||
const fs = {
|
||||
readFile: vi.fn(async (path: string) => {
|
||||
if (!(path in store)) throw new Error(`no such file: ${path}`);
|
||||
return store[path];
|
||||
}),
|
||||
writeFile: vi.fn(async (path: string, text: string) => {
|
||||
store[path] = text;
|
||||
}),
|
||||
};
|
||||
return { fs, store };
|
||||
}
|
||||
|
||||
function deps(client: any, git: any, fs: ReturnType<typeof makeFs>): ApplyPushDeps {
|
||||
return {
|
||||
client,
|
||||
git: git as any,
|
||||
readFile: fs.fs.readFile,
|
||||
writeFile: fs.fs.writeFile,
|
||||
spaceId: SPACE_ID,
|
||||
};
|
||||
}
|
||||
|
||||
function actions(partial: Partial<PushActions>): PushActions {
|
||||
return {
|
||||
creates: [],
|
||||
updates: [],
|
||||
deletes: [],
|
||||
renamesMoves: [],
|
||||
skipped: [],
|
||||
...partial,
|
||||
};
|
||||
}
|
||||
|
||||
beforeEach(() => {
|
||||
vi.spyOn(console, 'log').mockImplementation(() => {});
|
||||
vi.spyOn(console, 'warn').mockImplementation(() => {});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
vi.restoreAllMocks();
|
||||
});
|
||||
|
||||
// === Finding #6 — adopt must NOT clobber an arbitrary duplicate-title sibling ===
|
||||
// The retry-adopt map keys pages by (parentPageId|root, title). When TWO root
|
||||
// siblings share the title 'Foo', the key collides and the map keeps the FIRST
|
||||
// (p1). A brand-new untracked 'Foo/Foo.md' (no gitmost_id) then "adopts" p1 and
|
||||
// pushes its body over it via importPageMarkdown — silently overwriting an
|
||||
// arbitrary, possibly unrelated, existing page. Desired: a fresh createPage, or
|
||||
// an ambiguity skip — NEVER a silent overwrite of an existing sibling.
|
||||
describe('redteam #6 — adopt clobbers wrong duplicate-title sibling', () => {
|
||||
it('does NOT overwrite an arbitrary duplicate-title sibling (p1) via importPageMarkdown', async () => {
|
||||
const client = makeClient();
|
||||
client.listSpaceTree.mockResolvedValue({
|
||||
pages: [
|
||||
{ id: 'p1', parentPageId: null, title: 'Foo' },
|
||||
{ id: 'p2', parentPageId: null, title: 'Foo' },
|
||||
],
|
||||
complete: true,
|
||||
});
|
||||
const git = makeGit();
|
||||
// A brand-new local file with NO gitmost_id frontmatter.
|
||||
const fs = makeFs({ 'Foo/Foo.md': '# Foo\n\nfresh foo body\n' });
|
||||
|
||||
await applyPushActions(
|
||||
deps(client, git, fs),
|
||||
actions({ creates: [{ path: 'Foo/Foo.md' }] }),
|
||||
);
|
||||
|
||||
// The wrong sibling must never be overwritten with our body.
|
||||
const clobberedP1 = client.importPageMarkdown.mock.calls.some(
|
||||
(c: any[]) => c[0] === 'p1',
|
||||
);
|
||||
expect(clobberedP1).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// === Finding #12 — new child under new parent must be parented, not put at ROOT ===
|
||||
// creates are applied in path order: 'Proj/Apple.md' (Apple < Proj) BEFORE
|
||||
// 'Proj/Proj.md'. When Apple is created first, its parent folder-note
|
||||
// 'Proj/Proj.md' has no gitmost_id yet, so the parent resolves to null and Apple
|
||||
// is created at the SPACE ROOT instead of under Proj. Desired: the parent page is
|
||||
// created before its child, so Apple's createPage receives Proj's assigned id.
|
||||
describe('redteam #12 — new child under new parent placed at ROOT', () => {
|
||||
it('createPage for Apple receives parentPageId === the id assigned to Proj', async () => {
|
||||
let seq = 0;
|
||||
const client = makeClient();
|
||||
client.createPage.mockImplementation(
|
||||
async (title: string) => ({
|
||||
data: { id: `id-${++seq}`, title },
|
||||
success: true,
|
||||
}),
|
||||
);
|
||||
const git = makeGit();
|
||||
// Both brand-new local files, neither carrying a gitmost_id yet. writeFile
|
||||
// updates the store so readFile reads back any pageId written during the run.
|
||||
const fs = makeFs({
|
||||
'Proj/Apple.md': '# Apple\n\napple body\n',
|
||||
'Proj/Proj.md': '# Proj\n\nproj body\n',
|
||||
});
|
||||
|
||||
await applyPushActions(
|
||||
deps(client, git, fs),
|
||||
actions({
|
||||
creates: [{ path: 'Proj/Apple.md' }, { path: 'Proj/Proj.md' }],
|
||||
}),
|
||||
);
|
||||
|
||||
const calls = client.createPage.mock.calls;
|
||||
const results = client.createPage.mock.results;
|
||||
const projIdx = calls.findIndex((c: any[]) => c[0] === 'Proj');
|
||||
const appleIdx = calls.findIndex((c: any[]) => c[0] === 'Apple');
|
||||
expect(projIdx).toBeGreaterThanOrEqual(0);
|
||||
expect(appleIdx).toBeGreaterThanOrEqual(0);
|
||||
const projId = ((await results[projIdx].value) as any).data.id;
|
||||
const appleParentPageId = calls[appleIdx][3];
|
||||
|
||||
// Apple is a child of Proj -> it must be created under Proj, not at ROOT.
|
||||
expect(appleParentPageId).toBe(projId);
|
||||
});
|
||||
});
|
||||
89
packages/git-sync/test/redteam-converter.test.ts
Normal file
89
packages/git-sync/test/redteam-converter.test.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
// Import the converter DIRECTLY from src (NOT the docmost-client barrel, which
|
||||
// pulls in collaboration.ts and mutates the global DOM at import time), matching
|
||||
// the other converter unit tests. markdownToProseMirror is imported for the
|
||||
// round-trip cases; loading it mutates the global DOM via jsdom (required for
|
||||
// @tiptap/html's generateJSON under Node) — this is expected.
|
||||
import { convertProseMirrorToMarkdown } from '../src/lib/markdown-converter.js';
|
||||
import { markdownToProseMirror } from '../src/lib/markdown-to-prosemirror.js';
|
||||
|
||||
const doc = (...nodes: any[]) => ({ type: 'doc', content: nodes });
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #1 editor-ext atoms dropped: the `default` branch (markdown-converter.ts
|
||||
// ~584-586) collapses unknown atoms to "" by mapping their (empty) children.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('#1 editor-ext atoms dropped', () => {
|
||||
it('preserves an inline status atom text', () => {
|
||||
const d = doc({
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'status', attrs: { text: 'Done' } }],
|
||||
});
|
||||
expect(convertProseMirrorToMarkdown(d)).toContain('Done');
|
||||
});
|
||||
|
||||
it('preserves a block htmlEmbed atom', () => {
|
||||
const d = doc({ type: 'htmlEmbed', attrs: { source: '<b>hi</b>' } });
|
||||
expect(convertProseMirrorToMarkdown(d)).not.toBe('');
|
||||
});
|
||||
|
||||
it('preserves a footnoteReference atom', () => {
|
||||
const d = doc({
|
||||
type: 'paragraph',
|
||||
content: [{ type: 'footnoteReference', attrs: { id: 'fn1', referenceNumber: 1 } }],
|
||||
});
|
||||
expect(convertProseMirrorToMarkdown(d)).not.toBe('');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #2 top-level image attrs lost: a top-level image emits markdown ,
|
||||
// which carries no width/height/align/attachmentId.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('#2 top-level image attrs lost', () => {
|
||||
it('keeps width through export and re-import', async () => {
|
||||
const d = doc({
|
||||
type: 'image',
|
||||
attrs: { src: '/files/x.png', width: '320', height: '200', align: 'right', attachmentId: 'a1' },
|
||||
});
|
||||
const md = convertProseMirrorToMarkdown(d);
|
||||
expect(md).toContain('320');
|
||||
const back = await markdownToProseMirror(md);
|
||||
expect(back.content[0].attrs.width).toBe('320');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #3 code-fence corruption: a code block whose TEXT contains a ``` fence must
|
||||
// be emitted with a wider outer fence so the inner fence survives.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('#3 code-fence corruption', () => {
|
||||
it('round-trips a code block containing an inner fence', async () => {
|
||||
const code = '```js\nfoo()\n```';
|
||||
const d = doc({
|
||||
type: 'codeBlock',
|
||||
attrs: { language: '' },
|
||||
content: [{ type: 'text', text: code }],
|
||||
});
|
||||
const md1 = convertProseMirrorToMarkdown(d);
|
||||
const back = await markdownToProseMirror(md1);
|
||||
const md2 = convertProseMirrorToMarkdown(back);
|
||||
expect(md2).toBe(md1);
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #16 depth guard: deep recursion in processNode overflows the stack (today a
|
||||
// RangeError) instead of being guarded.
|
||||
// ---------------------------------------------------------------------------
|
||||
describe('#16 depth guard', () => {
|
||||
it('does not throw on a deeply nested blockquote doc', () => {
|
||||
const DEPTH = 50000;
|
||||
let node: any = { type: 'paragraph', content: [{ type: 'text', text: 'x' }] };
|
||||
for (let i = 0; i < DEPTH; i++) {
|
||||
node = { type: 'blockquote', content: [node] };
|
||||
}
|
||||
const d = doc(node);
|
||||
expect(() => convertProseMirrorToMarkdown(d)).not.toThrow();
|
||||
});
|
||||
});
|
||||
71
packages/git-sync/test/redteam-layout-title.test.ts
Normal file
71
packages/git-sync/test/redteam-layout-title.test.ts
Normal file
@@ -0,0 +1,71 @@
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { buildVaultLayout, type PageNode } from '../src/engine/layout.js';
|
||||
import { classifyRenameMoves } from '../src/engine/push.js';
|
||||
import type {
|
||||
ClassifyRenameMovesDeps,
|
||||
MetaSide,
|
||||
RenameMoveAction,
|
||||
} from '../src/engine/push.js';
|
||||
import type { DocmostMdMeta } from '../src/lib/index.js';
|
||||
|
||||
// RED-TEAM finding #4 (two facets):
|
||||
// (a) buildVaultLayout disambiguation is ORDER-DEPENDENT: which of two
|
||||
// equally-titled root pages keeps the bare stem (and which gets the
|
||||
// ` ~slugId` suffix) depends purely on input array order. The layout is
|
||||
// supposed to be a deterministic function of the page SET, so reordering
|
||||
// the input must not move the suffix onto a different page.
|
||||
// (b) The page title derived from a DISAMBIGUATED filename ('Report ~a1.md')
|
||||
// never strips the cosmetic ` ~slugId` suffix, so a pure disambiguation
|
||||
// file-rename is mis-classified as a real title RENAME that would push the
|
||||
// suffix ('Report ~a1') back into Docmost as the page's actual title.
|
||||
|
||||
describe('redteam #4a — buildVaultLayout is stable under input reorder', () => {
|
||||
it('keeps the same stem for page A regardless of input order', () => {
|
||||
const A: PageNode = { id: 'A', title: 'Report', slugId: 'a1', parentPageId: null };
|
||||
const B: PageNode = { id: 'B', title: 'Report', slugId: 'b2', parentPageId: null };
|
||||
|
||||
const l1 = buildVaultLayout([A, B]);
|
||||
const l2 = buildVaultLayout([B, A]);
|
||||
|
||||
// Identity (pageId A) must resolve to the same file stem no matter how the
|
||||
// flat page list happened to be ordered.
|
||||
expect(l2.get('A')?.stem).toBe(l1.get('A')?.stem);
|
||||
});
|
||||
});
|
||||
|
||||
describe('redteam #4b — disambiguation suffix is not a title change', () => {
|
||||
// Mirror production push.ts `titleFromPath` EXACTLY: the synthetic native meta
|
||||
// sets `title = baseName(path) without ".md"`. This is the real derivation the
|
||||
// injected `metaAt` carries in `main`.
|
||||
function titleFromPath(path: string): string {
|
||||
const slash = path.lastIndexOf('/');
|
||||
const base = slash < 0 ? path : path.slice(slash + 1);
|
||||
return base.endsWith('.md') ? base.slice(0, -3) : base;
|
||||
}
|
||||
|
||||
function deps(): ClassifyRenameMovesDeps {
|
||||
const metaAt = (path: string, _side: MetaSide): DocmostMdMeta | null => ({
|
||||
version: 1,
|
||||
title: titleFromPath(path),
|
||||
pageId: 'p1',
|
||||
});
|
||||
// Same enclosing folder (root) on both sides -> no reparent.
|
||||
const resolveParentPageId = (_path: string, _side: MetaSide): string | null => null;
|
||||
return { metaAt, resolveParentPageId };
|
||||
}
|
||||
|
||||
it('does NOT emit a rename when only a ~slugId suffix was appended', () => {
|
||||
// A sibling collision appeared, so the file 'Report.md' was relocated to the
|
||||
// disambiguated 'Report ~a1.md'. The page TITLE in Docmost is still 'Report'.
|
||||
const rms: RenameMoveAction[] = [
|
||||
{ pageId: 'p1', oldPath: 'Report.md', newPath: 'Report ~a1.md' },
|
||||
];
|
||||
|
||||
const [classified] = classifyRenameMoves(rms, deps());
|
||||
|
||||
// Desired behaviour: a pure disambiguation file-rename is cosmetic/local and
|
||||
// must NOT be pushed as a title change. (If any rename WERE emitted it must
|
||||
// carry the real title 'Report', never the suffixed 'Report ~a1'.)
|
||||
expect(classified.rename).toBeUndefined();
|
||||
});
|
||||
});
|
||||
196
packages/git-sync/test/redteam-push-cycle.test.ts
Normal file
196
packages/git-sync/test/redteam-push-cycle.test.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import { runPush, LAST_PUSHED_REF, DOCMOST_BRANCH } from '../src/engine/push';
|
||||
import type { PushDeps } from '../src/engine/push';
|
||||
import type { Settings } from '../src/engine/settings';
|
||||
import { runCycle, type RunCycleDeps } from '../src/engine/cycle';
|
||||
import { serializePageFile } from '../src/lib/page-file';
|
||||
|
||||
// Red-team confirmations for PR #119 (git-sync). Each test asserts the DESIRED
|
||||
// behavior, so it FAILS today iff the bug is real.
|
||||
|
||||
function makeSettings(): Settings {
|
||||
return {
|
||||
docmostApiUrl: 'https://docmost.example.com',
|
||||
docmostEmail: 'you@example.com',
|
||||
docmostPassword: 'secret',
|
||||
docmostSpaceId: 'space-1',
|
||||
vaultPath: '/vault',
|
||||
pollIntervalMs: 15000,
|
||||
debounceMs: 2000,
|
||||
logLevel: 'info',
|
||||
} as Settings;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #13 — conflict markers must never reach Docmost (SPEC §9), even when there is
|
||||
// NO in-progress merge (markers committed on `main` by some other path). The
|
||||
// push apply reads the body and hands it to importPageMarkdown verbatim; the
|
||||
// DESIRED behavior is a content scan that prevents a `<<<<<<<` body from being
|
||||
// pushed. Assert the pushed body does NOT contain a conflict marker.
|
||||
// ---------------------------------------------------------------------------
|
||||
function makePushGit(opts: {
|
||||
changes: { status: 'A' | 'M' | 'D' | 'R' | 'C'; path: string; oldPath?: string }[];
|
||||
lastPushed?: string | null;
|
||||
}) {
|
||||
const calls = { updateRef: [] as { ref: string; target: string }[] };
|
||||
const git: PushDeps['git'] = {
|
||||
assertGitAvailable: vi.fn(async () => {}),
|
||||
ensureRepo: vi.fn(async () => {}),
|
||||
isMergeInProgress: vi.fn(async () => false), // NO merge in progress
|
||||
checkout: vi.fn(async () => {}),
|
||||
stageAll: vi.fn(async () => {}),
|
||||
commit: vi.fn(async () => false),
|
||||
readRef: vi.fn(async (ref: string) =>
|
||||
ref === LAST_PUSHED_REF ? (opts.lastPushed ?? 'base-sha') : null,
|
||||
),
|
||||
revParse: vi.fn(async (ref: string) => {
|
||||
if (ref === DOCMOST_BRANCH) return 'doc-sha';
|
||||
if (ref === 'main') return 'main-sha';
|
||||
return null;
|
||||
}),
|
||||
diffNameStatus: vi.fn(async () => opts.changes),
|
||||
showFileAtRef: vi.fn(async () => null),
|
||||
updateRef: vi.fn(async (ref: string, target: string) => {
|
||||
calls.updateRef.push({ ref, target });
|
||||
}),
|
||||
fastForwardBranch: vi.fn(async () => ({ ok: true })),
|
||||
listTrackedFiles: vi.fn(async () => [] as string[]),
|
||||
};
|
||||
return { git, calls };
|
||||
}
|
||||
|
||||
describe('#13 conflict markers reach Docmost', () => {
|
||||
it('does NOT push a body containing a `<<<<<<< HEAD` conflict marker', async () => {
|
||||
const conflictBody =
|
||||
'<<<<<<< HEAD\nmy line\n=======\ntheir line\n>>>>>>> feature\n';
|
||||
const file = serializePageFile('p-1', conflictBody);
|
||||
const { git } = makePushGit({ changes: [{ status: 'M', path: 'Doc.md' }] });
|
||||
|
||||
const importPageMarkdown = vi.fn(async () => ({ success: true }));
|
||||
const client = {
|
||||
listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })),
|
||||
importPageMarkdown,
|
||||
createPage: vi.fn(),
|
||||
deletePage: vi.fn(),
|
||||
movePage: vi.fn(),
|
||||
renamePage: vi.fn(),
|
||||
};
|
||||
|
||||
const deps: PushDeps = {
|
||||
settings: makeSettings(),
|
||||
git,
|
||||
makeClient: () => client as any,
|
||||
readFile: vi.fn(async (path: string) => {
|
||||
if (path === 'Doc.md') return file;
|
||||
throw new Error(`no such file: ${path}`);
|
||||
}),
|
||||
writeFile: vi.fn(async () => {}),
|
||||
log: () => {},
|
||||
};
|
||||
|
||||
const res = await runPush(deps, { dryRun: false });
|
||||
expect(res.mode).toBe('apply');
|
||||
|
||||
// The body actually sent to Docmost (2nd positional arg is the markdown body).
|
||||
expect(importPageMarkdown).toHaveBeenCalledTimes(1);
|
||||
const pushedBody: string = importPageMarkdown.mock.calls[0][1] as any;
|
||||
|
||||
// DESIRED: a content scan gates conflict markers; the body must be clean.
|
||||
expect(pushedBody).not.toContain('<<<<<<<');
|
||||
expect(pushedBody).not.toContain('=======');
|
||||
expect(pushedBody).not.toContain('>>>>>>>');
|
||||
});
|
||||
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// #15 — a divergent `docmost` mirror (fastForwardBranch refuses) is escalated by
|
||||
// runPush (`divergentDocmost: true`), but runCycle forwards only {mode, failures}
|
||||
// — the divergence is DROPPED from RunCycleResult. DESIRED: the cycle result
|
||||
// surfaces the divergence so the caller can act on it.
|
||||
// ---------------------------------------------------------------------------
|
||||
function fakeVault(overrides: Record<string, any> = {}) {
|
||||
const order: string[] = [];
|
||||
const rec =
|
||||
(name: string, ret?: any) =>
|
||||
async (...args: any[]) => {
|
||||
order.push(args.length ? `${name}:${args.join(',')}` : name);
|
||||
return ret;
|
||||
};
|
||||
const vault: any = {
|
||||
order,
|
||||
assertGitAvailable: rec('assertGitAvailable'),
|
||||
ensureRepo: rec('ensureRepo'),
|
||||
isMergeInProgress: vi.fn(async () => false),
|
||||
ensureBranch: rec('ensureBranch'),
|
||||
checkout: rec('checkout'),
|
||||
listTrackedFiles: vi.fn(async () => [] as string[]),
|
||||
stageAll: rec('stageAll'),
|
||||
commit: rec('commit', false),
|
||||
merge: rec('merge', { ok: true, conflict: false, output: '' }),
|
||||
readRef: vi.fn(async () => null),
|
||||
revParse: vi.fn(async () => 'main-commit-sha'),
|
||||
diffNameStatus: vi.fn(async () => [] as any[]),
|
||||
showFileAtRef: vi.fn(async () => ''),
|
||||
updateRef: rec('updateRef'),
|
||||
// The mirror diverged: the ff is REFUSED. runPush escalates this as
|
||||
// divergentDocmost; the question is whether runCycle surfaces it.
|
||||
fastForwardBranch: rec('fastForwardBranch', {
|
||||
ok: false,
|
||||
reason: 'not-fast-forward',
|
||||
}),
|
||||
...overrides,
|
||||
};
|
||||
return vault;
|
||||
}
|
||||
|
||||
function baseDeps(vault: any, over: Partial<RunCycleDeps> = {}): RunCycleDeps {
|
||||
return {
|
||||
spaceId: 'space-1',
|
||||
client: {
|
||||
listSpaceTree: vi.fn(async () => ({ pages: [], complete: true })),
|
||||
getPageJson: vi.fn(),
|
||||
importPageMarkdown: vi.fn(),
|
||||
createPage: vi.fn(),
|
||||
deletePage: vi.fn(),
|
||||
movePage: vi.fn(),
|
||||
renamePage: vi.fn(),
|
||||
listRecentSince: vi.fn(),
|
||||
listTrash: vi.fn(),
|
||||
restorePage: vi.fn(),
|
||||
} as any,
|
||||
vault,
|
||||
settings: { vaultPath: '/vault' } as any,
|
||||
fs: {
|
||||
readFile: vi.fn(async () => ''),
|
||||
writeFile: vi.fn(async () => undefined),
|
||||
mkdir: vi.fn(async () => undefined),
|
||||
rm: vi.fn(async () => undefined),
|
||||
},
|
||||
log: vi.fn(),
|
||||
...over,
|
||||
};
|
||||
}
|
||||
|
||||
describe('#15 divergence dropped by runCycle', () => {
|
||||
it('surfaces the divergent `docmost` mirror in RunCycleResult', async () => {
|
||||
const vault = fakeVault();
|
||||
const deps = baseDeps(vault);
|
||||
|
||||
const res = await runCycle(deps);
|
||||
expect(res.ran).toBe(true);
|
||||
|
||||
// The push DID refuse to fast-forward the divergent mirror.
|
||||
expect(vault.order).toContain(
|
||||
'fastForwardBranch:docmost,main-commit-sha',
|
||||
);
|
||||
|
||||
// DESIRED: the cycle result surfaces the divergence (some warning/flag), so a
|
||||
// caller driving runCycle can see the §5 invariant breach without scraping
|
||||
// logs. Today RunCycleResult.push is only {mode, failures}.
|
||||
const divergence =
|
||||
(res as any).divergentDocmost ??
|
||||
(res.push as any)?.divergentDocmost ??
|
||||
(res as any).warning;
|
||||
expect(divergence).toBeTruthy();
|
||||
});
|
||||
});
|
||||
@@ -77,10 +77,14 @@ const expectedSurface: SurfaceEntry[] = [
|
||||
{ name: "drawio", kind: "node", attrs: ["align", "alt", "aspectRatio", "attachmentId", "height", "size", "src", "title", "width"] },
|
||||
{ name: "embed", kind: "node", attrs: ["align", "height", "provider", "src", "width"] },
|
||||
{ name: "excalidraw", kind: "node", attrs: ["align", "alt", "aspectRatio", "attachmentId", "height", "size", "src", "title", "width"] },
|
||||
{ name: "footnoteDefinition", kind: "node", attrs: ["id"] },
|
||||
{ name: "footnoteReference", kind: "node", attrs: ["id"] },
|
||||
{ name: "footnotesList", kind: "node", attrs: [] },
|
||||
{ name: "hardBreak", kind: "node", attrs: [] },
|
||||
{ name: "heading", kind: "node", attrs: ["id", "indent", "level", "textAlign"] },
|
||||
{ name: "highlight", kind: "mark", attrs: ["color"] },
|
||||
{ name: "horizontalRule", kind: "node", attrs: [] },
|
||||
{ name: "htmlEmbed", kind: "node", attrs: ["height", "source"] },
|
||||
{ name: "image", kind: "node", attrs: ["align", "alt", "aspectRatio", "attachmentId", "height", "placeholder", "size", "src", "title", "width"] },
|
||||
{ name: "italic", kind: "mark", attrs: [] },
|
||||
{ name: "link", kind: "mark", attrs: ["class", "href", "internal", "rel", "target", "title"] },
|
||||
@@ -90,8 +94,10 @@ const expectedSurface: SurfaceEntry[] = [
|
||||
{ name: "mention", kind: "node", attrs: ["anchorId", "creatorId", "entityId", "entityType", "id", "label", "slugId"] },
|
||||
{ name: "orderedList", kind: "node", attrs: ["start", "type"] },
|
||||
{ name: "pageBreak", kind: "node", attrs: [] },
|
||||
{ name: "pageEmbed", kind: "node", attrs: ["sourcePageId"] },
|
||||
{ name: "paragraph", kind: "node", attrs: ["id", "indent", "textAlign"] },
|
||||
{ name: "pdf", kind: "node", attrs: ["attachmentId", "height", "name", "placeholder", "size", "src", "width"] },
|
||||
{ name: "status", kind: "node", attrs: ["color", "text"] },
|
||||
{ name: "strike", kind: "mark", attrs: [] },
|
||||
{ name: "subpages", kind: "node", attrs: [] },
|
||||
{ name: "subscript", kind: "mark", attrs: [] },
|
||||
@@ -104,6 +110,8 @@ const expectedSurface: SurfaceEntry[] = [
|
||||
{ name: "taskList", kind: "node", attrs: [] },
|
||||
{ name: "text", kind: "node", attrs: [] },
|
||||
{ name: "textStyle", kind: "mark", attrs: ["color"] },
|
||||
{ name: "transclusionReference", kind: "node", attrs: ["sourcePageId", "transclusionId"] },
|
||||
{ name: "transclusionSource", kind: "node", attrs: ["id"] },
|
||||
{ name: "underline", kind: "mark", attrs: [] },
|
||||
{ name: "video", kind: "node", attrs: ["align", "alt", "aspectRatio", "attachmentId", "height", "placeholder", "size", "src", "width"] },
|
||||
{ name: "youtube", kind: "node", attrs: ["align", "height", "src", "width"] },
|
||||
|
||||
Reference in New Issue
Block a user