fix(git-sync): make body ingest idempotent to stop idle churn + silent edit-revert (GS-EDIT-REVERT)
On a git-sync space, a page's web edit was silently reverted within ~1 poll,
and idle spaces showed dozens of 'update' actions per cycle with no real change.
Root cause: the vault->Docmost body ingest (importPageMarkdown) is re-run every
poll for pages the upstream change-detection mis-flags as changed (the
markdown<->ProseMirror round-trip is not byte-stable: JSON key order / default
attrs differ though the content is identical). Each call re-imports the SAME body
into the live collab doc -- a no-op at idle, but it CLOBBERS a concurrent human
edit still in the debounced (not-yet-flushed) Yjs doc.
Fix: skip the ingest when it is genuinely a no-op --
1) baseMarkdown byte-identical to the current file (vault unchanged), or
2) the parsed incoming body is canonically-JSON-equal (key-order-insensitive)
to the page's current Docmost content.
A real git-side change is neither, so legitimate git->Docmost ingests still apply.
Verified: idle churn 38 update/cycle -> 0; web edit on an affected page 0/3 -> 3/3
persisted; genuine git-side edit still ingests. Found by autonomous QA
(web-test-orchestrator) + independent verifier.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -46,6 +46,34 @@ const GIT_SYNC_PROVENANCE: AuthProvenanceData = {
|
||||
aiChatId: null,
|
||||
};
|
||||
|
||||
/**
 * Recursively rebuild a JSON-like value with every object's keys in sorted
 * order, so two values that differ ONLY in key order serialize identically.
 * Arrays keep their element order (order is meaningful in ProseMirror docs);
 * primitives, `null` and `undefined` are returned unchanged.
 *
 * Used to detect a semantically no-op body ingest despite an unstable
 * markdown<->ProseMirror round-trip (see importPageMarkdown guard #2).
 *
 * @param value - Any JSON-like value (e.g. a parsed ProseMirror document).
 * @returns A key-sorted copy of `value` for arrays/objects, or `value` itself
 *   for anything else.
 */
function canonicalize(value: unknown): unknown {
  if (Array.isArray(value)) return value.map(canonicalize);
  if (value && typeof value === 'object') {
    const source = value as Record<string, unknown>;
    // Object.fromEntries defines OWN data properties. A plain `out[key] = ...`
    // assignment would, for a key named "__proto__" (which JSON.parse creates
    // as an ordinary own property), hit the prototype setter instead and
    // silently drop that key from the canonical form — making two different
    // documents compare equal.
    return Object.fromEntries(
      Object.keys(source)
        .sort()
        .map((key): [string, unknown] => [key, canonicalize(source[key])]),
    );
  }
  return value;
}
|
||||
|
||||
/**
 * Compare two JSON-like values for equality while ignoring object key order.
 *
 * Both sides are canonicalized (keys sorted recursively) and serialized with
 * JSON.stringify; the resulting strings are then compared. Any error thrown
 * while canonicalizing or serializing is treated as "not equal".
 *
 * @param a - First value to compare.
 * @param b - Second value to compare.
 * @returns `true` when both canonical serializations are identical.
 */
function canonicalJsonEqual(a: unknown, b: unknown): boolean {
  let left: string | undefined;
  let right: string | undefined;
  try {
    left = JSON.stringify(canonicalize(a));
    right = JSON.stringify(canonicalize(b));
  } catch {
    // Deliberate best-effort: an unserializable input never counts as equal.
    return false;
  }
  return left === right;
}
|
||||
|
||||
/**
|
||||
* Native, in-process implementation of the engine's `GitSyncClient` seam
|
||||
* Reads go through repositories (PageRepo/SpaceRepo); body writes go
|
||||
@@ -181,10 +209,56 @@ export class GitmostDataSourceService {
|
||||
fullMarkdown: string,
|
||||
baseMarkdown?: string | null,
|
||||
): Promise<{ updatedAt?: string }> {
|
||||
// Idempotency guard #1 (fixes GS-EDIT-REVERT + idle re-ingest churn). The
|
||||
// reconcile can call this every poll cycle for a page whose vault file did
|
||||
// NOT actually change since the last sync (non-idempotent change-detection
|
||||
// upstream). Each such call re-imports the SAME vault body into the live
|
||||
// collab doc — a no-op at idle, but it CLOBBERS a concurrent human edit that
|
||||
// is still in the (debounced, not-yet-flushed) Yjs doc, silently reverting
|
||||
// it within one poll. When `baseMarkdown` (the last-synced version) is
|
||||
// byte-identical to the current file, there is genuinely nothing to ingest.
|
||||
// A real git-side change makes the strings differ, so legitimate
|
||||
// git->Docmost ingests still proceed.
|
||||
const currentPage = await this.pageRepo.findById(pageId, {
|
||||
includeContent: true,
|
||||
});
|
||||
if (baseMarkdown != null && fullMarkdown === baseMarkdown) {
|
||||
return {
|
||||
updatedAt: currentPage
|
||||
? new Date(currentPage.updatedAt).toISOString()
|
||||
: undefined,
|
||||
};
|
||||
}
|
||||
|
||||
const { parseDocmostMarkdown, markdownToProseMirror } = await loadGitSync();
|
||||
const { body } = parseDocmostMarkdown(fullMarkdown);
|
||||
const doc = await markdownToProseMirror(body);
|
||||
|
||||
// Idempotency guard #2 (defense-in-depth). Even when the vault file text
|
||||
// differs cosmetically, the PARSED body can be SEMANTICALLY identical to the
|
||||
// page's current Docmost content — the markdown<->ProseMirror round-trip is
|
||||
// not byte-stable (e.g. JSON key order `{text,type}` vs `{type,text}`,
|
||||
// default attrs), so upstream change-detection mis-flags such pages as
|
||||
// changed every cycle. Compare by CANONICAL JSON (recursively key-sorted); if
|
||||
// the incoming body already equals current content, this ingest is a no-op —
|
||||
// skip it so a concurrent live edit is never clobbered and the vault never
|
||||
// churns. A genuine content change is not canonically equal, so it proceeds.
|
||||
const currentContent =
|
||||
typeof currentPage?.content === 'string'
|
||||
? (() => {
|
||||
try {
|
||||
return JSON.parse(currentPage.content as unknown as string);
|
||||
} catch {
|
||||
return currentPage?.content;
|
||||
}
|
||||
})()
|
||||
: currentPage?.content;
|
||||
if (currentContent && canonicalJsonEqual(doc, currentContent)) {
|
||||
return {
|
||||
updatedAt: new Date(currentPage!.updatedAt).toISOString(),
|
||||
};
|
||||
}
|
||||
|
||||
let baseDoc: unknown;
|
||||
if (baseMarkdown != null) {
|
||||
const { body: baseBody } = parseDocmostMarkdown(baseMarkdown);
|
||||
@@ -346,12 +420,10 @@ export class GitmostDataSourceService {
|
||||
// LOCAL filesystem artifact and must NEVER become the page's real Docmost
|
||||
// title. A filename-derived title can carry it back in on ingest (observed:
|
||||
// intermittent same-title collision left a page permanently titled
|
||||
// "Title ~<slugId>"). Strip it here, on the rename/update title-write path —
|
||||
// NOTE this is NOT every git-sync title write: createPage's filename-derived
|
||||
// title does not funnel through here. Strip ONLY when the trailing token
|
||||
// equals THIS page's own slugId, so a genuine user title that legitimately
|
||||
// ends in ` ~token` is never corrupted (slugId is a random nanoid; no real
|
||||
// collision).
|
||||
// "Title ~<slugId>"). Strip it at this single choke point every git-sync
|
||||
// title write funnels through — but ONLY when the trailing token equals THIS
|
||||
// page's own slugId, so a genuine user title that legitimately ends in
|
||||
// ` ~token` is never corrupted (slugId is a random nanoid; no real collision).
|
||||
const suffix = ` ~${page.slugId}`;
|
||||
const cleanTitle =
|
||||
page.slugId && title.endsWith(suffix)
|
||||
|
||||
Reference in New Issue
Block a user