fix(git-sync): red-team hardening — 12 confirmed sync-breaking bugs + regression tests
A 10-agent red-team pass on the two-way Docmost<->git sync surfaced 16 ranked findings (9 others triaged out as already-defended). Wrote a reproduction test per finding (each asserts the CORRECT behavior, so it fails on the bug), then fixed the production code so every repro goes green. All confirmed bugs: Round-trip data loss (markdown-converter.ts + docmost-schema.ts mirror): - #1 editor-ext node types silently dropped on export — ported the 8 missing canon nodes (footnoteReference/footnotesList/footnoteDefinition, htmlEmbed, status, pageEmbed, transclusionSource/Reference) into the git-sync schema mirror and added converter cases that emit their schema-matching HTML instead of flattening unknown nodes to '' (this was the critical data-loss flagged in review #1679: footnotes/htmlEmbed lost on sync). Snapshot surface updated. - #2 top-level image lost width/height/align/attachmentId — now emits an HTML <img> (like video/diagrams) when it carries layout attrs; bare images stay plain Markdown image syntax. Image node parses width/height as strings so they re-import. - #3 code block containing a ``` fence corrupted on round-trip — outer fence is now widened to (longest-inner-backtick-run + 1). - #16 deep nesting threw RangeError (page never synced) — added a depth guard (MAX_NODE_DEPTH=400) so the converter never overflows the stack. Push/layout/cycle (engine): - #4 disambiguation ' ~slugId' suffix corrupted Docmost titles + order-dependent layout — deterministic, order-independent sibling disambiguation; suffix is stripped from a path-derived title ONLY when the new name is exactly the old title plus the suffix (never a genuine retitle ending in ' ~token'). - #6 retry-adopt by (parent,title) clobbered the wrong duplicate-title sibling — ambiguous (parent,title) is no longer adopted (falls back to fresh create). - #12 a new child under a new parent was created at ROOT — creates are ordered parent-before-child with an in-memory created-id map for parent resolution. 
- #13 git conflict markers could reach Docmost — bodies are scanned and the marker lines stripped (a '=======' line is only treated as a conflict separator inside a <<<<<<< ... >>>>>>> block, so setext headings are safe). - #15 a divergent `docmost` mirror was escalated by runPush but dropped by runCycle — RunCycleResult now forwards divergentDocmost to the orchestrator. Server (merge / lock / provenance): - #9 3-way merge lost a human's block edit when git inserted an adjacent block — finer-grained diff3 region merge (via lcs) preserves non-overlapping human edits; genuine same-block conflicts still resolve git-wins. - #10 single-writer race — module-static liveLocks closes the same-process TOCTOU window, and a heartbeat refresh that cannot confirm the lock now aborts the cycle at its next write checkpoint (cooperative AbortSignal threaded through runCycle). Cross-process fencing tokens remain a follow-up. - #14 sticky-agent provenance overrode an explicit actor='git-sync' write, blinding the listener loop-guard — resolveSource now lets an explicit actor win over the sticky-agent fallback (explicit agent still wins). Verified: git-sync vitest 617 pass (+1 expected-fail), server unit jest 1541 pass, server tsc clean. A review pass over the fixes caught and corrected a title-suffix over-strip, an inert abort signal, a document-wide conflict-marker strip, and two leaf-atom content-holes. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -26,6 +26,16 @@ export interface RunCycleDeps {
|
||||
settings: Settings;
|
||||
fs: CycleFs;
|
||||
log: (line: string) => void;
|
||||
/**
|
||||
* Optional cooperative-abort signal. The caller (orchestrator) wires this to
|
||||
* the per-space lock: if a heartbeat refresh cannot CONFIRM the lock is still
|
||||
* held (CAS-miss / Redis error), the signal is aborted and the cycle bails at
|
||||
* its next checkpoint (before the pull-apply and before the push-apply — the
|
||||
* two destructive write phases) instead of writing blind after a possible
|
||||
* lock loss. This is a COARSE best-effort guard; a fully fenced cross-process
|
||||
* single-writer still needs the fencing-token redesign (follow-up).
|
||||
*/
|
||||
signal?: AbortSignal;
|
||||
/**
|
||||
* Delete-cap hook (the ONLY caller-specific policy). Called with the push
|
||||
* dry-run's planned delete count (`Number.POSITIVE_INFINITY` when the dry-run
|
||||
@@ -47,6 +57,13 @@ export interface RunCycleResult {
|
||||
skipped?: "merge-in-progress";
|
||||
pull?: { written: number; deleted: number; conflict: boolean };
|
||||
push?: { mode: string; failures: number };
|
||||
/**
|
||||
* Forwarded from the push result: `true` when the push REFUSED to fast-forward
|
||||
* a divergent `docmost` mirror (the §5 invariant — `docmost` mirrors what
|
||||
* Docmost contains — is broken). Surfaced here so a caller driving `runCycle`
|
||||
* can detect the breach without scraping logs (red-team #15).
|
||||
*/
|
||||
divergentDocmost?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -70,7 +87,7 @@ export interface RunCycleResult {
|
||||
* Lock + cap POLICY live in the caller; this owns only the mechanics.
|
||||
*/
|
||||
export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
const { spaceId, client, vault, settings, fs, log, resolveApplyClient } =
|
||||
const { spaceId, client, vault, settings, fs, log, resolveApplyClient, signal } =
|
||||
deps;
|
||||
const vaultRoot = settings.vaultPath;
|
||||
const abs = (relPath: string) => `${vaultRoot}/${relPath}`;
|
||||
@@ -107,6 +124,9 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
existing,
|
||||
});
|
||||
|
||||
// Bail before the first destructive write phase if the lock was lost.
|
||||
signal?.throwIfAborted();
|
||||
|
||||
const pullResult = await applyPullActions(
|
||||
{
|
||||
client,
|
||||
@@ -150,6 +170,9 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
applyClient = resolveApplyClient(plannedDeletes, client);
|
||||
}
|
||||
|
||||
// Bail before pushing to Docmost if the lock was lost during pull.
|
||||
signal?.throwIfAborted();
|
||||
|
||||
const pushResult = await runPush(
|
||||
{ ...pushDeps, makeClient: () => applyClient },
|
||||
{ dryRun: false },
|
||||
@@ -166,5 +189,8 @@ export async function runCycle(deps: RunCycleDeps): Promise<RunCycleResult> {
|
||||
mode: pushResult.mode,
|
||||
failures: pushResult.failures?.length ?? 0,
|
||||
},
|
||||
// Forward a divergent-`docmost` escalation so the caller can act on the §5
|
||||
// invariant breach without scraping logs (red-team #15).
|
||||
divergentDocmost: pushResult.divergentDocmost ?? false,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -54,23 +54,54 @@ export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
|
||||
if (p && p.id && !byId.has(p.id)) byId.set(p.id, p);
|
||||
}
|
||||
|
||||
// Resolve each node's display name once, deterministically, tracking sibling
|
||||
// collisions per parent. `usedBySibling` maps a parent key -> set of names
|
||||
// already taken under that parent. The bucket key is the node's parent ONLY
|
||||
// when that parent is actually present in `byId`; otherwise (null parent, or
|
||||
// an orphan whose parent is outside the input set) the node buckets at
|
||||
// `"__root__"`. This is critical: orphans land at the vault root (see
|
||||
// `folderSegmentsFor`), so they MUST share the root bucket with real root
|
||||
// pages to be disambiguated against each other here — making `nameById` final
|
||||
// before any `segments` are computed, so no ancestor name can drift later.
|
||||
const usedBySibling = new Map<string, Set<string>>();
|
||||
const nameById = new Map<string, string>();
|
||||
// Resolve each node's display name once, deterministically. The bucket key is
|
||||
// the node's parent ONLY when that parent is actually present in `byId`;
|
||||
// otherwise (null parent, or an orphan whose parent is outside the input set)
|
||||
// the node buckets at `"__root__"`. This is critical: orphans land at the vault
|
||||
// root (see `folderSegmentsFor`), so they MUST share the root bucket with real
|
||||
// root pages to be disambiguated against each other here — making `nameById`
|
||||
// final before any `segments` are computed, so no ancestor name can drift.
|
||||
const parentKeyOf = (p: PageNode): string =>
|
||||
p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
|
||||
// Group nodes by (parentKey, sanitized base title) so sibling collisions are
|
||||
// resolved by a STABLE rule that does NOT depend on input array order. Dedupe
|
||||
// ids (first occurrence wins, matching `byId`).
|
||||
const siblingGroups = new Map<string, PageNode[]>();
|
||||
const namedIds = new Set<string>();
|
||||
for (const p of pages) {
|
||||
if (p && p.id && !nameById.has(p.id)) {
|
||||
const parentKey =
|
||||
p.parentPageId && byId.has(p.parentPageId) ? p.parentPageId : "__root__";
|
||||
nameById.set(p.id, nameForNode(p, parentKey, usedBySibling));
|
||||
if (!p || !p.id || namedIds.has(p.id)) continue;
|
||||
namedIds.add(p.id);
|
||||
const key = `${parentKeyOf(p)}\u0000${sanitizeTitle(p.title ?? "")}`;
|
||||
const bucket = siblingGroups.get(key);
|
||||
if (bucket) bucket.push(p);
|
||||
else siblingGroups.set(key, [p]);
|
||||
}
|
||||
// Assign each node its display name. Within a colliding group, sort the
|
||||
// siblings by their stable disambiguation key (`slugId` else `id`) and let the
|
||||
// FIRST keep the bare sanitized title; every OTHER gets the ` ~<slugId>`
|
||||
// suffix. This makes `nameById` a pure function of the page SET — reordering
|
||||
// the input never moves the suffix onto a different page (red-team #4a). The
|
||||
// suffix is itself sanitized (the slugId/id is untrusted and must never inject
|
||||
// a path separator).
|
||||
const nameById = new Map<string, string>();
|
||||
const disambKeyOf = (p: PageNode): string => p.slugId ?? p.id;
|
||||
for (const bucket of siblingGroups.values()) {
|
||||
const base = sanitizeTitle(bucket[0].title ?? "");
|
||||
if (bucket.length === 1) {
|
||||
nameById.set(bucket[0].id, base);
|
||||
continue;
|
||||
}
|
||||
const sorted = [...bucket].sort((a, b) => {
|
||||
const ka = disambKeyOf(a);
|
||||
const kb = disambKeyOf(b);
|
||||
return ka < kb ? -1 : ka > kb ? 1 : 0;
|
||||
});
|
||||
sorted.forEach((p, i) => {
|
||||
nameById.set(
|
||||
p.id,
|
||||
i === 0 ? base : disambiguate(base, sanitizeTitle(disambKeyOf(p))),
|
||||
);
|
||||
});
|
||||
}
|
||||
|
||||
// Every id we index above MUST get a resolved name; this helper returns it
|
||||
@@ -169,34 +200,3 @@ export function buildVaultLayout(pages: PageNode[]): Map<string, VaultEntry> {
|
||||
return layout;
|
||||
}
|
||||
|
||||
/**
 * Pick the display name for `node` among the names already handed out under
 * `parentKey`.
 *
 * The sanitized title is used as-is when no earlier sibling took it; otherwise
 * it is passed to `disambiguate` together with the sanitized `slugId` (or the
 * sanitized `id` when there is no `slugId`). The identifier is sanitized as
 * well because it is untrusted data that ends up in a file name. The chosen
 * name is recorded in `usedBySibling` before returning, so the result depends
 * on the order in which siblings are named: whichever sibling is named first
 * keeps the bare title.
 *
 * @param node Page to name.
 * @param parentKey Bucket the node is named in; chosen by the caller.
 * @param usedBySibling Parent key -> names already taken; mutated in place.
 * @returns The name assigned to `node`.
 */
function nameForNode(
  node: PageNode,
  parentKey: string,
  usedBySibling: Map<string, Set<string>>,
): string {
  // Lazily create the per-parent bucket of taken names.
  let taken = usedBySibling.get(parentKey);
  if (taken === undefined) {
    taken = new Set<string>();
    usedBySibling.set(parentKey, taken);
  }

  const base = sanitizeTitle(node.title ?? "");
  // Only a name that is already taken gets the disambiguating suffix.
  const resolved = taken.has(base)
    ? disambiguate(base, sanitizeTitle(node.slugId ?? node.id))
    : base;

  taken.add(resolved);
  return resolved;
}
|
||||
|
||||
@@ -133,8 +133,27 @@ export function classifyRenameMoves(
|
||||
return renamesMoves.map((rm) => {
|
||||
const newParent = deps.resolveParentPageId(rm.newPath, "current");
|
||||
const oldParent = deps.resolveParentPageId(rm.oldPath, "prev");
|
||||
const newTitle = deps.metaAt(rm.newPath, "current")?.title;
|
||||
const oldTitle = deps.metaAt(rm.oldPath, "prev")?.title;
|
||||
// Strip the cosmetic ` ~<slugId>` disambiguation suffix before comparing
|
||||
// titles: it is a LOCAL filesystem artifact (`buildVaultLayout` appends it to
|
||||
// a colliding sibling's stem), NOT part of the page's real title. A pure
|
||||
// disambiguation file-rename ('Report.md' -> 'Report ~a1.md') must therefore
|
||||
// NOT be pushed to Docmost as a title change (red-team #4b), and any title we
|
||||
// DO push must carry the real title ('Report'), never the suffixed form.
|
||||
const rawNewTitle = deps.metaAt(rm.newPath, "current")?.title;
|
||||
const rawOldTitle = deps.metaAt(rm.oldPath, "prev")?.title;
|
||||
// A PURE disambiguation rename only APPENDS a cosmetic ` ~<suffix>` to the
|
||||
// SAME title (layout.ts), so the real Docmost title is unchanged. Strip the
|
||||
// suffix ONLY when the new name is exactly the old title plus that suffix —
|
||||
// never blindly strip a genuine retitle whose new title legitimately ends in
|
||||
// ` ~token` (e.g. "Budget ~draft" -> "Budget ~final"), which would corrupt
|
||||
// the title in Docmost / drop a real rename (review finding).
|
||||
const isCosmeticDisambiguation =
|
||||
typeof rawNewTitle === "string" &&
|
||||
typeof rawOldTitle === "string" &&
|
||||
rawNewTitle !== rawOldTitle &&
|
||||
stripDisambiguationSuffix(rawNewTitle) === rawOldTitle;
|
||||
const newTitle = isCosmeticDisambiguation ? rawOldTitle : rawNewTitle;
|
||||
const oldTitle = rawOldTitle;
|
||||
|
||||
const out: RenameMoveActionClassified = {
|
||||
pageId: rm.pageId,
|
||||
@@ -646,7 +665,11 @@ export async function applyPushActions(
|
||||
// Push the CLEAN body only (no `gitmost_id` frontmatter): the frontmatter
|
||||
// is engine metadata, never page content. The server converts the markdown
|
||||
// it receives verbatim, so stripping here keeps the id out of Docmost.
|
||||
const body = parsePageFile(await deps.readFile(u.path)).body;
|
||||
// Also strip any git conflict markers — they must NEVER reach Docmost
|
||||
// (SPEC §9, red-team #13); content on both sides is preserved.
|
||||
const body = stripConflictMarkers(
|
||||
parsePageFile(await deps.readFile(u.path)).body,
|
||||
);
|
||||
// The last-synced version of this file (pre-image) is the common ancestor
|
||||
// for a 3-way merge against the live page, so concurrent human edits are
|
||||
// not clobbered (review #5). Null when the file is new at last-pushed. Its
|
||||
@@ -689,6 +712,10 @@ export async function applyPushActions(
|
||||
// folder, so (parentPageId, title) identifies the page; a match means a prior
|
||||
// cycle already created it, so we ADOPT instead of duplicating.
|
||||
let liveByParentTitle: Map<string, string> | null = null;
|
||||
// A (parentPageId, title) that more than ONE live page shares is AMBIGUOUS:
|
||||
// adopting one of them would silently overwrite an arbitrary, possibly-unrelated
|
||||
// sibling (red-team #6). Such keys are recorded here and EXCLUDED from adoption.
|
||||
const ambiguousAdoptKeys = new Set<string>();
|
||||
if (actions.creates.length > 0) {
|
||||
const live = await client.listSpaceTree(deps.spaceId);
|
||||
// Only trust a COMPLETE tree for retry-adopt: a truncated tree could miss an
|
||||
@@ -699,32 +726,56 @@ export async function applyPushActions(
|
||||
liveByParentTitle = new Map();
|
||||
for (const n of live.pages) {
|
||||
const key = `${n.parentPageId ?? " root"} ${n.title ?? ""}`;
|
||||
// Keep the FIRST node for a key (the layout makes this unique in practice).
|
||||
if (!liveByParentTitle.has(key)) liveByParentTitle.set(key, n.id);
|
||||
// First node claims the key; a SECOND match marks it ambiguous so neither
|
||||
// is ever adopted-over (the create falls back to a fresh createPage).
|
||||
if (liveByParentTitle.has(key)) ambiguousAdoptKeys.add(key);
|
||||
else liveByParentTitle.set(key, n.id);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (const c of actions.creates) {
|
||||
// Order creates PARENT-before-CHILD (red-team #12): a child whose parent is
|
||||
// ALSO a fresh create must run AFTER its parent so the parent's just-assigned
|
||||
// pageId is available to parent it (otherwise it is placed at the space ROOT).
|
||||
const orderedCreates = orderCreatesParentFirst(actions.creates);
|
||||
// Track pageIds assigned (or adopted) to each create's PATH in THIS batch, so a
|
||||
// child can resolve its freshly-created parent's id without depending on the
|
||||
// on-disk write-back being observable yet (red-team #12).
|
||||
const createdIdByPath = new Map<string, string>();
|
||||
for (const c of orderedCreates) {
|
||||
try {
|
||||
const text = await deps.readFile(c.path);
|
||||
const { body } = parsePageFile(text);
|
||||
// Conflict markers must never reach Docmost (SPEC §9, red-team #13); strip
|
||||
// them from the create body too, preserving both sides' content.
|
||||
const body = stripConflictMarkers(parsePageFile(text).body);
|
||||
// Derive create args from the PATH (native-Obsidian, SPEC §5): title from
|
||||
// the filename, parent from the enclosing folder's folder-note, space from
|
||||
// the run (the vault's space). `parentPageId: null` -> created at ROOT.
|
||||
const title = titleFromPath(c.path);
|
||||
// Resolve the parent from the PATH (SPEC §5). Prefer an id assigned to the
|
||||
// parent's folder-note EARLIER in this same batch — a freshly-created parent
|
||||
// whose on-disk write-back may not be observable yet (red-team #12; creates
|
||||
// are ordered parent-before-child so the parent already ran).
|
||||
const parentFile = parentFolderFile(c.path);
|
||||
const parentPageId =
|
||||
(await resolveParentPageIdViaTree(deps, c.path, "current")) ?? undefined;
|
||||
(parentFile !== null ? createdIdByPath.get(parentFile) : undefined) ??
|
||||
(await resolveParentPageIdViaTree(deps, c.path, "current")) ??
|
||||
undefined;
|
||||
// Retry-adopt (#1 idempotency): a prior cycle already created this page in
|
||||
// Docmost but failed to persist the pageId back to the file, so it was
|
||||
// re-seen as a create. Adopt the existing page instead of duplicating it:
|
||||
// write the id back (file becomes tracked) and push the body as an UPDATE
|
||||
// (idempotent — targets by pageId). Do NOT call createPage again.
|
||||
// (idempotent — targets by pageId). Do NOT call createPage again. SKIP
|
||||
// adoption when the (parent, title) is AMBIGUOUS — adopting an arbitrary
|
||||
// duplicate-title sibling would silently overwrite it (red-team #6).
|
||||
const adoptKey = `${parentPageId ?? " root"} ${title}`;
|
||||
const existingId = liveByParentTitle?.get(adoptKey);
|
||||
const existingId = ambiguousAdoptKeys.has(adoptKey)
|
||||
? undefined
|
||||
: liveByParentTitle?.get(adoptKey);
|
||||
if (existingId) {
|
||||
const rewritten = serializePageFile(existingId, body);
|
||||
await deps.writeFile(c.path, rewritten);
|
||||
writtenBack.push({ path: c.path, pageId: existingId });
|
||||
createdIdByPath.set(c.path, existingId);
|
||||
const adopted = await client.importPageMarkdown(existingId, body, null);
|
||||
pushed.push({
|
||||
pageId: existingId,
|
||||
@@ -749,6 +800,7 @@ export async function applyPushActions(
|
||||
const rewritten = serializePageFile(assignedPageId, body);
|
||||
await deps.writeFile(c.path, rewritten);
|
||||
writtenBack.push({ path: c.path, pageId: assignedPageId });
|
||||
createdIdByPath.set(c.path, assignedPageId);
|
||||
// §10 loop-guard data for the created page (hash the pushed BODY).
|
||||
pushed.push({
|
||||
pageId: assignedPageId,
|
||||
@@ -942,6 +994,35 @@ export function parentFolderFile(path: string): string | null {
|
||||
return folderNote;
|
||||
}
|
||||
|
||||
/**
 * Return the CREATE actions reordered so that any create whose parent
 * folder-note (per `parentFolderFile`) is also in the batch comes AFTER that
 * parent (red-team #12).
 *
 * This is a depth-first walk over the parent relation, limited to paths that
 * are part of `creates`. Each path is emitted at most once. A path that is
 * still being placed is never re-entered, so a malformed parent cycle
 * terminates instead of recursing forever.
 *
 * @param creates The batch of create actions, in any order.
 * @returns A new array; the input array is not modified.
 */
export function orderCreatesParentFirst(creates: CreateAction[]): CreateAction[] {
  // Path -> create lookup (a later duplicate path replaces an earlier one).
  const createAt = new Map<string, CreateAction>();
  creates.forEach((create) => {
    createAt.set(create.path, create);
  });

  const result: CreateAction[] = [];
  // A path present here is either being placed right now or already placed.
  const state = new Map<string, "placing" | "placed">();

  function place(create: CreateAction): void {
    if (state.has(create.path)) return;
    state.set(create.path, "placing");

    const parentPath = parentFolderFile(create.path);
    const parentCreate =
      parentPath === null || parentPath === create.path
        ? undefined
        : createAt.get(parentPath);
    // Emit the parent first when it is part of this batch.
    if (parentCreate !== undefined) place(parentCreate);

    state.set(create.path, "placed");
    result.push(create);
  }

  for (const create of creates) place(create);
  return result;
}
|
||||
|
||||
/**
|
||||
* Whether a vault path is a Docmost PAGE file (design §"Adoption"): a `.md` file
|
||||
* with NO dot-segment anywhere in its path. This excludes `.obsidian/` config,
|
||||
@@ -955,6 +1036,51 @@ export function isPageFile(path: string): boolean {
|
||||
return !path.split("/").some((seg) => seg.startsWith("."));
|
||||
}
|
||||
|
||||
/**
 * Git conflict-marker scan + strip (SPEC §9 — conflict markers must NEVER reach
 * Docmost).
 *
 * A body is treated as conflicted only when it carries BOTH a begin
 * (`<<<<<<<`) and an end (`>>>>>>>`) marker line, so a legitimate Markdown
 * setext heading underline (`=======`) alone is not mistaken for a conflict.
 *
 * When conflicted, the marker lines are removed while the content of every
 * side is preserved (no data loss): the marker SYNTAX never reaches Docmost,
 * but the human's content does — where the conflict is visible and fixable
 * rather than silently dropped. Besides the begin / separator / end lines this
 * also covers the `|||||||` common-ancestor marker that git writes when
 * `merge.conflictStyle` is `diff3` or `zdiff3`.
 */
const CONFLICT_BEGIN_RE = /^<{7}/m;
const CONFLICT_END_RE = /^>{7}/m;
const CONFLICT_BEGIN_LINE_RE = /^<{7}/;
// Exactly seven pipes followed by whitespace or end of line, so a longer run of
// pipes (e.g. an empty Markdown table row) is not mistaken for the marker.
const CONFLICT_BASE_LINE_RE = /^\|{7}(?:\s|$)/;
const CONFLICT_SEP_LINE_RE = /^={7}/;
const CONFLICT_END_LINE_RE = /^>{7}/;

/**
 * Whether `body` contains both a conflict begin line and a conflict end line.
 *
 * @param body Markdown body to scan.
 * @returns `true` only when a `<<<<<<<` line AND a `>>>>>>>` line are present.
 */
export function hasConflictMarkers(body: string): boolean {
  return CONFLICT_BEGIN_RE.test(body) && CONFLICT_END_RE.test(body);
}

/**
 * Remove git conflict-marker lines from `body`, keeping all content lines.
 *
 * @param body Markdown body that may contain conflict blocks.
 * @returns `body` unchanged when it is not conflicted; otherwise the body with
 *   the marker lines dropped.
 */
function stripConflictMarkers(body: string): string {
  if (!hasConflictMarkers(body)) return body;
  // `=======` and `|||||||` are treated as markers ONLY between a `<<<<<<<`
  // begin and a `>>>>>>>` end, so a setext heading underline or a pipe-only line
  // outside a conflict block is preserved.
  let inBlock = false;
  const out: string[] = [];
  for (const line of body.split("\n")) {
    if (CONFLICT_BEGIN_LINE_RE.test(line)) {
      inBlock = true;
      continue;
    }
    if (CONFLICT_END_LINE_RE.test(line)) {
      inBlock = false;
      continue;
    }
    if (
      inBlock &&
      (CONFLICT_SEP_LINE_RE.test(line) || CONFLICT_BASE_LINE_RE.test(line))
    ) {
      continue;
    }
    out.push(line);
  }
  return out.join("\n");
}
|
||||
|
||||
/** The last path segment of a forward-slash path (the folder/file base name). */
|
||||
function baseSegment(path: string): string {
|
||||
const slash = path.lastIndexOf("/");
|
||||
@@ -974,6 +1100,20 @@ function titleFromPath(path: string): string {
|
||||
return base.endsWith(".md") ? base.slice(0, -3) : base;
|
||||
}
|
||||
|
||||
/**
 * Pattern for the trailing ` ~<token>` disambiguation suffix appended to a
 * colliding sibling's file stem: a space, a tilde, then one or more characters
 * that are neither `/` nor `~`, anchored at the end of the string. The suffix is
 * a cosmetic, local filesystem artifact and not part of the page's real title
 * (red-team #4b).
 */
const DISAMBIGUATION_SUFFIX_RE = / ~[^/~]+$/;

/**
 * Drop one trailing ` ~<token>` disambiguation suffix from `title`.
 *
 * @param title A path-derived title.
 * @returns `title` without the suffix, or `title` unchanged when it has none.
 */
function stripDisambiguationSuffix(title: string): string {
  const suffix = DISAMBIGUATION_SUFFIX_RE.exec(title);
  // The pattern is end-anchored, so cutting at the match start removes exactly
  // the matched suffix.
  return suffix === null ? title : title.slice(0, suffix.index);
}
|
||||
|
||||
/**
|
||||
* Build the synthetic `DocmostMdMeta` the planner/classifier consume, from the
|
||||
* NATIVE format: `pageId` from the `gitmost_id` frontmatter, `title` from the
|
||||
|
||||
@@ -204,11 +204,38 @@ const DocmostAttributes = Extension.create({
|
||||
types: ["image"],
|
||||
attributes: {
|
||||
align: { default: null },
|
||||
attachmentId: { default: null },
|
||||
aspectRatio: { default: null },
|
||||
// imageToHtml emits these Docmost-specific image attrs as data-*; map
|
||||
// them back explicitly so a top-level image (or one inside a column)
|
||||
// round-trips them. Without a parseHTML the default reads the bare
|
||||
// attribute name (e.g. getAttribute("attachmentId") -> null) and the
|
||||
// value — including the attachmentId that links the image to its
|
||||
// stored file — is silently dropped on every round-trip (data loss).
|
||||
attachmentId: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) =>
|
||||
el.getAttribute("data-attachment-id"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.attachmentId
|
||||
? { "data-attachment-id": attrs.attachmentId }
|
||||
: {},
|
||||
},
|
||||
aspectRatio: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) =>
|
||||
el.getAttribute("data-aspect-ratio"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.aspectRatio != null
|
||||
? { "data-aspect-ratio": attrs.aspectRatio }
|
||||
: {},
|
||||
},
|
||||
height: { default: null },
|
||||
placeholder: { default: null },
|
||||
size: { default: null },
|
||||
size: {
|
||||
default: null,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("data-size"),
|
||||
renderHTML: (attrs: Record<string, any>) =>
|
||||
attrs.size != null ? { "data-size": attrs.size } : {},
|
||||
},
|
||||
width: { default: null },
|
||||
},
|
||||
},
|
||||
@@ -1030,6 +1057,300 @@ const PageBreak = Node.create({
|
||||
},
|
||||
});
|
||||
|
||||
/**
 * Footnote feature (mirror of the @docmost/editor-ext footnote nodes). Three
 * node types are linked by `id`:
 *   - `footnoteReference`: inline atom marker in the body (<sup data-footnote-ref>);
 *   - `footnotesList`: the bottom container (<section data-footnotes>);
 *   - `footnoteDefinition`: one note keyed by id (<div data-footnote-def>).
 * No visible number is stored on the node; only the `id` is.
 *
 * The reference node's <sup> parse rule carries priority 100 so that it wins
 * over the Superscript mark's <sup> rule (otherwise an empty reference parses as
 * an empty superscript and is dropped).
 */
const FootnoteReference = Node.create({
  name: "footnoteReference",
  priority: 101,
  group: "inline",
  inline: true,
  atom: true,
  selectable: true,
  draggable: false,
  addAttributes() {
    return {
      // The footnote id travels in `data-id`; it is omitted when falsy.
      id: {
        default: null,
        parseHTML: (element: HTMLElement) => element.getAttribute("data-id"),
        renderHTML: (attributes: Record<string, any>) => {
          if (!attributes.id) return {};
          return { "data-id": attributes.id };
        },
      },
    };
  },
  parseHTML() {
    return [{ tag: "sup[data-footnote-ref]", priority: 100 }];
  },
  renderHTML(props) {
    // Leaf atom: no content hole.
    return ["sup", { "data-footnote-ref": "", ...props.HTMLAttributes }];
  },
});
|
||||
|
||||
/**
 * Block container for footnote definitions, serialized as
 * <section data-footnotes>. It must hold at least one `footnoteDefinition`.
 */
const FootnotesList = Node.create({
  name: "footnotesList",
  group: "block",
  content: "footnoteDefinition+",
  isolating: true,
  selectable: false,
  defining: true,
  parseHTML() {
    return [{ tag: "section[data-footnotes]" }];
  },
  renderHTML(props) {
    // The trailing 0 is the content hole where the definitions are rendered.
    return ["section", { "data-footnotes": "", ...props.HTMLAttributes }, 0];
  },
});
|
||||
|
||||
/**
 * One footnote's body, serialized as <div data-footnote-def data-id="...">.
 * It contains one or more paragraphs and carries its `id` in `data-id`.
 */
const FootnoteDefinition = Node.create({
  name: "footnoteDefinition",
  content: "paragraph+",
  defining: true,
  isolating: true,
  selectable: false,
  addAttributes() {
    return {
      // The footnote id travels in `data-id`; it is omitted when falsy.
      id: {
        default: null,
        parseHTML: (element: HTMLElement) => element.getAttribute("data-id"),
        renderHTML: (attributes: Record<string, any>) => {
          if (!attributes.id) return {};
          return { "data-id": attributes.id };
        },
      },
    };
  },
  parseHTML() {
    return [{ tag: "div[data-footnote-def]" }];
  },
  renderHTML(props) {
    // The trailing 0 is the content hole where the paragraphs are rendered.
    return ["div", { "data-footnote-def": "", ...props.HTMLAttributes }, 0];
  },
});
|
||||
|
||||
/**
 * Encode an htmlEmbed `source` (arbitrary HTML/CSS/JS) for the `data-source`
 * attribute.
 *
 * The source is percent-encoded with `encodeURIComponent` first, so the input
 * to base64 is plain ASCII and UTF-8 text survives. It is then base64-encoded
 * with `btoa` when that global exists, otherwise with `Buffer`.
 *
 * @param source Raw embed source.
 * @returns The base64 text, or `""` for an empty source or when encoding throws.
 */
export function encodeHtmlEmbedSource(source: string): string {
  if (!source) return "";
  try {
    const escaped = encodeURIComponent(source);
    const hasBtoa = typeof btoa === "function";
    return hasBtoa
      ? btoa(escaped)
      : Buffer.from(escaped, "utf-8").toString("base64");
  } catch {
    // Best-effort: an unencodable source yields an empty attribute value.
    return "";
  }
}
|
||||
|
||||
/**
 * Decode a `data-source` attribute value back into the htmlEmbed source.
 *
 * The value is base64-decoded with `atob` when that global exists, otherwise
 * with `Buffer`, and the result is then passed through `decodeURIComponent`.
 *
 * @param encoded Base64 text taken from the `data-source` attribute.
 * @returns The decoded source, or `""` for an empty value or when decoding throws.
 */
export function decodeHtmlEmbedSource(encoded: string): string {
  if (!encoded) return "";
  try {
    const hasAtob = typeof atob === "function";
    const escaped = hasAtob
      ? atob(encoded)
      : Buffer.from(encoded, "base64").toString("utf-8");
    return decodeURIComponent(escaped);
  } catch {
    // Best-effort: a malformed value decodes to an empty source.
    return "";
  }
}
|
||||
|
||||
/**
 * Docmost raw HTML embed: a block atom mirroring the @docmost/editor-ext node.
 *
 * `source` rides the `data-source` attribute in encoded form
 * (`encodeHtmlEmbedSource` / `decodeHtmlEmbedSource`), and the optional
 * `height` rides `data-height` as a base-10 integer.
 */
const HtmlEmbed = Node.create({
  name: "htmlEmbed",
  group: "block",
  inline: false,
  isolating: true,
  atom: true,
  defining: true,
  draggable: true,
  addAttributes() {
    return {
      source: {
        default: "",
        parseHTML: (element: HTMLElement) => {
          const encoded = element.getAttribute("data-source") || "";
          return decodeHtmlEmbedSource(encoded);
        },
        renderHTML: (attributes: Record<string, any>) => {
          const raw = attributes.source || "";
          return { "data-source": encodeHtmlEmbedSource(raw) };
        },
      },
      height: {
        default: null,
        parseHTML: (element: HTMLElement) => {
          const raw = element.getAttribute("data-height");
          if (!raw) return null;
          const parsed = parseInt(raw, 10);
          // A non-numeric value falls back to "no height".
          return Number.isFinite(parsed) ? parsed : null;
        },
        renderHTML: (attributes: Record<string, any>) => {
          if (attributes.height == null) return {};
          return { "data-height": String(attributes.height) };
        },
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="htmlEmbed"]' }];
  },
  renderHTML(props) {
    // Leaf atom: no content hole.
    return ["div", { "data-type": "htmlEmbed", ...props.HTMLAttributes }];
  },
});
|
||||
|
||||
/**
 * Inline status pill (inline atom). Mirrors @docmost/editor-ext status.
 *
 * The label is read from the element's TEXT content (not an attribute) and
 * the color from `data-color` (falling back to "gray"). On render only
 * `data-type` and `data-color` are emitted as attributes; the label is
 * written back as the span's text child.
 */
const Status = Node.create({
  name: "status",
  group: "inline",
  inline: true,
  atom: true,
  selectable: true,
  draggable: true,
  addAttributes() {
    return {
      text: {
        default: "",
        parseHTML: (el: HTMLElement) => el.textContent || "",
      },
      color: {
        default: "gray",
        parseHTML: (el: HTMLElement) => el.getAttribute("data-color") || "gray",
        renderHTML: (attrs: Record<string, any>) => ({
          "data-color": attrs.color ?? "gray",
        }),
      },
    };
  },
  parseHTML() {
    return [{ tag: 'span[data-type="status"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    return [
      "span",
      // Deliberately not spreading HTMLAttributes: `text` must not leak out
      // as an HTML attribute, it belongs in the element's text content.
      { "data-type": "status", "data-color": HTMLAttributes["data-color"] },
      `${HTMLAttributes.text ?? ""}`,
    ];
  },
});
|
||||
|
||||
/**
 * Whole-page live embed (block atom). Mirrors @docmost/editor-ext pageEmbed.
 *
 * Holds only a `sourcePageId` reference, carried in `data-source-page-id`.
 * The attribute is omitted on render when no id is set.
 */
const PageEmbed = Node.create({
  name: "pageEmbed",
  group: "block",
  atom: true,
  isolating: true,
  selectable: true,
  draggable: true,
  addAttributes() {
    return {
      sourcePageId: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-source-page-id"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.sourcePageId
            ? { "data-source-page-id": attrs.sourcePageId }
            : {},
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="pageEmbed"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    return ["div", { "data-type": "pageEmbed", ...HTMLAttributes }];
  },
});
|
||||
|
||||
/**
 * ProseMirror content expression listing the block node types allowed inside
 * a `transclusionSource` (mirrors the @docmost/editor-ext transclusion
 * constants). Transclusion nodes themselves are left out (no nesting), as are
 * child-only nodes. The trailing `+` requires at least one child.
 */
const TRANSCLUSION_SOURCE_CONTENT_EXPRESSION =
  "(" +
  [
    "paragraph",
    "heading",
    "blockquote",
    "codeBlock",
    "horizontalRule",
    "bulletList",
    "orderedList",
    "taskList",
    "image",
    "video",
    "audio",
    "attachment",
    "callout",
    "details",
    "embed",
    "mathBlock",
    "table",
    "drawio",
    "excalidraw",
    "pdf",
    "subpages",
    "columns",
    "youtube",
  ].join(" | ") +
  ")+";
|
||||
|
||||
/**
 * Sync-source block: editable content shared into transclusion references.
 *
 * A block container whose children are restricted by
 * `TRANSCLUSION_SOURCE_CONTENT_EXPRESSION`. Its `id` is carried in `data-id`
 * and omitted on render when unset.
 */
const TransclusionSource = Node.create({
  name: "transclusionSource",
  group: "block",
  content: TRANSCLUSION_SOURCE_CONTENT_EXPRESSION,
  defining: true,
  isolating: true,
  addAttributes() {
    return {
      id: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-id"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.id ? { "data-id": attrs.id } : {},
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="transclusionSource"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    // The trailing 0 is the content hole: child nodes render inside the div.
    return ["div", { "data-type": "transclusionSource", ...HTMLAttributes }, 0];
  },
});
|
||||
|
||||
/**
 * Live reference to a transcluded block/page (block atom).
 *
 * Carries two optional identifiers, each omitted on render when unset:
 * - `sourcePageId` in `data-source-page-id`
 * - `transclusionId` in `data-transclusion-id`
 */
const TransclusionReference = Node.create({
  name: "transclusionReference",
  group: "block",
  atom: true,
  selectable: true,
  draggable: false,
  addAttributes() {
    return {
      sourcePageId: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-source-page-id"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.sourcePageId
            ? { "data-source-page-id": attrs.sourcePageId }
            : {},
      },
      transclusionId: {
        default: null,
        parseHTML: (el: HTMLElement) => el.getAttribute("data-transclusion-id"),
        renderHTML: (attrs: Record<string, any>) =>
          attrs.transclusionId
            ? { "data-transclusion-id": attrs.transclusionId }
            : {},
      },
    };
  },
  parseHTML() {
    return [{ tag: 'div[data-type="transclusionReference"]' }];
  },
  renderHTML({ HTMLAttributes }) {
    return [
      "div",
      { "data-type": "transclusionReference", ...HTMLAttributes },
    ];
  },
});
|
||||
|
||||
/**
|
||||
* Full extension list. Image is block-level (matches Docmost); the
|
||||
* ProseMirror DOM parser hoists <img> found inside <p> automatically.
|
||||
@@ -1041,7 +1362,29 @@ export const docmostExtensions = [
|
||||
heading: {},
|
||||
link: { openOnClick: false },
|
||||
}),
|
||||
Image.configure({ inline: false }),
|
||||
// Preserve image width/height as the AUTHORED string. Without an explicit
|
||||
// parseHTML the stock Image node attribute falls back to tiptap core's
|
||||
// `fromString`, which coerces a numeric width like "320" into the number 320
|
||||
// — changing the stored type on every markdown round-trip (Docmost stores
|
||||
// these as strings, e.g. "320" or "50%", matching how video/audio/pdf are
|
||||
// handled in this mirror). The node attribute is applied AFTER the global
|
||||
// DocmostAttributes one, so the fix must live on the Image node itself.
|
||||
Image.extend({
|
||||
addAttributes() {
|
||||
const parent = (this.parent?.() ?? {}) as Record<string, any>;
|
||||
return {
|
||||
...parent,
|
||||
width: {
|
||||
...parent.width,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("width"),
|
||||
},
|
||||
height: {
|
||||
...parent.height,
|
||||
parseHTML: (el: HTMLElement) => el.getAttribute("height"),
|
||||
},
|
||||
};
|
||||
},
|
||||
}).configure({ inline: false }),
|
||||
TaskList,
|
||||
TaskItem.configure({ nested: true }),
|
||||
// Highlight stores its color unescaped and Docmost interpolates it into
|
||||
@@ -1094,5 +1437,13 @@ export const docmostExtensions = [
|
||||
Audio,
|
||||
Pdf,
|
||||
PageBreak,
|
||||
FootnoteReference,
|
||||
FootnotesList,
|
||||
FootnoteDefinition,
|
||||
HtmlEmbed,
|
||||
Status,
|
||||
PageEmbed,
|
||||
TransclusionSource,
|
||||
TransclusionReference,
|
||||
DocmostAttributes,
|
||||
];
|
||||
|
||||
@@ -1,3 +1,18 @@
|
||||
import { encodeHtmlEmbedSource } from "./docmost-schema.js";
|
||||
|
||||
/**
 * Hard cap on processNode recursion depth, enforced by the depth guard inside
 * `convertProseMirrorToMarkdown`.
 *
 * The value is meant to sit well above any realistic document yet well below
 * the depth at which the converter's own call stack overflows. The upstream
 * note reports a measured overflow at roughly nesting level ~650 for the
 * heaviest shape (deeply nested lists), and a 200-level stress fixture that
 * reaches depth ~204; 400 leaves margin on both sides.
 *
 * NOTE(review): the upstream note also quotes ~5 JS frames per level against
 * a ~10k-frame stack, which does not by itself give ~650; treat the measured
 * figure as authoritative and re-measure before raising this cap.
 */
const MAX_NODE_DEPTH = 400;
|
||||
|
||||
/**
|
||||
* Convert ProseMirror/TipTap JSON content to Markdown
|
||||
* Supports all Docmost-specific node types and extensions
|
||||
@@ -43,7 +58,34 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
.replace(/\(/g, "%28")
|
||||
.replace(/\)/g, "%29");
|
||||
|
||||
// Recursion depth guard. processNode is mutually recursive (directly and via
// processListItem/processTaskItem/blockToHtml), so a pathologically nested
// document could otherwise overflow the call stack with a RangeError and
// abort the sync for that page. The live nesting depth is tracked in the
// closure counter below; once it reaches MAX_NODE_DEPTH the wrapper stops
// recursing and returns the node's own text (or "") instead of throwing.
// Documents that never reach the cap go straight through to
// processNodeInner, so their output is unaffected.
//
// The wrapper takes (node) only. Several callers use `.map(processNode)`,
// which passes the array index as a second argument; it is simply ignored.
let nodeDepth = 0;
const processNode = (node: any): string => {
  if (nodeDepth >= MAX_NODE_DEPTH) {
    // At the cap: keep a text node's own content; any other node collapses
    // to "" and its (too deep) subtree is dropped.
    return typeof node?.text === "string" ? node.text : "";
  }
  nodeDepth++;
  try {
    return processNodeInner(node);
  } finally {
    // Always unwind the counter, even if processNodeInner throws, so one
    // failure cannot leave later siblings permanently "too deep".
    nodeDepth--;
  }
};
|
||||
|
||||
const processNodeInner = (node: any): string => {
|
||||
const type = node.type;
|
||||
const nodeContent = node.content || [];
|
||||
|
||||
@@ -182,7 +224,16 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
.map(processNode)
|
||||
.join("")
|
||||
.replace(/\n+$/, "");
|
||||
return "```" + language + "\n" + code + "\n```";
|
||||
// CommonMark: an inner ``` run inside the code would prematurely close
|
||||
// a 3-backtick fence (corrupting the block on re-import). Use an outer
|
||||
// fence one backtick longer than the longest backtick run in the code
|
||||
// (minimum 3) so the inner fence is always content.
|
||||
const longestBacktickRun = (code.match(/`+/g) || []).reduce(
|
||||
(max: number, run: string) => Math.max(max, run.length),
|
||||
0,
|
||||
);
|
||||
const fence = "`".repeat(Math.max(3, longestBacktickRun + 1));
|
||||
return fence + language + "\n" + code + "\n" + fence;
|
||||
|
||||
case "bulletList":
|
||||
return nodeContent
|
||||
@@ -228,16 +279,35 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
// a bare "\n" would be reimported as a soft break and lost.
|
||||
return " \n";
|
||||
|
||||
case "image":
|
||||
const imgAlt = node.attrs?.alt || "";
|
||||
case "image": {
|
||||
const imgAttrs = node.attrs || {};
|
||||
// A top-level image with layout/identity attrs beyond src/alt cannot be
|
||||
// expressed by markdown `` — width/height/align/size/
|
||||
// attachmentId/aspectRatio would be silently dropped on export and lost
|
||||
// on re-import. Emit the SAME schema-matching <img> used inside columns
|
||||
// (imageToHtml) so those attrs survive the round-trip. A bare image
|
||||
// (only src/alt, optionally a title — which has no schema attr) keeps
|
||||
// the lighter markdown form so existing image round-trip tests hold.
|
||||
const hasLayoutAttrs =
|
||||
imgAttrs.width != null ||
|
||||
imgAttrs.height != null ||
|
||||
imgAttrs.align ||
|
||||
imgAttrs.size != null ||
|
||||
imgAttrs.attachmentId ||
|
||||
imgAttrs.aspectRatio != null;
|
||||
if (hasLayoutAttrs) {
|
||||
return imageToHtml(node);
|
||||
}
|
||||
const imgAlt = imgAttrs.alt || "";
|
||||
// Neutralize characters that could break out of the markdown image
|
||||
// URL: spaces/newlines and parentheses would terminate the (...) target
|
||||
// and let a stored src inject following markdown/HTML. Percent-encode
|
||||
// them so the URL stays a single inert token.
|
||||
const imgSrc = encodeMdUrl(node.attrs?.src);
|
||||
const imgSrc = encodeMdUrl(imgAttrs.src);
|
||||
// No "caption" attribute exists in the Docmost image schema, so we do
|
||||
// not emit one (the previous caption branch was dead).
|
||||
return ``;
|
||||
}
|
||||
|
||||
case "video": {
|
||||
// Emit the schema-matching <video> element so generateJSON rebuilds the
|
||||
@@ -581,6 +651,83 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
case "subpages":
|
||||
return "{{SUBPAGES}}";
|
||||
|
||||
case "status": {
|
||||
// Inline status pill. The schema reads the label from the element's
|
||||
// TEXT content and the color from data-color, so emit both; without a
|
||||
// case this inline atom fell through to `default` and collapsed to "".
|
||||
const attrs = node.attrs || {};
|
||||
const statusColor = attrs.color || "gray";
|
||||
return `<span data-type="status" data-color="${escapeAttr(statusColor)}">${escapeHtmlText(attrs.text ?? "")}</span>`;
|
||||
}
|
||||
|
||||
case "htmlEmbed": {
|
||||
// Block atom; the schema reads the raw source from a base64-encoded
|
||||
// data-source attribute (and an optional fixed height from data-height).
|
||||
// Encode with the shared helper so it decodes symmetrically on import.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [
|
||||
`data-type="htmlEmbed"`,
|
||||
`data-source="${escapeAttr(encodeHtmlEmbedSource(attrs.source ?? ""))}"`,
|
||||
];
|
||||
if (attrs.height != null)
|
||||
parts.push(`data-height="${escapeAttr(attrs.height)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "footnoteReference": {
|
||||
// Inline atom marker. The schema reads its id from data-id on a
|
||||
// sup[data-footnote-ref]; the visible number is derived, not stored.
|
||||
const attrs = node.attrs || {};
|
||||
const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : "";
|
||||
return `<sup data-footnote-ref${idAttr}></sup>`;
|
||||
}
|
||||
|
||||
case "footnotesList": {
|
||||
// Bottom container of footnote definitions (section[data-footnotes]).
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<section data-footnotes>${inner}</section>`;
|
||||
}
|
||||
|
||||
case "footnoteDefinition": {
|
||||
// One footnote note keyed by id (div[data-footnote-def]).
|
||||
const attrs = node.attrs || {};
|
||||
const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : "";
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<div data-footnote-def${idAttr}>${inner}</div>`;
|
||||
}
|
||||
|
||||
case "pageEmbed": {
|
||||
// Whole-page live embed; the schema reads data-source-page-id.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="pageEmbed"`];
|
||||
if (attrs.sourcePageId)
|
||||
parts.push(`data-source-page-id="${escapeAttr(attrs.sourcePageId)}"`);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "transclusionReference": {
|
||||
// Live reference to a transcluded block/page. Block atom; the schema
|
||||
// reads data-source-page-id and data-transclusion-id.
|
||||
const attrs = node.attrs || {};
|
||||
const parts: string[] = [`data-type="transclusionReference"`];
|
||||
if (attrs.sourcePageId)
|
||||
parts.push(`data-source-page-id="${escapeAttr(attrs.sourcePageId)}"`);
|
||||
if (attrs.transclusionId)
|
||||
parts.push(
|
||||
`data-transclusion-id="${escapeAttr(attrs.transclusionId)}"`,
|
||||
);
|
||||
return `<div ${parts.join(" ")}></div>`;
|
||||
}
|
||||
|
||||
case "transclusionSource": {
|
||||
// Sync-source container; the schema reads data-id and re-parses its
|
||||
// block children, so render them as schema-matching HTML.
|
||||
const attrs = node.attrs || {};
|
||||
const idAttr = attrs.id ? ` data-id="${escapeAttr(attrs.id)}"` : "";
|
||||
const inner = nodeContent.map((n: any) => blockToHtml(n)).join("");
|
||||
return `<div data-type="transclusionSource"${idAttr}>${inner}</div>`;
|
||||
}
|
||||
|
||||
default:
|
||||
// Fallback: process children
|
||||
return nodeContent.map(processNode).join("");
|
||||
@@ -782,6 +929,12 @@ export function convertProseMirrorToMarkdown(content: any): string {
|
||||
case "attachment":
|
||||
case "drawio":
|
||||
case "excalidraw":
|
||||
case "htmlEmbed":
|
||||
case "footnotesList":
|
||||
case "footnoteDefinition":
|
||||
case "pageEmbed":
|
||||
case "transclusionSource":
|
||||
case "transclusionReference":
|
||||
return processNode(block);
|
||||
default:
|
||||
// Any still-unhandled block type: NEVER fall back to markdown inside a
|
||||
|
||||
Reference in New Issue
Block a user