# Conflicts: # apps/server/src/core/ai-chat/ai-chat.service.spec.ts # apps/server/src/core/ai-chat/ai-chat.service.ts
119 lines
5.7 KiB
JavaScript
119 lines
5.7 KiB
JavaScript
/**
 * Self-contained Docmost-flavoured Markdown document (custom extensions).
 *
 * A single `.md` file that packages everything needed to losslessly round-trip
 * a page through "download -> edit body -> re-upload":
 *   - a leading `docmost:meta` block: a one-line JSON object with page identity;
 *   - the Markdown body (carrying inline comment anchors and diagrams as HTML);
 *   - a trailing `docmost:comments` block: a one-line JSON array of comment
 *     threads.
 *
 * Both metadata blocks are HTML comments on purpose: `marked`/`generateJSON`
 * drop HTML comments, so even if the WHOLE file were ever fed straight to the
 * importer without first stripping the blocks, the metadata cannot leak into the
 * document. (A fenced ```docmost-comments``` block would WRONGLY become a
 * codeBlock node, so a fenced block is deliberately NOT used.)
 *
 * The delimiter literals may legitimately appear in the BODY too (e.g. a user
 * re-pastes an exported `.md` into a page, or a page documents this very
 * format). To stay robust, parsing treats only the FINAL, document-ending
 * `docmost:comments` block as metadata: it is the last `<!-- docmost:comments`
 * opener whose closing `-->` sits at the very end of the file. Any earlier
 * literal occurrence is left in the body untouched.
 *
 * NOTE on comments: in this version the comment THREAD records are preserved in
 * the file but are NOT pushed back to the server on import — only the inline
 * comment marks (anchors) embedded in the body are restored. Managing comment
 * records stays with the comment tools/UI.
 */
// Matches the leading `docmost:meta` block. Start-anchored (only leading
// whitespace may precede the opener), so a literal occurrence further down in
// the body can never be mistaken for the file-level block. Capture group 1 is
// the text between the opener line and the closing `\n-->` (expected to be
// JSON). The capture is lazy so it stops at the FIRST closing delimiter.
const META_RE = /^\s*<!--\s*docmost:meta\s*\n([\s\S]*?)\n-->/;
// Matches a `docmost:comments` opener line (the opener plus its line break).
// Used globally to scan for the LAST opener rather than end-anchoring a single
// regex (which would mis-capture across a literal opener that appears earlier
// in the body). Because of the `g` flag the regex is stateful: reset
// `lastIndex` to 0 before each `exec` scan.
const COMMENTS_OPEN_RE = /<!--[ \t]*docmost:comments[ \t]*\r?\n/g;
/**
 * Assemble the full self-contained markdown file: meta block, body, and the
 * comments block. The meta block is always emitted; the comments block is always
 * emitted too (with `[]` when there are no comments) so the format stays uniform
 * and parsing stays simple.
 *
 * Both JSON payloads are made safe to embed in an HTML comment: any `-->`
 * inside a JSON string is written as `--\u003e`, which `JSON.parse` decodes
 * back to `-->`. Without this, a page title or comment text containing `-->`
 * would close the HTML comment early and the rest of the metadata would be
 * treated as document content by a Markdown/HTML consumer.
 *
 * @param meta - Page identity object; a value `JSON.stringify` cannot encode
 *   (e.g. `undefined`) is written as `null`.
 * @param body - Markdown body; `null`/`undefined` is treated as an empty body.
 *   Surrounding whitespace is trimmed.
 * @param comments - Comment thread records; anything that is not an array is
 *   written as `[]`.
 * @returns The complete file text, ending with a newline.
 */
export function serializeDocmostMarkdown(meta, body, comments) {
    // `JSON.stringify` emits no raw line breaks, so each payload stays on one
    // line; `-->` can only occur inside a JSON string, where `\u003e` is a
    // valid escape for `>`.
    const commentSafe = (json) => json.replace(/-->/g, "--\\u003e");
    // `JSON.stringify(undefined)` yields `undefined`, which would otherwise be
    // interpolated as the (unparseable) text "undefined".
    const metaJson = commentSafe(JSON.stringify(meta) ?? "null");
    const commentsJson = commentSafe(JSON.stringify(Array.isArray(comments) ? comments : []));
    const trimmedBody = (body ?? "").trim();
    return (`<!-- docmost:meta\n${metaJson}\n-->\n\n` +
        `${trimmedBody}\n\n` +
        `<!-- docmost:comments\n${commentsJson}\n-->\n`);
}
/**
 * Split a self-contained file back into its parts. Tolerant: if the meta or
 * comments block is missing (e.g. a hand-written plain-markdown file), the
 * corresponding value is returned as `null` and the whole input is treated as
 * the body. This never throws on a MISSING block; only a `JSON.parse` failure
 * inside a block that IS present is surfaced as a thrown Error with a clear
 * message. Robust to `\r\n` line endings.
 *
 * @param full - Complete file text; `null`/`undefined` is treated as "".
 * @returns `{ meta, body, comments }` where `meta` and `comments` are the
 *   parsed JSON values (or `null` when the block is absent) and `body` is the
 *   trimmed text between the two blocks, with `\r\n` normalized to `\n`.
 * @throws {Error} When a present meta or comments block does not contain
 *   valid JSON.
 */
export function parseDocmostMarkdown(full) {
    // Normalize line endings so the newline-anchored delimiters work
    // regardless of CRLF.
    const normalized = (full ?? "").replace(/\r\n/g, "\n");

    // Extract the leading meta block (start-anchored — already unambiguous).
    let meta = null;
    let metaEnd = 0;
    const metaMatch = normalized.match(META_RE);
    if (metaMatch) {
        try {
            meta = JSON.parse(metaMatch[1]);
        }
        catch (e) {
            throw new Error(`Invalid docmost:meta JSON block: ${e instanceof Error ? e.message : String(e)}`);
        }
        // Body starts right after the matched meta block.
        metaEnd = (metaMatch.index ?? 0) + metaMatch[0].length;
    }

    // Find the LAST `<!-- docmost:comments` opener; the real file-level block
    // is the final one whose closing `-->` ends the document. Any earlier
    // literal occurrence inside the body (e.g. a re-pasted export) is left in
    // the body.
    let lastOpenStart = -1;
    let lastOpenEnd = -1;
    // The regex is global and therefore stateful: always scan from the start.
    COMMENTS_OPEN_RE.lastIndex = 0;
    for (let opener = COMMENTS_OPEN_RE.exec(normalized); opener !== null; opener = COMMENTS_OPEN_RE.exec(normalized)) {
        lastOpenStart = opener.index;
        lastOpenEnd = opener.index + opener[0].length;
    }

    let comments = null;
    let bodyEnd = normalized.length;
    if (lastOpenStart !== -1) {
        // The closer must end the document: after dropping trailing
        // whitespace, the text following the opener has to finish with a
        // `-->` on its own line. Plain string operations are used instead of a
        // regex with several adjacent whitespace quantifiers, which could
        // backtrack quadratically on a long run of spaces in an uploaded file.
        const closer = "\n-->";
        const tail = normalized.slice(lastOpenEnd).trimEnd();
        if (tail.endsWith(closer)) {
            const jsonText = tail.slice(0, tail.length - closer.length);
            try {
                comments = JSON.parse(jsonText);
            }
            catch (e) {
                throw new Error(`Invalid docmost:comments JSON block: ${e instanceof Error ? e.message : String(e)}`);
            }
            bodyEnd = lastOpenStart; // strip from the opener to end of document
        }
    }

    const body = normalized.slice(metaEnd, bodyEnd).trim();
    return { meta, body, comments };
}
/**
 * Serialize a self-contained markdown file with the meta block + body ONLY —
 * NO trailing `docmost:comments` block. The sync engine never touches
 * `/comments` (SPEC §3): the synced file carries just page identity (meta) and
 * the body, where comment threads survive only as inline `<span
 * data-comment-id>` anchor marks inside the body.
 *
 * `parseDocmostMarkdown` already tolerates a missing comments block (it returns
 * `comments: null` and treats the rest as body), so a file produced here
 * round-trips cleanly through the parser.
 *
 * The meta JSON is made safe to embed in an HTML comment: any `-->` inside a
 * JSON string is written as `--\u003e` (decoded back to `-->` by
 * `JSON.parse`), so a value such as a page title containing `-->` cannot
 * close the comment early.
 *
 * @param meta - Page identity object; a value `JSON.stringify` cannot encode
 *   (e.g. `undefined`) is written as `null`.
 * @param body - Markdown body; `null`/`undefined` is treated as an empty body.
 *   Surrounding whitespace is trimmed.
 * @returns The file text (meta block, blank line, body), ending with a newline.
 */
export function serializeDocmostMarkdownBody(meta, body) {
    // `JSON.stringify(undefined)` yields `undefined`; fall back to `null` so
    // the block always contains parseable JSON.
    const metaJson = (JSON.stringify(meta) ?? "null").replace(/-->/g, "--\\u003e");
    const trimmedBody = (body ?? "").trim();
    return `<!-- docmost:meta\n${metaJson}\n-->\n\n${trimmedBody}\n`;
}