Compare commits
5 Commits
fix/244-da
...
refactor/1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
888deba891 | ||
|
|
4c7b671950 | ||
|
|
4131deaabb | ||
|
|
5b88e3dddf | ||
|
|
d0ca127d83 |
@@ -205,61 +205,31 @@ describe('PersistenceExtension.onStoreDocument — Approach-A boundary snapshot'
|
||||
expect(historyQueue.add).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
// #206 persist-6 — FIXED: a momentarily-empty live Y.Doc must not overwrite
|
||||
// non-empty persisted content. `onStoreDocument` empty-guarded the LOAD path
|
||||
// but not the STORE path, so an empty doc (a client/agent glitch, a bad
|
||||
// merge, an emptying transclusion) was written straight over the page and the
|
||||
// content was wiped silently. The store-side empty-guard now skips the write
|
||||
// when the incoming doc is empty and the stored page is non-empty. A real
|
||||
// intentional-clear UX is tracked separately in issue #251.
|
||||
it('does NOT overwrite non-empty content with a momentarily-empty live doc (persist-6)', async () => {
|
||||
const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] };
|
||||
const document = ydocFor(emptyDoc);
|
||||
pageRepo.findById.mockResolvedValue({
|
||||
...persistedHumanPage('IGNORED'),
|
||||
content: doc('IMPORTANT RICH CONTENT'),
|
||||
});
|
||||
// #206 persist-6 — RED (it.failing): a momentarily-empty live Y.Doc must not
|
||||
// overwrite non-empty persisted content. `onStoreDocument` empty-guards the
|
||||
// LOAD path but not the STORE path, so today an empty doc (a client/agent
|
||||
// glitch, a bad merge, an emptying transclusion) is written straight over the
|
||||
// page and the content is wiped silently. A store-side empty-guard is a real
|
||||
// behaviour change (a deliberate "select-all + delete" is also empty), so it
|
||||
// is left UNFIXED pending a product decision; this documents the data-loss
|
||||
// path and flips to a normal passing test the moment the guard lands.
|
||||
it.failing(
|
||||
'does NOT overwrite non-empty content with a momentarily-empty live doc (persist-6)',
|
||||
async () => {
|
||||
const emptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] };
|
||||
const document = ydocFor(emptyDoc);
|
||||
pageRepo.findById.mockResolvedValue({
|
||||
...persistedHumanPage('IGNORED'),
|
||||
content: doc('IMPORTANT RICH CONTENT'),
|
||||
});
|
||||
|
||||
await ext.onStoreDocument(buildData(document, 'user') as any);
|
||||
await ext.onStoreDocument(buildData(document, 'user') as any);
|
||||
|
||||
// The empty incoming doc is rejected and the rich page survives.
|
||||
expect(pageRepo.updatePage).not.toHaveBeenCalled();
|
||||
// No false-success side effects for a write that never happened.
|
||||
expect((document as any).broadcastStateless).not.toHaveBeenCalled();
|
||||
expect(historyQueue.add).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
// persist-6 — a legitimately-empty existing page must still be writable when
|
||||
// the empty live doc actually DIFFERS from the stored content (so the
|
||||
// unchanged short-circuit does NOT fire and execution reaches the empty-guard).
|
||||
// This exercises the guard's third condition `!isEmptyParagraphDoc(page.content)`:
|
||||
// because the stored page is ALSO empty, the guard must NOT block the write.
|
||||
// The live doc normalizes to a paragraph carrying `attrs: { indent: 0 }` and no
|
||||
// `content` key; the stored page is an empty paragraph with `content: []` —
|
||||
// both empty per `isEmptyParagraphDoc`, but NOT `isDeepStrictEqual`, so the
|
||||
// store passes the short-circuit (~line 208) and genuinely enters the guard
|
||||
// (~line 229). If the `!isEmptyParagraphDoc(page.content)` condition were
|
||||
// removed, the guard would block this write and updatePage would never run,
|
||||
// failing this test.
|
||||
it('does not block an empty store over an already-empty page (persist-6)', async () => {
|
||||
const liveEmptyDoc = { type: 'doc', content: [{ type: 'paragraph' }] };
|
||||
const document = ydocFor(liveEmptyDoc);
|
||||
// Stored content is empty per isEmptyParagraphDoc (paragraph with content:[])
|
||||
// but structurally NOT deep-equal to the normalized live doc — so execution
|
||||
// skips the unchanged short-circuit and reaches the empty-guard.
|
||||
const storedEmptyDoc = { type: 'doc', content: [{ type: 'paragraph', content: [] }] };
|
||||
pageRepo.findById.mockResolvedValue({
|
||||
...persistedHumanPage('IGNORED'),
|
||||
content: storedEmptyDoc,
|
||||
});
|
||||
|
||||
await ext.onStoreDocument(buildData(document, 'user') as any);
|
||||
|
||||
// Empty-over-empty reaches the guard, which must let the write through
|
||||
// (the stored page is empty, so the empty-overwrite protection does not
|
||||
// apply). updatePage IS called — proving `!isEmptyParagraphDoc(page.content)`.
|
||||
expect(pageRepo.updatePage).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
// Desired contract: the empty incoming doc is rejected and the rich page
|
||||
// survives. Today updatePage is called with the empty content (data loss).
|
||||
expect(pageRepo.updatePage).not.toHaveBeenCalled();
|
||||
},
|
||||
);
|
||||
|
||||
// persist-1 — when every attempt fails the hook must NOT report a phantom
|
||||
// success: no "page.updated" badge broadcast and no history snapshot for
|
||||
|
||||
@@ -210,35 +210,6 @@ export class PersistenceExtension implements Extension {
|
||||
return;
|
||||
}
|
||||
|
||||
// #206 persist-6 — store-side empty-guard. A momentarily-empty live
|
||||
// Y.Doc (a client/agent glitch, a bad merge, a transclusion that
|
||||
// emptied) must NOT overwrite non-empty persisted content. The LOAD
|
||||
// path already guards emptiness (onLoadDocument only hydrates from db
|
||||
// when the live doc isEmpty); the STORE path did not, so an empty
|
||||
// serialization was written straight over the page, wiping it
|
||||
// silently. Skip the write when the incoming doc is an empty
|
||||
// paragraph doc AND the stored page is non-empty. New/empty pages are
|
||||
// unaffected (stored content is already empty), and an unchanged doc
|
||||
// was already short-circuited above.
|
||||
//
|
||||
// This unconditionally blocks empty-over-non-empty: a deliberate
|
||||
// select-all + delete is currently indistinguishable from a glitch at
|
||||
// this layer, so data-loss prevention wins. A real intentional-clear
|
||||
// UX (a distinct signal threaded from the client) is tracked in issue
|
||||
// #251; do not re-add an escape hatch here without that signal.
|
||||
if (
|
||||
isEmptyParagraphDoc(tiptapJson as any) &&
|
||||
page.content &&
|
||||
!isEmptyParagraphDoc(page.content as any)
|
||||
) {
|
||||
this.logger.warn(
|
||||
`Skipping store for ${pageId}: empty live doc would overwrite ` +
|
||||
`non-empty persisted content`,
|
||||
);
|
||||
page = null;
|
||||
return;
|
||||
}
|
||||
|
||||
let contributorIds = undefined;
|
||||
try {
|
||||
const existingContributors = page.contributorIds || [];
|
||||
|
||||
@@ -5,6 +5,34 @@ import { pathToFileURL } from 'node:url';
|
||||
* ESM-only `@docmost/mcp` package. We only need the constructor + the read/write
|
||||
* methods used by the per-user tool adapter; the full client surface lives in
|
||||
* `packages/mcp/src/client.ts`. Signatures here mirror that file exactly.
|
||||
*
|
||||
* DRIFT GUARD: the method NAMES below are runtime-checked against the real
|
||||
* `DocmostClient` by `packages/mcp/test/unit/client-host-contract.test.mjs`
|
||||
* (which can import the ESM class directly). If you rename/remove a method here
|
||||
* or in client.ts, that test fails — so a stale mirror cannot silently ship a
|
||||
* runtime "x is not a function" into an agent tool call. Keep the two in sync.
|
||||
*
|
||||
* STAGED PLAN — full derivation `DocmostClientLike = <real DocmostClient type>`
|
||||
* (issue #193, layer 3) is intentionally NOT done; it stays a hand-mirror for
|
||||
* now because of two verified blockers across the ESM(mcp)/CJS(server) boundary:
|
||||
* 1. `@docmost/mcp` emits NO declaration files (its tsconfig has no
|
||||
* `declaration`, package.json has no `types`/types-export) and the server
|
||||
* tsconfig has no path mapping for it — the server only loads it via the
|
||||
* runtime `import()` trick below, so there is no type to import today.
|
||||
* 2. The real client methods have inferred, CONCRETE return types; the in-app
|
||||
* tool adapter reads results through loose `Record<string,unknown>` returns
|
||||
* + `as` casts (e.g. `(result?.data ?? {}) as { title?: string }`).
|
||||
* Deriving the exact type would make those casts non-overlapping ("may be a
|
||||
* mistake") and break the build, and `Partial<DocmostClientLike>` test stubs
|
||||
* would have to satisfy the full concrete surface.
|
||||
* To do it safely later (incrementally): (a) turn on `declaration: true` in
|
||||
* packages/mcp/tsconfig.json + add a `types` export condition and commit the
|
||||
* emitted `.d.ts`; (b) `import type { DocmostClient } from '@docmost/mcp'` here
|
||||
* and replace this interface with a `Pick<DocmostClient, ...>` of the consumed
|
||||
* methods; (c) audit every `as` cast in ai-chat-tools.service.ts against the now
|
||||
* concrete return types (double-cast through `unknown` only where genuinely
|
||||
* needed); (d) keep the runtime guard test as a belt-and-braces check. Until
|
||||
* then the guard test above is the cheap, behaviour-neutral protection.
|
||||
*/
|
||||
export interface DocmostClientLike {
|
||||
// --- read ---
|
||||
|
||||
@@ -1,147 +1,77 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { htmlToMarkdown } from "./turndown.utils";
|
||||
import { markdownToHtml } from "./marked.utils";
|
||||
|
||||
/**
|
||||
* #206 mdrt-2 — Markdown export must never SILENTLY drop a block. (FIXED)
|
||||
* #206 mdrt-2 — Markdown export must never SILENTLY drop a block.
|
||||
*
|
||||
* `htmlToMarkdown` (turndown) historically only registered rules for a fixed
|
||||
* set of custom nodes (callout, taskItem, details, math, iframe, htmlEmbed,
|
||||
* image, video, footnote). Any other custom node — `transclusionReference`,
|
||||
* `pageBreak`, `mention`, `status` — fell through to turndown's default
|
||||
* handling: an empty wrapper is "blank" and removed, so the block disappeared
|
||||
* from the exported Markdown with no trace, and `mention`/`status` collapsed to
|
||||
* bare text, losing their identity (data-id / data-color). The invariant
|
||||
* "never silently lose a block" was broken.
|
||||
* `htmlToMarkdown` (turndown) only registers rules for a fixed set of custom
|
||||
* nodes (callout, taskItem, details, math, iframe, htmlEmbed, image, video,
|
||||
* footnote). Any other custom node — `transclusionReference`, `pageBreak`,
|
||||
* `mention`, `status` — falls through to turndown's default handling: an empty
|
||||
* wrapper is "blank" and removed, so the block disappears from the exported
|
||||
* Markdown with no trace. The invariant "never silently lose a block" is broken.
|
||||
*
|
||||
* The fix adds lossless turndown rules that re-emit each of these nodes as raw
|
||||
* HTML carrying every `data-*` attribute. Plain-Markdown viewers ignore the
|
||||
* inert tag; the import path round-trips it (`markdownToHtml` passes the raw
|
||||
* HTML through and each node's `parseHTML` rebuilds the ProseMirror node). These
|
||||
* tests assert the surviving contract (the block is preserved AND its identity
|
||||
* round-trips back through import).
|
||||
* The `it.fails` cases assert the DESIRED contract (the block survives export in
|
||||
* SOME form) and are RED today: they document the unfixed data loss and flip to
|
||||
* green the moment a turndown rule (real syntax or a lossless HTML-comment
|
||||
* placeholder) is added. A normal characterization `it` pins the exact current
|
||||
* lossy output so the regression is unambiguous.
|
||||
*/
|
||||
describe("htmlToMarkdown — custom nodes are preserved losslessly (#206 mdrt-2)", () => {
|
||||
const wrap = (inner: string) => `<p>before</p>${inner}<p>after</p>`;
|
||||
describe("htmlToMarkdown — custom nodes without a turndown rule (#206 mdrt-2)", () => {
|
||||
const wrap = (inner: string) =>
|
||||
`<p>before</p>${inner}<p>after</p>`;
|
||||
|
||||
it("preserves a pageBreak block on Markdown export", () => {
|
||||
it("CURRENTLY drops a pageBreak entirely (data loss)", () => {
|
||||
const md = htmlToMarkdown(
|
||||
wrap('<div data-type="pageBreak" class="page-break"></div>'),
|
||||
);
|
||||
// The page break vanishes: only the two paragraphs remain, nothing between.
|
||||
expect(md).toContain("before");
|
||||
expect(md).toContain("after");
|
||||
// The break survives as an inert raw-HTML tag, not silently dropped.
|
||||
expect(md).toMatch(/data-type="pageBreak"/);
|
||||
expect(md).toMatch(/page-?break/i);
|
||||
expect(md).not.toMatch(/page-?break/i);
|
||||
expect(md).not.toContain("---"); // not even a horizontal-rule fallback
|
||||
});
|
||||
|
||||
it("preserves a transclusionReference's identity on Markdown export", () => {
|
||||
it("CURRENTLY drops a transclusionReference entirely (data loss)", () => {
|
||||
const md = htmlToMarkdown(
|
||||
wrap('<div data-type="transclusionReference" data-id="abc"></div>'),
|
||||
);
|
||||
expect(md).toContain("before");
|
||||
expect(md).toContain("after");
|
||||
// The data-id (the only thing that gives the reference identity) survives.
|
||||
expect(md).toContain("abc");
|
||||
expect(md).toMatch(/data-type="transclusionReference"/);
|
||||
// The data-id (the only thing that gives the reference identity) is gone.
|
||||
expect(md).not.toContain("abc");
|
||||
});
|
||||
|
||||
it("preserves a mention's data-id (stable identity) on Markdown export", () => {
|
||||
const md = htmlToMarkdown(
|
||||
'<p>hi <span data-type="mention" data-id="u1" data-label="Bob">@Bob</span> there</p>',
|
||||
);
|
||||
// The mention keeps its stable identity (data-id), not just the text.
|
||||
expect(md).toContain("u1");
|
||||
expect(md).toContain("Bob");
|
||||
expect(md).toMatch(/data-type="mention"/);
|
||||
});
|
||||
|
||||
it("preserves a status chip's color on Markdown export", () => {
|
||||
const md = htmlToMarkdown(
|
||||
'<p>s <span data-type="status" data-color="green">Done</span></p>',
|
||||
);
|
||||
// The chip's color (its identity) survives, not just the visible text.
|
||||
expect(md).toContain("green");
|
||||
expect(md).toContain("Done");
|
||||
expect(md).toMatch(/data-type="status"/);
|
||||
});
|
||||
|
||||
// The export form is only lossless if the import path can rebuild it. These
|
||||
// assert the full MD -> HTML round-trip restores the node + its attributes,
|
||||
// which is the marker <-> node contract each `parseHTML` relies on.
|
||||
describe("import round-trip (markdownToHtml restores the node)", () => {
|
||||
it("round-trips a pageBreak through export + import", async () => {
|
||||
it.fails(
|
||||
"should NOT lose a pageBreak block on Markdown export",
|
||||
() => {
|
||||
const md = htmlToMarkdown(
|
||||
wrap('<div data-type="pageBreak" class="page-break"></div>'),
|
||||
);
|
||||
const html = await markdownToHtml(md);
|
||||
expect(html).toMatch(/<div[^>]*data-type="pageBreak"[^>]*>/);
|
||||
expect(html).toContain("before");
|
||||
expect(html).toContain("after");
|
||||
});
|
||||
// Desired: the break survives in some form (e.g. a `---` rule or marker).
|
||||
expect(md).toMatch(/(-{3,}|page-?break)/i);
|
||||
},
|
||||
);
|
||||
|
||||
it("round-trips a transclusionReference (keeps data-id)", async () => {
|
||||
it.fails(
|
||||
"should NOT lose a transclusionReference's identity on Markdown export",
|
||||
() => {
|
||||
const md = htmlToMarkdown(
|
||||
wrap('<div data-type="transclusionReference" data-id="abc"></div>'),
|
||||
);
|
||||
const html = await markdownToHtml(md);
|
||||
expect(html).toMatch(/<div[^>]*data-type="transclusionReference"[^>]*>/);
|
||||
expect(html).toContain("abc");
|
||||
});
|
||||
// Desired: the referenced id survives so the block can be rebuilt.
|
||||
expect(md).toContain("abc");
|
||||
},
|
||||
);
|
||||
|
||||
it("round-trips a mention (keeps data-id + data-label)", async () => {
|
||||
it.fails(
|
||||
"should NOT lose a mention's data-id on Markdown export",
|
||||
() => {
|
||||
const md = htmlToMarkdown(
|
||||
'<p>hi <span data-type="mention" data-id="u1" data-label="Bob">@Bob</span> there</p>',
|
||||
);
|
||||
const html = await markdownToHtml(md);
|
||||
expect(html).toMatch(/<span[^>]*data-type="mention"[^>]*>/);
|
||||
expect(html).toContain("u1");
|
||||
expect(html).toContain("Bob");
|
||||
});
|
||||
|
||||
it("round-trips a status chip (keeps data-color)", async () => {
|
||||
const md = htmlToMarkdown(
|
||||
'<p>s <span data-type="status" data-color="green">Done</span></p>',
|
||||
);
|
||||
const html = await markdownToHtml(md);
|
||||
expect(html).toMatch(/<span[^>]*data-type="status"[^>]*>/);
|
||||
expect(html).toContain("green");
|
||||
});
|
||||
|
||||
// HTML special chars in an attribute value or in a node's text must be
|
||||
// ESCAPED when re-emitted as raw HTML, otherwise the exported tag is
|
||||
// malformed and `markdownToHtml`'s parser cannot restore the original value
|
||||
// (the same silent data loss this PR fixes). Dropping `<`/`>` escaping is the
|
||||
// dangerous regression: a stray `<` or `>` corrupts the tag (or injects new
|
||||
// markup), so the test data carries ALL of `&`, `"`, `<`, `>` in BOTH the
|
||||
// data-label attribute and the visible text. That fully exercises
|
||||
// escapeHtmlAttr's `&,",<,>` branches and escapeHtmlText's `&,<,>` branches
|
||||
// (escapeHtmlText leaves `"` literal); the alphanumeric-only cases above hit
|
||||
// none of them.
|
||||
it("escapes HTML special chars (& \" < >) in attrs + text and round-trips them", async () => {
|
||||
const md = htmlToMarkdown(
|
||||
`<p>hi <span data-type="mention" data-id="u1" data-label="A &amp; &lt;B&gt; &quot;C&quot;">@A &amp; &lt;B&gt; "C"</span> there</p>`,
|
||||
);
|
||||
|
||||
// (a) The exported Markdown carries a WELL-FORMED, correctly-escaped tag:
|
||||
// the attribute escapes `&`, `<`, `>` AND `"`; the text escapes `&`, `<`,
|
||||
// `>` (a `"` inside text content is legal, so it stays literal).
|
||||
expect(md).toContain('data-label="A &amp; &lt;B&gt; &quot;C&quot;"');
|
||||
expect(md).toContain('>@A &amp; &lt;B&gt; "C"</span>');
|
||||
// And explicitly NOT the raw, tag-corrupting forms: a literal `<B>` (would
|
||||
// mean `<`/`>` escaping was dropped in either the attr or the text)...
|
||||
expect(md).not.toContain("<B>");
|
||||
// ...nor the malformed attribute that an unescaped `"` would produce.
|
||||
expect(md).not.toContain('data-label="A &amp; &lt;B&gt; "C""');
|
||||
|
||||
// (b) Import restores the ORIGINAL (unescaped) values, attribute and text.
|
||||
const html = await markdownToHtml(md);
|
||||
const dom = new DOMParser().parseFromString(html as string, "text/html");
|
||||
const span = dom.querySelector('span[data-type="mention"]');
|
||||
expect(span).not.toBeNull();
|
||||
expect(span!.getAttribute("data-id")).toBe("u1");
|
||||
expect(span!.getAttribute("data-label")).toBe('A & <B> "C"');
|
||||
expect(span!.textContent).toBe('@A & <B> "C"');
|
||||
});
|
||||
});
|
||||
// Desired: the mention keeps its stable identity (data-id), not just text.
|
||||
expect(md).toContain("u1");
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
@@ -43,54 +43,6 @@ function fillEmptyFootnoteRefs(html: string): string {
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* `pageBreak` and `transclusionReference` are childless atom <div>s. Like an
|
||||
* empty footnote ref (see above), turndown treats a childless block as "blank"
|
||||
* and replaces it with the blankRule BEFORE any custom rule can fire — so the
|
||||
* node disappears from the export with no trace (#206 mdrt-2). Inject a
|
||||
* zero-width space so the node is non-blank and our lossless rule runs; the
|
||||
* rule rebuilds the tag from the element's attributes, so the injected char
|
||||
* never reaches the output.
|
||||
*/
|
||||
function fillEmptyAtomBlocks(html: string): string {
|
||||
return html.replace(
|
||||
/<div\b([^>]*\bdata-type="(?:pageBreak|transclusionReference)"[^>]*)>\s*<\/div>/gi,
|
||||
(_m, attrs) => `<div${attrs}></div>`,
|
||||
);
|
||||
}
|
||||
|
||||
/** HTML-escape an attribute value so a re-emitted raw-HTML tag is well-formed. */
|
||||
function escapeHtmlAttr(value: string): string {
|
||||
return value
|
||||
.replace(/&/g, '&amp;')
|
||||
.replace(/"/g, '&quot;')
|
||||
.replace(/</g, '&lt;')
|
||||
.replace(/>/g, '&gt;');
|
||||
}
|
||||
|
||||
/** HTML-escape text placed inside a re-emitted raw-HTML element. */
|
||||
function escapeHtmlText(value: string): string {
|
||||
return value
|
||||
.replace(/&/g, '&amp;')
|
||||
.replace(/</g, '&lt;')
|
||||
.replace(/>/g, '&gt;');
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize ALL of an element's attributes back to a raw-HTML attribute string
|
||||
* (leading space included). Generic on purpose: a custom node's identity lives
|
||||
* entirely in its `data-*` attributes (data-id, data-color, data-source-page-id,
|
||||
* data-transclusion-id, …), and serializing every attribute keeps the export
|
||||
* lossless regardless of which attributes a given node carries.
|
||||
*/
|
||||
function serializeAttrs(node: any): string {
|
||||
const attrs = node?.attributes;
|
||||
if (!attrs) return '';
|
||||
return Array.from(attrs as ArrayLike<{ name: string; value: string }>)
|
||||
.map((attr) => ` ${attr.name}="${escapeHtmlAttr(attr.value ?? '')}"`)
|
||||
.join('');
|
||||
}
|
||||
|
||||
export function htmlToMarkdown(html: string): string {
|
||||
const turndownService = new TurndownService({
|
||||
headingStyle: 'atx',
|
||||
@@ -117,83 +69,12 @@ export function htmlToMarkdown(html: string): string {
|
||||
video,
|
||||
footnoteReference,
|
||||
footnotesList,
|
||||
pageBreak,
|
||||
transclusionReference,
|
||||
mention,
|
||||
status,
|
||||
]);
|
||||
return turndownService
|
||||
.turndown(fillEmptyAtomBlocks(fillEmptyFootnoteRefs(html)))
|
||||
.turndown(fillEmptyFootnoteRefs(html))
|
||||
.replaceAll('<br>', ' ');
|
||||
}
|
||||
|
||||
/**
|
||||
* Lossless export rules for custom nodes that have NO native Markdown syntax
|
||||
* (#206 mdrt-2). Markdown cannot represent a page break, a transclusion
|
||||
* reference, a mention's stable id, or a status chip's color — so rather than
|
||||
* letting turndown silently drop them, each rule re-emits the node as raw HTML
|
||||
* carrying every `data-*` attribute. Plain-Markdown viewers ignore the inert
|
||||
* tag, and the import path round-trips it: `markdownToHtml` passes raw HTML
|
||||
* through and each node's `parseHTML` (`div[data-type="…"]`, `span[…]`) rebuilds
|
||||
* the ProseMirror node with its attributes intact.
|
||||
*/
|
||||
function pageBreak(turndownService: _TurndownService) {
|
||||
turndownService.addRule('pageBreak', {
|
||||
filter: function (node: HTMLInputElement) {
|
||||
return (
|
||||
node.nodeName === 'DIV' &&
|
||||
node.getAttribute('data-type') === 'pageBreak'
|
||||
);
|
||||
},
|
||||
replacement: function (_content: string, node: HTMLInputElement) {
|
||||
return `\n\n<div${serializeAttrs(node)}></div>\n\n`;
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
function transclusionReference(turndownService: _TurndownService) {
|
||||
turndownService.addRule('transclusionReference', {
|
||||
filter: function (node: HTMLInputElement) {
|
||||
return (
|
||||
node.nodeName === 'DIV' &&
|
||||
node.getAttribute('data-type') === 'transclusionReference'
|
||||
);
|
||||
},
|
||||
replacement: function (_content: string, node: HTMLInputElement) {
|
||||
return `\n\n<div${serializeAttrs(node)}></div>\n\n`;
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
function mention(turndownService: _TurndownService) {
|
||||
turndownService.addRule('mention', {
|
||||
filter: function (node: HTMLInputElement) {
|
||||
return (
|
||||
node.nodeName === 'SPAN' &&
|
||||
node.getAttribute('data-type') === 'mention'
|
||||
);
|
||||
},
|
||||
replacement: function (_content: string, node: HTMLInputElement) {
|
||||
const text = escapeHtmlText(node.textContent || '');
|
||||
return `<span${serializeAttrs(node)}>${text}</span>`;
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
function status(turndownService: _TurndownService) {
|
||||
turndownService.addRule('status', {
|
||||
filter: function (node: HTMLInputElement) {
|
||||
return (
|
||||
node.nodeName === 'SPAN' && node.getAttribute('data-type') === 'status'
|
||||
);
|
||||
},
|
||||
replacement: function (_content: string, node: HTMLInputElement) {
|
||||
const text = escapeHtmlText(node.textContent || '');
|
||||
return `<span${serializeAttrs(node)}>${text}</span>`;
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize the `htmlEmbed` node to Markdown.
|
||||
*
|
||||
|
||||
212
packages/mcp/test/unit/client-host-contract.test.mjs
Normal file
212
packages/mcp/test/unit/client-host-contract.test.mjs
Normal file
@@ -0,0 +1,212 @@
|
||||
import { test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { readFileSync } from "node:fs";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import { dirname, resolve } from "node:path";
|
||||
|
||||
import { DocmostClient } from "../../build/index.js";
|
||||
|
||||
// Drift guard for the THIRD hand-written layer of the AI tool set (issue #193,
|
||||
// layer 3): the in-app server hand-mirrors the DocmostClient method signatures
|
||||
// it consumes as the `DocmostClientLike` interface in
|
||||
// apps/server/src/core/ai-chat/tools/docmost-client.loader.ts ("Signatures here
|
||||
// mirror that file exactly"). That mirror lives across the ESM(mcp)/CJS(server)
|
||||
// boundary and the package ships NO .d.ts, so the server typecheck cannot verify
|
||||
// the names against the real class — a rename/removal in client.ts would surface
|
||||
// only as a runtime "x is not a function" inside an agent tool call.
|
||||
//
|
||||
// SCOPE: this guard checks the method-NAME set only, not signatures. It pins the
|
||||
// contract from the mcp side (ESM, where the real class is directly importable):
|
||||
// every method the embedding host depends on MUST exist as a function on a real
|
||||
// DocmostClient instance. If you rename/remove a client method, this fails here
|
||||
// AND you must update DocmostClientLike to match. It does NOT verify parameter or
|
||||
// return-type parity — signature drift between the hand-mirror and client.ts can
|
||||
// still ship silently; full signature/type parity is the deferred staged-plan
|
||||
// item below.
|
||||
//
|
||||
// Keep the HOST_CONTRACT_METHODS NAME list aligned with the method NAMES declared
|
||||
// in the server's DocmostClientLike interface. That interface is a SUBSET of the
|
||||
// DocmostClient surface: it covers only what the in-app per-user tool adapter
|
||||
// consumes. The standalone MCP transport (packages/mcp/src/index.ts) calls additional
|
||||
// client methods (insertImage/replaceImage/deleteComment/updateComment/insertFootnote)
|
||||
// that this guard does NOT track — the MCP transport's own typecheck covers those. Full type-derivation
|
||||
// of DocmostClientLike from this class is deferred (see the staged plan in
|
||||
// docmost-client.loader.ts): the package emits no declarations and the real
|
||||
// (inferred, concrete) return types conflict with the host's loose
|
||||
// `Record<string,unknown>` + `as`-cast result handling.
|
||||
const HOST_CONTRACT_METHODS = [
|
||||
// read
|
||||
"search",
|
||||
"getPage",
|
||||
"getWorkspace",
|
||||
"getSpaces",
|
||||
"listPages",
|
||||
"listSidebarPages",
|
||||
"getOutline",
|
||||
"getPageJson",
|
||||
"getNode",
|
||||
"getTable",
|
||||
"listComments",
|
||||
"getComment",
|
||||
"checkNewComments",
|
||||
"listShares",
|
||||
"listPageHistory",
|
||||
"getPageHistory",
|
||||
"diffPageVersions",
|
||||
"exportPageMarkdown",
|
||||
// write (page)
|
||||
"createPage",
|
||||
"updatePage",
|
||||
"renamePage",
|
||||
"movePage",
|
||||
"deletePage",
|
||||
"editPageText",
|
||||
"patchNode",
|
||||
"insertNode",
|
||||
"deleteNode",
|
||||
"updatePageJson",
|
||||
"tableInsertRow",
|
||||
"tableDeleteRow",
|
||||
"tableUpdateCell",
|
||||
"copyPageContent",
|
||||
"importPageMarkdown",
|
||||
"sharePage",
|
||||
"unsharePage",
|
||||
"restorePageVersion",
|
||||
"transformPage",
|
||||
// write (comment)
|
||||
"createComment",
|
||||
"resolveComment",
|
||||
];
|
||||
|
||||
test("DocmostClient implements every method the in-app DocmostClientLike mirror declares", () => {
|
||||
// The constructor is side-effect-free (no network/login on construction): it
|
||||
// only stores config and creates an axios instance, so it is safe to build a
|
||||
// throwaway instance here with a dummy token provider.
|
||||
const client = new DocmostClient({
|
||||
apiUrl: "http://127.0.0.1:1/api",
|
||||
getToken: async () => "test-token",
|
||||
});
|
||||
|
||||
const missing = HOST_CONTRACT_METHODS.filter(
|
||||
(name) => typeof client[name] !== "function",
|
||||
);
|
||||
|
||||
assert.deepEqual(
|
||||
missing,
|
||||
[],
|
||||
`DocmostClient is missing host-contract method(s): ${missing.join(", ")}. ` +
|
||||
`Update packages/mcp/src/client.ts and/or the server's DocmostClientLike ` +
|
||||
`interface (apps/server/src/core/ai-chat/tools/docmost-client.loader.ts) ` +
|
||||
`so the hand-mirrored method NAMES stay aligned (this guards names only, ` +
|
||||
`not signatures).`,
|
||||
);
|
||||
});
|
||||
|
||||
test("HOST_CONTRACT_METHODS has no duplicates", () => {
|
||||
assert.equal(
|
||||
new Set(HOST_CONTRACT_METHODS).size,
|
||||
HOST_CONTRACT_METHODS.length,
|
||||
);
|
||||
});
|
||||
|
||||
// Parse the method names declared in the server's `DocmostClientLike` interface
|
||||
// body. We read the .ts source as plain text (no TS compiler dep, and the file
|
||||
// lives in the CJS server tree across the ESM boundary): scan from the
|
||||
// `export interface DocmostClientLike {` line to its closing brace at column 0,
|
||||
// matching member-signature lines like ` methodName(`. Nested param-object
|
||||
// braces (`opts: { ... }`) are indented, so only the interface's own closing
|
||||
// `}` (column 0) ends the scan.
|
||||
function parseDocmostClientLikeMethods() {
|
||||
const here = dirname(fileURLToPath(import.meta.url));
|
||||
// packages/mcp/test/unit -> repo root is four levels up.
|
||||
const loaderPath = resolve(
|
||||
here,
|
||||
"../../../../apps/server/src/core/ai-chat/tools/docmost-client.loader.ts",
|
||||
);
|
||||
let source;
|
||||
try {
|
||||
source = readFileSync(loaderPath, "utf8");
|
||||
} catch (err) {
|
||||
if (err && err.code === "ENOENT") {
|
||||
throw new Error(
|
||||
`Expected monorepo layout; server tree at ${loaderPath} not found. ` +
|
||||
`This drift-guard reads the server's DocmostClientLike interface via a ` +
|
||||
`fixed relative path and must run from inside the monorepo checkout.`,
|
||||
);
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
const lines = source.split(/\r?\n/);
|
||||
|
||||
const startIdx = lines.findIndex((l) =>
|
||||
/^export interface DocmostClientLike\s*\{/.test(l),
|
||||
);
|
||||
assert.notEqual(
|
||||
startIdx,
|
||||
-1,
|
||||
`Could not find "export interface DocmostClientLike {" in ${loaderPath}. ` +
|
||||
`If the interface was renamed/moved, update this drift-guard test.`,
|
||||
);
|
||||
|
||||
const methods = [];
|
||||
let closed = false;
|
||||
// Track whether we are inside a `/* ... */` block comment. Inner lines of a
|
||||
// block comment need NOT start with `*`, so a `name(` line inside one would be
|
||||
// falsely parsed as an interface method without this. (`//` line comments can
|
||||
// never match the method regex below since they start with `/`.)
|
||||
let inBlockComment = false;
|
||||
for (let i = startIdx + 1; i < lines.length; i++) {
|
||||
const line = lines[i];
|
||||
if (inBlockComment) {
|
||||
// Stay in the block until we see its closing `*/`.
|
||||
if (line.includes("*/")) inBlockComment = false;
|
||||
continue;
|
||||
}
|
||||
// Enter a block comment only when it opens without closing on the same line;
|
||||
// a self-contained `/* ... */` on one line cannot precede a method name we
|
||||
// care about (such lines start with `/`, so the method regex won't match).
|
||||
if (line.includes("/*") && !line.includes("*/")) {
|
||||
inBlockComment = true;
|
||||
continue;
|
||||
}
|
||||
if (/^\}/.test(line)) {
|
||||
closed = true;
|
||||
break;
|
||||
}
|
||||
// Method-name match: a TS identifier (letters/digits/`_`/`$`, not starting
|
||||
// with a digit) optionally followed by a generic clause (`method<T>(`), then
|
||||
// the opening paren of the signature.
|
||||
const m = /^\s*([A-Za-z_$][A-Za-z0-9_$]*)\s*(?:<[^>]*>)?\(/.exec(line);
|
||||
if (m) methods.push(m[1]);
|
||||
}
|
||||
assert.ok(
|
||||
closed,
|
||||
`Did not find the closing brace of DocmostClientLike in ${loaderPath}.`,
|
||||
);
|
||||
assert.ok(
|
||||
methods.length > 0,
|
||||
`Parsed zero methods from DocmostClientLike in ${loaderPath} — the parser ` +
|
||||
`is likely out of date with the interface formatting.`,
|
||||
);
|
||||
return methods;
|
||||
}
|
||||
|
||||
// The point of the guard is to protect the DocmostClientLike mirror <-> client.ts
|
||||
// link, but HOST_CONTRACT_METHODS is itself a HAND-COPY of that interface kept in
|
||||
// sync manually. The list<->interface link must be tested too: a method consumed
|
||||
// by the adapter and added to DocmostClientLike but forgotten here (or removed
|
||||
// from the interface but left here) would otherwise escape both the server
|
||||
// typecheck (pkg emits no .d.ts) and the first test above (name not in the list).
|
||||
// Assert the two agree BOTH ways.
|
||||
test("HOST_CONTRACT_METHODS exactly mirrors the server's DocmostClientLike interface", () => {
|
||||
const interfaceMethods = parseDocmostClientLikeMethods();
|
||||
assert.deepEqual(
|
||||
[...HOST_CONTRACT_METHODS].sort(),
|
||||
[...interfaceMethods].sort(),
|
||||
`HOST_CONTRACT_METHODS has drifted from the DocmostClientLike interface in ` +
|
||||
`apps/server/src/core/ai-chat/tools/docmost-client.loader.ts. Add/remove ` +
|
||||
`method names in HOST_CONTRACT_METHODS so it lists EXACTLY the methods ` +
|
||||
`declared in that interface (both directions are checked).`,
|
||||
);
|
||||
});
|
||||
Reference in New Issue
Block a user