feat: bulk page imports (#1219)
* refactor imports - WIP * Add readstream * WIP * fix attachmentId render * fix attachmentId render * turndown video tag * feat: add stream upload support and improve file handling - Add stream upload functionality to storage drivers\n- Improve ZIP file extraction with better encoding handling\n- Fix attachment ID rendering issues\n- Add AWS S3 upload stream support\n- Update dependencies for better compatibility * WIP * notion formatter * move embed parser to editor-ext package * import embeds * utility files * cleanup * Switch from happy-dom to cheerio * Refine code * WIP * bug fixes and UI * sync * WIP * sync * keep import modal mounted * Show modal during upload * WIP * WIP
This commit is contained in:
@@ -0,0 +1,346 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import * as path from 'path';
|
||||
import { jsonToText } from '../../../collaboration/collaboration.util';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||
import {
|
||||
extractZip,
|
||||
FileImportSource,
|
||||
FileTaskStatus,
|
||||
} from '../utils/file.utils';
|
||||
import { StorageService } from '../../storage/storage.service';
|
||||
import * as tmp from 'tmp-promise';
|
||||
import { pipeline } from 'node:stream/promises';
|
||||
import { createWriteStream } from 'node:fs';
|
||||
import { ImportService } from './import.service';
|
||||
import { promises as fs } from 'fs';
|
||||
import { generateSlugId } from '../../../common/helpers';
|
||||
import { v7 } from 'uuid';
|
||||
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
||||
import { FileTask, InsertablePage } from '@docmost/db/types/entity.types';
|
||||
import { markdownToHtml } from '@docmost/editor-ext';
|
||||
import { getProsemirrorContent } from '../../../common/helpers/prosemirror/utils';
|
||||
import { formatImportHtml } from '../utils/import-formatter';
|
||||
import {
|
||||
buildAttachmentCandidates,
|
||||
collectMarkdownAndHtmlFiles,
|
||||
} from '../utils/import.utils';
|
||||
import { executeTx } from '@docmost/db/utils';
|
||||
import { BacklinkRepo } from '@docmost/db/repos/backlink/backlink.repo';
|
||||
import { ImportAttachmentService } from './import-attachment.service';
|
||||
import { ModuleRef } from '@nestjs/core';
|
||||
import { PageService } from '../../../core/page/services/page.service';
|
||||
import { ImportPageNode } from '../dto/file-task-dto';
|
||||
|
||||
@Injectable()
|
||||
export class FileTaskService {
|
||||
private readonly logger = new Logger(FileTaskService.name);
|
||||
|
||||
constructor(
|
||||
private readonly storageService: StorageService,
|
||||
private readonly importService: ImportService,
|
||||
private readonly pageService: PageService,
|
||||
private readonly backlinkRepo: BacklinkRepo,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
private readonly importAttachmentService: ImportAttachmentService,
|
||||
private moduleRef: ModuleRef,
|
||||
) {}
|
||||
|
||||
/**
 * Runs a ZIP import file task end to end.
 *
 * Steps:
 *  1. Load the task row; bail out when it is missing or already in the
 *     `Failed` / `Success` state.
 *  2. Stream the stored archive into a temporary file and extract it into a
 *     temporary directory.
 *  3. Dispatch to the generic/Notion importer or to the Confluence importer
 *     (resolved lazily, because the EE module may be absent from the build).
 *  4. Mark the task successful and delete the stored archive.
 *
 * Temporary files are removed on every exit path (success, early return and
 * failure). Cleanup is best-effort so that it can never mask the error that
 * caused the import to fail.
 *
 * @param fileTaskId ID of the `fileTasks` row to process.
 * @throws Re-throws any download, extraction or import error.
 */
async processZIpImport(fileTaskId: string): Promise<void> {
  const fileTask = await this.db
    .selectFrom('fileTasks')
    .selectAll()
    .where('id', '=', fileTaskId)
    .executeTakeFirst();

  if (!fileTask) {
    this.logger.log(`Import file task with ID ${fileTaskId} not found`);
    return;
  }

  if (fileTask.status === FileTaskStatus.Failed) {
    return;
  }

  if (fileTask.status === FileTaskStatus.Success) {
    this.logger.log('Imported task already processed.');
    return;
  }

  const { path: tmpZipPath, cleanup: cleanupTmpFile } = await tmp.file({
    prefix: 'docmost-import',
    postfix: '.zip',
    discardDescriptor: true,
  });

  // Assigned once the directory exists, so a failure while creating it still
  // lets the `finally` block remove the temp file created above.
  let cleanupTmpDir: (() => Promise<void>) | undefined;

  // Best-effort removal of temp artifacts; failures are logged, never thrown.
  const cleanupTemp = async (): Promise<void> => {
    for (const cleanup of [cleanupTmpFile, cleanupTmpDir]) {
      if (!cleanup) continue;
      try {
        await cleanup();
      } catch (err) {
        this.logger.warn(
          `Failed to clean up temporary import files. Task ID: ${fileTaskId}`,
          err,
        );
      }
    }
  };

  try {
    const tmpDir = await tmp.dir({
      prefix: 'docmost-extract-',
      unsafeCleanup: true,
    });
    cleanupTmpDir = tmpDir.cleanup;
    const tmpExtractDir = tmpDir.path;

    const fileStream = await this.storageService.readStream(
      fileTask.filePath,
    );
    await pipeline(fileStream, createWriteStream(tmpZipPath));
    await extractZip(tmpZipPath, tmpExtractDir);

    if (
      fileTask.source === FileImportSource.Generic ||
      fileTask.source === FileImportSource.Notion
    ) {
      await this.processGenericImport({
        extractDir: tmpExtractDir,
        fileTask,
      });
    }

    if (fileTask.source === FileImportSource.Confluence) {
      let ConfluenceModule: any;
      try {
        // eslint-disable-next-line @typescript-eslint/no-require-imports
        ConfluenceModule = require('./../../../ee/confluence-import/confluence-import.service');
      } catch (err) {
        this.logger.error(
          'Confluence import requested but EE module not bundled in this build',
        );
        // The `finally` block below still removes the temp artifacts.
        return;
      }

      const confluenceImportService = this.moduleRef.get(
        ConfluenceModule.ConfluenceImportService,
        { strict: false },
      );

      await confluenceImportService.processConfluenceImport({
        extractDir: tmpExtractDir,
        fileTask,
      });
    }

    await this.updateTaskStatus(fileTaskId, FileTaskStatus.Success, null);
  } finally {
    await cleanupTemp();
  }

  // delete stored file on success
  try {
    await this.storageService.delete(fileTask.filePath);
  } catch (err) {
    this.logger.error(
      `Failed to delete import file from storage. Task ID: ${fileTaskId}`,
      err,
    );
  }
}
|
||||
|
||||
/**
 * Imports the Markdown/HTML files of an extracted archive as pages.
 *
 * The method
 *  - reads every collected file (Markdown is converted to HTML first),
 *  - links pages to a parent when a sibling file named like one of their
 *    ancestor folders exists (`Guide.md` is the parent of `Guide/Intro.md`),
 *  - assigns position keys per sibling group, sorted by page name,
 *  - rewrites attachments and internal links in each page's HTML, and
 *  - inserts all pages plus their backlinks in a single transaction.
 *
 * @param opts.extractDir Directory the archive was extracted into.
 * @param opts.fileTask   The file task supplying space, workspace and creator.
 */
async processGenericImport(opts: {
  extractDir: string;
  fileTask: FileTask;
}): Promise<void> {
  const { extractDir, fileTask } = opts;
  const allFiles = await collectMarkdownAndHtmlFiles(extractDir);
  const attachmentCandidates = await buildAttachmentCandidates(extractDir);

  const pagesMap = new Map<string, ImportPageNode>();

  for (const absPath of allFiles) {
    const relPath = path
      .relative(extractDir, absPath)
      .split(path.sep)
      .join('/'); // normalize to forward-slashes

    // `path.basename` matches the suffix case-sensitively, so the extension
    // must be stripped with its original casing; the lower-cased form is only
    // used for comparisons and stored metadata.
    const rawExt = path.extname(relPath);
    const ext = rawExt.toLowerCase();
    let content = await fs.readFile(absPath, 'utf-8');

    if (ext === '.md') {
      content = await markdownToHtml(content);
    }

    pagesMap.set(relPath, {
      id: v7(),
      slugId: generateSlugId(),
      name: path.basename(relPath, rawExt),
      content,
      parentPageId: null,
      fileExtension: ext,
      filePath: relPath,
    });
  }

  // parent/child linking: walk up the folder chain until a folder has a
  // matching `<folder>.md` or `<folder>.html` page (Markdown wins).
  pagesMap.forEach((page, filePath) => {
    const segments = filePath.split('/');
    segments.pop();
    while (segments.length) {
      const folderPath = segments.join('/');
      const parentPage =
        pagesMap.get(folderPath + '.md') ?? pagesMap.get(folderPath + '.html');
      if (parentPage) {
        page.parentPageId = parentPage.id;
        break;
      }
      segments.pop();
    }
  });

  // generate position keys
  const siblingsMap = new Map<string | null, ImportPageNode[]>();

  pagesMap.forEach((page) => {
    const group = siblingsMap.get(page.parentPageId) ?? [];
    group.push(page);
    siblingsMap.set(page.parentPageId, group);
  });

  // get root pages
  const rootSibs = siblingsMap.get(null);

  if (rootSibs?.length) {
    rootSibs.sort((a, b) => a.name.localeCompare(b.name));

    // get first position key from the server
    const nextPosition = await this.pageService.nextPagePosition(
      fileTask.spaceId,
    );

    let prevPos: string | null = null;
    rootSibs.forEach((page, idx) => {
      page.position =
        idx === 0 ? nextPosition : generateJitteredKeyBetween(prevPos, null);
      prevPos = page.position;
    });
  }

  // non-root buckets (children & deeper levels)
  siblingsMap.forEach((sibs, parentId) => {
    if (parentId === null) return; // root already done

    sibs.sort((a, b) => a.name.localeCompare(b.name));

    let prevPos: string | null = null;
    for (const page of sibs) {
      page.position = generateJitteredKeyBetween(prevPos, null);
      prevPos = page.position;
    }
  });

  // internal page links
  const filePathToPageMetaMap = new Map<
    string,
    { id: string; title: string; slugId: string }
  >();
  pagesMap.forEach((page) => {
    filePathToPageMetaMap.set(page.filePath, {
      id: page.id,
      title: page.name,
      slugId: page.slugId,
    });
  });

  const pageResults = await Promise.all(
    Array.from(pagesMap.values()).map(async (page) => {
      const htmlContent =
        await this.importAttachmentService.processAttachments({
          html: page.content,
          pageRelativePath: page.filePath,
          extractDir,
          pageId: page.id,
          fileTask,
          attachmentCandidates,
        });

      const { html, backlinks } = await formatImportHtml({
        html: htmlContent,
        currentFilePath: page.filePath,
        filePathToPageMetaMap: filePathToPageMetaMap,
        creatorId: fileTask.creatorId,
        sourcePageId: page.id,
        workspaceId: fileTask.workspaceId,
      });

      const pmState = getProsemirrorContent(
        await this.importService.processHTML(html),
      );

      const { title, prosemirrorJson } =
        this.importService.extractTitleAndRemoveHeading(pmState);

      const insertablePage: InsertablePage = {
        id: page.id,
        slugId: page.slugId,
        title: title || page.name,
        content: prosemirrorJson,
        textContent: jsonToText(prosemirrorJson),
        ydoc: await this.importService.createYdoc(prosemirrorJson),
        position: page.position!,
        spaceId: fileTask.spaceId,
        workspaceId: fileTask.workspaceId,
        creatorId: fileTask.creatorId,
        lastUpdatedById: fileTask.creatorId,
        parentPageId: page.parentPageId,
      };

      return { insertablePage, backlinks };
    }),
  );

  const insertablePages = pageResults.map((r) => r.insertablePage);
  const insertableBacklinks = pageResults.flatMap((r) => r.backlinks);

  if (insertablePages.length < 1) return;

  // Only keep backlinks whose both ends are pages created by this import.
  const validPageIds = new Set(insertablePages.map((row) => row.id));
  const filteredBacklinks = insertableBacklinks.filter(
    ({ sourcePageId, targetPageId }) =>
      validPageIds.has(sourcePageId) && validPageIds.has(targetPageId),
  );

  await executeTx(this.db, async (trx) => {
    await trx.insertInto('pages').values(insertablePages).execute();

    if (filteredBacklinks.length > 0) {
      await this.backlinkRepo.insertBacklink(filteredBacklinks, trx);
    }
  });
}
|
||||
|
||||
/**
 * Looks up a single file task row by its ID.
 *
 * @param fileTaskId ID of the `fileTasks` row.
 * @returns The result of `executeTakeFirst()` for that ID.
 */
async getFileTask(fileTaskId: string) {
  const taskQuery = this.db
    .selectFrom('fileTasks')
    .selectAll()
    .where('id', '=', fileTaskId);

  return taskQuery.executeTakeFirst();
}
|
||||
|
||||
/**
 * Persists a new status (and optional error message) on a file task and
 * bumps its `updatedAt` timestamp.
 *
 * Database failures are logged and swallowed, so this method never rejects.
 *
 * @param fileTaskId   ID of the `fileTasks` row to update.
 * @param status       New task status.
 * @param errorMessage Error text to store; callers pass `null` on success.
 */
async updateTaskStatus(
  fileTaskId: string,
  status: FileTaskStatus,
  errorMessage?: string | null,
) {
  try {
    await this.db
      .updateTable('fileTasks')
      .set({ status, errorMessage, updatedAt: new Date() })
      .where('id', '=', fileTaskId)
      .execute();
  } catch (err) {
    this.logger.error(
      `Failed to update status of file task ${fileTaskId} to "${status}"`,
      err,
    );
  }
}
|
||||
}
|
||||
@@ -0,0 +1,303 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import * as path from 'path';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||
import { cleanUrlString } from '../utils/file.utils';
|
||||
import { StorageService } from '../../storage/storage.service';
|
||||
import { createReadStream } from 'node:fs';
|
||||
import { promises as fs } from 'fs';
|
||||
import { getMimeType, sanitizeFileName } from '../../../common/helpers';
|
||||
import { v7 } from 'uuid';
|
||||
import { FileTask } from '@docmost/db/types/entity.types';
|
||||
import { getAttachmentFolderPath } from '../../../core/attachment/attachment.utils';
|
||||
import { AttachmentType } from '../../../core/attachment/attachment.constants';
|
||||
import { unwrapFromParagraph } from '../utils/import-formatter';
|
||||
import { resolveRelativeAttachmentPath } from '../utils/import.utils';
|
||||
import { load } from 'cheerio';
|
||||
|
||||
@Injectable()
|
||||
export class ImportAttachmentService {
|
||||
private readonly logger = new Logger(ImportAttachmentService.name);
|
||||
|
||||
constructor(
|
||||
private readonly storageService: StorageService,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
) {}
|
||||
|
||||
/**
 * Rewrites every locally-referenced attachment in a page's HTML to point at
 * an uploaded attachment, and returns the updated HTML.
 *
 * Handled references, in order: `<img>`, `<video>`,
 * `<div data-type="attachment">`, `<a href>` (an `.mp4` target becomes a
 * `<video>`, anything else an attachment `<div>`), and
 * `<div data-type="excalidraw|drawio">`. References starting with `http`, or
 * that do not resolve to an entry of `attachmentCandidates`, are left alone.
 *
 * Each distinct file is uploaded and inserted into `attachments` at most
 * once per page. Uploads run in the background while the HTML is rewritten;
 * every upload handles its own failure (logged, not thrown), so a failed
 * upload can never become an unhandled promise rejection.
 *
 * @param opts.html                 Page HTML to rewrite.
 * @param opts.pageRelativePath     Page path relative to the extract dir.
 * @param opts.extractDir           Extract directory (not read by this method).
 * @param opts.pageId               ID the attachments are linked to.
 * @param opts.fileTask             Supplies workspace, space and creator IDs.
 * @param opts.attachmentCandidates Map of relative path -> absolute path.
 * @returns The rewritten HTML (`''` when the document serializes to nothing).
 * @throws If an attachment file cannot be stat-ed.
 */
async processAttachments(opts: {
  html: string;
  pageRelativePath: string;
  extractDir: string;
  pageId: string;
  fileTask: FileTask;
  attachmentCandidates: Map<string, string>;
}): Promise<string> {
  const { html, pageRelativePath, pageId, fileTask, attachmentCandidates } =
    opts;

  type ProcessedAttachment = {
    attachmentId: string;
    storageFilePath: string;
    apiFilePath: string;
    fileNameWithExt: string;
    abs: string;
    fileSize: number;
  };

  const attachmentTasks: Promise<void>[] = [];

  /**
   * Cache keyed by the *relative* path that appears in the HTML.
   * Ensures we upload (and DB-insert) each attachment at most once,
   * even if it's referenced multiple times on the page.
   */
  const processed = new Map<string, ProcessedAttachment>();

  // Stats the file, then starts its upload + DB insert in the background.
  const uploadOnce = async (relPath: string): Promise<ProcessedAttachment> => {
    const abs = attachmentCandidates.get(relPath)!;
    const attachmentId = v7();
    const ext = path.extname(abs);

    const fileNameWithExt =
      sanitizeFileName(path.basename(abs, ext)) + ext.toLowerCase();

    const storageFilePath = `${getAttachmentFolderPath(
      AttachmentType.File,
      fileTask.workspaceId,
    )}/${attachmentId}/${fileNameWithExt}`;

    const apiFilePath = `/api/files/${attachmentId}/${fileNameWithExt}`;

    // Stat once up front: the size is needed for both the HTML attributes
    // and the DB row, and a missing file fails before any upload starts.
    const { size: fileSize } = await fs.stat(abs);

    attachmentTasks.push(
      (async () => {
        try {
          await this.storageService.uploadStream(
            storageFilePath,
            createReadStream(abs),
          );

          await this.db
            .insertInto('attachments')
            .values({
              id: attachmentId,
              filePath: storageFilePath,
              fileName: fileNameWithExt,
              fileSize,
              mimeType: getMimeType(fileNameWithExt),
              type: 'file',
              fileExt: ext,
              creatorId: fileTask.creatorId,
              workspaceId: fileTask.workspaceId,
              pageId,
              spaceId: fileTask.spaceId,
            })
            .execute();
        } catch (err) {
          // Handled here so the promise never rejects without a handler.
          this.logger.error(`Import attachment upload error: ${abs}`, err);
        }
      })(),
    );

    return {
      attachmentId,
      storageFilePath,
      apiFilePath,
      fileNameWithExt,
      abs,
      fileSize,
    };
  };

  /**
   * - Returns cached data if we've already processed this path.
   * - Otherwise calls `uploadOnce`, stores the result, and returns it.
   */
  const processFile = async (relPath: string): Promise<ProcessedAttachment> => {
    const cached = processed.get(relPath);
    if (cached) return cached;

    const fresh = await uploadOnce(relPath);
    processed.set(relPath, fresh);
    return fresh;
  };

  const pageDir = path.dirname(pageRelativePath);
  const $ = load(html);

  // Maps a raw attribute value to a candidate key, or null when the value is
  // empty, starts with `http`, or does not resolve to a known file.
  const resolveLocalPath = (rawValue: string | undefined): string | null => {
    const cleaned = cleanUrlString(rawValue ?? '');
    if (!cleaned || cleaned.startsWith('http')) return null;
    return (
      resolveRelativeAttachmentPath(cleaned, pageDir, attachmentCandidates) ||
      null
    );
  };

  // image, then video (both carry the same attribute set)
  for (const selector of ['img', 'video']) {
    for (const mediaEl of $(selector).toArray()) {
      const $media = $(mediaEl);
      const relPath = resolveLocalPath($media.attr('src'));
      if (!relPath) continue;

      const { attachmentId, apiFilePath, fileSize } =
        await processFile(relPath);

      const width = $media.attr('width') ?? '100%';
      const align = $media.attr('data-align') ?? 'center';

      $media
        .attr('src', apiFilePath)
        .attr('data-attachment-id', attachmentId)
        .attr('data-size', fileSize.toString())
        .attr('width', width)
        .attr('data-align', align);

      unwrapFromParagraph($, $media);
    }
  }

  // <div data-type="attachment">
  for (const el of $('div[data-type="attachment"]').toArray()) {
    const $oldDiv = $(el);
    const relPath = resolveLocalPath($oldDiv.attr('data-attachment-url'));
    if (!relPath) continue;

    const { attachmentId, apiFilePath, abs, fileSize } =
      await processFile(relPath);
    const fileName = path.basename(abs);
    const mime = getMimeType(abs);

    const $newDiv = $('<div>')
      .attr('data-type', 'attachment')
      .attr('data-attachment-url', apiFilePath)
      .attr('data-attachment-name', fileName)
      .attr('data-attachment-mime', mime)
      .attr('data-attachment-size', fileSize.toString())
      .attr('data-attachment-id', attachmentId);

    $oldDiv.replaceWith($newDiv);
    unwrapFromParagraph($, $newDiv);
  }

  // rewrite other attachments via <a>
  for (const aEl of $('a').toArray()) {
    const $a = $(aEl);
    const relPath = resolveLocalPath($a.attr('href'));
    if (!relPath) continue;

    const { attachmentId, apiFilePath, abs, fileSize } =
      await processFile(relPath);
    const ext = path.extname(relPath).toLowerCase();

    if (ext === '.mp4') {
      const $video = $('<video>')
        .attr('src', apiFilePath)
        .attr('data-attachment-id', attachmentId)
        .attr('data-size', fileSize.toString())
        .attr('width', '100%')
        .attr('data-align', 'center');
      $a.replaceWith($video);
      unwrapFromParagraph($, $video);
    } else {
      // Use the alias attribute as the display name when the link has one.
      const confAliasName = $a.attr('data-linked-resource-default-alias');
      let attachmentName = path.basename(abs);
      if (confAliasName) attachmentName = confAliasName;

      const $div = $('<div>')
        .attr('data-type', 'attachment')
        .attr('data-attachment-url', apiFilePath)
        .attr('data-attachment-name', attachmentName)
        .attr('data-attachment-mime', getMimeType(abs))
        .attr('data-attachment-size', fileSize.toString())
        .attr('data-attachment-id', attachmentId);

      $a.replaceWith($div);
      unwrapFromParagraph($, $div);
    }
  }

  // excalidraw and drawio
  for (const type of ['excalidraw', 'drawio'] as const) {
    for (const el of $(`div[data-type="${type}"]`).toArray()) {
      const $oldDiv = $(el);
      const relPath = resolveLocalPath($oldDiv.attr('data-src'));
      if (!relPath) continue;

      const { attachmentId, apiFilePath, abs, fileSize } =
        await processFile(relPath);
      const fileName = path.basename(abs);

      const width = $oldDiv.attr('data-width') || '100%';
      const align = $oldDiv.attr('data-align') || 'center';

      const $newDiv = $('<div>')
        .attr('data-type', type)
        .attr('data-src', apiFilePath)
        .attr('data-title', fileName)
        .attr('data-width', width)
        .attr('data-size', fileSize.toString())
        .attr('data-align', align)
        .attr('data-attachment-id', attachmentId);

      $oldDiv.replaceWith($newDiv);
      unwrapFromParagraph($, $newDiv);
    }
  }

  // wait for all uploads & DB inserts (each task handles its own errors)
  await Promise.all(attachmentTasks);

  return $.root().html() || '';
}
|
||||
}
|
||||
240
apps/server/src/integrations/import/services/import.service.ts
Normal file
240
apps/server/src/integrations/import/services/import.service.ts
Normal file
@@ -0,0 +1,240 @@
|
||||
import { BadRequestException, Injectable, Logger } from '@nestjs/common';
|
||||
import { PageRepo } from '@docmost/db/repos/page/page.repo';
|
||||
import { MultipartFile } from '@fastify/multipart';
|
||||
import { sanitize } from 'sanitize-filename-ts';
|
||||
import * as path from 'path';
|
||||
import {
|
||||
htmlToJson,
|
||||
jsonToText,
|
||||
tiptapExtensions,
|
||||
} from '../../../collaboration/collaboration.util';
|
||||
import { InjectKysely } from 'nestjs-kysely';
|
||||
import { KyselyDB } from '@docmost/db/types/kysely.types';
|
||||
import { generateSlugId, sanitizeFileName } from '../../../common/helpers';
|
||||
import { generateJitteredKeyBetween } from 'fractional-indexing-jittered';
|
||||
import { TiptapTransformer } from '@hocuspocus/transformer';
|
||||
import * as Y from 'yjs';
|
||||
import { markdownToHtml } from '@docmost/editor-ext';
|
||||
import {
|
||||
FileTaskStatus,
|
||||
FileTaskType,
|
||||
getFileTaskFolderPath,
|
||||
} from '../utils/file.utils';
|
||||
import { v7 as uuid7 } from 'uuid';
|
||||
import { StorageService } from '../../storage/storage.service';
|
||||
import { InjectQueue } from '@nestjs/bullmq';
|
||||
import { Queue } from 'bullmq';
|
||||
import { QueueJob, QueueName } from '../../queue/constants';
|
||||
|
||||
@Injectable()
|
||||
export class ImportService {
|
||||
private readonly logger = new Logger(ImportService.name);
|
||||
|
||||
constructor(
|
||||
private readonly pageRepo: PageRepo,
|
||||
private readonly storageService: StorageService,
|
||||
@InjectKysely() private readonly db: KyselyDB,
|
||||
@InjectQueue(QueueName.FILE_TASK_QUEUE)
|
||||
private readonly fileTaskQueue: Queue,
|
||||
) {}
|
||||
|
||||
/**
 * Imports a single uploaded Markdown (`.md`) or HTML (`.html`) file as a new
 * root-level page of the given space.
 *
 * The page title is the document's leading H1 when present, otherwise the
 * sanitized file name without its extension.
 *
 * Note: the signature is annotated `Promise<void>`, but the final statement
 * returns the created page (or `null`), so callers do receive it at runtime.
 *
 * @param filePromise Multipart file upload to import.
 * @param userId      Creator / last updater of the new page.
 * @param spaceId     Space the page is created in.
 * @param workspaceId Workspace the page belongs to.
 * @throws BadRequestException when the content cannot be converted (including
 *         unsupported extensions) or the page cannot be inserted.
 */
async importPage(
  filePromise: Promise<MultipartFile>,
  userId: string,
  spaceId: string,
  workspaceId: string,
): Promise<void> {
  const file = await filePromise;
  const fileBuffer = await file.toBuffer();

  // `path.basename` compares the suffix case-sensitively, so strip the
  // extension exactly as uploaded and lower-case it only for comparisons.
  const rawExtension = path.extname(file.filename);
  const fileExtension = rawExtension.toLowerCase();
  const fileName = sanitize(
    path.basename(file.filename, rawExtension).slice(0, 255),
  );
  const fileContent = fileBuffer.toString();

  let prosemirrorState = null;
  let createdPage = null;

  try {
    if (fileExtension === '.md') {
      prosemirrorState = await this.processMarkdown(fileContent);
    } else if (fileExtension === '.html') {
      prosemirrorState = await this.processHTML(fileContent);
    }
  } catch (err) {
    const message = 'Error processing file content';
    this.logger.error(message, err);
    throw new BadRequestException(message);
  }

  // Also reached for unsupported extensions, which leave the state unset.
  if (!prosemirrorState) {
    const message = 'Failed to create ProseMirror state';
    this.logger.error(message);
    throw new BadRequestException(message);
  }

  const { title, prosemirrorJson } =
    this.extractTitleAndRemoveHeading(prosemirrorState);

  const pageTitle = title || fileName;

  if (prosemirrorJson) {
    try {
      const pagePosition = await this.getNewPagePosition(spaceId);

      createdPage = await this.pageRepo.insertPage({
        slugId: generateSlugId(),
        title: pageTitle,
        content: prosemirrorJson,
        textContent: jsonToText(prosemirrorJson),
        ydoc: await this.createYdoc(prosemirrorJson),
        position: pagePosition,
        spaceId: spaceId,
        creatorId: userId,
        workspaceId: workspaceId,
        lastUpdatedById: userId,
      });

      this.logger.debug(
        `Successfully imported "${fileName}${fileExtension}". ID: ${createdPage.id} - SlugId: ${createdPage.slugId}`,
      );
    } catch (err) {
      const message = 'Failed to create imported page';
      this.logger.error(message, err);
      throw new BadRequestException(message);
    }
  }

  return createdPage;
}
|
||||
|
||||
/**
 * Converts Markdown to HTML with `markdownToHtml`, then hands the HTML to
 * `processHTML`.
 *
 * @param markdownInput Markdown source text.
 * @returns Whatever `processHTML` resolves to for the generated HTML.
 * @throws Propagates any conversion error unchanged.
 */
async processMarkdown(markdownInput: string): Promise<any> {
  const html = await markdownToHtml(markdownInput);
  return this.processHTML(html);
}
|
||||
|
||||
/**
 * Converts an HTML string with `htmlToJson`.
 *
 * @param htmlInput HTML source text.
 * @returns The value produced by `htmlToJson`.
 * @throws Propagates any conversion error unchanged (as a rejection).
 */
async processHTML(htmlInput: string): Promise<any> {
  return htmlToJson(htmlInput);
}
|
||||
|
||||
/**
 * Builds a Yjs document from ProseMirror JSON and returns its encoded state.
 *
 * @param prosemirrorJson ProseMirror document JSON; a falsy value yields `null`.
 * @returns A Buffer holding `Y.encodeStateAsUpdate` of the document built
 *          under the `'default'` field, or `null` when no JSON was given.
 */
async createYdoc(prosemirrorJson: any): Promise<Buffer | null> {
  if (!prosemirrorJson) {
    return null;
  }

  const ydoc = TiptapTransformer.toYdoc(
    prosemirrorJson,
    'default',
    tiptapExtensions,
  );

  // Encode exactly once; the update is the only thing the caller needs.
  return Buffer.from(Y.encodeStateAsUpdate(ydoc));
}
|
||||
|
||||
/**
 * Splits a leading level-1 heading off a ProseMirror document and uses its
 * text as the title.
 *
 * - The title is the concatenated text of all inline nodes of the heading,
 *   so headings with mixed formatting are not truncated. An empty heading
 *   yields `null`.
 * - The heading node is removed from the returned document.
 * - The returned document always contains at least one (empty) paragraph.
 * - The input state and its `content` array are not modified.
 *
 * @param prosemirrorState ProseMirror document JSON (must not be null).
 * @returns `{ title, prosemirrorJson }` where `title` is `string | null`.
 */
extractTitleAndRemoveHeading(prosemirrorState: any) {
  let title: string | null = null;

  // Work on a copy so the caller's document is left untouched.
  const content = [...(prosemirrorState.content ?? [])];
  const firstNode = content[0];

  if (firstNode?.type === 'heading' && firstNode.attrs?.level === 1) {
    const inlineNodes = Array.isArray(firstNode.content)
      ? firstNode.content
      : [];
    const headingText = inlineNodes
      .map((node: any) => node?.text ?? '')
      .join('');
    title = headingText || null;
    content.shift();
  }

  // ensure at least one paragraph
  if (content.length === 0) {
    content.push({
      type: 'paragraph',
      content: [],
    });
  }

  return {
    title,
    prosemirrorJson: {
      ...prosemirrorState,
      content,
    },
  };
}
|
||||
|
||||
/**
 * Produces a position key that sorts after the last root-level page of a
 * space.
 *
 * @param spaceId Space whose root pages are inspected.
 * @returns A jittered key after the highest existing root position, or an
 *          initial key when the space has no root pages.
 */
async getNewPagePosition(spaceId: string): Promise<string> {
  const lastRootPage = await this.db
    .selectFrom('pages')
    .select(['id', 'position'])
    .where('spaceId', '=', spaceId)
    .where('parentPageId', 'is', null)
    .orderBy('position', 'desc')
    .limit(1)
    .executeTakeFirst();

  const lowerBound = lastRootPage ? lastRootPage.position : null;
  return generateJitteredKeyBetween(lowerBound, null);
}
|
||||
|
||||
/**
 * Accepts an uploaded ZIP archive for asynchronous import.
 *
 * The archive is uploaded to storage, a `fileTasks` row in the `Processing`
 * state is created for it, and an import job is queued with the task ID.
 *
 * @param filePromise Multipart file upload holding the archive.
 * @param source      Import source identifier stored on the task.
 * @param userId      Creator of the file task.
 * @param spaceId     Target space of the import.
 * @param workspaceId Workspace the task belongs to.
 * @returns The inserted file task row.
 */
async importZip(
  filePromise: Promise<MultipartFile>,
  source: string,
  userId: string,
  spaceId: string,
  workspaceId: string,
) {
  const file = await filePromise;
  const fileBuffer = await file.toBuffer();

  // `path.basename` compares the suffix case-sensitively: strip the
  // extension as uploaded (e.g. ".ZIP") and store the lower-cased form, so
  // "Export.ZIP" becomes "Export.zip" rather than "Export.ZIP.zip".
  const rawExtension = path.extname(file.filename);
  const fileExtension = rawExtension.toLowerCase();
  const fileName = sanitizeFileName(
    path.basename(file.filename, rawExtension),
  );
  const fileSize = fileBuffer.length;

  const fileNameWithExt = fileName + fileExtension;

  const fileTaskId = uuid7();
  const filePath = `${getFileTaskFolderPath(FileTaskType.Import, workspaceId)}/${fileTaskId}/${fileNameWithExt}`;

  // upload file
  await this.storageService.upload(filePath, fileBuffer);

  const fileTask = await this.db
    .insertInto('fileTasks')
    .values({
      id: fileTaskId,
      type: FileTaskType.Import,
      source: source,
      status: FileTaskStatus.Processing,
      fileName: fileNameWithExt,
      filePath: filePath,
      fileSize: fileSize,
      fileExt: 'zip',
      creatorId: userId,
      spaceId: spaceId,
      workspaceId: workspaceId,
    })
    .returningAll()
    .executeTakeFirst();

  await this.fileTaskQueue.add(QueueJob.IMPORT_TASK, {
    fileTaskId: fileTaskId,
  });

  return fileTask;
}
|
||||
}
|
||||
Reference in New Issue
Block a user