The bulk embedding reindex could hang on a single page forever
("Indexed 27 of 34 pages") with zero log output:
- all progress logs were debug-level, suppressed in production (pino info);
- embedMany() had no timeout, so a slow/hung embeddings endpoint blocked
the sequential per-page loop indefinitely.
Changes:
- ai.service.embedTexts: bound embedMany with AbortSignal.timeout
(configurable via AI_EMBEDDING_TIMEOUT_MS, default 120000ms); on timeout
throw a clear, greppable message, classified by both signal.aborted and
the error name (TimeoutError/AbortError/ResponseAborted) so a real
provider error racing the timer keeps its diagnostics.
- embedding-indexer.reindexWorkspace: promote lifecycle/progress logs to
info; log "[i/N] indexing page <id>" BEFORE the await so a hang names the
stuck page; warn on slow pages (>30s); add timing + final summary.
- .env.example: document AI_EMBEDDING_TIMEOUT_MS.
84 lines
2.0 KiB
Plaintext
84 lines
2.0 KiB
Plaintext
# your domain, e.g. https://example.com
|
|
APP_URL=http://localhost:3000
|
|
PORT=3000
|
|
|
|
# must be at least 32 characters long. Generate one with: openssl rand -hex 32
|
|
APP_SECRET=REPLACE_WITH_LONG_SECRET
|
|
|
|
JWT_TOKEN_EXPIRES_IN=30d
|
|
|
|
DATABASE_URL="postgresql://postgres:password@localhost:5432/docmost?schema=public"
|
|
REDIS_URL=redis://127.0.0.1:6379
|
|
|
|
# options: local | s3 | azure
|
|
STORAGE_DRIVER=local
|
|
|
|
# S3 driver config
|
|
AWS_S3_ACCESS_KEY_ID=
|
|
AWS_S3_SECRET_ACCESS_KEY=
|
|
AWS_S3_REGION=
|
|
AWS_S3_BUCKET=
|
|
AWS_S3_ENDPOINT=
|
|
AWS_S3_FORCE_PATH_STYLE=
|
|
|
|
# Azure Blob Storage driver config
|
|
AZURE_STORAGE_ACCOUNT_NAME=
|
|
AZURE_STORAGE_ACCOUNT_KEY=
|
|
AZURE_STORAGE_CONTAINER=
|
|
|
|
# default: 50mb
|
|
FILE_UPLOAD_SIZE_LIMIT=
|
|
|
|
# options: smtp | postmark
|
|
MAIL_DRIVER=smtp
|
|
MAIL_FROM_ADDRESS=hello@example.com
|
|
MAIL_FROM_NAME=Docmost
|
|
|
|
# SMTP driver config
|
|
SMTP_HOST=127.0.0.1
|
|
SMTP_PORT=587
|
|
SMTP_USERNAME=
|
|
SMTP_PASSWORD=
|
|
SMTP_SECURE=false
|
|
SMTP_IGNORETLS=false
|
|
|
|
# Postmark driver config
|
|
POSTMARK_TOKEN=
|
|
|
|
# URL of a custom draw.io server
|
|
DRAWIO_URL=
|
|
|
|
# Gotenberg URL for server-side PDF export
|
|
GOTENBERG_URL=
|
|
|
|
DISABLE_TELEMETRY=false
|
|
|
|
# Allow other sites to embed Docmost in an iframe.
|
|
IFRAME_EMBED_ALLOWED=false
|
|
|
|
# Only used when IFRAME_EMBED_ALLOWED=true. When empty, any origin is allowed.
|
|
# Example: https://intranet.example.com,https://portal.example.com
|
|
IFRAME_ALLOWED_ORIGINS=
|
|
|
|
# Enable debug logging in production (default: false)
|
|
DEBUG_MODE=false
|
|
|
|
# Log database queries
|
|
DEBUG_DB=false
|
|
|
|
# Log HTTP requests
|
|
LOG_HTTP=false
|
|
|
|
# MCP server (community): service account the embedded MCP uses to talk to this Docmost instance
|
|
MCP_DOCMOST_EMAIL=
|
|
MCP_DOCMOST_PASSWORD=
|
|
# MCP_DOCMOST_API_URL=http://127.0.0.1:3000/api
|
|
# Optional bearer token to protect the /mcp endpoint. If unset, /mcp relies on
|
|
# the workspace MCP toggle and network isolation (do not expose the port publicly).
|
|
# MCP_TOKEN=
|
|
# MCP_SESSION_IDLE_MS=1800000
|
|
|
|
# Per-embedding-call timeout in milliseconds for the RAG indexer.
|
|
# A call to a slow or hung embeddings endpoint fails once this timeout elapses, and the batch continues.
|
|
# AI_EMBEDDING_TIMEOUT_MS=120000
|