diff --git a/.env.example b/.env.example index fbd32428..73e57348 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,38 @@ APP_URL=http://localhost:3000 PORT=3000 +# --- Security / reverse proxy --- +# The app derives the client IP (req.ip) from the `X-Forwarded-For` header via +# Fastify `trustProxy`. That header is client-forgeable, so XFF is trusted only +# from proxies on the configured trusted networks. Deploy this app behind a +# trusted reverse proxy that SETS/OVERWRITES (not appends) `X-Forwarded-For` +# with the real client IP. If XFF is trusted from an untrusted source, any +# per-IP throttling — including the /mcp Basic brute-force limiter — can be +# bypassed by an attacker who simply spoofs `X-Forwarded-For` to rotate IPs. +# (The /mcp limiter keeps a global per-email key as an IP-independent backstop, +# but the per-IP and per-IP+email keys rely on a trustworthy X-Forwarded-For.) +# +# TRUST_PROXY controls which proxies are trusted to set X-Forwarded-For. +# Default (unset/empty): `loopback, linklocal, uniquelocal` — XFF is trusted +# ONLY from private/loopback proxies, so a public-IP client cannot spoof req.ip. +# This is the safe default for the common case where the reverse proxy runs on +# loopback or a private network; req.ip still resolves to the real client. +# WARNING: this changed the previous default of trust-all. If your reverse proxy +# sits on a PUBLIC IP, the default will NOT trust its XFF and req.ip will be the +# proxy's IP — set TRUST_PROXY accordingly. Accepted values: +# - true restore trust-all (ONLY safe if a trusted proxy ALWAYS overwrites +# X-Forwarded-For; otherwise clients can spoof their IP) +# - false never trust X-Forwarded-For (req.ip is the socket peer) +# - number of trusted proxy hops in front of the app +# - comma-separated CIDR/IP list of trusted proxies, e.g. 
+# `127.0.0.1, 10.0.0.0/8` +# TRUST_PROXY= + +# APP_SECRET has a DUAL role: it signs JWTs AND derives the AES-256-GCM key that +# encrypts stored AI-provider credentials (API keys) at rest. CONSEQUENCE: if you +# change APP_SECRET after setup, every stored AI API key becomes undecryptable — +# you must re-enter them in AI settings — and all existing sessions/JWTs are +# invalidated. Choose it ONCE, keep it stable, and back it up alongside your DB. # minimum of 32 characters. Generate one with: openssl rand -hex 32 APP_SECRET=REPLACE_WITH_LONG_SECRET @@ -69,15 +101,97 @@ DEBUG_DB=false # Log http requests LOG_HTTP=false -# MCP server (community): service account the embedded MCP uses to talk to this Docmost instance +# MCP server (community): the embedded /mcp endpoint authenticates PER USER. +# An MCP client authenticates with one of: +# - HTTP Basic: `Authorization: Basic base64(email:password)` — the user's own +# Docmost login/password. The server validates the credentials and the MCP +# session then acts under that user's permissions (edits attributed to them). +# - Bearer access JWT: `Authorization: Bearer <access_jwt>` (the user's +# `authToken` cookie value). Validated as an ACCESS token. +# +# OPTIONAL service-account fallback. When a request carries NEITHER Basic NOR +# Bearer credentials and these are set, the MCP session falls back to this +# shared service account (back-compat; useful for CI/scripts). Leave BLANK to +# require per-user credentials. MCP_DOCMOST_EMAIL= MCP_DOCMOST_PASSWORD= # MCP_DOCMOST_API_URL=http://127.0.0.1:3000/api -# Optional bearer token to protect the /mcp endpoint. If unset, /mcp relies on -# the workspace MCP toggle and network isolation (do not expose the port publicly). +# Optional shared guard for the /mcp endpoint. When set, every /mcp request must +# carry a matching `X-MCP-Token` header (separate from `Authorization`, which now +# carries the per-user credentials). 
When unset, /mcp relies on the per-user +# credentials above plus the workspace MCP toggle and network isolation (do not +# expose the port publicly). # MCP_TOKEN= # MCP_SESSION_IDLE_MS=1800000 +# +# AI-AGENT ATTRIBUTION (comments/pages written via MCP are badged as "AI"): +# attribution is driven by a per-user `is_agent` flag on the users row. There is +# NO admin UI/API for it — set it out-of-band with SQL. Use a DEDICATED service +# account for the MCP fallback above and flag ONLY that account, e.g.: +# UPDATE users SET is_agent = true WHERE email = 'mcp-bot@your-domain'; +# NEVER set is_agent on a human or shared account — every action by that account +# (including normal human edits) would then be mis-attributed as AI. # Per-embedding-call timeout in milliseconds for the RAG indexer. # A slow/hung embeddings endpoint fails after this and the batch continues. # AI_EMBEDDING_TIMEOUT_MS=120000 + +# Silence timeout (ms) for streaming chat/agent AI calls AND external-MCP traffic. +# Bounds time-to-first-byte and the gap BETWEEN chunks (NOT the total turn length), +# so an arbitrarily long turn that keeps streaming is never cut. Finite so a hung +# provider is eventually broken instead of leaking forever. Default 900000 (15 min). +# AI_STREAM_TIMEOUT_MS=900000 + +# Keep-alive recycle window (ms) for streaming chat/agent AI + external-MCP calls. +# A pooled connection idle longer than this is closed instead of reused, so a +# NAT / egress firewall / reverse proxy that silently drops idle connections +# cannot poison a reused socket into a PRE-RESPONSE `read ECONNRESET`. Lower it if +# your egress drops idle connections faster than ~10s. Default 10000 (10 s). +# AI_STREAM_KEEPALIVE_MS=10000 + +# Silence timeout (ms) for EXTERNAL-MCP transport ONLY (not the chat provider). +# Tighter than AI_STREAM_TIMEOUT_MS so a byte-silent/hung MCP server is broken in +# ~5 min instead of 15. 
Note it also cuts a legitimately long but byte-silent +# single tool call (a slow crawl that emits nothing until done) and an SSE +# transport idling >5 min BETWEEN tool calls. Default 300000 (5 min). +# AI_MCP_STREAM_TIMEOUT_MS=300000 + +# Total wall-clock cap (ms) for ONE external MCP tool call (app-level, not +# transport). Aborts a tool that keeps the socket warm (SSE heartbeats / trickle) +# but never returns a result — which the silence timeout above never breaks. +# Default 900000 (15 min). +# AI_MCP_CALL_TIMEOUT_MS=900000 + +# --- Anonymous public-share AI assistant --- +# Opt-in per workspace (AI settings -> "public share assistant"; off by default). +# When enabled, anonymous visitors of a published share can ask an AI about that +# share at POST /api/shares/ai/stream. The assistant is read-only and hard-scoped +# to the single share tree, but every call spends real tokens on the workspace +# owner's configured AI provider. +# +# DEPLOYMENT REQUIREMENT: the per-IP rate limit on this endpoint is only +# effective behind a trusted reverse proxy that OVERWRITES (not appends) +# X-Forwarded-For with the real client IP. The app runs with trustProxy, so +# without such a proxy an attacker can rotate X-Forwarded-For to evade the +# per-IP limit. Put this endpoint (and the app) behind a proxy you control that +# sets X-Forwarded-For to the real client IP. +# +# Backstop: a cluster-wide, sliding-window cap per workspace (IP-independent, +# keyed by the server-resolved workspace id) bounds the owner's bill even if the +# per-IP limit is fully evaded. It is a COST backstop, not an access control, and +# FAILS CLOSED if Redis is unavailable (an optional assistant briefly going +# offline is safer than an unbounded bill). Override the hourly cap below +# (default: 100 calls per workspace per rolling hour). +# SHARE_AI_WORKSPACE_MAX_PER_HOUR=100 +# +# Per-request output-token ceiling for the anonymous assistant (default: 512). 
+# Worst-case output per accepted call = agent steps (5) × this value. +# SHARE_AI_MAX_OUTPUT_TOKENS=512 +# +# Second cost backstop: a cluster-wide per-workspace rolling-DAY token budget +# (input re-sent per step + output, summed across every accepted turn). The +# hourly request cap above bounds how MANY calls run, not how expensive each is, +# so this caps the owner's actual provider bill directly. Like the request cap it +# FAILS CLOSED if Redis is unavailable (default: 1,000,000 tokens per workspace +# per rolling day). +# SHARE_AI_WORKSPACE_TOKEN_BUDGET_PER_DAY=1000000 diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 736040b7..f25bac74 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -3,7 +3,7 @@ name: Develop on: push: branches: - - main + - develop workflow_dispatch: concurrency: @@ -18,7 +18,12 @@ env: IMAGE: ghcr.io/vvzvlad/gitmost jobs: + # Run the reusable test suite first so a failing test blocks the image build. + test: + uses: ./.github/workflows/test.yml + build: + needs: test runs-on: ubuntu-latest steps: - name: Checkout @@ -51,3 +56,160 @@ jobs: tags: ${{ env.IMAGE }}:develop cache-from: type=gha,scope=develop-amd64 cache-to: type=gha,scope=develop-amd64,mode=max,ignore-error=true + + # e2e jobs run on every develop push but DO NOT gate the build/publish above: + # `build` stays `needs: test` only, so the :develop image still ships even if + # e2e fails. A failing e2e job turns the run red and triggers GitHub's email + # to the pusher — that red run + email is the intended notification, not a + # deploy block. 
+ e2e-server: + runs-on: ubuntu-latest + env: + DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost + REDIS_URL: redis://localhost:6379 + APP_SECRET: ci-e2e-secret-change-me-min-32-characters + APP_URL: http://localhost:3000 + services: + postgres: + image: pgvector/pgvector:pg18 + env: + POSTGRES_DB: docmost + POSTGRES_USER: docmost + POSTGRES_PASSWORD: docmost + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U docmost" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + redis: + image: redis:7 + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build editor-ext + run: pnpm --filter @docmost/editor-ext build + + - name: Run migrations + run: pnpm --filter ./apps/server migration:latest + + - name: Run server e2e + run: pnpm --filter ./apps/server test:e2e + + # Same rationale as e2e-server: this job is intentionally NOT in + # `build.needs`. Deploy of the :develop image must not be blocked by e2e; + # a red run plus GitHub's email to the pusher is the notification mechanism. 
+ e2e-mcp: + runs-on: ubuntu-latest + env: + DATABASE_URL: postgresql://docmost:docmost@localhost:5432/docmost + REDIS_URL: redis://localhost:6379 + APP_SECRET: ci-e2e-secret-change-me-min-32-characters + APP_URL: http://localhost:3000 + NODE_ENV: production + services: + postgres: + image: pgvector/pgvector:pg18 + env: + POSTGRES_DB: docmost + POSTGRES_USER: docmost + POSTGRES_PASSWORD: docmost + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U docmost" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + redis: + image: redis:7 + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 5s + --health-timeout 5s + --health-retries 20 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + - name: Build editor-ext + run: pnpm --filter @docmost/editor-ext build + + - name: Build server + run: pnpm server:build + + - name: Build mcp + run: pnpm --filter @docmost/mcp build + + - name: Run migrations + run: pnpm --filter ./apps/server migration:latest + + - name: Start server (prod) + # Capture stdout/stderr so a start-up crash (bind error, stack trace, + # migration mismatch) is diagnosable; without this the only signal is + # the generic health-loop timeout below, ~120s later. 
+ run: pnpm --filter ./apps/server start:prod > /tmp/server.log 2>&1 & + + - name: Wait for server health + run: | + for i in $(seq 1 60); do + if curl -fsS http://localhost:3000/api/health > /dev/null; then + echo "Server is healthy" + exit 0 + fi + sleep 2 + done + echo "Server did not become healthy in time" + exit 1 + + - name: Dump server log on failure + if: failure() + run: cat /tmp/server.log || true + + - name: Seed admin + run: | + curl -fsS -X POST http://localhost:3000/api/auth/setup \ + -H "Content-Type: application/json" \ + -d '{"name":"E2E","email":"e2e@example.com","password":"E2ePassword123","workspaceName":"E2E"}' + + - name: Run mcp e2e + env: + DOCMOST_API_URL: http://localhost:3000/api + DOCMOST_EMAIL: e2e@example.com + DOCMOST_PASSWORD: E2ePassword123 + run: pnpm --filter @docmost/mcp test:e2e diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 7137d953..694df01b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,12 @@ env: IMAGE: ghcr.io/vvzvlad/gitmost jobs: + # Run the reusable test suite first so a failing test blocks the image build. + test: + uses: ./.github/workflows/test.yml + build: + needs: test strategy: matrix: include: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..3a756656 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,79 @@ +name: Test + +on: + pull_request: + workflow_call: + workflow_dispatch: + +concurrency: + group: test-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + # Real Postgres + Redis so the server integration suite (`*.int-spec.ts`, + # behind `pnpm --filter server test:int`) runs in CI (red-team finding #7). + # Without it, cost-cap / FK-cascade / jsonb-round-trip / real-apply tests + # only ran locally, so regressions in those paths stayed green in CI. 
+ # Postgres uses the pgvector image because migrations create vector columns + # and global-setup runs `CREATE EXTENSION vector`. Credentials/db match the + # defaults in apps/server/test/integration/db.ts + global-setup.ts + # (docmost / docmost_dev_pw, maintenance db `docmost`, redis on 6379), so no + # TEST_*_URL overrides are needed. + services: + postgres: + image: pgvector/pgvector:pg18 + env: + POSTGRES_USER: docmost + POSTGRES_PASSWORD: docmost_dev_pw + POSTGRES_DB: docmost + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U docmost" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + redis: + image: redis:7 + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up pnpm + uses: pnpm/action-setup@v4 + + - name: Set up Node + uses: actions/setup-node@v4 + with: + node-version: 22 + cache: pnpm + + - name: Install dependencies + run: pnpm install --frozen-lockfile + + # Required for the client suite, which resolves @docmost/editor-ext via its + # dist build (the server suite also rebuilds it through its own pretest). + - name: Build editor-ext + run: pnpm --filter @docmost/editor-ext build + + - name: Run unit tests + run: pnpm -r test + + # Integration suite against the real Postgres/Redis services above. Runs + # the FK-cascade, cost-cap, jsonb-round-trip and real-apply specs that the + # unit run (mocks only) cannot cover. global-setup drops/recreates the + # isolated `docmost_test` DB and migrates it to latest. 
+ - name: Run server integration tests + run: pnpm --filter server test:int diff --git a/.gitignore b/.gitignore index 6af27e98..16a16b59 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,9 @@ lerna-debug.log* .nx/installation .nx/cache .claude/worktrees/ + +# TypeScript incremental build artifacts +*.tsbuildinfo + +# Self-hosted VAD / onnxruntime-web assets (copied from node_modules at dev/build time) +apps/client/public/vad/ diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 00000000..809e8d1c --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,14 @@ +{ + // VSCode tasks for this repo. + "version": "2.0.0", + "tasks": [ + { + "label": "git push (github + gitea)", + "type": "shell", + "command": "git push github develop && git push gitea develop", + "options": { "cwd": "${workspaceFolder}" }, + "presentation": { "reveal": "never", "focus": false, "panel": "shared", "showReuseMessage": false, "close": true }, + "problemMatcher": [] + } + ] +} diff --git a/CLAUDE.md b/AGENTS.md similarity index 53% rename from CLAUDE.md rename to AGENTS.md index 7e2713f1..743ae57d 100644 --- a/CLAUDE.md +++ b/AGENTS.md @@ -1,6 +1,178 @@ -# CLAUDE.md +# AGENTS.md -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. +This file guides AI agents (Claude Code, opencode, …) working in this +repository. It has two layers: **how to run a task end-to-end** (the +sections below), and **how the codebase is built** (the technical sections +further down, formerly in `CLAUDE.md`). + +## Task lifecycle + +### 1. Start: sync with develop + +Before starting **any** work, update your local `develop` and branch off it: + +```bash +git checkout develop +git fetch gitea +git pull --ff-only gitea develop +git checkout -b <branch_name> +``` + +Never build a feature directly on `develop`, and never branch off a stale +`develop` — otherwise the PR will carry extra commits or conflict. + +### 2. 
Implementation + +Run the task through the workflow from the system prompt (Phase 1 analysis → +Phase 3 implementation → Phase 4 review → Phase 5 verification → Phase 6 +report). Delegate large changes to a general subagent; review via the review +subagent. + +**Create worktrees only inside the `.claude` folder** (e.g. +`.claude/worktrees/`). Creating a git worktree anywhere else — the repo +root, sibling directories, or temp folders — is forbidden. + +### 3. Commit — ONLY to Gitea and ONLY as `claude_code` + +This rule has no exceptions: + +- **Where:** the only remote for commits/pushes is **`gitea`** + (`gitea.vvzvlad.xyz`). **Never** push to `origin` (the GitHub mirror), and + especially not to `upstream` (the original Docmost). The GitHub mirror is + updated by the owner's CI process, not by the agent. +- **Who:** commit **only** as the agent identity. Any commit whose author or + committer is `vvzvlad` is an error and must be rewritten. + - **name:** `claude_code` + - **email:** `claude_code@vvzvlad.xyz` + +Use `--reset-author` when amending, otherwise git keeps the original author +(the default config on this machine is `vvzvlad`, so check after every commit): + +```bash +GIT_AUTHOR_NAME="claude_code" \ +GIT_AUTHOR_EMAIL="claude_code@vvzvlad.xyz" \ +GIT_COMMITTER_NAME="claude_code" \ +GIT_COMMITTER_EMAIL="claude_code@vvzvlad.xyz" \ +git commit --amend --no-edit --reset-author +``` + +For a regular new commit, set the branch-local config once and commit normally: + +```bash +git config user.name "claude_code" +git config user.email "claude_code@vvzvlad.xyz" +``` + +Check before push: + +```bash +git log -1 --format='Author: %an <%ae>%nCommitter: %cn <%ce>' +# both lines must show claude_code +``` + +### 4. Push and PR to develop + +PRs always target `develop`. 
The `claude_code` password lives in the macOS +keychain as a **generic password** under service `gitea-claude-code` (do not +duplicate it as an internet-password for `gitea.vvzvlad.xyz` — that creates a +conflict with the owner's account in the git credential helper): + +```bash +AGENT_PASS=$(security find-generic-password -s gitea-claude-code -w) +``` + +Push by temporarily injecting the credentials into the remote URL, then always +restore the URL to its clean form (the password must not linger in git +config / reflog): + +```bash +ORIG_URL=$(git remote get-url gitea) +SAFE_PASS=$(python3 -c "import urllib.parse,sys;print(urllib.parse.quote(sys.argv[1],safe=''))" "$AGENT_PASS") +git remote set-url gitea "https://claude_code:${SAFE_PASS}@gitea.vvzvlad.xyz/vvzvlad/gitmost.git" +git push -u gitea <branch_name> +git remote set-url gitea "$ORIG_URL" +unset AGENT_PASS SAFE_PASS +``` + +The PR is created via the Gitea REST API (Basic Auth as `claude_code`): + +```bash +curl -s -X POST \ + -u "claude_code:$(security find-generic-password -s gitea-claude-code -w)" \ + -H "Content-Type: application/json" \ + -d @pr_body.json \ + "https://gitea.vvzvlad.xyz/api/v1/repos/vvzvlad/gitmost/pulls" +``` + +`base: develop`, `head: <branch_name>`. In the PR body: what was done, what is out +of scope, verification results (tsc/lint/tests). + +> If push fails with `User permission denied for writing`, then `claude_code` +> lacks collaborator rights on the repo. Ask the owner to add them (once, via +> the Gitea UI or `PUT /api/v1/repos/vvzvlad/gitmost/collaborators/claude_code` +> with `{"permission":"write"}` from their account). + +### 5. Merge and cleanup + +- **The user merges the PR into develop** (not the agent). The agent does not + press the merge button. +- **After implementing a task, delete its plan from `docs/backlog/<task_name>.md`** — + this is part of closing the task, not the user's work. Files in + `docs/backlog/` are the work queue; completed items get cleaned out of it. 
+ Do this in a separate commit from the same `claude_code` on the same branch + (or ask the user to delete it if the PR is already open and you don't want to + repush it). +- Any junk left uncommitted in the working tree? Check `git status` before the + final report. + +## Release cycle: staging a new version + +When enough changes have accumulated on `develop` for a release, a **final +review by three orchestrator skills** runs before the merge/tag: + +1. **test-orchestrator** (the `code-review-orchestrator` skill focused on test + coverage) — verifies new code is covered by tests and there are no + regressions in existing ones. +2. **review-orchestrator** (the `code-review-orchestrator` skill) — + multi-aspect code review: security, stability, convention conformance, + regressions, over-complexity. +3. **red-team-orchestrator** (the red-team skill) — adversarial analysis of + attack scenarios against the affected components. + +Order: the orchestrators return finding lists → the agent fixes everything they +found (via a subagent or itself, per the delegation rules) → re-runs the review +on the affected areas → cuts the tag per the "Cutting a release" procedure +below. 
+ +## Accounts & endpoints cheat sheet + +| Item | Value | +| --- | --- | +| Only remote for commits | `gitea` → `https://vvzvlad@gitea.vvzvlad.xyz/vvzvlad/gitmost.git` | +| Agent user (Gitea/git) | `claude_code` | +| Agent email | `claude_code@vvzvlad.xyz` | +| Keychain password | `security find-generic-password -s gitea-claude-code -w` | +| PR API | `https://gitea.vvzvlad.xyz/api/v1/repos/vvzvlad/gitmost/pulls` (here `gitmost` is the repo's real slug on the server) | +| Base branch | `develop` | +| `origin` | GitHub mirror `vvzvlad/gitmost` — **do not push**, updated by the owner's CI | +| `upstream` | The original Docmost — **never push** | + +## Creating issues (Gitea `tea` CLI) + +Issues are filed with the official Gitea CLI `tea`, already logged in as +`claude_code` (`tea logins list` shows the `gitea` login as default): + +```bash +tea issues create --repo vvzvlad/gitmost --labels feature \ + --title '<issue_title>' --description "$(cat body.md)" +``` + +> Gotcha (tea 0.14.1): the issue body flag is `--description`/`-d`, **not** +> `--body` — passing `--body` fails with `flag provided but not defined: -body`. + +--- + +# Architecture and codebase ## What this is @@ -51,14 +223,14 @@ pnpm --filter @docmost/mcp test # node --test (unit + mock) pnpm --filter @docmost/mcp test:e2e # MCP end-to-end against a live instance ``` -**Database migrations** (Kysely, run from `apps/server`; they auto-run on server startup too): +**Database migrations** (Kysely, run from `apps/server`). **Where they auto-apply:** in **production** (the built image / `start:prod`) pending migrations run automatically on server boot. In **local dev** (the `pnpm dev` stand / `nest start --watch`) they do **NOT** auto-run — after you pull or switch branches you must apply them yourself with `pnpm --filter server migration:latest`, or any endpoint touching a new column/table 500s (e.g. a freshly-added `ai_chats.page_id` blanket-500s all of AI chat until migrated). 
```bash pnpm --filter server migration:create --name=my_change # new empty migration pnpm --filter server migration:latest # apply all pending pnpm --filter server migration:down # revert last pnpm --filter server migration:codegen # regenerate src/database/types/db.d.ts from the live DB ``` -Migration files live in `apps/server/src/database/migrations/` and are named `YYYYMMDDThhmmss-description.ts`. Fork-specific migrations only **add** tables (`page_embeddings`, `ai_chats`, `ai_chat_messages`, `ai_provider_credentials`, `ai_mcp_servers`) and nullable columns — never drop/rewrite Docmost data. +Migration files live in `apps/server/src/database/migrations/` and are named `YYYYMMDDThhmmss-description.ts`. Fork-specific migrations only **add** tables (`page_embeddings`, `ai_chats`, `ai_chat_messages`, `ai_provider_credentials`, `ai_mcp_servers`, `page_template_references`) and columns (e.g. `pages.is_template`, a `NOT NULL DEFAULT false` boolean) — never drop/rewrite Docmost data. **Migration ordering — always check when merging branches/features.** Kysely runs migrations in **alphabetical (= timestamp) order** and refuses to start if a *new* migration sorts **before** one already applied to the DB (`corrupted migrations: ... must always have a name that comes alphabetically after the last executed migration`). When you merge a branch or land a feature, verify your migration's timestamp still sorts **after every migration that may already be applied on the target** (`/bin/ls -1 apps/server/src/database/migrations | sort | tail`). Branches developed in parallel routinely break this: a feature branch adds `…T130000-…`, `main` meanwhile ships and deploys `…T150000-…`, and after the merge the older-timestamped file is rejected at boot. **Fix = rename your migration to a timestamp after the latest one already in the target** (content unchanged — the filename is the ordering key), then rebuild so the compiled `dist/database/migrations/` picks up the new name. 
@@ -82,7 +254,7 @@ The API server is a Fastify app with a global `/api` prefix (`main.ts` excludes - **Redis** backs caching, the BullMQ queues, the WebSocket Socket.IO adapter, and collaboration sync. ### The two AI subsystems (the main fork additions) -1. **Embedded MCP server** (`integrations/mcp/` + `packages/mcp`). The standalone `@docmost/mcp` server (38 agent-native tools: per-block patch/insert/delete by id, scripted `(doc)=>doc` transforms with dry-run diff, table editing, version diff/restore, comments, images, shares) is bundled and served over HTTP at `/mcp`. It writes through Docmost's real-time-collaboration layer so concurrent human edits aren't clobbered. It authenticates as a service account configured via `MCP_DOCMOST_EMAIL` / `MCP_DOCMOST_PASSWORD`; an admin enables it with a workspace toggle (Workspace settings → AI). Optionally protected by `MCP_TOKEN`. +1. **Embedded MCP server** (`integrations/mcp/` + `packages/mcp`). The standalone `@docmost/mcp` server (38 agent-native tools: per-block patch/insert/delete by id, scripted `(doc)=>doc` transforms with dry-run diff, table editing, version diff/restore, comments, images, shares) is bundled and served over HTTP at `/mcp`. It writes through Docmost's real-time-collaboration layer so concurrent human edits aren't clobbered. Each request authenticates **per-user** via the `Authorization` header — either HTTP Basic (`base64(email:password)`, the user's own Docmost login, validated through `AuthService`) or a Bearer access JWT (the user's `authToken`) — and the session acts under that user's permissions. `MCP_DOCMOST_EMAIL` / `MCP_DOCMOST_PASSWORD` are an **optional service-account fallback**, used only when a request carries neither Basic nor Bearer credentials (back-compat for CI/scripts). An admin enables MCP with a workspace toggle (Workspace settings → AI). 
Optionally protected by a shared `MCP_TOKEN`: when set, every `/mcp` request must carry a matching `X-MCP-Token` header (its own header, separate from `Authorization`, which now carries the per-user Basic/Bearer credentials). Note: this changed from the older `Authorization: Bearer <MCP_TOKEN>` scheme — see `.env.example` and the CHANGELOG Breaking Changes entry. 2. **AI agent chat** (`core/ai-chat/` server + `apps/client/src/features/ai-chat/` client). A built-in agent over the wiki using the Vercel **AI SDK** (`ai`, `@ai-sdk/*`) against any OpenAI-compatible provider configured per workspace (`integrations/ai/` — credentials encrypted at rest via `integrations/crypto`, stored in `ai_provider_credentials`). Key pieces: - `core/ai-chat/tools/` — the agent's ~40 read+write tools. Every tool runs under the **calling user's** CASL permissions via a per-user loopback access token (`docmost-client.loader.ts`), so the agent can never exceed what the user could do. Only **reversible** operations are exposed (page history + trash; no permanent delete). Agent edits get an "AI agent" provenance badge in page history (`20260616T130000-agent-provenance` migration). - `core/ai-chat/embedding/` — RAG indexer + a BullMQ consumer on `AI_QUEUE` that embeds pages into `page_embeddings` (vector search), complementing Postgres full-text search. Pages are (re)indexed on edit; `AI_EMBEDDING_TIMEOUT_MS` bounds a hung embeddings endpoint. @@ -105,7 +277,7 @@ Vite SPA. Code is organized by feature under `apps/client/src/features/*` (mirro ## CI / release -- `.github/workflows/develop.yml` — on push to `main`, builds and pushes `ghcr.io/vvzvlad/gitmost:develop`. +- `.github/workflows/develop.yml` — on push to `develop`, builds and pushes `ghcr.io/vvzvlad/gitmost:develop`. 
- `.github/workflows/release.yml` — on `v*` tags (or manual dispatch), builds multi-arch (amd64 + arm64) images, pushes a manifest list to GHCR (`latest` + semver tags), and creates a draft GitHub Release with image tarballs. Uses the built-in `GITHUB_TOKEN` (not Docker Hub). - The `Dockerfile` is a multi-stage pnpm build; `APP_VERSION` is passed as a build arg because `.git` isn't in the build context. @@ -119,8 +291,30 @@ The git tag is the source of truth for the displayed version (UI reads `git desc 4. Update `CHANGELOG.md` (Keep a Changelog format): add a `## [X.Y.Z] - YYYY-MM-DD` section summarising `git log vPREV..HEAD --no-merges` grouped by type (Breaking / Added / Changed / Fixed / Removed), and add the `compare/vPREV...vX.Y.Z` link at the bottom. Fold the bump + changelog into the release commit. 5. Tag the release commit with a **lightweight** tag (existing release tags are lightweight): `git tag vX.Y.Z`. 6. Push commit and tag: `git push origin main && git push origin vX.Y.Z`. Pushing the `v*` tag triggers `release.yml` (multi-arch GHCR images + a draft GitHub Release). +7. **Back-merge the release into `develop`** so develop builds report the new version: `git checkout develop && git merge --no-ff main && git push origin develop` (push to Gitea as well if that is the canonical remote). +#### Why develop keeps showing the *previous* version (and why step 7 matters) + +The UI version is `git describe --tags --always` (see `vite.config.ts`), which walks **backwards from the current commit** and picks the **nearest tag reachable in that commit's ancestry**, then appends `-<commits-since-tag>-g<short-hash>`. + +The release tag (`vX.Y.Z`) is created on **`main`'s release merge commit**, and that commit is **not** in `develop`'s history. So until the release is back-merged, `git describe` on `develop` cannot see the new tag and falls back to the *previous* reachable tag. 
Result: every develop build — and the `ghcr.io/vvzvlad/gitmost:develop` image — keeps reporting e.g. `v0.91.0-NNN-g<hash>` even though `main` is already tagged `v0.93.0`. This is the classic git-flow pitfall: the version on `develop` does **not** advance just because a release was tagged on `main`. + +Back-merging `main → develop` (step 7) pulls the tagged release commit into `develop`'s ancestry, after which develop builds correctly show `vX.Y.Z-NNN-g<hash>`. If `develop` already drifted (release tagged but never back-merged), just run step 7 now — no new tag is needed. + +##### The tag must also exist on the remote that CI builds from (multi-remote gotcha) + +`git describe` names a tag **ref**, not just a commit — so the back-merge is *necessary but not sufficient*. The develop image is built by GitHub Actions (`develop.yml`, `actions/checkout` with `fetch-depth: 0`, then `git describe --tags --always`), so the version it prints depends on which tags exist **on the `github` remote**, not on your local clone or on `gitea`. + +This repo has two writable remotes — `gitea` (canonical, where commits land) and `github` (where the `:develop` and release images are built) — plus `upstream` (docmost, never push). **`git push <branch>` does NOT push tags**; tags must be pushed explicitly and *to each remote separately*. A release tag that only lives on `gitea` is invisible to the GitHub Actions build: even with the tagged commit fully in `develop`'s history (step 7 done), `git describe` on the GitHub runner falls back to the previous tag it *does* have, so the develop image keeps showing e.g. `v0.91.0-NNN` while `git describe` locally already says `v0.93.0-NN`. + +Fix / checklist when develop still shows the old version after a back-merge: + +1. Confirm the tag is missing on github: `git ls-remote --tags github` (compare with `gitea`). +2. Push it there: `git push github vX.Y.Z` (and `git push gitea vX.Y.Z` if it is missing on gitea too). 
Note: pushing a `v*` tag to `github` also triggers `release.yml` (multi-arch GHCR images + draft Release) — expected, but be aware. +3. Re-run the develop build (`gh workflow run Develop`, or push any commit to `develop`) so `git describe` re-resolves with the tag now present. + +(The `git push origin ...` in steps 6–7 above is shorthand — there is no `origin` remote here; substitute `gitea` **and** `github` as appropriate, and always push release tags to both.) ## Planning docs -`docs/*.md` hold design plans for in-progress / planned features (mobile app, offline sync, RAG improvements, voice dictation, arbitrary HTML embed). `docs/backlog/*.md` track known issues / follow-ups (e.g. AI-chat review follow-ups). Consult the relevant plan before working on one of those areas. +`docs/*.md` hold design plans for in-progress / planned features (mobile app, offline sync, RAG improvements, voice dictation). Arbitrary HTML embed has **shipped** — it renders inside a sandboxed iframe and, when the `htmlEmbed` workspace toggle is on, is insertable by any member (no longer admin-only); turning the toggle off hides/stops serving existing embeds on public share pages. `docs/backlog/*.md` track known issues / follow-ups (e.g. AI-chat review follow-ups). Consult the relevant plan before working on one of those areas. diff --git a/CHANGELOG.md b/CHANGELOG.md index 29058510..2174de5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,239 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.94.0] - 2026-06-26 + +This release makes AI chat durable and fast: assistant turns are persisted to +the database step by step and exported server-side, the desktop app no longer +freezes at 100% CPU on long agent runs, and MCP writes are badged with +unspoofable AI attribution. 
It also reworks footnotes (Pandoc-style reuse and +per-reference back-links), hardens page moves and duplication against cycles +and lost edits, and caps the anonymous public-share assistant with a +per-workspace rolling-day token budget. + +### Added + +- **Persistent AI-chat history as the source of truth + server-side export.** + An assistant turn is now persisted to the database step by step: the row is + inserted upfront as `streaming` and updated as each agent step finishes, then + finalized once to `completed`/`error`/`aborted`. A process that dies mid-turn + keeps every finished step, and a startup sweep flips any dangling `streaming` + row (untouched for 10 minutes) to `aborted`. Chat "Copy" now exports + server-side from these rows (`POST /ai-chat/export`) rather than from live + client state, so the export is identical whether a chat is freshly streaming, + just switched to, or reloaded — and is available from the first turn of a new + chat. (#183, #174) + +- **AI-agent attribution for MCP writes.** Comments (and pages) created through + the MCP endpoint by a dedicated agent account are now badged as "AI", with + unspoofable provenance derived from a per-user `is_agent` flag (not from the + request body). **Operator setup:** use a _dedicated_ service account for the + MCP fallback and set the flag with SQL — + `UPDATE users SET is_agent = true WHERE email = '<mcp-account>'`. Never flag a + human or shared account, or its normal edits get mis-attributed as AI. See the + AI-agent block in `.env.example`. (#143) +- **Footnote import diagnostics.** The MCP page-write tools (`create_page`, + `update_page`, `import_page_markdown`) now return a `footnoteWarnings` array + flagging dangling references, empty or duplicate definitions, and `[^id]` + markers inside table rows, so an agent can fix its own markup. The page is + still created; the field is omitted when there are no problems. 
(#166) +- **AI chat "Protocol" setting (`chatApiStyle`).** A new admin choice in AI + settings for the `openai` driver: `openai-compatible` (default) routes chat + through `@ai-sdk/openai-compatible`, which surfaces a provider's streamed + reasoning (`reasoning_content` → reasoning parts) for z.ai/GLM, DeepSeek, + OpenRouter, etc.; `openai` uses the official provider (real-OpenAI + reasoning-model request shaping). Chosen explicitly rather than inferred from + the base URL, since a custom URL can front real OpenAI too. (#175, #177) +- **Per-MCP-server instructions in the agent prompt.** Each external MCP server + now has an admin-authored `instructions` field ("how/when to use this server's + tools") that is injected into the agent's system prompt next to that server's + tool descriptions. Trusted text, rendered inside the prompt safety sandwich; + shown only for a server that actually connected and contributed ≥1 callable + tool. (#180) +- **Footnote multi-backlinks.** A footnote referenced more than once now shows a + back-link per reference (↩ a b c …), each scrolling to its own occurrence, like + Pandoc/Wikipedia; a single-reference footnote keeps the plain ↩. (#168) + +### Changed + +- **AI chat default provider is now `openai-compatible` (reasoning surfaced).** + For the `openai` driver the chat provider defaults to the openai-compatible + implementation, so a workspace pointing at z.ai/GLM/DeepSeek now streams the + model's reasoning out of the box. An endpoint that is real OpenAI behind a + custom base URL should set the new `chatApiStyle` "Protocol" to `openai`. (#177) + +- **Footnotes now reuse (Pandoc semantics).** Multiple `[^a]` references to the + same id are ONE footnote — one number, one definition, several back-references + — instead of being renamed to `a__2`, `a__3`. 
Duplicate `[^a]:` definitions are + first-wins on import (the rest are dropped and reported via `footnoteWarnings`), + and a reference with no definition yields a single empty footnote rather than + one per occurrence. This supersedes the 0.93.0 "survive duplicate-id + definitions" behavior for the import path. (#166) + +- **Public share AI: default per-workspace hourly assistant cap lowered + 300 → 100.** The limiter falls back to this default whenever + `SHARE_AI_WORKSPACE_MAX_PER_HOUR` is unset, so a `0.93.0` deployment that + never set the env var has its anonymous public-share assistant hourly cap + cut from 300 to 100 on upgrade. Set `SHARE_AI_WORKSPACE_MAX_PER_HOUR` to + keep the previous limit. (#62) + +### Fixed + +- **AI chat: the desktop app no longer freezes at 100% CPU on long agent runs.** + `useChat` re-rendered on every streamed token and `MessageItem`/`ReasoningBlock` + re-parsed the whole transcript markdown (marked + DOMPurify) on every delta, so + per-turn work grew quadratically and saturated the main thread. The stream is now + throttled (`experimental_throttle`) to ~20 Hz and each finalized message row / + markdown part / reasoning block is memoized, so a long turn no longer re-parses + already-finished content. (#182) +- **Editor: caret/selection landed on the wrong line when clicking inside code + blocks and footnotes.** The affected NodeViews rendered their non-editable + chrome (language menu, footnotes heading, footnote number marker) before the + editable content, so the browser's click hit-testing missed the contentDOM and + snapped the caret to a previous node. Content now renders first in the DOM + (chrome is lifted back into place via CSS flex `order`), and scroll containers + are nudged after a paste to refresh stale hit-testing geometry. The caret + symptom is macOS-specific and was confirmed manually on macOS; the automated + guard pins the DOM-order invariant, not the caret behavior itself. 
(#146, #147) +- **AI chat: the live token counter now ticks between agent steps.** During a + multi-step turn the header token badge (and the "Thinking… · N tokens" line) + no longer froze on the previous step's authoritative usage; the current step's + estimate is combined per-component with `max`, so the count rises smoothly and + never jumps backwards. (#163) +- **AI chat: "New chat" during a streaming first turn now resets the whole + chat, not just the role badge.** Starting a new chat mid-stream cleared the + header but left the in-flight turn's messages behind, so the fresh chat opened + pre-populated with the previous conversation; it now fully resets. (#161) +- **AI chat: a dropped tool argument now yields an actionable error.** When the + model omitted a required parameter (typically `pageId`) in a parallel/batch + tool call, the assistant forwarded zod's raw "expected string, received + undefined" text; tool inputs now return a message naming each missing/invalid + parameter (the JSON Schema contract is unchanged and nothing is backfilled). + (#190) +- **Page move: cycle checks are now atomic and depth-bounded.** Moving a page + under one of its own descendants is rejected in the same transaction as the + update (closing a TOCTOU window where two concurrent A→B / B→A moves could + form a cycle), and the recursive tree-traversal CTEs carry a cycle/depth guard + so a pre-existing cycle can no longer spin a query. (#207) +- **Page/editor robustness batch.** Duplicating a page now copies shared + attachments for every referencing page (not just the first); colliding block + ids are de-duplicated on import/normalize so MCP addressed edits can't hit the + wrong node; transient collab store failures are retried so autosave edits + aren't lost; and an out-of-order tree move no longer drops the moved subtree. 
+ (#206) + +### Security + +- **Public share AI: per-workspace rolling-day token budget.** The anonymous + share assistant now caps a workspace's actual token spend (input + output, + summed across every accepted turn) over a trailing day, on top of the hourly + request cap — so a caller who evades the per-IP throttle still cannot run up + the owner's provider bill without bound. Cluster-wide via Redis and FAILS + CLOSED if Redis is down; default 1,000,000 tokens/day, overridable via + `SHARE_AI_WORKSPACE_TOKEN_BUDGET_PER_DAY`. (#159) + +## [0.93.0] - 2026-06-21 + +This release builds on the 0.91.0 AI foundation: admin-defined AI agent roles, +an anonymous AI assistant on public shares, server-side voice dictation, an +editor footnotes model, live page-template embeds, and sandboxed arbitrary-HTML +embeds — plus a large batch of security hardening and test coverage. + +### Breaking Changes + +- **MCP shared-token auth moved to its own header.** The `/mcp` shared guard + no longer reads `Authorization: Bearer <MCP_TOKEN>`; it now reads only the + `X-MCP-Token` header. The `Authorization` header is now reserved for per-user + HTTP Basic / Bearer access-JWT credentials, so each `/mcp` request + authenticates as a specific user (the `MCP_DOCMOST_*` service account is only + a fallback). Existing MCP clients (e.g. Claude Desktop) configured with + `Authorization: Bearer <MCP_TOKEN>` must be reconfigured to send + `X-MCP-Token: <MCP_TOKEN>` instead. See `MCP_TOKEN` in `.env.example`. As a + one-time aid, the server logs a single migration warning when it sees the + old-style header. + +### Added + +- **AI agent roles**: admin-defined assistant personas with an optional + per-role model override, selectable in chat. +- **Anonymous AI assistant on public shares**: public-share visitors can chat + with a selectable agent-role identity that reuses the internal chat + presentation, with per-request output-token caps and a fail-closed Redis + limiter. 
+- **Voice dictation (STT)**: server-side speech-to-text with a mic button in + the chat and the editor, OpenRouter STT support, an endpoint test, and real + provider-error surfacing. +- **Footnotes**: an editor footnotes model (inline references + a definitions + list). +- **Page templates**: live whole-page embed (MVP) with a template-marker icon + in the page tree and a working Refresh action. +- **Arbitrary HTML/CSS/JS embeds**: a sandboxed-iframe embed block gated by a + per-workspace toggle (default OFF); insertable by any member when the toggle + is on. +- Admin-only **"Analytics / tracker"** workspace setting: a raw HTML/JS snippet + injected into the `<head>` of public share pages only (for analytics such as + Google Analytics or Yandex.Metrika), kept separate from the member-facing + HTML-embed feature. +- **MCP**: a hierarchical tree mode for `list_pages`, and per-user auth for the + embedded `/mcp` endpoint. +- **Page tree**: Expand all / Collapse all for the space tree, and + server-authoritative realtime tree updates. +- **AI chat UX**: a `get_current_page` tool for proxy-robust page context, a + current-context-size readout, an agent step cap raised 8→20 with a forced + final text answer, and auto-collapse of the chat window on page focus. +- **AI settings**: a Clear control inside the API-key field and an endpoint + status dot bound to "configured × enabled". +- **Client**: an always-visible space grid replacing the space-switcher popover, + removal of the sidebar Overview item, tighter comments-panel density, and no + auto-open of the comments panel when adding a comment. + +### Changed + +- HTML embed blocks now render inside a sandboxed iframe (separate origin) and, + when the workspace HTML-embed toggle is on, can be inserted by any member + (previously admin-only). Turning the toggle off hides existing embeds and + stops serving them on public share pages. 
+- Remove the server-side role-based stripping of HTML-embed blocks from the + write paths (collab/REST/MCP, page create/duplicate, import, transclusion + unsync); sandboxing makes per-write gating unnecessary. The only remaining + server-side strip is the public-share read path, which still honors the + workspace HTML-embed toggle. + +### Fixed + +- AI chat: preserve scroll position during streaming, record chats that fail on + their first turn, and resolve the current page for agent context behind + proxies. +- AI roles: guard `update()` against concurrent soft-delete; harden the model + override, role-name uniqueness, and id validation; sandwich the safety + framework around the role persona. +- Auth: handle null-password (SSO/LDAP-only) accounts without a bcrypt throw. +- Footnotes: survive duplicate-id definitions without collab divergence. +- HTML embed: fix stale iframe height and damp the resize loop; strip embeds at + serve time on authenticated read paths and the plain page-create path. +- Page templates: import `ThrottleModule` so collab boots, never strand an + in-flight page-embed id, and add defense-in-depth workspace checks. +- Pages: `movePage` cycle guard with no phantom `PAGE_MOVED` event. +- Import: surface the real error cause from `/pages/import` instead of a generic 400. + +### Security + +- MCP: close an SSO/MFA bypass on Basic auth and stop minting non-init sessions; + close a brute-force limiter check-then-act race. +- Public share: block restricted descendants in the anonymous assistant, cap + per-request output, fail closed when Redis is unavailable, and reject non-text + message parts to close a size-cap bypass. +- Make `trustProxy` env-configurable with a safe default. + +### Internal + +- CI: gate the `develop` and release image builds on the test suite, run the + suites on push/PR, and build the `:develop` image on push to `develop`. 
+- Docs: replace `CLAUDE.md` with `AGENTS.md` codifying the agent workflow and + the release procedure, add migration-ordering guidance, and prune implemented + plans. +- A large batch of new server/client test coverage. + ## [0.91.0] - 2026-06-18 Gitmost is a community-focused fork of Docmost. This release drops the @@ -92,5 +325,7 @@ knowledge layer, an embedded MCP server, and the Gitmost rebrand. - Build: drop the private EE submodule, retarget CI to GHCR, and update the Docker image to the GHCR registry. -[Unreleased]: https://github.com/vvzvlad/gitmost/compare/v0.91.0...HEAD +[Unreleased]: https://github.com/vvzvlad/gitmost/compare/v0.94.0...HEAD +[0.94.0]: https://github.com/vvzvlad/gitmost/compare/v0.93.0...v0.94.0 +[0.93.0]: https://github.com/vvzvlad/gitmost/compare/v0.91.0...v0.93.0 [0.91.0]: https://github.com/vvzvlad/gitmost/compare/v0.90.1...v0.91.0 diff --git a/README.md b/README.md index 578790f0..9f9982cb 100644 --- a/README.md +++ b/README.md @@ -101,6 +101,9 @@ community feature, with no enterprise license. Open it from the page header; the - ✅ **macOS app** — native macOS app ([gitmost-app](https://github.com/vvzvlad/gitmost-app)) that embeds the UI with multi-server tabs. - ✅ **AI chat** — built-in AI agent chat over your wiki content (read + write, RAG search, configurable provider, optional web access via external MCP). - ✅ **Voice dictation** — microphone button in the AI agent chat and the page editor; audio is transcribed server-side (Whisper / OpenAI-compatible STT) via the workspace AI provider, with an admin toggle to show/hide it. +- ✅ **Page templates** — flag a page as a template and embed its whole content live into other pages; edits to the template propagate to every place it is inserted (whole-page transclusion on top of the existing synced blocks). +- ✅ **Public-share AI assistant** — anonymous visitors of a shared page can ask the AI agent, scoped strictly to that share's page tree (read-only, share-scoped search), behind a workspace toggle. 
+- ✅ **Footnotes** — academic-style footnotes: a numbered superscript reference inline (read it in place via a hover popover), with the note text living as a real, editable block at the bottom of the page; auto-numbered, collaboration-safe, and round-trips through Markdown export/import and the AI agent / MCP. ### In progress @@ -108,14 +111,11 @@ community feature, with no enterprise license. Open it from the page header; the ### Planned -- 🔭 **Page templates** — flag a page as a template and embed its whole content live into other pages; edits to the template propagate to every place it is inserted (whole-page transclusion on top of the existing synced blocks). See [docs/page-templates-plan.md](docs/page-templates-plan.md). - 🔭 **Viewer comments** — let read-only viewers leave comments. -- 🔭 **Public-share AI assistant** — let anonymous visitors of a shared page ask the AI agent, scoped strictly to that share's page tree (read-only, share-scoped search), behind a workspace toggle. See [docs/public-share-assistant-plan.md](docs/public-share-assistant-plan.md). - 🔭 **Password-protected pages** — protect individual pages / shares with a password. - 🔭 **Windows / Linux app** — native desktop app for Windows and Linux. -- 🔭 **Mobile app** — mobile apps (iOS first, Android to follow), reusing the existing responsive web UI and editor via a Capacitor wrapper, with offline planned for later. See [docs/mobile-app-plan.md](docs/mobile-app-plan.md). +- 🔭 **Mobile app** — mobile apps (iOS first, Android to follow), reusing the existing responsive web UI and editor via a Capacitor wrapper, with offline planned for later. See [issue #195](https://gitea.vvzvlad.xyz/vvzvlad/gitmost/issues/195). - 🔭 **Offline mode** — offline sync & PWA support. 
-- 🔭 **Footnotes** — academic-style footnotes: a numbered superscript reference inline (read it in place via a hover popover), with the note text living as a real, editable block at the bottom of the page; auto-numbered, collaboration-safe, and round-trips through Markdown export/import and the AI agent / MCP. See [docs/footnotes-plan.md](docs/footnotes-plan.md). - 🔭 **Editor & UX improvements** — blocks inside tables (lists, to-do items), column layout, additional heading levels, highlight blocks, custom emoji in callouts, floating images, anchor links for page mentions, toggles (shared-page width, aside/sidebar, spellcheck, ligatures), sanitized space-tree export, and mentions in breadcrumbs. ## Getting started @@ -158,6 +158,11 @@ the existing data directory is reused as-is: start the new migrations apply on top of your existing schema (`CREATE EXTENSION vector` plus the `page_embeddings` and AI tables); watch the logs for `Migration "..." executed successfully`. +> ⚠️ **Never change `APP_SECRET` after setup.** It does double duty: it signs JWTs *and* derives the +> AES-256-GCM key that encrypts stored AI-provider credentials (API keys). Rotating it makes every +> saved AI API key undecryptable (you'd have to re-enter them in AI settings) and invalidates all +> existing sessions. Pick it once, keep it stable, and back it up together with your database. + ### Notes - **Back up first.** Take a `pg_dump` before swapping — migrations apply in place, and the diff --git a/README.ru.md b/README.ru.md index 0bd9a5de..d659d2fb 100644 --- a/README.ru.md +++ b/README.ru.md @@ -102,6 +102,9 @@ real-time-коллаборации Docmost, поэтому запись нико - ✅ **Приложение для macOS** — нативное приложение для macOS ([gitmost-app](https://github.com/vvzvlad/gitmost-app)), встраивающее UI с вкладками для нескольких серверов. 
- ✅ **AI-чат** — встроенный чат с AI-агентом по содержимому вики (чтение + запись, RAG-поиск, настраиваемый провайдер, опциональный доступ в интернет через внешние MCP). - ✅ **Голосовая диктовка** — кнопка-микрофон в чате AI-агента и в редакторе страниц; аудио распознаётся на сервере (Whisper / OpenAI-совместимый STT) через AI-провайдер воркспейса, с тумблером админа для показа/скрытия. +- ✅ **Шаблоны страниц** — пометить страницу шаблоном и вставлять её содержимое живой ссылкой в другие страницы; правки шаблона распространяются на все места вставки (whole-page-трансклюзия поверх существующих synced-блоков). +- ✅ **AI-ассистент на публичных шарах** — анонимный зритель расшаренной страницы может спросить AI-агента, который ищет строго по дереву этой шары (read-only, share-scoped поиск), за тумблером воркспейса. +- ✅ **Сноски** — сноски академического вида: нумерованная ссылка-надстрочник прямо в тексте (читается на месте во всплывающем окне по наведению), а текст сноски живёт реальным редактируемым блоком внизу страницы; авто-нумерация, безопасна для совместного редактирования, переживает экспорт/импорт Markdown и доступна AI-агенту / MCP. ### В процессе @@ -109,14 +112,11 @@ real-time-коллаборации Docmost, поэтому запись нико ### В планах -- 🔭 **Шаблоны страниц** — пометить страницу шаблоном и вставлять её содержимое живой ссылкой в другие страницы; правки шаблона распространяются на все места вставки (whole-page-транслюзия поверх существующих synced-блоков). См. [docs/page-templates-plan.md](docs/page-templates-plan.md). - 🔭 **Комментарии зрителей** — возможность комментировать для пользователей с доступом только на чтение. -- 🔭 **AI-ассистент на публичных шарах** — возможность анонимному зрителю расшаренной страницы спросить AI-агента, который ищет строго по дереву этой шары (read-only, share-scoped поиск), за тумблером воркспейса. См. [docs/public-share-assistant-plan.md](docs/public-share-assistant-plan.md). 
- 🔭 **Защищённые паролем страницы** — защита отдельных страниц / шар паролем. - 🔭 **Приложение для Windows / Linux** — нативное десктоп-приложение для Windows и Linux. -- 🔭 **Мобильное приложение** — мобильные приложения (iOS обязательно, Android как пойдёт) на базе существующей адаптивной веб-версии и редактора через обёртку Capacitor; оффлайн запланирован на будущее. См. [docs/mobile-app-plan.md](docs/mobile-app-plan.md). +- 🔭 **Мобильное приложение** — мобильные приложения (iOS обязательно, Android как пойдёт) на базе существующей адаптивной веб-версии и редактора через обёртку Capacitor; оффлайн запланирован на будущее. См. [issue #195](https://gitea.vvzvlad.xyz/vvzvlad/gitmost/issues/195). - 🔭 **Офлайн-режим** — офлайн-синхронизация и поддержка PWA. -- 🔭 **Сноски** — сноски академического вида: нумерованная ссылка-надстрочник прямо в тексте (читается на месте во всплывающем окне по наведению), а текст сноски живёт реальным редактируемым блоком внизу страницы; авто-нумерация, безопасна для совместного редактирования, переживает экспорт/импорт Markdown и доступна AI-агенту / MCP. См. [docs/footnotes-plan.md](docs/footnotes-plan.md). - 🔭 **Улучшения редактора и UX** — блоки внутри таблиц (списки, чек-листы), колоночная вёрстка, дополнительные уровни заголовков, highlight-блоки, кастомные эмодзи в callout-ах, плавающие изображения, anchor-ссылки на упоминания страниц, тоглы (ширина шары, aside/сайдбар, spellcheck, лигатуры), санитизация экспорта дерева спейса и mentions в хлебных крошках. ## С чего начать @@ -159,6 +159,12 @@ dump/restore, существующий каталог данных переис новые миграции применяются поверх вашей схемы (`CREATE EXTENSION vector` плюс таблицы `page_embeddings` и AI-таблицы); следите в логах за строками `Migration "..." executed successfully`. 
+> ⚠️ **Никогда не меняйте `APP_SECRET` после установки.** Он выполняет двойную роль: подписывает JWT +> *и* служит материалом для ключа AES-256-GCM, которым шифруются сохранённые ключи AI-провайдеров +> (API-ключи). Смена секрета сделает все сохранённые AI-ключи нерасшифровываемыми (придётся вводить +> их заново в настройках AI) и инвалидирует все текущие сессии. Задайте его один раз, держите +> неизменным и бэкапьте вместе с базой данных. + ## Возможности diff --git a/apps/client/package.json b/apps/client/package.json index 00a25bbe..59da968e 100644 --- a/apps/client/package.json +++ b/apps/client/package.json @@ -1,10 +1,10 @@ { "name": "client", "private": true, - "version": "0.91.0", + "version": "0.94.0", "scripts": { - "dev": "vite", - "build": "tsc && vite build", + "dev": "node scripts/copy-vad-assets.mjs && vite", + "build": "node scripts/copy-vad-assets.mjs && tsc && vite build", "lint": "eslint .", "preview": "vite preview", "format": "prettier --write \"src/**/*.tsx\" \"src/**/*.ts\"", @@ -28,6 +28,7 @@ "@mantine/modals": "8.3.18", "@mantine/notifications": "8.3.18", "@mantine/spotlight": "8.3.18", + "@ricky0123/vad-web": "^0.0.30", "@slidoapp/emoji-mart": "5.8.7", "@slidoapp/emoji-mart-data": "1.2.4", "@slidoapp/emoji-mart-react": "1.1.5", @@ -53,6 +54,7 @@ "mantine-form-zod-resolver": "1.3.0", "mermaid": "11.15.0", "mitt": "3.0.1", + "onnxruntime-web": "^1.27.0", "posthog-js": "1.372.2", "react": "18.3.1", "react-clear-modal": "^2.0.18", diff --git a/apps/client/public/locales/en-US/translation.json b/apps/client/public/locales/en-US/translation.json index 21f7c5f7..ad884ddb 100644 --- a/apps/client/public/locales/en-US/translation.json +++ b/apps/client/public/locales/en-US/translation.json @@ -119,6 +119,8 @@ "Name": "Name", "New email": "New email", "New page": "New page", + "New note": "New note", + "Create in space": "Create in space", "New password": "New password", "No group found": "No group found", "No page history saved yet.": "No page 
history saved yet.", @@ -183,6 +185,7 @@ "Successfully imported": "Successfully imported", "Successfully restored": "Successfully restored", "System settings": "System settings", + "Template": "Template", "Templates": "Templates", "Theme": "Theme", "To change your email, you have to enter your password and new email.": "To change your email, you have to enter your password and new email.", @@ -255,6 +258,7 @@ "Copy to space": "Copy to space", "Copy chat": "Copy chat", "Copied": "Copied", + "Failed to export chat": "Failed to export chat", "Duplicate": "Duplicate", "Select a user": "Select a user", "Select a group": "Select a group", @@ -417,6 +421,8 @@ "{{count}} command available_other": "{{count}} commands available", "{{count}} result available_one": "1 result available", "{{count}} result available_other": "{{count}} results available", + "{{count}} result found_one": "{{count}} result found", + "{{count}} result found_other": "{{count}} results found", "Equal columns": "Equal columns", "Left sidebar": "Left sidebar", "Right sidebar": "Right sidebar", @@ -473,6 +479,7 @@ "Make sub-pages public too": "Make sub-pages public too", "Allow search engines to index page": "Allow search engines to index page", "Open page": "Open page", + "Open source page": "Open source page", "Page": "Page", "Delete public share link": "Delete public share link", "Delete share": "Delete share", @@ -529,6 +536,7 @@ "Add 2FA method": "Add 2FA method", "Backup codes": "Backup codes", "Disable": "Disable", + "disabled": "disabled", "Invalid verification code": "Invalid verification code", "New backup codes have been generated": "New backup codes have been generated", "Failed to regenerate backup codes": "Failed to regenerate backup codes", @@ -703,10 +711,12 @@ "Authorization header": "Authorization header", "Tool allowlist": "Tool allowlist", "Optional. Leave empty to allow all tools the server exposes.": "Optional. 
Leave empty to allow all tools the server exposes.", - "Use Tavily preset": "Use Tavily preset", + "Optional guidance for the agent on how and when to use this server's tools. Injected into the system prompt. The server's tools are namespaced as \"<server name>_*\".": "Optional guidance for the agent on how and when to use this server's tools. Injected into the system prompt. The server's tools are namespaced as \"<server name>_*\".", "Test": "Test", "Available tools": "Available tools", "No tools available": "No tools available", + "Failed": "Failed", + "OK · {{n}}": "OK · {{n}}", "Created successfully": "Created successfully", "Deleted successfully": "Deleted successfully", "Clear": "Clear", @@ -948,6 +958,7 @@ "Try a different search term.": "Try a different search term.", "Try again": "Try again", "Untitled chat": "Untitled chat", + "No document": "No document", "You": "You", "What can I help you with?": "What can I help you with?", "Are you sure you want to revoke this {{credential}}": "Are you sure you want to revoke this {{credential}}", @@ -977,6 +988,9 @@ "Page menu": "Page menu", "Expand": "Expand", "Collapse": "Collapse", + "Expand all": "Expand all", + "Collapse all": "Collapse all", + "Couldn't expand the tree: {{reason}}": "Couldn't expand the tree: {{reason}}", "Comment menu": "Comment menu", "Group menu": "Group menu", "Show hidden breadcrumbs": "Show hidden breadcrumbs", @@ -1067,6 +1081,8 @@ "Undo": "Undo", "Redo": "Redo", "Backlinks": "Backlinks", + "Back to references": "Back to references", + "Back to reference {{label}}": "Back to reference {{label}}", "Last updated by": "Last updated by", "Last updated": "Last updated", "Stats": "Stats", @@ -1119,15 +1135,55 @@ "Removed from favorites": "Removed from favorites", "Added {{name}} to favorites": "Added {{name}} to favorites", "Removed {{name}} from favorites": "Removed {{name}} from favorites", + "Label added": "Label added", + "Label removed": "Label removed", + "Image updated": "Image 
updated", + "Unsupported image type": "Unsupported image type", + "Member deactivated": "Member deactivated", + "Member activated": "Member activated", + "Name is required": "Name is required", + "Name must be 40 characters or fewer": "Name must be 40 characters or fewer", + "Group name must be at least 2 characters": "Group name must be at least 2 characters", + "Group name must be 100 characters or fewer": "Group name must be 100 characters or fewer", + "Description must be 500 characters or fewer": "Description must be 500 characters or fewer", + "Invalid invitation link": "Invalid invitation link", "Page menu for {{name}}": "Page menu for {{name}}", "Create subpage of {{name}}": "Create subpage of {{name}}", "AI chat": "AI chat", + "Ask a question about this documentation.": "Ask a question about this documentation.", + "Ask a question…": "Ask a question…", + "Thinking…": "Thinking…", + "Thinking… · {{count}} tokens": "Thinking… · {{count}} tokens", + "Thinking… · {{count}} tokens_one": "Thinking… · {{count}} token", + "Thinking… · {{count}} tokens_other": "Thinking… · {{count}} tokens", + "Thinking · {{count}} tokens": "Thinking · {{count}} tokens", + "Thinking · {{count}} tokens_one": "Thinking · {{count}} token", + "Thinking · {{count}} tokens_other": "Thinking · {{count}} tokens", + "The assistant is unavailable right now. Please try again.": "The assistant is unavailable right now. Please try again.", + "Public share assistant": "Public share assistant", + "Let anonymous visitors of public shares ask an AI assistant scoped to that share's pages. You pay for the tokens.": "Let anonymous visitors of public shares ask an AI assistant scoped to that share's pages. You pay for the tokens.", + "Public assistant model": "Public assistant model", + "Defaults to the chat model": "Defaults to the chat model", + "Optional cheaper model id for the public assistant. Empty uses the chat model above.": "Optional cheaper model id for the public assistant. 
Empty uses the chat model above.", + "Assistant identity": "Assistant identity", + "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.": "Pick an agent role whose persona the public assistant adopts. The safety rules always still apply.", + "Built-in assistant persona": "Built-in assistant persona", "Minimize": "Minimize", - "Current context size": "Current context size", + "Context size / model limit": "Context size / model limit", + "Context window (tokens)": "Context window (tokens)", + "Shown as used / total in the chat header. Leave empty to hide the limit.": "Shown as used / total in the chat header. Leave empty to hide the limit.", "AI agent": "AI agent", + "Take a look at the current document": "Take a look at the current document", "AI agent is typing…": "AI agent is typing…", + "{{name}} is typing…": "{{name}} is typing…", "Send": "Send", + "Send when the agent finishes": "Send when the agent finishes", + "Queue message": "Queue message", + "Remove queued message": "Remove queued message", "Stop": "Stop", + "Response stopped.": "Response stopped.", + "Connection lost — the answer was interrupted.": "Connection lost — the answer was interrupted.", + "Response stopped (manually or the connection dropped).": "Response stopped (manually or the connection dropped).", "Chat menu": "Chat menu", "No chats yet.": "No chats yet.", "Delete this chat?": "Delete this chat?", @@ -1159,9 +1215,16 @@ "Semantic search": "Semantic search", "Voice / STT": "Voice / STT", "Voice dictation": "Voice dictation", + "Streaming dictation": "Streaming dictation", + "Transcribe as you speak, cutting on pauses": "Transcribe as you speak, cutting on pauses", "Voice dictation is not available yet.": "Voice dictation is not available yet.", "Test endpoint": "Test endpoint", + "Save and test": "Save and test", "Save endpoints": "Save endpoints", + "Configured and enabled": "Configured and enabled", + "Configured but disabled": "Configured 
but disabled", + "Enabled but not configured": "Enabled but not configured", + "Not configured": "Not configured", "External tools": "External tools", "Gitmost as MCP client": "Gitmost as MCP client", "Servers the agent calls out to.": "Servers the agent calls out to.", @@ -1189,11 +1252,71 @@ "No microphone found": "No microphone found", "Could not start recording": "Could not start recording", "Transcription failed": "Transcription failed", + "Transcribe": "Transcribe", + "No speech detected": "No speech detected", "Voice dictation is not configured": "Voice dictation is not configured", "Microphone is unavailable or already in use": "Microphone is unavailable or already in use", "Audio recording is not available in this browser/context": "Audio recording is not available in this browser/context", "Request format": "Request format", "How transcription requests are sent to the endpoint": "How transcription requests are sent to the endpoint", "OpenAI-compatible (multipart/form-data)": "OpenAI-compatible (multipart/form-data)", - "OpenRouter (JSON, base64 audio)": "OpenRouter (JSON, base64 audio)" + "OpenRouter (JSON, base64 audio)": "OpenRouter (JSON, base64 audio)", + "Dictation language": "Dictation language", + "Auto-detect": "Auto-detect", + "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.": "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.", + "Agent role": "Agent role", + "Universal assistant": "Universal assistant", + "Add role": "Add role", + "Edit role": "Edit role", + "Role name": "Role name", + "e.g. Proofreader": "e.g. Proofreader", + "Optional. Shown as the chat badge.": "Optional. Shown as the chat badge.", + "Optional. A short note about what this role does.": "Optional. 
A short note about what this role does.", + "Instructions": "Instructions", + "The built-in safety framework is always added automatically.": "The built-in safety framework is always added automatically.", + "Model provider override": "Model provider override", + "Optional. Defaults to the workspace provider.": "Optional. Defaults to the workspace provider.", + "Model override": "Model override", + "Optional. Defaults to the workspace model.": "Optional. Defaults to the workspace model.", + "e.g. gpt-4o-mini": "e.g. gpt-4o-mini", + "If you choose a different provider, it must already be configured in AI settings.": "If you choose a different provider, it must already be configured in AI settings.", + "Start automatically": "Start automatically", + "When on, picking this role sends a launch message and starts the chat. When off, the role is selected and you type the first message yourself.": "When on, picking this role sends a launch message and starts the chat. When off, the role is selected and you type the first message yourself.", + "Launch message": "Launch message", + "Sent automatically when this role is picked. Leave empty to use the default text. Ignored when “Start automatically” is off.": "Sent automatically when this role is picked. Leave empty to use the default text. Ignored when “Start automatically” is off.", + "Agent roles": "Agent roles", + "Reusable presets that shape the agent's behavior (and optionally its model). Picked when starting a new chat.": "Reusable presets that shape the agent's behavior (and optionally its model). 
Picked when starting a new chat.", + "No roles configured": "No roles configured", + "Delete role": "Delete role", + "Are you sure you want to delete this role?": "Are you sure you want to delete this role?", + "HTML embed": "HTML embed", + "Edit HTML embed": "Edit HTML embed", + "HTML embed is disabled in this workspace": "HTML embed is disabled in this workspace", + "Click to add HTML / CSS / JS": "Click to add HTML / CSS / JS", + "This HTML/CSS/JS runs in a sandboxed frame and cannot access the viewer's session, cookies, or API.": "This HTML/CSS/JS runs in a sandboxed frame and cannot access the viewer's session, cookies, or API.", + "<script>...</script>": "<script>...</script>", + "Height (px, blank = auto)": "Height (px, blank = auto)", + "advanced": "advanced", + "Enable HTML embed": "Enable HTML embed", + "Allow members to insert raw HTML/CSS/JavaScript blocks. The block renders in a sandboxed frame and cannot access the viewer's session, cookies, or API. Off by default.": "Allow members to insert raw HTML/CSS/JavaScript blocks. The block renders in a sandboxed frame and cannot access the viewer's session, cookies, or API. Off by default.", + "When enabled, any member can insert an HTML embed block. The toggle just enables or disables the block type workspace-wide.": "When enabled, any member can insert an HTML embed block. 
The toggle just enables or disables the block type workspace-wide.", + "Embeds run inside a sandboxed iframe with a separate origin, so they cannot read or modify the page they are embedded in.": "Embeds run inside a sandboxed iframe with a separate origin, so they cannot read or modify the page they are embedded in.", + "Turning this off hides existing embeds (they render as a disabled placeholder) and stops serving them on public share pages.": "Turning this off hides existing embeds (they render as a disabled placeholder) and stops serving them on public share pages.", + "Analytics / tracker": "Analytics / tracker", + "Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only.": "Injected verbatim into the <head> of PUBLIC SHARE pages only (same-origin). For analytics snippets (Google Analytics, Yandex.Metrika, etc.). Admin only.", + "Go to login page": "Go to login page", + "Move to space": "Move to space", + "Float left (wrap text)": "Float left (wrap text)", + "Float right (wrap text)": "Float right (wrap text)", + "Switch to tree": "Switch to tree", + "Switch to flat list": "Switch to flat list", + "Toggle subpages display mode": "Toggle subpages display mode", + "Page tree (child pages, recursive)": "Page tree (child pages, recursive)", + "Render the full nested tree of all descendant pages": "Render the full nested tree of all descendant pages", + "Showing {{count}} subpages_one": "Showing {{count}} subpage", + "Showing {{count}} subpages_other": "Showing {{count}} subpages", + "Protocol": "Protocol", + "How chat requests are sent and how reasoning is surfaced": "How chat requests are sent and how reasoning is surfaced", + "OpenAI-compatible (surfaces reasoning)": "OpenAI-compatible (surfaces reasoning)", + "OpenAI (official)": "OpenAI (official)" } diff --git a/apps/client/public/locales/ru-RU/translation.json b/apps/client/public/locales/ru-RU/translation.json 
index 25ff2530..c6cb7c6a 100644 --- a/apps/client/public/locales/ru-RU/translation.json +++ b/apps/client/public/locales/ru-RU/translation.json @@ -119,6 +119,8 @@ "Name": "Имя", "New email": "Новый электронный адрес", "New page": "Новая страница", + "New note": "Новая заметка", + "Create in space": "Создать в пространстве", "New password": "Новый пароль", "No group found": "Группа не найдена", "No page history saved yet.": "История страниц ещё не сохранена.", @@ -183,6 +185,7 @@ "Successfully imported": "Успешно импортировано", "Successfully restored": "Успешно восстановлено", "System settings": "Системные настройки", + "Template": "Шаблон", "Templates": "Шаблоны", "Theme": "Тема", "To change your email, you have to enter your password and new email.": "Чтобы изменить электронную почту, вам нужно ввести пароль и новый адрес.", @@ -254,6 +257,7 @@ "Copy": "Копировать", "Copy to space": "Копировать в пространство", "Copied": "Скопировано", + "Failed to export chat": "Не удалось экспортировать чат", "Duplicate": "Дублировать", "Select a user": "Выберите пользователя", "Select a group": "Выберите группу", @@ -382,6 +386,11 @@ "Quote": "Цитата", "Image": "Изображение", "Audio": "Аудио", + "Transcribe": "Транскрибировать", + "Transcribing…": "Транскрибация…", + "No speech detected": "Речь не распознана", + "Transcription failed": "Не удалось распознать речь", + "Voice dictation is not configured": "Голосовой ввод не настроен", "Embed PDF": "Встроить PDF", "Upload and embed a PDF file.": "Загрузите и встроите PDF-файл.", "Embed as PDF": "Встроить как PDF", @@ -391,6 +400,15 @@ "Toggle block": "Сворачиваемый блок", "Callout": "Выноска", "Insert callout notice.": "Вставить выноску с сообщением.", + "Footnote": "Сноска", + "Insert a footnote reference.": "Вставить ссылку на сноску.", + "Footnotes": "Примечания", + "Footnote {{number}}": "Сноска {{number}}", + "Go to footnote": "Перейти к сноске", + "Back to reference": "Вернуться к ссылке", + "Back to references": 
"Вернуться к ссылкам", + "Back to reference {{label}}": "Вернуться к ссылке {{label}}", + "Empty footnote": "Пустая сноска", "Math inline": "Строчная формула", "Insert inline math equation.": "Вставить математическое выражение в строку.", "Math block": "Блок формулы", @@ -471,6 +489,7 @@ "Make sub-pages public too": "Сделать подстраницы тоже общедоступными", "Allow search engines to index page": "Разрешить поисковым системам индексировать страницу", "Open page": "Открыть страницу", + "Open source page": "Открыть исходную страницу", "Page": "Страница", "Delete public share link": "Удалить публичную ссылку", "Delete share": "Удалить общий доступ", @@ -659,6 +678,57 @@ "AI search": "Поиск ИИ", "AI Answer": "Ответ ИИ", "Ask AI": "Спросить ИИ", + "AI agent": "AI-агент", + "Take a look at the current document": "Посмотри текущий документ", + "Start automatically": "Запускать автоматически", + "When on, picking this role sends a launch message and starts the chat. When off, the role is selected and you type the first message yourself.": "Когда включено, выбор этой роли отправляет стартовое сообщение и начинает чат. Когда выключено, роль выбирается, а первое сообщение вы вводите сами.", + "Launch message": "Стартовое сообщение", + "Sent automatically when this role is picked. Leave empty to use the default text. Ignored when “Start automatically” is off.": "Отправляется автоматически при выборе этой роли. Оставьте пустым, чтобы использовать текст по умолчанию. 
Игнорируется, когда «Запускать автоматически» выключено.", + "AI agent is typing…": "AI-агент печатает…", + "{{name}} is typing…": "{{name}} печатает…", + "Thinking…": "Думаю…", + "Thinking… · {{count}} tokens": "Думаю… · {{count}} токенов", + "Thinking… · {{count}} tokens_one": "Думаю… · {{count}} токен", + "Thinking… · {{count}} tokens_few": "Думаю… · {{count}} токена", + "Thinking… · {{count}} tokens_many": "Думаю… · {{count}} токенов", + "Thinking · {{count}} tokens": "Размышления · {{count}} токенов", + "Thinking · {{count}} tokens_one": "Размышления · {{count}} токен", + "Thinking · {{count}} tokens_few": "Размышления · {{count}} токена", + "Thinking · {{count}} tokens_many": "Размышления · {{count}} токенов", + "Agent role": "Роль агента", + "AI chat": "AI-чат", + "AI chat is disabled for this workspace.": "AI-чат отключён для этого рабочего пространства.", + "Ask a question about this documentation.": "Задайте вопрос об этой документации.", + "Ask a question…": "Задайте вопрос…", + "Ask the AI agent anything about your workspace.": "Спросите AI-агента о чём угодно по вашему рабочему пространству.", + "Ask the AI agent…": "Спросите AI-агента…", + "Copy chat": "Копировать чат", + "Created successfully": "Успешно создано", + "Context size / model limit": "Размер контекста / лимит модели", + "Context window (tokens)": "Окно контекста (токены)", + "Shown as used / total in the chat header. Leave empty to hide the limit.": "Показывается в шапке чата как использовано / всего. 
Пусто — лимит скрыт.", + "Delete this chat?": "Удалить этот чат?", + "Deleted successfully": "Успешно удалено", + "Edited by AI agent on behalf of {{name}}": "Отредактировано AI-агентом от имени {{name}}", + "Failed to delete chat": "Не удалось удалить чат", + "Failed to rename chat": "Не удалось переименовать чат", + "Failed": "Ошибка", + "OK · {{n}}": "OK · {{n}}", + "Test": "Тест", + "No tools available": "Инструменты недоступны", + "Available tools": "Доступные инструменты", + "Minimize": "Свернуть", + "No chats yet.": "Чатов пока нет.", + "Send": "Отправить", + "Send when the agent finishes": "Отправить, когда агент закончит", + "Queue message": "Поставить в очередь", + "Remove queued message": "Убрать из очереди", + "Something went wrong": "Что-то пошло не так", + "Stop": "Стоп", + "The AI agent could not respond. Please try again.": "AI-агент не смог ответить. Попробуйте ещё раз.", + "The AI provider is not configured. Ask an administrator to set it up.": "AI-провайдер не настроен. Попросите администратора настроить его.", + "Universal assistant": "Универсальный ассистент", + "You": "Вы", "AI is thinking...": "ИИ обрабатывает запрос...", "Thinking": "Думаю", "Ask a question...": "Задайте вопрос...", @@ -688,6 +758,8 @@ "Manage API keys for all users in the workspace. View the <anchor>API documentation</anchor> for usage details.": "Управляйте API-ключами для всех пользователей в рабочем пространстве. Смотрите <anchor>документацию по API</anchor> для получения информации об использовании.", "View the <anchor>API documentation</anchor> for usage details.": "Смотрите <anchor>документацию по API</anchor> для получения информации об использовании.", "View the <anchor>MCP documentation</anchor>.": "Смотрите <anchor>документацию по MCP</anchor>.", + "Instructions": "Инструкции", + "Optional guidance for the agent on how and when to use this server's tools. Injected into the system prompt. 
The server's tools are namespaced as \"<server name>_*\".": "Необязательное указание агенту, как и когда использовать инструменты этого сервера. Добавляется в системный промпт. Инструменты сервера именуются с префиксом «<имя сервера>_*».", "Sources": "Источники", "AI Answers not available for attachments": "Ответы ИИ недоступны для вложений", "No answer available": "Ответ недоступен", @@ -914,6 +986,7 @@ "Try a different search term.": "Попробуйте другой поисковый запрос.", "Try again": "Попробовать снова", "Untitled chat": "Чат без названия", + "No document": "Без документа", "What can I help you with?": "Чем я могу вам помочь?", "Are you sure you want to revoke this {{credential}}": "Вы уверены, что хотите отозвать этот {{credential}}", "Automatically provision users and groups from your identity provider via SCIM.": "Автоматически предоставляйте доступ пользователям и группам из вашего провайдера удостоверений через SCIM.", @@ -1085,5 +1158,22 @@ "Added {{name}} to favorites": "{{name}} добавлено в избранное", "Removed {{name}} from favorites": "{{name}} удалено из избранного", "Page menu for {{name}}": "Меню страницы для {{name}}", - "Create subpage of {{name}}": "Создать подстраницу для {{name}}" + "Create subpage of {{name}}": "Создать подстраницу для {{name}}", + "Dictation language": "Язык диктовки", + "Auto-detect": "Автоопределение", + "Spoken language hint sent to the transcription model. Auto-detect lets the model decide.": "Подсказка языка речи для модели транскрипции. 
«Автоопределение» оставляет выбор за моделью.", + "Float left (wrap text)": "Обтекание слева", + "Float right (wrap text)": "Обтекание справа", + "Switch to tree": "Переключить на дерево", + "Switch to flat list": "Переключить на плоский список", + "Toggle subpages display mode": "Переключить режим отображения подстраниц", + "Page tree (child pages, recursive)": "Дерево страниц (дочерние, рекурсивно)", + "Render the full nested tree of all descendant pages": "Показать полное вложенное дерево всех дочерних страниц", + "Showing {{count}} subpages_one": "Показана {{count}} подстраница", + "Showing {{count}} subpages_few": "Показано {{count}} подстраницы", + "Showing {{count}} subpages_many": "Показано {{count}} подстраниц", + "Protocol": "Протокол", + "How chat requests are sent and how reasoning is surfaced": "Как отправляются запросы чата и как показывается reasoning", + "OpenAI-compatible (surfaces reasoning)": "OpenAI-совместимый (показывает reasoning)", + "OpenAI (official)": "OpenAI (официальный)" } diff --git a/apps/client/scripts/copy-vad-assets.mjs b/apps/client/scripts/copy-vad-assets.mjs new file mode 100644 index 00000000..49aaaf1c --- /dev/null +++ b/apps/client/scripts/copy-vad-assets.mjs @@ -0,0 +1,70 @@ +// Self-host the @ricky0123/vad-web + onnxruntime-web runtime assets under +// apps/client/public/vad/. +// +// WHY THIS EXISTS: +// Both vad-web and onnxruntime-web resolve their assets by URL *at runtime* (the +// VAD audio worklet + Silero model, and ORT's wasm/mjs backend). In vad-web +// 0.0.30 the default baseAssetPath / onnxWASMBasePath is "./" — i.e. relative to +// the current page URL — NOT a CDN. In this SPA that "./" request hits the +// client-side catch-all route and gets served index.html (text/html), so the +// onnxruntime ESM/wasm backend fails to initialize ("'text/html' is not a valid +// JavaScript MIME type"). 
We fix that by copying the needed runtime files into +// public/vad/ and pointing both path constants at the fixed absolute "/vad/". +// +// These copies are NOT committed (the ORT wasm is ~26 MB); this script runs +// before `dev` and `build` (see package.json) to repopulate them from +// node_modules. It is idempotent: it (re)creates the dir and overwrites. + +import { createRequire } from "node:module"; +import { fileURLToPath } from "node:url"; +import path from "node:path"; +import fs from "node:fs"; + +const require = createRequire(import.meta.url); +const here = path.dirname(fileURLToPath(import.meta.url)); +const outDir = path.join(here, "..", "public", "vad"); + +// vad-web exposes ./package.json, so derive its dist dir from there. +const vadDist = path.join( + path.dirname(require.resolve("@ricky0123/vad-web/package.json")), + "dist", +); + +// onnxruntime-web's "exports" map does NOT expose ./package.json, so resolving +// it would throw ERR_PACKAGE_PATH_NOT_EXPORTED. It DOES export the exact asset +// subpaths we need, so resolve those files directly. +// +// ORT ships several wasm backends and which one the app bundle references depends +// on the resolver: Vite dev resolves the JSEP build (ort-wasm-simd-threaded.jsep.*) +// while the production rolldown build resolves the plain build +// (ort-wasm-simd-threaded.*). Ship BOTH variants so the runtime fetch hits a real +// file under /vad/ regardless of which the bundle picked (each .mjs proxy fetches +// its matching .wasm at init). 
+const ortJsepMjs = require.resolve( + "onnxruntime-web/ort-wasm-simd-threaded.jsep.mjs", +); +const ortJsepWasm = require.resolve( + "onnxruntime-web/ort-wasm-simd-threaded.jsep.wasm", +); +const ortMjs = require.resolve("onnxruntime-web/ort-wasm-simd-threaded.mjs"); +const ortWasm = require.resolve("onnxruntime-web/ort-wasm-simd-threaded.wasm"); + +// [absolute source path, output filename] +const files = [ + [path.join(vadDist, "vad.worklet.bundle.min.js"), "vad.worklet.bundle.min.js"], + [path.join(vadDist, "silero_vad_v5.onnx"), "silero_vad_v5.onnx"], + [ortJsepMjs, "ort-wasm-simd-threaded.jsep.mjs"], + [ortJsepWasm, "ort-wasm-simd-threaded.jsep.wasm"], + [ortMjs, "ort-wasm-simd-threaded.mjs"], + [ortWasm, "ort-wasm-simd-threaded.wasm"], +]; + +fs.mkdirSync(outDir, { recursive: true }); +for (const [src, name] of files) { + if (!fs.existsSync(src)) { + console.error(`[copy-vad-assets] missing source: ${src}`); + process.exit(1); + } + fs.copyFileSync(src, path.join(outDir, name)); + console.log(`[copy-vad-assets] ${name}`); +} diff --git a/apps/client/src/components/common/avatar-uploader.tsx b/apps/client/src/components/common/avatar-uploader.tsx index d7ac5f40..ec98aa02 100644 --- a/apps/client/src/components/common/avatar-uploader.tsx +++ b/apps/client/src/components/common/avatar-uploader.tsx @@ -42,6 +42,23 @@ export default function AvatarUploader({ return; } + // Validate file type. The `accept` attribute only filters the dialog; + // a user can still select a non-image file, which previously failed + // silently. Surface a visible error instead (issue #133). Accept any + // image/* MIME (png, jpeg, webp, gif, svg, ...) so we don't narrow below + // what the server accepts; only genuinely non-image files are rejected. 
+ if (!file.type.startsWith("image/")) { + notifications.show({ + message: t("Unsupported image type"), + color: "red", + }); + // Reset the input + if (fileInputRef.current) { + fileInputRef.current.value = ""; + } + return; + } + // Validate file size (max 10MB) const maxSizeInBytes = 10 * 1024 * 1024; if (file.size > maxSizeInBytes) { @@ -58,6 +75,8 @@ export default function AvatarUploader({ try { await onUpload(file); + // Notify on success so the upload gives visible feedback (issue #128) + notifications.show({ message: t("Image updated") }); } catch (error) { console.error(error); notifications.show({ @@ -117,7 +136,7 @@ export default function AvatarUploader({ type="file" ref={fileInputRef} onChange={handleFileInputChange} - accept="image/png,image/jpeg,image/jpg" + accept="image/*" aria-label={ariaLabel} tabIndex={-1} style={{ display: "none" }} diff --git a/apps/client/src/components/common/recent-changes.tsx b/apps/client/src/components/common/recent-changes.tsx index ec531cf4..4e1183d9 100644 --- a/apps/client/src/components/common/recent-changes.tsx +++ b/apps/client/src/components/common/recent-changes.tsx @@ -67,6 +67,7 @@ export default function RecentChanges({ spaceId }: Props) { <Badge color={getInitialsColor(page?.space.name)} variant="light" + tt="none" component={Link} to={getSpaceUrl(page?.space.slug)} style={{ cursor: "pointer" }} diff --git a/apps/client/src/components/icons/icon-columns-4.tsx b/apps/client/src/components/icons/icon-columns-4.tsx index d2b4541b..b4c0188e 100644 --- a/apps/client/src/components/icons/icon-columns-4.tsx +++ b/apps/client/src/components/icons/icon-columns-4.tsx @@ -9,8 +9,10 @@ export function IconColumns4({ size = 24, stroke = 2 }: Props) { return ( <svg xmlns="http://www.w3.org/2000/svg" - width={rem(size)} - height={rem(size)} + // rem(size) returns a `calc(...)` string, which is invalid for the raw + // SVG width/height length attributes ("Expected length, calc(...)"). 
Pass + // it via CSS style instead (matching the other icon components). + style={{ width: rem(size), height: rem(size) }} viewBox="0 0 24 24" fill="none" stroke="currentColor" diff --git a/apps/client/src/components/icons/icon-columns-5.tsx b/apps/client/src/components/icons/icon-columns-5.tsx index afa4773c..4321e221 100644 --- a/apps/client/src/components/icons/icon-columns-5.tsx +++ b/apps/client/src/components/icons/icon-columns-5.tsx @@ -9,8 +9,10 @@ export function IconColumns5({ size = 24, stroke = 2 }: Props) { return ( <svg xmlns="http://www.w3.org/2000/svg" - width={rem(size)} - height={rem(size)} + // rem(size) returns a `calc(...)` string, which is invalid for the raw + // SVG width/height length attributes ("Expected length, calc(...)"). Pass + // it via CSS style instead (matching the other icon components). + style={{ width: rem(size), height: rem(size) }} viewBox="0 0 24 24" fill="none" stroke="currentColor" diff --git a/apps/client/src/components/layouts/global/app-header.module.css b/apps/client/src/components/layouts/global/app-header.module.css index 7cdec643..6abbdad4 100644 --- a/apps/client/src/components/layouts/global/app-header.module.css +++ b/apps/client/src/components/layouts/global/app-header.module.css @@ -13,6 +13,7 @@ text-decoration: none; color: inherit; cursor: pointer; + user-select: none; } .brandIcon { @@ -33,21 +34,3 @@ that is ~9.3px, minus the font descent (~2px) ≈ 7px. 
*/ margin-bottom: rem(7px); } - -.link { - display: block; - line-height: 1; - padding: rem(8px) rem(12px); - border-radius: var(--mantine-radius-sm); - text-decoration: none; - color: light-dark(var(--mantine-color-gray-7), var(--mantine-color-dark-0)); - font-size: var(--mantine-font-size-sm); - font-weight: 500; - user-select: none; - white-space: nowrap; - flex-shrink: 0; - - @mixin hover { - background-color: light-dark(var(--mantine-color-gray-0), var(--mantine-color-dark-6)); - } -} diff --git a/apps/client/src/components/layouts/global/app-header.tsx b/apps/client/src/components/layouts/global/app-header.tsx index fc8e769a..6ef437e7 100644 --- a/apps/client/src/components/layouts/global/app-header.tsx +++ b/apps/client/src/components/layouts/global/app-header.tsx @@ -5,12 +5,11 @@ import { Text, Tooltip, } from "@mantine/core"; -import { IconSparkles } from "@tabler/icons-react"; +import { IconMessage } from "@tabler/icons-react"; import classes from "./app-header.module.css"; import { BrandLogo } from "@/components/ui/brand-logo"; import TopMenu from "@/components/layouts/global/top-menu.tsx"; import { Link } from "react-router-dom"; -import APP_ROUTE from "@/lib/app-route.ts"; import { useAtom, useSetAtom } from "jotai"; import { desktopSidebarAtom, @@ -30,10 +29,6 @@ import { } from "@/features/search/constants.ts"; import { NotificationPopover } from "@/features/notification/components/notification-popover.tsx"; -const links = [ - { link: APP_ROUTE.HOME, label: "Home" }, -]; - export function AppHeader() { const { t } = useTranslation(); const [mobileOpened] = useAtom(mobileSidebarAtom); @@ -47,12 +42,6 @@ export function AppHeader() { // AI chat entry point: only shown when the workspace enables it (A7 gate). 
const aiChatEnabled = workspace?.settings?.ai?.chat === true; - const items = links.map((link) => ( - <Link key={link.label} to={link.link} className={classes.link}> - {t(link.label)} - </Link> - )); - return ( <> <Group h="100%" px="md" justify="space-between" wrap={"nowrap"}> @@ -97,10 +86,6 @@ export function AppHeader() { </Text> </Tooltip> </Group> - - <Group ml="xl" gap={5} className={classes.links} visibleFrom="sm"> - {items} - </Group> </Group> <div> @@ -122,7 +107,7 @@ export function AppHeader() { aria-label={t("AI chat")} onClick={() => setAiChatWindowOpen((v) => !v)} > - <IconSparkles size={20} /> + <IconMessage size={20} /> </ActionIcon> </Tooltip> )} diff --git a/apps/client/src/components/layouts/global/aside.tsx b/apps/client/src/components/layouts/global/aside.tsx index 6faf853a..e76e8bd3 100644 --- a/apps/client/src/components/layouts/global/aside.tsx +++ b/apps/client/src/components/layouts/global/aside.tsx @@ -27,7 +27,7 @@ export default function Aside() { switch (tab) { case "comments": - component = <CommentListWithTabs />; + component = <CommentListWithTabs onClose={closeAside} />; title = "Comments"; break; case "toc": @@ -44,26 +44,27 @@ export default function Aside() { } return ( - <Box p="md" style={{ height: "100%", display: "flex", flexDirection: "column" }}> - {component && ( - <> - <Group justify="space-between" wrap="nowrap" mb="md"> - <Title order={2} size="h6" fw={500}>{t(title)} - - - - - - - - {tab === "comments" ? ( - component - ) : ( + + {component && + (tab === "comments" ? ( + component + ) : ( + <> + + + {t(title)} + + + + + + +
{component}
- )} - - )} + + ))}
); } diff --git a/apps/client/src/components/layouts/global/global-app-shell.tsx b/apps/client/src/components/layouts/global/global-app-shell.tsx index d373f5e7..b756bdde 100644 --- a/apps/client/src/components/layouts/global/global-app-shell.tsx +++ b/apps/client/src/components/layouts/global/global-app-shell.tsx @@ -14,6 +14,7 @@ import { SpaceSidebar } from "@/features/space/components/sidebar/space-sidebar. import { AppHeader } from "@/components/layouts/global/app-header.tsx"; import Aside from "@/components/layouts/global/aside.tsx"; import AiChatWindow from "@/features/ai-chat/components/ai-chat-window.tsx"; +import GitmostGlobalBridge from "@/features/editor/gitmost/gitmost-global-bridge.tsx"; import classes from "./app-shell.module.css"; import { useToggleSidebar } from "@/components/layouts/global/hooks/hooks/use-toggle-sidebar.ts"; import GlobalSidebar from "@/components/layouts/global/global-sidebar.tsx"; @@ -94,12 +95,12 @@ export default function GlobalAppShell({ }} aside={ isPageRoute && { - width: 350, + width: 420, breakpoint: "sm", collapsed: { mobile: !isAsideOpen, desktop: !isAsideOpen }, } } - padding="md" + padding={{ base: "xs", sm: "md" }} > @@ -138,7 +139,7 @@ export default function GlobalAppShell({ id={ASIDE_PANEL_ID} tabIndex={-1} className={classes.aside} - p="md" + p="sm" withBorder={false} aria-label={ asideTab === "comments" @@ -157,6 +158,10 @@ export default function GlobalAppShell({ {/* Floating AI chat window. Mounted once globally; it is position: fixed and self-hides when closed, so its place in the tree is not critical. */} + {/* Global gitmost native bridge: registers listSpaces / listPages / + createPageWithRecording on window.gitmost so the native host can + create a page with a recording even when no page editor is open. 
*/} + ); } diff --git a/apps/client/src/components/layouts/global/top-menu.tsx b/apps/client/src/components/layouts/global/top-menu.tsx index 84925080..f2872ea0 100644 --- a/apps/client/src/components/layouts/global/top-menu.tsx +++ b/apps/client/src/components/layouts/global/top-menu.tsx @@ -20,18 +20,29 @@ import { } from "@tabler/icons-react"; import { useAtom } from "jotai"; import { currentUserAtom } from "@/features/user/atoms/current-user-atom.ts"; -import { Link } from "react-router-dom"; +import { Link, useMatch } from "react-router-dom"; import APP_ROUTE from "@/lib/app-route.ts"; import useAuth from "@/features/auth/hooks/use-auth.ts"; import { CustomAvatar } from "@/components/ui/custom-avatar.tsx"; import { useTranslation } from "react-i18next"; import { AvatarIconType } from "@/features/attachments/types/attachment.types.ts"; +import { useDisclosure } from "@mantine/hooks"; +import SpaceSettingsModal from "@/features/space/components/settings-modal.tsx"; export default function TopMenu() { const { t } = useTranslation(); const [currentUser] = useAtom(currentUserAtom); const { logout } = useAuth(); const { colorScheme, setColorScheme } = useMantineColorScheme(); + // Detect the currently viewed space so the "Space settings" item is only + // offered while the user is inside a space. The "/*" splat also matches the + // bare "/s/:spaceSlug" route (the splat matches an empty segment). + const spaceMatch = useMatch("/s/:spaceSlug/*"); + const spaceSlug = spaceMatch?.params?.spaceSlug; + const [ + spaceSettingsOpened, + { open: openSpaceSettings, close: closeSpaceSettings }, + ] = useDisclosure(false); const user = currentUser?.user; const workspace = currentUser?.workspace; @@ -41,124 +52,143 @@ export default function TopMenu() { } return ( - - - - - - - {workspace?.name} - - - - - - - {t("Workspace")} - - } - > - {t("Workspace settings")} - - - } - > - {t("Manage members")} - - - - - {t("Account")} - - - - -
- - {user.name} + <> + + + + + + + {workspace?.name} - - {user.email} - -
-
-
- } - > - {t("My profile")} - + + + + + + {t("Workspace")} - } - > - {t("My preferences")} - + } + > + {t("Workspace settings")} + - - - }> - {t("Theme")} - - - - + {spaceSlug && ( setColorScheme("light")} - leftSection={} - rightSection={ - colorScheme === "light" ? : null - } + onClick={openSpaceSettings} + leftSection={} > - {t("Light")} + {t("Space settings")} - setColorScheme("dark")} - leftSection={} - rightSection={ - colorScheme === "dark" ? : null - } - > - {t("Dark")} - - setColorScheme("auto")} - leftSection={} - rightSection={ - colorScheme === "auto" ? : null - } - > - {t("System settings")} - - - + )} - + } + > + {t("Manage members")} + - }> - {t("Logout")} - - -
+ + + {t("Account")} + + + + +
+ + {user.name} + + + {user.email} + +
+
+
+ } + > + {t("My profile")} + + + } + > + {t("My preferences")} + + + + + }> + {t("Theme")} + + + + + setColorScheme("light")} + leftSection={} + rightSection={ + colorScheme === "light" ? : null + } + > + {t("Light")} + + setColorScheme("dark")} + leftSection={} + rightSection={ + colorScheme === "dark" ? : null + } + > + {t("Dark")} + + setColorScheme("auto")} + leftSection={} + rightSection={ + colorScheme === "auto" ? : null + } + > + {t("System settings")} + + + + + + + }> + {t("Logout")} + + + + + {spaceSlug && ( + + )} + ); } diff --git a/apps/client/src/components/settings/settings-sidebar.tsx b/apps/client/src/components/settings/settings-sidebar.tsx index 47ecece6..a3f6c0ed 100644 --- a/apps/client/src/components/settings/settings-sidebar.tsx +++ b/apps/client/src/components/settings/settings-sidebar.tsx @@ -20,7 +20,6 @@ import { prefetchSpaces, prefetchWorkspaceMembers, } from "@/components/settings/settings-queries.tsx"; -import AppVersion from "@/components/settings/app-version.tsx"; import { mobileSidebarAtom } from "@/components/layouts/global/hooks/atoms/sidebar-atom.ts"; import { useToggleSidebar } from "@/components/layouts/global/hooks/hooks/use-toggle-sidebar.ts"; import { useSettingsNavigation } from "@/hooks/use-settings-navigation"; @@ -141,8 +140,6 @@ export default function SettingsSidebar() { {menuItems} - - ); } diff --git a/apps/client/src/components/ui/ai-agent-badge.test.tsx b/apps/client/src/components/ui/ai-agent-badge.test.tsx new file mode 100644 index 00000000..678013ed --- /dev/null +++ b/apps/client/src/components/ui/ai-agent-badge.test.tsx @@ -0,0 +1,96 @@ +import { describe, it, expect, vi } from "vitest"; +import { render, screen, fireEvent } from "@testing-library/react"; +import { MantineProvider } from "@mantine/core"; +import { Provider, createStore } from "jotai"; +import { AiAgentBadge } from "./ai-agent-badge"; +import { + activeAiChatIdAtom, + aiChatWindowOpenAtom, + aiChatDraftAtom, +} from 
"@/features/ai-chat/atoms/ai-chat-atom.ts"; + +// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts. + +function renderBadge(props: { authorName?: string; aiChatId?: string | null }) { + return render( + + + , + ); +} + +// Render a clickable badge inside an explicit jotai store, with a leftover draft +// and an onActivate + parent-click spy, so the deep-link side effects are +// assertable. Returns the store and spies. +function setupClickable() { + const store = createStore(); + store.set(aiChatDraftAtom, "leftover draft from another chat"); + const onActivate = vi.fn(); + const onParentClick = vi.fn(); + render( + + +
+ +
+
+
, + ); + return { store, onActivate, onParentClick, badge: screen.getByRole("button") }; +} + +function expectDeepLinked(store: ReturnType, onActivate: ReturnType) { + expect(store.get(activeAiChatIdAtom)).toBe("chat-1"); + expect(store.get(aiChatWindowOpenAtom)).toBe(true); + expect(store.get(aiChatDraftAtom)).toBe(""); // draft cleared + expect(onActivate).toHaveBeenCalledTimes(1); // caller closes its own modal etc. +} + +describe("AiAgentBadge", () => { + it("renders the AI-agent label", () => { + renderBadge({ authorName: "Bot" }); + expect(screen.getByText("AI-agent")).toBeDefined(); + }); + + it("is clickable (accessible button) when aiChatId is present", () => { + renderBadge({ authorName: "Bot", aiChatId: "chat-1" }); + const badge = screen.getByRole("button"); + expect(badge).toBeDefined(); + expect(badge.textContent).toContain("AI-agent"); + }); + + it("click deep-links: sets active chat, clears draft, opens window, fires onActivate, stops propagation", () => { + const { store, onActivate, onParentClick, badge } = setupClickable(); + fireEvent.click(badge); + expectDeepLinked(store, onActivate); + expect(onParentClick).not.toHaveBeenCalled(); // stopPropagation contained the click + }); + + it.each(["Enter", " "])( + "keyboard %j activates the deep-link (same side effects as click)", + (key) => { + const { store, onActivate, badge } = setupClickable(); + fireEvent.keyDown(badge, { key }); + expectDeepLinked(store, onActivate); + }, + ); + + it("an unrelated key does NOT activate the badge", () => { + const { store, onActivate, badge } = setupClickable(); + fireEvent.keyDown(badge, { key: "Tab" }); + expect(store.get(activeAiChatIdAtom)).toBeNull(); + expect(store.get(aiChatWindowOpenAtom)).toBe(false); + expect(store.get(aiChatDraftAtom)).toBe("leftover draft from another chat"); + expect(onActivate).not.toHaveBeenCalled(); + }); + + it.each([{ aiChatId: null }, {}])( + "is a plain non-clickable label without a chat target (%o)", + (props) => { + 
renderBadge({ authorName: "Bot", ...props }); + expect(screen.getByText("AI-agent")).toBeDefined(); + // No interactive role is exposed when there is no chat to deep-link into. + expect(screen.queryByRole("button")).toBeNull(); + }, + ); +}); diff --git a/apps/client/src/components/ui/ai-agent-badge.tsx b/apps/client/src/components/ui/ai-agent-badge.tsx new file mode 100644 index 00000000..39e29614 --- /dev/null +++ b/apps/client/src/components/ui/ai-agent-badge.tsx @@ -0,0 +1,99 @@ +import { Badge, Tooltip } from "@mantine/core"; +import { IconSparkles } from "@tabler/icons-react"; +import { useCallback } from "react"; +import { useTranslation } from "react-i18next"; +import { useSetAtom } from "jotai"; +import { + activeAiChatIdAtom, + aiChatWindowOpenAtom, + aiChatDraftAtom, +} from "@/features/ai-chat/atoms/ai-chat-atom.ts"; + +interface AiAgentBadgeProps { + authorName?: string; + aiChatId?: string | null; + // Fired after the badge deep-links into its chat. The caller handles its own + // context (e.g. the page-history row closes the history modal) so this generic + // ui/ primitive stays free of cross-feature coupling (#143 review Arch B). + onActivate?: () => void; +} + +/** + * Badge marking content written by the AI agent (provenance C3 / §7.4). It is + * ADDITIVE — shown next to the human author, never replacing them. Reused by the + * page-history list and the comments sidebar. + * + * When the item carries an `aiChatId` (an internal AI-chat edit), clicking the + * badge deep-links into that chat: it sets the active-chat atom and opens the + * floating AI-chat window, then invokes `onActivate` so the caller can react + * (e.g. the history modal closes itself). When `aiChatId` is null/absent (an + * external MCP write with no internal ai_chats row), the badge is a plain + * non-clickable label. The click is contained (stopPropagation) so it does not + * also trigger an enclosing row's click handler. 
+ */ +export function AiAgentBadge({ + authorName, + aiChatId, + onActivate, +}: AiAgentBadgeProps) { + const { t } = useTranslation(); + const setAiChatWindowOpen = useSetAtom(aiChatWindowOpenAtom); + const setActiveChatId = useSetAtom(activeAiChatIdAtom); + const setDraft = useSetAtom(aiChatDraftAtom); + + const tooltip = t("Edited by AI agent on behalf of {{name}}", { + name: authorName ?? "", + }); + + const openChat = useCallback( + (event: React.SyntheticEvent) => { + event.stopPropagation(); + if (!aiChatId) return; + setActiveChatId(aiChatId); + // Switching to another chat must start with a clean composer — clear any + // unsent draft so it does not leak from the previously open chat. + setDraft(""); + setAiChatWindowOpen(true); + onActivate?.(); + }, + [aiChatId, setActiveChatId, setDraft, setAiChatWindowOpen, onActivate], + ); + + const badge = ( + } + style={aiChatId ? { cursor: "pointer" } : undefined} + {...(aiChatId + ? { + // Keep the default Badge root element (not a )} + + + + ); } if (!invitation) { diff --git a/apps/client/src/features/comment/components/comment-list-item.test.tsx b/apps/client/src/features/comment/components/comment-list-item.test.tsx new file mode 100644 index 00000000..82e12785 --- /dev/null +++ b/apps/client/src/features/comment/components/comment-list-item.test.tsx @@ -0,0 +1,59 @@ +import { describe, it, expect, vi } from "vitest"; +import { render, screen } from "@testing-library/react"; +import { MantineProvider } from "@mantine/core"; +import { IComment } from "@/features/comment/types/comment.types"; + +// matchMedia (read by MantineProvider) is stubbed globally in vitest.setup.ts. + +// The comment mutation hooks reach out to react-query/network — stub them so the +// component renders in isolation. We only assert the AI-badge rendering branch. 
+vi.mock("@/features/comment/queries/comment-query", () => ({ + useDeleteCommentMutation: () => ({ mutateAsync: vi.fn() }), + useResolveCommentMutation: () => ({ mutateAsync: vi.fn() }), + useUpdateCommentMutation: () => ({ mutateAsync: vi.fn() }), +})); + +// CommentEditor pulls in the full TipTap editor stack; replace it with a stub. +vi.mock("@/features/comment/components/comment-editor", () => ({ + default: () =>
, +})); + +import CommentListItem from "./comment-list-item"; + +const baseComment = (over?: Partial): IComment => + ({ + id: "c-1", + content: JSON.stringify({ type: "doc", content: [] }), + creatorId: "user-1", + pageId: "page-1", + workspaceId: "ws-1", + createdAt: new Date(), + creator: { id: "user-1", name: "Service Bot", avatarUrl: null } as any, + ...over, + }) as IComment; + +function renderItem(comment: IComment) { + return render( + + + , + ); +} + +describe("CommentListItem — AI badge", () => { + it('renders the AI-agent badge when createdSource === "agent"', () => { + renderItem(baseComment({ createdSource: "agent", aiChatId: null })); + expect(screen.getByText("AI-agent")).toBeDefined(); + expect(screen.getByText("Service Bot")).toBeDefined(); + }); + + it('does NOT render the badge for a normal user comment (createdSource "user")', () => { + renderItem(baseComment({ createdSource: "user" })); + expect(screen.queryByText("AI-agent")).toBeNull(); + expect(screen.getByText("Service Bot")).toBeDefined(); + }); + + // The non-clickable (null aiChatId) branch is a property of AiAgentBadge itself + // and is covered in ai-agent-badge.test.tsx; this integration suite only needs + // the insertion gate (agent → badge, user → no badge) above (#143 review). 
+}); diff --git a/apps/client/src/features/comment/components/comment-list-item.tsx b/apps/client/src/features/comment/components/comment-list-item.tsx index a53e326a..dabb72b1 100644 --- a/apps/client/src/features/comment/components/comment-list-item.tsx +++ b/apps/client/src/features/comment/components/comment-list-item.tsx @@ -1,4 +1,5 @@ -import { Group, Text, Box, Badge } from "@mantine/core"; +import { Group, Text, Box } from "@mantine/core"; +import { AiAgentBadge } from "@/components/ui/ai-agent-badge.tsx"; import React, { useEffect, useRef, useState } from "react"; import classes from "./comment.module.css"; import { useAtom, useAtomValue } from "jotai"; @@ -116,8 +117,8 @@ function CommentListItem({ } return ( - - + + - - {comment.creator.name} - + + + {comment.creator.name} + + + {comment.createdSource === "agent" && ( + + )} +
{!comment.parentCommentId && canComment && ( @@ -155,7 +165,7 @@ function CommentListItem({ - + {createdAtAgo} @@ -177,7 +187,7 @@ function CommentListItem({ tabIndex={0} aria-label={t("Jump to comment selection")} > - {comment?.selection} + {comment?.selection} )} diff --git a/apps/client/src/features/comment/components/comment-list-with-tabs.tsx b/apps/client/src/features/comment/components/comment-list-with-tabs.tsx index a3f348b8..a29d3da8 100644 --- a/apps/client/src/features/comment/components/comment-list-with-tabs.tsx +++ b/apps/client/src/features/comment/components/comment-list-with-tabs.tsx @@ -11,6 +11,7 @@ import { Badge, Text, ScrollArea, + Tooltip, } from "@mantine/core"; import CommentListItem from "@/features/comment/components/comment-list-item"; import { @@ -26,12 +27,16 @@ import { IPagination } from "@/lib/types.ts"; import { extractPageSlugId } from "@/lib"; import { useTranslation } from "react-i18next"; import { useGetSpaceBySlugQuery } from "@/features/space/queries/space-query.ts"; -import { IconArrowUp, IconMessageOff } from "@tabler/icons-react"; +import { IconArrowUp, IconMessageOff, IconX } from "@tabler/icons-react"; import { useAtom } from "jotai"; import { currentUserAtom } from "@/features/user/atoms/current-user-atom"; import { CustomAvatar } from "@/components/ui/custom-avatar.tsx"; -function CommentListWithTabs() { +interface CommentListWithTabsProps { + onClose?: () => void; +} + +function CommentListWithTabs({ onClose }: CommentListWithTabsProps) { const { t } = useTranslation(); const { pageSlug } = useParams(); const { data: page } = usePageQuery({ pageId: extractPageSlugId(pageSlug) }); @@ -121,8 +126,8 @@ function CommentListWithTabs() { - + - - - {activeComments.length} - - } - > - {t("Open")} - - - {resolvedComments.length} - - } - > - {t("Resolved")} - - + {/* Header row: full-width centered tab list with the close button overlaid on the right. */} +
+ + + {activeComments.length} + + } + > + {t("Open")} + + + {resolvedComments.length} + + } + > + {t("Resolved")} + + + {onClose && ( + + + + + + )} +
{ flex: "0 0 auto", borderTop: "1px solid var(--mantine-color-default-border)", paddingTop: "var(--mantine-spacing-sm)", - paddingBottom: 25, + paddingBottom: 10, position: "relative", }} > @@ -374,7 +401,7 @@ const PageCommentInput = ({ onSave, isLoading }) => { size="sm" avatarUrl={currentUser?.user?.avatarUrl} name={currentUser?.user?.name} - style={{ flexShrink: 0, marginTop: 10 }} + style={{ flexShrink: 0, marginTop: 2 }} />
{ onClick={handleSave} onMouseDown={(e) => e.preventDefault()} loading={isLoading} - style={{ position: "absolute", right: 8, bottom: 30 }} + style={{ position: "absolute", right: 8, bottom: 15 }} > diff --git a/apps/client/src/features/comment/components/comment.module.css b/apps/client/src/features/comment/components/comment.module.css index dfa61b79..36362338 100644 --- a/apps/client/src/features/comment/components/comment.module.css +++ b/apps/client/src/features/comment/components/comment.module.css @@ -1,15 +1,16 @@ -.wrapper { - padding: var(--mantine-spacing-md); -} - .focused-thread { border: 2px solid #8d7249; } .textSelection { - margin-top: 4px; + /* Breathing room below the comment header (author + timestamp) so the + quote does not stick to the timestamp when it is the first block. */ + margin-top: 8px; + /* Align the quote's left bar with the comment body text left edge + (the comment editor insets its text by 6px). */ + margin-left: 6px; border-left: 2px solid var(--mantine-color-gray-6); - padding: 8px; + padding: 6px; background: var(--mantine-color-gray-light); cursor: pointer; overflow-wrap: break-word; @@ -32,6 +33,9 @@ box-shadow: 0 0 0 2px var(--mantine-color-blue-3); } + /* Denser comments: override the global 16px ProseMirror body size with 14px + and tighten the rhythm vs. the comment header. Scoped to the comment + editor only - the page editor is unaffected. 
*/ .ProseMirror :global(.ProseMirror){ border-radius: var(--mantine-radius-sm); max-width: 100%; @@ -39,7 +43,9 @@ word-break: break-word; padding-left: 6px; padding-right: 6px; - margin-top: 10px; + font-size: var(--mantine-font-size-sm); + line-height: 1.4; + margin-top: 4px; margin-bottom: 2px; } diff --git a/apps/client/src/features/comment/types/comment.types.ts b/apps/client/src/features/comment/types/comment.types.ts index 164e63dc..ddffcb91 100644 --- a/apps/client/src/features/comment/types/comment.types.ts +++ b/apps/client/src/features/comment/types/comment.types.ts @@ -17,6 +17,13 @@ export interface IComment { deletedAt?: Date; creator: IUser; resolvedBy?: IUser; + // Agent-edit provenance (returned by the backend via selectAll('comments')). + // createdSource === "agent" marks a comment authored via an AI agent (MCP / + // internal AI chat); aiChatId deep-links to the internal chat when present + // (null for an external MCP agent); resolvedSource marks an AI-resolved thread. + createdSource?: string; + aiChatId?: string | null; + resolvedSource?: string | null; yjsSelection?: { anchor: any; head: any; diff --git a/apps/client/src/features/dictation/components/mic-button.module.css b/apps/client/src/features/dictation/components/mic-button.module.css new file mode 100644 index 00000000..53a3a2aa --- /dev/null +++ b/apps/client/src/features/dictation/components/mic-button.module.css @@ -0,0 +1,24 @@ +.recordingWrap { + position: relative; + display: inline-flex; + align-items: center; + justify-content: center; +} + +/* Translucent red halo that sits behind the stop button and scales with the + live microphone level (scale set inline from audioLevel). Radius follows the + ActionIcon's own radius so the halo matches the button's rounded-square + outline instead of being a circle. 
*/ +.pulse { + position: absolute; + inset: 0; + border-radius: var(--mantine-radius-default); + background-color: var(--mantine-color-red-5); + opacity: 0.35; + transform-origin: center; + transform: scale(1); + transition: transform 90ms linear; + pointer-events: none; + will-change: transform; + z-index: 0; +} diff --git a/apps/client/src/features/dictation/components/mic-button.tsx b/apps/client/src/features/dictation/components/mic-button.tsx index b04e753a..70ead74e 100644 --- a/apps/client/src/features/dictation/components/mic-button.tsx +++ b/apps/client/src/features/dictation/components/mic-button.tsx @@ -3,6 +3,8 @@ import { ActionIcon, Loader, Tooltip } from "@mantine/core"; import { IconMicrophone, IconPlayerStopFilled } from "@tabler/icons-react"; import { useTranslation } from "react-i18next"; import { useDictation } from "@/features/dictation/hooks/use-dictation"; +import { useStreamingDictation } from "@/features/dictation/hooks/use-streaming-dictation"; +import classes from "./mic-button.module.css"; interface MicButtonProps { onText: (text: string) => void; @@ -11,6 +13,14 @@ interface MicButtonProps { // Mantine ActionIcon size token; "lg" matches the chat composer, "md" the // editor toolbar. size?: "md" | "lg"; + // Optional Mantine color override for the idle/transcribing states (the + // recording state stays red). Defaults to the theme primary when omitted. + color?: string; + // Optional explicit glyph size override; defaults to the size-token value. + iconSize?: number; + // When true, use the streaming (Silero-VAD) dictation controller, which emits + // text progressively as the user pauses; otherwise use the batch controller. + streaming?: boolean; } /** @@ -24,35 +34,64 @@ export const MicButton: FC = ({ onStart, disabled, size = "lg", + color, + iconSize, + streaming = false, }) => { const { t } = useTranslation(); - const { status, start, stop } = useDictation({ onText, onStart }); - const iconSize = size === "lg" ? 
18 : 16; + // Call BOTH hooks unconditionally to respect the rules of hooks: which one is + // active is a render-time choice, but both must be invoked every render. This + // is safe because both controllers are inert until start() is called — neither + // opens the mic on mount — so the unused one costs nothing. + const batchCtl = useDictation({ onText, onStart }); + const streamingCtl = useStreamingDictation({ onText, onStart }); + const ctl = streaming ? streamingCtl : batchCtl; + const { status, start, stop, audioLevel } = ctl; + const resolvedIconSize = iconSize ?? (size === "lg" ? 18 : 16); if (status === "recording") { + // Live volume-driven halo: the scale follows the current mic level. + const haloScale = 1 + Math.min(1, audioLevel) * 0.9; return ( - - - + + ); } - if (status === "transcribing" || status === "error") { + if ( + status === "loading" || + status === "transcribing" || + status === "error" + ) { + // "loading" (streaming hook fetching the VAD model on first use) shows the + // same spinner+disabled state so the first click is visibly acknowledged and + // a confusing second click can't fire while the model loads. + const label = status === "loading" ? 
t("Preparing…") : t("Transcribing…"); return ( - + @@ -65,11 +104,12 @@ export const MicButton: FC = ({ void start()} disabled={disabled} aria-label={t("Start dictation")} > - + ); diff --git a/apps/client/src/features/dictation/hooks/use-dictation.ts b/apps/client/src/features/dictation/hooks/use-dictation.ts index 86af4c78..4d8c451d 100644 --- a/apps/client/src/features/dictation/hooks/use-dictation.ts +++ b/apps/client/src/features/dictation/hooks/use-dictation.ts @@ -3,7 +3,15 @@ import { notifications } from "@mantine/notifications"; import { useTranslation } from "react-i18next"; import { transcribeAudio } from "@/features/dictation/services/dictation-service"; -export type DictationStatus = "idle" | "recording" | "transcribing" | "error"; +// "loading" is set only by the streaming hook while it lazily loads the VAD +// model on first use; the batch hook never sets it. It exists so the streaming +// hook and the mic button can show immediate feedback during that load. +export type DictationStatus = + | "idle" + | "recording" + | "transcribing" + | "error" + | "loading"; interface UseDictationOptions { onText: (text: string) => void; @@ -16,6 +24,8 @@ interface UseDictationResult { start: () => Promise; stop: () => void; cancel: () => void; + // Smoothed live microphone level in the 0..1 range while recording (0 when idle). + audioLevel: number; } // Candidate container/codec combinations in preference order. The first one the @@ -56,6 +66,7 @@ export function useDictation( ): UseDictationResult { const { t } = useTranslation(); const [status, setStatus] = useState("idle"); + const [audioLevel, setAudioLevel] = useState(0); // Keep the latest callbacks in a ref so the recorder's onstop closure always // calls the current handlers without re-creating the recorder. @@ -70,6 +81,15 @@ export function useDictation( const canceledRef = useRef(false); const startingRef = useRef(false); + // Web Audio metering: derives a live input level from the captured stream. 
+ const audioContextRef = useRef(null); + const analyserRef = useRef(null); + const sourceRef = useRef(null); + const rafRef = useRef(null); + // Exponentially smoothed level, and the last value pushed to React state. + const smoothedLevelRef = useRef(0); + const emittedLevelRef = useRef(0); + const clearTimer = useCallback(() => { if (timerRef.current !== null) { clearTimeout(timerRef.current); @@ -82,6 +102,91 @@ export function useDictation( streamRef.current = null; }, []); + // Tear the audio meter down fully. Safe to call multiple times and on any exit + // path; defensive try/catch so cleanup never throws. + const stopMeter = useCallback(() => { + // Cancel the rAF first so getByteTimeDomainData can't run on a closed context. + if (rafRef.current !== null) { + cancelAnimationFrame(rafRef.current); + rafRef.current = null; + } + try { + sourceRef.current?.disconnect(); + sourceRef.current = null; + analyserRef.current = null; + if (audioContextRef.current && audioContextRef.current.state !== "closed") { + void audioContextRef.current.close(); + } + audioContextRef.current = null; + } catch (err) { + // Cleanup must never throw; just log for diagnosis. + console.warn("[dictation] audio meter teardown failed", err); + } + smoothedLevelRef.current = 0; + emittedLevelRef.current = 0; + setAudioLevel(0); + }, []); + + // Set up Web Audio metering on the already-captured stream. Reuses the existing + // MediaStream — never requests a second mic. Failure here must not break + // recording: on any error we warn and return, leaving the recorder running. + const startMeter = useCallback((stream: MediaStream) => { + try { + const Ctor = + window.AudioContext || + (window as unknown as { webkitAudioContext?: typeof AudioContext }) + .webkitAudioContext; + if (!Ctor) return; + + const audioContext = new Ctor(); + // Some browsers start the context suspended; resume so the loop produces + // data. Swallow rejection (e.g. 
context already closed by a fast + // start/stop race) to avoid an unhandled promise rejection. + audioContext.resume().catch(() => {}); + const source = audioContext.createMediaStreamSource(stream); + const analyser = audioContext.createAnalyser(); + analyser.fftSize = 512; + analyser.smoothingTimeConstant = 0.5; + // Connect ONLY to the analyser — never to destination, which would echo the + // mic back to the speakers. + source.connect(analyser); + + audioContextRef.current = audioContext; + sourceRef.current = source; + analyserRef.current = analyser; + + // Allocate the time-domain buffer once and reuse it on every tick. + const data = new Uint8Array(analyser.fftSize); + + const tick = () => { + const a = analyserRef.current; + if (!a) return; + a.getByteTimeDomainData(data); + // RMS of the centered waveform (samples are 0..255, midpoint 128). + let sumSquares = 0; + for (let i = 0; i < data.length; i++) { + const v = (data[i] - 128) / 128; + sumSquares += v * v; + } + const rms = Math.sqrt(sumSquares / data.length); + // Boost + clamp so normal speech maps to a visible 0..1 range. + const level = Math.min(1, rms * 3); + // Exponential smoothing to avoid jitter. + smoothedLevelRef.current = smoothedLevelRef.current * 0.8 + level * 0.2; + // Throttle React re-renders: only push when it changed meaningfully. + if (Math.abs(smoothedLevelRef.current - emittedLevelRef.current) > 0.01) { + emittedLevelRef.current = smoothedLevelRef.current; + setAudioLevel(smoothedLevelRef.current); + } + rafRef.current = requestAnimationFrame(tick); + }; + rafRef.current = requestAnimationFrame(tick); + } catch (err) { + // Web Audio unavailable or threw: recording continues without the meter. 
+ console.warn("[dictation] audio meter unavailable", err); + } + }, []); + const start = useCallback(async (): Promise => { // Synchronous live guard: status is stale between renders, so also block on // refs to prevent a double-click from opening two MediaStreams (the first @@ -163,8 +268,9 @@ export function useDictation( const recordedMime = recorder.mimeType || mimeType || "audio/webm"; const wasCanceled = canceledRef.current; - // Stop the mic tracks regardless of how we got here. + // Stop the mic tracks and the audio meter regardless of how we got here. stopTracks(); + stopMeter(); recorderRef.current = null; if (wasCanceled) { @@ -237,34 +343,49 @@ export function useDictation( // Recording has truly begun; release the synchronous start guard. startingRef.current = false; + // Start the live audio meter on the stream we already acquired. + startMeter(stream); + const maxDurationMs = optionsRef.current.maxDurationMs ?? 120000; timerRef.current = setTimeout(() => { if (recorderRef.current?.state === "recording") { recorderRef.current.stop(); } }, maxDurationMs); - }, [status, t, clearTimer, stopTracks]); + }, [status, t, clearTimer, stopTracks, startMeter, stopMeter]); const stop = useCallback((): void => { clearTimer(); const recorder = recorderRef.current; if (recorder && recorder.state === "recording") { + // Normal path: onstop tears down tracks + meter and runs transcription. recorder.stop(); + } else { + // No live recorder (e.g. the track ended on its own): tear everything + // down directly so the meter/AudioContext and stream don't leak, and + // recover the UI to idle. 
+ stopTracks(); + stopMeter(); + recorderRef.current = null; + chunksRef.current = []; + setStatus("idle"); } - }, [clearTimer]); + }, [clearTimer, stopTracks, stopMeter]); const cancel = useCallback((): void => { clearTimer(); canceledRef.current = true; const recorder = recorderRef.current; if (recorder && recorder.state === "recording") { - // onstop sees canceledRef and skips transcription; it also stops tracks. + // onstop sees canceledRef and skips transcription; it also stops tracks + // and the meter. recorder.stop(); } else { stopTracks(); + stopMeter(); } setStatus("idle"); - }, [clearTimer, stopTracks]); + }, [clearTimer, stopTracks, stopMeter]); // Clean up on unmount: stop any live recorder/stream and clear the timers. useEffect(() => { @@ -280,8 +401,9 @@ export function useDictation( recorder.stop(); } stopTracks(); + stopMeter(); }; - }, [clearTimer, stopTracks]); + }, [clearTimer, stopTracks, stopMeter]); - return { status, start, stop, cancel }; + return { status, start, stop, cancel, audioLevel }; } diff --git a/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts b/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts new file mode 100644 index 00000000..b086747c --- /dev/null +++ b/apps/client/src/features/dictation/hooks/use-streaming-dictation.ts @@ -0,0 +1,474 @@ +import { useCallback, useEffect, useRef, useState } from "react"; +import { notifications } from "@mantine/notifications"; +import { useTranslation } from "react-i18next"; +import { transcribeAudio } from "@/features/dictation/services/dictation-service"; +import { encodeWavPcm16 } from "@/features/dictation/utils/encode-wav"; +import type { DictationStatus } from "@/features/dictation/hooks/use-dictation"; + +// Lazily-imported MicVAD type. The runtime import happens inside start() so the +// heavy onnxruntime-web / Silero model is code-split out of the main bundle and +// only fetched when the user actually begins dictation. 
+type MicVADInstance = { + start: () => Promise; + pause: () => Promise; + destroy: () => Promise; +}; + +interface UseStreamingDictationOptions { + onText: (text: string) => void; + onStart?: () => void; + maxDurationMs?: number; +} + +interface UseStreamingDictationResult { + status: DictationStatus; + start: () => Promise; + stop: () => void; + cancel: () => void; + // Smoothed live speech level in the 0..1 range while recording (0 when idle). + audioLevel: number; +} + +// Sample rate of the audio MicVAD hands to onSpeechEnd (Silero VAD runs at 16k). +const VAD_SAMPLE_RATE = 16000; + +// Asset paths for the VAD worklet/Silero model and the onnxruntime-web WASM +// binaries. vad-web 0.0.30's default asset path is "./" (relative to the current +// page URL), NOT a CDN — in this SPA that request hits the client-side catch-all +// route and returns index.html (text/html), so the onnxruntime ESM/wasm backend +// fails to initialize. We instead self-host the four needed files (the vad-web +// worklet + `silero_vad_v5.onnx` model and the onnxruntime-web `*.jsep.mjs`/ +// `*.jsep.wasm`) under `apps/client/public/vad/` — populated by +// `scripts/copy-vad-assets.mjs`, which runs before `dev`/`build` — and point both +// paths at the fixed absolute "/vad/". +const VAD_BASE_ASSET_PATH: string | undefined = "/vad/"; +const VAD_ONNX_WASM_BASE_PATH: string | undefined = "/vad/"; + +/** + * Streaming variant of useDictation. Detects speech with a real (Silero) VAD and, + * each time the speaker pauses, cuts that speech segment and POSTs it to the same + * batch transcription endpoint, so text appears progressively as the user speaks. + * + * Returns the SAME shape as useDictation ({ status, start, stop, cancel, + * audioLevel }) so MicButton can use either interchangeably. Refs hold the live + * VAD instance / counters / timer so component re-renders never lose them, and + * every exit path destroys the VAD and stops the MediaStream. 
+ */ +export function useStreamingDictation( + options: UseStreamingDictationOptions, +): UseStreamingDictationResult { + const { t } = useTranslation(); + const [status, setStatus] = useState("idle"); + const [audioLevel, setAudioLevel] = useState(0); + + // Keep the latest callbacks in a ref so async VAD/HTTP closures always call the + // current handlers without re-creating the VAD. + const optionsRef = useRef(options); + optionsRef.current = options; + + const vadRef = useRef(null); + // AudioContext we create+resume inside the click gesture and inject into + // MicVAD (see start()). We own it; MicVAD does not close an injected context. + const audioContextRef = useRef(null); + const timerRef = useRef | null>(null); + const canceledRef = useRef(false); + const startingRef = useRef(false); + // True while a recording session is active (VAD listening). Used to ignore late + // VAD callbacks that fire after stop()/cancel(). + const activeRef = useRef(false); + + // In-order emission: each segment gets a monotonically increasing seq when its + // speech ends; completed transcriptions are buffered by seq and flushed in + // order so out-of-order HTTP responses can't scramble the text. + const nextSeqRef = useRef(0); + const nextEmitSeqRef = useRef(0); + const resultsRef = useRef>(new Map()); + // Number of transcription requests still in flight. + const inFlightRef = useRef(0); + // Session epoch: bumped when a NEW session starts (start) or everything is + // hard-discarded (cancel). Each in-flight request captures the epoch at send + // time; if the epoch has since changed, the request is stale and its + // then/catch/finally are skipped so old text can't leak into a new session and + // the in-flight counter can't be driven negative across sessions. + const epochRef = useRef(0); + + // Exponentially smoothed speech level, and the last value pushed to React state. 
+ const smoothedLevelRef = useRef(0); + const emittedLevelRef = useRef(0); + + const clearTimer = useCallback(() => { + if (timerRef.current !== null) { + clearTimeout(timerRef.current); + timerRef.current = null; + } + }, []); + + // Reset the level meter back to zero (refs + React state). + const resetLevel = useCallback(() => { + smoothedLevelRef.current = 0; + emittedLevelRef.current = 0; + setAudioLevel(0); + }, []); + + // Destroy the live VAD instance (which also releases the mic stream and audio + // context it created). Safe to call multiple times and on any exit path; + // defensive try/catch so teardown never throws. + const destroyVad = useCallback(() => { + const vad = vadRef.current; + vadRef.current = null; + if (vad) { + try { + // destroy() pauses + tears down the worklet/stream/context internally. + // It returns a promise, so attach a .catch too: the surrounding + // try/catch only catches synchronous throws, and a rejected destroy() + // would otherwise surface as an unhandled rejection. + void vad + .destroy() + .catch((err) => + console.warn("[dictation] VAD teardown failed", err), + ); + } catch (err) { + // Cleanup must never throw; just log for diagnosis. + console.warn("[dictation] VAD teardown failed", err); + } + } + }, []); + + // Decide the status once recording has ended: stay "transcribing" while + // requests are in flight, otherwise return to "idle". + const settleAfterStop = useCallback(() => { + if (inFlightRef.current > 0) { + setStatus("transcribing"); + } else { + setStatus("idle"); + } + }, []); + + // Drain the in-order result buffer: while the next expected seq is ready, trim + // it, emit it if non-empty, and advance. Called after every resolved request. 
+ const drainResults = useCallback(() => { + const results = resultsRef.current; + while (results.has(nextEmitSeqRef.current)) { + const text = results.get(nextEmitSeqRef.current)!; + results.delete(nextEmitSeqRef.current); + nextEmitSeqRef.current += 1; + const trimmed = text.trim(); + // Whisper often returns a leading space; emit the trimmed value. + if (trimmed.length > 0) optionsRef.current.onText(trimmed); + } + }, []); + + // Map a transcription error to a user-facing message, mirroring the batch hook. + const transcriptionErrorMessage = useCallback( + (err: unknown): string => { + const resp = ( + err as { response?: { status?: number; data?: { message?: string } } } + )?.response; + const serverMsg = resp?.data?.message; + if (serverMsg && serverMsg.trim().length > 0) { + // The server already explains the cause (e.g. provider 404, bad format, + // STT not configured) — show it verbatim. + return serverMsg; + } + if (resp?.status === 503 || resp?.status === 403) { + return t("Voice dictation is not configured"); + } + return `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`; + }, + [t], + ); + + // Handle one ended speech segment: encode to WAV and transcribe. Results are + // buffered by seq and flushed in order. A single failed segment does NOT kill + // the session: log + one notification, then advance past that seq so later + // segments still flush. + const handleSegment = useCallback( + (audio: Float32Array) => { + const seq = nextSeqRef.current; + nextSeqRef.current += 1; + inFlightRef.current += 1; + // Capture the epoch for this request synchronously at send time. + const epoch = epochRef.current; + + const wavBlob = encodeWavPcm16(audio, VAD_SAMPLE_RATE); + void transcribeAudio(wavBlob, "speech.wav") + .then((text) => { + // Stale request from a previous session: drop it without touching any + // current-session state. 
+ if (epoch !== epochRef.current) return; + // Defend against a non-string server value before drainResults trims. + resultsRef.current.set(seq, typeof text === "string" ? text : ""); + drainResults(); + }) + .catch((err: unknown) => { + if (epoch !== epochRef.current) return; + // Log the full error for diagnosis (status + body + stack). + console.error("[dictation] segment transcription failed", err); + notifications.show({ + color: "red", + message: transcriptionErrorMessage(err), + }); + // Skip this seq so later segments can still flush in order. + if (nextEmitSeqRef.current === seq) { + nextEmitSeqRef.current += 1; + drainResults(); + } else { + resultsRef.current.set(seq, ""); + drainResults(); + } + }) + .finally(() => { + if (epoch !== epochRef.current) return; + inFlightRef.current -= 1; + // If recording already stopped, flip to idle once everything drained. + if (!activeRef.current && inFlightRef.current === 0) { + setStatus("idle"); + } + }); + }, + [drainResults, transcriptionErrorMessage], + ); + + const start = useCallback(async (): Promise => { + // Synchronous live guard: status is stale between renders, so also block on + // refs to prevent a double-click from creating two VAD instances (the first + // would leak its mic stream). + if (startingRef.current || vadRef.current || activeRef.current) return; + if (status !== "idle") return; + startingRef.current = true; + + // Notify the caller right when dictation begins (before any async work) so the + // editor can snapshot the caret position. + optionsRef.current.onStart?.(); + + // Reset per-session in-order emission state. Bump the epoch so any request + // still in flight from a previous (stopped) session becomes stale and its + // then/catch/finally are skipped — it can neither emit old text into this + // new session nor decrement this session's freshly-zeroed in-flight counter. 
+ epochRef.current += 1; + canceledRef.current = false; + nextSeqRef.current = 0; + nextEmitSeqRef.current = 0; + resultsRef.current = new Map(); + inFlightRef.current = 0; + resetLevel(); + + // Create and resume the AudioContext NOW, inside the click gesture, before + // the (first-time-slow) model load below. A context first touched outside a + // user gesture stays "suspended" and the VAD audio worklet never runs — that + // is exactly why the first click did nothing and only the second (model + // already cached, so MicVAD.new was fast enough to create the context inside + // the gesture) started recording. We own this context and inject it into + // MicVAD (which then will NOT close it); it is reused across start/stop and + // closed only on unmount. + const AudioCtor = + window.AudioContext || + (window as unknown as { webkitAudioContext?: typeof AudioContext }) + .webkitAudioContext; + if (AudioCtor && !audioContextRef.current) { + audioContextRef.current = new AudioCtor(); + } + // Resume within the gesture; swallow rejection (e.g. already running/closed). + void audioContextRef.current?.resume().catch(() => {}); + // Show immediate feedback while the model loads (see Part B). + setStatus("loading"); + + let vad: MicVADInstance; + try { + // Lazy import so the heavy onnx model/worklet are only fetched on first use + // and code-split out of the main bundle. + const { MicVAD } = await import("@ricky0123/vad-web"); + + vad = await MicVAD.new({ + // Silero v5 model (smaller/faster than the legacy model). + model: "v5", + // vad-web 0.0.30 defaults startOnLoad:true, which opens the mic (calls + // getUserMedia) inside new() and leaves the later vad.start() a no-op — + // making its mic-permission error handling dead code. Force it off so the + // mic is opened only by the explicit vad.start() below, where the real + // getUserMedia errors are caught and mapped. 
+ startOnLoad: false, + // Inject the AudioContext we created+resumed inside the click gesture so + // the VAD worklet runs on a "running" context. When provided, the library + // uses it and does NOT take ownership/close it. + ...(audioContextRef.current + ? { audioContext: audioContextRef.current } + : {}), + // Only pass asset paths when defined; otherwise the library falls back to + // its own default asset path ("./", page-relative, NOT a CDN; see VAD_BASE_ASSET_PATH). + ...(VAD_BASE_ASSET_PATH !== undefined + ? { baseAssetPath: VAD_BASE_ASSET_PATH } + : {}), + ...(VAD_ONNX_WASM_BASE_PATH !== undefined + ? { onnxWASMBasePath: VAD_ONNX_WASM_BASE_PATH } + : {}), + // --- VAD tuning (all tunable) --- + // Probability over which a frame counts as speech. + positiveSpeechThreshold: 0.5, + // Probability under which a frame counts as non-speech (~0.15 below the + // positive threshold, per Silero guidance). + negativeSpeechThreshold: 0.35, + // Silence to wait through before ending a segment (the "don't cut + // immediately" delay). Each ended segment is ONE transcription request, so + // cutting on short gaps over-fragments normal speech into a flood of tiny + // requests (and trips the server's per-user rate limit). Wait ~1.5s — a + // real sentence/thought boundary — so request count tracks actual pauses, + // not every inter-word gap. Higher = fewer requests but more latency + // before text appears. NOTE: vad-web 0.0.30 takes this in ms, not frames + // (one Silero frame is ~32ms at 16k). + redemptionMs: 1500, + // Audio kept before speech start (left padding so the first word isn't + // clipped) — ~0.3s. + preSpeechPadMs: 320, + // Ignore sub-100ms blips like clicks. + minSpeechMs: 96, + onFrameProcessed: (probabilities: { isSpeech: number }) => { + // Drive the level meter from the speech probability. Light exponential + // smoothing + a throttle so React state isn't updated every frame; this + // powers the existing button halo. Reuses the VAD's own frame + // probabilities — no second AudioContext/AnalyserNode.
+ if (!activeRef.current) return; + const level = Math.min(1, Math.max(0, probabilities.isSpeech)); + smoothedLevelRef.current = smoothedLevelRef.current * 0.8 + level * 0.2; + if (Math.abs(smoothedLevelRef.current - emittedLevelRef.current) > 0.01) { + emittedLevelRef.current = smoothedLevelRef.current; + setAudioLevel(smoothedLevelRef.current); + } + }, + onSpeechStart: () => { + // No-op: the segment is only handled once it ends. + }, + onSpeechEnd: (audio: Float32Array) => { + // A pause was detected — cut this segment and transcribe it. Ignore late + // callbacks that fire after stop()/cancel(). + if (!activeRef.current || canceledRef.current) return; + handleSegment(audio); + }, + }); + } catch (err) { + // With startOnLoad:false, new() loads the model/worklet/wasm but does NOT + // open the mic, so a throw here is an asset/init failure (model fetch, + // worklet, onnxruntime wasm), not a mic-permission error. Map it as a + // generic "could not start" with the underlying detail. (The mic-permission + // name checks are kept in the vad.start() catch below, where getUserMedia + // actually runs.) + console.error("[dictation] VAD init failed", err); + const detail = (err as { message?: string })?.message ?? String(err); + notifications.show({ + color: "red", + message: `${t("Could not start recording")}: ${detail}`, + }); + // Defensive: if MicVAD.new partially succeeded before throwing, make sure we + // don't leak it. + destroyVad(); + setStatus("idle"); + startingRef.current = false; + return; + } + + vadRef.current = vad; + // Accept frames once start() resolves; the VAD callbacks already guard on + // activeRef, so setting it before start() is safe. + activeRef.current = true; + + try { + // With startOnLoad:false this is where getUserMedia actually runs, so map + // mic-permission errors here the same way the batch hook does; otherwise + // fall back to a generic "could not start" message. 
+ await vad.start(); + } catch (err) { + // Always log the full error for diagnosis (name, message, stack). + console.error("[dictation] VAD.start failed", err); + const name = (err as { name?: string })?.name; + const detail = (err as { message?: string })?.message ?? String(err); + let message: string; + if (name === "NotAllowedError" || name === "SecurityError") { + message = t("Microphone access denied"); + } else if (name === "NotFoundError" || name === "OverconstrainedError") { + message = t("No microphone found"); + } else if (name === "NotReadableError" || name === "AbortError") { + message = t("Microphone is unavailable or already in use"); + } else { + message = `${t("Could not start recording")}: ${detail}`; + } + notifications.show({ color: "red", message }); + activeRef.current = false; + destroyVad(); + setStatus("idle"); + startingRef.current = false; + return; + } + + setStatus("recording"); + // Recording has truly begun; release the synchronous start guard. + startingRef.current = false; + + // Optional overall safety cap: auto-stop after maxDurationMs like the batch + // hook does. + const maxDurationMs = optionsRef.current.maxDurationMs ?? 120000; + timerRef.current = setTimeout(() => { + if (activeRef.current) stopRef.current(); + }, maxDurationMs); + }, [status, t, resetLevel, destroyVad, handleSegment]); + + const stop = useCallback((): void => { + clearTimer(); + if (!activeRef.current && !vadRef.current) { + // Nothing is running; make sure the UI is idle. + setStatus("idle"); + return; + } + // Mark inactive first so late onSpeechEnd/onFrameProcessed callbacks are + // ignored. Any speech segment that has NOT yet ended (user clicks Stop + // mid-utterance) is dropped — acceptable for v1; users normally pause before + // stopping. 
+ activeRef.current = false; + destroyVad(); + resetLevel(); + settleAfterStop(); + }, [clearTimer, destroyVad, resetLevel, settleAfterStop]); + + // Keep stop() reachable from the maxDuration timer closure (which is created + // before stop is defined) without re-creating the VAD. + const stopRef = useRef(stop); + stopRef.current = stop; + + const cancel = useCallback((): void => { + clearTimer(); + canceledRef.current = true; + activeRef.current = false; + // Hard discard: bump the epoch so any in-flight request becomes stale and is + // ignored the moment it resolves (no emit, no counter touch). + epochRef.current += 1; + // Drop pending results / queue; in-flight requests will resolve into a now- + // empty buffer and be ignored. + resultsRef.current = new Map(); + nextSeqRef.current = 0; + nextEmitSeqRef.current = 0; + inFlightRef.current = 0; + destroyVad(); + resetLevel(); + setStatus("idle"); + }, [clearTimer, destroyVad, resetLevel]); + + // Clean up on unmount: destroy the VAD, stop the mic stream, clear the timer. + // Defensive try/catch lives inside destroyVad so teardown never throws. + useEffect(() => { + return () => { + clearTimer(); + activeRef.current = false; + canceledRef.current = true; + destroyVad(); + // Close the AudioContext we own (MicVAD never closes an injected one). 
+ if ( + audioContextRef.current && + audioContextRef.current.state !== "closed" + ) { + void audioContextRef.current.close().catch(() => {}); + } + audioContextRef.current = null; + }; + }, [clearTimer, destroyVad]); + + return { status, start, stop, cancel, audioLevel }; +} diff --git a/apps/client/src/features/dictation/utils/encode-wav.test.ts b/apps/client/src/features/dictation/utils/encode-wav.test.ts new file mode 100644 index 00000000..67913588 --- /dev/null +++ b/apps/client/src/features/dictation/utils/encode-wav.test.ts @@ -0,0 +1,87 @@ +import { describe, it, expect } from "vitest"; +import { encodeWavPcm16 } from "./encode-wav"; + +// Contract tests for `encodeWavPcm16` (encode-wav.ts). The dictation feature +// streams microphone audio as mono 16-bit PCM WAV to the STT endpoint, which +// whitelists audio/wav. A regression in the WAV header or PCM16 clamping would +// produce audio the server cannot decode (silence / garbled transcripts), so we +// assert the canonical 44-byte header layout and the sample quantisation rails. + +// Read a DataView back out of a Blob. jsdom's Blob does not implement +// `.arrayBuffer()`, so go through FileReader.readAsArrayBuffer instead. 
+function readView(blob: Blob): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = () => resolve(new DataView(reader.result as ArrayBuffer)); + reader.onerror = () => reject(reader.error); + reader.readAsArrayBuffer(blob); + }); +} + +function readStr(view: DataView, offset: number, length: number): string { + let s = ""; + for (let i = 0; i < length; i++) s += String.fromCharCode(view.getUint8(offset + i)); + return s; +} + +describe("encodeWavPcm16", () => { + it("writes the canonical RIFF/WAVE/fmt /data tags", async () => { + const view = await readView(encodeWavPcm16(new Float32Array(4))); + expect(readStr(view, 0, 4)).toBe("RIFF"); + expect(readStr(view, 8, 4)).toBe("WAVE"); + expect(readStr(view, 12, 4)).toBe("fmt "); + expect(readStr(view, 36, 4)).toBe("data"); + }); + + it("writes a PCM fmt chunk (size=16, format=1, mono, 16-bit)", async () => { + const samples = new Float32Array(10); + const view = await readView(encodeWavPcm16(samples)); + expect(view.getUint32(16, true)).toBe(16); // fmt chunk size + expect(view.getUint16(20, true)).toBe(1); // audioFormat = PCM + expect(view.getUint16(22, true)).toBe(1); // channels = mono + expect(view.getUint16(34, true)).toBe(16); // bits per sample + }); + + it("derives byteRate, blockAlign and dataSize from the sample rate and length", async () => { + const sampleRate = 16000; + const samples = new Float32Array(10); + const view = await readView(encodeWavPcm16(samples, sampleRate)); + expect(view.getUint32(28, true)).toBe(sampleRate * 2); // byteRate = sampleRate * 2 + expect(view.getUint16(32, true)).toBe(2); // blockAlign = 2 (mono * 16-bit) + expect(view.getUint32(40, true)).toBe(samples.length * 2); // dataSize + expect(view.getUint32(4, true)).toBe(36 + samples.length * 2); // RIFF chunk size + }); + + it("defaults the sample rate to 16000 at offset 24", async () => { + const view = await readView(encodeWavPcm16(new Float32Array(2))); + 
expect(view.getUint32(24, true)).toBe(16000); + }); + + it("writes the overridden sample rate at offset 24 (8000 / 48000)", async () => { + const view8 = await readView(encodeWavPcm16(new Float32Array(2), 8000)); + expect(view8.getUint32(24, true)).toBe(8000); + expect(view8.getUint32(28, true)).toBe(8000 * 2); // byteRate follows the override + + const view48 = await readView(encodeWavPcm16(new Float32Array(2), 48000)); + expect(view48.getUint32(24, true)).toBe(48000); + expect(view48.getUint32(28, true)).toBe(48000 * 2); + }); + + it("clamps and quantises PCM16 samples to the asymmetric rails", async () => { + // +1.0 -> 32767 (clamped>=0 uses *0x7fff), -1.0 -> -32768 (clamped<0 uses *0x8000), + // 0 -> 0, and out-of-range values are clamped to the rails first. + const samples = new Float32Array([1.0, -1.0, 0, 1.5, -1.5]); + const view = await readView(encodeWavPcm16(samples)); + expect(view.getInt16(44 + 0 * 2, true)).toBe(32767); // +1.0 + expect(view.getInt16(44 + 1 * 2, true)).toBe(-32768); // -1.0 + expect(view.getInt16(44 + 2 * 2, true)).toBe(0); // 0 + expect(view.getInt16(44 + 3 * 2, true)).toBe(32767); // +1.5 -> clamped to +1.0 + expect(view.getInt16(44 + 4 * 2, true)).toBe(-32768); // -1.5 -> clamped to -1.0 + }); + + it("produces a mono blob of length 44 + samples.length * 2", () => { + expect(encodeWavPcm16(new Float32Array(0)).size).toBe(44); + expect(encodeWavPcm16(new Float32Array(100)).size).toBe(44 + 100 * 2); + expect(encodeWavPcm16(new Float32Array(100)).type).toBe("audio/wav"); + }); +}); diff --git a/apps/client/src/features/dictation/utils/encode-wav.ts b/apps/client/src/features/dictation/utils/encode-wav.ts new file mode 100644 index 00000000..818d50ee --- /dev/null +++ b/apps/client/src/features/dictation/utils/encode-wav.ts @@ -0,0 +1,32 @@ +// Encode mono Float32 PCM samples into a 16-bit PCM WAV blob (audio/wav). +// The server STT endpoint whitelists audio/wav, so this is sent as-is. 
+export function encodeWavPcm16(samples: Float32Array, sampleRate = 16000): Blob { + const bytesPerSample = 2; + const blockAlign = bytesPerSample; // mono + const dataSize = samples.length * bytesPerSample; + const buffer = new ArrayBuffer(44 + dataSize); + const view = new DataView(buffer); + const writeStr = (offset: number, s: string) => { + for (let i = 0; i < s.length; i++) view.setUint8(offset + i, s.charCodeAt(i)); + }; + writeStr(0, "RIFF"); + view.setUint32(4, 36 + dataSize, true); + writeStr(8, "WAVE"); + writeStr(12, "fmt "); + view.setUint32(16, 16, true); // PCM fmt chunk size + view.setUint16(20, 1, true); // audio format = PCM + view.setUint16(22, 1, true); // channels = mono + view.setUint32(24, sampleRate, true); + view.setUint32(28, sampleRate * blockAlign, true); // byte rate + view.setUint16(32, blockAlign, true); + view.setUint16(34, 16, true); // bits per sample + writeStr(36, "data"); + view.setUint32(40, dataSize, true); + let offset = 44; + for (let i = 0; i < samples.length; i++) { + const clamped = Math.max(-1, Math.min(1, samples[i])); + view.setInt16(offset, clamped < 0 ? 
clamped * 0x8000 : clamped * 0x7fff, true); + offset += 2; + } + return new Blob([buffer], { type: "audio/wav" }); +} diff --git a/apps/client/src/features/editor/components/audio/audio-menu.tsx b/apps/client/src/features/editor/components/audio/audio-menu.tsx index eadc1afe..bd649482 100644 --- a/apps/client/src/features/editor/components/audio/audio-menu.tsx +++ b/apps/client/src/features/editor/components/audio/audio-menu.tsx @@ -1,23 +1,43 @@ import { BubbleMenu as BaseBubbleMenu } from "@tiptap/react/menus"; import { findParentNode, posToDOMRect, useEditorState } from "@tiptap/react"; -import { useCallback } from "react"; +import { useCallback, useState } from "react"; import { Node as PMNode } from "@tiptap/pm/model"; import { isEditorReady } from "@docmost/editor-ext"; import { EditorMenuProps, ShouldShowProps, } from "@/features/editor/components/table/types/types.ts"; -import { ActionIcon, Tooltip } from "@mantine/core"; +import { ActionIcon, Loader, Tooltip } from "@mantine/core"; import { IconDownload, + IconFileText, IconTrash, } from "@tabler/icons-react"; +import { notifications } from "@mantine/notifications"; +import { useAtomValue } from "jotai"; import { useTranslation } from "react-i18next"; import { getFileUrl } from "@/lib/config.ts"; +import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts"; +import { transcribeAudio } from "@/features/dictation/services/dictation-service"; import classes from "../common/toolbar-menu.module.css"; +// STT-accepted audio MIME types (mirror of the server whitelist). If the +// fetched blob's type is not one of these, we infer it from the file +// extension so the upload's content-type is something the endpoint accepts. 
+const RECOGNIZED_AUDIO_MIME = new Set([ + "audio/webm", "audio/ogg", "audio/mp4", "audio/mpeg", + "audio/wav", "audio/x-wav", "audio/wave", "audio/m4a", "audio/x-m4a", +]); +const AUDIO_MIME_BY_EXT: Record = { + mp3: "audio/mpeg", m4a: "audio/mp4", mp4: "audio/mp4", + wav: "audio/wav", ogg: "audio/ogg", oga: "audio/ogg", webm: "audio/webm", +}; + export function AudioMenu({ editor }: EditorMenuProps) { const { t } = useTranslation(); + const workspace = useAtomValue(workspaceAtom); + const dictationEnabled = workspace?.settings?.ai?.dictation === true; + const [isTranscribing, setIsTranscribing] = useState(false); const editorState = useEditorState({ editor, @@ -68,6 +88,100 @@ export function AudioMenu({ editor }: EditorMenuProps) { }; }, [editor]); + const handleTranscribe = useCallback(async () => { + const src = editorState?.src; + if (!src || isTranscribing) return; + + // The bubble menu shows for the selected audio node, so selection.from is + // that node's start position. Capture it now to disambiguate duplicate-src + // blocks after the async transcription completes. + const selectedPos = editor.state.selection.from; + + setIsTranscribing(true); + try { + const fileUrl = getFileUrl(src); + // Derive a filename from the internal src for the multipart part name and + // for MIME inference when the fetched blob has no usable type. + const filename = decodeURIComponent( + src.split("?")[0].split("/").pop() || "audio", + ); + + const res = await fetch(fileUrl, { credentials: "include" }); + if (!res.ok) { + throw new Error(`Failed to fetch audio file (HTTP ${res.status})`); + } + const blob = await res.blob(); + + // Ensure the upload's content-type is one the STT endpoint accepts; the + // server keys off the blob's MIME type. + let uploadBlob = blob; + const baseType = (blob.type || "").split(";")[0].trim().toLowerCase(); + if (!RECOGNIZED_AUDIO_MIME.has(baseType)) { + const ext = filename.split(".").pop()?.toLowerCase() ?? 
""; + const inferred = AUDIO_MIME_BY_EXT[ext]; + if (inferred) { + // Rebuild the blob with an accepted content-type; the server keys off it. + uploadBlob = new Blob([blob], { type: inferred }); + } + } + + const text = (await transcribeAudio(uploadBlob, filename)).trim(); + if (text.length === 0) { + notifications.show({ message: t("No speech detected") }); + return; + } + + // Re-scan the doc at insert time so a collaborative edit during the async + // transcription can't misplace the text. Among audio nodes with this src + // (the same file may be embedded more than once), pick the occurrence + // closest to the originally-selected block. + let insertPos: number | null = null; + let bestDelta = Infinity; + editor.state.doc.descendants((node, pos) => { + if (node.type.name === "audio" && node.attrs.src === src) { + const delta = Math.abs(pos - selectedPos); + if (delta < bestDelta) { + bestDelta = delta; + insertPos = pos + node.nodeSize; // position just after the audio block + } + } + return true; // visit all nodes to find the closest match + }); + + const paragraph = { type: "paragraph", content: [{ type: "text", text }] }; + try { + if (insertPos !== null) { + editor.chain().focus().insertContentAt(insertPos, paragraph).run(); + } else { + editor.chain().focus().insertContent(paragraph).run(); + } + } catch (insertErr) { + // A destroyed editor or out-of-bounds position must not throw; log and + // ignore so the transcription itself is not reported as a failure. + console.error("[audio-transcribe] insert failed", insertErr); + } + } catch (err) { + console.error("[audio-transcribe] failed", err); + const resp = ( + err as { response?: { status?: number; data?: { message?: string } } } + )?.response; + const serverMsg = resp?.data?.message; + let message: string; + if (serverMsg && serverMsg.trim().length > 0) { + // The server already explains the cause (e.g. provider error, bad + // format, STT not configured) — show it verbatim. 
+ message = serverMsg; + } else if (resp?.status === 503 || resp?.status === 403) { + message = t("Voice dictation is not configured"); + } else { + message = `${t("Transcription failed")}: ${(err as { message?: string })?.message ?? String(err)}`; + } + notifications.show({ color: "red", message }); + } finally { + setIsTranscribing(false); + } + }, [editor, editorState?.src, isTranscribing, t]); + const handleDownload = useCallback(() => { if (!editorState?.src) return; const url = getFileUrl(editorState.src); @@ -95,6 +209,20 @@ export function AudioMenu({ editor }: EditorMenuProps) { shouldShow={shouldShow} >
+ {dictationEnabled && ( + + + {isTranscribing ? : } + + + )} + + {/* #146: the editable
 (contentDOM) MUST come first in the DOM.
+          With the non-editable menu rendered before it, the browser's click
+          hit-testing snapped the caret up one line. Render content first; the
+          menu is rendered after it and lifted back above visually via flex
+          `order: -1` (the `.codeBlock` wrapper is a flex column — see
+          code-block.module.css). It stays fully in flow as a full-width row
+          above the code: no overlay/absolute positioning. The second #146
+          mitigation lives in editor-paste-handler.tsx (reflowAfterPaste). */}
+      
+
       
       
 
-      
-
       {language === "mermaid" && (
         
           
diff --git a/apps/client/src/features/editor/components/code-block/code-block.module.css b/apps/client/src/features/editor/components/code-block/code-block.module.css
index 6e0a5dd3..4ecda370 100644
--- a/apps/client/src/features/editor/components/code-block/code-block.module.css
+++ b/apps/client/src/features/editor/components/code-block/code-block.module.css
@@ -17,7 +17,14 @@
     justify-content: center;
 }
 
+/* #146: the menu now follows the 
 in the DOM (so the editable contentDOM is
+   FIRST and click hit-testing is correct). Lift it back ABOVE the code visually
+   with flex `order` — the .codeBlock wrapper is a flex column (see code.css) —
+   so the menu still reads as a row above the code, exactly as before, without
+   sitting in-flow before the contentDOM. */
 .menuGroup {
+    order: -1;
+
     @media print {
         display: none;
     }
diff --git a/apps/client/src/features/editor/components/common/editor-paste-handler.test.ts b/apps/client/src/features/editor/components/common/editor-paste-handler.test.ts
new file mode 100644
index 00000000..bde6c837
--- /dev/null
+++ b/apps/client/src/features/editor/components/common/editor-paste-handler.test.ts
@@ -0,0 +1,160 @@
+import { describe, it, expect, beforeEach, afterEach, vi } from "vitest";
+import {
+  collectScrollAncestors,
+  reflowAfterPaste,
+} from "./editor-paste-handler";
+
+/**
+ * Unit tests for the #146 post-paste reflow helpers. jsdom does not compute
+ * styles or layout, so we stub getComputedStyle (per element via a Map) and the
+ * scroll/overflow geometry properties (per element via Object.defineProperty).
+ * Element trees are built DETACHED from `document`, so the ancestor walk only
+ * traverses the elements we create. collectScrollAncestors also appends a
+ * non-null document.scrollingElement, so each test pins it via
+ * setScrollingElement() and mostly asserts with toContain/not.toContain.
+ */
+
+type Overflow = { overflowX: string; overflowY: string };
+const styleMap = new Map();
+
+type ScrollableOptions = {
+  sh?: number;
+  ch?: number;
+  sw?: number;
+  cw?: number;
+  left?: number;
+  top?: number;
+  overflowX?: string;
+};
+
+// Build a detached div with stubbed scroll geometry (all sizes/offsets default
+// to 0, overflowX to "visible") and register its overflow pair in styleMap.
+function makeScrollable(overflowY: string, options: ScrollableOptions = {}) {
+  const { sh = 0, ch = 0, sw = 0, cw = 0, left = 0, top = 0 } = options;
+  const { overflowX = "visible" } = options;
+  const el = document.createElement("div");
+  const geometry = [
+    ["scrollHeight", sh],
+    ["clientHeight", ch],
+    ["scrollWidth", sw],
+    ["clientWidth", cw],
+    ["scrollLeft", left],
+    ["scrollTop", top],
+  ] as const;
+  for (const [prop, value] of geometry) {
+    Object.defineProperty(el, prop, { configurable: true, value });
+  }
+  styleMap.set(el, { overflowX, overflowY });
+  return el;
+}
+
+// Creates a fresh div, attaches it as the last child of `parent`, and returns
+// it. collectScrollAncestors begins at node.parentElement, which makes
+// `parent` the first element the walk inspects. appendChild hands back the
+// node it appended, so the new leaf can be returned in one expression.
+function makeNodeUnder(parent: HTMLElement) {
+  return parent.appendChild(document.createElement("div"));
+}
+
+// Shadow `document.scrollingElement` with an own accessor on the document
+// instance that always yields `value`. The native getter lives on
+// Document.prototype and is left untouched; the own property is configurable
+// so it can be deleted again later.
+function setScrollingElement(value: Element | null) {
+  const descriptor = { configurable: true, get: () => value };
+  Object.defineProperty(document, "scrollingElement", descriptor);
+}
+
+// Per test: empty the style registry and route getComputedStyle through it.
+beforeEach(() => {
+  styleMap.clear();
+  const lookupStyle = (el: Element) =>
+    styleMap.get(el) ?? { overflowX: "visible", overflowY: "visible" };
+  vi.stubGlobal("getComputedStyle", lookupStyle);
+});
+
+afterEach(() => {
+  vi.unstubAllGlobals();
+  // Remove the own-property override so the prototype getter applies again.
+  delete (document as any).scrollingElement;
+});
+
+describe("collectScrollAncestors", () => {
+  // Hang a new leaf under `ancestor` and collect starting from that leaf. The
+  // walk begins at the leaf's parentElement, so `ancestor` is the first
+  // element it examines.
+  const collectBelow = (ancestor: HTMLElement) =>
+    collectScrollAncestors(makeNodeUnder(ancestor));
+
+  it("includes an overflow:overlay ancestor that overflows (macOS case)", () => {
+    setScrollingElement(null);
+    const overlay = makeScrollable("overlay", { sh: 200, ch: 100 });
+    expect(collectBelow(overlay)).toContain(overlay);
+  });
+
+  it("excludes an overflow:auto ancestor that does NOT overflow (gate fails)", () => {
+    setScrollingElement(null);
+    const snug = makeScrollable("auto", { sh: 100, ch: 100 });
+    expect(collectBelow(snug)).not.toContain(snug);
+  });
+
+  it("includes an overflow:auto ancestor that overflows", () => {
+    setScrollingElement(null);
+    const tall = makeScrollable("auto", { sh: 200, ch: 100 });
+    expect(collectBelow(tall)).toContain(tall);
+  });
+
+  it("excludes a non-scrollable overflow even when it overflows", () => {
+    setScrollingElement(null);
+    const clipped = makeScrollable("hidden", { sh: 200, ch: 100 });
+    expect(collectBelow(clipped)).not.toContain(clipped);
+  });
+
+  it("includes an X-axis overflow:scroll ancestor that overflows horizontally", () => {
+    setScrollingElement(null);
+    // overflowY stays "visible": only the horizontal axis qualifies here.
+    const wide = makeScrollable("visible", {
+      overflowX: "scroll",
+      sw: 200,
+      cw: 100,
+    });
+    expect(collectBelow(wide)).toContain(wide);
+  });
+
+  it("dedups: scrollingElement already in the walk is added exactly once", () => {
+    const root = makeScrollable("auto", { sh: 200, ch: 100 });
+    setScrollingElement(root);
+    // `root` qualifies in the walk AND is the scrolling element.
+    const hits = collectBelow(root).filter((x) => x === root);
+    expect(hits.length).toBe(1);
+  });
+
+  it("does not throw and appends nothing when scrollingElement is null", () => {
+    setScrollingElement(null);
+    const tall = makeScrollable("auto", { sh: 200, ch: 100 });
+    // Only the qualifying ancestor we built — no trailing scrollingElement.
+    expect(collectBelow(tall)).toEqual([tall]);
+  });
+});
+
+describe("reflowAfterPaste", () => {
+  it("runs the double rAF and nudges each ancestor with scrollTo(scrollLeft, scrollTop)", () => {
+    // Invoke frame callbacks immediately so both nested frames (and the nudge
+    // inside them) finish before reflowAfterPaste returns.
+    const runFrameNow = (cb: FrameRequestCallback) => {
+      cb(0);
+      return 0;
+    };
+    vi.stubGlobal("requestAnimationFrame", runFrameNow);
+    setScrollingElement(null);
+
+    const scroller = makeScrollable("auto", { sh: 200, ch: 100, left: 5, top: 10 });
+    const editorDom = makeNodeUnder(scroller);
+    const scrollToSpy = vi.fn();
+    (scroller as any).scrollTo = scrollToSpy;
+
+    reflowAfterPaste({ view: { dom: editorDom } } as any);
+
+    expect(scrollToSpy).toHaveBeenCalledWith(5, 10);
+  });
+});
diff --git a/apps/client/src/features/editor/components/common/editor-paste-handler.tsx b/apps/client/src/features/editor/components/common/editor-paste-handler.tsx
index 85d49872..63300020 100644
--- a/apps/client/src/features/editor/components/common/editor-paste-handler.tsx
+++ b/apps/client/src/features/editor/components/common/editor-paste-handler.tsx
@@ -22,12 +22,81 @@ const ATTACHMENT_NODE_TYPES = [
 
 const ATTACHMENT_URL_RE = /\/api\/files\/([0-9a-f-]+)\//;
 
+const SCROLLABLE_OVERFLOW = new Set(["auto", "scroll", "overlay"]);
+
+/**
+ * Gather the elements reflowAfterPaste should nudge: each ancestor of `node`
+ * that is scrollable on at least one axis, followed by
+ * `document.scrollingElement` when it exists and was not already gathered.
+ * An axis counts as scrollable when its computed overflow is auto, scroll or
+ * overlay (overlay is common on macOS, where #146 reproduces) AND the content
+ * is larger than the client box. Results run innermost-first. Call this only
+ * after the paste has committed so the size comparison sees the new content.
+ */
+export function collectScrollAncestors(node: HTMLElement): HTMLElement[] {
+  const found: HTMLElement[] = [];
+  // Climb through every ancestor (body/html included) — on some layouts the
+  // scroll lives on body rather than on the scrollingElement.
+  for (let cur = node.parentElement; cur; cur = cur.parentElement) {
+    const style = getComputedStyle(cur);
+    const verticallyScrollable =
+      SCROLLABLE_OVERFLOW.has(style.overflowY) &&
+      cur.scrollHeight > cur.clientHeight;
+    const horizontallyScrollable =
+      SCROLLABLE_OVERFLOW.has(style.overflowX) &&
+      cur.scrollWidth > cur.clientWidth;
+    if (verticallyScrollable || horizontallyScrollable) found.push(cur);
+  }
+  const root = document.scrollingElement as HTMLElement | null;
+  if (root && found.indexOf(root) === -1) found.push(root);
+  return found;
+}
+
+/**
+ * Nudge the editor's scroll containers once a paste has settled, so the
+ * browser refreshes its click hit-testing geometry (#146). Pasted
+ * markdown/code inserts React NodeViews that mount ASYNCHRONOUSLY; until the
+ * next reflow, ProseMirror's posAtCoords/caretRangeFromPoint can map a click
+ * to a stale (offset) line — users observed that any scroll clears it. The
+ * nudge imitates that scroll with ZERO delta: every target receives
+ * `scrollTo(scrollLeft, scrollTop)`, i.e. its current position, so the
+ * viewport does not move. `editor.view.dom` is read immediately; the target
+ * lookup and the nudge are deferred by two nested animation frames so they
+ * run AFTER the pasted content + NodeViews commit (only then is the real
+ * scroll container measurable).
+ *
+ * This is the SECOND of two #146 mitigations; the FIRST is the content-first
+ * DOM order in the NodeViews (code-block-view.tsx, footnotes-list-view.tsx,
+ * footnote-definition-view.tsx). When editing one, check the other.
+ */
+export function reflowAfterPaste(editor: Editor) {
+  const editorDom = editor.view.dom as HTMLElement;
+  // `scrollTo(x, y)` is the repo idiom for the nudge and avoids a lint-flagged
+  // self-assignment of scrollTop/scrollLeft.
+  const nudgeScrollContainers = () => {
+    for (const target of collectScrollAncestors(editorDom)) {
+      target.scrollTo(target.scrollLeft, target.scrollTop);
+    }
+  };
+  // Frame 1 only schedules frame 2; the nudge itself runs in frame 2.
+  const scheduleSecondFrame = () => requestAnimationFrame(nudgeScrollContainers);
+  requestAnimationFrame(scheduleSecondFrame);
+}
+
 export const handlePaste = (
   editor: Editor,
   event: ClipboardEvent,
   pageId: string,
   creatorId?: string,
 ) => {
+  // Schedule a post-paste reflow on EVERY paste path — intentionally. handlePaste
+  // returns BEFORE the markdown/code-insertion plugin runs, so it cannot know here
+  // whether async NodeViews will be inserted; the nudge is a cheap layout read on
+  // the next frames and a no-op for the viewport, so scheduling it unconditionally
+  // is simpler and harmless. Pairs with the content-first DOM order in the
+  // NodeViews — both address #146 from different angles.
+  reflowAfterPaste(editor);
+
   const clipboardData = event.clipboardData.getData("text/plain");
 
   if (INTERNAL_LINK_REGEX.test(clipboardData)) {
diff --git a/apps/client/src/features/editor/components/common/node-resize.module.css b/apps/client/src/features/editor/components/common/node-resize.module.css
index 4159e44e..d2d7d4fe 100644
--- a/apps/client/src/features/editor/components/common/node-resize.module.css
+++ b/apps/client/src/features/editor/components/common/node-resize.module.css
@@ -73,3 +73,18 @@
     display: none !important;
   }
 }
+
+/* Float image (#145): on narrow screens a floated image would crowd the text to
+   an unreadable column, so collapse it to full width and drop the float.
+   `!important` is required because applyAlignment sets `float`/`padding` inline,
+   which a normal rule cannot override. Keys off the `data-image-align` attribute
+   the image node view mirrors onto its container. This module is the one actually
+   imported by the resize node views (node-resize-handles.ts), so the rule loads. */
+@media (max-width: 600px) { /* narrow screens only */
+  .container:global([data-image-align="floatLeft"]),
+  .container:global([data-image-align="floatRight"]) {
+    float: none !important; /* beats the inline float from applyAlignment */
+    width: 100% !important; /* collapse the image to full width */
+    padding: 0 !important; /* beats the inline padding from applyAlignment */
+  }
+}
diff --git a/apps/client/src/features/editor/components/fixed-toolbar/fixed-toolbar.tsx b/apps/client/src/features/editor/components/fixed-toolbar/fixed-toolbar.tsx
index e59f9863..b425753e 100644
--- a/apps/client/src/features/editor/components/fixed-toolbar/fixed-toolbar.tsx
+++ b/apps/client/src/features/editor/components/fixed-toolbar/fixed-toolbar.tsx
@@ -13,7 +13,6 @@ import { QuickInsertsGroup } from "./groups/quick-inserts-group";
 import { MoreInsertsGroup } from "./groups/more-inserts-group";
 import { HistoryGroup } from "./groups/history-group";
 import { AskAiGroup } from "./groups/ask-ai-group";
-import { DictationGroup } from "./groups/dictation-group";
 import { workspaceAtom } from "@/features/user/atoms/current-user-atom";
 import classes from "./fixed-toolbar.module.css";
 
@@ -31,7 +30,6 @@ export const FixedToolbar: FC = ({
   const state = useToolbarState(editor);
   const workspace = useAtomValue(workspaceAtom);
   const isGenerativeAiEnabled = workspace?.settings?.ai?.generative === true;
-  const isDictationEnabled = workspace?.settings?.ai?.dictation === true;
 
   if (!editor || !state) return null;
 
@@ -67,12 +65,6 @@ export const FixedToolbar: FC = ({
           
           
- {isDictationEnabled && ( - <> -
- - - )}
diff --git a/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx b/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx index 8a88f0e3..e8921816 100644 --- a/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx +++ b/apps/client/src/features/editor/components/fixed-toolbar/groups/dictation-group.tsx @@ -1,48 +1,72 @@ import { FC, useRef } from "react"; import type { Editor } from "@tiptap/react"; +import { useAtomValue } from "jotai"; +import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts"; import { MicButton } from "@/features/dictation/components/mic-button"; interface Props { editor: Editor; + color?: string; + iconSize?: number; } -export const DictationGroup: FC = ({ editor }) => { +export const DictationGroup: FC = ({ editor, color, iconSize }) => { + // Streaming (silence-cut) dictation is opt-in per workspace; absent/false + // keeps the stable batch path. + const workspace = useAtomValue(workspaceAtom); + const streamingDictation = + workspace?.settings?.ai?.dictationStreaming === true; + // Caret snapshot taken when dictation starts (where the first segment lands). const rangeRef = useRef<{ from: number; to: number } | null>(null); + // Running insertion point: after each inserted segment we remember the caret + // end so the NEXT segment appends right after it, contiguously, regardless of + // where the user's caret currently is. Null until the first segment lands. + const insertPosRef = useRef(null); const handleStart = () => { const { from, to } = editor.state.selection; rangeRef.current = { from, to }; + // New session: forget any insertion point from a previous dictation so the + // first segment uses the fresh snapshot above. + insertPosRef.current = null; }; const handleText = (text: string) => { // The editor may be gone by the time async transcription returns; bail out // instead of operating on a destroyed instance. 
if (!editor || editor.isDestroyed) return; - const snapshot = rangeRef.current; - rangeRef.current = null; // The document may have shrunk during transcription (e.g. a collaborative - // edit), so clamp the snapshot into the current bounds before inserting. + // edit), so clamp any position into the current bounds before inserting. const docSize = editor.state.doc.content.size; const clamp = (p: number) => Math.max(0, Math.min(p, docSize)); + // First segment lands at the snapshotted caret range; subsequent segments + // land at a zero-length range at the running insertion point so they stay + // contiguous even if the user clicked elsewhere mid-dictation. + const snapshot = rangeRef.current; + const range = + insertPosRef.current !== null + ? { from: clamp(insertPosRef.current), to: clamp(insertPosRef.current) } + : snapshot + ? { from: clamp(snapshot.from), to: clamp(snapshot.to) } + : null; try { - if (snapshot) { - // Insert at the snapshotted caret; a trailing space keeps words - // separated (the hook already trims the transcribed text). - editor - .chain() - .focus() - .insertContentAt( - { from: clamp(snapshot.from), to: clamp(snapshot.to) }, - `${text} `, - ) - .run(); + if (range) { + // Insert at the resolved range; a trailing space keeps words separated + // (the hook already trims the transcribed text). + editor.chain().focus().insertContentAt(range, `${text} `).run(); } else { + // No snapshot and no running point (shouldn't happen normally) — fall + // back to the current caret. editor.chain().focus().insertContent(`${text} `).run(); } + // Remember where the inserted text ends so the next segment appends right + // after it, independent of later user caret moves. + insertPosRef.current = editor.state.selection.to; } catch { - // The snapshot drifted out of range; fall back to the current caret. + // The range drifted out of bounds; fall back to the current caret. 
try { editor.chain().focus().insertContent(`${text} `).run(); + insertPosRef.current = editor.state.selection.to; } catch { // The editor may have been destroyed; ignore so a dead editor can't // surface an uncaught error. @@ -53,9 +77,12 @@ export const DictationGroup: FC = ({ editor }) => { return ( ); }; diff --git a/apps/client/src/features/editor/components/footnote/footnote-definition-view.tsx b/apps/client/src/features/editor/components/footnote/footnote-definition-view.tsx new file mode 100644 index 00000000..b8fe182f --- /dev/null +++ b/apps/client/src/features/editor/components/footnote/footnote-definition-view.tsx @@ -0,0 +1,108 @@ +import { NodeViewContent, NodeViewProps, NodeViewWrapper } from "@tiptap/react"; +import { useTranslation } from "react-i18next"; +import { getFootnoteNumber, getFootnoteRefCount } from "@docmost/editor-ext"; +import classes from "./footnote.module.css"; + +/** + * A 0-based backlink index -> its lowercase letter label (0 -> "a", 25 -> "z", + * 26 -> "aa", ...), matching the Pandoc/Wikipedia "↩ a b c" convention. + */ +export function backlinkLabel(index: number): string { + let out = ""; + let x = index; + while (x >= 0) { + out = String.fromCharCode(97 + (x % 26)) + out; + x = Math.floor(x / 26) - 1; + } + return out; +} + +/** + * NodeView for a single footnote definition: a decorative number marker, the + * editable content (NodeViewContent), and a "↩" back-link to its reference. + * The number is derived from the document (not stored). + * + * After #166 a footnote can be referenced more than once (one number, one + * definition, N forward links). When it is, the back-link becomes a row of + * per-occurrence links — ↩ a b c … — each scrolling to its own reference (#168); + * a single-reference footnote keeps the plain ↩. 
+ */ +export default function FootnoteDefinitionView(props: NodeViewProps) { + const { node, editor } = props; + const { t } = useTranslation(); + const id = node.attrs.id as string; + + // Read the cached number/ref-count from the numbering plugin (computed once + // per doc change) rather than recomputing the whole map on every render. + const number = getFootnoteNumber(editor.state, id) ?? "?"; + const refCount = getFootnoteRefCount(editor.state, id); + + const jumpTo = (e: React.MouseEvent, index: number) => { + e.preventDefault(); + editor.commands.scrollToReference(id, index); + }; + + return ( + + {/* #146: contentDOM MUST be the first child — a non-editable marker before + it makes click hit-testing snap the caret above. Content first; the + marker + back-link follow in DOM and are placed left/right via CSS + flex `order`. The second #146 mitigation lives in + editor-paste-handler.tsx (reflowAfterPaste). */} + + + {refCount > 1 ? ( + // Multiple references -> ↩ followed by one lettered link per occurrence. + + + {Array.from({ length: refCount }, (_, i) => ( + jumpTo(e, i)} + role="button" + aria-label={t("Back to reference {{label}}", { + label: backlinkLabel(i), + })} + title={t("Back to reference {{label}}", { + label: backlinkLabel(i), + })} + > + {backlinkLabel(i)} + + ))} + + ) : ( + // Single reference -> the plain ↩ (unchanged behavior). 
+ jumpTo(e, 0)} + role="button" + aria-label={t("Back to reference")} + title={t("Back to reference")} + > + ↩ + + )} + + ); +} diff --git a/apps/client/src/features/editor/components/footnote/footnote-reference-view.tsx b/apps/client/src/features/editor/components/footnote/footnote-reference-view.tsx new file mode 100644 index 00000000..7ea9e87d --- /dev/null +++ b/apps/client/src/features/editor/components/footnote/footnote-reference-view.tsx @@ -0,0 +1,146 @@ +import { useEffect, useRef, useState, useCallback } from "react"; +import { NodeViewProps, NodeViewWrapper } from "@tiptap/react"; +import { createPortal } from "react-dom"; +import { useTranslation } from "react-i18next"; +import { + autoUpdate, + computePosition, + flip, + offset, + shift, +} from "@floating-ui/dom"; +import { + FOOTNOTE_DEFINITION_NAME, + getFootnoteNumber, +} from "@docmost/editor-ext"; +import { ActionIcon } from "@mantine/core"; +import { IconArrowDown } from "@tabler/icons-react"; +import classes from "./footnote.module.css"; + +/** + * Read the plain text of the footnote definition with `id` directly from the + * editor state. No sub-editor: the popover is read-only. + */ +function getDefinitionText(editor: NodeViewProps["editor"], id: string): string { + let text = ""; + editor.state.doc.descendants((node) => { + if ( + node.type.name === FOOTNOTE_DEFINITION_NAME && + node.attrs.id === id + ) { + text = node.textContent; + return false; + } + return undefined; + }); + return text; +} + +export default function FootnoteReferenceView(props: NodeViewProps) { + const { node, editor, selected } = props; + const { t } = useTranslation(); + const id = node.attrs.id as string; + + const anchorRef = useRef(null); + const popoverRef = useRef(null); + const [open, setOpen] = useState(false); + + // Number is derived (not stored). 
Read it from the numbering plugin's cached + // map (computed once per doc change) instead of walking the whole document on + // every render — recomputing per NodeView per render was O(n^2) per keystroke. + const number = getFootnoteNumber(editor.state, id) ?? "?"; + const defText = open ? getDefinitionText(editor, id) : ""; + + const position = useCallback(() => { + const anchor = anchorRef.current; + const popup = popoverRef.current; + if (!anchor || !popup) return; + computePosition(anchor, popup, { + placement: "top", + middleware: [offset(6), flip(), shift({ padding: 8 })], + }).then(({ x, y }) => { + popup.style.left = `${x}px`; + popup.style.top = `${y}px`; + }); + }, []); + + useEffect(() => { + if (!open) return; + const anchor = anchorRef.current; + const popup = popoverRef.current; + if (!anchor || !popup) return; + + const cleanup = autoUpdate(anchor, popup, position); + + const onPointerDown = (e: PointerEvent) => { + if ( + popup.contains(e.target as Node) || + anchor.contains(e.target as Node) + ) { + return; + } + setOpen(false); + }; + document.addEventListener("pointerdown", onPointerDown, true); + + return () => { + cleanup(); + document.removeEventListener("pointerdown", onPointerDown, true); + }; + }, [open, position]); + + const handleGoTo = (e: React.MouseEvent) => { + e.preventDefault(); + e.stopPropagation(); + setOpen(false); + editor.commands.scrollToFootnote(id); + }; + + return ( + + (anchorRef.current = el)} + data-footnote-ref="" + data-id={id} + className={`${classes.reference} ${selected ? classes.selected : ""}`} + onMouseEnter={() => setOpen(true)} + onClick={(e) => { + e.preventDefault(); + setOpen((v) => !v); + }} + // The decoration sets --footnote-number; provide a fallback inline. + style={{ ["--footnote-number" as any]: `"${number}"` }} + aria-label={t("Footnote {{number}}", { number })} + role="button" + /> + {open && + createPortal( +
setOpen(false)} + > +
+ + {t("Footnote {{number}}", { number })} + + + + +
+
+ {defText || t("Empty footnote")} +
+
, + document.body, + )} +
+ ); +} diff --git a/apps/client/src/features/editor/components/footnote/footnote-views.structure.test.tsx b/apps/client/src/features/editor/components/footnote/footnote-views.structure.test.tsx new file mode 100644 index 00000000..bfffac90 --- /dev/null +++ b/apps/client/src/features/editor/components/footnote/footnote-views.structure.test.tsx @@ -0,0 +1,231 @@ +import { describe, it, expect, vi, afterEach } from "vitest"; +import { render, fireEvent } from "@testing-library/react"; + +/** + * Structural regression guard for #146 (PR #147). + * + * Guards ALL THREE editable NodeViews touched by the fix: the two footnote views + * (FootnotesListView, FootnoteDefinitionView) AND the code block (CodeBlockView). + * + * The caret/click-offset fix rests entirely on ONE invariant: in every editable + * NodeView the editable `NodeViewContent` (contentDOM) must come FIRST in the + * wrapper, with no non-editable (`contenteditable="false"`) element before it. + * If a future edit reinserts chrome (separator, heading, marker, back-link, + * language menu) ahead of the content, the macOS hit-testing bug returns + * silently — and the symptom needs a real browser to see. This test pins the + * DOM ORDER (the proxy that IS the fix) in the existing jsdom harness. + * + * We stub `@tiptap/react` so the views render as plain DOM and we can inspect + * the child order our JSX produces — that order is exactly what regresses, and + * it does not depend on a live editor. The stubbed `NodeViewContent` carries the + * real `data-node-view-content` marker tiptap uses, so the assertion mirrors + * production. This test passes on the fixed order and FAILS on the pre-fix order + * (chrome-before-content). + */ +vi.mock("@tiptap/react", () => ({ + NodeViewWrapper: ({ children, ...props }: any) => ( +
+ {children} +
+ ), + // Mirror the real contentDOM marker so the guard matches production output. + NodeViewContent: (props: any) =>
, +})); + +vi.mock("react-i18next", () => ({ + useTranslation: () => ({ t: (key: string) => key }), +})); + +// footnote-definition-view reads a cached number + reference count from the +// numbering plugin; stub them so we don't need a live ProseMirror state. The +// ref-count is a hoisted mutable so a test can drive the single-vs-multi +// backlink branch (#168). Default 1 = single reference (the #146 cases). +const { mockRefCount } = vi.hoisted(() => ({ mockRefCount: { value: 1 } })); +vi.mock("@docmost/editor-ext", () => ({ + getFootnoteNumber: () => 1, + getFootnoteRefCount: () => mockRefCount.value, +})); + +// Mocks so CodeBlockView renders cheaply (no MantineProvider, no matchMedia). +// The Group mock MUST forward contentEditable: React serializes +// contentEditable={false} to the DOM attribute contenteditable="false", which +// the structural guard selects on to identify non-editable chrome. +vi.mock("@mantine/core", () => ({ + Group: ({ children, className, contentEditable }: any) => ( +
+ {children} +
+ ), + Select: () => null, + Tooltip: ({ children }: any) => <>{children}, + ActionIcon: ({ children, onClick }: any) => ( + + ), +})); +vi.mock("@/components/common/copy-button", () => ({ + CopyButton: ({ children }: any) => + children({ copied: false, copy: () => {} }), +})); +vi.mock("@tabler/icons-react", () => ({ + IconCheck: () => null, + IconCopy: () => null, +})); +vi.mock("@/features/editor/components/code-block/mermaid-view.tsx", () => ({ + default: () => null, +})); + +import FootnotesListView from "./footnotes-list-view"; +import FootnoteDefinitionView, { + backlinkLabel, +} from "./footnote-definition-view"; +import CodeBlockView from "../code-block/code-block-view"; + +// Minimal NodeViewProps stub: definition view only touches node.attrs.id and +// editor.state (the latter unused once getFootnoteNumber is mocked). +const props = { + node: { attrs: { id: "fn-1" }, textContent: "" }, + editor: { state: {}, isEditable: true, commands: {} }, + getPos: () => 0, + updateAttributes: () => {}, + deleteNode: () => {}, +} as any; + +// CodeBlockView needs more than the footnote stub: a language attr (non-mermaid +// so MermaidView never renders), an editor with selection/on/off, and an +// extension exposing lowlight.listLanguages. 
+const codeBlockProps = { + node: { attrs: { language: "javascript" }, textContent: "", nodeSize: 1 }, + editor: { + state: { selection: { from: 0, to: 0 } }, + isEditable: true, + commands: {}, + on: vi.fn(), + off: vi.fn(), + }, + extension: { + options: { lowlight: { listLanguages: () => ["javascript", "python"] } }, + }, + getPos: () => 0, + updateAttributes: () => {}, + deleteNode: () => {}, +} as any; + +const cases: Array<{ name: string; ui: React.ReactElement }> = [ + { name: "FootnotesListView", ui: }, + { name: "FootnoteDefinitionView", ui: }, + { name: "CodeBlockView", ui: }, +]; + +describe("#146 editable NodeView contentDOM-first invariant", () => { + it.each(cases)( + "$name renders the editable contentDOM ahead of all non-editable chrome", + ({ ui }) => { + const { getByTestId } = render(ui); + const wrapper = getByTestId("nvw"); + + const content = wrapper.querySelector("[data-node-view-content]"); + expect(content).not.toBeNull(); + + // The contentDOM sits at the FRONT of the wrapper: it is either the + // wrapper's first child (footnote views) or nested in the first child + // (code-block wraps it in
). Either way the first element child
+      // must contain it. (compareDocumentPosition below is NOT redundant here:
+      // for code-block the content is not the literal first child, so we keep
+      // the document-order check to prove no chrome precedes the content.)
+      const firstEl = wrapper.firstElementChild!;
+      expect(firstEl === content || firstEl.contains(content!)).toBe(true);
+
+      // Chrome exists (separator/heading/marker/back-link/menu)...
+      const nonEditable = wrapper.querySelectorAll('[contenteditable="false"]');
+      expect(nonEditable.length).toBeGreaterThan(0);
+
+      // ...and every non-editable element comes AFTER the contentDOM, so the
+      // browser's click hit-testing reaches the editable content first (#146).
+      for (const el of Array.from(nonEditable)) {
+        const pos = content!.compareDocumentPosition(el);
+        expect(pos & Node.DOCUMENT_POSITION_FOLLOWING).toBeTruthy();
+      }
+    },
+  );
+});
+
+// #168: a footnote referenced more than once shows one lettered backlink per
+// occurrence (↩ a b c), each scrolling to its own reference; a single-reference
+// footnote keeps the plain ↩.
+describe("#168 footnote definition multi-backlinks", () => {
+  afterEach(() => {
+    // Reset the shared ref-count mock so other tests see a single reference.
+    mockRefCount.value = 1;
+  });
+
+  const makeProps = () => // built per call, so every test gets its own spy
+    ({
+      node: { attrs: { id: "fn-1" }, textContent: "" },
+      editor: {
+        state: {},
+        isEditable: true,
+        commands: { scrollToReference: vi.fn() }, // spy asserted on below
+      },
+      getPos: () => 0,
+      updateAttributes: () => {},
+      deleteNode: () => {},
+    }) as any;
+
+  it("renders one lettered backlink per reference (a, b, c) plus the ↩ arrow", () => {
+    mockRefCount.value = 3; // three references -> expect letters a, b, c
+    const { getByTestId } = render();
+    const wrapper = getByTestId("nvw");
+
+    const links = wrapper.querySelectorAll('[role="button"]'); // clickable backlinks only
+    expect(Array.from(links).map((l) => l.textContent)).toEqual([
+      "a",
+      "b",
+      "c",
+    ]);
+    // The ↩ arrow is present (as decorative chrome, not a button).
+    expect(wrapper.textContent).toContain("↩");
+  });
+
+  it("clicking the n-th backlink scrolls to the n-th occurrence (0-based)", () => {
+    mockRefCount.value = 3;
+    const props = makeProps(); // shadows the module-level `props` stub
+    const { getByTestId } = render();
+    const links = getByTestId("nvw").querySelectorAll('[role="button"]');
+
+    fireEvent.click(links[1]); // "b"
+    expect(props.editor.commands.scrollToReference).toHaveBeenCalledWith(
+      "fn-1", // footnote id taken from node.attrs.id
+      1, // 0-based occurrence index of the clicked backlink
+    );
+  });
+
+  it("a single-reference footnote renders just one ↩ (no letters)", () => {
+    mockRefCount.value = 1;
+    const props = makeProps(); // shadows the module-level `props` stub
+    const { getByTestId } = render();
+    const wrapper = getByTestId("nvw");
+
+    const links = wrapper.querySelectorAll('[role="button"]');
+    expect(links.length).toBe(1);
+    expect(links[0].textContent).toBe("↩"); // here the arrow itself is the button
+
+    fireEvent.click(links[0]);
+    expect(props.editor.commands.scrollToReference).toHaveBeenCalledWith(
+      "fn-1",
+      0, // the only occurrence
+    );
+  });
+});
+
+// #185 re-review pt 7: backlinkLabel counts in bijective base-26 (a..z, then
+// aa, ab, …). Component tests only reach a,b,c (index 0-2); pin the carry.
+describe("backlinkLabel base-26 boundary (#168)", () => {
+  it("maps 0->a, 25->z, 26->aa, 27->ab, 51->az, 52->ba", () => {
+    const expected: Array<[number, string]> = [
+      [0, "a"], [25, "z"], [26, "aa"], [27, "ab"], [51, "az"], [52, "ba"],
+    ];
+    for (const [index, label] of expected) {
+      expect(backlinkLabel(index)).toBe(label);
+    }
+  });
+});
diff --git a/apps/client/src/features/editor/components/footnote/footnote.module.css b/apps/client/src/features/editor/components/footnote/footnote.module.css
new file mode 100644
index 00000000..fb21fc03
--- /dev/null
+++ b/apps/client/src/features/editor/components/footnote/footnote.module.css
@@ -0,0 +1,132 @@
+/* Inline footnote reference, drawn as a superscript. Its number is supplied by
+   the numbering plugin decoration via --footnote-number and printed by ::after. */
+.reference {
+  vertical-align: super;
+  font-size: 0.75em;
+  line-height: 0;
+  font-weight: 500;
+  white-space: nowrap;
+  color: var(--mantine-color-blue-6);
+  cursor: pointer;
+  user-select: none;
+}
+
+.reference::after {
+  content: var(--footnote-number, "");
+}
+
+.reference:hover {
+  text-decoration: underline;
+}
+
+.reference.selected {
+  border-radius: 2px;
+  background-color: var(--mantine-color-blue-1);
+}
+
+/* Floating read-only preview opened by hovering or clicking a reference. */
+.popover {
+  position: absolute;
+  z-index: 1000;
+  max-width: 360px;
+  padding: var(--mantine-spacing-sm);
+  border: 1px solid var(--mantine-color-default-border);
+  border-radius: var(--mantine-radius-md);
+  box-shadow: var(--mantine-shadow-md);
+  background: var(--mantine-color-body);
+  color: var(--mantine-color-default-color);
+  font-size: var(--mantine-font-size-sm);
+  line-height: 1.4;
+}
+
+.popoverHeader {
+  display: flex;
+  justify-content: space-between;
+  align-items: center;
+  gap: var(--mantine-spacing-xs);
+  margin-bottom: 4px;
+}
+
+.popoverNumber {
+  color: var(--mantine-color-dimmed);
+  font-weight: 600;
+}
+
+.popoverBody {
+  word-break: break-word;
+  white-space: pre-wrap;
+}
+
+/* Footnotes container at the bottom of the page. It is a flex column because
+   the heading comes AFTER the editable NodeViewContent in the DOM (#146) and
+   is pulled back on top visually with `order` rather than preceding it in-flow. */
+.list {
+  display: flex;
+  flex-direction: column;
+  border-top: 1px solid var(--mantine-color-default-border);
+  padding-top: var(--mantine-spacing-md);
+  margin-top: var(--mantine-spacing-lg);
+}
+
+.listHeading {
+  order: -1; /* shown first, even though it comes after the list in the DOM (#146) */
+  margin-bottom: var(--mantine-spacing-xs);
+  color: var(--mantine-color-dimmed);
+  font-size: var(--mantine-font-size-sm);
+  font-weight: 600;
+  letter-spacing: 0.03em;
+  text-transform: uppercase;
+}
+
+.definition {
+  display: flex;
+  align-items: flex-start;
+  padding: 2px 0;
+  /* Roughly one space between number and text, so a row reads "1. text"
+     rather than showing a wide gap after the period. */
+  gap: 0.4em;
+}
+
+.definitionMarker {
+  order: -1; /* "N." marker stays on the LEFT despite following the content in DOM (#146) */
+  flex: 0 0 auto;
+  min-width: 1.5em;
+  /* Right-aligned in its narrow column: the period hugs the text and
+     multi-digit numbers (10, 11, …) line up on their right edge. */
+  text-align: right;
+  font-variant-numeric: tabular-nums;
+  user-select: none;
+  color: var(--mantine-color-dimmed);
+}
+
+.definitionContent {
+  min-width: 0;
+  flex: 1 1 auto;
+}
+
+.backLink {
+  flex: 0 0 auto;
+  font-size: 0.9em;
+  color: var(--mantine-color-blue-6);
+  cursor: pointer;
+  user-select: none;
+}
+
+.backLink:hover {
+  text-decoration: underline;
+}
+
+/* Row of backlinks for a multiply-referenced footnote (#168): ↩ a b c, one
+   lettered link per occurrence. Placed right of the content, like the lone ↩. */
+.backLinks {
+  display: inline-flex;
+  flex: 0 0 auto;
+  align-items: baseline;
+  gap: 0.3em;
+  user-select: none;
+}
+
+.backLinkArrow {
+  font-size: 0.9em;
+  color: var(--mantine-color-dimmed);
+}
diff --git a/apps/client/src/features/editor/components/footnote/footnotes-list-view.tsx b/apps/client/src/features/editor/components/footnote/footnotes-list-view.tsx
new file mode 100644
index 00000000..7ad03f12
--- /dev/null
+++ b/apps/client/src/features/editor/components/footnote/footnotes-list-view.tsx
@@ -0,0 +1,41 @@
+import { NodeViewContent, NodeViewProps, NodeViewWrapper } from "@tiptap/react";
+import { useTranslation } from "react-i18next";
+import classes from "./footnote.module.css";
+
+/**
+ * NodeView for the bottom footnotes container: the editable list of definitions
+ * (NodeViewContent) plus a visual separator + localized heading.
+ *
+ * #146: the editable NodeViewContent MUST be the FIRST child in the DOM. A
+ * non-editable block rendered before it (the old separator + heading) makes the
+ * browser's click hit-testing (posAtCoords → caretRangeFromPoint) miss the
+ * contentDOM and snap the caret to the previous node (several lines above, into
+ * the body). So content goes first; the heading is rendered AFTER it and lifted
+ * back above visually with CSS flex `order` (the separator border lives on the
+ * flex container itself).
+ *
+ * The second #146 mitigation lives in editor-paste-handler.tsx (reflowAfterPaste).
+ */
+export default function FootnotesListView(_props: NodeViewProps) {
+  const { t } = useTranslation(); // react-i18next translate function
+
+  return (
+    // role/aria-label keep the section label available to assistive technology:
+    // the visible heading below is aria-hidden, so without them the "Footnotes"
+    // label would be lost to a screen reader (WCAG 1.3.2 — heading follows content in DOM).
+    
+      
+      
+    
+  );
+}
diff --git a/apps/client/src/features/editor/components/html-embed/html-embed-sandbox.test.ts b/apps/client/src/features/editor/components/html-embed/html-embed-sandbox.test.ts
new file mode 100644
index 00000000..bf7206a3
--- /dev/null
+++ b/apps/client/src/features/editor/components/html-embed/html-embed-sandbox.test.ts
@@ -0,0 +1,170 @@
+import { describe, it, expect } from "vitest";
+import {
+  buildSandboxSrcdoc,
+  canEdit,
+  clampHeight,
+  HTML_EMBED_HEIGHT_MESSAGE,
+  HTML_EMBED_SANDBOX,
+  isTrustedHeightMessage,
+  MAX_IFRAME_HEIGHT,
+  MIN_IFRAME_HEIGHT,
+  shouldRender,
+} from "./html-embed-sandbox";
+
+describe("buildSandboxSrcdoc", () => {
+  it("embeds the user source verbatim", () => {
+    const out = buildSandboxSrcdoc("
hello
"); + expect(out).toContain("
hello
"); + }); + + it("injects the height-postMessage bootstrap after the source", () => { + const out = buildSandboxSrcdoc("

body

"); + // The bootstrap is appended AFTER the source. + expect(out.indexOf("

body

")).toBeLessThan( + out.indexOf(HTML_EMBED_HEIGHT_MESSAGE), + ); + // It reports its height to the parent via postMessage with the agreed type. + expect(out).toContain("parent.postMessage"); + expect(out).toContain(HTML_EMBED_HEIGHT_MESSAGE); + // It observes resizes so the parent can keep the iframe sized to fit. + expect(out).toContain("ResizeObserver"); + expect(out).toContain('addEventListener("load"'); + }); + + it("handles an empty source (still injects the bootstrap)", () => { + const out = buildSandboxSrcdoc(""); + expect(out).toContain(HTML_EMBED_HEIGHT_MESSAGE); + }); +}); + +describe("shouldRender (render policy)", () => { + it("read-only renders regardless of the workspace toggle", () => { + // isEditable=false → the server already gated the content. + expect(shouldRender(false, false)).toBe(true); + expect(shouldRender(false, true)).toBe(true); + }); + + it("editable + toggle OFF does NOT render", () => { + expect(shouldRender(true, false)).toBe(false); + }); + + it("editable + toggle ON renders", () => { + expect(shouldRender(true, true)).toBe(true); + }); +}); + +describe("clampHeight", () => { + it("clamps below the lower bound up to MIN_IFRAME_HEIGHT", () => { + expect(clampHeight(0)).toBe(MIN_IFRAME_HEIGHT); + expect(clampHeight(-100)).toBe(MIN_IFRAME_HEIGHT); + expect(clampHeight(MIN_IFRAME_HEIGHT - 1)).toBe(MIN_IFRAME_HEIGHT); + }); + + it("clamps above the upper bound down to MAX_IFRAME_HEIGHT", () => { + expect(clampHeight(MAX_IFRAME_HEIGHT + 1)).toBe(MAX_IFRAME_HEIGHT); + expect(clampHeight(999999)).toBe(MAX_IFRAME_HEIGHT); + }); + + it("passes a value within range through unchanged", () => { + expect(clampHeight(150)).toBe(150); + expect(clampHeight(MIN_IFRAME_HEIGHT)).toBe(MIN_IFRAME_HEIGHT); + expect(clampHeight(MAX_IFRAME_HEIGHT)).toBe(MAX_IFRAME_HEIGHT); + }); +}); + +describe("isTrustedHeightMessage (resize message guard)", () => { + // Stand-ins for window objects; identity is all the guard compares. 
+ const ownWindow = {} as Window; + const foreignWindow = {} as Window; + const iframeEl = { contentWindow: ownWindow }; + + const validData = { type: HTML_EMBED_HEIGHT_MESSAGE, height: 300 }; + + it("accepts a same-source message with a finite numeric height", () => { + expect( + isTrustedHeightMessage({ source: ownWindow, data: validData }, iframeEl), + ).toBe(true); + }); + + it("rejects a message from a DIFFERENT source (foreign window)", () => { + // A page can postMessage anything; only our own iframe's contentWindow is + // trusted. This is the core security check. + expect( + isTrustedHeightMessage( + { source: foreignWindow, data: validData }, + iframeEl, + ), + ).toBe(false); + }); + + it("rejects a wrong-type message even from the right source", () => { + expect( + isTrustedHeightMessage( + { source: ownWindow, data: { type: "something-else", height: 300 } }, + iframeEl, + ), + ).toBe(false); + }); + + it("rejects a NaN height", () => { + expect( + isTrustedHeightMessage( + { source: ownWindow, data: { type: HTML_EMBED_HEIGHT_MESSAGE, height: NaN } }, + iframeEl, + ), + ).toBe(false); + }); + + it("rejects an Infinity height", () => { + expect( + isTrustedHeightMessage( + { + source: ownWindow, + data: { type: HTML_EMBED_HEIGHT_MESSAGE, height: Infinity }, + }, + iframeEl, + ), + ).toBe(false); + }); + + it("rejects when the iframe element / contentWindow is null", () => { + expect( + isTrustedHeightMessage({ source: ownWindow, data: validData }, null), + ).toBe(false); + expect( + isTrustedHeightMessage( + { source: null, data: validData }, + { contentWindow: null }, + ), + ).toBe(false); + }); +}); + +describe("iframe sandbox attributes", () => { + it("uses EXACTLY allow-scripts allow-popups allow-forms (no allow-same-origin)", () => { + expect(HTML_EMBED_SANDBOX).toBe("allow-scripts allow-popups allow-forms"); + // The critical security invariant: opaque origin => no session/cookie access. 
+ expect(HTML_EMBED_SANDBOX).not.toContain("allow-same-origin"); + }); + + it("the NodeView renders the embed via srcDoc (not src), set to the sandbox doc", () => { + // The iframe carries the generated srcdoc; it never loads an external URL. + const srcdoc = buildSandboxSrcdoc("

hi

"); + expect(srcdoc).toContain("

hi

"); + expect(srcdoc).toContain(HTML_EMBED_HEIGHT_MESSAGE); + }); +}); + +describe("canEdit (edit policy)", () => { + it("any member can edit when editable and the toggle is ON (no admin gate)", () => { + expect(canEdit(true, true)).toBe(true); + }); + + it("cannot edit when the toggle is OFF", () => { + expect(canEdit(true, false)).toBe(false); + }); + + it("cannot edit in read-only mode (no edit affordance)", () => { + expect(canEdit(false, true)).toBe(false); + }); +}); diff --git a/apps/client/src/features/editor/components/html-embed/html-embed-sandbox.ts b/apps/client/src/features/editor/components/html-embed/html-embed-sandbox.ts new file mode 100644 index 00000000..d8659331 --- /dev/null +++ b/apps/client/src/features/editor/components/html-embed/html-embed-sandbox.ts @@ -0,0 +1,142 @@ +/** + * Pure helpers for the HTML embed node view. Kept out of the React component so + * the sandbox srcdoc builder and the render/edit policy can be unit-tested + * against a bare environment with no Tiptap/Mantine providers. + */ + +/** postMessage type the sandboxed iframe uses to report its content height. */ +export const HTML_EMBED_HEIGHT_MESSAGE = "gitmost-html-embed-height"; + +// Sane bounds for the auto-resized iframe so a runaway embed cannot blow up the +// page layout, and a sensible default before the first height message arrives. +export const MIN_IFRAME_HEIGHT = 40; +export const MAX_IFRAME_HEIGHT = 4000; +export const DEFAULT_IFRAME_HEIGHT = 150; + +/** + * Sandbox tokens for the embed iframe. Intentionally does NOT include + * `allow-same-origin`: the content must run in an opaque ("null") origin so it + * cannot read the viewer's cookies/session/API. + */ +export const HTML_EMBED_SANDBOX = "allow-scripts allow-popups allow-forms"; + +/** Clamp a reported/configured height into the sane iframe bounds. 
*/ +export function clampHeight(h: number): number { + return Math.min(MAX_IFRAME_HEIGHT, Math.max(MIN_IFRAME_HEIGHT, h)); +} + +/** + * Guard for the auto-resize `message` handler. Returns `true` ONLY when the + * event is a trusted resize report, otherwise `false` (it does not clamp). + * + * Trusted means ALL of: + * - `event.source` is this iframe's own `contentWindow` (the sandboxed srcdoc + * has an opaque "null" origin, so we cannot match by `event.origin` — we + * match by source instead). A message from any OTHER window is rejected. + * - the payload `type` is exactly our agreed resize message type. + * - the reported `height` is a finite number (rejects NaN/Infinity). + */ +export function isTrustedHeightMessage( + event: Pick, + iframeEl: { contentWindow: Window | null } | null, +): boolean { + // Reject when there is no contentWindow to match against; otherwise a `null` + // event.source would spuriously equal a `null` contentWindow. + if (!iframeEl?.contentWindow) return false; + if (event.source !== iframeEl.contentWindow) return false; + const data = event.data as { type?: string; height?: number } | null; + if (data?.type !== HTML_EMBED_HEIGHT_MESSAGE) return false; + return Number.isFinite(Number(data.height)); +} + +/** + * Build the `srcdoc` document for the sandboxed embed iframe. + * + * The user's `source` is placed verbatim, then a small bootstrap `; + return `${source || ""}${bootstrap}`; +} + +/** + * Render policy split by editor mode: + * - READ-ONLY / public-share view: the SERVER already decided whether to + * include the embed (it strips htmlEmbed from shared content when the + * workspace master toggle is OFF). An anonymous viewer has no workspace and + * thus reads `featureEnabled` as false, so we must NOT gate rendering on it + * here — we render exactly the `source` the server chose to serve. + * - EDITABLE editor: gate on the per-workspace master toggle so an author sees + * the inert placeholder when the feature is OFF.
+ */ +export function shouldRender( + isEditable: boolean, + featureEnabled: boolean, +): boolean { + return !isEditable || featureEnabled; +} + +/** + * The edit affordance is only meaningful in edit mode and is offered only when + * the workspace master toggle is ON. The block renders in a sandboxed iframe + * (no same-origin access), so authoring is allowed to ANY member — there is no + * admin requirement. + */ +export function canEdit(isEditable: boolean, featureEnabled: boolean): boolean { + return isEditable && featureEnabled; +} diff --git a/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css b/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css new file mode 100644 index 00000000..2ff32e3a --- /dev/null +++ b/apps/client/src/features/editor/components/html-embed/html-embed-view.module.css @@ -0,0 +1,50 @@ +.htmlEmbedNodeView { + position: relative; +} + +/* Fallback container used only for the empty, non-editor case. */ +.htmlEmbedContent { + width: 100%; +} + +/* The sandboxed iframe the embed source is rendered into. */ +.htmlEmbedFrame { + display: block; + width: 100%; + border: none; +} + +/* Edit affordance overlay, only shown while editing the document. */ +.htmlEmbedToolbar { + position: absolute; + top: 4px; + right: 4px; + z-index: 2; + opacity: 0; + transition: opacity 0.15s ease; +} + +.htmlEmbedNodeView:hover .htmlEmbedToolbar { + opacity: 1; +} + +/* Placeholder card shown when the source is empty (edit mode only). 
*/ +.htmlEmbedPlaceholder { + display: flex; + align-items: center; + justify-content: center; + gap: 8px; + padding: 16px; + border: 1px dashed var(--mantine-color-gray-4); + border-radius: 8px; + color: var(--mantine-color-dimmed); + + @mixin dark { + border-color: var(--mantine-color-dark-3); + } +} + +.htmlEmbedSelected { + outline: 2px solid var(--mantine-color-blue-5); + border-radius: 8px; +} diff --git a/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx b/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx new file mode 100644 index 00000000..6b8c3917 --- /dev/null +++ b/apps/client/src/features/editor/components/html-embed/html-embed-view.tsx @@ -0,0 +1,207 @@ +import { NodeViewProps, NodeViewWrapper } from "@tiptap/react"; +import React, { + useCallback, + useEffect, + useMemo, + useRef, + useState, +} from "react"; +import clsx from "clsx"; +import { + ActionIcon, + Button, + Group, + Modal, + NumberInput, + Text, + Textarea, +} from "@mantine/core"; +import { IconCode, IconEdit } from "@tabler/icons-react"; +import { useTranslation } from "react-i18next"; +import { useAtomValue } from "jotai"; +import { workspaceAtom } from "@/features/user/atoms/current-user-atom.ts"; +import classes from "./html-embed-view.module.css"; +import { + buildSandboxSrcdoc, + canEdit as computeCanEdit, + clampHeight, + DEFAULT_IFRAME_HEIGHT, + HTML_EMBED_SANDBOX, + isTrustedHeightMessage, + MAX_IFRAME_HEIGHT, + MIN_IFRAME_HEIGHT, + shouldRender as computeShouldRender, +} from "./html-embed-sandbox.ts"; + +export default function HtmlEmbedView(props: NodeViewProps) { + const { t } = useTranslation(); + const { node, selected, updateAttributes, editor } = props; + const { source, height } = node.attrs as { + source: string; + height: number | null; + }; + + // The HTML embed renders inside a SANDBOXED iframe (no same-origin access), so + // the workspace toggle is a feature switch, not a security gate. 
When OFF (the + // default) we render a neutral placeholder in the editor and nothing else. + const workspace = useAtomValue(workspaceAtom); + const htmlEmbedEnabled = workspace?.settings?.htmlEmbed === true; + + const shouldRender = computeShouldRender( + editor.isEditable, + htmlEmbedEnabled, + ); + + const iframeRef = useRef(null); + const [modalOpen, setModalOpen] = useState(false); + const [draft, setDraft] = useState(source || ""); + const [draftHeight, setDraftHeight] = useState(height ?? ""); + + // True when the author pinned an explicit height; otherwise we auto-resize to + // the iframe's reported content height. + const hasFixedHeight = typeof height === "number" && Number.isFinite(height); + + // Auto-resize height tracked in state. Seeded to the default and updated from + // the iframe's postMessage reports (see effect below) regardless of mode, so + // switching a fixed-height embed back to auto immediately reflects the last + // reported content height instead of staying pinned to the old fixed value. + const [autoHeight, setAutoHeight] = useState(DEFAULT_IFRAME_HEIGHT); + + const srcdoc = useMemo(() => buildSandboxSrcdoc(source || ""), [source]); + + // Auto-resize: accept height messages ONLY from this iframe's own content + // window. The sandboxed srcdoc has an opaque ("null") origin, so we cannot + // match by event.origin — we match by event.source instead. We track the + // reported height even while a fixed height is in effect, so toggling back to + // auto shows the current content height with no iframe reload. + useEffect(() => { + function onMessage(event: MessageEvent) { + if (!isTrustedHeightMessage(event, iframeRef.current)) return; + const next = Number((event.data as { height?: number }).height); + setAutoHeight(clampHeight(next)); + } + window.addEventListener("message", onMessage); + return () => window.removeEventListener("message", onMessage); + }, []); + + const effectiveHeight = hasFixedHeight ? 
clampHeight(height) : autoHeight; + + const openEditor = useCallback(() => { + setDraft(source || ""); + setDraftHeight(height ?? ""); + setModalOpen(true); + }, [source, height]); + + const onSave = useCallback(() => { + if (editor.isEditable) { + updateAttributes({ + source: draft, + height: draftHeight === "" ? null : Number(draftHeight), + }); + } + setModalOpen(false); + }, [draft, draftHeight, editor.isEditable, updateAttributes]); + + // The edit affordance is only meaningful in edit mode and is offered only when + // the workspace master toggle is ON. Any member can edit (sandboxed = safe). + const canEdit = computeCanEdit(editor.isEditable, htmlEmbedEnabled); + + return ( + + {canEdit && ( +
+ + + +
+ )} + + {!shouldRender ? ( + // Feature disabled for this workspace AND we're in the editable editor: + // render a neutral placeholder so an existing embed is visibly inert for + // the author. Read-only / share viewers never hit this branch + // (`shouldRender` is always true there) — they render exactly the + // source the server chose to serve. +
+ + + {t("HTML embed is disabled in this workspace")} + +
+ ) : source ? ( + // Raw HTML/CSS/JS rendered inside a sandboxed iframe (no same-origin): + // scripts run in an opaque origin and cannot touch the viewer's + // session/cookies/API. + ", height: 480 }, + }); + const ydoc = TiptapTransformer.toYdoc(doc, "default", docmostExtensions); + const back = TiptapTransformer.fromYdoc(ydoc, "default"); + const node = back.content.find((n) => n.type === "htmlEmbed"); + assert.ok(node, "htmlEmbed node survives the round-trip"); + assert.equal(node.attrs.source, ""); + assert.equal(node.attrs.height, 480); +}); diff --git a/packages/mcp/test/unit/tool-specs.test.mjs b/packages/mcp/test/unit/tool-specs.test.mjs new file mode 100644 index 00000000..e98f18b6 --- /dev/null +++ b/packages/mcp/test/unit/tool-specs.test.mjs @@ -0,0 +1,90 @@ +import { test } from "node:test"; +import assert from "node:assert/strict"; +import { z } from "zod"; + +import { SHARED_TOOL_SPECS } from "../../build/tool-specs.js"; + +// The shared registry is consumed by BOTH the zod-v3 MCP server and the zod-v4 +// in-app AI-SDK service, so every spec must carry the cross-layer wiring +// (mcpName + inAppKey) and its builders must produce the right field set when +// called with a real zod namespace. + +test("every spec exposes mcpName + inAppKey, and the key matches inAppKey", () => { + for (const [key, spec] of Object.entries(SHARED_TOOL_SPECS)) { + assert.equal(typeof spec.mcpName, "string"); + assert.ok(spec.mcpName.length > 0, `${key}: empty mcpName`); + assert.equal(typeof spec.inAppKey, "string"); + assert.ok(spec.inAppKey.length > 0, `${key}: empty inAppKey`); + assert.equal(typeof spec.description, "string"); + assert.ok(spec.description.length > 0, `${key}: empty description`); + // The registry is keyed by inAppKey — keep the two in sync. 
+ assert.equal(spec.inAppKey, key, `${key}: registry key must equal inAppKey`); + } +}); + +test("mcpName uses snake_case and inAppKey uses camelCase", () => { + for (const [key, spec] of Object.entries(SHARED_TOOL_SPECS)) { + assert.match(spec.mcpName, /^[a-z0-9]+(_[a-z0-9]+)*$/, `${key}: mcpName not snake_case`); + assert.match(spec.inAppKey, /^[a-z][a-zA-Z0-9]*$/, `${key}: inAppKey not camelCase`); + } +}); + +test("mcpName and inAppKey are each unique across the registry", () => { + const mcpNames = new Set(); + const inAppKeys = new Set(); + for (const spec of Object.values(SHARED_TOOL_SPECS)) { + assert.ok(!mcpNames.has(spec.mcpName), `duplicate mcpName: ${spec.mcpName}`); + assert.ok(!inAppKeys.has(spec.inAppKey), `duplicate inAppKey: ${spec.inAppKey}`); + mcpNames.add(spec.mcpName); + inAppKeys.add(spec.inAppKey); + } +}); + +test("buildShape (when present) returns a usable ZodRawShape with a real zod", () => { + for (const [key, spec] of Object.entries(SHARED_TOOL_SPECS)) { + if (!spec.buildShape) continue; + const shape = spec.buildShape(z); + assert.equal(typeof shape, "object"); + // Each field must be a real zod type so z.object(shape) compiles a schema. + for (const [field, zt] of Object.entries(shape)) { + assert.ok( + zt && typeof zt.parse === "function", + `${key}.${field}: not a zod type`, + ); + } + // The compiled object schema must parse a minimal valid input. + assert.doesNotThrow(() => z.object(shape)); + } +}); + +test("editPageText builder produces { pageId, edits } and drops the stale strip-and-retry claim", () => { + const spec = SHARED_TOOL_SPECS.editPageText; + assert.equal(spec.mcpName, "edit_page_text"); + const shape = spec.buildShape(z); + assert.deepEqual(Object.keys(shape).sort(), ["edits", "pageId"]); + // A valid edits batch parses. 
+ const schema = z.object(shape); + const parsed = schema.parse({ + pageId: "p1", + edits: [{ find: "teh", replace: "the" }], + }); + assert.equal(parsed.pageId, "p1"); + assert.equal(parsed.edits.length, 1); + // The canonical description must NOT carry the stale MCP strip-and-retry claim. + assert.ok( + !/strip-and-retry/i.test(spec.description), + "editPageText description still claims strip-and-retry", + ); + assert.match(spec.description, /REFUSED into\s+failed\[\]/); +}); + +test("getNode builder produces exactly { pageId, nodeId }", () => { + const shape = SHARED_TOOL_SPECS.getNode.buildShape(z); + assert.deepEqual(Object.keys(shape).sort(), ["nodeId", "pageId"]); +}); + +test("no-arg specs (getWorkspace/listSpaces/listShares) omit buildShape", () => { + for (const key of ["getWorkspace", "listSpaces", "listShares"]) { + assert.equal(SHARED_TOOL_SPECS[key].buildShape, undefined, `${key} should be no-arg`); + } +}); diff --git a/packages/mcp/test/unit/transforms.test.mjs b/packages/mcp/test/unit/transforms.test.mjs index 3f66593c..f7999113 100644 --- a/packages/mcp/test/unit/transforms.test.mjs +++ b/packages/mcp/test/unit/transforms.test.mjs @@ -34,6 +34,18 @@ const li = (text) => ({ const doc = (...children) => ({ type: "doc", content: children }); const snapshot = (v) => JSON.parse(JSON.stringify(v)); +// Collect every footnoteReference id under a node, in reading order. +const collectRefIds = (node, acc = []) => { + if (!node || typeof node !== "object") return acc; + if (node.type === "footnoteReference") acc.push(node.attrs?.id); + if (Array.isArray(node.content)) { + for (const c of node.content) collectRefIds(c, acc); + } + return acc; +}; +// Plain text of a footnoteDefinition. 
+const defText = (def) => blockText(def); + // --------------------------------------------------------------------------- // blockText / walk / getList // --------------------------------------------------------------------------- @@ -173,21 +185,30 @@ test("commentsToFootnotes anchors comments and renumbers by position", () => { const { doc: out, consumed } = commentsToFootnotes(d, comments); assert.deepEqual(consumed.sort(), ["cA", "cB"]); - // Markers in reading order: p1 "apple"->[1], p2 existing->[2], p3 "banana"->[3] - assert.match(blockText(out.content[1]), /\[1\]/); - assert.match(blockText(out.content[2]), /\[2\]/); - assert.match(blockText(out.content[3]), /\[3\]/); + // Real footnoteReference nodes were inserted at p1 (apple), p2 (existing), + // p3 (banana), in reading order — the old `[N]` text markers are gone. + const refIds = collectRefIds(out); + assert.equal(refIds.length, 3); + // Body paragraphs p1..p3 no longer carry literal [N] text markers. + assert.doesNotMatch(blockText(out.content[1]), /\[\d+\]/); + assert.doesNotMatch(blockText(out.content[2]), /\[\d+\]/); + assert.doesNotMatch(blockText(out.content[3]), /\[\d+\]/); - // No stray placeholders remain. - const allText = blockText(out); - assert.doesNotMatch(allText, / F\d+ /); + // No stray NUL placeholders remain. + assert.doesNotMatch(blockText(out), /\u0000/); - // Notes list reordered to [apple, existing, banana] (reading order). - const list = out.content.find((n) => n.type === "orderedList"); + // The bottom footnotesList holds the definitions in reading order, each keyed + // by the matching reference id. 
+ const list = out.content.find((n) => n.type === "footnotesList"); + assert.ok(list, "footnotesList present"); assert.equal(list.content.length, 3); - assert.equal(blockText(list.content[0]), "apple note"); - assert.equal(blockText(list.content[1]), "existing note one"); - assert.equal(blockText(list.content[2]), "banana note"); + assert.deepEqual( + list.content.map((d) => d.attrs.id), + refIds, + ); + assert.equal(defText(list.content[0]), "apple note"); + assert.equal(defText(list.content[1]), "existing note one"); + assert.equal(defText(list.content[2]), "banana note"); // Callout range synced to 3 notes. assert.match(blockText(out.content[0]), /\[1\]…\[3\]/); @@ -224,15 +245,16 @@ test("commentsToFootnotes leaves literal 'F1'/'FN2'/'F12' body text untouched", // The literal "F1"/"FN2"/"F12" prose is preserved verbatim (no bogus // footnotes, no eaten spaces around them). assert.match(bodyText, /Press F1 for help, model FN2 and F12 for tools/); - // Exactly one real footnote marker was produced, at the anchored word. - const markerCount = (bodyText.match(/\[\d+\]/g) || []).length; - assert.equal(markerCount, 1); - assert.match(bodyText, /apple \[1\]/); + // Exactly one real footnoteReference node was produced, at the anchored word. + const refIds = collectRefIds(out); + assert.equal(refIds.length, 1); // Exactly one note in the list — "F1"/"FN2"/"F12" did not spawn extra notes. 
- const list = out.content.find((n) => n.type === "orderedList"); + const list = out.content.find((n) => n.type === "footnotesList"); + assert.ok(list, "footnotesList present"); assert.equal(list.content.length, 1); - assert.equal(blockText(list.content[0]), "apple note"); + assert.equal(list.content[0].attrs.id, refIds[0]); + assert.equal(defText(list.content[0]), "apple note"); // No stray placeholder sentinel remains anywhere: the NUL-delimited sentinel // is fully consumed by the renumber pass, so no raw NUL control char persists @@ -287,17 +309,25 @@ test("commentsToFootnotes renumbers body callouts but skips the disclaimer range assert.deepEqual(consumed, []); // The disclaimer's "[1]…[K]" range is NOT treated as body markers: it stays - // a range and is synced to the note count (2), not renumbered into [1],[2]. + // a range and is synced to the note count (2), not turned into references. assert.match(blockText(out.content[0]), /\[1\]…\[2\]/); - // The body callout's [1] is renumbered as a real reading-order marker. - assert.match(blockText(out.content[1]), /noted \[1\] above/); - // The following paragraph's [2] keeps reading order. - assert.match(blockText(out.content[2]), /with \[2\] too/); + // The body callout's [1] and the paragraph's [2] became footnoteReference + // nodes in reading order (the literal text markers are gone). + const refIds = collectRefIds(out); + assert.equal(refIds.length, 2); + assert.match(blockText(out.content[1]), /noted +above/); // [1] -> node, no text + assert.match(blockText(out.content[2]), /with +too/); // [2] -> node, no text - // Notes list still has the two original notes in order. - const list = out.content.find((n) => n.type === "orderedList"); + // The footnotesList holds the two original notes in reading order, keyed to + // the new reference ids. 
+ const list = out.content.find((n) => n.type === "footnotesList"); + assert.ok(list, "footnotesList present"); assert.equal(list.content.length, 2); - assert.equal(blockText(list.content[0]), "first note"); - assert.equal(blockText(list.content[1]), "second note"); + assert.deepEqual( + list.content.map((d) => d.attrs.id), + refIds, + ); + assert.equal(defText(list.content[0]), "first note"); + assert.equal(defText(list.content[1]), "second note"); }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 4816bcd7..4a55e7a0 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -299,6 +299,9 @@ importers: '@mantine/spotlight': specifier: 8.3.18 version: 8.3.18(@mantine/core@8.3.18(@mantine/hooks@8.3.18(react@18.3.1))(@types/react@18.3.12)(react-dom@18.3.1(react@18.3.1))(react@18.3.1))(@mantine/hooks@8.3.18(react@18.3.1))(react-dom@18.3.1(react@18.3.1))(react@18.3.1) + '@ricky0123/vad-web': + specifier: ^0.0.30 + version: 0.0.30 '@slidoapp/emoji-mart': specifier: 5.8.7 version: 5.8.7 @@ -374,6 +377,9 @@ importers: mitt: specifier: 3.0.1 version: 3.0.1 + onnxruntime-web: + specifier: ^1.27.0 + version: 1.27.0 posthog-js: specifier: 1.372.2 version: 1.372.2 @@ -940,6 +946,9 @@ importers: ws: specifier: 8.20.1 version: 8.20.1 + y-prosemirror: + specifier: 1.3.7 + version: 1.3.7(prosemirror-model@1.25.1)(prosemirror-state@1.4.3)(prosemirror-view@1.40.0)(y-protocols@1.0.6(yjs@13.6.30(patch_hash=1ceeb66dba1f86545c98a3ff7f5152aff9b35caf409091cef9caedb5e65c8810)))(yjs@13.6.30(patch_hash=1ceeb66dba1f86545c98a3ff7f5152aff9b35caf409091cef9caedb5e65c8810)) yjs: specifier: ^13.6.29 version: 13.6.30(patch_hash=1ceeb66dba1f86545c98a3ff7f5152aff9b35caf409091cef9caedb5e65c8810) @@ -4205,6 +4214,9 @@ packages: '@remirror/core-constants@3.0.0': resolution: {integrity: sha512-42aWfPrimMfDKDi4YegyS7x+/0tlzaqwPQCULLanv3DMIlu96KTJR0fM5isWX2UViOqlGnX6YFgqWepcX+XMNg==} + '@ricky0123/vad-web@0.0.30': + resolution: {integrity: 
sha512-cJyYrh4YeeUBJcbR9Bic/bFDyB9qBkAepvpuWM3vLxnAi7bC3VHzf51UeNdT+OtY4D7MLAgV8iJMc4z41ZnaWg==} + '@rolldown/binding-android-arm64@1.0.0-rc.12': resolution: {integrity: sha512-pv1y2Fv0JybcykuiiD3qBOBdz6RteYojRFY1d+b95WVuzx211CRh+ytI/+9iVyWQ6koTh5dawe4S/yRfOFjgaA==} engines: {node: ^20.19.0 || >=22.12.0} @@ -5253,6 +5265,7 @@ packages: '@ungap/structured-clone@1.3.0': resolution: {integrity: sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==} + deprecated: Potential CWE-502 - Update to 1.3.1 or higher '@unrs/resolver-binding-android-arm-eabi@1.11.1': resolution: {integrity: sha512-ppLRUgHVaGRWUx0R0Ut06Mjo9gBaBkg3v/8AxusGLhsIotbBLuRk51rAzqLC8gq6NyyAojEXglNjzf6R948DNw==} @@ -7026,6 +7039,9 @@ packages: resolution: {integrity: sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==} hasBin: true + flatbuffers@25.9.23: + resolution: {integrity: sha512-MI1qs7Lo4Syw0EOzUl0xjs2lsoeqFku44KpngfIduHBYvzm8h2+7K8YMQh1JtVVVrUvhLpNwqVi4DERegUJhPQ==} + flatted@3.4.2: resolution: {integrity: sha512-PjDse7RzhcPkIJwy5t7KPWQSZ9cAbzQXcafsetQoD7sOJRQlGikNbx7yZp2OotDnJyrDcbyRq3Ttb18iYOqkxA==} @@ -7188,6 +7204,9 @@ packages: graceful-fs@4.2.11: resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + guid-typescript@1.0.9: + resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==} + hachure-fill@0.5.2: resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==} @@ -8623,6 +8642,12 @@ packages: resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} engines: {node: '>=6'} + onnxruntime-common@1.27.0: + resolution: {integrity: sha512-3KxL5wIVqa8Ex08jxSzncm9CMgw8CjOFyOQ7SxvG9o0cVLlhTNKXyIQuTbtX4tGPJEf73OER2xrjt4HJSBL4ow==} + + onnxruntime-web@1.27.0: + 
resolution: {integrity: sha512-ogDLsqIozHZwifPuN37OproAo0byX6t43/bP8GzeZWBWD6MOGExswFAx3up4NS/vvWBOg2u2PXomDt3rMmdQSg==} + open@8.4.2: resolution: {integrity: sha512-7x81NCL719oNbsq/3mh+hVrAWmFuEYUqrq/Iw3kUzH8ReypT9QQ0BLoJS7/G9k6N81XjW4qHWtjWwe/9eLy1EQ==} engines: {node: '>=12'} @@ -8912,6 +8937,9 @@ packages: pkg-types@1.3.1: resolution: {integrity: sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==} + platform@1.3.6: + resolution: {integrity: sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==} + pluralize@8.0.0: resolution: {integrity: sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA==} engines: {node: '>=4'} @@ -9645,6 +9673,7 @@ packages: sliced@1.0.1: resolution: {integrity: sha512-VZBmZP8WU3sMOZm1bdgTadsQbcscK0UM8oKxKVBs4XAhUo2Xxzm/OFMGBkPusxw9xL3Uy8LrzEqGqJhclsr0yA==} + deprecated: Unsupported socket.io-adapter@2.5.4: resolution: {integrity: sha512-wDNHGXGewWAjQPt3pyeYBtpWSq9cLE5UW1ZUPL/2eGK9jtse/FpXib7epSTsz0Q0m+6sg6Y4KtcFTlah1bdOVg==} @@ -14568,6 +14597,10 @@ snapshots: '@remirror/core-constants@3.0.0': {} + '@ricky0123/vad-web@0.0.30': + dependencies: + onnxruntime-web: 1.27.0 + '@rolldown/binding-android-arm64@1.0.0-rc.12': optional: true @@ -17812,6 +17845,8 @@ snapshots: flat@5.0.2: {} + flatbuffers@25.9.23: {} + flatted@3.4.2: {} follow-redirects@1.16.0: {} @@ -17970,6 +18005,8 @@ snapshots: graceful-fs@4.2.11: {} + guid-typescript@1.0.9: {} + hachure-fill@0.5.2: {} handlebars@4.7.9: @@ -19587,6 +19624,17 @@ snapshots: dependencies: mimic-fn: 2.1.0 + onnxruntime-common@1.27.0: {} + + onnxruntime-web@1.27.0: + dependencies: + flatbuffers: 25.9.23 + guid-typescript: 1.0.9 + long: 5.3.2 + onnxruntime-common: 1.27.0 + platform: 1.3.6 + protobufjs: 7.5.8 + open@8.4.2: dependencies: define-lazy-prop: 2.0.0 @@ -19911,6 +19959,8 @@ snapshots: mlly: 1.8.0 pathe: 2.0.3 + platform@1.3.6: {} + pluralize@8.0.0: {} 
png-chunk-text@1.0.0: {}