feat: publishing infernet-container-starter v0.2.0

This commit is contained in:
ritual-all
2024-03-29 10:50:13 -04:00
parent 41aaa152e6
commit 4545223364
155 changed files with 6086 additions and 257 deletions

1
projects/tgi-llm/container/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
config.json

View File

@ -0,0 +1,25 @@
FROM python:3.11-slim as builder
WORKDIR /app
ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV PIP_NO_CACHE_DIR 1
ENV RUNTIME docker
ENV PYTHONPATH src
RUN apt-get update
RUN apt-get install -y git curl
# install uv
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
RUN /install.sh && rm /install.sh
COPY src/requirements.txt .
RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
COPY src src
ENTRYPOINT ["hypercorn", "app:create_app()"]
CMD ["-b", "0.0.0.0:3000"]

View File

@ -0,0 +1,17 @@
DOCKER_ORG := ritualnetwork
EXAMPLE_NAME := tgi-llm
TAG := $(DOCKER_ORG)/example-$(EXAMPLE_NAME)-infernet:latest
.PHONY: build run build-multiplatform
build:
@docker build -t $(TAG) .
run:
docker run -p 3000:3000 --env-file tgi-llm.env $(TAG)
# You may need to set up a docker builder, to do so run:
# docker buildx create --name mybuilder --bootstrap --use
# refer to https://docs.docker.com/build/building/multi-platform/#building-multi-platform-images for more info
build-multiplatform:
docker buildx build --platform linux/amd64,linux/arm64 -t $(TAG) --push .

View File

@ -0,0 +1,88 @@
# TGI LLM
In this example, we're running an infernet node along with a TGI service.
## Deploying TGI Service
If you already have a TGI service running, feel free to skip this part. Otherwise,
you can deploy the TGI service as follows.
Make sure you have a machine with proper GPU support, then clone this repository and
run the following command:
```bash
make run-service project=tgi-llm service=tgi
```
## Deploying Infernet Node Locally
Running an Infernet node involves a simple configuration step followed by a run step.
### Configuration
Copy our [sample config file](./config.sample.json) into a new file
called `config.json`.
```bash
cp config.sample.json config.json
```
Then update the `"env"` field of the `"containers"` section of the file to point to the
TGI service you just deployed.
```json
{
// etc.
"containers": [
{
"id": "tgi-llm",
"image": "ritualnetwork/llm_inference_service:latest",
"external": true,
"port": "3000",
"allowed_delegate_addresses": [],
"allowed_addresses": [],
"allowed_ips": [],
"command": "--bind=0.0.0.0:3000 --workers=2",
"env": {
"TGI_SERVICE_URL": "http://{your-service-ip}:{your-service-port}" // <- Change this to the TGI service you deployed
}
}
]
}
```
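Optionally, you can sanity-check the file before deploying with a small Python sketch like the one below. This assumes `config.json` sits in your current directory and that an un-replaced URL still contains a curly-brace placeholder, as in the sample above; adjust as needed.
```python
import json
import sys

# Load the config you just edited (assumed to be in the current directory).
with open("config.json") as f:
    config = json.load(f)

# Find the tgi-llm container entry and check its TGI_SERVICE_URL.
tgi = next(c for c in config["containers"] if c["id"] == "tgi-llm")
url = tgi["env"].get("TGI_SERVICE_URL", "")

if not url or "{" in url:
    sys.exit("TGI_SERVICE_URL still looks like a placeholder -- point it at your TGI service first.")
print(f"Looks good: tgi-llm will call {url}")
```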
### Running the Infernet Node Locally
With that out of the way, you can now run the infernet node using the following command
at the top-level directory of this repo:
```
make deploy-container project=tgi-llm
```
## Testing the Infernet Node
You can test the Infernet node by posting a job to the node's REST API.
```bash
curl -X POST "http://127.0.0.1:4000/api/jobs" \
-H "Content-Type: application/json" \
-d '{"containers":["tgi-llm"], "data": {"prompt": "can shrimp actually fry rice?"}}'
```
You can expect a response similar to the following:
```json
{
"id": "f026c7c2-7027-4c2d-b662-2b48c9433a12"
}
```
You can then check the status of the job using the following command:
```bash
curl -X GET http://127.0.0.1:4000/api/jobs\?id\=f026c7c2-7027-4c2d-b662-2b48c9433a12
[{"id":"f026c7c2-7027-4c2d-b662-2b48c9433a12","result":{"container":"tgi-llm","output":{"output":"\n\nI\u2019m not sure if this is a real question or not, but I\u2019m"}},"status":"success"}]
```
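If you prefer to script this instead of using raw `curl`, here is a minimal sketch using the `requests` library (not part of this container's requirements; install it separately) that submits the same job and polls for the result:
```python
import time

import requests  # assumed installed separately: pip install requests

NODE_URL = "http://127.0.0.1:4000"

# Submit a job to the tgi-llm container via the node's REST API.
job_id = requests.post(
    f"{NODE_URL}/api/jobs",
    json={"containers": ["tgi-llm"], "data": {"prompt": "can shrimp actually fry rice?"}},
).json()["id"]
print("job id:", job_id)

# Poll until the job reports "success" (the status shown in the example above),
# giving up after roughly 60 seconds.
for _ in range(60):
    (job,) = requests.get(f"{NODE_URL}/api/jobs", params={"id": job_id}).json()
    if job["status"] == "success":
        print(job["result"]["output"])
        break
    time.sleep(1)
else:
    print("job did not finish in time:", job)
```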
Congratulations! You've successfully run an Infernet node with a TGI service.

View File

@ -0,0 +1,52 @@
{
"log_path": "infernet_node.log",
"server": {
"port": 4000
},
"chain": {
"enabled": true,
"trail_head_blocks": 0,
"rpc_url": "http://host.docker.internal:8545",
"coordinator_address": "0x5FbDB2315678afecb367f032d93F642f64180aa3",
"wallet": {
"max_gas_limit": 4000000,
"private_key": "0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b6b78690d"
}
},
"startup_wait": 1.0,
"docker": {
"username": "your-username",
"password": ""
},
"redis": {
"host": "redis",
"port": 6379
},
"forward_stats": true,
"containers": [
{
"id": "tgi-llm",
"image": "ritualnetwork/example-tgi-llm-infernet:latest",
"external": true,
"port": "3000",
"allowed_delegate_addresses": [],
"allowed_addresses": [],
"allowed_ips": [],
"command": "--bind=0.0.0.0:3000 --workers=2",
"env": {
"TGI_SERVICE_URL": "http://{your_service_ip}:{your_service_port}"
}
},
{
"id": "anvil-node",
"image": "ritualnetwork/infernet-anvil:0.0.0",
"external": true,
"port": "8545",
"allowed_delegate_addresses": [],
"allowed_addresses": [],
"allowed_ips": [],
"command": "",
"env": {}
}
]
}

View File

@ -0,0 +1,85 @@
import logging
import os
from typing import Any, cast
from eth_abi import decode, encode # type: ignore
from infernet_ml.utils.service_models import InfernetInput, InfernetInputSource
from infernet_ml.workflows.inference.tgi_client_inference_workflow import (
TGIClientInferenceWorkflow,
)
from quart import Quart, request
log = logging.getLogger(__name__)
def create_app() -> Quart:
app = Quart(__name__)
workflow = TGIClientInferenceWorkflow(
server_url=cast(str, os.environ.get("TGI_SERVICE_URL"))
)
workflow.setup()
@app.route("/")
def index() -> str:
"""
Utility endpoint to check if the service is running.
"""
return "LLM Inference Service is running."
@app.route("/service_output", methods=["POST"])
async def inference() -> dict[str, Any]:
req_data = await request.get_json()
"""
InfernetInput has the format:
source: (0 on-chain, 1 off-chain)
data: dict[str, Any]
"""
infernet_input: InfernetInput = InfernetInput(**req_data)
if infernet_input.source == InfernetInputSource.OFFCHAIN:
prompt = cast(dict[str, Any], infernet_input.data).get("prompt")
else:
# On-chain requests are sent as a generalized hex-string which we will
# decode to the appropriate format.
(prompt,) = decode(
["string"], bytes.fromhex(cast(str, infernet_input.data))
)
result: dict[str, Any] = workflow.inference({"text": prompt})
if infernet_input.source == InfernetInputSource.OFFCHAIN:
"""
In case of an off-chain request, the result is returned as a dict. The
infernet node expects a dict format.
"""
return {"data": result}
else:
"""
In case of an on-chain request, the result is returned in the format:
{
"raw_input": str,
"processed_input": str,
"raw_output": str,
"processed_output": str,
"proof": str,
}
refer to: https://docs.ritual.net/infernet/node/containers for more info.
"""
return {
"raw_input": "",
"processed_input": "",
"raw_output": encode(["string"], [result]).hex(),
"processed_output": "",
"proof": "",
}
return app
if __name__ == "__main__":
"""
Utility to run the app locally. For development purposes only.
"""
create_app().run(port=3000)

View File

@ -0,0 +1,6 @@
quart==0.19.4
infernet_ml==0.1.0
PyArweave @ git+https://github.com/ritual-net/pyarweave.git
web3==6.15.0
retry2==0.9.5
text-generation==0.6.1

View File

@ -0,0 +1 @@
TGI_SERVICE_URL=http://{your-service-ip}:{your-service-port}

View File

@ -0,0 +1,34 @@
name: test
on: workflow_dispatch
env:
FOUNDRY_PROFILE: ci
jobs:
check:
strategy:
fail-fast: true
name: Foundry project
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
submodules: recursive
- name: Install Foundry
uses: foundry-rs/foundry-toolchain@v1
with:
version: nightly
- name: Run Forge build
run: |
forge --version
forge build --sizes
id: build
- name: Run Forge tests
run: |
forge test -vvv
id: test

14
projects/tgi-llm/contracts/.gitignore vendored Normal file
View File

@ -0,0 +1,14 @@
# Compiler files
cache/
out/
# Ignores development broadcast logs
!/broadcast
/broadcast/*/31337/
/broadcast/**/dry-run/
# Docs
docs/
# Dotenv file
.env

View File

@ -0,0 +1,14 @@
# phony targets are targets that don't actually create a file
.PHONY: deploy call-contract
# anvil's third default private key
sender := 0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a
RPC_URL := http://localhost:8545
# deploying the contract
deploy:
@PRIVATE_KEY=$(sender) forge script script/Deploy.s.sol:Deploy --broadcast --rpc-url $(RPC_URL)
# calling sayGM()
call-contract:
@PRIVATE_KEY=$(sender) forge script script/CallContract.s.sol:CallContract --broadcast --rpc-url $(RPC_URL)

View File

@ -0,0 +1,7 @@
[profile.default]
src = "src"
out = "out"
libs = ["lib"]
via_ir = true
# See more config options https://github.com/foundry-rs/foundry/blob/master/crates/config/README.md#all-options

View File

@ -0,0 +1,2 @@
forge-std/=lib/forge-std/src
infernet-sdk/=lib/infernet-sdk/src

View File

@ -0,0 +1,19 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
pragma solidity ^0.8.0;
import {Script, console2} from "forge-std/Script.sol";
import {Prompter} from "../src/Prompter.sol";
contract CallContract is Script {
function run() public {
// Setup wallet
uint256 deployerPrivateKey = vm.envUint("PRIVATE_KEY");
vm.startBroadcast(deployerPrivateKey);
Prompter prompter = Prompter(0x663F3ad617193148711d28f5334eE4Ed07016602);
prompter.promptLLM(vm.envString("prompt"));
vm.stopBroadcast();
}
}

View File

@ -0,0 +1,26 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
pragma solidity ^0.8.13;
import {Script, console2} from "forge-std/Script.sol";
import {Prompter} from "../src/Prompter.sol";
contract Deploy is Script {
function run() public {
// Setup wallet
uint256 deployerPrivateKey = vm.envUint("PRIVATE_KEY");
vm.startBroadcast(deployerPrivateKey);
// Log address
address deployerAddress = vm.addr(deployerPrivateKey);
console2.log("Loaded deployer: ", deployerAddress);
address coordinator = 0x5FbDB2315678afecb367f032d93F642f64180aa3;
// Create consumer
Prompter prompter = new Prompter(coordinator);
console2.log("Deployed Prompter: ", address(prompter));
// Execute
vm.stopBroadcast();
}
}

View File

@ -0,0 +1,47 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
pragma solidity ^0.8.13;
import {console2} from "forge-std/console2.sol";
import {CallbackConsumer} from "infernet-sdk/consumer/Callback.sol";
contract Prompter is CallbackConsumer {
string private EXTREMELY_COOL_BANNER = "\n\n"
"_____ _____ _______ _ _ _ \n"
"| __ \\|_ _|__ __| | | | /\\ | | \n"
"| |__) | | | | | | | | | / \\ | | \n"
"| _ / | | | | | | | |/ /\\ \\ | | \n"
"| | \\ \\ _| |_ | | | |__| / ____ \\| |____ \n"
"|_| \\_\\_____| |_| \\____/_/ \\_\\______| \n\n";
constructor(address coordinator) CallbackConsumer(coordinator) {}
function promptLLM(string calldata prompt) public {
_requestCompute(
"tgi-llm",
abi.encode(prompt),
20 gwei,
1_000_000,
1
);
}
function _receiveCompute(
uint32 subscriptionId,
uint32 interval,
uint16 redundancy,
address node,
bytes calldata input,
bytes calldata output,
bytes calldata proof
) internal override {
console2.log(EXTREMELY_COOL_BANNER);
(bytes memory raw_output, bytes memory processed_output) = abi.decode(output, (bytes, bytes));
(string memory outputStr) = abi.decode(raw_output, (string));
console2.log("subscription Id", subscriptionId);
console2.log("interval", interval);
console2.log("redundancy", redundancy);
console2.log("node", node);
console2.log("output:", outputStr);
}
}

444
projects/tgi-llm/tgi-llm.md Normal file
View File

@ -0,0 +1,444 @@
# TGI Inference with Mistral-7b
In this tutorial we are going to use [Huggingface's TGI (Text Generation Inference)](https://huggingface.co/docs/text-generation-inference/en/index) to run an arbitrary LLM
and enable users to request jobs from it, both on-chain and off-chain.
## Install Pre-requisites
For this tutorial you'll need to have the following installed.
1. [Docker](https://docs.docker.com/engine/install/)
2. [Foundry](https://book.getfoundry.sh/getting-started/installation)
## Setting up a TGI LLM Service
Included with this tutorial is a [containerized LLM service](./tgi). We're going to deploy this service on a powerful
machine with access to a GPU.
### Rent a GPU machine
To run this service, you will need access to a machine with a powerful GPU. For this tutorial, we used an
A100 instance on [Paperspace](https://www.paperspace.com/).
### Install docker
You will have to install docker.
For Ubuntu, you can run the following commands:
```bash copy
# install docker
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
```
As docker installation may vary depending on your operating system, consult the
[official documentation](https://docs.docker.com/engine/install/ubuntu/) for more information.
After installation, you can verify that docker is installed by running:
```bash
# sudo docker run hello-world
Hello from Docker!
```
### Ensure CUDA is installed
Depending on where you rent your GPU machine, CUDA is typically pre-installed. For Ubuntu, you can follow the
instructions [here](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#prepare-ubuntu).
You can verify that CUDA is installed by running:
```bash copy
# verify Installation
python -c '
import torch
print("torch.cuda.is_available()", torch.cuda.is_available())
print("torch.cuda.device_count()", torch.cuda.device_count())
print("torch.cuda.current_device()", torch.cuda.current_device())
print("torch.cuda.get_device_name(0)", torch.cuda.get_device_name(0))
'
```
If CUDA is installed and available, your output will look similar to the following:
```bash
torch.cuda.is_available() True
torch.cuda.device_count() 1
torch.cuda.current_device() 0
torch.cuda.get_device_name(0) Tesla V100-SXM2-16GB
```
### Ensure `nvidia-container-runtime` is installed
For your container to be able to access the GPU, you will need to install the `nvidia-container-runtime`.
On Ubuntu, you can run the following commands:
```bash copy
# Docker GPU support
# nvidia container-runtime repos
# https://nvidia.github.io/nvidia-container-runtime/
curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | \
sudo apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
sudo apt-get update
# install nvidia-container-runtime
# https://docs.docker.com/config/containers/resource_constraints/#gpu
sudo apt-get install -y nvidia-container-runtime
```
As always, consult the [official documentation](https://nvidia.github.io/nvidia-container-runtime/) for more
information.
You can verify that `nvidia-container-runtime` is installed by running:
```bash copy
which nvidia-container-runtime-hook
# this should return a path to the nvidia-container-runtime-hook
```
Now, with the pre-requisites installed, we can move on to setting up the TGI service.
### Clone this repository
```bash copy
# Clone locally
git clone --recurse-submodules https://github.com/ritual-net/infernet-container-starter
# Navigate to the repository
cd infernet-container-starter
```
### Run the TGI service
```bash copy
make run-service project=tgi-llm service=tgi
```
This will start the `tgi` service. Note that this service will have to download a large model file,
so it may take a few minutes to be fully ready. The downloaded model gets cached, so subsequent runs will be faster.
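Since the model download can take a while, it can be handy to poll the service before moving on. Below is a rough sketch that assumes the service is reachable at `{your-service-ip}:8080` (the host port mapped by the `tgi` service's Makefile) and that you have the `requests` library installed; recent TGI versions expose a `/health` endpoint that returns 200 once the model is loaded.
```python
import time

import requests  # assumed installed separately: pip install requests

# Replace with your GPU machine's IP; 8080 is the host port from the tgi Makefile.
TGI_URL = "http://{your-service-ip}:8080"

while True:
    try:
        # /health returns 200 once the model is loaded and the server is ready.
        if requests.get(f"{TGI_URL}/health", timeout=5).status_code == 200:
            print("TGI is ready")
            break
    except requests.exceptions.RequestException:
        pass
    print("still starting up...")
    time.sleep(10)
```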
## Testing the `tgi-llm` service via the gradio UI
Included with this project is a simple Gradio chat UI that allows you to interact with the `tgi-llm` service. This is
not needed for running the Infernet node, but it is a nice way to debug and test the TGI service.
### Ensure `docker` & `foundry` exist
To check for `docker`, run the following command in your terminal:
```bash copy
docker --version
# Docker version 25.0.2, build 29cf629 (example output)
```
You'll also need to ensure that docker-compose exists in your terminal:
```bash copy
which docker-compose
# /usr/local/bin/docker-compose (example output)
```
To check for `foundry`, run the following command in your terminal:
```bash copy
forge --version
# forge 0.2.0 (551bcb5 2024-02-28T07:40:42.782478000Z) (example output)
```
### Clone the starter repository
Just like our other examples, we're going to clone this repository. All of the code and instructions for this tutorial
can be found in the [`projects/tgi-llm`](../tgi-llm) directory of the repository.
```bash copy
# Clone locally
git clone --recurse-submodules https://github.com/ritual-net/infernet-container-starter
# Navigate to the repository
cd infernet-container-starter
```
### Configure the UI Service
You'll need to configure the UI service to point to the `tgi` service. To do this, you'll have to
pass that info as environment variables. There is a [`gradio_ui.env.sample`](./ui/gradio_ui.env.sample)
file in the [`projects/tgi-llm/ui`](./ui)
directory. Simply copy this file to `gradio_ui.env` and set `TGI_SERVICE_URL` to the address of the `tgi` service.
```bash copy
cd projects/tgi-llm/ui
cp gradio_ui.env.sample gradio_ui.env
```
Then modify the content of `gradio_ui.env` to look like this:
```env
TGI_SERVICE_URL={your_service_ip}:{your_service_port} # <- replace with your service ip & port
HF_API_TOKEN={huggingface_api_token} # <- replace with your huggingface api token
PROMPT_FILE_PATH=./prompt.txt # <- path to the prompt file
```
The env vars are as follows:
- `TGI_SERVICE_URL` is the address of the `tgi` service
- `HF_API_TOKEN` is the Huggingface API token. You can get one by signing up at [Huggingface](https://huggingface.co/)
- `PROMPT_FILE_PATH` is the path to the system prompt file. By default it is set to `./prompt.txt`. A simple
`prompt.txt` file is included in the `ui` directory.
### Build the UI service
From the top-level directory of the repository, simply run the following command to build the UI service:
```bash copy
# cd back to the top-level directory
cd ../../..
# build the UI service
make build-service project=tgi-llm service=ui
```
### Run the UI service
In the same directory, you can also run the following command to run the UI service:
```bash copy
make run-service project=tgi-llm service=ui
```
By default the service will run on `http://localhost:3001`. You can navigate to this address in your browser to see
the UI.
### Chat with the TGI service!
Congratulations! You can now chat with the TGI service using the gradio UI. You can enter a prompt and see the
response from the TGI service.
Now that we've tested the TGI service, we can move on to setting up the Infernet Node and the `tgi-llm` container.
## Setting up the Infernet Node along with the `tgi-llm` container
You can follow these steps on your local machine to set up the Infernet Node and the `tgi-llm` container.
The first couple of steps are identical to those of [the previous section](#ensure-docker--foundry-exist). So if you've already completed those
steps, you can skip to [building the tgi-llm container](#build-the-tgi-llm-container).
### Ensure `docker` & `foundry` exist
To check for `docker`, run the following command in your terminal:
```bash copy
docker --version
# Docker version 25.0.2, build 29cf629 (example output)
```
You'll also need to ensure that docker-compose exists in your terminal:
```bash copy
which docker-compose
# /usr/local/bin/docker-compose (example output)
```
To check for `foundry`, run the following command in your terminal:
```bash copy
forge --version
# forge 0.2.0 (551bcb5 2024-02-28T07:40:42.782478000Z) (example output)
```
### Clone the starter repository
Just like our other examples, we're going to clone this repository.
All of the code and instructions for this tutorial can be found in the
[`projects/tgi-llm`](../tgi-llm)
directory of the repository.
```bash copy
# Clone locally
git clone --recurse-submodules https://github.com/ritual-net/infernet-container-starter
# Navigate to the repository
cd infernet-container-starter
```
### Configure the `tgi-llm` container
#### Configure the URL for the TGI Service
The `tgi-llm` container needs to know where to find the TGI service that we started in the steps above. To do this,
we need to modify the configuration file for the `tgi-llm` container. We provide a sample [config.sample.json](./config.sample.json) file.
Simply navigate to the `projects/tgi-llm/container` directory and set up the config file:
```bash
cd projects/tgi-llm/container
cp config.sample.json config.json
```
In the `containers` field, you will see the following:
```json
"containers": [
{
// etc. etc.
"env": {
"TGI_SERVICE_URL": "http://{your_service_ip}:{your_service_port}" // <- replace with your service ip & port
}
}
},
```
### Build the `tgi-llm` container
Simply run the following command to build the `tgi-llm` container:
```bash copy
make build-container project=tgi-llm
```
### Deploy the `tgi-llm` container with Infernet
You can run a simple command to deploy the `tgi-llm` container along with bootstrapping the rest of the
Infernet node stack in one go:
```bash copy
make deploy-container project=tgi-llm
```
### Check the running containers
At this point it makes sense to check the running containers to ensure everything is running as expected.
```bash
# > docker container ps
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
0dbc30f67e1e ritualnetwork/example-tgi-llm-infernet:latest "hypercorn app:creat…" 8 seconds ago Up 7 seconds
0.0.0.0:3000->3000/tcp tgi-llm
0c5140e0f41b ritualnetwork/infernet-anvil:0.0.0 "anvil --host 0.0.0.…" 23 hours ago Up 23 hours
0.0.0.0:8545->3000/tcp anvil-node
f5682ec2ad31 ritualnetwork/infernet-node:latest "/app/entrypoint.sh" 23 hours ago Up 9 seconds
0.0.0.0:4000->4000/tcp deploy-node-1
c1ece27ba112 fluent/fluent-bit:latest "/fluent-bit/bin/flu…" 23 hours ago Up 10 seconds 2020/tcp,
0.0.0.0:24224->24224/tcp, :::24224->24224/tcp deploy-fluentbit-1
3cccea24a303 redis:latest "docker-entrypoint.s…" 23 hours ago Up 10 seconds 0.0.0.0:6379->6379/tcp,
:::6379->6379/tcp deploy-redis-1
```
You should see five different containers running, including the Infernet Node and the `tgi-llm` container.
### Send a job request to the `tgi-llm` container
From here, we can make a Web-2 job request to the container by posting a request to the [`api/jobs`](https://docs.ritual.net/infernet/node/api#2a-post-apijobs) endpoint.
```bash copy
curl -X POST http://127.0.0.1:4000/api/jobs \
-H "Content-Type: application/json" \
-d '{"containers": ["tgi-llm"], "data": {"prompt": "Can shrimp actually fry rice fr?"}}'
# {"id":"7a375a56-0da0-40d8-91e0-6440b3282ed8"}
```
You will get a job id in response. You can use this id to check the status of the job.
### Check the status of the job
You can make a `GET` request to the [`api/jobs`](https://docs.ritual.net/infernet/node/api#3-get-apijobs) endpoint to check the status of the job.
```bash copy
curl -X GET "http://127.0.0.1:4000/api/jobs?id=7a375a56-0da0-40d8-91e0-6440b3282ed8"
# [{"id":"7a375a56-0da0-40d8-91e0-6440b3282ed8","result":{"container":"tgi-llm","output":{"data":"\n\n## Can you fry rice in a wok?\n\nThe wok is the"}},"status":"success"}]
```
Congratulations! You have successfully set up the Infernet Node and the `tgi-llm` container. Now let's move on to
calling our service from a smart contract (i.e. a web3 request).
## Calling our service from a smart contract
In the following steps, we will deploy our [consumer contract](https://github.com/ritual-net/infernet-container-starter/blob/main/projects/tgi-llm/contracts/src/Prompter.sol) and make a subscription request by calling the
contract.
### Setup
Ensure that you have followed the steps in the previous section up until [here](#check-the-running-containers) to set up
the Infernet Node and the `tgi-llm` container.
Notice that in [the step above](#check-the-running-containers) we have an Anvil node running on port `8545`.
By default, the [`anvil-node`](https://hub.docker.com/r/ritualnetwork/infernet-anvil) image used deploys the
[Infernet SDK](https://docs.ritual.net/infernet/sdk/introduction) and other relevant contracts for you:
- Coordinator: `0x5FbDB2315678afecb367f032d93F642f64180aa3`
- Primary node: `0x70997970C51812dc3A010C7d01b50e0d17dc79C8`
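If you'd like to verify that those contracts are actually deployed on your local Anvil node before continuing, a quick sketch using `web3.py` (already listed in the container's `requirements.txt`) could look like this:
```python
from web3 import Web3

# Local Anvil node exposed on port 8545 by the anvil-node container (see above).
w3 = Web3(Web3.HTTPProvider("http://127.0.0.1:8545"))
assert w3.is_connected(), "Anvil node is not reachable on port 8545"

coordinator = Web3.to_checksum_address("0x5FbDB2315678afecb367f032d93F642f64180aa3")

# A deployed contract has non-empty bytecode at its address.
code = w3.eth.get_code(coordinator)
print(f"coordinator bytecode size: {len(code)} bytes")
assert len(code) > 0, "no contract found -- is the anvil-node container running?"
```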
### Deploy our `Prompter` smart contract
In this step, we will deploy our [`Prompter.sol`](./contracts/src/Prompter.sol)
to the Anvil node. This contract simply allows us to submit a prompt to the LLM, receive the result of the
prompt, and print it to the Anvil console.
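Under the hood, the prompt and the response travel as ABI-encoded strings: `promptLLM` calls `abi.encode(prompt)`, the container decodes that hex payload, and on the way back the generated text is ABI-encoded into the `raw_output` bytes that `_receiveCompute` unpacks. The sketch below uses `eth_abi` (the same library the container imports) to illustrate that round trip with made-up example values:
```python
from eth_abi import decode, encode

# What promptLLM() sends on-chain: abi.encode(prompt). The node forwards it to the
# container as a hex string, which container/src/app.py decodes like this.
payload = encode(["string"], ["What is 2 * 3?"]).hex()
(prompt,) = decode(["string"], bytes.fromhex(payload))
print("container sees prompt:", prompt)

# On the way back, the generated text is ABI-encoded as a string; _receiveCompute()
# decodes it the same way after unpacking the (raw_output, processed_output) pair.
raw_output = encode(["string"], ["2 * 3 = 6"])
(answer,) = decode(["string"], raw_output)
print("contract logs output:", answer)
```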
#### Anvil logs
During this process, it is useful to look at the logs of the Anvil node to see what's going on. To follow the logs,
in a new terminal, run:
```bash copy
docker logs -f anvil-node
```
#### Deploying the contract
Once ready, to deploy the `Prompter` consumer contract, in another terminal, run:
```bash copy
make deploy-contracts project=tgi-llm
```
You should expect to see similar Anvil logs:
```bash
# > make deploy-contracts project=tgi-llm
eth_getTransactionReceipt
Transaction: 0x17a9d17cc515d39eef26b6a9427e04ed6f7ce6572d9756c07305c2df78d93ffe
Contract created: 0x663f3ad617193148711d28f5334ee4ed07016602
Gas used: 731312
Block Number: 1
Block Hash: 0xd17b344af15fc32cd3359e6f2c2724a8d0a0283fc3b44febba78fc99f2f00189
Block Time: "Wed, 6 Mar 2024 18:21:01 +0000"
eth_getTransactionByHash
```
From our logs, we can see that the `Prompter` contract has been deployed to address
`0x663f3ad617193148711d28f5334ee4ed07016602`.
### Call the contract
Now, let's call the contract with a prompt! In the same terminal, run:
```bash copy
make call-contract project=tgi-llm prompt="What is 2 * 3?"
```
You should first expect to see an initiation transaction sent to the `Prompter` contract:
```bash
eth_getTransactionReceipt
Transaction: 0x988b1b251f3b6ad887929a58429291891d026f11392fb9743e9a90f78c7a0801
Gas used: 190922
Block Number: 2
Block Hash: 0x51f3abf62e763f1bd1b0d245a4eab4ced4b18f58bd13645dbbf3a878f1964044
Block Time: "Wed, 6 Mar 2024 18:21:34 +0000"
eth_getTransactionByHash
eth_getTransactionReceipt
```
Shortly after that, you should see another transaction submitted by the Infernet Node, which is the
result of your on-chain subscription and its associated job request:
```bash
eth_sendRawTransaction
_____ _____ _______ _ _ _
| __ \|_ _|__ __| | | | /\ | |
| |__) | | | | | | | | | / \ | |
| _ / | | | | | | | |/ /\ \ | |
| | \ \ _| |_ | | | |__| / ____ \| |____
|_| \_\_____| |_| \____/_/ \_\______|
subscription Id 1
interval 1
redundancy 1
node 0x70997970C51812dc3A010C7d01b50e0d17dc79C8
output:
2 * 3 = 6
Transaction: 0xdaaf559c2baba212ab218fb268906613ce3be93ba79b37f902ff28c8fe9a1e1a
Gas used: 116153
Block Number: 3
Block Hash: 0x2f26b2b487a4195ff81865b2966eab1508d10642bf525a258200eea432522e24
Block Time: "Wed, 6 Mar 2024 18:21:35 +0000"
eth_blockNumber
```
We can now confirm that the address of the Infernet Node (see the logged `node` parameter in the Anvil logs above)
matches the address of the node we set up by default for our Infernet Node.
Congratulations! 🎉 You have successfully enabled a contract to have access to a TGI LLM service.

View File

@ -0,0 +1,8 @@
.PHONY: run
volume ?= $(PWD)/data
model ?= mistralai/Mistral-7B-v0.1
run:
docker run --gpus all --shm-size 1g -p 8080:80 -v $(volume):/data \
ghcr.io/huggingface/text-generation-inference:1.4 --model-id $(model)

View File

@ -0,0 +1,15 @@
# TGI Service
The [Makefile](./Makefile) for this service simply runs Hugging Face's
`ghcr.io/huggingface/text-generation-inference:1.4`
Docker image. Ensure that you are running this on a machine with a GPU.
For example, to run the TGI container with model `mistralai/Mistral-7B-v0.1`, you can
use the following command:
```bash
make run model=mistralai/Mistral-7B-v0.1 volume=/path/to/your/data
```
* `model`: defaults to `mistralai/Mistral-7B-v0.1`
* `volume`: defaults to `./data`
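Once the service is up, you can also sanity-check it directly with the `huggingface_hub` client, which is the same client the Gradio UI in this project uses. A minimal sketch, assuming the service is reachable at `{your-service-ip}:8080` (the host port from the Makefile above):
```python
from huggingface_hub import InferenceClient  # pip install huggingface-hub

# Point the client straight at your TGI endpoint; replace the placeholder below.
client = InferenceClient(model="http://{your-service-ip}:8080")

response = client.text_generation(
    "Can shrimp actually fry rice?",
    max_new_tokens=40,
    temperature=0.3,
)
print(response)
```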

View File

@ -0,0 +1,22 @@
FROM python:3.11-slim as builder
WORKDIR /app
ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONPATH src
RUN apt-get update
COPY src/requirements.txt .
RUN pip install --upgrade pip && pip install -r requirements.txt
COPY src src
COPY prompt.txt .
ENTRYPOINT ["python", "src/app.py"]
CMD ["-b", "0.0.0.0:3000"]

View File

@ -0,0 +1,17 @@
DOCKER_ORG := ritualnetwork
EXAMPLE_NAME := tgi-llm-ui
TAG := $(DOCKER_ORG)/example-$(EXAMPLE_NAME)-infernet:latest
.PHONY: build run build-multiplatform
build:
@docker build -t $(TAG) .
run: build
docker run --env-file ./gradio_ui.env -p 3001:7860 $(TAG)
# You may need to set up a docker builder, to do so run:
# docker buildx create --name mybuilder --bootstrap --use
# refer to https://docs.docker.com/build/building/multi-platform/#building-multi-platform-images for more info
build-multiplatform:
docker buildx build --platform linux/amd64,linux/arm64 -t $(TAG) --push .

View File

@ -0,0 +1,35 @@
# Gradio UI
This is a utility UI project to chat with your TGI LLM.
## Configuration
Copy the [`gradio_ui.env.sample`](./gradio_ui.env.sample) file into a new file
called `gradio_ui.env` and fill in the necessary environment variables.
```bash
cp gradio_ui.env.sample gradio_ui.env
```
Environment variables are as follows:
```bash
TGI_SERVICE_URL= # URL to your running TGI service
HF_API_TOKEN= # your Huggingface API token
PROMPT_FILE_PATH= # path to a prompt file
```
## Running
Simply run:
```bash
make run
```
The UI will run on port `3001` on your localhost. You can change that configuration
[here](./Makefile#L11).
Congratulations! You have successfully set up the Gradio UI for your TGI LLM.
Now you can go to `http://localhost:3001` and chat with your LLM instance.

View File

@ -0,0 +1,3 @@
TGI_SERVICE_URL=
HF_API_TOKEN=
PROMPT_FILE_PATH=./prompt.txt

View File

@ -0,0 +1 @@
You're a friendly chatbot.

View File

@ -0,0 +1,109 @@
import os
from builtins import str
from pathlib import Path
from typing import Union, cast, Any, Callable
import gradio as gr # type: ignore
from dotenv import load_dotenv
from huggingface_hub import InferenceClient # type: ignore
load_dotenv()
TGI_SERVICE_URL = os.getenv("TGI_SERVICE_URL")
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
client = InferenceClient(model=TGI_SERVICE_URL)
def start_interface(
lambdafn: Callable[[str, list[str]], Any],
examples: list[str],
title: str,
description: str,
share: bool = True,
height: int = 300,
placeholder: str = "Chat with me!",
scale: int = 7,
container: bool = False,
) -> None:
"""
Starts the Gradio chat interface for the model.
Args:
lambdafn (callable): text_generation lambda fn with message, history
examples (list[str]): A list of example inputs for the interface.
title (str): The gradio title.
description (str): The gradio description.
share (bool): Whether to generate a global gradio link for 72 hours.
height (int): Height of chat window in pixels.
placeholder (str): Placeholder when chat window is empty.
scale (int): The scale of the chat window.
container (bool): Show the chat window in a container.
"""
gr.ChatInterface(
lambdafn,
chatbot=gr.Chatbot(height=height),
textbox=gr.Textbox(placeholder=placeholder, container=container, scale=scale),
description=description,
title=title,
examples=examples,
retry_btn="Retry",
undo_btn="Undo",
clear_btn="Clear",
).queue().launch(share=share, server_name="0.0.0.0")
def read_text_file(file_path: Union[Path, str]) -> str:
"""Reads content from file as a string."""
with open(file_path, "r") as file:
return file.read()
def main() -> None:
cwd = os.getcwd()
PROMPT_FILE_PATH: str = cast(str, os.getenv("PROMPT_FILE_PATH"))
if not PROMPT_FILE_PATH:
raise ValueError("PROMPT_FILE_PATH is not set in the environment.")
input_text = read_text_file(os.path.join(cwd, PROMPT_FILE_PATH))
def prompt_formatter(user_prompt: str, input_text: str) -> str:
return user_prompt
# You should write your own lambda fn to set generation parameters:
# Gradio's ChatInterface only passes (message, history) to the function,
# so any extra parameters you don't want the user to see must be
# captured via closure rather than passed as arguments.
def stream_inference(message: str, history: list[str]) -> Any:
response = client.text_generation(
prompt_formatter(message, input_text),
max_new_tokens=40,
temperature=0.3,
details=True,
).generated_text
# Strip the leading <s> token so it doesn't render as strikethrough
# markdown in the Gradio front end; backend consumers can ignore this.
if response.startswith("<s>"):
response = response[3:]
yield response
title = "Your Ritual Model🎷"
description = "This is the demo for your model."
# if you want a global url others can visit.
share = True
examples = ["Can shrimp actually fry rice?"]
start_interface(
lambdafn=stream_inference,
title=title,
description=description,
share=share,
examples=examples,
)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,4 @@
python-dotenv==1.0.0
gradio==3.47.1
huggingface-hub==0.17.3
text-generation==0.6.1