feat: publishing infernet-container-starter v0.2.0
1 projects/tgi-llm/container/.gitignore vendored Normal file
@@ -0,0 +1 @@
config.json

25 projects/tgi-llm/container/Dockerfile Normal file
@@ -0,0 +1,25 @@
FROM python:3.11-slim as builder

WORKDIR /app

ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV PIP_NO_CACHE_DIR 1
ENV RUNTIME docker
ENV PYTHONPATH src

RUN apt-get update
RUN apt-get install -y git curl

# install uv
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
RUN /install.sh && rm /install.sh

COPY src/requirements.txt .

RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt

COPY src src

ENTRYPOINT ["hypercorn", "app:create_app()"]
CMD ["-b", "0.0.0.0:3000"]

17 projects/tgi-llm/container/Makefile Normal file
@@ -0,0 +1,17 @@
DOCKER_ORG := ritualnetwork
EXAMPLE_NAME := tgi-llm
TAG := $(DOCKER_ORG)/example-$(EXAMPLE_NAME)-infernet:latest

.PHONY: build run build-multiplatform

build:
	@docker build -t $(TAG) .

run:
	docker run -p 3000:3000 --env-file tgi-llm.env $(TAG)

# You may need to set up a docker builder, to do so run:
# docker buildx create --name mybuilder --bootstrap --use
# refer to https://docs.docker.com/build/building/multi-platform/#building-multi-platform-images for more info
build-multiplatform:
	docker buildx build --platform linux/amd64,linux/arm64 -t $(TAG) --push .

88 projects/tgi-llm/container/README.md Normal file
@@ -0,0 +1,88 @@
# TGI LLM

In this example, we're running an Infernet node along with a TGI service.

## Deploying TGI Service

If you have your own TGI service running, feel free to skip this part. Otherwise,
you can deploy the TGI service using the following command.

Make sure you have a machine with proper GPU support. Clone this repository &
run the following command:

```bash
make run-service project=tgi-llm service=tgi
```

## Deploying Infernet Node Locally

Running an Infernet node involves a simple configuration step & running step.

### Configuration

Copy our [sample config file](./config.sample.json) into a new file
called `config.json`.

```bash
cp config.sample.json config.json
```

Then update the `"env"` field in the `"containers"` section of the file to point to the
TGI service you just deployed.

```json
{
  // etc.
  "containers": [
    {
      "id": "tgi-llm",
      "image": "ritualnetwork/llm_inference_service:latest",
      "external": true,
      "port": "3000",
      "allowed_delegate_addresses": [],
      "allowed_addresses": [],
      "allowed_ips": [],
      "command": "--bind=0.0.0.0:3000 --workers=2",
      "env": {
        "TGI_SERVICE_URL": "http://{your-service-ip}:{your-service-port}" // <- Change this to the TGI service you deployed
      }
    }
  ]
}
```

### Running the Infernet Node Locally

With that out of the way, you can now run the Infernet node using the following command
at the top-level directory of this repo:

```
make deploy-container project=tgi-llm
```

## Testing the Infernet Node

You can test the Infernet node by posting a job to the node's REST API.

```bash
curl -X POST "http://127.0.0.1:4000/api/jobs" \
  -H "Content-Type: application/json" \
  -d '{"containers":["tgi-llm"], "data": {"prompt": "can shrimp actually fry rice?"}}'
```

You can expect a response similar to the following:

```json
{
  "id": "f026c7c2-7027-4c2d-b662-2b48c9433a12"
}
```

You can then check the status of the job using the following command:

```bash
curl -X GET http://127.0.0.1:4000/api/jobs\?id\=f026c7c2-7027-4c2d-b662-2b48c9433a12
# [{"id":"f026c7c2-7027-4c2d-b662-2b48c9433a12","result":{"container":"tgi-llm","output":{"output":"\n\nI\u2019m not sure if this is a real question or not, but I\u2019m"}},"status":"success"}]
```
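
If you'd rather script this flow, here is a minimal Python sketch of the same two calls, assuming the `requests` library is installed (it is not part of this container's requirements) and the node is listening on `127.0.0.1:4000` as configured above:

```python
import time

import requests

NODE_URL = "http://127.0.0.1:4000"

# Submit an off-chain job to the tgi-llm container (mirrors the curl POST above).
job = requests.post(
    f"{NODE_URL}/api/jobs",
    json={"containers": ["tgi-llm"], "data": {"prompt": "can shrimp actually fry rice?"}},
).json()
job_id = job["id"]

# Poll the jobs endpoint until the node reports success (mirrors the curl GET above).
for _ in range(30):
    results = requests.get(f"{NODE_URL}/api/jobs", params={"id": job_id}).json()
    if results and results[0].get("status") == "success":
        print(results[0]["result"]["output"])
        break
    time.sleep(1)
```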

Congratulations! You've successfully run an Infernet node with a TGI service.

52 projects/tgi-llm/container/config.sample.json Normal file
@@ -0,0 +1,52 @@
{
    "log_path": "infernet_node.log",
    "server": {
        "port": 4000
    },
    "chain": {
        "enabled": true,
        "trail_head_blocks": 0,
        "rpc_url": "http://host.docker.internal:8545",
        "coordinator_address": "0x5FbDB2315678afecb367f032d93F642f64180aa3",
        "wallet": {
            "max_gas_limit": 4000000,
            "private_key": "0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b6b78690d"
        }
    },
    "startup_wait": 1.0,
    "docker": {
        "username": "your-username",
        "password": ""
    },
    "redis": {
        "host": "redis",
        "port": 6379
    },
    "forward_stats": true,
    "containers": [
        {
            "id": "tgi-llm",
            "image": "ritualnetwork/example-tgi-llm-infernet:latest",
            "external": true,
            "port": "3000",
            "allowed_delegate_addresses": [],
            "allowed_addresses": [],
            "allowed_ips": [],
            "command": "--bind=0.0.0.0:3000 --workers=2",
            "env": {
                "TGI_SERVICE_URL": "http://{your_service_ip}:{your_service_port}"
            }
        },
        {
            "id": "anvil-node",
            "image": "ritualnetwork/infernet-anvil:0.0.0",
            "external": true,
            "port": "8545",
            "allowed_delegate_addresses": [],
            "allowed_addresses": [],
            "allowed_ips": [],
            "command": "",
            "env": {}
        }
    ]
}

85 projects/tgi-llm/container/src/app.py Normal file
@@ -0,0 +1,85 @@
import logging
import os
from typing import Any, cast

from eth_abi import decode, encode  # type: ignore
from infernet_ml.utils.service_models import InfernetInput, InfernetInputSource
from infernet_ml.workflows.inference.tgi_client_inference_workflow import (
    TGIClientInferenceWorkflow,
)
from quart import Quart, request

log = logging.getLogger(__name__)


def create_app() -> Quart:
    app = Quart(__name__)

    workflow = TGIClientInferenceWorkflow(
        server_url=cast(str, os.environ.get("TGI_SERVICE_URL"))
    )

    workflow.setup()

    @app.route("/")
    def index() -> str:
        """
        Utility endpoint to check if the service is running.
        """
        return "LLM Inference Service is running."

    @app.route("/service_output", methods=["POST"])
    async def inference() -> dict[str, Any]:
        req_data = await request.get_json()
        """
        InfernetInput has the format:
            source: (0 on-chain, 1 off-chain)
            data: dict[str, Any]
        """
        infernet_input: InfernetInput = InfernetInput(**req_data)

        if infernet_input.source == InfernetInputSource.OFFCHAIN:
            prompt = cast(dict[str, Any], infernet_input.data).get("prompt")
        else:
            # On-chain requests are sent as a generalized hex-string which we will
            # decode to the appropriate format.
            (prompt,) = decode(
                ["string"], bytes.fromhex(cast(str, infernet_input.data))
            )

        result: dict[str, Any] = workflow.inference({"text": prompt})

        if infernet_input.source == InfernetInputSource.OFFCHAIN:
            """
            In case of an off-chain request, the result is returned as a dict. The
            Infernet node expects a dict format.
            """
            return {"data": result}
        else:
            """
            In case of an on-chain request, the result is returned in the format:
            {
                "raw_input": str,
                "processed_input": str,
                "raw_output": str,
                "processed_output": str,
                "proof": str,
            }
            refer to: https://docs.ritual.net/infernet/node/containers for more info.
            """
            return {
                "raw_input": "",
                "processed_input": "",
                "raw_output": encode(["string"], [result]).hex(),
                "processed_output": "",
                "proof": "",
            }

    return app


if __name__ == "__main__":
    """
    Utility to run the app locally. For development purposes only.
    """
    create_app().run(port=3000)

6 projects/tgi-llm/container/src/requirements.txt Normal file
@@ -0,0 +1,6 @@
quart==0.19.4
infernet_ml==0.1.0
PyArweave @ git+https://github.com/ritual-net/pyarweave.git
web3==6.15.0
retry2==0.9.5
text-generation==0.6.1

1 projects/tgi-llm/container/tgi-llm.env.sample Normal file
@@ -0,0 +1 @@
TGI_SERVICE_URL=http://{your-service-ip}:{your-service-port}

34 projects/tgi-llm/contracts/.github/workflows/test.yml vendored Normal file
@@ -0,0 +1,34 @@
name: test

on: workflow_dispatch

env:
  FOUNDRY_PROFILE: ci

jobs:
  check:
    strategy:
      fail-fast: true

    name: Foundry project
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Install Foundry
        uses: foundry-rs/foundry-toolchain@v1
        with:
          version: nightly

      - name: Run Forge build
        run: |
          forge --version
          forge build --sizes
        id: build

      - name: Run Forge tests
        run: |
          forge test -vvv
        id: test

14 projects/tgi-llm/contracts/.gitignore vendored Normal file
@@ -0,0 +1,14 @@
# Compiler files
cache/
out/

# Ignores development broadcast logs
!/broadcast
/broadcast/*/31337/
/broadcast/**/dry-run/

# Docs
docs/

# Dotenv file
.env

14 projects/tgi-llm/contracts/Makefile Normal file
@@ -0,0 +1,14 @@
# phony targets are targets that don't actually create a file
.PHONY: deploy call-contract

# anvil's third default private key
sender := 0x5de4111afa1a4b94908f83103eb1f1706367c2e68ca870fc3fb9a804cdab365a
RPC_URL := http://localhost:8545

# deploying the contract
deploy:
	@PRIVATE_KEY=$(sender) forge script script/Deploy.s.sol:Deploy --broadcast --rpc-url $(RPC_URL)

# calling promptLLM()
call-contract:
	@PRIVATE_KEY=$(sender) forge script script/CallContract.s.sol:CallContract --broadcast --rpc-url $(RPC_URL)

7 projects/tgi-llm/contracts/foundry.toml Normal file
@@ -0,0 +1,7 @@
[profile.default]
src = "src"
out = "out"
libs = ["lib"]
via_ir = true

# See more config options https://github.com/foundry-rs/foundry/blob/master/crates/config/README.md#all-options

2 projects/tgi-llm/contracts/remappings.txt Normal file
@@ -0,0 +1,2 @@
forge-std/=lib/forge-std/src
infernet-sdk/=lib/infernet-sdk/src

19 projects/tgi-llm/contracts/script/CallContract.s.sol Normal file
@@ -0,0 +1,19 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
pragma solidity ^0.8.0;

import {Script, console2} from "forge-std/Script.sol";
import {Prompter} from "../src/Prompter.sol";

contract CallContract is Script {
    function run() public {
        // Setup wallet
        uint256 deployerPrivateKey = vm.envUint("PRIVATE_KEY");
        vm.startBroadcast(deployerPrivateKey);

        Prompter prompter = Prompter(0x663F3ad617193148711d28f5334eE4Ed07016602);

        prompter.promptLLM(vm.envString("prompt"));

        vm.stopBroadcast();
    }
}

26 projects/tgi-llm/contracts/script/Deploy.s.sol Normal file
@@ -0,0 +1,26 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
pragma solidity ^0.8.13;

import {Script, console2} from "forge-std/Script.sol";
import {Prompter} from "../src/Prompter.sol";

contract Deploy is Script {
    function run() public {
        // Setup wallet
        uint256 deployerPrivateKey = vm.envUint("PRIVATE_KEY");
        vm.startBroadcast(deployerPrivateKey);

        // Log address
        address deployerAddress = vm.addr(deployerPrivateKey);
        console2.log("Loaded deployer: ", deployerAddress);

        address coordinator = 0x5FbDB2315678afecb367f032d93F642f64180aa3;
        // Create consumer
        Prompter prompter = new Prompter(coordinator);
        console2.log("Deployed Prompter: ", address(prompter));

        // Execute
        vm.stopBroadcast();
    }
}

47 projects/tgi-llm/contracts/src/Prompter.sol Normal file
@@ -0,0 +1,47 @@
// SPDX-License-Identifier: BSD-3-Clause-Clear
pragma solidity ^0.8.13;

import {console2} from "forge-std/console2.sol";
import {CallbackConsumer} from "infernet-sdk/consumer/Callback.sol";

contract Prompter is CallbackConsumer {
    string private EXTREMELY_COOL_BANNER = "\n\n"
        "_____ _____ _______ _ _ _ \n"
        "| __ \\|_ _|__ __| | | | /\\ | | \n"
        "| |__) | | | | | | | | | / \\ | | \n"
        "| _ / | | | | | | | |/ /\\ \\ | | \n"
        "| | \\ \\ _| |_ | | | |__| / ____ \\| |____ \n"
        "|_| \\_\\_____| |_| \\____/_/ \\_\\______| \n\n";

    constructor(address coordinator) CallbackConsumer(coordinator) {}

    function promptLLM(string calldata prompt) public {
        _requestCompute(
            "tgi-llm",
            abi.encode(prompt),
            20 gwei,
            1_000_000,
            1
        );
    }

    function _receiveCompute(
        uint32 subscriptionId,
        uint32 interval,
        uint16 redundancy,
        address node,
        bytes calldata input,
        bytes calldata output,
        bytes calldata proof
    ) internal override {
        console2.log(EXTREMELY_COOL_BANNER);
        (bytes memory raw_output, bytes memory processed_output) = abi.decode(output, (bytes, bytes));
        (string memory outputStr) = abi.decode(raw_output, (string));

        console2.log("subscription Id", subscriptionId);
        console2.log("interval", interval);
        console2.log("redundancy", redundancy);
        console2.log("node", node);
        console2.log("output:", outputStr);
    }
}

444 projects/tgi-llm/tgi-llm.md Normal file
@@ -0,0 +1,444 @@
# TGI Inference with Mistral-7b

In this tutorial we are going to use [Huggingface's TGI (Text Generation Inference)](https://huggingface.co/docs/text-generation-inference/en/index) to run an arbitrary LLM model
and enable users to request jobs from it, both on-chain and off-chain.

## Install Pre-requisites

For this tutorial you'll need to have the following installed.

1. [Docker](https://docs.docker.com/engine/install/)
2. [Foundry](https://book.getfoundry.sh/getting-started/installation)

## Setting up a TGI LLM Service

Included with this tutorial is a [containerized LLM service](./tgi). We're going to deploy this service on a powerful
machine with access to a GPU.

### Rent a GPU machine
To run this service, you will need access to a machine with a powerful GPU. In this tutorial, we use an
A100 instance on [Paperspace](https://www.paperspace.com/).

### Install docker
You will have to install docker.

For Ubuntu, you can run the following commands:

```bash copy
# install docker
sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin
```
As docker installation may vary depending on your operating system, consult the
[official documentation](https://docs.docker.com/engine/install/ubuntu/) for more information.

After installation, you can verify that docker is installed by running:

```bash
# sudo docker run hello-world
Hello from Docker!
```

### Ensure CUDA is installed
Depending on where you rent your GPU machine, CUDA is typically pre-installed. For Ubuntu, you can follow the
instructions [here](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#prepare-ubuntu).

You can verify that CUDA is installed by running:

```bash copy
# verify installation
python -c '
import torch
print("torch.cuda.is_available()", torch.cuda.is_available())
print("torch.cuda.device_count()", torch.cuda.device_count())
print("torch.cuda.current_device()", torch.cuda.current_device())
print("torch.cuda.get_device_name(0)", torch.cuda.get_device_name(0))
'
```

If CUDA is installed and available, your output will look similar to the following:

```bash
torch.cuda.is_available() True
torch.cuda.device_count() 1
torch.cuda.current_device() 0
torch.cuda.get_device_name(0) Tesla V100-SXM2-16GB
```

### Ensure `nvidia-container-runtime` is installed
For your container to be able to access the GPU, you will need to install the `nvidia-container-runtime`.
On Ubuntu, you can run the following commands:

```bash copy
# Docker GPU support
# nvidia container-runtime repos
# https://nvidia.github.io/nvidia-container-runtime/
curl -s -L https://nvidia.github.io/nvidia-container-runtime/gpgkey | \
  sudo apt-key add -
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-container-runtime/$distribution/nvidia-container-runtime.list | \
  sudo tee /etc/apt/sources.list.d/nvidia-container-runtime.list
sudo apt-get update

# install nvidia-container-runtime
# https://docs.docker.com/config/containers/resource_constraints/#gpu
sudo apt-get install -y nvidia-container-runtime
```
As always, consult the [official documentation](https://nvidia.github.io/nvidia-container-runtime/) for more
information.

You can verify that `nvidia-container-runtime` is installed by running:

```bash copy
which nvidia-container-runtime-hook
# this should return a path to the nvidia-container-runtime-hook
```

Now, with the pre-requisites installed, we can move on to setting up the TGI service.

### Clone this repository

```bash copy
# Clone locally
git clone --recurse-submodules https://github.com/ritual-net/infernet-container-starter
# Navigate to the repository
cd infernet-container-starter
```

### Run the TGI service
```bash copy
make run-service project=tgi-llm service=tgi
```

This will start the `tgi` service. Note that this service will have to download a large model file,
so it may take a few minutes to be fully ready. The downloaded model gets cached, so subsequent runs will be faster.
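
To confirm the service is up before wiring anything else to it, you can run a quick smoke test. Below is a minimal sketch using the `text-generation` Python client (already pinned in the container's `requirements.txt`), assuming the default host port mapping (`8080`) from the [tgi Makefile](./tgi/Makefile) and that you substitute your GPU machine's address for the placeholder:

```python
from text_generation import Client

# Replace the placeholder with the IP of the machine running the tgi service;
# port 8080 is the host port mapped in projects/tgi-llm/tgi/Makefile.
TGI_URL = "http://{your-service-ip}:8080"

client = Client(TGI_URL)

# A coherent completion coming back means the model is loaded and serving.
response = client.generate("Is the TGI service alive?", max_new_tokens=20)
print(response.generated_text)
```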

## Testing the `tgi-llm` service via the Gradio UI
Included with this project is a simple Gradio chat UI that allows you to interact with the `tgi-llm` service. It is
not needed for running the Infernet node, but it is a nice way to debug and test the TGI service.

### Ensure `docker` & `foundry` exist
To check for `docker`, run the following command in your terminal:
```bash copy
docker --version
# Docker version 25.0.2, build 29cf629 (example output)
```

You'll also need to ensure that docker-compose exists in your terminal:
```bash copy
which docker-compose
# /usr/local/bin/docker-compose (example output)
```

To check for `foundry`, run the following command in your terminal:
```bash copy
forge --version
# forge 0.2.0 (551bcb5 2024-02-28T07:40:42.782478000Z) (example output)
```

### Clone the starter repository
Just like our other examples, we're going to clone this repository. All of the code and instructions for this tutorial
can be found in the [`projects/tgi-llm`](../tgi-llm) directory of the repository.

```bash copy
# Clone locally
git clone --recurse-submodules https://github.com/ritual-net/infernet-container-starter
# Navigate to the repository
cd infernet-container-starter
```

### Configure the UI Service
You'll need to configure the UI service to point to the `tgi` service. To do this, you'll have to
pass that info in as environment variables. There is a [`gradio_ui.env.sample`](./ui/gradio_ui.env.sample)
file in the [`projects/tgi-llm/ui`](./ui)
directory. Simply copy this file to `gradio_ui.env` and set `TGI_SERVICE_URL` to the address of the `tgi` service.

```bash copy
cd projects/tgi-llm/ui
cp gradio_ui.env.sample gradio_ui.env
```

Then modify the content of `gradio_ui.env` to look like this:

```env
TGI_SERVICE_URL={your_service_ip}:{your_service_port} # <- replace with your service ip & port
HF_API_TOKEN={huggingface_api_token} # <- replace with your huggingface api token
PROMPT_FILE_PATH=./prompt.txt # <- path to the prompt file
```

The env vars are as follows:
- `TGI_SERVICE_URL` is the address of the `tgi` service
- `HF_API_TOKEN` is your Huggingface API token. You can get one by signing up at [Huggingface](https://huggingface.co/)
- `PROMPT_FILE_PATH` is the path to the system prompt file. By default it is set to `./prompt.txt`. A simple
`prompt.txt` file is included in the `ui` directory.
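
Under the hood, the UI (see [`ui/src/app.py`](./ui/src/app.py)) reads these variables and requests completions from the TGI service via `huggingface_hub`'s `InferenceClient`. A stripped-down sketch of that call, assuming `TGI_SERVICE_URL` is set in your environment and points at your running service:

```python
import os

from huggingface_hub import InferenceClient

# Same env var the UI reads from gradio_ui.env
client = InferenceClient(model=os.environ["TGI_SERVICE_URL"])

# Mirrors the generation parameters used in ui/src/app.py
text = client.text_generation(
    "Can shrimp actually fry rice?",
    max_new_tokens=40,
    temperature=0.3,
    details=True,
).generated_text
print(text)
```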

### Build the UI service
From the top-level directory of the repository, simply run the following command to build the UI service:

```bash copy
# cd back to the top-level directory
cd ../../..
# build the UI service
make build-service project=tgi-llm service=ui
```

### Run the UI service
In the same directory, you can also run the following command to run the UI service:
```bash copy
make run-service project=tgi-llm service=ui
```

By default the service will run on `http://localhost:3001`. You can navigate to this address in your browser to see
the UI.

### Chat with the TGI service!
Congratulations! You can now chat with the TGI service using the Gradio UI. You can enter a prompt and see the
response from the TGI service.

Now that we've tested the TGI service, we can move on to setting up the Infernet Node and the `tgi-llm` container.

## Setting up the Infernet Node along with the `tgi-llm` container

You can follow these steps on your local machine to set up the Infernet Node and the `tgi-llm` container.

The first couple of steps are identical to those of [the previous section](#ensure-docker--foundry-exist). So if you've already completed them,
you can skip to [building the tgi-llm container](#build-the-tgi-llm-container).

### Ensure `docker` & `foundry` exist

To check for `docker`, run the following command in your terminal:
```bash copy
docker --version
# Docker version 25.0.2, build 29cf629 (example output)
```

You'll also need to ensure that docker-compose exists in your terminal:
```bash copy
which docker-compose
# /usr/local/bin/docker-compose (example output)
```

To check for `foundry`, run the following command in your terminal:
```bash copy
forge --version
# forge 0.2.0 (551bcb5 2024-02-28T07:40:42.782478000Z) (example output)
```

### Clone the starter repository
Just like our other examples, we're going to clone this repository.
All of the code and instructions for this tutorial can be found in the
[`projects/tgi-llm`](../tgi-llm)
directory of the repository.

```bash copy
# Clone locally
git clone --recurse-submodules https://github.com/ritual-net/infernet-container-starter
# Navigate to the repository
cd infernet-container-starter
```

### Configure the `tgi-llm` container

#### Configure the URL for the TGI Service
The `tgi-llm` container needs to know where to find the TGI service that we started in the steps above. To do this,
we need to modify the configuration file for the `tgi-llm` container. We have a sample [config.json](./config.sample.json) file.
Simply navigate to the `projects/tgi-llm/container` directory and set up the config file:

```bash
cd projects/tgi-llm/container
cp config.sample.json config.json
```

In the `containers` field, you will see the following:

```json
"containers": [
    {
        // etc. etc.
        "env": {
            "TGI_SERVICE_URL": "http://{your_service_ip}:{your_service_port}" // <- replace with your service ip & port
        }
    }
],
```

### Build the `tgi-llm` container

Simply run the following command to build the `tgi-llm` container:

```bash copy
make build-container project=tgi-llm
```

### Deploy the `tgi-llm` container with Infernet

You can run a simple command to deploy the `tgi-llm` container along with bootstrapping the rest of the
Infernet node stack in one go:

```bash copy
make deploy-container project=tgi-llm
```

### Check the running containers

At this point it makes sense to check the running containers to ensure everything is running as expected.

```bash
# > docker container ps
CONTAINER ID   IMAGE                                           COMMAND                  CREATED         STATUS          PORTS                                                     NAMES
0dbc30f67e1e   ritualnetwork/example-tgi-llm-infernet:latest   "hypercorn app:creat…"   8 seconds ago   Up 7 seconds    0.0.0.0:3000->3000/tcp                                    tgi-llm
0c5140e0f41b   ritualnetwork/infernet-anvil:0.0.0              "anvil --host 0.0.0.…"   23 hours ago    Up 23 hours     0.0.0.0:8545->3000/tcp                                    anvil-node
f5682ec2ad31   ritualnetwork/infernet-node:latest              "/app/entrypoint.sh"     23 hours ago    Up 9 seconds    0.0.0.0:4000->4000/tcp                                    deploy-node-1
c1ece27ba112   fluent/fluent-bit:latest                        "/fluent-bit/bin/flu…"   23 hours ago    Up 10 seconds   2020/tcp, 0.0.0.0:24224->24224/tcp, :::24224->24224/tcp   deploy-fluentbit-1
3cccea24a303   redis:latest                                    "docker-entrypoint.s…"   23 hours ago    Up 10 seconds   0.0.0.0:6379->6379/tcp, :::6379->6379/tcp                 deploy-redis-1
```

You should see five different images running, including the Infernet node and the `tgi-llm` container.

### Send a job request to the `tgi-llm` container
From here, we can make a Web2 job request to the container by posting a request to the [`api/jobs`](https://docs.ritual.net/infernet/node/api#2a-post-apijobs) endpoint.

```bash copy
curl -X POST http://127.0.0.1:4000/api/jobs \
  -H "Content-Type: application/json" \
  -d '{"containers": ["tgi-llm"], "data": {"prompt": "Can shrimp actually fry rice fr?"}}'
# {"id":"7a375a56-0da0-40d8-91e0-6440b3282ed8"}
```
You will get a job ID in response. You can use this ID to check the status of the job.

### Check the status of the job
You can make a `GET` request to the [`api/jobs`](https://docs.ritual.net/infernet/node/api#3-get-apijobs) endpoint to check the status of the job.

```bash copy
curl -X GET "http://127.0.0.1:4000/api/jobs?id=7a375a56-0da0-40d8-91e0-6440b3282ed8"
# [{"id":"7a375a56-0da0-40d8-91e0-6440b3282ed8","result":{"container":"tgi-llm","output":{"data":"\n\n## Can you fry rice in a wok?\n\nThe wok is the"}},"status":"success"}]
```

Congratulations! You have successfully set up the Infernet Node and the `tgi-llm` container. Now let's move on to
calling our service from a smart contract (a la Web3 request).

## Calling our service from a smart contract

In the following steps, we will deploy our [consumer contract](https://github.com/ritual-net/infernet-container-starter/blob/main/projects/tgi-llm/contracts/src/Prompter.sol) and make a subscription request by calling the
contract.

### Setup
Ensure that you have followed the steps in the previous section up until [here](#check-the-running-containers) to set up
the Infernet Node and the `tgi-llm` container.

Notice that in [the step above](#check-the-running-containers) we have an Anvil node running on port `8545`.

By default, the [`anvil-node`](https://hub.docker.com/r/ritualnetwork/infernet-anvil) image used deploys the
[Infernet SDK](https://docs.ritual.net/infernet/sdk/introduction) and other relevant contracts for you:
- Coordinator: `0x5FbDB2315678afecb367f032d93F642f64180aa3`
- Primary node: `0x70997970C51812dc3A010C7d01b50e0d17dc79C8`
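
If you want to sanity-check this before deploying anything, a small `web3.py` sketch can confirm the Coordinator bytecode is there (web3 is already pinned in the container's requirements; this assumes the Anvil node is reachable at `localhost:8545` as above):

```python
from web3 import Web3

# Local Anvil node bootstrapped by `make deploy-container`
w3 = Web3(Web3.HTTPProvider("http://localhost:8545"))

coordinator = Web3.to_checksum_address("0x5FbDB2315678afecb367f032d93F642f64180aa3")

# Non-empty bytecode at this address means the Infernet SDK contracts are deployed.
print("coordinator deployed:", len(w3.eth.get_code(coordinator)) > 0)
```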

### Deploy our `Prompter` smart contract

In this step, we will deploy our [`Prompter.sol`](./contracts/src/Prompter.sol)
to the Anvil node. This contract simply lets us submit a prompt to the LLM; it then receives the result of the
prompt and prints it to the Anvil console.
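
Before deploying, it helps to see how the prompt travels between the contract and the container. `Prompter.promptLLM()` ABI-encodes the prompt, and the `tgi-llm` container decodes it and ABI-encodes the LLM output back for `_receiveCompute()` (see [`container/src/app.py`](./container/src/app.py)). Here is a small `eth_abi` sketch of that round trip, with a made-up output string purely for illustration:

```python
from eth_abi import decode, encode

# What promptLLM() puts on-chain: abi.encode(prompt), delivered to the container as hex.
payload = encode(["string"], ["What is 2 * 3?"]).hex()

# What the container does on receipt (container/src/app.py, on-chain branch).
(prompt,) = decode(["string"], bytes.fromhex(payload))

# The LLM's answer is then re-encoded the same way and handed back to _receiveCompute()
# as the `raw_output` field. "2 * 3 = 6" here is only an illustrative value.
raw_output = encode(["string"], ["2 * 3 = 6"]).hex()

print(prompt)
print(raw_output[:32], "...")
```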

#### Anvil logs

During this process, it is useful to look at the logs of the Anvil node to see what's going on. To follow the logs,
in a new terminal, run:

```bash copy
docker logs -f anvil-node
```

#### Deploying the contract

Once ready, to deploy the `Prompter` consumer contract, in another terminal, run:

```bash copy
make deploy-contracts project=tgi-llm
```

You should expect to see similar Anvil logs:

```bash
# > make deploy-contracts project=tgi-llm
eth_getTransactionReceipt

    Transaction: 0x17a9d17cc515d39eef26b6a9427e04ed6f7ce6572d9756c07305c2df78d93ffe
    Contract created: 0x663f3ad617193148711d28f5334ee4ed07016602
    Gas used: 731312

    Block Number: 1
    Block Hash: 0xd17b344af15fc32cd3359e6f2c2724a8d0a0283fc3b44febba78fc99f2f00189
    Block Time: "Wed, 6 Mar 2024 18:21:01 +0000"

eth_getTransactionByHash
```

From our logs, we can see that the `Prompter` contract has been deployed to address
`0x663f3ad617193148711d28f5334ee4ed07016602`.

### Call the contract

Now, let's call the contract with a prompt! In the same terminal, run:

```bash copy
make call-contract project=tgi-llm prompt="What is 2 * 3?"
```

You should first expect to see an initiation transaction sent to the `Prompter` contract:

```bash
eth_getTransactionReceipt

    Transaction: 0x988b1b251f3b6ad887929a58429291891d026f11392fb9743e9a90f78c7a0801
    Gas used: 190922

    Block Number: 2
    Block Hash: 0x51f3abf62e763f1bd1b0d245a4eab4ced4b18f58bd13645dbbf3a878f1964044
    Block Time: "Wed, 6 Mar 2024 18:21:34 +0000"

eth_getTransactionByHash
eth_getTransactionReceipt
```
Shortly after that, you should see another transaction submitted by the Infernet Node, which is the
result of your on-chain subscription and its associated job request:

```bash
eth_sendRawTransaction

 _____ _____ _______ _ _ _
| __ \|_ _|__ __| | | | /\ | |
| |__) | | | | | | | | | / \ | |
| _ / | | | | | | | |/ /\ \ | |
| | \ \ _| |_ | | | |__| / ____ \| |____
|_| \_\_____| |_| \____/_/ \_\______|

subscription Id 1
interval 1
redundancy 1
node 0x70997970C51812dc3A010C7d01b50e0d17dc79C8
output:

2 * 3 = 6

    Transaction: 0xdaaf559c2baba212ab218fb268906613ce3be93ba79b37f902ff28c8fe9a1e1a
    Gas used: 116153

    Block Number: 3
    Block Hash: 0x2f26b2b487a4195ff81865b2966eab1508d10642bf525a258200eea432522e24
    Block Time: "Wed, 6 Mar 2024 18:21:35 +0000"

eth_blockNumber
```

We can now confirm that the address of the Infernet Node (see the logged `node` parameter in the Anvil logs above)
matches the address of the node we set up by default for our Infernet Node.

Congratulations! 🎉 You have successfully enabled a contract to have access to a TGI LLM service.

8 projects/tgi-llm/tgi/Makefile Normal file
@@ -0,0 +1,8 @@
.PHONY: run

volume ?= $(PWD)/data
model ?= mistralai/Mistral-7B-v0.1

run:
	docker run --gpus all --shm-size 1g -p 8080:80 -v $(volume):/data \
		ghcr.io/huggingface/text-generation-inference:1.4 --model-id $(model)

15 projects/tgi-llm/tgi/README.md Normal file
@@ -0,0 +1,15 @@
# TGI Service

The [Makefile](./Makefile) for this service simply invokes
Huggingface's `huggingface/text-generation-inference:1.4`
docker image. Ensure that you are running this on a machine with a GPU.

For example, to run the TGI container with model `mistralai/Mistral-7B-v0.1`, you can
use the following command:

```bash
make run model=mistralai/Mistral-7B-v0.1 volume=/path/to/your/data
```

* `model`: defaults to `mistralai/Mistral-7B-v0.1`
* `volume`: defaults to `./data`

22 projects/tgi-llm/ui/Dockerfile Normal file
@@ -0,0 +1,22 @@
FROM python:3.11-slim as builder

WORKDIR /app

ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONPATH src

RUN apt-get update

COPY src/requirements.txt .

RUN pip install --upgrade pip && pip install -r requirements.txt

COPY src src

COPY prompt.txt .

ENTRYPOINT ["python", "src/app.py"]
CMD ["-b", "0.0.0.0:3000"]

17 projects/tgi-llm/ui/Makefile Normal file
@@ -0,0 +1,17 @@
DOCKER_ORG := ritualnetwork
EXAMPLE_NAME := tgi-llm-ui
TAG := $(DOCKER_ORG)/example-$(EXAMPLE_NAME)-infernet:latest

.PHONY: build run build-multiplatform

build:
	@docker build -t $(TAG) .

run: build
	docker run --env-file ./gradio_ui.env -p 3001:7860 $(TAG)

# You may need to set up a docker builder, to do so run:
# docker buildx create --name mybuilder --bootstrap --use
# refer to https://docs.docker.com/build/building/multi-platform/#building-multi-platform-images for more info
build-multiplatform:
	docker buildx build --platform linux/amd64,linux/arm64 -t $(TAG) --push .

35 projects/tgi-llm/ui/README.md Normal file
@@ -0,0 +1,35 @@
# Gradio UI

This is a utility UI project to chat with your TGI LLM.

## Configuration

Copy the [`gradio_ui.env.sample`](./gradio_ui.env.sample) file into a new file
called `gradio_ui.env` and fill in the necessary environment variables.

```bash
cp gradio_ui.env.sample gradio_ui.env
```

Environment variables are as follows:

```bash
TGI_SERVICE_URL= # URL to your running TGI service
HF_API_TOKEN= # your Huggingface API token
PROMPT_FILE_PATH= # path to a prompt file
```

## Running

Simply run:

```bash
make run
```

The UI will run on port `3001` on your localhost. You can change that configuration
[here](./Makefile#L11).

Congratulations! You have successfully set up the Gradio UI for your TGI LLM.

Now you can go to `http://localhost:3001` and chat with your LLM instance.

3 projects/tgi-llm/ui/gradio_ui.env.sample Normal file
@@ -0,0 +1,3 @@
TGI_SERVICE_URL=
HF_API_TOKEN=
PROMPT_FILE_PATH=./prompt.txt

1 projects/tgi-llm/ui/prompt.txt Normal file
@@ -0,0 +1 @@
You're a friendly chatbot.

109 projects/tgi-llm/ui/src/app.py Normal file
@@ -0,0 +1,109 @@
import os
from pathlib import Path
from typing import Any, Callable, Union, cast

import gradio as gr  # type: ignore
from dotenv import load_dotenv
from huggingface_hub import InferenceClient  # type: ignore

load_dotenv()

TGI_SERVICE_URL = os.getenv("TGI_SERVICE_URL")
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

client = InferenceClient(model=TGI_SERVICE_URL)


def start_interface(
    lambdafn: Callable[[str, list[str]], Any],
    examples: list[str],
    title: str,
    description: str,
    share: bool = True,
    height: int = 300,
    placeholder: str = "Chat with me!",
    scale: int = 7,
    container: bool = False,
) -> None:
    """
    Starts the Gradio chat interface.

    Args:
        lambdafn (callable): text_generation lambda fn with message, history
        examples (list[str]): A list of example inputs for the interface.
        title (str): The gradio title.
        description (str): The gradio description.
        share (bool): Whether to generate a global gradio link for 72 hours.
        height (int): Height of chat window in pixels.
        placeholder (str): Placeholder when chat window is empty.
        scale (int): The scale of the chat window.
        container (bool): Show the chat window in a container.
    """
    gr.ChatInterface(
        lambdafn,
        chatbot=gr.Chatbot(height=height),
        textbox=gr.Textbox(placeholder=placeholder, container=container, scale=scale),
        description=description,
        title=title,
        examples=examples,
        retry_btn="Retry",
        undo_btn="Undo",
        clear_btn="Clear",
    ).queue().launch(share=share, server_name="0.0.0.0")


def read_text_file(file_path: Union[Path, str]) -> str:
    """Reads content from file as a string."""
    with open(file_path, "r") as file:
        return file.read()


def main() -> None:
    cwd = os.getcwd()

    PROMPT_FILE_PATH: str = cast(str, os.getenv("PROMPT_FILE_PATH"))

    if not PROMPT_FILE_PATH:
        raise ValueError("PROMPT_FILE_PATH is not set in the environment.")

    input_text = read_text_file(os.path.join(cwd, PROMPT_FILE_PATH))

    # Default formatter ignores the system prompt and passes the user prompt through
    # unchanged; customize it to prepend `input_text` if your model expects it.
    def prompt_formatter(user_prompt: str, input_text: str) -> str:
        return user_prompt

    # You should write your own lambdafn to set the parameters.
    # Gradio doesn't currently support functions with more than
    # [message, history] as parameters into the interface
    # if you don't want the user to see them.
    def stream_inference(message: str, history: list[str]) -> Any:
        response = client.text_generation(
            prompt_formatter(message, input_text),
            max_new_tokens=40,
            temperature=0.3,
            details=True,
        ).generated_text
        # this is just for the gradio front end, you can ignore for
        # backend in the ML model for strikethroughs.
        if response.startswith("<s>"):
            response = response[3:]
        yield response

    title = "Your Ritual Model🎷"
    description = "This is the demo for your model."

    # if you want a global url others can visit.
    share = True
    examples = ["Can shrimp actually fry rice?"]

    start_interface(
        lambdafn=stream_inference,
        title=title,
        description=description,
        share=share,
        examples=examples,
    )


if __name__ == "__main__":
    main()

4 projects/tgi-llm/ui/src/requirements.txt Normal file
@@ -0,0 +1,4 @@
python-dotenv==1.0.0
gradio==3.47.1
huggingface-hub==0.17.3
text-generation==0.6.1