feat: publishing infernet-container-starter v0.2.0

projects/tgi-llm/ui/Dockerfile (new file, 22 lines)
@@ -0,0 +1,22 @@
FROM python:3.11-slim as builder

WORKDIR /app

ENV PYTHONUNBUFFERED 1
ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONPATH src

WORKDIR /app

RUN apt-get update

COPY src/requirements.txt .

RUN pip install --upgrade pip && pip install -r requirements.txt

COPY src src

COPY prompt.txt .

ENTRYPOINT ["python", "src/app.py"]
CMD ["-b", "0.0.0.0:3000"]

projects/tgi-llm/ui/Makefile (new file, 17 lines)
@@ -0,0 +1,17 @@
DOCKER_ORG := ritualnetwork
EXAMPLE_NAME := tgi-llm-ui
TAG := $(DOCKER_ORG)/example-$(EXAMPLE_NAME)-infernet:latest

.PHONY: build run build-multiplatform

build:
	@docker build -t $(TAG) .

run: build
	docker run --env-file ./gradio_ui.env -p 3001:7860 $(TAG)

# You may need to set up a docker builder; to do so, run:
# docker buildx create --name mybuilder --bootstrap --use
# refer to https://docs.docker.com/build/building/multi-platform/#building-multi-platform-images for more info
build-multiplatform:
	docker buildx build --platform linux/amd64,linux/arm64 -t $(TAG) --push .

projects/tgi-llm/ui/README.md (new file, 35 lines)
@@ -0,0 +1,35 @@
# Gradio UI

This is a utility UI project to chat with your TGI LLM.

## Configuration

Copy the [`gradio_ui.env.sample`](./gradio_ui.env.sample) file into a new file
called `gradio_ui.env` and fill in the necessary environment variables.

```bash
cp gradio_ui.env.sample gradio_ui.env
```

Environment variables are as follows:

```bash
TGI_SERVICE_URL= # URL to your running TGI service
HF_API_TOKEN=
PROMPT_FILE_PATH= # path to a prompt file
```
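
For example, a filled-in `gradio_ui.env` might look like the following sketch; the
service URL and token are placeholders, not values from this repository:

```bash
TGI_SERVICE_URL=http://your-tgi-host:8080
HF_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
PROMPT_FILE_PATH=./prompt.txt
```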

## Running

Simply run:

```bash
make run
```

The UI runs on port `3001` on localhost. You can change the port mapping in the
[Makefile](./Makefile#L11).
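
For example, to expose the UI on port `8080` instead, you could change the host
side of that port mapping, or run the container directly; the container side stays
on `7860`, Gradio's default port. A sketch, not part of the Makefile itself:

```bash
docker run --env-file ./gradio_ui.env -p 8080:7860 ritualnetwork/example-tgi-llm-ui-infernet:latest
```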

Congratulations! You have successfully set up the Gradio UI for your TGI LLM.

Now you can go to `http://localhost:3001` and chat with your LLM instance.

projects/tgi-llm/ui/gradio_ui.env.sample (new file, 3 lines)
@@ -0,0 +1,3 @@
TGI_SERVICE_URL=
HF_API_TOKEN=
PROMPT_FILE_PATH=./prompt.txt

projects/tgi-llm/ui/prompt.txt (new file, 1 line)
@@ -0,0 +1 @@
You're a friendly chatbot.

projects/tgi-llm/ui/src/app.py (new file, 109 lines)
@@ -0,0 +1,109 @@
import os
from builtins import str
from pathlib import Path
from typing import Union, cast, Any, Callable

import gradio as gr  # type: ignore
from dotenv import load_dotenv
from huggingface_hub import InferenceClient  # type: ignore

load_dotenv()

TGI_SERVICE_URL = os.getenv("TGI_SERVICE_URL")
HF_API_TOKEN = os.getenv("HF_API_TOKEN")

client = InferenceClient(model=TGI_SERVICE_URL)


def start_interface(
    lambdafn: Callable[[str, list[str]], Any],
    examples: list[str],
    title: str,
    description: str,
    share: bool = True,
    height: int = 300,
    placeholder: str = "Chat with me!",
    scale: int = 7,
    container: bool = False,
) -> None:
    """
    Starts the Gradio chat interface for the model.

    Args:
        lambdafn (Callable): text-generation function taking (message, history).
        examples (list[str]): A list of example inputs for the interface.
        title (str): The gradio title.
        description (str): The gradio description.
        share (bool): Whether to generate a global gradio link for 72 hours.
        height (int): Height of chat window in pixels.
        placeholder (str): Placeholder when chat window is empty.
        scale (int): The scale of the chat window.
        container (bool): Show the chat window in a container.
    """
    gr.ChatInterface(
        lambdafn,
        chatbot=gr.Chatbot(height=height),
        textbox=gr.Textbox(placeholder=placeholder, container=container, scale=scale),
        description=description,
        title=title,
        examples=examples,
        retry_btn="Retry",
        undo_btn="Undo",
        clear_btn="Clear",
    ).queue().launch(share=share, server_name="0.0.0.0")


def read_text_file(file_path: Union[Path, str]) -> str:
    """Reads content from a file as a string."""
    with open(file_path, "r") as file:
        return file.read()


def main() -> None:
    cwd = os.getcwd()

    PROMPT_FILE_PATH: str = cast(str, os.getenv("PROMPT_FILE_PATH"))

    if not PROMPT_FILE_PATH:
        raise ValueError("PROMPT_FILE_PATH is not set in the environment.")

    input_text = read_text_file(os.path.join(cwd, PROMPT_FILE_PATH))

    def prompt_formatter(user_prompt: str, input_text: str) -> str:
        # Placeholder formatter: currently returns the user prompt unchanged.
        return user_prompt

    # Write your own lambdafn to set generation parameters. Gradio's
    # ChatInterface only passes (message, history) to the function, so any
    # parameters you don't want the user to see cannot be part of its
    # signature.
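    # (stream_inference below handles this by capturing `client` and
    # `input_text` from the enclosing scope instead of taking them as
    # arguments.)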
    def stream_inference(message: str, history: list[str]) -> Any:
        response = client.text_generation(
            prompt_formatter(message, input_text),
            max_new_tokens=40,
            temperature=0.3,
            details=True,
        ).generated_text
        # Strip the leading `<s>` token so the Gradio frontend doesn't render
        # it as strikethrough; the model output itself is unchanged.
        if response.startswith("<s>"):
            response = response[3:]
        yield response

    title = "Your Ritual Model🎷"
    description = "This is the demo for your model."

    # Set share=True if you want a global URL others can visit.
    share = True
    examples = ["Can shrimp actually fry rice?"]

    start_interface(
        lambdafn=stream_inference,
        title=title,
        description=description,
        share=share,
        examples=examples,
    )


if __name__ == "__main__":
    main()

projects/tgi-llm/ui/src/requirements.txt (new file, 4 lines)
@@ -0,0 +1,4 @@
python-dotenv==1.0.0
gradio==3.47.1
huggingface-hub==0.17.3
text-generation==0.6.1