Compare commits
	
		
			11 Commits
		
	
	
		
			9a211a4748
			...
			XGBRegress
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 1ba4c0158d | |||
| fc7097fd50 | |||
| 4b7f57d0dd | |||
| 61fa099391 | |||
| 520416b772 | |||
| 3f17f7f0b7 | |||
| 59672292e2 | |||
| 505ba1a42d | |||
| 7fd61d13e5 | |||
| ca552f5a7a | |||
| 2475e22c1a | 
@ -4,6 +4,8 @@ FROM amd64/python:3.9-buster as project_env
 | 
			
		||||
# Set the working directory in the container
 | 
			
		||||
WORKDIR /app
 | 
			
		||||
 | 
			
		||||
ENV FLASK_ENV=production
 | 
			
		||||
 | 
			
		||||
# Install dependencies
 | 
			
		||||
COPY requirements.txt requirements.txt
 | 
			
		||||
RUN pip install --upgrade pip setuptools \
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										80
									
								
								app.py
									
									
									
									
									
								
							
							
						
						
									
										80
									
								
								app.py
									
									
									
									
									
								
							@ -4,64 +4,78 @@ import pandas as pd
 | 
			
		||||
import numpy as np
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from flask import Flask, jsonify, Response
 | 
			
		||||
from model import download_data, format_data, train_model, training_price_data_path
 | 
			
		||||
from model import download_data, format_data, train_model, get_training_data_path
 | 
			
		||||
from config import model_file_path
 | 
			
		||||
 | 
			
		||||
app = Flask(__name__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def update_data():
 | 
			
		||||
    """Download price data, format data and train model."""
 | 
			
		||||
    """Download price data, format data and train model for each token."""
 | 
			
		||||
    tokens = ["ETH", "BTC", "SOL", "BNB", "ARB"]
 | 
			
		||||
    download_data()
 | 
			
		||||
    format_data()
 | 
			
		||||
    train_model()
 | 
			
		||||
    for token in tokens:
 | 
			
		||||
        format_data(token)
 | 
			
		||||
        train_model(token)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_eth_inference():
 | 
			
		||||
    """Load model and predict current price."""
 | 
			
		||||
def get_inference(token, period):
 | 
			
		||||
    try:
 | 
			
		||||
        with open(model_file_path, "rb") as f:
 | 
			
		||||
        model_path = model_file_path[token]
 | 
			
		||||
 | 
			
		||||
        with open(model_path, "rb") as f:
 | 
			
		||||
            loaded_model = pickle.load(f)
 | 
			
		||||
 | 
			
		||||
        # Загружаем последние данные из файла
 | 
			
		||||
        # Загружаем последние данные для данного токена
 | 
			
		||||
        training_price_data_path = get_training_data_path(token)
 | 
			
		||||
        price_data = pd.read_csv(training_price_data_path)
 | 
			
		||||
 | 
			
		||||
        # Используем последние значения признаков для предсказания
 | 
			
		||||
        X_new = (
 | 
			
		||||
            price_data[
 | 
			
		||||
                [
 | 
			
		||||
                    "timestamp",
 | 
			
		||||
                    "price_diff",
 | 
			
		||||
                    "volatility",
 | 
			
		||||
                    "volume",
 | 
			
		||||
                    "moving_avg_7",
 | 
			
		||||
                    "moving_avg_30",
 | 
			
		||||
                ]
 | 
			
		||||
        last_row = price_data.iloc[-1]
 | 
			
		||||
        last_timestamp = last_row["timestamp"]
 | 
			
		||||
 | 
			
		||||
        # Преобразуем период в секунды
 | 
			
		||||
        period_seconds = convert_period_to_seconds(period)
 | 
			
		||||
        new_timestamp = last_timestamp + period_seconds
 | 
			
		||||
 | 
			
		||||
        # Формируем данные для предсказания с новым timestamp
 | 
			
		||||
        X_new = np.array(
 | 
			
		||||
            [
 | 
			
		||||
                new_timestamp,
 | 
			
		||||
                last_row["price_diff"],
 | 
			
		||||
                last_row["volatility"],
 | 
			
		||||
                last_row["volume"],
 | 
			
		||||
                last_row["moving_avg_7"],
 | 
			
		||||
                last_row["moving_avg_30"],
 | 
			
		||||
            ]
 | 
			
		||||
            .iloc[-1]
 | 
			
		||||
            .values.reshape(1, -1)
 | 
			
		||||
        )
 | 
			
		||||
        ).reshape(1, -1)
 | 
			
		||||
 | 
			
		||||
        # Делаем предсказание
 | 
			
		||||
        current_price_pred = loaded_model.predict(X_new)
 | 
			
		||||
        future_price_pred = loaded_model.predict(X_new)
 | 
			
		||||
        return future_price_pred[0]
 | 
			
		||||
 | 
			
		||||
        return current_price_pred[0]
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        print(f"Error during inference: {str(e)}")
 | 
			
		||||
        raise
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.route("/inference/<string:token>")
 | 
			
		||||
def generate_inference(token):
 | 
			
		||||
    """Generate inference for given token."""
 | 
			
		||||
    if not token or token != "ETH":
 | 
			
		||||
        error_msg = "Token is required" if not token else "Token not supported"
 | 
			
		||||
        return Response(
 | 
			
		||||
            json.dumps({"error": error_msg}), status=400, mimetype="application/json"
 | 
			
		||||
        )
 | 
			
		||||
def convert_period_to_seconds(period):
 | 
			
		||||
    """Конвертируем период в секунды."""
 | 
			
		||||
    if period.endswith("m"):
 | 
			
		||||
        return int(period[:-1]) * 60
 | 
			
		||||
    elif period.endswith("h"):
 | 
			
		||||
        return int(period[:-1]) * 3600
 | 
			
		||||
    elif period.endswith("d"):
 | 
			
		||||
        return int(period[:-1]) * 86400
 | 
			
		||||
    else:
 | 
			
		||||
        raise ValueError(f"Unknown period format: {period}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.route("/inference/<string:token>/<string:period>")
 | 
			
		||||
def generate_inference(token, period):
 | 
			
		||||
    """Generate inference for given token and period."""
 | 
			
		||||
    try:
 | 
			
		||||
        inference = get_eth_inference()
 | 
			
		||||
        inference = get_inference(token, period)
 | 
			
		||||
        return Response(str(inference), status=200)
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        return Response(
 | 
			
		||||
@ -81,4 +95,4 @@ def update():
 | 
			
		||||
 | 
			
		||||
if __name__ == "__main__":
 | 
			
		||||
    update_data()
 | 
			
		||||
    app.run(host="0.0.0.0", port=8000)
 | 
			
		||||
    app.run(host="0.0.0.0", port=8080)
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										62
									
								
								config.json
									
									
									
									
									
								
							
							
						
						
									
										62
									
								
								config.json
									
									
									
									
									
								
							@ -8,35 +8,53 @@
 | 
			
		||||
        "nodeRpc": "###RPC_URL###",
 | 
			
		||||
        "maxRetries": 10,
 | 
			
		||||
        "delay": 30,
 | 
			
		||||
        "submitTx": false
 | 
			
		||||
        "submitTx": true
 | 
			
		||||
    },
 | 
			
		||||
    "worker": [
 | 
			
		||||
        {
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 1,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer",
 | 
			
		||||
            "loopSeconds": 5,
 | 
			
		||||
            "parameters": {
 | 
			
		||||
                "InferenceEndpoint": "http://inference:8000/inference/{Token}",
 | 
			
		||||
                "Token": "ETH"
 | 
			
		||||
            }
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/10m", "Token": "ETH" }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 2,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer",
 | 
			
		||||
            "loopSeconds": 5,
 | 
			
		||||
            "parameters": {
 | 
			
		||||
                "InferenceEndpoint": "http://inference:8000/inference/{Token}",
 | 
			
		||||
                "Token": "ETH"
 | 
			
		||||
            }
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/24h", "Token": "ETH" }
 | 
			
		||||
        },
 | 
			
		||||
        {
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 3,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/BTC/10m", "Token": "BTC" }
 | 
			
		||||
        },
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 4,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/BTC/24h", "Token": "BTC" }
 | 
			
		||||
        },
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 5,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/SOL/10m", "Token": "SOL" }
 | 
			
		||||
        },
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 6,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/SOL/24h", "Token": "SOL" }
 | 
			
		||||
        },
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 7,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer",
 | 
			
		||||
            "loopSeconds": 5,
 | 
			
		||||
            "parameters": {
 | 
			
		||||
                "InferenceEndpoint": "http://inference:8000/inference/{Token}",
 | 
			
		||||
                "Token": "ETH"
 | 
			
		||||
            }
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/20m", "Token": "ETH" }
 | 
			
		||||
        },
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 8,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/BNB/20m", "Token": "BNB" }
 | 
			
		||||
        },
 | 
			
		||||
        {   
 | 
			
		||||
            "topicId": 9,
 | 
			
		||||
            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
 | 
			
		||||
            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/ARB/20m", "Token": "ARB" }
 | 
			
		||||
        }
 | 
			
		||||
    ]
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										13
									
								
								config.py
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								config.py
									
									
									
									
									
								
							@ -2,4 +2,15 @@ import os
 | 
			
		||||
 | 
			
		||||
app_base_path = os.getenv("APP_BASE_PATH", default=os.getcwd())
 | 
			
		||||
data_base_path = os.path.join(app_base_path, "data")
 | 
			
		||||
model_file_path = os.path.join(data_base_path, "model.pkl")
 | 
			
		||||
 | 
			
		||||
model_file_path = {
 | 
			
		||||
    "ETH": os.path.join(data_base_path, "eth_model.pkl"),
 | 
			
		||||
    "BTC": os.path.join(data_base_path, "btc_model.pkl"),
 | 
			
		||||
    "SOL": os.path.join(data_base_path, "sol_model.pkl"),
 | 
			
		||||
    "BNB": os.path.join(data_base_path, "bnb_model.pkl"),
 | 
			
		||||
    "ARB": os.path.join(data_base_path, "arb_model.pkl"),
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_training_data_path(token):
 | 
			
		||||
    return os.path.join(data_base_path, f"{token.lower()}_price_data.csv")
 | 
			
		||||
 | 
			
		||||
@ -4,12 +4,12 @@ services:
 | 
			
		||||
    build: .
 | 
			
		||||
    command: python -u /app/app.py
 | 
			
		||||
    ports:
 | 
			
		||||
      - "8000:8000"
 | 
			
		||||
      - "8080:8080"
 | 
			
		||||
    healthcheck:
 | 
			
		||||
      test: ["CMD", "curl", "-f", "http://localhost:8000/inference/ETH"]
 | 
			
		||||
      interval: 10s
 | 
			
		||||
      test: ["CMD", "curl", "-f", "http://localhost:8080/inference/ETH/10m"]
 | 
			
		||||
      interval: 30s
 | 
			
		||||
      timeout: 5s
 | 
			
		||||
      retries: 12
 | 
			
		||||
      retries: 20
 | 
			
		||||
    volumes:
 | 
			
		||||
      - ./inference-data:/app/data
 | 
			
		||||
    restart: always
 | 
			
		||||
@ -18,7 +18,7 @@ services:
 | 
			
		||||
    container_name: updater-basic-eth-pred
 | 
			
		||||
    build: .
 | 
			
		||||
    environment:
 | 
			
		||||
      - INFERENCE_API_ADDRESS=http://inference:8000
 | 
			
		||||
      - INFERENCE_API_ADDRESS=http://inference:8080
 | 
			
		||||
    command: >
 | 
			
		||||
      sh -c "
 | 
			
		||||
      while true; do
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										40
									
								
								model.py
									
									
									
									
									
								
							
							
						
						
									
										40
									
								
								model.py
									
									
									
									
									
								
							@ -1,22 +1,28 @@
 | 
			
		||||
import os
 | 
			
		||||
import pickle
 | 
			
		||||
import numpy as np 
 | 
			
		||||
import numpy as np
 | 
			
		||||
from xgboost import XGBRegressor
 | 
			
		||||
from zipfile import ZipFile
 | 
			
		||||
from datetime import datetime
 | 
			
		||||
from datetime import datetime, timedelta
 | 
			
		||||
import pandas as pd
 | 
			
		||||
from sklearn.model_selection import train_test_split
 | 
			
		||||
from updater import download_binance_monthly_data, download_binance_daily_data
 | 
			
		||||
from config import data_base_path, model_file_path
 | 
			
		||||
 | 
			
		||||
binance_data_path = os.path.join(data_base_path, "binance/futures-klines")
 | 
			
		||||
training_price_data_path = os.path.join(data_base_path, "eth_price_data.csv")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_training_data_path(token):
 | 
			
		||||
    """
 | 
			
		||||
    Возвращает путь к файлу данных для указанного токена.
 | 
			
		||||
    """
 | 
			
		||||
    return os.path.join(data_base_path, f"{token}_price_data.csv")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def download_data():
 | 
			
		||||
    cm_or_um = "um"
 | 
			
		||||
    symbols = ["ETHUSDT"]
 | 
			
		||||
    intervals = ["1d"]
 | 
			
		||||
    symbols = ["ETHUSDT", "BTCUSDT", "SOLUSDT", "BNBUSDT", "ARBUSDT"]
 | 
			
		||||
    intervals = ["10min", "1d"]
 | 
			
		||||
    years = ["2020", "2021", "2022", "2023", "2024"]
 | 
			
		||||
    months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
 | 
			
		||||
    download_path = binance_data_path
 | 
			
		||||
@ -33,8 +39,10 @@ def download_data():
 | 
			
		||||
    print(f"Downloaded daily data to {download_path}.")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def format_data():
 | 
			
		||||
    files = sorted([x for x in os.listdir(binance_data_path) if x.endswith(".zip")])
 | 
			
		||||
def format_data(token):
 | 
			
		||||
    files = sorted(
 | 
			
		||||
        [x for x in os.listdir(binance_data_path) if x.endswith(".zip") and token in x]
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    if len(files) == 0:
 | 
			
		||||
        return
 | 
			
		||||
@ -75,10 +83,12 @@ def format_data():
 | 
			
		||||
    price_df.dropna(inplace=True)
 | 
			
		||||
 | 
			
		||||
    # Сохраняем данные
 | 
			
		||||
    training_price_data_path = get_training_data_path(token)
 | 
			
		||||
    price_df.sort_index().to_csv(training_price_data_path)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def train_model():
 | 
			
		||||
def train_model(token):
 | 
			
		||||
    training_price_data_path = get_training_data_path(token)
 | 
			
		||||
    price_data = pd.read_csv(training_price_data_path)
 | 
			
		||||
 | 
			
		||||
    # Используем дополнительные признаки
 | 
			
		||||
@ -98,21 +108,17 @@ def train_model():
 | 
			
		||||
        x, y, test_size=0.2, random_state=0
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    # Train the model
 | 
			
		||||
    print("Training model...")
 | 
			
		||||
    model = XGBRegressor()
 | 
			
		||||
    model.fit(x_train, y_train)
 | 
			
		||||
    print("Model trained.")
 | 
			
		||||
 | 
			
		||||
    # create the model's parent directory if it doesn't exist
 | 
			
		||||
    os.makedirs(os.path.dirname(model_file_path), exist_ok=True)
 | 
			
		||||
    token_model_path = model_file_path[token]
 | 
			
		||||
    os.makedirs(os.path.dirname(token_model_path), exist_ok=True)
 | 
			
		||||
 | 
			
		||||
    # Save the trained model to a file
 | 
			
		||||
    with open(model_file_path, "wb") as f:
 | 
			
		||||
    with open(token_model_path, "wb") as f:
 | 
			
		||||
        pickle.dump(model, f)
 | 
			
		||||
 | 
			
		||||
    print(f"Trained model saved to {model_file_path}")
 | 
			
		||||
    print(f"Trained model saved to {token_model_path}")
 | 
			
		||||
 | 
			
		||||
    # Optional: Оценка модели
 | 
			
		||||
    y_pred = model.predict(x_test)
 | 
			
		||||
    print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}")
 | 
			
		||||
    print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}")
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user