From ca552f5a7a24f8e20707bae753c510845276a9ba Mon Sep 17 00:00:00 2001 From: vvzvlad Date: Wed, 4 Sep 2024 22:25:48 +0300 Subject: [PATCH] add many tokens --- app.py | 30 ++++++++++++------------ config.py | 13 ++++++++++- model.py | 70 ++++++++++++++++--------------------------------------- 3 files changed, 47 insertions(+), 66 deletions(-) diff --git a/app.py b/app.py index 7a6b249..9fcb915 100644 --- a/app.py +++ b/app.py @@ -4,40 +4,41 @@ import pandas as pd import numpy as np from datetime import datetime from flask import Flask, jsonify, Response -from model import download_data, format_data, train_model, training_price_data_path +from model import download_data, format_data, train_model, get_training_data_path from config import model_file_path app = Flask(__name__) def update_data(): - """Download price data, format data and train model.""" + """Download price data, format data and train model for each token.""" + tokens = ["ETH", "BTC", "SOL", "BNB", "ARB"] download_data() - format_data() - train_model() + for token in tokens: + format_data(token) + train_model(token) def get_inference(token, period): try: - with open(model_file_path, "rb") as f: + model_path = model_file_path[token] + + with open(model_path, "rb") as f: loaded_model = pickle.load(f) - # Загружаем последние данные из файла + # Загружаем последние данные для данного токена + training_price_data_path = get_training_data_path(token) price_data = pd.read_csv(training_price_data_path) # Используем последние значения признаков для предсказания last_row = price_data.iloc[-1] - - # Получаем последний timestamp last_timestamp = last_row["timestamp"] - # Преобразуем период в секунды (пример) + # Преобразуем период в секунды period_seconds = convert_period_to_seconds(period) - - # Рассчитываем новый временной штамп на основе периода new_timestamp = last_timestamp + period_seconds - # timestamp + # Формируем данные для предсказания с новым timestamp X_new = np.array( [ new_timestamp, @@ -49,9 +50,8 @@ def get_inference(token, period): ] ).reshape(1, -1) - # Предсказание + # Делаем предсказание future_price_pred = loaded_model.predict(X_new) - return future_price_pred[0] except Exception as e: @@ -95,4 +95,4 @@ def update(): if __name__ == "__main__": update_data() - app.run(host="0.0.0.0", port=8127) + app.run(host="0.0.0.0", port=8080) diff --git a/config.py b/config.py index c1b91db..50f184e 100644 --- a/config.py +++ b/config.py @@ -2,4 +2,15 @@ import os app_base_path = os.getenv("APP_BASE_PATH", default=os.getcwd()) data_base_path = os.path.join(app_base_path, "data") -model_file_path = os.path.join(data_base_path, "model.pkl") + +model_file_path = { + "ETH": os.path.join(data_base_path, "eth_model.pkl"), + "BTC": os.path.join(data_base_path, "btc_model.pkl"), + "SOL": os.path.join(data_base_path, "sol_model.pkl"), + "BNB": os.path.join(data_base_path, "bnb_model.pkl"), + "ARB": os.path.join(data_base_path, "arb_model.pkl"), +} + + +def get_training_data_path(token): + return os.path.join(data_base_path, f"{token.lower()}_price_data.csv") diff --git a/model.py b/model.py index f780264..eddc378 100644 --- a/model.py +++ b/model.py @@ -10,13 +10,19 @@ from updater import download_binance_monthly_data, download_binance_daily_data from config import data_base_path, model_file_path binance_data_path = os.path.join(data_base_path, "binance/futures-klines") -training_price_data_path = os.path.join(data_base_path, "eth_price_data.csv") + + +def get_training_data_path(token): + """ + Возвращает путь к файлу данных для указанного токена. + """ + return os.path.join(data_base_path, f"{token}_price_data.csv") def download_data(): cm_or_um = "um" - symbols = ["ETHUSDT"] - intervals = ["10min"] + symbols = ["ETHUSDT", "BTCUSDT", "SOLUSDT", "BNBUSDT", "ARBUSDT"] + intervals = ["10min", "1d"] years = ["2020", "2021", "2022", "2023", "2024"] months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] download_path = binance_data_path @@ -33,8 +39,10 @@ def download_data(): print(f"Downloaded daily data to {download_path}.") -def format_data(): - files = sorted([x for x in os.listdir(binance_data_path) if x.endswith(".zip")]) +def format_data(token): + files = sorted( + [x for x in os.listdir(binance_data_path) if x.endswith(".zip") and token in x] + ) if len(files) == 0: return @@ -75,10 +83,12 @@ def format_data(): price_df.dropna(inplace=True) # Сохраняем данные + training_price_data_path = get_training_data_path(token) price_df.sort_index().to_csv(training_price_data_path) -def train_model(): +def train_model(token): + training_price_data_path = get_training_data_path(token) price_data = pd.read_csv(training_price_data_path) # Используем дополнительные признаки @@ -98,57 +108,17 @@ def train_model(): x, y, test_size=0.2, random_state=0 ) - # Train the model - print("Training model...") model = XGBRegressor() model.fit(x_train, y_train) - print("Model trained.") - os.makedirs(os.path.dirname(model_file_path), exist_ok=True) + token_model_path = model_file_path[token] + os.makedirs(os.path.dirname(token_model_path), exist_ok=True) - with open(model_file_path, "wb") as f: + with open(token_model_path, "wb") as f: pickle.dump(model, f) - print(f"Trained model saved to {model_file_path}") + print(f"Trained model saved to {token_model_path}") # Optional: Оценка модели y_pred = model.predict(x_test) print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}") - - -def get_inference_data(token, period): - """ - Генерирует данные для инференса на основе переданного токена и периода. - """ - price_data = pd.read_csv(training_price_data_path) - - # Настроить разницу времени в зависимости от периода - time_delta_map = { - "1min": timedelta(minutes=1), - "5min": timedelta(minutes(5)), - "10min": timedelta(minutes(10)), - "30min": timedelta(minutes(30)), - "1h": timedelta(hours=1), - "1d": timedelta(days=1), - } - - # Получаем последний таймстамп и вычисляем следующий - last_timestamp = pd.to_datetime(price_data["timestamp"].iloc[-1], unit="s") - next_timestamp = last_timestamp + time_delta_map.get(period, timedelta(minutes=10)) - - # Используем последние значения признаков - last_data = price_data.iloc[-1] - X_new = np.array( - [ - [ - next_timestamp.timestamp(), - last_data["price_diff"], - last_data["volatility"], - last_data["volume"], - last_data["moving_avg_7"], - last_data["moving_avg_30"], - ] - ] - ) - - return X_new \ No newline at end of file