add many tokens

This commit is contained in:
vvzvlad 2024-09-04 22:25:48 +03:00
parent 2475e22c1a
commit ca552f5a7a
3 changed files with 47 additions and 66 deletions

30
app.py
View File

@ -4,40 +4,41 @@ import pandas as pd
import numpy as np import numpy as np
from datetime import datetime from datetime import datetime
from flask import Flask, jsonify, Response from flask import Flask, jsonify, Response
from model import download_data, format_data, train_model, training_price_data_path from model import download_data, format_data, train_model, get_training_data_path
from config import model_file_path from config import model_file_path
app = Flask(__name__) app = Flask(__name__)
def update_data(): def update_data():
"""Download price data, format data and train model.""" """Download price data, format data and train model for each token."""
tokens = ["ETH", "BTC", "SOL", "BNB", "ARB"]
download_data() download_data()
format_data() for token in tokens:
train_model() format_data(token)
train_model(token)
def get_inference(token, period): def get_inference(token, period):
try: try:
with open(model_file_path, "rb") as f: model_path = model_file_path[token]
with open(model_path, "rb") as f:
loaded_model = pickle.load(f) loaded_model = pickle.load(f)
# Загружаем последние данные из файла # Загружаем последние данные для данного токена
training_price_data_path = get_training_data_path(token)
price_data = pd.read_csv(training_price_data_path) price_data = pd.read_csv(training_price_data_path)
# Используем последние значения признаков для предсказания # Используем последние значения признаков для предсказания
last_row = price_data.iloc[-1] last_row = price_data.iloc[-1]
# Получаем последний timestamp
last_timestamp = last_row["timestamp"] last_timestamp = last_row["timestamp"]
# Преобразуем период в секунды (пример) # Преобразуем период в секунды
period_seconds = convert_period_to_seconds(period) period_seconds = convert_period_to_seconds(period)
# Рассчитываем новый временной штамп на основе периода
new_timestamp = last_timestamp + period_seconds new_timestamp = last_timestamp + period_seconds
# timestamp # Формируем данные для предсказания с новым timestamp
X_new = np.array( X_new = np.array(
[ [
new_timestamp, new_timestamp,
@ -49,9 +50,8 @@ def get_inference(token, period):
] ]
).reshape(1, -1) ).reshape(1, -1)
# Предсказание # Делаем предсказание
future_price_pred = loaded_model.predict(X_new) future_price_pred = loaded_model.predict(X_new)
return future_price_pred[0] return future_price_pred[0]
except Exception as e: except Exception as e:
@ -95,4 +95,4 @@ def update():
if __name__ == "__main__": if __name__ == "__main__":
update_data() update_data()
app.run(host="0.0.0.0", port=8127) app.run(host="0.0.0.0", port=8080)

View File

@ -2,4 +2,15 @@ import os
app_base_path = os.getenv("APP_BASE_PATH", default=os.getcwd()) app_base_path = os.getenv("APP_BASE_PATH", default=os.getcwd())
data_base_path = os.path.join(app_base_path, "data") data_base_path = os.path.join(app_base_path, "data")
model_file_path = os.path.join(data_base_path, "model.pkl")
# Per-token locations of the pickled models, keyed by upper-case ticker.
# Each file is named "<ticker in lower case>_model.pkl" under data_base_path.
model_file_path = {
    ticker: os.path.join(data_base_path, f"{ticker.lower()}_model.pkl")
    for ticker in ("ETH", "BTC", "SOL", "BNB", "ARB")
}
def get_training_data_path(token):
    """Return the CSV path that holds the price data for *token*.

    The file name is the lower-cased token followed by
    ``_price_data.csv``, located directly under ``data_base_path``.
    """
    csv_name = f"{token.lower()}_price_data.csv"
    return os.path.join(data_base_path, csv_name)

View File

@ -10,13 +10,19 @@ from updater import download_binance_monthly_data, download_binance_daily_data
from config import data_base_path, model_file_path from config import data_base_path, model_file_path
binance_data_path = os.path.join(data_base_path, "binance/futures-klines") binance_data_path = os.path.join(data_base_path, "binance/futures-klines")
training_price_data_path = os.path.join(data_base_path, "eth_price_data.csv")
def get_training_data_path(token):
    """Return the path of the price-data CSV for the given token.

    The token is lower-cased before it is put into the file name so the
    result matches the lower-case naming used for the per-token model files
    and the ``eth_price_data.csv`` file name used before multi-token support.

    Args:
        token: Token ticker such as ``"ETH"``; letter case is ignored.

    Returns:
        ``<data_base_path>/<token in lower case>_price_data.csv``.
    """
    return os.path.join(data_base_path, f"{token.lower()}_price_data.csv")
def download_data(): def download_data():
cm_or_um = "um" cm_or_um = "um"
symbols = ["ETHUSDT"] symbols = ["ETHUSDT", "BTCUSDT", "SOLUSDT", "BNBUSDT", "ARBUSDT"]
intervals = ["10min"] intervals = ["10min", "1d"]
years = ["2020", "2021", "2022", "2023", "2024"] years = ["2020", "2021", "2022", "2023", "2024"]
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
download_path = binance_data_path download_path = binance_data_path
@ -33,8 +39,10 @@ def download_data():
print(f"Downloaded daily data to {download_path}.") print(f"Downloaded daily data to {download_path}.")
def format_data(): def format_data(token):
files = sorted([x for x in os.listdir(binance_data_path) if x.endswith(".zip")]) files = sorted(
[x for x in os.listdir(binance_data_path) if x.endswith(".zip") and token in x]
)
if len(files) == 0: if len(files) == 0:
return return
@ -75,10 +83,12 @@ def format_data():
price_df.dropna(inplace=True) price_df.dropna(inplace=True)
# Сохраняем данные # Сохраняем данные
training_price_data_path = get_training_data_path(token)
price_df.sort_index().to_csv(training_price_data_path) price_df.sort_index().to_csv(training_price_data_path)
def train_model(): def train_model(token):
training_price_data_path = get_training_data_path(token)
price_data = pd.read_csv(training_price_data_path) price_data = pd.read_csv(training_price_data_path)
# Используем дополнительные признаки # Используем дополнительные признаки
@ -98,57 +108,17 @@ def train_model():
x, y, test_size=0.2, random_state=0 x, y, test_size=0.2, random_state=0
) )
# Train the model
print("Training model...")
model = XGBRegressor() model = XGBRegressor()
model.fit(x_train, y_train) model.fit(x_train, y_train)
print("Model trained.")
os.makedirs(os.path.dirname(model_file_path), exist_ok=True) token_model_path = model_file_path[token]
os.makedirs(os.path.dirname(token_model_path), exist_ok=True)
with open(model_file_path, "wb") as f: with open(token_model_path, "wb") as f:
pickle.dump(model, f) pickle.dump(model, f)
print(f"Trained model saved to {model_file_path}") print(f"Trained model saved to {token_model_path}")
# Optional: Оценка модели # Optional: Оценка модели
y_pred = model.predict(x_test) y_pred = model.predict(x_test)
print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}") print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}")
def get_inference_data(token, period):
    """Build the single-row feature matrix used for inference.

    The last row of the CSV at ``training_price_data_path`` supplies the
    feature values; only the timestamp is moved forward by ``period``.

    Args:
        token: Accepted for interface compatibility; not used here, the
            data is always read from ``training_price_data_path``.
        period: One of ``"1min"``, ``"5min"``, ``"10min"``, ``"30min"``,
            ``"1h"``, ``"1d"``. Any other value falls back to 10 minutes.

    Returns:
        A NumPy array of shape ``(1, 6)`` holding, in order: next timestamp,
        ``price_diff``, ``volatility``, ``volume``, ``moving_avg_7`` and
        ``moving_avg_30``.
    """
    price_data = pd.read_csv(training_price_data_path)

    # Time step for each supported period. The original called an undefined
    # ``minutes(...)`` helper here, which raised NameError on every call;
    # ``timedelta`` takes the value through the ``minutes=`` keyword.
    time_delta_map = {
        "1min": timedelta(minutes=1),
        "5min": timedelta(minutes=5),
        "10min": timedelta(minutes=10),
        "30min": timedelta(minutes=30),
        "1h": timedelta(hours=1),
        "1d": timedelta(days=1),
    }

    # Take the last stored timestamp (parsed as seconds) and step it forward.
    last_timestamp = pd.to_datetime(price_data["timestamp"].iloc[-1], unit="s")
    next_timestamp = last_timestamp + time_delta_map.get(period, timedelta(minutes=10))

    # Reuse the most recent feature values unchanged.
    last_data = price_data.iloc[-1]
    X_new = np.array(
        [
            [
                next_timestamp.timestamp(),
                last_data["price_diff"],
                last_data["volatility"],
                last_data["volume"],
                last_data["moving_avg_7"],
                last_data["moving_avg_30"],
            ]
        ]
    )
    return X_new