add universal for many periods

This commit is contained in:
vvzvlad 2024-09-04 22:22:25 +03:00
parent 9a211a4748
commit 2475e22c1a
2 changed files with 84 additions and 34 deletions

70
app.py
View File

@ -17,8 +17,7 @@ def update_data():
train_model() train_model()
def get_eth_inference(): def get_inference(token, period):
"""Load model and predict current price."""
try: try:
with open(model_file_path, "rb") as f: with open(model_file_path, "rb") as f:
loaded_model = pickle.load(f) loaded_model = pickle.load(f)
@ -27,41 +26,56 @@ def get_eth_inference():
price_data = pd.read_csv(training_price_data_path) price_data = pd.read_csv(training_price_data_path)
# Используем последние значения признаков для предсказания # Используем последние значения признаков для предсказания
X_new = ( last_row = price_data.iloc[-1]
price_data[
[ # Получаем последний timestamp
"timestamp", last_timestamp = last_row["timestamp"]
"price_diff",
"volatility", # Преобразуем период в секунды (пример)
"volume", period_seconds = convert_period_to_seconds(period)
"moving_avg_7",
"moving_avg_30", # Рассчитываем новый временной штамп на основе периода
] new_timestamp = last_timestamp + period_seconds
# timestamp
X_new = np.array(
[
new_timestamp,
last_row["price_diff"],
last_row["volatility"],
last_row["volume"],
last_row["moving_avg_7"],
last_row["moving_avg_30"],
] ]
.iloc[-1] ).reshape(1, -1)
.values.reshape(1, -1)
)
# Делаем предсказание # Предсказание
current_price_pred = loaded_model.predict(X_new) future_price_pred = loaded_model.predict(X_new)
return future_price_pred[0]
return current_price_pred[0]
except Exception as e: except Exception as e:
print(f"Error during inference: {str(e)}") print(f"Error during inference: {str(e)}")
raise raise
@app.route("/inference/<string:token>") def convert_period_to_seconds(period):
def generate_inference(token): """Конвертируем период в секунды."""
"""Generate inference for given token.""" if period.endswith("min"):
if not token or token != "ETH": return int(period[:-3]) * 60
error_msg = "Token is required" if not token else "Token not supported" elif period.endswith("h"):
return Response( return int(period[:-1]) * 3600
json.dumps({"error": error_msg}), status=400, mimetype="application/json" elif period.endswith("d"):
) return int(period[:-1]) * 86400
else:
raise ValueError(f"Unknown period format: {period}")
@app.route("/inference/<string:token>/<string:period>")
def generate_inference(token, period):
"""Generate inference for given token and period."""
try: try:
inference = get_eth_inference() inference = get_inference(token, period)
return Response(str(inference), status=200) return Response(str(inference), status=200)
except Exception as e: except Exception as e:
return Response( return Response(
@ -81,4 +95,4 @@ def update():
if __name__ == "__main__": if __name__ == "__main__":
update_data() update_data()
app.run(host="0.0.0.0", port=8000) app.run(host="0.0.0.0", port=8127)

View File

@ -1,9 +1,9 @@
import os import os
import pickle import pickle
import numpy as np import numpy as np
from xgboost import XGBRegressor from xgboost import XGBRegressor
from zipfile import ZipFile from zipfile import ZipFile
from datetime import datetime from datetime import datetime, timedelta
import pandas as pd import pandas as pd
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from updater import download_binance_monthly_data, download_binance_daily_data from updater import download_binance_monthly_data, download_binance_daily_data
@ -16,7 +16,7 @@ training_price_data_path = os.path.join(data_base_path, "eth_price_data.csv")
def download_data(): def download_data():
cm_or_um = "um" cm_or_um = "um"
symbols = ["ETHUSDT"] symbols = ["ETHUSDT"]
intervals = ["1d"] intervals = ["10min"]
years = ["2020", "2021", "2022", "2023", "2024"] years = ["2020", "2021", "2022", "2023", "2024"]
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
download_path = binance_data_path download_path = binance_data_path
@ -104,10 +104,8 @@ def train_model():
model.fit(x_train, y_train) model.fit(x_train, y_train)
print("Model trained.") print("Model trained.")
# create the model's parent directory if it doesn't exist
os.makedirs(os.path.dirname(model_file_path), exist_ok=True) os.makedirs(os.path.dirname(model_file_path), exist_ok=True)
# Save the trained model to a file
with open(model_file_path, "wb") as f: with open(model_file_path, "wb") as f:
pickle.dump(model, f) pickle.dump(model, f)
@ -115,4 +113,42 @@ def train_model():
# Optional: Оценка модели # Optional: Оценка модели
y_pred = model.predict(x_test) y_pred = model.predict(x_test)
print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}") print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}")
def get_inference_data(token, period):
    """
    Build the feature row used for inference for the given token and period.

    Reads the most recent row of the training price data, advances its
    timestamp by the requested period, and returns the features as a
    (1, 6) numpy array suitable for model.predict().

    Args:
        token: Asset symbol (currently unused by this function; kept for
            interface parity with the inference route).
        period: Period string — one of "1min", "5min", "10min", "30min",
            "1h", "1d". Unknown values fall back to 10 minutes.

    Returns:
        np.ndarray of shape (1, 6): [timestamp, price_diff, volatility,
        volume, moving_avg_7, moving_avg_30].
    """
    price_data = pd.read_csv(training_price_data_path)

    # Map the period string to a time offset.
    # BUG FIX: the original wrote timedelta(minutes(5)) etc., calling a
    # nonexistent `minutes` function — a NameError at runtime. timedelta
    # takes keyword arguments: timedelta(minutes=5).
    time_delta_map = {
        "1min": timedelta(minutes=1),
        "5min": timedelta(minutes=5),
        "10min": timedelta(minutes=10),
        "30min": timedelta(minutes=30),
        "1h": timedelta(hours=1),
        "1d": timedelta(days=1),
    }

    # Last known timestamp (stored as Unix seconds in the CSV) advanced by
    # the period; unknown periods default to 10 minutes, matching the
    # 10-minute training interval used elsewhere in this commit.
    last_timestamp = pd.to_datetime(price_data["timestamp"].iloc[-1], unit="s")
    next_timestamp = last_timestamp + time_delta_map.get(period, timedelta(minutes=10))

    # Reuse the most recent feature values, only projecting the timestamp
    # forward — the other features are held constant for the prediction.
    last_data = price_data.iloc[-1]
    X_new = np.array(
        [
            [
                next_timestamp.timestamp(),
                last_data["price_diff"],
                last_data["volatility"],
                last_data["volume"],
                last_data["moving_avg_7"],
                last_data["moving_avg_30"],
            ]
        ]
    )
    return X_new