Add universal inference for multiple periods

This commit is contained in:
vvzvlad 2024-09-04 22:22:25 +03:00
parent 9a211a4748
commit 2475e22c1a
2 changed files with 84 additions and 34 deletions

70
app.py
View File

@ -17,8 +17,7 @@ def update_data():
train_model()
def get_eth_inference():
"""Load model and predict current price."""
def get_inference(token, period):
try:
with open(model_file_path, "rb") as f:
loaded_model = pickle.load(f)
@ -27,41 +26,56 @@ def get_eth_inference():
price_data = pd.read_csv(training_price_data_path)
# Используем последние значения признаков для предсказания
X_new = (
price_data[
[
"timestamp",
"price_diff",
"volatility",
"volume",
"moving_avg_7",
"moving_avg_30",
]
last_row = price_data.iloc[-1]
# Получаем последний timestamp
last_timestamp = last_row["timestamp"]
# Преобразуем период в секунды (пример)
period_seconds = convert_period_to_seconds(period)
# Рассчитываем новый временной штамп на основе периода
new_timestamp = last_timestamp + period_seconds
# timestamp
X_new = np.array(
[
new_timestamp,
last_row["price_diff"],
last_row["volatility"],
last_row["volume"],
last_row["moving_avg_7"],
last_row["moving_avg_30"],
]
.iloc[-1]
.values.reshape(1, -1)
)
).reshape(1, -1)
# Делаем предсказание
current_price_pred = loaded_model.predict(X_new)
# Предсказание
future_price_pred = loaded_model.predict(X_new)
return future_price_pred[0]
return current_price_pred[0]
except Exception as e:
print(f"Error during inference: {str(e)}")
raise
@app.route("/inference/<string:token>")
def generate_inference(token):
"""Generate inference for given token."""
if not token or token != "ETH":
error_msg = "Token is required" if not token else "Token not supported"
return Response(
json.dumps({"error": error_msg}), status=400, mimetype="application/json"
)
def convert_period_to_seconds(period):
    """Convert a period string such as "10min", "1h" or "2d" to seconds.

    The string must be an integer count followed by one of the unit
    suffixes ``min`` (minutes), ``h`` (hours) or ``d`` (days).

    Args:
        period: Period string, e.g. ``"10min"``.

    Returns:
        The period length in seconds as an ``int``.

    Raises:
        ValueError: If the suffix is not recognised or the part before the
            suffix is not a valid integer.
    """
    # Suffixes are checked in this order; none of them is a suffix of
    # another, so at most one can match.
    units = (("min", 60), ("h", 3600), ("d", 86400))

    for suffix, seconds_per_unit in units:
        if period.endswith(suffix):
            count_text = period[: -len(suffix)]
            try:
                return int(count_text) * seconds_per_unit
            except ValueError as err:
                # Report a missing/non-numeric count with the same message
                # as an unknown suffix instead of int()'s generic error.
                raise ValueError(f"Unknown period format: {period}") from err

    raise ValueError(f"Unknown period format: {period}")
@app.route("/inference/<string:token>/<string:period>")
def generate_inference(token, period):
"""Generate inference for given token and period."""
try:
inference = get_eth_inference()
inference = get_inference(token, period)
return Response(str(inference), status=200)
except Exception as e:
return Response(
@ -81,4 +95,4 @@ def update():
if __name__ == "__main__":
update_data()
app.run(host="0.0.0.0", port=8000)
app.run(host="0.0.0.0", port=8127)

View File

@ -1,9 +1,9 @@
import os
import pickle
import numpy as np
import numpy as np
from xgboost import XGBRegressor
from zipfile import ZipFile
from datetime import datetime
from datetime import datetime, timedelta
import pandas as pd
from sklearn.model_selection import train_test_split
from updater import download_binance_monthly_data, download_binance_daily_data
@ -16,7 +16,7 @@ training_price_data_path = os.path.join(data_base_path, "eth_price_data.csv")
def download_data():
cm_or_um = "um"
symbols = ["ETHUSDT"]
intervals = ["1d"]
intervals = ["10min"]
years = ["2020", "2021", "2022", "2023", "2024"]
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
download_path = binance_data_path
@ -104,10 +104,8 @@ def train_model():
model.fit(x_train, y_train)
print("Model trained.")
# create the model's parent directory if it doesn't exist
os.makedirs(os.path.dirname(model_file_path), exist_ok=True)
# Save the trained model to a file
with open(model_file_path, "wb") as f:
pickle.dump(model, f)
@ -115,4 +113,42 @@ def train_model():
# Optional: Оценка модели
y_pred = model.predict(x_test)
print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}")
print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}")
def get_inference_data(token, period, price_data=None):
    """Build the single-row feature matrix for predicting the next period.

    The row reuses the feature values of the last record in the price data
    and replaces its timestamp with ``last timestamp + period``.

    Args:
        token: Token symbol. Not used by this function; the data source does
            not depend on it.
        period: Forecast horizon, one of ``"1min"``, ``"5min"``, ``"10min"``,
            ``"30min"``, ``"1h"`` or ``"1d"``. Any other value falls back to
            10 minutes.
        price_data: Optional already-loaded ``DataFrame`` holding the columns
            read below. When omitted, the CSV at ``training_price_data_path``
            is read.

    Returns:
        A ``numpy`` array of shape ``(1, 6)`` with the values
        ``[next_timestamp, price_diff, volatility, volume, moving_avg_7,
        moving_avg_30]``.
    """
    if price_data is None:
        price_data = pd.read_csv(training_price_data_path)

    # Time step to add for each supported period.
    time_delta_map = {
        "1min": timedelta(minutes=1),
        "5min": timedelta(minutes=5),
        "10min": timedelta(minutes=10),
        "30min": timedelta(minutes=30),
        "1h": timedelta(hours=1),
        "1d": timedelta(days=1),
    }
    step = time_delta_map.get(period, timedelta(minutes=10))

    # The timestamp column is interpreted as seconds since the Unix epoch.
    last_timestamp = pd.to_datetime(price_data["timestamp"].iloc[-1], unit="s")
    next_timestamp = last_timestamp + step

    # Reuse the most recent feature values for the prediction row.
    # NOTE(review): the column order presumably has to match the order used
    # when the model was trained; confirm against train_model.
    last_data = price_data.iloc[-1]
    X_new = np.array(
        [
            [
                next_timestamp.timestamp(),
                last_data["price_diff"],
                last_data["volatility"],
                last_data["volume"],
                last_data["moving_avg_7"],
                last_data["moving_avg_30"],
            ]
        ]
    )

    return X_new