Compare commits

..

No commits in common. "505ba1a42d4826f9e8a3b285a16f3cd53914dde9" and "9a211a4748cc2e733d5d6095b26517339b2e7b40" have entirely different histories.

4 changed files with 72 additions and 121 deletions

80
app.py
View File

@ -4,78 +4,64 @@ import pandas as pd
import numpy as np import numpy as np
from datetime import datetime from datetime import datetime
from flask import Flask, jsonify, Response from flask import Flask, jsonify, Response
from model import download_data, format_data, train_model, get_training_data_path from model import download_data, format_data, train_model, training_price_data_path
from config import model_file_path from config import model_file_path
app = Flask(__name__) app = Flask(__name__)
def update_data(): def update_data():
"""Download price data, format data and train model for each token.""" """Download price data, format data and train model."""
tokens = ["ETH", "BTC", "SOL", "BNB", "ARB"]
download_data() download_data()
for token in tokens: format_data()
format_data(token) train_model()
train_model(token)
def get_inference(token, period): def get_eth_inference():
"""Load model and predict current price."""
try: try:
model_path = model_file_path[token] with open(model_file_path, "rb") as f:
with open(model_path, "rb") as f:
loaded_model = pickle.load(f) loaded_model = pickle.load(f)
# Загружаем последние данные для данного токена # Загружаем последние данные из файла
training_price_data_path = get_training_data_path(token)
price_data = pd.read_csv(training_price_data_path) price_data = pd.read_csv(training_price_data_path)
# Используем последние значения признаков для предсказания # Используем последние значения признаков для предсказания
last_row = price_data.iloc[-1] X_new = (
last_timestamp = last_row["timestamp"] price_data[
[
# Преобразуем период в секунды "timestamp",
period_seconds = convert_period_to_seconds(period) "price_diff",
new_timestamp = last_timestamp + period_seconds "volatility",
"volume",
# Формируем данные для предсказания с новым timestamp "moving_avg_7",
X_new = np.array( "moving_avg_30",
[ ]
new_timestamp,
last_row["price_diff"],
last_row["volatility"],
last_row["volume"],
last_row["moving_avg_7"],
last_row["moving_avg_30"],
] ]
).reshape(1, -1) .iloc[-1]
.values.reshape(1, -1)
)
# Делаем предсказание # Делаем предсказание
future_price_pred = loaded_model.predict(X_new) current_price_pred = loaded_model.predict(X_new)
return future_price_pred[0]
return current_price_pred[0]
except Exception as e: except Exception as e:
print(f"Error during inference: {str(e)}") print(f"Error during inference: {str(e)}")
raise raise
def convert_period_to_seconds(period): @app.route("/inference/<string:token>")
"""Конвертируем период в секунды.""" def generate_inference(token):
if period.endswith("m"): """Generate inference for given token."""
return int(period[:-1]) * 60 if not token or token != "ETH":
elif period.endswith("h"): error_msg = "Token is required" if not token else "Token not supported"
return int(period[:-1]) * 3600 return Response(
elif period.endswith("d"): json.dumps({"error": error_msg}), status=400, mimetype="application/json"
return int(period[:-1]) * 86400 )
else:
raise ValueError(f"Unknown period format: {period}")
@app.route("/inference/<string:token>/<string:period>")
def generate_inference(token, period):
"""Generate inference for given token and period."""
try: try:
inference = get_inference(token, period) inference = get_eth_inference()
return Response(str(inference), status=200) return Response(str(inference), status=200)
except Exception as e: except Exception as e:
return Response( return Response(
@ -95,4 +81,4 @@ def update():
if __name__ == "__main__": if __name__ == "__main__":
update_data() update_data()
app.run(host="0.0.0.0", port=8080) app.run(host="0.0.0.0", port=8000)

View File

@ -11,50 +11,32 @@
"submitTx": false "submitTx": false
}, },
"worker": [ "worker": [
{ {
"topicId": 1, "topicId": 1,
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5, "inferenceEntrypointName": "api-worker-reputer",
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/10m", "Token": "ETH" } "loopSeconds": 5,
"parameters": {
"InferenceEndpoint": "http://inference:8000/inference/{Token}",
"Token": "ETH"
}
}, },
{ {
"topicId": 2, "topicId": 2,
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5, "inferenceEntrypointName": "api-worker-reputer",
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/24h", "Token": "ETH" } "loopSeconds": 5,
"parameters": {
"InferenceEndpoint": "http://inference:8000/inference/{Token}",
"Token": "ETH"
}
}, },
{ {
"topicId": 3,
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/BTC/10m", "Token": "BTC" }
},
{
"topicId": 4,
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/BTC/24h", "Token": "BTC" }
},
{
"topicId": 5,
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/SOL/10m", "Token": "SOL" }
},
{
"topicId": 6,
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/SOL/24h", "Token": "SOL" }
},
{
"topicId": 7, "topicId": 7,
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5, "inferenceEntrypointName": "api-worker-reputer",
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/20m", "Token": "ETH" } "loopSeconds": 5,
}, "parameters": {
{ "InferenceEndpoint": "http://inference:8000/inference/{Token}",
"topicId": 8, "Token": "ETH"
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5, }
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/BNB/20m", "Token": "BNB" }
},
{
"topicId": 9,
"inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
"parameters": { "InferenceEndpoint": "http://inference:8080/inference/ARB/20m", "Token": "ARB" }
} }
] ]
} }

View File

@ -2,15 +2,4 @@ import os
app_base_path = os.getenv("APP_BASE_PATH", default=os.getcwd()) app_base_path = os.getenv("APP_BASE_PATH", default=os.getcwd())
data_base_path = os.path.join(app_base_path, "data") data_base_path = os.path.join(app_base_path, "data")
model_file_path = os.path.join(data_base_path, "model.pkl")
model_file_path = {
"ETH": os.path.join(data_base_path, "eth_model.pkl"),
"BTC": os.path.join(data_base_path, "btc_model.pkl"),
"SOL": os.path.join(data_base_path, "sol_model.pkl"),
"BNB": os.path.join(data_base_path, "bnb_model.pkl"),
"ARB": os.path.join(data_base_path, "arb_model.pkl"),
}
def get_training_data_path(token):
return os.path.join(data_base_path, f"{token.lower()}_price_data.csv")

View File

@ -1,28 +1,22 @@
import os import os
import pickle import pickle
import numpy as np import numpy as np
from xgboost import XGBRegressor from xgboost import XGBRegressor
from zipfile import ZipFile from zipfile import ZipFile
from datetime import datetime, timedelta from datetime import datetime
import pandas as pd import pandas as pd
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from updater import download_binance_monthly_data, download_binance_daily_data from updater import download_binance_monthly_data, download_binance_daily_data
from config import data_base_path, model_file_path from config import data_base_path, model_file_path
binance_data_path = os.path.join(data_base_path, "binance/futures-klines") binance_data_path = os.path.join(data_base_path, "binance/futures-klines")
training_price_data_path = os.path.join(data_base_path, "eth_price_data.csv")
def get_training_data_path(token):
"""
Возвращает путь к файлу данных для указанного токена.
"""
return os.path.join(data_base_path, f"{token}_price_data.csv")
def download_data(): def download_data():
cm_or_um = "um" cm_or_um = "um"
symbols = ["ETHUSDT", "BTCUSDT", "SOLUSDT", "BNBUSDT", "ARBUSDT"] symbols = ["ETHUSDT"]
intervals = ["10min", "1d"] intervals = ["1d"]
years = ["2020", "2021", "2022", "2023", "2024"] years = ["2020", "2021", "2022", "2023", "2024"]
months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"] months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
download_path = binance_data_path download_path = binance_data_path
@ -39,10 +33,8 @@ def download_data():
print(f"Downloaded daily data to {download_path}.") print(f"Downloaded daily data to {download_path}.")
def format_data(token): def format_data():
files = sorted( files = sorted([x for x in os.listdir(binance_data_path) if x.endswith(".zip")])
[x for x in os.listdir(binance_data_path) if x.endswith(".zip") and token in x]
)
if len(files) == 0: if len(files) == 0:
return return
@ -83,12 +75,10 @@ def format_data(token):
price_df.dropna(inplace=True) price_df.dropna(inplace=True)
# Сохраняем данные # Сохраняем данные
training_price_data_path = get_training_data_path(token)
price_df.sort_index().to_csv(training_price_data_path) price_df.sort_index().to_csv(training_price_data_path)
def train_model(token): def train_model():
training_price_data_path = get_training_data_path(token)
price_data = pd.read_csv(training_price_data_path) price_data = pd.read_csv(training_price_data_path)
# Используем дополнительные признаки # Используем дополнительные признаки
@ -108,17 +98,21 @@ def train_model(token):
x, y, test_size=0.2, random_state=0 x, y, test_size=0.2, random_state=0
) )
# Train the model
print("Training model...")
model = XGBRegressor() model = XGBRegressor()
model.fit(x_train, y_train) model.fit(x_train, y_train)
print("Model trained.")
token_model_path = model_file_path[token] # create the model's parent directory if it doesn't exist
os.makedirs(os.path.dirname(token_model_path), exist_ok=True) os.makedirs(os.path.dirname(model_file_path), exist_ok=True)
with open(token_model_path, "wb") as f: # Save the trained model to a file
with open(model_file_path, "wb") as f:
pickle.dump(model, f) pickle.dump(model, f)
print(f"Trained model saved to {token_model_path}") print(f"Trained model saved to {model_file_path}")
# Optional: Оценка модели # Optional: Оценка модели
y_pred = model.predict(x_test) y_pred = model.predict(x_test)
print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}") print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}")