Compare commits

17 commits: 714bf4c863 ... XGBRegress
Commits in this range:

1ba4c0158d · fc7097fd50 · 4b7f57d0dd · 61fa099391 · 520416b772 · 3f17f7f0b7 · 59672292e2 · 505ba1a42d · 7fd61d13e5 · ca552f5a7a · 2475e22c1a · 9a211a4748 · 14e8c74962 · c7cc0079a8 · c5522e8c72 · 7ecfd10d50 · d75baceae9
```diff
@@ -4,6 +4,8 @@ FROM amd64/python:3.9-buster as project_env
 # Set the working directory in the container
 WORKDIR /app
 
+ENV FLASK_ENV=production
+
 # Install dependencies
 COPY requirements.txt requirements.txt
 RUN pip install --upgrade pip setuptools \
```
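The only functional change here is baking FLASK_ENV=production into the image. Note that Flask 2.2 deprecated FLASK_ENV and Flask 2.3 removed it (FLASK_DEBUG replaced it), so whether this variable does anything depends on the Flask version the image installs. A minimal sketch of reading it explicitly, which is not part of the diff:

```python
import os

# The image sets FLASK_ENV=production; older Flask releases used it to
# pick debug/production behavior, Flask 2.3+ ignores it entirely.
is_production = os.environ.get("FLASK_ENV", "production") == "production"
print(f"production mode: {is_production}")
```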
							
								
								
									
app.py (80 changed lines)

```diff
@@ -4,43 +4,83 @@ import pandas as pd
 import numpy as np
 from datetime import datetime
 from flask import Flask, jsonify, Response
-from model import download_data, format_data, train_model
+from model import download_data, format_data, train_model, get_training_data_path
 from config import model_file_path
 
 app = Flask(__name__)
 
 
 def update_data():
-    """Download price data, format data and train model."""
+    """Download price data, format data and train model for each token."""
+    tokens = ["ETH", "BTC", "SOL", "BNB", "ARB"]
     download_data()
-    format_data()
-    train_model()
+    for token in tokens:
+        format_data(token)
+        train_model(token)
 
 
-def get_eth_inference():
-    """Load model and predict current price."""
-    with open(model_file_path, "rb") as f:
-        loaded_model = pickle.load(f)
+def get_inference(token, period):
+    try:
+        model_path = model_file_path[token]
 
-    now_timestamp = pd.Timestamp(datetime.now()).timestamp()
-    X_new = np.array([now_timestamp]).reshape(-1, 1)
-    current_price_pred = loaded_model.predict(X_new)
+        with open(model_path, "rb") as f:
+            loaded_model = pickle.load(f)
 
-    return current_price_pred[0]
+        # Load the latest data for this token
+        training_price_data_path = get_training_data_path(token)
+        price_data = pd.read_csv(training_price_data_path)
+
+        # Use the latest feature values for the prediction
+        last_row = price_data.iloc[-1]
+        last_timestamp = last_row["timestamp"]
+
+        # Convert the period to seconds
+        period_seconds = convert_period_to_seconds(period)
+        new_timestamp = last_timestamp + period_seconds
+
+        # Build the prediction input with the new timestamp
+        X_new = np.array(
+            [
+                new_timestamp,
+                last_row["price_diff"],
+                last_row["volatility"],
+                last_row["volume"],
+                last_row["moving_avg_7"],
+                last_row["moving_avg_30"],
+            ]
+        ).reshape(1, -1)
+
+        # Make the prediction
+        future_price_pred = loaded_model.predict(X_new)
+        return future_price_pred[0]
+
+    except Exception as e:
+        print(f"Error during inference: {str(e)}")
+        raise
+
+
+def convert_period_to_seconds(period):
+    """Convert the period to seconds."""
+    if period.endswith("m"):
+        return int(period[:-1]) * 60
+    elif period.endswith("h"):
+        return int(period[:-1]) * 3600
+    elif period.endswith("d"):
+        return int(period[:-1]) * 86400
+    else:
+        raise ValueError(f"Unknown period format: {period}")
 
 
-@app.route("/inference/<string:token>")
-def generate_inference(token):
-    """Generate inference for given token."""
-    if not token or token != "ETH":
-        error_msg = "Token is required" if not token else "Token not supported"
-        return Response(json.dumps({"error": error_msg}), status=400, mimetype='application/json')
+@app.route("/inference/<string:token>/<string:period>")
+def generate_inference(token, period):
+    """Generate inference for given token and period."""
     try:
-        inference = get_eth_inference()
+        inference = get_inference(token, period)
         return Response(str(inference), status=200)
     except Exception as e:
-        return Response(json.dumps({"error": str(e)}), status=500, mimetype='application/json')
+        return Response(
+            json.dumps({"error": str(e)}), status=500, mimetype="application/json"
+        )
 
 
 @app.route("/update")
@@ -55,4 +95,4 @@ def update():
 
 if __name__ == "__main__":
     update_data()
-    app.run(host="0.0.0.0", port=8000)
+    app.run(host="0.0.0.0", port=8080)
```
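Two things worth noting about this rewrite: the old 400 branch for unsupported tokens is gone, so an unknown token now surfaces as a 500 via the KeyError from model_file_path[token]; and the period arithmetic is simple suffix parsing, e.g. convert_period_to_seconds("10m") is 600 and "24h" is 86400. A small sketch for exercising the new route, assuming the requests package and the container publishing port 8080 as in the compose file below:

```python
import requests

# Hit the new two-segment /inference/<token>/<period> route.
for token, period in [("ETH", "10m"), ("BTC", "24h"), ("ARB", "20m")]:
    resp = requests.get(f"http://localhost:8080/inference/{token}/{period}")
    print(token, period, resp.status_code, resp.text)
```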
							
								
								
									
config.json (58 changed lines)

```diff
@@ -7,36 +7,54 @@
         "gasAdjustment": 1.0,
         "nodeRpc": "###RPC_URL###",
         "maxRetries": 10,
-        "delay": 10,
-        "submitTx": false
+        "delay": 30,
+        "submitTx": true
     },
     "worker": [
         {
             "topicId": 1,
-            "inferenceEntrypointName": "api-worker-reputer",
-            "loopSeconds": 5,
-            "parameters": {
-                "InferenceEndpoint": "http://inference:8000/inference/{Token}",
-                "Token": "ETH"
-            }
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/10m", "Token": "ETH" }
         },
         {
             "topicId": 2,
-            "inferenceEntrypointName": "api-worker-reputer",
-            "loopSeconds": 5,
-            "parameters": {
-                "InferenceEndpoint": "http://inference:8000/inference/{Token}",
-                "Token": "ETH"
-            }
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/24h", "Token": "ETH" }
+        },
+        {
+            "topicId": 3,
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/BTC/10m", "Token": "BTC" }
+        },
+        {
+            "topicId": 4,
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/BTC/24h", "Token": "BTC" }
+        },
+        {
+            "topicId": 5,
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/SOL/10m", "Token": "SOL" }
+        },
+        {
+            "topicId": 6,
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/SOL/24h", "Token": "SOL" }
         },
         {
             "topicId": 7,
-            "inferenceEntrypointName": "api-worker-reputer",
-            "loopSeconds": 5,
-            "parameters": {
-                "InferenceEndpoint": "http://inference:8000/inference/{Token}",
-                "Token": "ETH"
-            }
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/ETH/20m", "Token": "ETH" }
+        },
+        {
+            "topicId": 8,
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/BNB/20m", "Token": "BNB" }
+        },
+        {
+            "topicId": 9,
+            "inferenceEntrypointName": "api-worker-reputer", "loopSeconds": 5,
+            "parameters": { "InferenceEndpoint": "http://inference:8080/inference/ARB/20m", "Token": "ARB" }
         }
     ]
 }
```
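For reference, the worker list now hard-codes token and period per topic instead of using the {Token} placeholder; the resulting map is:

| topicId | Token | Period |
|---|---|---|
| 1 | ETH | 10m |
| 2 | ETH | 24h |
| 3 | BTC | 10m |
| 4 | BTC | 24h |
| 5 | SOL | 10m |
| 6 | SOL | 24h |
| 7 | ETH | 20m |
| 8 | BNB | 20m |
| 9 | ARB | 20m |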
							
								
								
									
config.py (13 changed lines)

```diff
@@ -2,4 +2,15 @@ import os
 
 app_base_path = os.getenv("APP_BASE_PATH", default=os.getcwd())
 data_base_path = os.path.join(app_base_path, "data")
-model_file_path = os.path.join(data_base_path, "model.pkl")
+
+model_file_path = {
+    "ETH": os.path.join(data_base_path, "eth_model.pkl"),
+    "BTC": os.path.join(data_base_path, "btc_model.pkl"),
+    "SOL": os.path.join(data_base_path, "sol_model.pkl"),
+    "BNB": os.path.join(data_base_path, "bnb_model.pkl"),
+    "ARB": os.path.join(data_base_path, "arb_model.pkl"),
+}
+
+
+def get_training_data_path(token):
+    return os.path.join(data_base_path, f"{token.lower()}_price_data.csv")
```
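One inconsistency to flag: this helper lowercases the token, but model.py below defines its own get_training_data_path without .lower(), and app.py imports the model.py version, so for upper-case tokens the two produce different file names (btc_price_data.csv vs BTC_price_data.csv). A sketch of what the new config resolves to inside the container, where WORKDIR /app makes os.getcwd() return /app:

```python
from config import model_file_path, get_training_data_path

# Per-token artifacts now live side by side under data/.
print(model_file_path["BTC"])         # /app/data/btc_model.pkl
print(get_training_data_path("BTC"))  # /app/data/btc_price_data.csv
```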
```diff
@@ -4,12 +4,12 @@ services:
     build: .
     command: python -u /app/app.py
     ports:
-      - "8000:8000"
+      - "8080:8080"
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:8000/inference/ETH"]
-      interval: 10s
+      test: ["CMD", "curl", "-f", "http://localhost:8080/inference/ETH/10m"]
+      interval: 30s
       timeout: 5s
-      retries: 12
+      retries: 20
     volumes:
       - ./inference-data:/app/data
     restart: always
@@ -18,7 +18,7 @@ services:
     container_name: updater-basic-eth-pred
     build: .
     environment:
-      - INFERENCE_API_ADDRESS=http://inference:8000
+      - INFERENCE_API_ADDRESS=http://inference:8080
     command: >
       sh -c "
       while true; do
```
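The healthcheck now probes the new route on port 8080 every 30s with up to 20 retries. A rough Python equivalent of what that curl-based check asserts, useful for probing from outside the container (assumes requests):

```python
import sys
import requests

# Succeed (exit 0) only if the inference endpoint answers 2xx in 5 seconds.
try:
    resp = requests.get("http://localhost:8080/inference/ETH/10m", timeout=5)
    sys.exit(0 if resp.ok else 1)
except requests.RequestException:
    sys.exit(1)
```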
				
```diff
@@ -2,7 +2,6 @@ import subprocess
 import json
 import sys
 import time
-import os
 
 def is_json(myjson):
     try:
@@ -11,7 +10,7 @@ def is_json(myjson):
         return False
     return True
 
-def parse_logs():
+def parse_logs(timeout):
     start_time = time.time()
     while True:
         unsuccessful_attempts = 0
@@ -50,26 +49,28 @@ def parse_logs():
                             return False, "Max Retry Reached"
         except Exception as e:
             print(f"Exception occurred: {e}", flush=True)
-        finally:
-            process.stdout.close()
 
         print("Sleeping before next log request...", flush=True)
         time.sleep(30)
 
-        if time.time() - start_time > 30 * 60:
-            print("Timeout reached: 30 minutes elapsed without success.", flush=True)
-            return False, "Timeout reached: 30 minutes elapsed without success."
+        if time.time() - start_time > timeout * 60:
+            print(f"Timeout reached: {timeout} minutes elapsed without success.", flush=True)
+            return False, f"Timeout reached: {timeout} minutes elapsed without success."
 
     return False, "No Success"
 
 if __name__ == "__main__":
     print("Parsing logs...")
-    result = parse_logs()
+    if len(sys.argv) > 1:
+        timeout = eval(sys.argv[1])
+    else:
+        timeout = 30
+    result = parse_logs(timeout)
     print(result[1])
     if result[0] == False:
         print("Exiting 1...")
-        os._exit(1)
+        sys.exit(1)
     else:
         print("Exiting 0...")
-        os._exit(0)
+        sys.exit(0)
```
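One caveat in this hunk: the new CLI argument is parsed with eval(), which executes arbitrary code from the command line. Since the value is only ever used as a number of minutes, a plain int() parse is the safe equivalent (a suggestion, not part of the diff):

```python
import sys

# Safer drop-in for `timeout = eval(sys.argv[1])`: the timeout is an
# integer number of minutes, so parse it as one.
timeout = int(sys.argv[1]) if len(sys.argv) > 1 else 30
```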
							
								
								
									
model.py (93 changed lines)

```diff
@@ -1,23 +1,28 @@
 import os
 import pickle
-from zipfile import ZipFile
-from datetime import datetime
-import pandas as pd
 import numpy as np
+from xgboost import XGBRegressor
+from zipfile import ZipFile
+from datetime import datetime, timedelta
+import pandas as pd
 from sklearn.model_selection import train_test_split
-from sklearn import linear_model
 from updater import download_binance_monthly_data, download_binance_daily_data
 from config import data_base_path, model_file_path
 
 binance_data_path = os.path.join(data_base_path, "binance/futures-klines")
-training_price_data_path = os.path.join(data_base_path, "eth_price_data.csv")
+
+
+def get_training_data_path(token):
+    """
+    Return the path to the data file for the given token.
+    """
+    return os.path.join(data_base_path, f"{token}_price_data.csv")
 
 
 def download_data():
     cm_or_um = "um"
-    symbols = ["ETHUSDT"]
-    intervals = ["1d"]
+    symbols = ["ETHUSDT", "BTCUSDT", "SOLUSDT", "BNBUSDT", "ARBUSDT"]
+    intervals = ["10min", "1d"]
     years = ["2020", "2021", "2022", "2023", "2024"]
     months = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12"]
     download_path = binance_data_path
@@ -34,20 +39,17 @@ def download_data():
     print(f"Downloaded daily data to {download_path}.")
 
 
-def format_data():
-    files = sorted([x for x in os.listdir(binance_data_path)])
+def format_data(token):
+    files = sorted(
+        [x for x in os.listdir(binance_data_path) if x.endswith(".zip") and token in x]
+    )
 
-    # No files to process
     if len(files) == 0:
         return
 
     price_df = pd.DataFrame()
     for file in files:
         zip_file_path = os.path.join(binance_data_path, file)
-
-        if not zip_file_path.endswith(".zip"):
-            continue
-
         myzip = ZipFile(zip_file_path)
         with myzip.open(myzip.filelist[0]) as f:
             line = f.readline()
@@ -70,38 +72,53 @@ def format_data():
         df.index.name = "date"
         price_df = pd.concat([price_df, df])
 
+    price_df["timestamp"] = price_df.index.map(pd.Timestamp.timestamp)
+    price_df["price_diff"] = price_df["close"].diff()
+    price_df["volatility"] = (price_df["high"] - price_df["low"]) / price_df["open"]
+    price_df["volume"] = price_df["volume"]
+    price_df["moving_avg_7"] = price_df["close"].rolling(window=7).mean()
+    price_df["moving_avg_30"] = price_df["close"].rolling(window=30).mean()
+
+    # Drop rows with NaN values
+    price_df.dropna(inplace=True)
+
+    # Save the data
+    training_price_data_path = get_training_data_path(token)
     price_df.sort_index().to_csv(training_price_data_path)
 
 
-def train_model():
-    # Load the eth price data
+def train_model(token):
+    training_price_data_path = get_training_data_path(token)
     price_data = pd.read_csv(training_price_data_path)
-    df = pd.DataFrame()
 
-    # Convert 'date' to a numerical value (timestamp) we can use for regression
-    df["date"] = pd.to_datetime(price_data["date"])
-    df["date"] = df["date"].map(pd.Timestamp.timestamp)
-
-    df["price"] = price_data[["open", "close", "high", "low"]].mean(axis=1)
+    # Use the additional features
+    x = price_data[
+        [
+            "timestamp",
+            "price_diff",
+            "volatility",
+            "volume",
+            "moving_avg_7",
+            "moving_avg_30",
+        ]
+    ]
+    y = price_data["close"]
 
-    # Reshape the data to the shape expected by sklearn
-    x = df["date"].values.reshape(-1, 1)
-    y = df["price"].values.reshape(-1, 1)
-
-    # Split the data into training set and test set
-    x_train, _, y_train, _ = train_test_split(x, y, test_size=0.2, random_state=0)
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=0
+    )
 
-    # Train the model
-    print("Training model...")
-    model = linear_model.Lasso(alpha=0.1)
+    model = XGBRegressor()
     model.fit(x_train, y_train)
-    print("Model trained.")
 
-    # create the model's parent directory if it doesn't exist
-    os.makedirs(os.path.dirname(model_file_path), exist_ok=True)
+    token_model_path = model_file_path[token]
+    os.makedirs(os.path.dirname(token_model_path), exist_ok=True)
 
-    # Save the trained model to a file
-    with open(model_file_path, "wb") as f:
+    with open(token_model_path, "wb") as f:
         pickle.dump(model, f)
 
-    print(f"Trained model saved to {model_file_path}")
+    print(f"Trained model saved to {token_model_path}")
+
+    # Optional: model evaluation
+    y_pred = model.predict(x_test)
+    print(f"Mean Absolute Error: {np.mean(np.abs(y_test - y_pred))}")
```
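Two small observations: price_df["volume"] = price_df["volume"] is a no-op since the column already exists, and the imported timedelta is unused. At inference time the six features must keep the training column order, which is what app.py's X_new does. A sketch of consuming a saved model the same way (file name and feature values are illustrative placeholders):

```python
import pickle
import numpy as np

# Load a model written by train_model() and predict from the six features
# in training order: timestamp, price_diff, volatility, volume,
# moving_avg_7, moving_avg_30. The numbers are made-up placeholders.
with open("data/eth_model.pkl", "rb") as f:
    model = pickle.load(f)

X_new = np.array([[1.72e9, 12.5, 0.031, 5400.0, 3050.2, 2980.7]])
print(model.predict(X_new)[0])
```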
```diff
@@ -5,3 +5,12 @@ pandas==2.1.3
 Requests==2.32.0
 scikit_learn==1.3.2
 werkzeug>=3.0.3 # not directly required, pinned by Snyk to avoid a vulnerability
+itsdangerous
+Jinja2
+MarkupSafe
+python-dateutil
+pytz
+scipy
+six
+scikit-learn
+xgboost
```
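Of these additions, xgboost is the one genuinely new direct dependency (model.py now imports XGBRegressor); itsdangerous, Jinja2 and MarkupSafe are Flask's own dependencies, and python-dateutil, pytz, scipy and six already come in transitively via pandas and scikit-learn. Note also that scikit-learn is now listed twice; pip normalizes scikit_learn and scikit-learn to the same project, so the ==1.3.2 pin still governs. A trivial post-build check:

```python
# Confirm the new direct dependency is importable in the built image.
import xgboost
print(xgboost.__version__)
```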