Run updater once a day, downloading only the new files

2024-05-29 13:41:09 +03:00
parent 87acbc110a
commit 8b93b94ab2
4 changed files with 16 additions and 6 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ logs/*
 .env
 keys
 data
 inference-data
 worker-data
 head-data
 lib
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,7 +18,9 @@ services:
      interval: 10s
      timeout: 5s
      retries: 12
-    
+    volumes:
      - ./inference-data:/app/data
  updater:
    container_name: updater-basic-eth-pred
    build: .
@@ -28,7 +30,7 @@ services:
      sh -c "
      while true; do
        python -u /app/update_app.py;
-        sleep 60;
+        sleep 24h;
      done
      "
    depends_on:
@@ -114,5 +116,6 @@ networks:
        - subnet: 172.22.0.0/24
 volumes:
  inference-data:
  worker-data:
  head-data:
--- a/update_app.py
+++ b/update_app.py
@@ -4,6 +4,8 @@ import requests
 inference_address = os.environ["INFERENCE_API_ADDRESS"]
 url = f"{inference_address}/update"
 print("UPDATING INFERENCE WORKER DATA")
 response = requests.get(url)
 if response.status_code == 200:
    # Request was successful
--- a/updater.py
+++ b/updater.py
@@ -5,18 +5,22 @@ from concurrent.futures import ThreadPoolExecutor
 # Download a single URL into download_path; meant to be called concurrently (the file imports ThreadPoolExecutor, so workers are threads, not child processes)
 def download_url(url, download_path):
    target_file_path = os.path.join(download_path, os.path.basename(url)) 
    if os.path.exists(target_file_path):
        print(f"File already exists: {url}")
        return
    response = requests.get(url)
    if response.status_code == 404:
        print(f"File not exist: {url}")
    else:
        file_name = os.path.join(download_path, os.path.basename(url))
        # create the entire path if it doesn't exist
-        os.makedirs(os.path.dirname(file_name), exist_ok=True)
+        os.makedirs(os.path.dirname(target_file_path), exist_ok=True)
-        with open(file_name, "wb") as f:
+        with open(target_file_path, "wb") as f:
            f.write(response.content)
-        print(f"Downloaded: {url} to {file_name}")
+        print(f"Downloaded: {url} to {target_file_path}")
 def download_binance_monthly_data(