allora/updater.py
clementupshot cb8b17a3ed
Add real-time data fetching and configuration options
See README and .env.example
2024-09-04 20:24:42 +02:00

176 lines
5.5 KiB
Python

import os
from datetime import date, timedelta
import pathlib
import time
import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import json
# Define the retry strategy
retry_strategy = Retry(
total=4, # Maximum number of retries
backoff_factor=2, # Exponential backoff factor (e.g., 2 means 1, 2, 4, 8 seconds, ...)
status_forcelist=[429, 500, 502, 503, 504], # HTTP status codes to retry on
)
# Create an HTTP adapter with the retry strategy and mount it to session
adapter = HTTPAdapter(max_retries=retry_strategy)
# Create a new session object
session = requests.Session()
session.mount('http://', adapter)
session.mount('https://', adapter)
files = []
# Function to download the URL, called asynchronously by several child processes
def download_url(url, download_path, name=None):
try:
global files
if name:
file_name = os.path.join(download_path, name)
else:
file_name = os.path.join(download_path, os.path.basename(url))
dir_path = os.path.dirname(file_name)
pathlib.Path(dir_path).mkdir(parents=True, exist_ok=True)
if os.path.isfile(file_name):
# print(f"{file_name} already exists")
return
# Make a request using the session object
response = session.get(url)
if response.status_code == 404:
print(f"File does not exist: {url}")
elif response.status_code == 200:
with open(file_name, 'wb') as f:
f.write(response.content)
# print(f"Downloaded: {url} to {file_name}")
files.append(file_name)
return
else:
print(f"Failed to download {url}")
return
except Exception as e:
print(str(e))
# Function to generate a range of dates
def daterange(start_date, end_date):
for n in range(int((end_date - start_date).days)):
yield start_date + timedelta(n)
# Function to download daily data from Binance
def download_binance_daily_data(pair, training_days, region, download_path):
base_url = f"https://data.binance.vision/data/spot/daily/klines"
end_date = date.today()
start_date = end_date - timedelta(days=int(training_days))
global files
files = []
with ThreadPoolExecutor() as executor:
print(f"Downloading data for {pair}")
for single_date in daterange(start_date, end_date):
url = f"{base_url}/{pair}/1m/{pair}-1m-{single_date}.zip"
executor.submit(download_url, url, download_path)
return files
def download_binance_current_day_data(pair, region):
limit = 1000
base_url = f'https://api.binance.{region}/api/v3/klines?symbol={pair}&interval=1m&limit={limit}'
# Make a request using the session object
response = session.get(base_url)
response.raise_for_status()
resp = str(response.content, 'utf-8').rstrip()
columns = ['start_time','open','high','low','close','volume','end_time','volume_usd','n_trades','taker_volume','taker_volume_usd','ignore']
df = pd.DataFrame(json.loads(resp),columns=columns)
df['date'] = [pd.to_datetime(x+1,unit='ms') for x in df['end_time']]
df['date'] = df['date'].apply(pd.to_datetime)
df[["volume", "taker_volume", "open", "high", "low", "close"]] = df[["volume", "taker_volume", "open", "high", "low", "close"]].apply(pd.to_numeric)
return df.sort_index()
def get_coingecko_coin_id(token):
token_map = {
'ETH': 'ethereum',
'SOL': 'solana',
'BTC': 'bitcoin',
'BNB': 'binancecoin',
'ARB': 'arbitrum',
# Add more tokens here
}
token = token.upper()
if token in token_map:
return token_map[token]
else:
raise ValueError("Unsupported token")
def download_coingecko_data(token, training_days, download_path, CG_API_KEY):
if training_days <= 7:
days = 7
elif training_days <= 14:
days = 14
elif training_days <= 30:
days = 30
elif training_days <= 90:
days = 90
elif training_days <= 180:
days = 180
elif training_days <= 365:
days = 365
else:
days = "max"
print(f"Days: {days}")
coin_id = get_coingecko_coin_id(token)
print(f"Coin ID: {coin_id}")
# Get OHLC data from Coingecko
url = f'https://api.coingecko.com/api/v3/coins/{coin_id}/ohlc?vs_currency=usd&days={days}&api_key={CG_API_KEY}'
global files
files = []
with ThreadPoolExecutor() as executor:
print(f"Downloading data for {coin_id}")
name = os.path.basename(url).split("?")[0].replace("/", "_") + ".json"
executor.submit(download_url, url, download_path, name)
return files
def download_coingecko_current_day_data(token, CG_API_KEY):
coin_id = get_coingecko_coin_id(token)
print(f"Coin ID: {coin_id}")
url = f'https://api.coingecko.com/api/v3/coins/{coin_id}/ohlc?vs_currency=usd&days=1&api_key={CG_API_KEY}'
# Make a request using the session object
response = session.get(url)
response.raise_for_status()
resp = str(response.content, 'utf-8').rstrip()
columns = ['timestamp','open','high','low','close']
df = pd.DataFrame(json.loads(resp), columns=columns)
df['date'] = [pd.to_datetime(x,unit='ms') for x in df['timestamp']]
df['date'] = df['date'].apply(pd.to_datetime)
df[["open", "high", "low", "close"]] = df[["open", "high", "low", "close"]].apply(pd.to_numeric)
return df.sort_index()