allora/updater.py

176 lines
5.5 KiB
Python
Raw Normal View History

2024-03-06 18:53:48 +03:00
import os
from datetime import date, timedelta
import pathlib
import time
2024-03-06 18:53:48 +03:00
import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
2024-03-06 18:53:48 +03:00
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import json
# Define the retry strategy
retry_strategy = Retry(
total=4, # Maximum number of retries
backoff_factor=2, # Exponential backoff factor (e.g., 2 means 1, 2, 4, 8 seconds, ...)
status_forcelist=[429, 500, 502, 503, 504], # HTTP status codes to retry on
)
# Create an HTTP adapter with the retry strategy and mount it to session
adapter = HTTPAdapter(max_retries=retry_strategy)
# Create a new session object
session = requests.Session()
session.mount('http://', adapter)
session.mount('https://', adapter)
files = []
2024-03-06 18:53:48 +03:00
# Function to download the URL, called asynchronously by several child processes
def download_url(url, download_path, name=None):
try:
global files
if name:
file_name = os.path.join(download_path, name)
else:
file_name = os.path.join(download_path, os.path.basename(url))
dir_path = os.path.dirname(file_name)
pathlib.Path(dir_path).mkdir(parents=True, exist_ok=True)
if os.path.isfile(file_name):
# print(f"{file_name} already exists")
return
# Make a request using the session object
response = session.get(url)
if response.status_code == 404:
print(f"File does not exist: {url}")
elif response.status_code == 200:
with open(file_name, 'wb') as f:
f.write(response.content)
# print(f"Downloaded: {url} to {file_name}")
files.append(file_name)
return
else:
print(f"Failed to download {url}")
return
except Exception as e:
print(str(e))
# Function to generate a range of dates
def daterange(start_date, end_date):
for n in range(int((end_date - start_date).days)):
yield start_date + timedelta(n)
# Function to download daily data from Binance
def download_binance_daily_data(pair, training_days, region, download_path):
base_url = f"https://data.binance.vision/data/spot/daily/klines"
end_date = date.today()
start_date = end_date - timedelta(days=int(training_days))
global files
files = []
with ThreadPoolExecutor() as executor:
print(f"Downloading data for {pair}")
for single_date in daterange(start_date, end_date):
url = f"{base_url}/{pair}/1m/{pair}-1m-{single_date}.zip"
executor.submit(download_url, url, download_path)
return files
def download_binance_current_day_data(pair, region):
limit = 1000
base_url = f'https://api.binance.{region}/api/v3/klines?symbol={pair}&interval=1m&limit={limit}'
# Make a request using the session object
response = session.get(base_url)
response.raise_for_status()
resp = str(response.content, 'utf-8').rstrip()
columns = ['start_time','open','high','low','close','volume','end_time','volume_usd','n_trades','taker_volume','taker_volume_usd','ignore']
df = pd.DataFrame(json.loads(resp),columns=columns)
df['date'] = [pd.to_datetime(x+1,unit='ms') for x in df['end_time']]
df['date'] = df['date'].apply(pd.to_datetime)
df[["volume", "taker_volume", "open", "high", "low", "close"]] = df[["volume", "taker_volume", "open", "high", "low", "close"]].apply(pd.to_numeric)
return df.sort_index()
def get_coingecko_coin_id(token):
token_map = {
'ETH': 'ethereum',
'SOL': 'solana',
'BTC': 'bitcoin',
'BNB': 'binancecoin',
'ARB': 'arbitrum',
# Add more tokens here
}
token = token.upper()
if token in token_map:
return token_map[token]
2024-03-06 18:53:48 +03:00
else:
raise ValueError("Unsupported token")
2024-03-06 18:53:48 +03:00
def download_coingecko_data(token, training_days, download_path, CG_API_KEY):
if training_days <= 7:
days = 7
elif training_days <= 14:
days = 14
elif training_days <= 30:
days = 30
elif training_days <= 90:
days = 90
elif training_days <= 180:
days = 180
elif training_days <= 365:
days = 365
else:
days = "max"
print(f"Days: {days}")
2024-03-06 18:53:48 +03:00
coin_id = get_coingecko_coin_id(token)
print(f"Coin ID: {coin_id}")
2024-03-06 18:53:48 +03:00
# Get OHLC data from Coingecko
url = f'https://api.coingecko.com/api/v3/coins/{coin_id}/ohlc?vs_currency=usd&days={days}&api_key={CG_API_KEY}'
2024-03-06 18:53:48 +03:00
global files
files = []
2024-03-06 18:53:48 +03:00
with ThreadPoolExecutor() as executor:
print(f"Downloading data for {coin_id}")
name = os.path.basename(url).split("?")[0].replace("/", "_") + ".json"
executor.submit(download_url, url, download_path, name)
return files
def download_coingecko_current_day_data(token, CG_API_KEY):
coin_id = get_coingecko_coin_id(token)
print(f"Coin ID: {coin_id}")
url = f'https://api.coingecko.com/api/v3/coins/{coin_id}/ohlc?vs_currency=usd&days=1&api_key={CG_API_KEY}'
# Make a request using the session object
response = session.get(url)
response.raise_for_status()
resp = str(response.content, 'utf-8').rstrip()
columns = ['timestamp','open','high','low','close']
df = pd.DataFrame(json.loads(resp), columns=columns)
df['date'] = [pd.to_datetime(x,unit='ms') for x in df['timestamp']]
df['date'] = df['date'].apply(pd.to_datetime)
df[["open", "high", "low", "close"]] = df[["open", "high", "low", "close"]].apply(pd.to_numeric)
return df.sort_index()