import os
from datetime import date, timedelta
import pathlib
import time
import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
from concurrent.futures import ThreadPoolExecutor
import pandas as pd
import json

# Define the retry strategy
retry_strategy = Retry(
    total=4,  # Maximum number of retries
    backoff_factor=2,  # Exponential backoff between retries (roughly 2, 4, 8, ... seconds)
    status_forcelist=[429, 500, 502, 503, 504],  # HTTP status codes to retry on
)

# Create an HTTP adapter with the retry strategy and mount it to the session
adapter = HTTPAdapter(max_retries=retry_strategy)

# Create a new session object
session = requests.Session()
session.mount('http://', adapter)
session.mount('https://', adapter)

# Paths of files downloaded so far, shared by the download workers
files = []


# Function to download a URL; called concurrently by the thread pool workers
def download_url(url, download_path, name=None):
    try:
        global files
        if name:
            file_name = os.path.join(download_path, name)
        else:
            file_name = os.path.join(download_path, os.path.basename(url))
        dir_path = os.path.dirname(file_name)
        pathlib.Path(dir_path).mkdir(parents=True, exist_ok=True)
        if os.path.isfile(file_name):
            # print(f"{file_name} already exists")
            return

        # Make a request using the session object
        response = session.get(url)
        if response.status_code == 404:
            print(f"File does not exist: {url}")
        elif response.status_code == 200:
            with open(file_name, 'wb') as f:
                f.write(response.content)
            # print(f"Downloaded: {url} to {file_name}")
            files.append(file_name)
            return
        else:
            print(f"Failed to download {url}")
            return
    except Exception as e:
        print(str(e))


# Function to generate a range of dates
def daterange(start_date, end_date):
    for n in range(int((end_date - start_date).days)):
        yield start_date + timedelta(n)

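# Illustrative example: daterange(date(2024, 1, 1), date(2024, 1, 4)) yields
# 2024-01-01, 2024-01-02 and 2024-01-03; the end date itself is excluded.
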
# Function to download daily data from Binance
def download_binance_daily_data(pair, training_days, region, download_path):
    base_url = "https://data.binance.vision/data/spot/daily/klines"

    end_date = date.today()
    start_date = end_date - timedelta(days=int(training_days))

    global files
    files = []

    with ThreadPoolExecutor() as executor:
        print(f"Downloading data for {pair}")
        for single_date in daterange(start_date, end_date):
            url = f"{base_url}/{pair}/1m/{pair}-1m-{single_date}.zip"
            executor.submit(download_url, url, download_path)

    return files


def download_binance_current_day_data(pair, region):
    limit = 1000
    base_url = f'https://api.binance.{region}/api/v3/klines?symbol={pair}&interval=1m&limit={limit}'

    # Make a request using the session object
    response = session.get(base_url)
    response.raise_for_status()
    resp = str(response.content, 'utf-8').rstrip()

    columns = ['start_time', 'open', 'high', 'low', 'close', 'volume', 'end_time',
               'volume_usd', 'n_trades', 'taker_volume', 'taker_volume_usd', 'ignore']

    df = pd.DataFrame(json.loads(resp), columns=columns)
    df['date'] = [pd.to_datetime(x + 1, unit='ms') for x in df['end_time']]
    df['date'] = df['date'].apply(pd.to_datetime)
    df[["volume", "taker_volume", "open", "high", "low", "close"]] = df[["volume", "taker_volume", "open", "high", "low", "close"]].apply(pd.to_numeric)

    return df.sort_index()


def get_coingecko_coin_id(token):
    token_map = {
        'ETH': 'ethereum',
        'SOL': 'solana',
        'BTC': 'bitcoin',
        'BNB': 'binancecoin',
        'ARB': 'arbitrum',
        # Add more tokens here
    }

    token = token.upper()
    if token in token_map:
        return token_map[token]
    else:
        raise ValueError("Unsupported token")


def download_coingecko_data(token, training_days, download_path, CG_API_KEY):
    if training_days <= 7:
        days = 7
    elif training_days <= 14:
        days = 14
    elif training_days <= 30:
        days = 30
    elif training_days <= 90:
        days = 90
    elif training_days <= 180:
        days = 180
    elif training_days <= 365:
        days = 365
    else:
        days = "max"
    print(f"Days: {days}")

    coin_id = get_coingecko_coin_id(token)
    print(f"Coin ID: {coin_id}")

    # Get OHLC data from Coingecko
    url = f'https://api.coingecko.com/api/v3/coins/{coin_id}/ohlc?vs_currency=usd&days={days}&api_key={CG_API_KEY}'

    global files
    files = []

    with ThreadPoolExecutor() as executor:
        print(f"Downloading data for {coin_id}")
        name = os.path.basename(url).split("?")[0].replace("/", "_") + ".json"
        executor.submit(download_url, url, download_path, name)

    return files


def download_coingecko_current_day_data(token, CG_API_KEY):
    coin_id = get_coingecko_coin_id(token)
    print(f"Coin ID: {coin_id}")

    url = f'https://api.coingecko.com/api/v3/coins/{coin_id}/ohlc?vs_currency=usd&days=1&api_key={CG_API_KEY}'

    # Make a request using the session object
    response = session.get(url)
    response.raise_for_status()
    resp = str(response.content, 'utf-8').rstrip()

    columns = ['timestamp', 'open', 'high', 'low', 'close']

    df = pd.DataFrame(json.loads(resp), columns=columns)
    df['date'] = [pd.to_datetime(x, unit='ms') for x in df['timestamp']]
    df['date'] = df['date'].apply(pd.to_datetime)
    df[["open", "high", "low", "close"]] = df[["open", "high", "low", "close"]].apply(pd.to_numeric)

    return df.sort_index()
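

# A minimal usage sketch: the trading pair, day count, region and download
# path below are hypothetical example values, not mandated by this module.
if __name__ == "__main__":
    # Fetch roughly a month of daily 1m kline archives for an example pair
    binance_files = download_binance_daily_data("ETHUSDT", 30, "com", "data/binance")
    print(f"Downloaded {len(binance_files)} Binance daily archives")

    # Fetch the most recent 1m klines for the same example pair
    live_df = download_binance_current_day_data("ETHUSDT", "com")
    print(live_df.tail())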