get_ohlcv
ohlcvを取得してcsvまたはpickle形式で保存する
・csv:ExcelやSpreadsheetで直接中身を見られる
・pickle:爆速で読み込めるのでバックテストに便利だけど、read_pickleで読み込まないと中身を見られない
参考:https://note.com/shiba_memo_note/n/ne9bbbfd818dc
pickleについて:https://note.nkmk.me/python-pandas-to-pickle-read-pickle/
Bybit
# jupyterでasyncio使う時に必要
# nest_asyncio.apply()
import math
import os
import sys
import time
from datetime import datetime, timedelta, timezone
import pandas as pd
import pybotters
from rich import print
from tqdm.asyncio import tqdm
# ===== User settings (edit these) =====
symbol = "BTCUSD"
interval = 1  # candle width in minutes
interval_list = [1, 5, 15, 30, 60, 240, 360, 720]  # widths accepted by the check below
start_time = '2021-1-1 00:00'
end_time = '2021-2-1 00:00'
# ----- Output formats -----
create_csv = True  # True or False
create_pkl = True  # True or False
# ----- Derived parameters -----
# NOTE(review): strptime() returns naive datetimes, so .timestamp() interprets
# them in the machine's local timezone — confirm that is the intended zone.
start_time = int(datetime.strptime(start_time, '%Y-%m-%d %H:%M').timestamp())
end_time = int(datetime.strptime(end_time, '%Y-%m-%d %H:%M').timestamp())
diff = end_time - start_time  # length of the requested window in seconds
if interval not in interval_list:
    print("intervalの値が異常です. {}の中から選んでください".format(interval_list))
    # Exit with a non-zero status so callers/shells can detect the bad setting.
    sys.exit(1)
#-----get_ohlcv-----
def get_ohlcv(start_time):
    """Download Bybit klines page by page and save them as CSV and/or pickle.

    Reads the module-level settings ``symbol``, ``interval``, ``diff``,
    ``end_time``, ``create_csv`` and ``create_pkl``.

    Args:
        start_time: Unix timestamp in seconds of the first candle to request.

    Returns:
        A DataFrame of float columns indexed by ``open_time`` (converted to
        Asia/Tokyo) that holds only candles opening before ``end_time``.

    Raises:
        RuntimeError: If no candle inside the requested window was returned.
    """
    base_url = 'https://api.bybit.com'
    # The cursor is advanced by 200 candles per request, i.e. the code assumes
    # each response carries up to 200 rows.
    page_size = 200
    page_count = math.ceil(diff / interval / 60 / page_size)
    cursor = start_time
    pages = []
    for _ in tqdm(range(page_count), desc="{}分足データ取得中".format(interval)):
        response = pybotters.get(base_url + '/v2/public/kline/list', params={
            'symbol': symbol,
            'interval': interval,
            'from': cursor,
        })
        rows = response.json()['result']
        if rows:
            pages.append(pd.DataFrame(rows))
        cursor += page_size * 60 * interval
    if not pages:
        raise RuntimeError("Bybit returned no kline data for the requested window")
    # Concatenate once instead of growing a DataFrame inside the loop.
    df = pd.concat(pages, ignore_index=True)
    df['open_time'] = pd.to_datetime(df['open_time'], unit='s', utc=True)
    df = df.set_index('open_time').tz_convert('Asia/Tokyo')
    df = df.drop(columns=["symbol", "interval"])
    df = df.astype('float')
    # The last page may run past the requested end, so trim it.
    cutoff = datetime.fromtimestamp(end_time, timezone(timedelta(hours=9)))
    df = df[df.index < cutoff]
    if df.empty:
        raise RuntimeError("No Bybit candles fall inside the requested window")
    print(df)
    # ----- Save in the selected formats -----
    if create_csv or create_pkl:
        os.makedirs("Data", exist_ok=True)
        # Use the configured symbol so the file name always matches the data.
        stem = "Data/{0}min_Bybit_{1}_{2} to {3}".format(
            int(interval),
            symbol,
            df.index[0].strftime('%Y-%m-%d'),
            df.index[-1].strftime('%Y-%m-%d'),
        )
        if create_csv:
            df.to_csv(stem + ".csv")
        if create_pkl:
            df.to_pickle(stem + ".pkl")
    return df
if __name__ == '__main__':
    # Time the whole run, including creation of the output directory.
    began_at = time.time()
    os.makedirs("Data", exist_ok=True)
    get_ohlcv(start_time)
    elapsed = round(time.time() - began_at, 2)
    print('{}sec'.format(elapsed))
Binance
# jupyterでasyncio使う時に必要
# nest_asyncio.apply()
import math
import os
import sys
import time
from datetime import datetime, timedelta, timezone
import pandas as pd
import pybotters
from rich import print
from tqdm.asyncio import tqdm
# ===== User settings (edit these) =====
symbol = "BTCUSDT"
interval = 1  # candle width in minutes
# Maps a candle width in minutes to the interval string sent to the API.
interval_min_list = {1:'1m', 3:'3m', 5:'5m', 15:'15m', 30:'30m', 60:'1h', 120:'2h',240:'4h', 360:'6h', 480:'8h', 720:'12h', 1440:'1d'}
# Derived from the mapping so the accepted widths can never drift from it.
interval_list = list(interval_min_list)
start_time = '2021-1-1 00:00'
end_time = '2021-2-1 00:00'
# ----- Output formats -----
create_csv = True  # True or False
create_pkl = True  # True or False
# ----- Derived parameters -----
# NOTE(review): strptime() returns naive datetimes, so .timestamp() interprets
# them in the machine's local timezone — confirm that is the intended zone.
start_time = int(datetime.strptime(start_time, '%Y-%m-%d %H:%M').timestamp())
end_time = int(datetime.strptime(end_time, '%Y-%m-%d %H:%M').timestamp())
diff = end_time - start_time  # length of the requested window in seconds
start_time = start_time * 10**3  # start is kept in milliseconds; end_time stays in seconds
if interval not in interval_list:
    print("intervalの値が異常です. {}の中から選んでください".format(interval_list))
    # Exit with a non-zero status so callers/shells can detect the bad setting.
    sys.exit(1)
#-----get_ohlcv-----
def get_ohlcv(startTime):
    """Download Binance futures klines page by page and save them to disk.

    Reads the module-level settings ``symbol``, ``interval``,
    ``interval_min_list``, ``diff``, ``end_time``, ``create_csv`` and
    ``create_pkl``.

    Args:
        startTime: Unix timestamp in milliseconds of the first candle to request.

    Returns:
        A DataFrame of float columns indexed by ``open_time`` (converted to
        Asia/Tokyo) that holds only candles opening before ``end_time``.

    Raises:
        RuntimeError: If a response is not a list of klines, or if no candle
            inside the requested window was returned.
    """
    base_url = 'https://fapi.binance.com'
    page_size = 500  # sent as ``limit`` and used to advance the cursor
    kline_columns = ['open_time','open', 'high', 'low', 'close', 'volume', 'close_time', 'Quote asset volume', 'Number of trades', 'Taker buy base asset volume', 'Taker buy quote asset volume', 'Ignore.']
    page_count = math.ceil(diff / interval / 60 / page_size)
    cursor = startTime
    pages = []
    for _ in tqdm(range(page_count), desc="{}分足データ取得中".format(interval)):
        response = pybotters.get(base_url + '/fapi/v1/klines', params={
            'symbol': symbol,
            'interval': interval_min_list[interval],
            'startTime': cursor,
            'limit': page_size,
        })
        rows = response.json()
        if not isinstance(rows, list):
            # Anything other than a list of rows cannot be parsed as klines.
            raise RuntimeError("Unexpected Binance response: {!r}".format(rows))
        if rows:
            pages.append(pd.DataFrame(rows, columns=kline_columns))
        cursor += page_size * 60 * interval * 10**3
    if not pages:
        raise RuntimeError("Binance returned no kline data for the requested window")
    # Concatenate once instead of growing a DataFrame inside the loop.
    df = pd.concat(pages, ignore_index=True)
    df['open_time'] = pd.to_datetime(df['open_time'], unit='ms', utc=True)
    df = df.set_index('open_time').tz_convert('Asia/Tokyo')
    df = df.drop(columns=["Ignore."])
    df = df.astype('float')
    # The last page may run past the requested end, so trim it.
    cutoff = datetime.fromtimestamp(end_time, timezone(timedelta(hours=9)))
    df = df[df.index < cutoff]
    if df.empty:
        raise RuntimeError("No Binance candles fall inside the requested window")
    print(df)
    # ----- Save in the selected formats -----
    if create_csv or create_pkl:
        os.makedirs("Data", exist_ok=True)
        # One shared stem keeps the CSV and pickle names consistent (the pickle
        # name used to lack the underscore after "min").
        stem = "Data/{0}min_Binance_{1}_{2} to {3}".format(
            int(interval),
            symbol,
            df.index[0].strftime('%Y-%m-%d'),
            df.index[-1].strftime('%Y-%m-%d'),
        )
        if create_csv:
            df.to_csv(stem + ".csv")
        if create_pkl:
            df.to_pickle(stem + ".pkl")
    return df
if __name__ == '__main__':
    # Time the whole run, including creation of the output directory.
    began_at = time.time()
    os.makedirs("Data", exist_ok=True)
    get_ohlcv(start_time)
    elapsed = round(time.time() - began_at, 2)
    print('{}sec'.format(elapsed))
FTX
# 参考
# https://note.com/shiba_memo_note/n/ne9bbbfd818dc
# jupyterでasyncio使う時に必要
# nest_asyncio.apply()
import math
import os
import sys
import time
from datetime import datetime
import pandas as pd
import pybotters
from rich import print
from tqdm.asyncio import tqdm
# ===== User settings (edit these) =====
symbol = 'BTC-PERP'
resolution = 60  # candle width in seconds
resolution_list = [15, 60, 300, 900, 3600, 14400, 86400]  # widths accepted by the check below
start_time = '2021-01-01 00:00'
end_time = '2021-02-01 00:00'
# ----- Output formats -----
create_csv = True  # True or False
create_pkl = True  # True or False
# =========================
# ----- Derived parameters -----
limit = 5000  # candles requested per API call
# NOTE(review): strptime() returns naive datetimes, so .timestamp() interprets
# them in the machine's local timezone — confirm that is the intended zone.
start_time = int(datetime.strptime(start_time, '%Y-%m-%d %H:%M').timestamp())
end_time = int(datetime.strptime(end_time, '%Y-%m-%d %H:%M').timestamp())
diff = end_time - start_time  # length of the requested window in seconds
if resolution not in resolution_list:
    print("resolutionの値が異常です. {}の中から選んでください".format(resolution_list))
    # Exit with a non-zero status so callers/shells can detect the bad setting.
    sys.exit(1)
#-----get_ohlcv-----
def get_ohlcv(end_time):
    """Download FTX candles page by page, newest first, and save them to disk.

    Reads the module-level settings ``symbol``, ``resolution``, ``limit``,
    ``start_time``, ``diff``, ``create_csv`` and ``create_pkl``.

    Args:
        end_time: Unix timestamp in seconds marking the (exclusive) end of the
            requested window.

    Returns:
        A DataFrame with float ``open``/``high``/``low``/``close``/``volume``
        columns indexed by ``open_time`` (converted to Asia/Tokyo).

    Raises:
        RuntimeError: If no candle was returned for the requested window.
    """
    base_url = 'https://ftx.com/api/'
    # Round up so a trailing partial page (or a window shorter than one page)
    # is still requested; round() silently dropped it.
    page_count = math.ceil(diff / resolution / limit)
    pages = []
    for page in tqdm(range(page_count), desc="{}分足データ取得中".format(int(resolution/60))):
        response = pybotters.get(base_url + f'markets/{symbol}/candles', params={
            'resolution': resolution,
            'limit': limit,
            'start_time': start_time,
            # Each page moves the upper bound back by one page of candles.
            # NOTE(review): this assumes the API returns the most recent
            # ``limit`` candles of the window — confirm against the API docs.
            'end_time': end_time - page * limit * resolution - 1,
        })
        rows = response.json()['result']
        if rows:
            pages.append(pd.DataFrame(rows))
    if not pages:
        raise RuntimeError("FTX returned no candle data for the requested window")
    # Pages were fetched newest-first; reverse them so rows are chronological.
    df = pd.concat(pages[::-1], ignore_index=True)
    # ``time`` is converted as epoch milliseconds.
    df['open_time'] = pd.to_datetime(df['time'], unit='ms', utc=True)
    df = df.set_index('open_time').tz_convert('Asia/Tokyo')
    df = df.reindex(columns=['open', 'high', 'low', 'close', 'volume'])
    df = df.astype('float')
    print(df)
    # ----- Save in the selected formats -----
    if create_csv or create_pkl:
        os.makedirs("Data", exist_ok=True)
        # Use the configured symbol (the old name hard-coded the typo
        # "BTC-PREP"); "/" is replaced so it cannot act as a path separator.
        stem = "Data/{0}min_FTX_{1}_{2} to {3}".format(
            int(resolution/60),
            symbol.replace('/', '-'),
            df.index[0].strftime('%Y-%m-%d'),
            df.index[-1].strftime('%Y-%m-%d'),
        )
        if create_csv:
            df.to_csv(stem + ".csv")
        if create_pkl:
            df.to_pickle(stem + ".pkl")
    return df
if __name__ == '__main__':
    # Time the whole run, including creation of the output directory.
    began_at = time.time()
    os.makedirs("Data", exist_ok=True)
    get_ohlcv(end_time)
    elapsed = round(time.time() - began_at, 2)
    print('{}sec'.format(elapsed))