-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathbinance_candle_data.py
243 lines (166 loc) · 9.18 KB
/
binance_candle_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#######################
# Author: slurpxbt
#######################
from binance.client import Client
import pandas as pd
import pickle
import datetime
import time
from pathlib import Path
# Maps the interval names accepted by get_candle_data() to the names of the
# matching python-binance ``Client`` constants.
_KLINE_INTERVAL_ATTRS = {
    "1min": "KLINE_INTERVAL_1MINUTE",
    "3min": "KLINE_INTERVAL_3MINUTE",
    "5min": "KLINE_INTERVAL_5MINUTE",
    "15min": "KLINE_INTERVAL_15MINUTE",
    "30min": "KLINE_INTERVAL_30MINUTE",
    "1h": "KLINE_INTERVAL_1HOUR",
    "4h": "KLINE_INTERVAL_4HOUR",
    "6h": "KLINE_INTERVAL_6HOUR",
    "12h": "KLINE_INTERVAL_12HOUR",
    "1D": "KLINE_INTERVAL_1DAY",
}


def get_candle_data(symbol="BTCUSDT", time_interval="1D", start_date=None, end_date=None):
    """
    Download historical candles from Binance and return them as a DataFrame.

    Per the original author's note this function cannot get binance futures
    data. It should be used only the first time; afterwards use
    update_candle_data() instead.

    params:
        symbol       : string -> coin ticker
        time_interval: string -> ["1min", "3min", "5min", "15min", "30min", "1h", "4h", "6h", "12h", "1D"]
        start_date   : datetime.datetime -> from when to collect data; only the
                       calendar date is sent to the API. Defaults to today,
                       evaluated at call time.
        end_date     : datetime.datetime -> until when we want to collect data;
                       ignored when it lies before start_date. It is not sent to
                       the API: rows whose open_time is after it are dropped locally.

    returns:
        pandas.DataFrame with columns open_time, open, high, low, close,
        volume, close_time, number_of_trades

    raises:
        ValueError -> time_interval is not one of the supported names
    """
    # resolve the default here; a default written in the signature would be
    # frozen at import time
    if start_date is None:
        start_date = datetime.datetime.today()

    print(start_date)
    print(end_date)

    if time_interval not in _KLINE_INTERVAL_ATTRS:
        raise ValueError(
            f"unsupported time_interval {time_interval!r}, expected one of {list(_KLINE_INTERVAL_ATTRS)}"
        )

    # an end date before the start date makes no sense, so it is ignored
    if end_date is not None and start_date > end_date:
        end_date = None

    # client initialization
    client = Client()
    kline_interval = getattr(Client, _KLINE_INTERVAL_ATTRS[time_interval])

    # date conversion in american format (month-day-year)
    startDate = f"{start_date.month}-{start_date.day}-{start_date.year}"
    candle_data = client.get_historical_klines(symbol=symbol, interval=kline_interval, start_str=startDate)

    # creates dataframe from list of lists
    candle_data_df = pd.DataFrame(
        candle_data,
        columns=[
            "open_time", "open", "high", "low", "close", "volume", "close_time",
            "quote_asset_volume", "number_of_trades",
            "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore",
        ],
    )

    # converting datatypes
    candle_data_df["open_time"] = pd.to_datetime(candle_data_df["open_time"], unit='ms')  # timestamp -> datetime
    for price_column in ("open", "high", "low", "close"):
        candle_data_df[price_column] = pd.to_numeric(candle_data_df[price_column])  # string -> numeric
    candle_data_df["volume"] = pd.to_numeric(candle_data_df["volume"]).round(2)  # rounds the float
    # the API reports the close as 23:59:59.999, adding 1 millisecond puts it on the full interval boundary
    candle_data_df["close_time"] = pd.to_datetime(candle_data_df["close_time"], unit='ms') + datetime.timedelta(milliseconds=1)

    # column drops (keyword form: the positional axis argument is rejected by pandas 2.x)
    candle_data_df.drop(
        columns=["quote_asset_volume", "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore"],
        inplace=True,
    )

    # row drops
    if end_date is not None:
        # drops rows which are after the specified end date
        candle_data_df.drop(index=candle_data_df[candle_data_df["open_time"] > end_date].index, inplace=True)
        print("dropped data")
    else:
        print("data drop skipped")

    return candle_data_df
def update_candle_data(filepath, ticker, update_date):
    """
    Extend a pickled candle DataFrame with the candles missing up to update_date.

    The candle interval is read from the file name (for example
    "BTCUSDT_15min.p" -> "15min"). When the stored data ends at least one
    calendar day before update_date, the missing candles are downloaded with
    get_candle_data(), merged into the stored data and written back to filepath.

    params:
        filepath   : string - path to the file
        ticker     : string - ticker name
        update_date: datetime.datetime - date to where we want to update our data

    raises:
        ValueError - no known interval name is found in filepath, or the
                     stored data has no rows
    """
    intervals = ["1min", "3min", "5min", "15min", "30min", "1h", "4h", "6h", "12h", "1D"]  # possible time intervals

    # finds the data time_frame from the file name; longer names are tried first
    # so that "15min" is not mistaken for "5min"
    longest_first = sorted(intervals, key=len, reverse=True)
    file_name = Path(filepath).name
    time_frame = next((name for name in longest_first if name in file_name), None)
    if time_frame is None:
        # fall back to the whole path, e.g. when the interval is a directory name
        time_frame = next((name for name in longest_first if name in str(filepath)), None)
    if time_frame is None:
        raise ValueError(f"cannot determine candle interval from file path {filepath!r}")

    # loads data from file
    # NOTE: unpickling can execute arbitrary code, only load files you created yourself
    with open(filepath, "rb") as data_file:
        data = pickle.load(data_file)
    if len(data) == 0:
        raise ValueError(f"no data points stored in {filepath!r}")

    last_data_point = data["open_time"].iloc[-1]                    # open time of the last stored candle
    missing_data = update_date.date() - last_data_point.date()       # how many days of data are missing

    # start of the update, truncated to the minute
    startDate = datetime.datetime(last_data_point.year, last_data_point.month, last_data_point.day, last_data_point.hour, last_data_point.minute, 0, 0)

    print(f"start date: {startDate}")
    print(f"end date: {update_date}")

    # updates data if there is atleast 1 day of missing data
    if missing_data.days > 0:
        print(f"updating missing days of data {missing_data.days}")
        update = get_candle_data(ticker, time_frame, startDate, update_date)  # gets missing data

        # the download starts at the beginning of the last stored day, so it
        # overlaps the stored candles; the freshly downloaded row wins
        data = pd.concat([data, update], ignore_index=True)
        data = data.drop_duplicates(subset="open_time", keep="last").reset_index(drop=True)

        with open(filepath, "wb") as data_file:
            pickle.dump(data, data_file)
    else:
        print(f"no missing data for {ticker}")
def get_data_by_date(start_date, end_date, file_path):
    """
    Load pickled candle data and return only the rows inside a date range.

    Both bounds are inclusive and are compared against the "open_time" column.
    The returned DataFrame is reindexed from 0.

    params:
        start_date: datetime.datetime -> start date from when you want to select data
        end_date: datetime.datetime -> end date to where you want your data to be selected
        file_path: string -> path to your data file

    returns:
        pandas.DataFrame with the selected rows
    """
    # NOTE: unpickling can execute arbitrary code, only load files you created yourself
    with open(file_path, "rb") as data_file:
        data = pickle.load(data_file)  # load data

    data_df = pd.DataFrame(data)  # convert data to dataframe

    open_time = data_df["open_time"]
    outside_range = (open_time < start_date) | (open_time > end_date)  # rows before start or after end

    # reset_index returns a new frame, so its result has to be the one returned
    return data_df.loc[~outside_range].reset_index(drop=True)
###########################################################################################################
# main function is meant just for testing purposes when developing new functions and testing current ones
###########################################################################################################
def main():
    """
    Manual smoke test: download daily BTCUSDT candles, store them, run the
    update function on the stored file and print every stored candle.

    Needs network access to Binance and writes to ./data_test/.
    """
    root = Path(".")  # set root for filepath to current directory

    # start date
    s_day = 1
    s_month = 1
    s_year = 2018
    startDate = datetime.datetime(s_year, s_month, s_day)

    candle_interval = "1D"  # [15min, 30min, 1h, 4h, 6h, 1D]
    ticker = "BTCUSDT"

    # end date
    e_day = 20
    e_month = 9
    e_year = 2020
    endDate = datetime.datetime(e_year, e_month, e_day)

    # ----------------------------------------------------------------------------------------------------------------------
    # data path where we want to have data; kept as a string because
    # update_candle_data() searches the path text for the interval name
    data_path = f"{root}/data_test/{ticker}_{candle_interval}.p"
    (root / "data_test").mkdir(parents=True, exist_ok=True)  # the dump below fails if the folder is missing

    # get data
    daily_data = get_candle_data(time_interval=candle_interval, symbol=ticker, start_date=startDate, end_date=endDate)

    # dumps downloaded data into file
    with open(data_path, "wb") as data_file:
        pickle.dump(daily_data, data_file)
    print("data loaded------------------------------------------------------------")

    up_date = datetime.datetime(2020, 1, 1)                      # date to where we want to update data
    update_candle_data(data_path, ticker, update_date=up_date)   # update data function call

    with open(data_path, "rb") as data_file:
        updated_data = pickle.load(data_file)
    # ------------------------------------------------------------------------------------------------------------------------

    start = time.time()
    # column order: open_time, open, high, low, close, volume, close_time, number_of_trades
    for row in updated_data.itertuples(index=False, name=None):
        open_date = row[0]
        close_date = row[6]
        open_ = round(float(row[1]), 2)
        high = round(float(row[2]), 2)
        low = round(float(row[3]), 2)
        close = round(float(row[4]), 2)
        volume = round(float(row[5]), 2)  # BTC volume
        no_trades = row[7]

        print("open_date:", open_date, "|open:", open_, "|high:", high, "|low:", low, "|close:", close, "|close_date:", close_date, "number of trades", no_trades)

    print("printig took ", time.time()-start, "s")
# if __name__ == "__main__":
# main()