-
Notifications
You must be signed in to change notification settings - Fork 7
/
scraper.py
55 lines (46 loc) · 1.69 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# I am not liable for any damage.
# Use this at your own risk!
# Scraping is against Etherscan's Terms of Service;
# your IP might get banned for making too many requests.
import csv
import sys
import datetime
import requests
from time import sleep
from bs4 import BeautifulSoup
def scraper(num_pages=1, req_delay=0.1):
    """Scrape Etherscan's verified-contracts listing into a timestamped CSV.

    Fetches ``num_pages`` pages (100 contracts per page via ``?ps=100``),
    sleeping ``req_delay`` seconds before each request, and writes one CSV
    row per contract to ``VerifiedContracts-<timestamp>.csv``.

    :param num_pages: number of listing pages to fetch (1-based).
    :param req_delay: seconds to sleep between page requests (rate limiting).
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    print("%d pages to parse with delay of %f seconds between each page" % (num_pages, req_delay))
    api_url = "https://etherscan.io/contractsVerified/"
    # newline='' is required by the csv module so it controls line endings
    # itself (otherwise every row is followed by a blank line on Windows).
    with open('VerifiedContracts-' + timestamp + '.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['addr', 'contract_name', 'compiler', 'balance', 'tx_count', 'date_verified']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for i in range(1, num_pages + 1):
            url = api_url + str(i) + '?ps=100'
            sleep(req_delay)
            response = requests.get(url)
            print("URL: %s, Status: %s" % (url, response.status_code))
            if response.status_code != 200:
                # Don't try to parse an error/captcha page; move on to the next one.
                continue
            soup = BeautifulSoup(response.content, 'html.parser')
            for row in soup.select('table.table-hover tbody tr'):
                cells = [td.text for td in row.findAll('td')]
                if len(cells) != 7:
                    # Defensively skip rows that don't match the expected
                    # 7-column layout (e.g. if Etherscan changes the markup)
                    # instead of crashing on tuple unpacking.
                    continue
                addr, contract_name, compiler, balance, tx_count, _settings, date_verified = cells
                writer.writerow({
                    'addr': addr,
                    'contract_name': contract_name,
                    'compiler': compiler,
                    'balance': balance,
                    'tx_count': tx_count,
                    'date_verified': date_verified,
                })
def main():
    """Command-line entry point.

    Usage: scraper.py [num_pages [req_delay]]
    Extra arguments beyond the first two are ignored.
    """
    args = sys.argv[1:]
    if not args:
        scraper()
    elif len(args) == 1:
        scraper(int(args[0]))
    else:
        scraper(int(args[0]), float(args[1]))


if __name__ == "__main__":
    main()