-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathjio_scraping.py
79 lines (66 loc) · 2.51 KB
/
jio_scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
"""Scrape product names and prices from a JioMart category listing.

Walks every page of the category (20 products per listing page) and
writes one CSV row per product to ``products_jio.csv``.
"""

import csv
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Category listing to scrape -- change both URLs to target a different one.
BASE_URL = "https://www.jiomart.com/c/groceries/dairy-bakery/dairy/62"
PAGE_URL = BASE_URL + "/page/{}"  # subsequent pages are .../page/2, /page/3, ...
PRODUCTS_PER_PAGE = 20  # JioMart lists 20 products per page
OUTPUT_FILE = "products_jio.csv"


def _fetch_soup(url):
    """Download *url* and return the parsed HTML document."""
    # Context manager closes the connection even if read/parse fails.
    with urlopen(url) as response:
        return BeautifulSoup(response.read(), "html.parser")


def _extract_products(page_soup):
    """Yield (name, strike_price, price) for every product card on a page.

    A card without a strike-through (pre-discount) price yields an empty
    string for that field instead of raising IndexError, which the
    original per-container code did on undiscounted products.
    """
    for card in page_soup.find_all("div", {"class": "col-md-3 p-0"}):
        name_tag = card.find("span", {"class": "clsgetname"})
        price_tag = card.find("span", {"id": "final_price"})
        if name_tag is None or price_tag is None:
            continue  # layout div, not an actual product card
        strike_tag = card.find("strike", {"id": "price"})
        yield (
            name_tag.text.strip(),
            strike_tag.text.strip() if strike_tag is not None else "",
            price_tag.text.strip(),
        )


def main():
    """Scrape every page of the category and write the CSV report."""
    first_page = _fetch_soup(BASE_URL)
    total_tag = first_page.find("span", {"id": "total_count"})
    remaining = int(total_tag.text)  # total product count across all pages
    print(f"total products: {remaining}")

    # newline="" per the csv module docs; csv.writer quotes fields that
    # contain commas, which the hand-rolled " , " writer corrupted.
    with open(OUTPUT_FILE, "w", newline="", encoding="utf-8") as out:
        writer = csv.writer(out)
        writer.writerow(["product", "strike_price", "price"])

        page = 1
        page_soup = first_page
        while True:
            for row in _extract_products(page_soup):
                print(*row)
                writer.writerow(row)
            print(f"page {page} completed")
            remaining -= PRODUCTS_PER_PAGE
            if remaining <= 0:
                break  # all pages consumed
            page += 1
            page_soup = _fetch_soup(PAGE_URL.format(page))


if __name__ == "__main__":
    main()