-
Notifications
You must be signed in to change notification settings - Fork 0
/
shunfengguoji.py
186 lines (120 loc) · 6.42 KB
/
shunfengguoji.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# -*- coding: utf-8 -*-
import random
import requests
import time
import redis
import json
import threading
import datetime
class Download:
    """Download SF-Express tracking details from trackingmore.com.

    Tracking numbers are popped from the Redis set ``shunfeng_url``. Every
    response that parses successfully is pushed to the Redis list
    ``shuefeng_item``; numbers whose response cannot be parsed are added to
    the Redis set ``shunfeng_absent``.
    """

    # Candidate pauses (seconds) between requests; one is picked at random
    # so the request rhythm is not perfectly regular.
    SLEEP_SECONDS = (0.6, 0.8, 1.0, 1.2, 1.3, 1.6, 1.5, 1.7, 1.8, 1.9, 2.0,
                     2.2, 2.4, 2.8, 2.9, 3.0, 3.4, 3.2, 2.5)

    def __init__(self, redis_host='192.168.3.83', redis_port=6379):
        """Prepare the User-Agent pool, the Redis client and the URL parts.

        :param redis_host: host name / IP of the Redis server.
        :param redis_port: TCP port of the Redis server.
        """
        # Mobile browser User-Agent strings; one is chosen at random per request.
        self.user_agent_list = [
            "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) "
            "CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1",
            "Mozilla/5.0 (Linux; U; Android 5.1; zh-cn; m1 metal Build/LMY47I) AppleWebKit/537.36 "
            "(KHTML, like Gecko)Version/4.0 Chrome/37.0.0.0 MQQBrowser/7.6 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 5.1.1; vivo X7 Build/LMY47V; wv) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Version/4.0 Chrome/48.0.2564.116 Mobile Safari/537.36 baiduboxapp/8.6.5 (Baidu; P1 5.1.1)",
            "Mozilla/5.0 (Linux; Android 6.0; MP1512 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Version/4.0 Chrome/35.0.1916.138 Mobile Safari/537.36 T7/7.4 baiduboxapp/8.4 (Baidu; P1 6.0)",
            "Mozilla/5.0 (Linux; U; Android 4.4.4; zh-cn; X9007 Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko)"
            "Version/4.0 Chrome/37.0.0.0 MQQBrowser/7.6 Mobile Safari/537.36",
            "Mozilla/5.0 (iPhone 6s; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) "
            "Version/10.0 MQQBrowser/7.6.0 Mobile/14E304 Safari/8536.25 MttCustomUA/2 QBWebViewType/1 WKType/1",
            "Mozilla/5.0 (Linux; U; Android 6.0.1; zh-cn; vivo Xplay6 Build/MXB48T) AppleWebKit/537.36 "
            "(KHTML, like Gecko)Version/4.0 Chrome/37.0.0.0 MQQBrowser/7.6 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 6.0.1; SM-A9000 Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Version/4.0 Chrome/48.0.2564.116 Mobile Safari/537.36 baiduboxapp/8.6.5 (Baidu; P1 6.0.1)",
            "Mozilla/5.0 (Linux; Android 6.0.1; vivo X9Plus Build/MMB29M; wv) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Version/4.0 Chrome/48.0.2564.116 Mobile Safari/537.36 baiduboxapp/8.6.5 (Baidu; P1 6.0.1)",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 10_2 like Mac OS X) AppleWebKit/602.3.12 "
            "(KHTML, like Gecko) Mobile/14C92 MicroMessenger/6.5.9 NetType/WIFI Language/zh_C",
            "Mozilla/5.0 (Linux; Android 7.1.1; OPPO R11t Build/NMF26X; wv) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Version/4.0 Chrome/53.0.2785.49 Mobile MQQBrowser/6.2 TBS/043307 Safari/537.36 "
            "MicroMessenger/6.5.8.1060 NetType/WIFI Language/zh_CN",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) "
            "CriOS/56.0.2924.75 Mobile/14E5239e Safari/602.1",
            "Mozilla/5.0 (Linux; U; Android 7.0; zh-cn; MI 5 Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko)"
            "Version/4.0 Chrome/37.0.0.0 MQQBrowser/7.1 Mobile Safari/537.36",
        ]
        self.redis_client = redis.Redis(host=redis_host, port=redis_port)
        # The request URL is: shunfeng_url + <tracking number> + bash_url.
        self.shunfeng_url = 'https://www.trackingmore.com/gettracedetail.php?lang=cn&callback=' \
                            'jQuery17105806801095112044_1521165921825&tracknumber='
        self.bash_url = '&express=sf-express&pt=0&tracm=&destination=&exception=0&_=1521165988944'

    @staticmethod
    def _build_headers(user_agent):
        """Return the HTTP request headers sent with every tracking query."""
        return {
            'Host': 'www.trackingmore.com',
            'Connection': 'keep-alive',
            # A media range must not contain spaces around the slash.
            'Accept': '*/*',
            # 'br' is not advertised: requests can only decode brotli when an
            # optional brotli package is installed, gzip/deflate always work.
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'referer': 'https://www.trackingmore.com/sf-express-tracking/cn.html',
            'user-agent': user_agent,
        }

    @staticmethod
    def _parse_jsonp(text):
        """Decode the JSON payload of a ``callback({...})`` JSONP response.

        A body that is already plain JSON (starts with ``{`` or ``[``) is
        decoded as-is.

        :param text: response body.
        :return: the decoded JSON value.
        :raises ValueError: if no JSON payload can be extracted or decoded.
        """
        body = text.strip()
        if body[:1] in ('{', '['):
            return json.loads(body)
        # Slice between the first '(' and the last ')' instead of str.lstrip,
        # which strips a *set of characters* rather than a prefix and breaks
        # on a trailing ';' or newline.
        start = body.find('(')
        end = body.rfind(')')
        if start == -1 or end < start:
            raise ValueError('not a JSONP response')
        return json.loads(body[start + 1:end])

    def get_url(self, timeout=10):
        """Worker loop: pop tracking numbers from Redis until the set is empty.

        Each popped number is handed to :meth:`download_data`.

        :param timeout: HTTP timeout in seconds used for each request.
        :return: None
        """
        while True:
            raw = self.redis_client.spop('shunfeng_url')
            if not raw:
                break
            # Redis returns bytes unless the client decodes responses itself.
            if isinstance(raw, bytes):
                tracking_number = raw.decode('utf-8')
            else:
                tracking_number = str(raw)
            self.download_data(tracking_number, timeout=timeout)

    def download_data(self, tracking_number, num_tries=5, timeout=10):
        """Fetch and store the tracking details of one tracking number.

        :param tracking_number: the tracking number to query.
        :param num_tries: maximum number of HTTP attempts (at least one
            attempt is always made).
        :param timeout: HTTP timeout in seconds for each attempt.
        :return: None
        """
        url = self.shunfeng_url + tracking_number + self.bash_url

        response = None
        for _ in range(max(num_tries, 1)):
            # A fresh random User-Agent is used for every attempt.
            headers = self._build_headers(random.choice(self.user_agent_list))
            try:
                response = requests.get(url, timeout=timeout, headers=headers)
            except requests.RequestException as e:
                print("获取数据%s出错" % tracking_number)
                print(e)
            else:
                break

        if response is None:
            # NOTE(review): the number was already popped from the queue by
            # the caller, so after the last failed attempt it is dropped.
            return

        try:
            j = self._parse_jsonp(response.text)
            # Indexing doubles as validation: there must be at least one
            # tracking entry for the number to count as recognised.
            j['originCountryData']['trackinfo'][0]
        except (ValueError, KeyError, IndexError, TypeError):
            print('无法识别的物流单号%s' % tracking_number)
            self.redis_client.sadd('shunfeng_absent', tracking_number)
        else:
            print(j['originCountryData']['trackinfo'])
            j['nu'] = tracking_number
            # Redis values must be bytes/str/int/float, so the dict is stored
            # as JSON text.
            # NOTE(review): the key is spelled 'shuefeng_item'; kept as-is
            # because its consumers are not visible here.
            self.redis_client.lpush('shuefeng_item', json.dumps(j, ensure_ascii=False))

        # Pause a random interval before the next request.
        time.sleep(random.choice(self.SLEEP_SECONDS))

    def run(self, thread_count=10):
        """Start the worker threads and block until all of them finish.

        :param thread_count: number of worker threads running :meth:`get_url`.
        :return: None
        """
        workers = [threading.Thread(target=self.get_url) for _ in range(thread_count)]
        started = []
        try:
            for worker in workers:
                worker.start()
                started.append(worker)
                print('%s start at %s' % (worker.name,
                                          datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
        except RuntimeError as e:
            # Thread.start raises RuntimeError when no new thread can be created.
            print(e)
            print('Error: unable to start thread')
        # Block the main thread until every started worker has finished.
        for worker in started:
            worker.join()
if __name__ == '__main__':
    # Script entry point: build the downloader and run its worker threads.
    Download().run()