
Commit

Fix the bug in topic_tester.
wuyingren committed May 26, 2017
1 parent a87361f commit ded1d2a
Showing 7 changed files with 46 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -113,3 +113,4 @@ dump.rdb
 .topics_all.json
 settings.py
 .node_number.json
+.topics_tester.json
39 changes: 26 additions & 13 deletions run.py
@@ -43,11 +43,11 @@ def __init__(self):
         self.tester_tasker()
         #end
         self.end()
 
     def end(self):
         self.SQ.close_datebase()
         self.dump_json()
 
     def load_json(self):
         #load .time_log.json
         if os.path.exists('.time_log.json'):
@@ -62,7 +62,7 @@ def load_json(self):
         else:
             self.node_number=list()
         return
 
     def dump_json(self):
         #dump .time_log.json
         with open('.time_log.json','w') as f1:
@@ -77,7 +77,7 @@ def update_cookies(self):
         if int(time.time())-int(self.time_log["cookies_time"]) >= 86400:
             cookies_time_status = False
         else:
-            cookies_time_status = True
+            cookies_time_status = True
         if not os.path.exists('cookies.txt') or cookies_time_status is False:
             try:
                 log_s=log_in.v2ex_log_in()
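All of the update_* methods gate their work on a timestamp kept as a string in self.time_log, comparing it against a per-resource maximum age (86400 s for cookies here, 10800 s for nodes below). A minimal sketch of that pattern; is_stale is a hypothetical helper, not a name from this repo:

import time

def is_stale(time_log, key, max_age_seconds):
    # timestamps are stored as strings, hence the int() casts
    return int(time.time()) - int(time_log[key]) >= max_age_seconds

time_log = {"cookies_time": "0"}
if is_stale(time_log, "cookies_time", 86400):
    # refresh the cookies here, then record when we did it
    time_log["cookies_time"] = str(int(time.time()))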
@@ -88,17 +88,17 @@ def update_cookies(self):
                 return
         self.time_log["cookies_time"]=str(int(time.time()))
         return
 
     def update_nodes(self):
-        if int(time.time())-int(self.time_log["nodes_time"]) >= 18000:
+        if int(time.time())-int(self.time_log["nodes_time"]) >= 10800:
             nodes_time_status=False
         else:
             nodes_time_status=True
         if not nodes_time_status:
             resp=self.s.get('https://www.v2ex.com/api/nodes/all.json')
             if resp.status_code != 200:
                 self.end()
-                raise APIError
+                error_info='proxy status: %s, proxy: %s' % (str(settings.proxy_enable),str(self.s.proxies))
+                raise APIError(error_info)
             nodes=resp.json()
             for node in nodes:
                 n_id=node["id"]
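The recurring change in this commit appears here first: every bare raise APIError becomes raise APIError(error_info) with the proxy configuration baked into the message, so a failed request behind a proxy can be diagnosed from the traceback alone. The pattern in isolation, with stand-in values for the settings module:

class APIError(ValueError):
    pass

# stand-ins for settings.proxy_enable and the requests session's proxies
proxy_enable = True
proxies = {'https': 'socks5://127.0.0.1:1080'}

def check_response(status_code):
    if status_code != 200:
        error_info = 'proxy status: %s, proxy: %s' % (str(proxy_enable), str(proxies))
        raise APIError(error_info)

check_response(200)  # passes; a 403 would raise with the proxy details attached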
@@ -117,7 +117,7 @@ def update_nodes(self):
         self.time_log["nodes_time"]=str(int(time.time()))
         self.node_number=list(set(self.node_number))
         return
 
     def tasker(self):
         node_configs_1=[{'sql':'SELECT ID FROM NODES WHERE topics >= 8000;','sleep_time':5,'between_time':900,'time_log':'8000_node','queue_name':'node1'},
                         {'sql':'SELECT ID FROM NODES WHERE topics BETWEEN 3000 AND 8000;','sleep_time':10,'between_time':1800,'time_log':'4000_node','queue_name':'node2'},
@@ -152,13 +152,13 @@ def tasker(self):
                 q_node.enqueue(node_spider.start,node_id,sleep_time)
             self.time_log[time_log_name]=str(int(time.time()))
         return
 
     def get_rss(self):
         if int(time.time())-int(self.time_log["rss_time"]) >= 600:
             rss_spider.Rss_spider()
             self.time_log["rss_time"]=str(int(time.time()))
         return
 
     def load_config(self):
         self.proxy_enable=settings.proxy_enable
         self.s=requests.session()
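For orientation, tasker() (mostly unchanged in this commit) drives the node crawls from a list of config dicts: each tier selects nodes with a SQL filter on topic count and carries its own sleep_time, re-crawl interval (between_time), time_log key, and RQ queue name. A trimmed sketch of that dispatch loop, under the same assumptions (a sqlite3 cursor and a python-rq Queue passed in; dispatch_tier is an illustrative name):

import time

def dispatch_tier(cursor, queue, time_log, cfg, task):
    # skip the whole tier until its between_time has elapsed
    if int(time.time()) - int(time_log.get(cfg['time_log'], '0')) < cfg['between_time']:
        return
    cursor.execute(cfg['sql'])
    for (node_id,) in cursor.fetchall():
        queue.enqueue(task, node_id, cfg['sleep_time'])
    time_log[cfg['time_log']] = str(int(time.time()))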
@@ -169,13 +169,26 @@ def load_config(self):
 
     def tester_tasker(self):
         if int(time.time())-int(self.time_log["tester"]) >= 1800:
-            sql="SELECT ID FROM TOPIC WHERE (time - created) < 172800 AND ID NOT IN (SELECT T_ID FROM STATUS) AND (STRFTIME('%s','now') - time) > 1209600;"
+            #load json
+            if os.path.exists('.topics_tester.json'):
+                with open('.topics_tester.json','r') as f:
+                    tmp_topics=json.load(f)
+            else:
+                tmp_topics=list()
+            #main
+            sql="SELECT ID FROM TOPIC WHERE (time - created) < 345600 AND ID NOT IN (SELECT T_ID FROM STATUS) AND (STRFTIME('%s','now') - created) > 1209600;"
             sleep_time=20
             self.SQ.cursor.execute(sql)
             topic_ids=[x[0] for x in self.SQ.cursor.fetchall()]
             q=Queue('tester',connection=self.redis_conn)
             for topic_id in topic_ids:
-                q.enqueue(topic_tester.start,topic_id, sleep_time)
+                if topic_id not in tmp_topics:
+                    q.enqueue(topic_tester.start,topic_id, sleep_time)
+                    tmp_topics.append(topic_id)
+            #end
+            tmp_topics=list(set(tmp_topics))
+            with open('.topics_tester.json','w') as f:
+                json.dump(tmp_topics,f)
             self.time_log["tester"]=str(int(time.time()))
         return
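This hunk is the fix the commit message refers to. Before, every tester pass re-enqueued every topic matched by the SQL query, so the same topics were tested over and over; now the IDs already handed to the queue persist in .topics_tester.json and are skipped on later passes. The query also changes: the grab-minus-created window widens from 172800 s to 345600 s, and the two-week age test now measures against created rather than time. The same JSON-backed seen-set pattern in isolation (function names are illustrative):

import json, os

SEEN_FILE = '.topics_tester.json'

def enqueue_new(topic_ids, enqueue):
    # load the IDs enqueued on earlier runs, if any
    if os.path.exists(SEEN_FILE):
        with open(SEEN_FILE, 'r') as f:
            seen = json.load(f)
    else:
        seen = list()
    for topic_id in topic_ids:
        if topic_id not in seen:
            enqueue(topic_id)          # e.g. q.enqueue(topic_tester.start, topic_id, 20)
            seen.append(topic_id)
    with open(SEEN_FILE, 'w') as f:
        json.dump(list(set(seen)), f)  # dedupe defensively, as the commit does

enqueue_new([1001, 1002], print)  # each ID is enqueued at most once across runs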

2 changes: 1 addition & 1 deletion sql/create_table.sql
@@ -36,7 +36,7 @@ CREATE VIEW HUMAN_READER AS
 SELECT TOPIC.ID,TOPIC.title,TOPIC.author,TOPIC.author_id,TOPIC.content,TOPIC.content_rendered,TOPIC.replies,
 NODES_1.name AS node_name,TOPIC.node AS node_id,
 TOPIC.created AS create_time,DATETIME(TOPIC.created,'unixepoch') AS create_time_h,TOPIC.time AS grab_time,DATETIME(TOPIC.time,'unixepoch') AS grab_time_h,
-STATUS.TIME AS test_time,DATETIME(STATUS.TIME,'unixepoch') AS test_time_h,STATUS.NODE AS node_id_on_test,NODES_2.name AS node_name_on_test,STATUS.STATUS
+STATUS.TIME AS test_time,DATETIME(STATUS.TIME,'unixepoch') AS test_time_h,NODES_2.name AS node_name_on_test,STATUS.NODE AS node_id_on_test,STATUS.STATUS
 FROM TOPIC
 INNER JOIN NODES AS NODES_1
 ON NODES_1.ID = TOPIC.node
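The view change only swaps two output columns, so node_name_on_test now comes before node_id_on_test in HUMAN_READER. Reading the repaired view from Python might look like the following; the database path is a guess, not something this diff shows:

import sqlite3

conn = sqlite3.connect('v2ex.db')  # path is an assumption
conn.row_factory = sqlite3.Row
cur = conn.execute('SELECT title, node_name_on_test, node_id_on_test FROM HUMAN_READER LIMIT 5;')
for row in cur:
    print(row['title'], row['node_name_on_test'], row['node_id_on_test'])
conn.close()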
3 changes: 2 additions & 1 deletion v2ex_base/log_in.py
@@ -40,7 +40,8 @@ def log_in(self):
         #1
         r1=self.s.get('https://www.v2ex.com/signin')
         if r1.status_code != 200:
-            raise LogError
+            error_info='proxy status: %s, proxy: %s' % (str(settings.proxy_enable),str(self.s.proxies))
+            raise LogError(error_info)
         self.s.headers={'Referer': 'https://v2ex.com/signin'}
         t1=etree.HTML(r1.text)
         text_name=t1.xpath('//input[@type="text"]/@name')[-1]
3 changes: 2 additions & 1 deletion v2ex_spider/base_spider.py
@@ -34,7 +34,8 @@ def spider(self):
         resp=self.s.get(self.url)
         if resp.status_code != 200:
             self.SQ.close_datebase()
-            raise APIError
+            error_info='proxy status: %s, proxy: %s' % (str(settings.proxy_enable),str(self.s.proxies))
+            raise APIError(error_info)
         topics=resp.json()
         for topic in topics:
             t_id=topic["id"]
3 changes: 2 additions & 1 deletion v2ex_spider/rss_spider.py
@@ -81,7 +81,8 @@ def latest_and_hot(self):
         resp=self.s.get(url)
         if resp.status_code != 200:
             self.SQ.close_datebase()
-            raise APIError
+            error_info='proxy status: %s, proxy: %s' % (str(settings.proxy_enable),str(self.s.proxies))
+            raise APIError(error_info)
         topics=resp.json()
         for topic in topics:
             t_id=topic["id"]
15 changes: 12 additions & 3 deletions v2ex_tester/topic_tester.py
@@ -24,11 +24,13 @@ def __init__(self):
         '''
         self.s=requests.session()
         self.s.proxies=settings.proxies
-        self.SQ=SQL()
-        self.SQ.open_datebase()
         self.s.headers=settings.WEB_headers
         self.log_status=False
 
+    def init_database(self):
+        self.SQ=SQL()
+        self.SQ.open_datebase()
+
     def log_in(self):
         with open('.cookies.json','r') as f:
             cookies=requests.utils.cookiejar_from_dict(json.load(f))
@@ -61,17 +63,24 @@ def api_test(self,t_id,status):
         url='https://www.v2ex.com/api/topics/show.json?id=%s' % str(t_id)
         n_time=int(time.time())
         resp=self.s_a.get(url)
+        if resp.status_code != 200:
+            error_info='proxy status: %s, proxy: %s' % (str(settings.proxy_enable),str(self.s.proxies))
+            raise APIError(error_info)
         topic=resp.json()[0]
         node_id=topic["node"]["id"]
         return {'T_ID':int(t_id),'NODE':node_id,'STATUS':status,'TIME':n_time}
 
     def write_to_sql(self,T_ID, NODE, STATUS, TIME):
         self.SQ.write_to_db_status(T_ID, NODE, STATUS, TIME)
         return
 
+class APIError(ValueError):
+    pass
+
 def start(t_id,sleep_time):
     time.sleep(sleep_time)
     t=tester()
+    t.init_database()
     result=t.web_test(t_id, 0)
     t.write_to_sql(result['T_ID'],result['NODE'],result['STATUS'],result['TIME'])
     t.SQ.close_datebase()
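Two structural changes close out the file: SQL() moves out of __init__ into the new init_database(), so constructing a tester no longer opens the database as a side effect, and start(), the function RQ workers execute, now opens the handle explicitly per task and closes it when the task ends. Enqueueing such a task from run.py's side looks roughly like this (python-rq, which the repo already uses; the topic id and sleep value are illustrative):

from redis import Redis
from rq import Queue

from v2ex_tester import topic_tester

q = Queue('tester', connection=Redis())
q.enqueue(topic_tester.start, 123456, 20)  # topic id, seconds to sleep before testing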
