-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBahaInform(oldDesign).py
121 lines (92 loc) · 3.48 KB
/
BahaInform(oldDesign).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python
# coding: utf-8
# In[1]:
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
from datetime import datetime
import time
import re
import warnings
import urllib.request
import csv
import schedule
import time
import threading
import os
warnings.filterwarnings("ignore")
# In[2]:
# Fetch the raw HTML bytes for the given url. A browser User-Agent header is
# attached because some servers return 403 Forbidden to the default
# urllib client.
def get_html(url):
    """Return the response body (bytes) of a GET request to *url*.

    Raises urllib.error.URLError / HTTPError on network or HTTP failure.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0')
    # Bug fix: the original never closed the response object, leaking the
    # connection. The context manager closes it deterministically.
    with urllib.request.urlopen(req) as response:
        html = response.read()
    return html
# In[3]:
# Push an update message to the author's own LINE Notify account.
def lineNotify(token, msg):
    """POST *msg* to the LINE Notify API using *token* for authorization.

    Args:
        token: personal LINE Notify access token (Bearer).
        msg: text of the notification.

    Returns:
        The HTTP status code of the POST (200 on success).
    """
    headers = {
        "Authorization": "Bearer " + token,
        "Content-Type" : "application/x-www-form-urlencoded"
    }
    payload = {'message': msg}
    # Bug fix: the declared Content-Type promises a form-encoded BODY, but the
    # original passed `params=`, which puts the message in the URL query
    # string. `data=` sends it in the request body as the header claims.
    r = requests.post("https://notify-api.line.me/api/notify",
                      headers=headers, data=payload)
    return r.status_code
# In[4]:
# Scrape the Baha Anime ("動畫瘋") new-anime grid: for every card collect the
# watch URL, title, update date, episode label and thumbnail URL, and return
# them as a pandas DataFrame (columns: title, item_url, date, episode, pic).
def product_df(req_bs):
    """Build a DataFrame of anime entries from a parsed BeautifulSoup page."""
    grid = req_bs.find(class_ = "index_season view-grid")
    cards = grid.findAll(class_ = re.compile(r"view-grid__item week-*"))
    rows = [
        {
            "title": card.find(class_ = "newanime-title").text,
            "item_url": card.find(class_ = "newanime__content").get("href"),
            "date": card.find(class_ = "newanime-date").text,
            "episode": card.find(class_ = "newanime-vol").text,
            "pic": card.find(class_ = "lazyload").get("data-src"),
        }
        for card in cards
    ]
    return pd.DataFrame(rows)
# In[5]:
# Fetch the current anime listing, compare the five newest entries against the
# previous snapshot stored in baha.csv, push a LINE notification for every
# entry newer than the last known one, then overwrite the snapshot.
def jb():
    url = "https://ani.gamer.com.tw/"
    # Parse the downloaded page with BeautifulSoup.
    req_bs = bs(get_html(url), "html.parser")
    # DataFrame of the shows currently listed on the site.
    d = product_df(req_bs)
    # Previous snapshot as a dict-of-dicts; only its newest title (row 0)
    # is needed for the comparison below.
    b = pd.read_csv("baha.csv", header=0).head().to_dict()
    for i, new_title in enumerate(d['title'][0:5]):
        # Stop at the title that was newest last run: every entry before it
        # is a fresh update worth announcing.
        if new_title == b['title'][0]:
            break
        msg = ("[巴哈姆特動畫瘋] " + new_title + " " + d['date'][i]
               + d['episode'][i] + " " + d['item_url'][i])
        token = '填入自己的token'  # TODO: fill in your own LINE Notify token
        lineNotify(token, msg)
    # Persist this run's data for the next comparison. index=False keeps the
    # CSV free of a spurious unnamed index column on re-read.
    d.to_csv("baha.csv", index=False)
# In[6]:
# On the very first run there is no baha.csv yet, so fetch the site once and
# create the initial snapshot file.
def iniCsv():
    url = "https://ani.gamer.com.tw/"
    req_bs = bs(get_html(url), "html.parser")
    init_df = product_df(req_bs)
    # index=False avoids writing a spurious unnamed index column that would
    # otherwise appear in every later read of the snapshot.
    init_df.to_csv("baha.csv", index=False)
# In[7]:
# If the snapshot file already exists, run the fetch/compare job; otherwise
# create the initial snapshot first.
def check_Csv():
    # Bug fix: jb() and iniCsv() read/write "baha.csv" relative to the current
    # working directory, so the existence check must test that same path.
    # The original hard-coded placeholder ("自己的路徑/baha.csv") could point at
    # a different file than the one the jobs actually use.
    filepath = "baha.csv"
    if os.path.isfile(filepath):
        jb()
    else:
        iniCsv()
# In[ ]:
# Use the schedule package to run check_Csv once every hour.
schedule.every().hour.do(check_Csv)
# Polling loop: schedule only fires jobs when run_pending() is called, so poll
# once per second forever. The sleep keeps CPU usage negligible.
while True:
    schedule.run_pending()
    time.sleep(1)