forked from linsir/doubanrobot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
douban_urllib.py
211 lines (178 loc) · 7.18 KB
/
douban_urllib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date : 2014-10-09 17:11:42
# @Author : Linsir ([email protected])
# @Link : http://linsir.org
import re
import urllib
import urllib2
import cookielib
import random
# Account credentials submitted with the Douban login form.
# NOTE(review): both values look like placeholders -- set real ones before running.
email = '[email protected]'
password = 'your_passwd'
# Path of the Mozilla-format cookie file used to persist the login session.
cookies_file = 'Cookies_saved.txt'
class douban_robot(object):
    """Small Douban client built on urllib2/cookielib (Python 2).

    The session cookies are kept in ``cookies_file``.  When that file is
    missing, or the ``ck`` cookie cannot be found, the robot logs in with the
    module-level ``email``/``password`` and saves fresh cookies.  Every POST
    carries the ``ck`` value as a form field (presumably Douban's anti-CSRF
    token -- confirm against the site).
    """

    # Default comments used by ``sofa``; a tuple so the default cannot be
    # mutated between calls.
    DEFAULT_SOFA_REPLIES = (
        '丫鬟命,公主心,怪不得人。',
        '要交流就平等交流,弄得一副跪舔样,谁还能瞧得起你?',
        '己所欲,勿施于人..',
        '人在做,天在看.',
    )

    def __init__(self):
        """Load (or create) the session cookies, build the opener, read ``ck``."""
        self.email = email
        self.password = password
        # Base fields of the login form; captcha fields are added per attempt.
        self.data = {
            "form_email": email,
            "form_password": password,
            "source": "index_nav",
            "remember": "on"
        }
        self.login_url = 'https://www.douban.com/accounts/login'
        self.load_cookies()
        self.opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookie))
        self.opener.addheaders = [(
            "User-agent",
            "Mozilla/5.0 (X11; Linux x86_64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/49.0.2623.75 Safari/537.36")]
        self.get_ck()

    def load_cookies(self):
        """Load the saved cookies from ``cookies_file`` into ``self.cookie``.

        If the file cannot be read, log in (which writes the file) and load
        it once more; a second failure propagates as ``IOError`` instead of
        retrying forever.
        """
        self.cookie = cookielib.MozillaCookieJar()
        try:
            self.cookie.load(cookies_file)
        except IOError:
            # Covers a missing file as well as cookielib.LoadError (an
            # IOError subclass) raised for a malformed one.
            print("The cookies file is not exist.")
            self.login_douban()
            self.cookie.load(cookies_file)
        print("loading cookies for file...")

    def get_ck(self, max_attempts=3):
        """Read the ``ck`` cookie into ``self.ck``, logging in again if needed.

        :param max_attempts: how many times the home page is fetched and the
            cookie jar inspected (at least once) before giving up.
        :raises RuntimeError: if no ``ck`` cookie shows up within
            ``max_attempts`` tries.
        """
        attempts_left = max_attempts
        while True:
            # Fetch the home page so any cookies set by the response land in
            # the jar attached to the opener.
            self.opener.open('https://www.douban.com')
            for c in self.cookie:
                if c.name == 'ck':
                    self.ck = c.value.strip('"')
                    print("ck:%s" % self.ck)
                    return
            print('ck is end of date.')
            attempts_left -= 1
            if attempts_left <= 0:
                raise RuntimeError(
                    "could not obtain the 'ck' cookie after %d attempt(s)"
                    % max_attempts)
            self.login_douban()
            # Replace the in-memory cookies with the ones just saved by login.
            self.cookie.revert(cookies_file)

    def login_douban(self):
        """Log in to Douban and save the resulting cookies to ``cookies_file``.

        When the login page asks for a captcha, the image URL is printed and
        the answer is read from standard input, then the form is submitted a
        second time.  The cookies are saved whatever the outcome.

        :returns: ``True`` if the final response landed on the Douban home
            page, ``False`` otherwise.
        """
        cookie_jar = cookielib.MozillaCookieJar(cookies_file)
        # A dedicated opener so every cookie set during login ends up in
        # ``cookie_jar``.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))
        # Work on a copy: a captcha answer from an earlier attempt must not be
        # re-sent with the next login.
        form = dict(self.data)
        response = opener.open(self.login_url, urllib.urlencode(form))
        html = response.read()
        captcha_urls = re.findall(
            r'<img id="captcha_image" src="(.+?)" alt="captcha"', html)
        if captcha_urls:
            print("The captcha_image url address is %s" % captcha_urls[0])
            captcha = re.search(
                '<input type="hidden" name="captcha-id" value="(.+?)"/>', html)
            if captcha:
                form["captcha-solution"] = raw_input('图片上的验证码是:')
                form["captcha-id"] = captcha.group(1)
                form["user_login"] = "登录"
                # Submit the form again, this time with the captcha answer.
                response = opener.open(self.login_url, urllib.urlencode(form))
        cookie_jar.save()
        # The login URL is https, so accept the home page under either scheme.
        if response.geturl() in ("http://www.douban.com/",
                                 "https://www.douban.com/"):
            print('login success !')
            return True
        return False

    def _post(self, url, fields, referer=None):
        """POST the url-encoded ``fields`` to ``url`` via the session opener.

        A ``Referer`` header is added only when ``referer`` is given.
        Returns the response object.
        """
        request = urllib2.Request(url)
        if referer is not None:
            request.add_header("Referer", referer)
        return self.opener.open(request, urllib.urlencode(fields))

    def new_topic(self, group_id, title, content):
        """Post a new topic to the group ``group_id``.

        :returns: ``True`` if the response URL is the group page, else
            ``False``.
        """
        group_url = "https://www.douban.com/group/" + group_id
        post_url = group_url + "/new_topic"
        response = self._post(post_url, {
            'ck': self.ck,
            'rev_title': title,
            'rev_text': content,
            'rev_submit': '好了,发言',
        }, referer=post_url)
        if response.geturl() == group_url:
            print('Okay, Success !')
            return True
        return False

    def talk_statuses(self, content='(⊙o⊙)…'):
        """Post ``content`` as a status through the home-page form."""
        home_url = "https://www.douban.com/"
        self._post(home_url, {
            'ck': self.ck,
            'comment': content,
        }, referer=home_url)

    def send_mail(self, id, content='Hey,girl !'):
        """Send a doumail with text ``content`` to the account ``id``."""
        mail_url = "https://www.douban.com/doumail/write"
        self._post(mail_url, {
            "ck": self.ck,
            "m_submit": "好了,寄出去",
            "m_text": content,
            "to": id,
        }, referer=mail_url)

    def sofa(self, group_id, content=None):
        """Comment on every listed topic of ``group_id`` whose matched cell is empty.

        The empty cell presumably means "no replies yet" -- confirm against
        the page markup.

        :param content: sequence of candidate comments, one is picked at
            random per topic; defaults to ``DEFAULT_SOFA_REPLIES``.
        """
        if content is None:
            content = self.DEFAULT_SOFA_REPLIES
        group_url = "https://www.douban.com/group/" + group_id + "/#topics"
        html = self.opener.open(group_url).read()
        topics = re.findall(
            r'topic/(\d+?)/.*?class="">.*?<td nowrap="nowrap" class="">(.*?)</td>',
            html, re.DOTALL)
        for topic_id, cell in topics:
            if cell != '':
                continue
            self._post(
                "https://www.douban.com/group/topic/" + topic_id
                + "/add_comment#last?",
                {
                    "ck": self.ck,
                    "rv_comment": random.choice(content),
                    "start": "0",
                    "submit_btn": "加上去"
                })

    def get_joke(self):
        """Fetch the first joke linked from www.xiaohuayoumo.com.

        :returns: ``(title, content)`` with HTML tags in the content replaced
            by spaces.  ``(None, None)`` if no joke link was found on the
            index page, ``(title, None)`` if the joke page did not match.
        """
        title = content = None
        site = 'http://www.xiaohuayoumo.com'
        index_html = urllib2.urlopen(site + '/').read()
        links = re.findall(
            r']<a href="(.+?)">(.+?)</a></div>.+?', index_html, re.DOTALL)
        if not links:
            return title, content
        path, title = links[0]
        page = self.opener.open(site + path).read()
        bodies = re.findall(
            r'content:encoded">(.+?)<p.+?</p>(.+?)</div></div></div></div>',
            page, re.DOTALL)
        if bodies:
            first, second = bodies[0]
            # Replace every HTML tag with a space to get plain text.
            content = re.sub(r'</?\w+[^>]*>', ' ', first + second)
        return title, content
if __name__ == '__main__':
    # Demo run: build the robot (loads cookies / logs in), fetch a joke and
    # mail its text to a fixed account id.
    app = douban_robot()
    title, content = app.get_joke()
    # Only send the mail when a joke text was actually obtained.
    if content:
        app.send_mail(63666378, content)
    # Other actions that can be tried from here:
    #   app.new_topic("cd", title, content)
    #   app.talk_statuses('Hello.it\'s a test message using python.')
    #   app.sofa("CentOS")