forked from saeedaghabozorgi/TwitterSparkStreamClustering
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scraper_tweeter_socket.py
76 lines (64 loc) · 2.38 KB
/
scraper_tweeter_socket.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import socket
import sys
from thread import *
import requests
import requests_oauthlib
import tweepy
from tweepy import OAuthHandler
import json
import oauth2 as oauth
from datetime import datetime
# Twitter API credentials used to build the OAuth1 signer below.
# Each value is taken from the named environment variable when it is set, so
# real secrets do not have to be written into this file; otherwise the
# placeholder literal is used and must be replaced with your own credential.
import os

access_token = os.environ.get(
    "TWITTER_ACCESS_TOKEN", "582342005-QGM3VSdAL1cjAPzL6jxxxxxxxxxx")
access_token_secret = os.environ.get(
    "TWITTER_ACCESS_TOKEN_SECRET", "keEVSlaNz5fegUq8ytMrTXq62pxxxxxxxxxx")
consumer_key = os.environ.get(
    "TWITTER_CONSUMER_KEY", "PjlYiBasD06wnMOxxxxxxx")
consumer_secret = os.environ.get(
    "TWITTER_CONSUMER_SECRET", "EXVZnDVb3wLA6KhwOfp9weBSngJxxxxcx")

# OAuth1 signer built from the four credentials above.
auth = requests_oauthlib.OAuth1(consumer_key, consumer_secret,
                                access_token, access_token_secret)
HOST = ''    # Symbolic name meaning all available interfaces
PORT = 9999  # Arbitrary non-privileged port

# Create the TCP socket this script listens on.
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
print('Socket created')

# Bind socket to local host and port. Nothing else in this script can work
# without a bound socket, so a failure here ends the process.
try:
    s.bind((HOST, PORT))
except socket.error as err:
    print('Bind failed. Error Code : ' + str(err.errno) + ' Message ' + str(err.strerror))
    s.close()
    # Exit with a non-zero status so the caller can tell the bind failed.
    sys.exit(1)
print('Socket bind complete')

import time

# Start listening on socket (backlog of 10 pending connections)
s.listen(10)
print('Socket now listening')
# Function for handling connections. This will be used to create threads
def clientthread(conn, max_tweets=10000000):
    """Forward the filtered Twitter stream to one connected client.

    Opens a streaming request to the statuses/filter endpoint (English
    tweets inside a fixed bounding box) and writes every line that parses
    as JSON to ``conn``, newline-terminated.

    Args:
        conn: connected client socket. It is always closed before this
            function returns, whether it ends normally or by an exception.
        max_tweets: forwarding stops once the number of forwarded lines
            exceeds this value.
    """
    url = 'https://stream.twitter.com/1.1/statuses/filter.json'
    data = [('language', 'en'), ('locations', '-130,-20,100,50')]
    # ,('track','christmas')
    query_url = url + '?' + '&'.join([str(t[0]) + '=' + str(t[1]) for t in data])

    response = None
    try:
        response = requests.get(query_url, auth=auth, stream=True)
        print(query_url, response)  # 200 <OK>
        if not response.ok:
            # An HTTP error body is not tweet data; do not forward it.
            print("Error: stream request failed with HTTP %s" % response.status_code)
            return

        count = 0
        for line in response.iter_lines():  # Iterate over streaming tweets
            if count > max_tweets:
                break
            if not line:
                # Blank keep-alive line from the stream; nothing to forward.
                continue
            try:
                # Parsed only to check the line is valid JSON before sending.
                json.loads(line.decode('utf-8'))
            except ValueError:
                e = sys.exc_info()[0]
                print("Error: %s" % e)
                continue
            try:
                # sendall keeps writing until the whole line has gone out.
                conn.sendall(line + b'\n')
            except socket.error as err:
                # The client is gone; stop instead of failing on every tweet.
                print("Error: %s" % err)
                break
            count += 1
            print(str(datetime.now()) + ' ' + 'count:' + str(count))
    finally:
        # Release both the HTTP stream and the client socket on every exit path.
        if response is not None:
            response.close()
        conn.close()
# Now keep talking with clients: accept connections forever and hand each one
# to its own thread.
try:
    while True:
        # Wait to accept a connection - blocking call
        conn, addr = s.accept()
        print('Connected with ' + addr[0] + ':' + str(addr[1]))
        # start_new_thread takes the function to run as its first argument
        # and the tuple of arguments for that function as its second.
        start_new_thread(clientthread, (conn,))
finally:
    # The loop has no exit of its own, so this point is reached only through
    # an exception (e.g. Ctrl-C); close the listening socket on the way out.
    s.close()