-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHNTools.py
135 lines (101 loc) · 3.53 KB
/
HNTools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
import http.client
import json
import datetime
# Tools for scraping Hacker News data in order to build data files from the "whoishiring" threads.
# Written against v0 of the Hacker News API.
class HNUser:
    """A Hacker News user record.

    The attributes mirror the keys of the user JSON object: ``about``,
    ``created``, ``karma`` and ``submitted``.
    """

    def __init__(self, about, created, karma, submitted):
        """Store the four user fields exactly as given."""
        self.about = about
        # Creation date in Unix time.
        self.created = created
        self.karma = karma
        # Submitted posts.
        self.submitted = submitted

    def __iter__(self):
        """Yield ``(key, value)`` pairs so that ``dict(self)`` rebuilds the JSON shape."""
        yield from {
            "about": self.about,
            "created": self.created,
            "karma": self.karma,
            "submitted": self.submitted,
        }.items()

    def __str__(self):
        """Return the same JSON text as :meth:`to_json`."""
        return self.to_json()

    def to_json(self):
        """Serialize the user to a JSON string, keeping non-ASCII characters as-is."""
        return json.dumps(dict(self), ensure_ascii=False)

    @staticmethod
    def from_json(json_dct):
        """Build an ``HNUser`` from a decoded JSON object.

        Args:
            json_dct: Mapping holding the user fields.

        Returns:
            A new ``HNUser``. Any key absent from ``json_dct`` becomes ``None``
            instead of raising ``KeyError``, so payloads that leave out an
            optional field (such as ``about``) still load.
        """
        return HNUser(
            json_dct.get('about'),
            json_dct.get('created'),
            json_dct.get('karma'),
            json_dct.get('submitted'),
        )
# TODO: consider adding subtypes for the different item models (e.g. base, job, story).
class HNItem:
    """A Hacker News item.

    The attributes mirror the keys of the item JSON object. The constructor
    parameters ``itemId`` and ``itemType`` are stored as ``id`` and ``type``.
    """

    def __init__(self, by, descendants, itemId, kids, score, text, time, title, itemType):
        """Store the item fields exactly as given."""
        self.by = by
        # Number of descendants.
        self.descendants = descendants
        self.id = itemId
        # Direct children.
        self.kids = kids
        self.score = score
        self.text = text
        self.time = time
        self.title = title
        self.type = itemType

    def getDateTime(self):
        """Return ``self.time`` converted to a naive, local-time ``datetime``.

        ``fromtimestamp`` is a classmethod and must be called on the class;
        instantiating ``datetime.datetime()`` without arguments raises
        ``TypeError``.
        """
        return datetime.datetime.fromtimestamp(self.time)

    def __iter__(self):
        """Yield ``(key, value)`` pairs so that ``dict(self)`` rebuilds the JSON shape."""
        yield from {
            "by": self.by,
            "descendants": self.descendants,
            "id": self.id,
            "kids": self.kids,
            "score": self.score,
            "text": self.text,
            "time": self.time,
            "title": self.title,
            "type": self.type,
        }.items()

    def __str__(self):
        """Return the same JSON text as :meth:`to_json`."""
        return self.to_json()

    def to_json(self):
        """Serialize the item to a JSON string, keeping non-ASCII characters as-is."""
        return json.dumps(dict(self), ensure_ascii=False)

    @staticmethod
    def from_json(json_dct):
        """Build an ``HNItem`` from a decoded JSON object.

        Args:
            json_dct: Mapping holding the item fields.

        Returns:
            A new ``HNItem``. Any key absent from ``json_dct`` becomes ``None``
            instead of raising ``KeyError``, so items that carry only a subset
            of the fields (for example one without a ``title``) still load.
        """
        return HNItem(
            json_dct.get('by'),
            json_dct.get('descendants'),
            json_dct.get('id'),
            json_dct.get('kids'),
            json_dct.get('score'),
            json_dct.get('text'),
            json_dct.get('time'),
            json_dct.get('title'),
            json_dct.get('type'),
        )
def getUser(userId):
    """Fetch a user from the Hacker News v0 API.

    Args:
        userId: User name, substituted into ``/v0/user/{userId}.json``.

    Returns:
        The decoded response, with every JSON object converted through
        ``HNUser.from_json``. A non-object payload is returned as
        ``json.loads`` decodes it (e.g. ``None`` for ``null``).
    """
    conn = http.client.HTTPSConnection("hacker-news.firebaseio.com")
    try:
        # A GET carries no request body, so none is sent.
        conn.request("GET", "/v0/user/{userId}.json".format(userId=userId))
        res = conn.getresponse()
        # res.read() yields bytes; decode them before parsing.
        jsonStr = res.read().decode('utf8')
    finally:
        # Always release the socket, even if the request or read fails.
        conn.close()
    return json.loads(jsonStr, object_hook=HNUser.from_json)
# Mirrors the "get item by id" endpoint documented at https://hackernews.api-docs.io/v0/items/get-by-id
def getItem(itemId):
    """Fetch an item from the Hacker News v0 API.

    Args:
        itemId: Item identifier, substituted into ``/v0/item/{itemId}.json``.

    Returns:
        The decoded response, with every JSON object converted through
        ``HNItem.from_json``. A non-object payload is returned as
        ``json.loads`` decodes it (e.g. ``None`` for ``null``).
    """
    conn = http.client.HTTPSConnection("hacker-news.firebaseio.com")
    try:
        # A GET carries no request body, so none is sent.
        conn.request("GET", "/v0/item/{itemId}.json".format(itemId=itemId))
        res = conn.getresponse()
        # res.read() yields bytes; decode them before parsing.
        jsonStr = res.read().decode('utf8')
    finally:
        # Always release the socket, even if the request or read fails.
        conn.close()
    return json.loads(jsonStr, object_hook=HNItem.from_json)
def main():
    """Demo entry point: print the first entry in the ``whoishiring`` account's submissions."""
    account = getUser("whoishiring")
    # Take the first ID in the account's `submitted` list and fetch that item.
    first_submission_id = account.submitted[0]
    print(getItem(first_submission_id))


# Run the demo only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()