forked from jasxnle/ICS_Search_Engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
posting.py
51 lines (42 loc) · 1.58 KB
/
posting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
from json import JSONEncoder, JSONDecoder
import json
from collections import defaultdict
class Posting:
    """One entry of a postings list: a document identifier and a frequency.

    Attributes:
        docID: Identifier of the document. An older note labels this the
            "url name", while the decoder in this file parses it with
            ``int()`` -- NOTE(review): confirm which form callers pass.
        freq: Frequency value stored for this document.

    A tf-idf weight is not stored on the posting. The intended formula noted
    by the original author is:
        W(x, y) = term frequency * log(total documents / documents containing x)
    """

    def __init__(self, docID, freq):
        self.docID, self.freq = docID, freq

    def __str__(self):
        """Return the compact ``"docID,freq"`` form."""
        return "{},{}".format(self.docID, self.freq)

    def __repr__(self) -> str:
        """Return a debugging representation naming both fields."""
        return "Posting<DocID:{}, Freq:{}>".format(self.docID, self.freq)

    # TODO: backward compatibility with the search code -- an index-style
    # accessor (``posting[0]`` returning ``docID``) was sketched via
    # ``__getitem__`` but is not enabled.
class PostingEncoder(JSONEncoder):
    """JSON encoder that writes posting-like objects as ``"docID,freq"`` strings."""

    def default(self, obj: "Posting") -> str:
        """Serialise an object the stock encoder cannot handle.

        Args:
            obj: The object ``json`` could not serialise natively. Anything
                exposing ``docID`` and ``freq`` attributes is accepted.

        Returns:
            The string ``"<docID>,<freq>"``.

        Raises:
            TypeError: If ``obj`` lacks ``docID`` or ``freq``; raised by the
                base implementation, as the ``json`` contract requires.
        """
        # Delegate anything that is not posting-shaped to the base class so
        # callers get the standard "not JSON serializable" TypeError rather
        # than an AttributeError from the f-string below.
        if not (hasattr(obj, "docID") and hasattr(obj, "freq")):
            return super().default(obj)
        return f"{obj.docID},{obj.freq}"
class PostingDecoder(JSONDecoder):
    """JSON decoder that rebuilds ``Posting`` lists from ``"docID,freq"`` strings."""

    def decode(self, json_str):
        """Parse ``json_str`` and convert every encoded posting back to an object.

        The document is expected to be a JSON object whose values are arrays
        of ``"docID,freq"`` strings. Each string is split on commas into
        exactly two fields; the first is converted with ``int`` and the
        second with ``float``.

        Returns:
            A plain ``dict`` mapping each key to a list of ``Posting`` objects.

        Raises:
            ValueError: If an entry does not split into exactly two fields or
                a field cannot be converted to a number.
        """
        raw = super().decode(json_str)
        return {
            term: [
                Posting(int(doc_field), float(freq_field))
                for doc_field, freq_field in (entry.split(',') for entry in encoded)
            ]
            for term, encoded in raw.items()
        }
if __name__ == '__main__':
    # Smoke test: encode a tiny index to JSON, decode it again, and print the
    # type of the first restored entry to confirm it is a Posting.
    index = defaultdict(list)
    index['a'].extend([Posting(0, 1), Posting(1, 1)])
    serialized = json.dumps(index, cls=PostingEncoder)
    restored = json.loads(serialized, cls=PostingDecoder)
    print(type(restored['a'][0]))