forked from jasxnle/ICS_Search_Engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
matrix.py
61 lines (46 loc) · 1.84 KB
/
matrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
class InstanceMatrix:
    """Boolean term-document incidence matrix for conjunctive (AND) queries.

    Row ``i`` belongs to the i-th key of the ``index`` given to the
    constructor and column ``j`` to document id ``j``. A cell is 1 when the
    term has a posting for that document and 0 otherwise.
    """

    def __init__(self, index: dict, map: dict):
        """Build the matrix from an inverted index.

        Args:
            index (dict): maps each term to an iterable of postings. A
                posting is either an object exposing a ``docID`` attribute or
                a sequence whose first element is the document id.
            map (dict): document map; only its length is used, as the number
                of matrix columns.
        """
        self.matrix = np.zeros((len(index), len(map)))
        self.queries = np.fromiter(index.keys(), dtype='object')
        # term -> row number, so row lookups are a dict access instead of a
        # scan over every term in ``self.queries``.
        self._rows = {term: row for row, term in enumerate(index)}
        self._setMatrix(index)

    @staticmethod
    def _docIdOf(posting):
        """Return the document id carried by a single posting."""
        try:
            return posting.docID
        except AttributeError:
            # Plain ``[doc_id, ...]`` sequences carry the id first.
            return posting[0]

    def _setMatrix(self, index: dict) -> None:
        """Set a cell to 1 for every (term, document) pair in the index.

        Args:
            index (dict): index of the documents
        """
        for term, postings in index.items():
            row = self._rows.get(term)
            if row is None:
                # Term has no row in this matrix: nothing to mark.
                continue
            for posting in postings:
                self.matrix[row, self._docIdOf(posting)] = 1

    def checkQuery(self, query: list) -> list:
        """Check which documents contain all the words in the query.

        Args:
            query (list): list of words

        Returns:
            list: ascending document ids that contain all the words in the
                query; empty as soon as one word is not in the index. An
                empty query places no constraint and so matches every
                document.
        """
        rows = []
        for term in query:
            row = self._rows.get(term)
            if row is None:
                return []
            rows.append(row)
        # Explicit integer dtype keeps the indexing valid when ``rows`` is empty.
        selected = self.matrix[np.asarray(rows, dtype=np.intp), :]
        hits = np.all(selected == 1, axis=0)
        # Plain Python ints, so both return paths yield the same type.
        return np.where(hits)[0].tolist()
if __name__ == '__main__':
    # Small interactive demo: build a matrix for three terms over four
    # documents, sanity-check a few queries, then answer one typed by the user.
    from collections import namedtuple  # only the demo needs it

    # InstanceMatrix reads each posting's ``docID`` attribute, so the raw
    # [doc_id, value] pairs are wrapped in a record that exposes one.
    # NOTE(review): the meaning of the second value is not visible here; it
    # is carried along untouched - confirm against the index builder.
    Posting = namedtuple("Posting", ["docID", "value"])

    raw_index = {"caesar": [[0, 1], [2, 3]], "julius": [[0, 2], [2, 3]], "jason": [[0, 3], [3, 2]]}
    index = {
        term: [Posting(*pair) for pair in pairs]
        for term, pairs in raw_index.items()
    }
    # Named ``doc_map`` rather than ``map`` to avoid shadowing the builtin.
    doc_map = {0: "caesar.txt", 1: "julius.txt", 2: "jason.txt", 3: "julius2.txt"}
    im = InstanceMatrix(index, doc_map)

    # Expected matrix (rows = terms, columns = document ids):
    #            0 1 2 3
    #   caesar  [1 0 1 0]
    #   julius  [1 0 1 0]
    #   jason   [1 0 0 1]
    # list(...) makes the comparison work for list and array results alike.
    assert list(im.checkQuery(["caesar", "julius", "jason"])) == [0]
    assert list(im.checkQuery(["julius", "jason"])) == [0]
    assert list(im.checkQuery(["julius", "caesar"])) == [0, 2]

    user_input = input("Enter a query: ").split()
    print(im.checkQuery(user_input))