-
Notifications
You must be signed in to change notification settings - Fork 1
/
search.py
38 lines (34 loc) · 987 Bytes
/
search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
from nltk.corpus import wordnet
# get synonyms of word
def get_synonyms(word):
    """Return the distinct WordNet lemma names found for *word*.

    Every synset that ``wordnet.synsets`` yields for *word* is visited and
    the name of each of its lemmas is collected. Duplicates are dropped, so
    each name appears once; the order of the returned list is unspecified.
    """
    unique_names = {
        lemma.name()
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    }
    return list(unique_names)
# associate a score with each text file
def search(word, num):
    """Rank the text files in ``processed/`` by how often they use *word*.

    Each ``*.txt`` file in the ``processed`` directory under the current
    working directory is read and split on whitespace. Every token that
    exactly equals one of the names returned by ``get_synonyms(word)`` adds
    one to that file's score.

    Args:
        word: Query term whose synonyms are counted in each file.
        num: Maximum number of file names to return.

    Returns:
        The list produced by ``getbestkeys``: up to *num* file base names
        (the file name without its ``.txt`` suffix), best score first.

    Raises:
        OSError: If the ``processed`` directory or one of its files cannot
            be read.
    """
    # Build the set once: every token in every file is tested against it,
    # and set membership is O(1) where the original list scan was O(n).
    synonyms = set(get_synonyms(word))
    directory = os.path.join(os.getcwd(), 'processed')
    ratings = {}
    # os.listdir() returns names in arbitrary order; sorting makes the
    # ranking of equally scored files reproducible between runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.txt'):
            continue
        with open(os.path.join(directory, filename), 'r') as f:
            tokens = f.read().split()
        # A separate loop name keeps the ``word`` parameter intact.
        ratings[filename[:-4]] = sum(1 for token in tokens if token in synonyms)
    return getbestkeys(ratings, num)
# choose up to num best values from dictionary
def getbestkeys(ratings, num):
    """Return up to *num* keys of *ratings* that have the highest values.

    Args:
        ratings: Dictionary mapping a key (a file name) to its score.
        num: Maximum number of keys to return. Zero or a negative number
            yields an empty list; a number larger than the dictionary
            yields every key.

    Returns:
        A new list of keys ordered from highest to lowest score. Keys with
        equal scores keep the order in which they appear in *ratings*.
        *ratings* itself is not modified.
    """
    # sorted() is stable even with reverse=True, so tied keys stay in
    # dictionary order, the same result as repeatedly taking the first
    # maximum, without the quadratic max/index/del passes.
    ranked = sorted(ratings, key=ratings.get, reverse=True)
    # Clamp to [0, len]: a negative slice bound would otherwise drop items
    # from the end instead of returning nothing.
    limit = max(min(num, len(ranked)), 0)
    return ranked[:limit]