-
Notifications
You must be signed in to change notification settings - Fork 12
/
subjective.py
80 lines (72 loc) · 2.88 KB
/
subjective.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import numpy as np
import nltk as nlp
class SubjectiveTest:
    """Build free-text ("subjective") questions from a passage of text.

    Noun-phrase chunks are extracted from the passage with an NLTK
    ``RegexpParser``; each chunk becomes the keyword of a question such as
    "Define <KEYWORD>." and the sentence(s) containing the chunk become the
    reference answer.  The class also offers small helpers for comparing
    token lists with a cosine-similarity score.

    Attributes:
        question_pattern: Question prefixes prepended to a keyword.
        grammar: Chunk grammar handed to ``nltk.RegexpParser``.
        summary: The source text questions are generated from.
        noOfQues: Requested number of questions (anything ``int()`` accepts).
    """

    def __init__(self, data, noOfQues):
        """Store the source text and the number of questions wanted.

        Args:
            data: Text to generate questions from.
            noOfQues: Number of questions requested; converted with ``int()``
                when the test is generated.
        """
        self.question_pattern = [
            "Explain in detail ",
            "Define ",
            "Write a short note on ",
            "What do you mean by ",
        ]
        # One CHUNK stage with three alternative rules.
        self.grammar = (
            "\n"
            "CHUNK: {<NN>+<IN|DT>*<NN>+}\n"
            "{<NN>+<IN|DT>*<NNP>+}\n"
            "{<NNP>+<NNS>*}\n"
        )
        self.summary = data
        self.noOfQues = noOfQues

    @staticmethod
    def word_tokenizer(sequence):
        """Return every word token of *sequence*, sentence by sentence."""
        return [
            word
            for sent in nlp.sent_tokenize(sequence)
            for word in nlp.word_tokenize(sent)
        ]

    @staticmethod
    def create_vector(answer_tokens, tokens):
        """Return a 0/1 array marking which of *tokens* occur in *answer_tokens*.

        Args:
            answer_tokens: Container tested with ``in`` for each token.
            tokens: Iterable defining the vector positions.

        Returns:
            A NumPy array with one entry per element of *tokens*.
        """
        # Declared static: the function takes no ``self``, so without the
        # decorator it could not be called through an instance.
        return np.array([1 if tok in answer_tokens else 0 for tok in tokens])

    @staticmethod
    def cosine_similarity_score(vector1, vector2):
        """Return the cosine similarity of two vectors as a percentage.

        Args:
            vector1: First numeric vector.
            vector2: Second numeric vector of the same length.

        Returns:
            ``100 * (v1 . v2) / (|v1| * |v2|)``, or ``0.0`` when either
            vector has zero length (the ratio is undefined there and would
            otherwise come out as NaN).
        """
        first = np.asarray(vector1, dtype=float)
        second = np.asarray(vector2, dtype=float)
        magnitude = np.linalg.norm(first) * np.linalg.norm(second)
        if magnitude == 0:
            return 0.0
        return (np.dot(first, second) / magnitude) * 100

    def generate_test(self):
        """Generate questions and their reference answers from the summary.

        Returns:
            A ``(questions, answers)`` pair of equally long lists.  At most
            ``int(self.noOfQues)`` distinct questions are returned; fewer
            when the text yields fewer distinct keywords.

        Raises:
            ValueError: If at least one question is requested but no keyword
                could be extracted from the text, or if ``noOfQues`` cannot
                be converted to an integer.
        """
        requested = int(self.noOfQues)
        keyword_answers = self._extract_keyword_answers()
        return self._select_questions(keyword_answers, requested)

    def _extract_keyword_answers(self):
        """Map each upper-cased CHUNK phrase to the sentence text answering it."""
        chunker = nlp.RegexpParser(self.grammar)
        keyword_answers = {}
        for sentence in nlp.sent_tokenize(self.summary):
            tokens = nlp.word_tokenize(sentence)
            tree = chunker.parse(nlp.pos_tag(tokens))
            for subtree in tree.subtrees():
                if subtree.label() != "CHUNK":
                    continue
                keyword = " ".join(leaf[0] for leaf in subtree).strip().upper()
                if keyword in keyword_answers:
                    # Keep a separator so consecutive sentences stay readable
                    # and can still be split apart again later.
                    keyword_answers[keyword] += " " + sentence
                elif len(tokens) > 20:
                    # A keyword is only introduced by a sentence of more than
                    # 20 tokens; later sentences of any length extend it.
                    keyword_answers[keyword] = sentence
        return keyword_answers

    def _select_questions(self, keyword_answers, requested):
        """Pick up to *requested* distinct keywords at random and phrase them."""
        if requested <= 0:
            return [], []
        keywords = list(keyword_answers)
        if not keywords:
            raise ValueError(
                "no keyword phrases could be extracted from the text; "
                "cannot generate questions"
            )
        # Sampling without replacement guarantees distinct questions and
        # terminates even when fewer keywords exist than were requested.
        count = min(requested, len(keywords))
        chosen = np.random.permutation(len(keywords))[:count].tolist()
        pattern_count = len(self.question_pattern)
        questions = []
        answers = []
        for index in chosen:
            keyword = keywords[index]
            # The keyword's position decides which question prefix is used.
            prefix = self.question_pattern[index % pattern_count]
            questions.append(prefix + keyword + ".")
            answers.append(keyword_answers[keyword])
        return questions, answers