-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
89 lines (75 loc) · 2.63 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
from summarize import (
ParagraphSummarizer,
FirstOccurrenceSummarizer,
SummarizerBase,
RandomSummarizer,
TFSummarizer,
SigFactorSummarizer,
TFIDFSummarizer,
TFIDFCSummarizer
)
from intersection import IntersectionSummarizer
import sys
# Directory names used by the evaluation script below.
# Policy texts: verified against the truths, summarized, and also passed as
# the corpus directory of TFIDFCSummarizer.
policy_dir = "policies"
# Output location: one summary file is written here per policy file.
summary_dir = "summaries"
# Reference ("truth") files, looked up under the same file name as the policy.
truth_dir = "truths"
def _read_stripped(path):
    """Return the full text of *path* with surrounding whitespace removed."""
    # The context manager closes the handle even if decoding raises.
    with open(path, 'r') as handle:
        return handle.read().strip()


def verify_truths(policydir, truthdir, summarizer=None, expected_len=5):
    """Check every policy file against its same-named truth file.

    For each file in ``policydir`` the truth file with the same name must
    exist in ``truthdir``, must split into exactly ``expected_len``
    sentences, and each of those sentences (whitespace-stripped) must also
    appear among the policy's sentences.

    Args:
        policydir: Directory holding the policy text files.
        truthdir: Directory holding the truth files.
        summarizer: Object providing ``split_content_to_sentences(text)``.
            Defaults to a fresh ``SummarizerBase()``.
        expected_len: Number of sentences each truth file must contain.

    Raises:
        Exception: If a truth file is missing, has the wrong number of
            sentences, or contains a sentence absent from its policy.
        SystemExit: With status 1 (after printing the file name and error)
            if either file cannot be decoded as text.
    """
    if summarizer is None:
        summarizer = SummarizerBase()

    for f in os.listdir(policydir):
        pol_path = os.path.join(policydir, f)
        truth_path = os.path.join(truthdir, f)

        # Make sure truth file exists
        if not os.path.exists(truth_path):
            raise Exception('Truth %s not found' % f)

        try:
            policy = _read_stripped(pol_path)
            truth = _read_stripped(truth_path)
        except UnicodeDecodeError as e:
            print('%s: %s' % (f, e))
            sys.exit(1)

        # A set gives constant-time membership tests for the check below.
        p_sents = {
            s.strip() for s in summarizer.split_content_to_sentences(policy)
        }
        t_sents = [
            s.strip() for s in summarizer.split_content_to_sentences(truth)
        ]

        if len(t_sents) != expected_len:
            raise Exception('Truth %s has length %d' % (f, len(t_sents)))

        for t_s in t_sents:
            if t_s not in p_sents:
                e_str = 'Error with truth %s. ' % f
                e_str += 'The following not found in policy sentences.\n%s' % t_s
                raise Exception(e_str)
# Validate the truth files before scoring any summarizer.
verify_truths(policydir=policy_dir, truthdir=truth_dir)

# One instance of every summarizer to evaluate.
test_classes = [
    RandomSummarizer(),
    ParagraphSummarizer(),
    FirstOccurrenceSummarizer(),
    IntersectionSummarizer(),
    SigFactorSummarizer(),
    TFSummarizer(),
    TFIDFSummarizer(),
    TFIDFCSummarizer(corpus_dir=policy_dir),
]

# The set of policy files is the same for every summarizer, so list it once.
dir_files = os.listdir(policy_dir)
n = len(dir_files)

# Summaries are written below; make sure the target directory exists.
os.makedirs(summary_dir, exist_ok=True)

for summarizer in test_classes:
    print('Testing %s' % summarizer.__class__.__name__)
    recall = 0
    for i, f in enumerate(dir_files):
        # i + 1 so the counter runs 1..n instead of stopping at n - 1.
        print('\tProgress: %d/%d' % (i + 1, n), end='\r')

        with open(os.path.join(policy_dir, f), 'r') as policy_f:
            summary = summarizer.summarize(policy_f.read())
        summary_lines = set(summary.split('\n'))

        with open(os.path.join(truth_dir, f), 'r') as truth_f:
            truth_lines = truth_f.read().strip().split('\n')

        # Count the truth lines that appear verbatim as a summary line.
        recall += sum(1 for tl in truth_lines if tl in summary_lines)

        # Closing the file explicitly guarantees the summary is flushed.
        with open(os.path.join(summary_dir, f), 'w') as fout:
            fout.write(summary)

    # Each truth file holds 5 sentences (verify_truths enforces this), so
    # 5 * n is the total number of extractable lines. Guard against an empty
    # policy directory to avoid dividing by zero.
    recall = recall / (5 * n) if n else 0.0
    print('\tRecall: %.4f' % recall)