-
Notifications
You must be signed in to change notification settings - Fork 301
/
Copy pathletter_coverage.py
206 lines (145 loc) · 5.31 KB
/
letter_coverage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
"""
letter_coverage.py
Compute the minimum number of words from five-letter-words
needed to cover N letters from the alphabet.
This can be done in O(N^2) time with a dynamic program.
For each word that we choose, we have to look at all other words
to see how many letters those two cover, combined.
https://charlesreid1.com/wiki/Five_Letter_Words
https://charlesreid1.com/wiki/Letter_Coverage
"""
from get_words import get_words
import numpy as np
from pprint import pprint
def word2bitvector(word,N):
"""
Turns a five-letter word into a bit vector representing character coverage.
Uses 26 letters by default.
"""
bit_vector = [False,]*N
for c in word:
i = ord(c)-ord('a')
try:
bit_vector[i] = True
except IndexError:
pass
return np.array(bit_vector)
def printbv(bv):
"""
Pretty printing for boolean bit vector
"""
result = ""
for bit in bv:
if bit:
result += "1"
else:
result += "0"
return result
def btsolution(min_key, min_val, words, bt):
"""
Reconstruct the sequence of words that gives maximum coverage and minimum word count.
Input: minimum word key (last word), minimum value (number of words), backtrack (prior word)
Output: list of words
"""
solution = []
solution.append(words[min_key])
prior_key = bt[min_key]
while prior_key != -1:
solution.append(words[prior_key])
prior_key = bt[prior_key]
return reversed(solution)
def get_dummy_words():
return ["aa","ab","bc","aa","dd","de","bb"]
if __name__=="__main__":
# Searching for words covering first N letters
N = 15
words = get_words()
words = words[:1000]
# Initialization:
# ----------------
# Store best coverage bitvectors for each word
bestcoverage_bv = [np.array([False]*N) for k in range(len(words))]
# Store number of 1s for best coverage vector for each word
ones_bv = [-1]*len(words)
# Store number of words in best solution for each word
ws = [0]*len(words)
# Store prior word for backtracking
bt = [-1]*len(words)
# Fencepost: Initial Step
# Word 0
# ----------------
i = 0
# Start with word 0
wi = words[i]
# Best letter coverage bit vector
bestcoverage_bv[i] = word2bitvector(words[i],N)
# Length of 1s
ones_bv[i] = sum(bestcoverage_bv[i])
# Number of words in best solution:
ws[i] = 1
# Backtracking: first word has no prior word
bt[i] = -1
# Start by assuming the word by itself,
# and then examine each possible pairing
for i in range(1,len(words)):
wi = words[i]
# Start with bitvector of word i's coverage
wi_bv = word2bitvector(wi,N)
# Fencepost: initial step
# Word i by itself
# Assume word i is the first word in the solution,
# and if we find a better combination with prior word,
# overwrite this solution.
# ------------------------
# Best coverage so far (first guess) is word i by itself
bestcoverage_bv[i] = wi_bv
# Count ones in (first guess) best bitvector
ones_bv[i] = sum(bestcoverage_bv[i])
# Number of words in new best solution:
ws[i] = 1
# Backtracking
bt[i] = -1
# Boolean: is this the first word in the sequence of solutions?
first = True
# Now loop over the rest of the words,
# and look for a better solution.
for j in reversed(range(0,i)):
# Get the prior word
wj = words[j]
# Get best coverage bitvector
wj_bv = bestcoverage_bv[j]
# (potential) new combined coverage vector
bestcoverage_bv_i = np.logical_or(wi_bv, wj_bv)
# Number of ones in (potential) new combined coverage vector
ones_bv_i = sum(bestcoverage_bv_i)
# Number of words in (potential) new best solution
ws_i = ws[j]+1
# If this solution is better than our current one,
# overwrite the current solution.
# (Better means, "more ones", or "same ones and fewer words".)
#import pdb; pdb.set_trace();
if( (ones_bv_i > ones_bv[i]) or (ones_bv_i==ones_bv[i] and ws_i < ws[i]) ):
bestcoverage_bv[i] = bestcoverage_bv_i
ones_bv[i] = ones_bv_i
ws[i] = ws_i
bt[i] = j
# This word now follows another word in the sequence of solutions
first = False
# It's tempting to stop early,
# but what if we find the perfect
# solution right at the end?!?
# Okay, now actually get the solution.
# The solution is the maximum of ones_bv and the minimum of ws
#
# Start by finding the maximum(s) of ones_bv
# Then check each corresponding index of ws
ones_bv_indices = [k for k,v in enumerate(ones_bv) if v==max(ones_bv)]
min_key = ones_bv_indices[0]
min_val = ones_bv[ones_bv_indices[0]]
for ix in reversed(ones_bv_indices[1:]):
if(ones_bv[ix] < min_key):
min_key = ix
min_val = ones_bv[ix]
solution = list(btsolution(min_key, min_val, words, bt))
print("Takes "+str(len(solution))+" words to cover "+str(N)+" letters")
pprint(solution)