-
Notifications
You must be signed in to change notification settings - Fork 301
/
Copy pathtries.py
301 lines (230 loc) · 7.56 KB
/
tries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
#!/usr/bin/env python
from get_words import get_words
import sys
import math
"""
tries.py
Donald Knuth, Art of Computer Programming, Volume 4 Fascicle 0
Exercise #35
Problem:
What letters of the alphabet can be used
as the starting letter of sixteen words that
form a complete binary trie within
WORDS(n), given n?
Example trie:
Left side:
s
h
e o
e l r w
Right side:
s
t
a e
l r a e
"""
ALPHABET = "abcdefghijklmnopqrstuvwxyz"
FIVE = 5
class Node(object):
def __init__(self, letter, count=0):
self.letter = letter
self.count = count
self.children = []
self.parent = None
class TryTrieTree(object):
def __init__(self,words):
self.root = None
self.words = words
def __str__(self):
final = ""
depth = 1
runner = self.root
def _str_recursive(runner,depth):
# In order traversal:
# visit this node first,
# then visit children if any
s = ""
s += ">"*depth
s += " "
s += self.get_prefix_from_node(runner)
s += runner.letter
s += ": %d"%(runner.count)
s += "\n"
# Base case
if runner.children == []:
# leaf node
return s
# Recursive case
else:
for child in runner.children:
s += _str_recursive(child,depth+1)
return s
final = _str_recursive(runner,depth)
return final
def set_root(self,root_letter):
self.root = Node(root_letter)
def get_prefix_from_node(self,node):
"""Given a node in the trie,
return the string prefix that
would lead to that node.
"""
if node==None:
return ""
elif node==self.root:
return ""
else:
prefix = ""
while node.parent != None:
node = node.parent
prefix = node.letter + prefix
return prefix
def get_node_from_prefix(self,prefix):
"""Given a string prefix,
return the node that represents
the tail end of that sequence
of letters in this trie. Return
None if the path does not exist.
"""
assert self.root!=None
if prefix=='':
return None
assert prefix[0]==self.root.letter
# Base case
if len(prefix)==1:
return self.root
# Recursive case
parent_prefix, suffix = prefix[:len(prefix)-1],prefix[len(prefix)-1]
parent = self.get_node_from_prefix(parent_prefix)
for child in parent.children:
if child.letter == suffix:
return child
# We know this will end because we handle
# the base case of prefix="", and prefix
# is cut down by one letter each iteration.
def assemble(self):
"""Assemble the trie from the set of words
passed to the constructor.
"""
assert self.root!=None
words = self.words
# start with an empty prefix
prefix = ''
candidate = self.root.letter
self._assemble(prefix,candidate,words)
def _assemble(self,prefix,candidate,words):
"""Recursive private method called by assemble().
"""
prefix_depth = len(prefix)
candidate_depth = prefix_depth+1
ppc = prefix+candidate
words_with_candidate = [w for w in words if w[:candidate_depth]==ppc]
min_branches_req = int(math.pow(2,5-candidate_depth))
max_number_branches = len(words_with_candidate)
# If we exceed the minimum number of
# branches required, add candidate
# as a new node on the trie.
if max_number_branches >= min_branches_req:
parent = self.get_node_from_prefix(prefix)
# If we are looking at the root node,
if prefix=='':
# parent will be None.
# In this case don't worry about
# creating new child or introducing
# parent and child, b/c the "new child"
# is the root (already exists).
pass
else:
# Otherwise, create the new child,
# and introduce the parent & child.
new_child = Node(candidate)
new_child.parent = parent
parent.children.append(new_child)
# Base case
if candidate_depth==4:
new_child.count = max_number_branches
return
# Recursive case
for new_candidate in ALPHABET:
new_prefix = prefix + candidate
self._assemble(new_prefix,new_candidate,words_with_candidate)
# otherwise, we don't have enough
# branches to continue downward,
# so stop here and do nothing.
return
def bubble_up(self):
"""Do a depth-first traversal of the
entire trytrietree, pruning as we go.
This is a pre-order traversal,
meaning we traverse children first,
then the parents, so we always
know the counts of children
(or we are on a leaf node).
"""
self._bubble_up(self.root)
def _bubble_up(self,node):
"""Pre-order depth-first traversal
starting at the leaf nodes and proceeding
upwards.
"""
if len(node.children)==0:
# Base case
# Leaf nodes already have counts
# Do nothing
return
else:
# Recursive case
# Pre-order traversal: visit/bubble up children first
for child in node.children:
self._bubble_up(child)
# Now that we've completed leaf node counts, we can do interior node counts.
# Interior node counts are equal to number of large (>=2) children.
large_children = [child for child in node.children if child.count >= 2]
node.count = len(large_children)
def trie_search(n, verbose=False):
words = get_words()
words = words[:n]
perfect_count = 0
imperfect_count = 0
for letter in ALPHABET:
tree = TryTrieTree(words)
tree.set_root(letter)
tree.assemble()
tree.bubble_up()
#print(tree)
if tree.root.count >= 2:
if verbose:
print("The letter {0:s} has a perfect binary trie in WORDS({1:d}).".format(
letter, n))
perfect_count += 1
else:
if verbose:
print("The letter {0:s} has no perfect binary trie in WORDS({1:d}).".format(
letter, n))
imperfect_count += 1
if verbose:
print("")
print("Perfect count: {:d}".format(perfect_count))
print("Imperfect count: {:d}".format(imperfect_count))
return perfect_count, imperfect_count
def trie_table():
"""Compute and print a table of
number of words n versus number of
perfect tries formed.
"""
print("%8s\t%8s"%("n","perfect tries"))
ns = range(1000,5757,500)
for n in ns:
p,i = trie_search(n)
print("%8d\t%8d"%(n,p))
n = 5757
p,i = trie_search(n)
print("%8d\t%8d"%(n,p))
if __name__=="__main__":
if len(sys.argv)<2:
n = 5757
else:
n = int(sys.argv[1])
if n > 5757:
n = 5757
_,_ = trie_search(n, verbose=True)
trie_table()