-
Notifications
You must be signed in to change notification settings - Fork 1
/
codetree.py
executable file
·308 lines (236 loc) · 9.56 KB
/
codetree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
#!/usr/bin/env python3
import os
import re
# Extensions (including the leading dot) that identify header files.
HEADER_EXTENSIONS = [".h", ".hpp"]

# Every extension scanned for #include lines: the headers plus implementation files.
SOURCE_EXTENSIONS = [*HEADER_EXTENSIONS, ".cpp", ".c", ".cxx"]
class SourceInfo:
    """
    Per-file record stored in the source dictionary.

    Every field starts out empty/zero and is filled in by the code that
    builds and updates the dictionary.
    """

    def __init__(self):
        # file name with extension, name without extension, the extension
        # itself, and the full path; all start as empty strings
        self.name = self.prefix = self.extension = self.path = ""
        # line_count: line-count statistic for this file
        # includedByCount: convenience copy of len(includedByList)
        self.line_count = self.includedByCount = 0
        # names of the files this file includes
        self.includeList = []
        # names of the files that include this file
        self.includedByList = []
class FileInfo:
    """
    Result of scanning a single file: a line-count statistic and the list
    of file names found on its #include lines.
    """

    def __init__(self):
        # both fields start empty; a fresh list is created per instance
        self.line_count, self.included_files = 0, []
# A file name is one or more dot-separated parts made of letters, digits,
# '_', '+' or '-'. Allowing '-'/'+' and several dots keeps names such as
# "my-file.h" or "foo.inc.h" intact so they match the basename of the file.
_FILENAME_PATTERN = re.compile(r'[A-Za-z0-9_+-]+(?:[.][A-Za-z0-9_+-]+)+')


def get_filename_from_string(path, lower=True):
    """
    Extracts the first thing that looks like a filename (name + extension)
    from a string such as an ``#include`` line or a path.

    Note that the *first* dotted name wins, so a directory component that
    itself contains a dot (e.g. ``lib.v2/foo.h``) is returned instead of the
    file name.

    :param str path: the string to search
    :param bool lower: when true (the default) the result is lower-cased
    :returns str: the filename (name+extension) string, or None when the
                  string contains nothing that looks like a filename
    """
    match = _FILENAME_PATTERN.search(path)
    if match is None:
        return None
    filename = match.group()
    return filename.lower() if lower else filename
def get_immediate_subdirectories(dir_path):
    """
    Returns the immediate subdirectories of the given directory.

    :param str dir_path: a directory path string
    :returns list: a list of subdirectory path strings, each one being
                   dir_path joined with the subdirectory name
    """
    # List dir_path itself (the original passed the builtin ``dir`` here,
    # which raised a TypeError on every call).
    candidates = (os.path.join(dir_path, name) for name in os.listdir(dir_path))
    return [candidate for candidate in candidates if os.path.isdir(candidate)]
def print_subdir_info(dir_path, excludes):
    """
    Prints, for dir_path and every directory below it, the total size in
    bytes of the files directly inside it and how many such files there are.

    :param str dir_path: a directory path string
    :param list excludes: a list of directory names (e.g. "svn") to avoid visiting
    """
    for root, dirs, files in os.walk(dir_path):
        # Prune in place: os.walk only skips a directory when its name is
        # removed from this very list. (dirs.remove(excludes) looked for the
        # list object itself and raised ValueError.)
        dirs[:] = [name for name in dirs if name not in excludes]
        total_bytes = sum(os.path.getsize(os.path.join(root, name)) for name in files)
        print(root, "consumes", total_bytes, "bytes in", len(files), "non-directory files")
def find_extensions(files, extensions):
    """
    Keeps the file names whose extension is one of the given extensions.

    :param list files: a list of file names
    :param list extensions: the extensions to keep, with the leading dot (e.g. ".h")
    :returns list: the matching names, in their original order
    """
    matching = []
    for file_name in files:
        _, ext = os.path.splitext(file_name)
        if ext in extensions:
            matching.append(file_name)
    return matching
def remove_strings(strings, excludes):
    """
    Filters out every string that contains at least one exclusion substring.

    :param list strings: the list of strings
    :param list excludes: the list of exclusion substrings
    :returns list: a new list holding, in order, the strings that contain
                   none of the exclusion substrings
    """
    return [
        candidate
        for candidate in strings
        if not any(fragment in candidate for fragment in excludes)
    ]
def find_files(path, extensions, excludes=None):
    """
    Finds all files in the path with a specified set of extensions (recursive).

    :param str path: the path
    :param list extensions: a list of filename extensions to include
    :param list excludes: an optional list of strings. File paths that contain
                          any of these exclusion strings will be left out.
    :returns list: a list of all files (full path) with the specified extensions.
    """
    # None instead of a shared mutable [] default
    if excludes is None:
        excludes = []
    file_list = []
    for root, _dirs, files in os.walk(path):
        file_list.extend(
            os.path.join(root, name) for name in find_extensions(files, extensions)
        )
    # the exclusion test is applied to the full path, so it can match
    # directory names as well as file names
    return remove_strings(file_list, excludes)
def process_file(file_path):
    """
    Scans a source file for #include lines and gathers a rough size metric.

    The file is decoded as utf8 first and as Cp1252 if that fails. Lines are
    stripped before testing for "#include", so leading whitespace is ignored.
    The "line count" is really the number of ';' characters in the file.

    :param string file_path: the file path
    :returns FileInfo: a class that stores the list of includes (as returned
                       by get_filename_from_string) and the line count. When
                       the file cannot be decoded with either encoding a
                       message is printed and an empty FileInfo is returned.
    """
    file_info = FileInfo()
    for encoding in ['utf8', 'Cp1252']:
        # Start every attempt from scratch: a decode error can surface in the
        # middle of the file, and carrying the partial totals into the next
        # attempt would count the beginning of the file twice.
        semicolons = 0
        included = []
        try:
            with open(file_path, 'r', encoding=encoding) as f:
                for line in f:
                    semicolons += line.count(';')
                    line = line.strip()
                    if line.startswith("#include"):
                        header_name = get_filename_from_string(line)
                        if header_name is not None:
                            included.append(header_name)
        except UnicodeDecodeError:
            # wrong encoding guess; try the next one
            continue
        file_info.line_count = semicolons
        file_info.included_files = included
        return file_info
    print("Ignored", file_path, "[unicode error]")
    return file_info
def create_empty_source_dictionary(source_paths):
    """
    Builds the initial source dictionary: one SourceInfo per source file,
    with only the name/path fields filled in. Entries are keyed on the
    filename (w/ extension) only, not the full path, so a later path with the
    same filename replaces an earlier one.

    :param list source_paths: a list of source files (full path)
    :returns source_dictionary: an empty, initialized source dictionary
    """
    source_dictionary = {}
    for full_path in source_paths:
        base_name = os.path.basename(full_path)
        info = SourceInfo()
        info.path = full_path
        info.name = base_name
        # split "foo.h" into "foo" and ".h"
        info.prefix, info.extension = os.path.splitext(base_name)
        # key on the file name, extension included
        source_dictionary[base_name] = info
    return source_dictionary
def update_source_dictionary(source_dictionary, file_path):
    """
    Update the source dictionary by processing a new source file. Updates are
    done in place: the file's own entry receives its line count and include
    list, and every included file that has an entry gets this file appended
    to its includedByList.

    :param dictionary source_dictionary: the source dictionary.
    :param string file_path: The path to the source file
    """
    file_info = process_file(file_path)
    # dictionary keys are file names with extension
    file_name = os.path.basename(file_path)
    source_struct = source_dictionary.get(file_name)
    if source_struct is None:
        return
    # Record the statistics even when the file has no includes; bailing out
    # earlier left such files with a line count of 0.
    source_struct.line_count = file_info.line_count
    source_struct.includeList = file_info.included_files
    # NOTE(review): process_file returns lower-cased include names while the
    # dictionary is keyed on the unmodified file name, so a file whose name
    # contains upper-case letters is never found here - confirm whether the
    # lookup is meant to be case-insensitive.
    for included_name in file_info.included_files:
        include_struct = source_dictionary.get(included_name)
        if include_struct is not None:
            include_struct.includedByList.append(file_name)
def create_source_dictionary(dirName, excludes = []):
    """
    Walks a directory tree and records which source files include which.
    The result is a dictionary hashed on the filename.

    :param string dirName: The directory to search for source files
    :param list excludes: Paths or filenames containing these strings will be excluded.
                          Useful for removing directories named *test*, for example.
    :returns dict: A dictionary hashed on the filename. Entries are of type SourceInfo.
                   None is returned when no source files are found.
    """
    # full paths of every header and implementation file below dirName
    path_list = find_files(dirName, SOURCE_EXTENSIONS, excludes)
    if not path_list:
        return None

    source_dictionary = create_empty_source_dictionary(path_list)

    # scan each file for its includes and cross-link the entries
    for source_path in path_list:
        update_source_dictionary(source_dictionary, source_path)

    # update the convenience count
    for info in source_dictionary.values():
        info.includedByCount = len(info.includedByList)

    return source_dictionary
def _percentage(part, whole):
    """Returns part/whole as a percentage rounded to 2 places, 0.0 when whole is 0."""
    if not whole:
        return 0.0
    return round(part / whole * 100, 2)


def find_strays(source_dictionary):
    """
    Looks through the source dictionary for occurrences of strays: a file that
    is included by exactly one other file, where that other file has the same
    name without extension (e.g. foo.h only included by foo.cpp). Each stray
    pair is printed as "path -> includer path", followed by two summary lines
    (stray file count and stray line count, each with a percentage).

    :param dict source_dictionary: the source dictionary created by create_source_dictionary
    :returns list: every entry of the dictionary (not only the strays),
                   sorted by (includedByCount, path)
    """
    ordered = sorted(
        source_dictionary.values(),
        key=lambda info: (info.includedByCount, info.path),
    )
    stray_count = 0
    stray_lines = 0
    total_lines = 0
    for info in ordered:
        # keep track of all lines
        total_lines += info.line_count
        if info.includedByCount != 1:
            continue
        includer = source_dictionary[info.includedByList[0]]
        if includer.prefix != info.prefix:
            continue
        print(info.path, "->", includer.path)
        # the stray and its only includer both count
        stray_count += 2
        stray_lines += info.line_count + includer.line_count
    # _percentage guards against an empty dictionary or a zero line total,
    # both of which used to raise ZeroDivisionError
    print(stray_count, " stray files out of ", len(ordered),
          " [", _percentage(stray_count, len(ordered)), "%]", sep='')
    print(stray_lines, " stray lloc out of ", total_lines,
          " [", _percentage(stray_lines, total_lines), "%]", sep='')
    return ordered
def test():
    """
    Manual smoke run: builds the source dictionary for a hard-coded local
    checkout (ignoring the stdafx files) and prints its stray report.
    Does nothing when no source files are found.
    """
    d = create_source_dictionary("c:\\code\\dev-vr-v3", ["stdafx.h", "stdafx.cpp"])
    if d is not None:
        find_strays(d)
if __name__ == '__main__':
    import sys

    # Usage: codetree.py [root]; the root directory defaults to the current
    # directory when exactly one argument is not given.
    root = sys.argv[1] if len(sys.argv) == 2 else '.'

    d = create_source_dictionary(root, ["stdafx.h", "stdafx.cpp"])
    if d is None:
        # No source files found. sys.exit() is used because the bare exit()
        # helper is injected by the site module and is not guaranteed to exist.
        sys.exit()
    find_strays(d)