-
Notifications
You must be signed in to change notification settings - Fork 1
/
voc_parsing_example.py
120 lines (106 loc) · 2.75 KB
/
voc_parsing_example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
"""
parse PASCAL VOC xml annotations
"""
import os
import sys
def pascal_voc_clean_xml(ANN, pick, exclusive = False):
    """Collect bounding boxes of selected classes from PASCAL VOC xml files.

    Every entry of directory ``ANN`` whose name ends in ``.xml`` is read line
    by line (plain substring matching, no XML parser).  For each tag of
    interest the value is the text between the first ``>`` on the line and
    the next ``<``.  Objects whose ``<name>`` is not in ``pick`` are dropped,
    and lines inside ``<part>`` ... ``</part>`` are ignored so that part
    boxes do not overwrite the enclosing object's box.

    A progress bar and per-class statistics are written to stdout.

    Args:
        ANN: path of the directory holding the annotation files.
        pick: container of class names to keep (tested with ``in``).
        exclusive: only changes the banner printed at start.
            NOTE(review): the previous implementation had a branch meant to
            skip a file when ``exclusive`` was set, but it could never
            trigger (a pending box always has a name from ``pick``), so the
            flag has never influenced the result.  Confirm the intended
            semantics before giving it an effect.

    Returns:
        A list with one ``[jpg, [w, h, boxes]]`` entry per annotation file,
        in sorted file-name order.  ``jpg`` is the ``<filename>`` text (empty
        string if the tag is absent), ``w``/``h`` come from ``<width>`` /
        ``<height>`` (0 if absent) and ``boxes`` is a list of
        ``[name, xmin, ymin, xmax, ymax]``; a coordinate is ``None`` when its
        tag was not found for that object.
    """
    print('Parsing for {} {}'.format(
        pick, 'exclusively' * int(exclusive)))

    def tag_text(line): # text right after the first xml tag on the line
        return line.split('>')[1].split('<')[0]

    def _int(literal): # for literals supposed to be int (accepts '12.0')
        return int(float(literal))

    dumps = list()
    # Paths are joined with ANN instead of chdir-ing into it, so the process
    # working directory is never modified, even if parsing raises.
    # endswith() avoids picking up names such as 'a.xml.bak'; sorting makes
    # the output order independent of the file system.
    annotations = sorted(
        entry for entry in os.listdir(ANN) if entry.endswith('.xml'))
    # Only the xml files are processed, so they are the progress denominator.
    size = len(annotations)

    for i, xml_name in enumerate(annotations):
        # progress bar
        sys.stdout.write('\r')
        percentage = 1. * (i+1) / size
        progress = int(percentage * 20)
        bar_arg = [progress*'=', ' '*(19-progress), percentage*100]
        bar_arg += [xml_name]
        sys.stdout.write('[{}>{}]{:.0f}% {}'.format(*bar_arg))
        sys.stdout.flush()

        # actual parsing
        with open(os.path.join(ANN, xml_name), 'r') as f:
            lines = f.readlines()

        # Reset per file so nothing leaks over from the previous annotation.
        jpg = str()
        w = h = int()
        boxes = list()      # finished boxes of this file
        current = list()    # box being filled: [name, xmin, ymin, xmax, ymax]
        obj = False         # True while inside an <object> we still care about

        for line in lines:
            # File name and class names are kept verbatim (no int round
            # trip), so digit-only values such as '000123' stay intact.
            if '<filename>' in line:
                jpg = tag_text(line)
            if '<width>' in line:
                w = _int(tag_text(line))
            if '<height>' in line:
                h = _int(tag_text(line))
            if '<object>' in line:
                obj = True
            if '</object>' in line:
                obj = False
            # <part> blocks carry their own <name>/<bndbox>; suspend parsing
            # until the part is closed.
            if '<part>' in line:
                obj = False
            if '</part>' in line:
                obj = True

            if not obj: continue
            if '<name>' in line:
                # A new object starts: flush the previous one, if any.
                if current:
                    boxes.append(current)
                current = list()
                name = tag_text(line)
                if name not in pick:
                    # Ignore the rest of this object.
                    obj = False
                    continue
                current = [name, None, None, None, None]
            if len(current) != 5: continue
            if '<xmin>' in line: current[1] = _int(tag_text(line))
            if '<xmax>' in line: current[3] = _int(tag_text(line))
            if '<ymin>' in line: current[2] = _int(tag_text(line))
            if '<ymax>' in line: current[4] = _int(tag_text(line))

        # Flush the last object of the file.
        if current:
            boxes.append(current)

        dumps.append([jpg, [w, h, boxes]])

    # gather all stats
    stat = dict()
    for dump in dumps:
        for box in dump[1][2]:
            if box[0] in pick:
                stat[box[0]] = stat.get(box[0], 0) + 1

    print()
    print('Statistics:')
    for label in stat:
        print('{}: {}'.format(label, stat[label]))
    print('Dataset size: {}'.format(len(dumps)))

    return dumps
if __name__ == '__main__':
    # Script entry point: parse the project's annotation folder for the two
    # classes of interest and print the resulting records.
    annotation_dir = os.path.join('app', 'static', 'datacenter', 'annotations')
    classes_name = ["car", "person"]
    dumps = pascal_voc_clean_xml(annotation_dir, classes_name)
    print(dumps)