-
Notifications
You must be signed in to change notification settings - Fork 1
/
parser.py
139 lines (122 loc) · 5.29 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python
#
# Create on : 2015/08/31
#
# @author : Falldog
#
import csv
from defines import DELIMITER, ENCODING, QUOT_CHAR
def boolean(b):
""" for SQLite boolean used """
return '1' if b else '0'
class SubdivisionParser(object):
def __init__(self):
pass
def parse(self, cursor, filepath):
"""
Columns :
CountryCode, SubdivisionCode, SubdivisionName, Type?
"""
with open(filepath, 'rb') as f:
data_reader = csv.reader(f, delimiter=DELIMITER, quotechar=QUOT_CHAR)
for row in data_reader:
country_code, subdivision_code, subdivision_name, _type = row
subdivision_name = subdivision_name.decode(ENCODING)
cursor.execute(
"INSERT OR REPLACE INTO subdivision VALUES (?,?,?)",
(country_code,
subdivision_code,
subdivision_name,
)
)
class CodeParser(object):
def __init__(self):
pass
def parse(self, cursor, filepath):
"""
Columns :
Change, CountryCode, LocationCode, LocationName, LocationName without Diacritics,
Subdivision, Function, Status, Date, IATA, Coordinate, Remark
Column - Change:
+ (newly added);
X (to be removed);
| (changed);
# (name changed);
= (reference entry);
! (US location with duplicate IATA code)
Column - Function:
0 Function not known, to be specified
1 Port, as defined in Rec 16
2 Rail Terminal
3 Road Terminal
4 Airport
5 Postal Exchange Office
6 Multimodal Functions (ICDs, etc.)
7 Fixed Transport Functions (e.g. Oil platform)
8 Inland Port
B Border Crossing
Column - Date:
ym
Column - Coordinate:
(DDMM[N/S] DDDMM[W/E])
"""
with open(filepath, 'rb') as f:
data_reader = csv.reader(f, delimiter=DELIMITER, quotechar=QUOT_CHAR)
for row in data_reader:
change = row[0]
if change == 'X': # skip removed item
continue
elif change == '=': # skip reference entry ex: "Peking = Beijing"
continue
elif change == '\xa6': # '|' skip non location entry
continue
change, country_code, location_code, location_name, location_name_wo_diacritics, subdivision, function, status, date, iata, coordinate, remark = row
if location_name and location_name[0] == '.': # country name
name = location_name.decode(ENCODING)[1:] # filter the first char "."
name = name.split(',')[0]
cursor.execute(
"INSERT OR REPLACE INTO country VALUES (?,?)",
(country_code, name)
)
else: # location name
if not location_code:
print '*** skip unknow location code record : %s' % row
continue
remark = remark.decode(ENCODING)
is_port = '1' in function
is_airport = '4' in function
is_rail_terminal = '2' in function
is_road_terminal = '3' in function
is_postal_exchange_office = '5' in function
is_border_cross = 'B' in function
# insert by replace, or will cause primary key conflict exception
# most case is alternative name switch (ONLY)
# Ex:
# AX MHQ : "Maarianhamina (Mariehamn)" vs "Mariehamn (Maarianhamina)"
# BE BTS : "Bassenge (Bitsingen)" vs "Bitsingen (Bassenge)"
#
# rarely case maybe all different
# Ex:
# ,"US","LEB","Hanover-Lebanon-White River Apt","Hanover-Lebanon-White River Apt","NH","--34----","AI","0307",,"4338N 07215W",
# ,"US","LEB","Lebanon-White River-Hanover Apt","Lebanon-White River-Hanover Apt","VT","---4----","AI","9601",,,
# ,"US","LEB","White River-Hanover-Lebanon Apt","White River-Hanover-Lebanon Apt","VT","---4----","AI","0001",,,
#
# for these rarely case replace by last record
cursor.execute(
"INSERT OR REPLACE INTO location VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
(country_code,
location_code,
location_name_wo_diacritics,
subdivision,
status,
iata,
coordinate,
remark,
boolean(is_port),
boolean(is_airport),
boolean(is_road_terminal),
boolean(is_rail_terminal),
boolean(is_postal_exchange_office),
boolean(is_border_cross),
)
)