webservice.py
########################################################################################################################
# This program tests the init and run web service life cycle methods for bike forecasting.
# Inputs: it expects the pickled/numpy files for the regression model and transforms, plus a test data file
# (for simulating input). The test data columns should be as follows (in order, without the names/dictionary):
# Hour  Weekday  start station id  RideInitiationCount  N_DryBulbTemp  N_RelativeHumidity  N_WindSpeed
# RideInitiationCount is the label column; it is excluded before rows are passed to run(), which therefore
# receives the remaining six features.
# Outputs: the model prediction
########################################################################################################################
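# Illustrative request/response for run() -- the values below are hypothetical, not taken from the data set:
#   request body:  '[["8", "Monday", "31000", 61.0, 0.55, 7.2]]'
#                  i.e. Hour, Weekday, start station id, N_DryBulbTemp, N_RelativeHumidity, N_WindSpeed
#   response:      a string such as '12' (the ceiled ride-count prediction)
# The Hour, Weekday, and start station id values must appear among the classes stored in the saved
# label encoders; otherwise LabelEncoder.transform raises an error.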
#############################################
# Parser for reading the test data file.
# This is the same method as used in
# regression.py.
#############################################
def readdata(filepath, filename, labelcolumn, excludedcatcolumns, excludednumcolumns):
    import csv
    import os.path
    # open the CSV file and read past the header row
    csvfile = open(os.path.join(filepath, filename))
    reader = csv.reader(csvfile, delimiter=',')
    headers = next(reader)
    nanfloat = float('nan')
    nanint = 0
    # lazily parse each row: the first three columns stay as categorical strings, the label becomes
    # an int (0 when missing), and the numeric columns become floats (nan when missing)
    parsed = ((row[0],
               row[1],
               row[2],
               nanint if (row[3] == 'NA' or row[3] == 'nan') else int(row[3]),
               nanfloat if row[4] == 'NA' else float(row[4]),
               nanfloat if row[5] == 'NA' else float(row[5]),
               nanfloat if row[6] == 'NA' else float(row[6]),
               )
              for row in reader)
    # keep every column that is not explicitly excluded
    selector = [x for x in range(len(headers)) if (x not in excludedcatcolumns and x not in excludednumcolumns)]
    data = []
    label = []
    for row in parsed:
        data.append([row[i] for i in selector])
        label.append(row[labelcolumn])
    return {'data': data, 'label': label}
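# Illustrative return value of readdata (hypothetical values): with labelcolumn=3 excluded, each entry
# of the returned 'data' list looks like ['8', 'Monday', '31000', 61.0, 0.55, 7.2], and the 'label'
# list holds the corresponding RideInitiationCount values.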
#############################################
# init is the life cycle method for the web
# service that is called once at the
# initialization of the web service.
#############################################
def init():
    import numpy as np
    from sklearn import preprocessing
    import pickle
    # the first three feature columns (Hour, Weekday, start station id) are categorical
    global categoricalfeaturestart
    global categoricalfeaturestartend
    global numcatfeatures
    categoricalfeaturestart = 0
    categoricalfeaturestartend = 3
    numcatfeatures = categoricalfeaturestartend
    # read in the model/transform files and make them available globally for the run method
    global model
    model = pickle.load(open('./randomforest.pickle', "rb"))
    global meanimputer
    meanimputer = pickle.load(open('./meanimputer.pickle', "rb"))
    global featureencoders
    featureencoders = np.empty((numcatfeatures,), dtype=object)
    for iter in range(0, numcatfeatures):
        featureencoders[iter] = preprocessing.LabelEncoder()
        featureencoders[iter].classes_ = np.load('./' + str(iter) + '_labelencoder.npy')
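# init() assumes these artifacts sit in the working directory (presumably produced by the training code
# in regression.py): randomforest.pickle, meanimputer.pickle, and 0_labelencoder.npy through
# 2_labelencoder.npy for the three categorical columns.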
#############################################
# The run method is the life cycle method for
# the web service that is called each time
# a web request is received.
#############################################
def run(inputString):
    import json
    import numpy as np
    try:
        input_list = json.loads(inputString)
    except ValueError:
        return 'Bad input: expecting a json encoded list of lists.'
    features = np.array(input_list)
    numfeatures = 6
    if (features.shape != (1, numfeatures)):
        return 'Bad input: Expecting a json encoded list of lists of shape (1,' + str(numfeatures) + ').'
    # categorical to numerical transformation
    processedfeatures = np.empty([len(features), numfeatures])
    for iter in range(0, numfeatures):
        if (iter >= categoricalfeaturestart and iter < categoricalfeaturestartend):
            processedfeatures[:, iter] = np.array(featureencoders[iter].transform([row[iter] for row in features]))
        else:
            processedfeatures[:, iter] = np.array([row[iter] for row in features])
    # impute nans in the numerical columns; the categorical features are effectively excluded because
    # they have already been replaced with encoded class values
    processedfeatures_imp = meanimputer.transform(processedfeatures)
    # return the predicted output, rounded up to a whole ride count
    return str(np.int64(np.ceil(model.predict(processedfeatures_imp)[0])))
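# Illustrative call (hypothetical values; column order as documented in the header comment above):
#   run(json.dumps([["8", "Monday", "31000", 61.0, 0.55, 7.2]]))   # -> e.g. '12'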
#############################################
# This is the main execution; it simulates the
# web service by calling init and run.
#############################################
if __name__ == '__main__':
    import json
    # perform initialization
    init()
    # read in the test data file
    filepath = '.'
    testfilename = 'TestDataV6.csv'
    labelcolumn = 3
    excludedcatcolumns = [labelcolumn]
    excludednumcolumns = []
    fileread = readdata(filepath, testfilename, labelcolumn, excludedcatcolumns, excludednumcolumns)
    testdata = fileread['data']
    numfeatures = len(testdata[0])
    # call run to do the prediction, passing in the last row from the test data read above
    print(run(json.dumps(testdata[-1:])))