-
Notifications
You must be signed in to change notification settings - Fork 0
/
16_normalise_and_standardise_time_series.py
63 lines (47 loc) · 1.88 KB
/
16_normalise_and_standardise_time_series.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# A value is normalised as follows:
# y = (x - min) / (max - min)
# ----------------------------------------- Normalisation ---------------------------------------
from pandas import read_csv
from sklearn.preprocessing import MinMaxScaler
# Load the series; the first CSV column becomes the index.
# NOTE(review): assumes exactly one data column remains -- the reshape below
# only succeeds in that case.
series = read_csv('daily-min-temperatures.csv', header=0, index_col=0)
print(series.head())
print(series.shape)
values = series.values
print(values)
# Force an explicit (n_samples, 1) column before handing it to the scaler.
values = values.reshape((len(values), 1))
# train the normalisation
scaler = MinMaxScaler(feature_range=(0, 1))
# fit computes the minimum and maximum to be used for later scaling.
scaler = scaler.fit(values)
# data_min_ / data_max_ hold one entry per feature; index the single feature
# explicitly instead of relying on implicit 1-element-array -> float
# conversion, which NumPy has deprecated.
print('Min: %f, Max: %f' % (scaler.data_min_[0], scaler.data_max_[0]))
# normalise dataset
normalised = scaler.transform(values)
print(normalised)
# map the normalised values back to the original scale
inversed = scaler.inverse_transform(normalised)
print(inversed)
# ------------------------------------------ Standardisation ------------------------------------------------------
# Standardizing a dataset involves rescaling the distribution of values
# so that the mean of observed values is 0 and the standard deviation is 1.
# Standardization assumes that your observations fit a Gaussian distribution
# (bell curve) with a well behaved mean and standard deviation.
# y = (x - mean) / standard_deviation
# mean = sum(x) / count(x)
# standard_deviation = sqrt( sum( (x - mean)^2 ) / count(x))
from matplotlib import pyplot as plt
from sklearn.preprocessing import StandardScaler
from math import sqrt
# Plot a histogram of the series so its distribution can be inspected
# visually before standardising (see the Gaussian assumption noted above).
series.hist()
plt.show()
values = series.values
# Force an explicit (n_samples, 1) column before handing it to the scaler.
values = values.reshape((len(values), 1))
# train the standardisation
scaler = StandardScaler()
scaler = scaler.fit(values)
# mean_ / var_ hold one entry per feature; index the single feature
# explicitly instead of relying on implicit 1-element-array -> float
# conversion, which NumPy has deprecated.
print('Mean: %f, StandardDeviation: %f'
      % (scaler.mean_[0], sqrt(scaler.var_[0])))
standardised = scaler.transform(values)
# print the first 5 standardised rows (slicing tolerates shorter series)
for row in standardised[:5]:
    print(row)
# inverse transform and print the first 5 rows
inversed = scaler.inverse_transform(standardised)
for row in inversed[:5]:
    print(row)