forked from km1414/CNN-models
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelpers.py
144 lines (91 loc) · 3.62 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""
Helper functions for CIFAR-10 data preparation
Adapted from: https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/cifar10.py
"""
import numpy as np
import pylab as pl
import pickle
import os
import download
from keras.utils.np_utils import to_categorical
data_path = "data/CIFAR-10/"
data_url = "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
img_size = 32
num_channels = 3
img_size_flat = img_size * img_size * num_channels
num_classes = 10
_num_files_train = 5
_images_per_file = 10000
_num_images_train = _num_files_train * _images_per_file
def _get_file_path(filename=""):
return os.path.join(data_path, "cifar-10-batches-py/", filename)
def _unpickle(filename):
# Create full path for the file.
file_path = _get_file_path(filename)
print("Loading data: " + file_path)
with open(file_path, mode='rb') as file:
data = pickle.load(file, encoding='bytes')
return data
def _convert_images(raw):
# Convert the raw images from the data-files to floating-points.
raw_float = np.array(raw, dtype=float) / 255.0
# Reshape the array to 4-dimensions.
images = raw_float.reshape([-1, num_channels, img_size, img_size])
# Reorder the indices of the array.
images = images.transpose([0, 2, 3, 1])
return images
def _load_data(filename):
# Load the pickled data-file.
data = _unpickle(filename)
# Get the raw images.
raw_images = data[b'data']
# Get the class-numbers for each image. Convert to numpy-array.
cls = np.array(data[b'labels'])
# Convert the images.
images = _convert_images(raw_images)
return images, cls
def download_and_extract_data():
download.download(url=data_url, path=data_path, kind='tar.gz', progressbar=True, replace=False, verbose=True)
def load_class_names():
# Load the class-names from the pickled file.
raw = _unpickle(filename="batches.meta")[b'label_names']
# Convert from binary strings.
names = [x.decode('utf-8') for x in raw]
return names
def load_training_data():
# Pre-allocate the arrays for the images and class-numbers for efficiency.
images = np.zeros(shape=[_num_images_train, img_size, img_size, num_channels], dtype=float)
cls = np.zeros(shape=[_num_images_train], dtype=int)
# Begin-index for the current batch.
begin = 0
# For each data-file.
for i in range(_num_files_train):
# Load the images and class-numbers from the data-file.
images_batch, cls_batch = _load_data(filename="data_batch_" + str(i + 1))
# Number of images in this batch.
num_images = len(images_batch)
# End-index for the current batch.
end = begin + num_images
# Store the images into the array.
images[begin:end, :] = images_batch
# Store the class-numbers into the array.
cls[begin:end] = cls_batch
# The begin-index for the next batch is the current end-index.
begin = end
return images, cls, to_categorical(cls, num_classes=num_classes)
def load_test_data():
images, cls = _load_data(filename="test_batch")
return images, cls, to_categorical(cls, num_classes=num_classes)
# Plot results
def plot_results(results):
pl.figure()
pl.subplot(121)
pl.plot(results['train_acc'])
pl.title('Accuracy:')
pl.plot(results['test_acc'])
pl.legend(('Train', 'Test'))
pl.subplot(122)
pl.plot(results['train_loss'])
pl.title('Cost:')
pl.plot(results['test_loss'])
pl.legend(('Train', 'Test'))