forked from hunkim/DeepLearningZeroToAll
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlab-09-7-sigmoid_back_prop.py
152 lines (113 loc) · 4.38 KB
/
lab-09-7-sigmoid_back_prop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""
In this file, we will implement back propagations by hands
We will use the Sigmoid Cross Entropy loss function.
This is equivalent to tf.nn.sigmoid_softmax_with_logits(logits, labels)
[References]
1) Tensorflow Document (tf.nn.sigmoid_softmax_with_logits)
https://www.tensorflow.org/api_docs/python/tf/nn/sigmoid_cross_entropy_with_logits
2) Neural Net Backprop in one slide! by Sung Kim
https://docs.google.com/presentation/d/1_ZmtfEjLmhbuM_PqbDYMXXLAqeWN0HwuhcSKnUQZ6MM/edit#slide=id.g1ec1d04b5a_1_83
3) Back Propagation with Tensorflow by Dan Aloni
http://blog.aloni.org/posts/backprop-with-tensorflow/
4) Yes you should understand backprop by Andrej Karpathy
https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b#.cockptkn7
[Network Architecture]
Input: x
Layer1: x * W + b
Output layer = σ(Layer1)
Loss_i = - y * log(σ(Layer1)) - (1 - y) * log(1 - σ(Layer1))
Loss = tf.reduce_sum(Loss_i)
We want to compute that
dLoss/dW = ???
dLoss/db = ???
please read "Neural Net Backprop in one slide!" for deriving formulas
"""
import tensorflow as tf
import numpy as np
tf.set_random_seed(777) # for reproducibility
# Predicting animal type based on various features
xy = np.loadtxt('data-04-zoo.csv', delimiter=',', dtype=np.float32)
X_data = xy[:, 0:-1]
N = X_data.shape[0]
y_data = xy[:, [-1]]
# y_data has labels from 0 ~ 6
print("y has one of the following values")
print(np.unique(y_data))
# X_data.shape = (101, 16) => 101 samples, 16 features
# y_data.shape = (101, 1) => 101 samples, 1 label
print("Shape of X data: ", X_data.shape)
print("Shape of y data: ", y_data.shape)
nb_classes = 7 # 0 ~ 6
X = tf.placeholder(tf.float32, [None, 16])
y = tf.placeholder(tf.int32, [None, 1]) # 0 ~ 6
target = tf.one_hot(y, nb_classes) # one hot
target = tf.reshape(target, [-1, nb_classes])
target = tf.cast(target, tf.float32)
W = tf.Variable(tf.random_normal([16, nb_classes]), name='weight')
b = tf.Variable(tf.random_normal([nb_classes]), name='bias')
def sigma(x):
# sigmoid function
# σ(x) = 1 / (1 + exp(-x))
return 1. / (1. + tf.exp(-x))
def sigma_prime(x):
# derivative of the sigmoid function
# σ'(x) = σ(x) * (1 - σ(x))
return sigma(x) * (1. - sigma(x))
# Forward propagtion
layer_1 = tf.matmul(X, W) + b
y_pred = sigma(layer_1)
# Loss Function (end of forwad propagation)
loss_i = - target * tf.log(y_pred) - (1. - target) * tf.log(1. - y_pred)
loss = tf.reduce_sum(loss_i)
# Dimension Check
assert y_pred.shape.as_list() == target.shape.as_list()
# Back prop (chain rule)
# How to derive? please read "Neural Net Backprop in one slide!"
d_loss = (y_pred - target) / (y_pred * (1. - y_pred) + 1e-7)
d_sigma = sigma_prime(layer_1)
d_layer = d_loss * d_sigma
d_b = d_layer
d_W = tf.matmul(tf.transpose(X), d_layer)
# Updating network using gradients
learning_rate = 0.01
train_step = [
tf.assign(W, W - learning_rate * d_W),
tf.assign(b, b - learning_rate * tf.reduce_sum(d_b)),
]
# Prediction and Accuracy
prediction = tf.argmax(y_pred, 1)
acct_mat = tf.equal(tf.argmax(y_pred, 1), tf.argmax(target, 1))
acct_res = tf.reduce_mean(tf.cast(acct_mat, tf.float32))
# Launch graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
for step in range(500):
sess.run(train_step, feed_dict={X: X_data, y: y_data})
if step % 10 == 0:
# Within 300 steps, you should see an accuracy of 100%
step_loss, acc = sess.run([loss, acct_res], feed_dict={
X: X_data, y: y_data})
print("Step: {:5}\t Loss: {:10.5f}\t Acc: {:.2%}" .format(
step, step_loss, acc))
# Let's see if we can predict
pred = sess.run(prediction, feed_dict={X: X_data})
for p, y in zip(pred, y_data):
msg = "[{}]\t Prediction: {:d}\t True y: {:d}"
print(msg.format(p == int(y[0]), p, int(y[0])))
"""
Output Example
Step: 0 Loss: 453.74799 Acc: 38.61%
Step: 20 Loss: 95.05664 Acc: 88.12%
Step: 40 Loss: 66.43570 Acc: 93.07%
Step: 60 Loss: 53.09288 Acc: 94.06%
...
Step: 290 Loss: 18.72972 Acc: 100.00%
Step: 300 Loss: 18.24953 Acc: 100.00%
Step: 310 Loss: 17.79592 Acc: 100.00%
...
[True] Prediction: 0 True y: 0
[True] Prediction: 0 True y: 0
[True] Prediction: 3 True y: 3
[True] Prediction: 0 True y: 0
...
"""