#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 8 20:52:02 2018
@author: wzy
"""
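# --- Assumed supporting definitions --------------------------------------------
# The Jacobian functions below rely on numpy, on a forward-pass helper
# `network_function`, on the activation derivative `d_sigma`, and on the weight
# and bias variables W1, W2, W3, b1, b2, b3, all of which are defined elsewhere
# in the assignment notebook. The sketch below is only a hedged reconstruction so
# this file can run on its own: the logistic sigmoid activation, the layer sizes
# (1 -> 6 -> 7 -> 2) and the random initialisation are assumptions, not values
# taken from this file.
import numpy as np

def sigma(z):
    # Logistic sigmoid activation (assumed form).
    return 1 / (1 + np.exp(-z))

def d_sigma(z):
    # Derivative of the logistic sigmoid (assumed form).
    return sigma(z) * (1 - sigma(z))

# Placeholder weights and biases; shapes chosen to be mutually consistent.
np.random.seed(0)
W1, b1 = np.random.randn(6, 1), np.random.randn(6, 1)
W2, b2 = np.random.randn(7, 6), np.random.randn(7, 1)
W3, b3 = np.random.randn(2, 7), np.random.randn(2, 1)

def network_function(a0):
    # Forward pass: return the activation and weighted sum at every layer,
    # in the order the Jacobian functions below unpack them.
    z1 = W1 @ a0 + b1
    a1 = sigma(z1)
    z2 = W2 @ a1 + b2
    a2 = sigma(z2)
    z3 = W3 @ a2 + b3
    a3 = sigma(z3)
    return a0, z1, a1, z2, a2, z3, a3
# --------------------------------------------------------------------------------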
# GRADED FUNCTION
# Jacobian for the third layer weights. There is no need to edit this function.
def J_W3(x, y):
    # First get all the activations and weighted sums at each layer of the network.
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    # We'll use the variable J to store parts of our result as we go along, updating it in each line.
    # Firstly, we calculate dC/da3, using the expressions above.
    J = 2 * (a3 - y)
    # Next multiply the result we've calculated by the derivative of sigma, evaluated at z3.
    J = J * d_sigma(z3)
    # Then we take the dot product (along the axis that holds the training examples) with the final partial derivative,
    # i.e. dz3/dW3 = a2,
    # and divide by the number of training examples, for the average over all training examples.
    J = J @ a2.T / x.size
    # Finally return the result out of the function.
    return J
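# Written out, J_W3 implements the chain rule
#     dC/dW3 = dC/da3 * da3/dz3 * dz3/dW3 = 2(a3 - y) * sigma'(z3) * a2,
# where the matrix product with a2.T supplies the dz3/dW3 factor and, together
# with the division by the number of examples, averages over the training set.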
# In this function, you will implement the jacobian for the bias.
# As you will see from the partial derivatives, only the last partial derivative is different.
# The first two partial derivatives are the same as previously.
# ===YOU SHOULD EDIT THIS FUNCTION===
def J_b3(x, y):
    # As last time, we'll first set up the activations.
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    # Next you should implement the first two partial derivatives of the Jacobian.
    # ===COPY TWO LINES FROM THE PREVIOUS FUNCTION TO SET UP THE FIRST TWO JACOBIAN TERMS===
    J = 2 * (a3 - y)
    J = J * d_sigma(z3)
    # For the final line, we don't need to multiply by dz3/db3, because that is multiplying by 1.
    # We still need to sum over all training examples however.
    # There is no need to edit this line.
    J = np.sum(J, axis=1, keepdims=True) / x.size
    return J
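# Here the chain is dC/db3 = 2(a3 - y) * sigma'(z3) * dz3/db3 with dz3/db3 = 1,
# so the only difference from J_W3 is summing over training examples rather than
# taking the product with a2.T.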
# GRADED FUNCTION
# Compare this function to J_W3 to see how it changes.
# There is no need to edit this function.
def J_W2(x, y):
    # The first two lines are identical to those in J_W3.
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    J = 2 * (a3 - y)
    # The next two lines implement da3/da2, first σ' and then W3.
    J = J * d_sigma(z3)
    J = (J.T @ W3).T
    # Then the final lines are the same as in J_W3, but with the layer number bumped down.
    J = J * d_sigma(z2)
    J = J @ a1.T / x.size
    return J
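# Each extra layer adds another factor of the form da(n)/da(n-1) = sigma'(z_n) * W_n
# to the chain, which is what the (J.T @ W3).T line implements; schematically
#     dC/dW2 = 2(a3 - y) * sigma'(z3) * W3 * sigma'(z2) * a1,
# averaged over the training examples. J_W1 and the bias Jacobians below repeat
# the same pattern one layer further down.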
# As previously, fill in all the incomplete lines.
# ===YOU SHOULD EDIT THIS FUNCTION===
def J_b2(x, y):
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    J = 2 * (a3 - y)
    J = J * d_sigma(z3)
    J = (J.T @ W3).T
    J = J * d_sigma(z2)
    J = np.sum(J, axis=1, keepdims=True) / x.size
    return J
# GRADED FUNCTION
# Fill in all incomplete lines.
# ===YOU SHOULD EDIT THIS FUNCTION===
def J_W1(x, y):
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    J = 2 * (a3 - y)
    J = J * d_sigma(z3)
    J = (J.T @ W3).T
    J = J * d_sigma(z2)
    J = (J.T @ W2).T
    J = J * d_sigma(z1)
    J = J @ a0.T / x.size
    return J
# Fill in all incomplete lines.
# ===YOU SHOULD EDIT THIS FUNCTION===
def J_b1(x, y):
    a0, z1, a1, z2, a2, z3, a3 = network_function(x)
    J = 2 * (a3 - y)
    J = J * d_sigma(z3)
    J = (J.T @ W3).T
    J = J * d_sigma(z2)
    J = (J.T @ W2).T
    J = J * d_sigma(z1)
    J = np.sum(J, axis=1, keepdims=True) / x.size
    return J
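# --- Usage sketch (not part of the graded assignment) ---------------------------
# A hedged example of how these Jacobians could drive plain gradient descent.
# The toy data, learning rate and iteration count below are hypothetical
# placeholders, not values from the original notebook.
def train(x, y, lr=0.5, n_iterations=1000):
    global W1, W2, W3, b1, b2, b3
    for _ in range(n_iterations):
        # Evaluate all Jacobians at the current parameters ...
        dW3, db3 = J_W3(x, y), J_b3(x, y)
        dW2, db2 = J_W2(x, y), J_b2(x, y)
        dW1, db1 = J_W1(x, y), J_b1(x, y)
        # ... then step each parameter along its negative (average) gradient.
        W3, b3 = W3 - lr * dW3, b3 - lr * db3
        W2, b2 = W2 - lr * dW2, b2 - lr * db2
        W1, b1 = W1 - lr * dW1, b1 - lr * db1

if __name__ == "__main__":
    # Toy regression target: a circle traced as x sweeps [0, 1), scaled into the
    # sigmoid's output range.
    x = np.arange(0, 1, 0.01).reshape(1, -1)
    y = np.vstack((np.cos(2 * np.pi * x), np.sin(2 * np.pi * x))) / 2 + 0.5
    train(x, y)
    print("Final mean squared error:", np.mean((network_function(x)[-1] - y) ** 2))
# ---------------------------------------------------------------------------------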