Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MPS support #1

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 112 additions & 52 deletions parrot/bayesian_optimization.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,30 +8,29 @@
Question/comments/concerns? Raise an issue on github:
https://github.com/idptools/parrot

Licensed under the MIT license.
Licensed under the MIT license.
"""

import math

import numpy as np

try:
import GPy
import GPyOpt
from GPyOpt.methods import BayesianOptimization
except ImportError:
print('Error importing GPy.')
print(' If trying to run parrot-optimize, make sure to use `pip install idptools-parrot[optimize]`')
print("Error importing GPy.")
print(
" If trying to run parrot-optimize, make sure to use `pip install idptools-parrot[optimize]`"
)

from parrot import train_network
from parrot import brnn_architecture
from parrot import brnn_architecture, train_network


class BayesianOptimizer(object):
"""A class for conducting Bayesian Optimization on a PyTorch RNN

Sets up and runs GPy Bayesian Optimization in order to choose the best-
performing hyperparameters for an RNN for a given machine learning task.
performing hyperparameters for an RNN for a given machine learning task.
Iteratively change learning rate, hidden vector size, and the number of layers
in the network, then train and validate using 5-fold cross validation.

Expand All @@ -55,7 +54,7 @@ class BayesianOptimizer(object):
weights_file : str
Path to which the network weights will be saved during training
device : str
'cpu' or 'cuda' depending on system hardware
'cpu', 'mps' or 'cuda' depending on system hardware
max_iterations : int
Maximum number of iterations to perform the optimization procedure
silent : bool
Expand All @@ -64,8 +63,18 @@ class BayesianOptimizer(object):
GPy-compatible bounds for each of the hyperparameters to be optimized
"""

def __init__(self, cv_dataloaders, input_size, n_epochs, n_classes,
dtype, weights_file, max_iterations, device, silent):
def __init__(
self,
cv_dataloaders,
input_size,
n_epochs,
n_classes,
dtype,
weights_file,
max_iterations,
device,
silent,
):
"""
Parameters
----------
Expand All @@ -83,7 +92,7 @@ def __init__(self, cv_dataloaders, input_size, n_epochs, n_classes,
weights_file : str
Path to which the network weights will be saved during training
device : str
'cpu' or 'cuda' depending on system hardware
'cpu', 'mps' or 'cuda' depending on system hardware
max_iterations : int
Maximum number of iterations to perform the optimization procedure
silent : bool
Expand All @@ -96,19 +105,29 @@ def __init__(self, cv_dataloaders, input_size, n_epochs, n_classes,
self.n_folds = len(cv_dataloaders)
self.n_classes = n_classes
if n_classes > 1:
self.problem_type = 'classification'
self.problem_type = "classification"
else:
self.problem_type = 'regression'
self.problem_type = "regression"

self.dtype = dtype
self.weights_file = weights_file
self.max_iterations = max_iterations
self.device = device
self.silent = silent

self.bds = [{'name': 'log_learning_rate', 'type': 'continuous', 'domain': (-5, -2)}, # 0.00001-0.01
{'name': 'n_layers', 'type': 'discrete', 'domain': tuple(range(1, 6))}, # 1-5
{'name': 'hidden_size', 'type': 'discrete', 'domain': tuple(range(5, 51))}] # 5-50
self.bds = [
{
"name": "log_learning_rate",
"type": "continuous",
"domain": (-5, -2),
}, # 0.00001-0.01
{
"name": "n_layers",
"type": "discrete",
"domain": tuple(range(1, 6)),
}, # 1-5
{"name": "hidden_size", "type": "discrete", "domain": tuple(range(5, 51))},
] # 5-50

def compute_cv_loss(self, hyperparameters):
"""Compute the average cross-val loss for a given set of hyperparameters
Expand All @@ -125,7 +144,7 @@ def compute_cv_loss(self, hyperparameters):
Returns
-------
numpy float array
a Nx1 numpy array of the average cross-val loss
a Nx1 numpy array of the average cross-val loss
per set of input hyperparameters
"""

Expand All @@ -134,7 +153,7 @@ def compute_cv_loss(self, hyperparameters):
for i in range(len(hyperparameters)):

log_lr, nl, hs = hyperparameters[i]
lr = 10**float(log_lr)
lr = 10 ** float(log_lr)
nl = int(nl)
hs = int(hs)

Expand All @@ -143,7 +162,10 @@ def compute_cv_loss(self, hyperparameters):
avg = np.average(cv_outputs[i])

if self.silent is False:
print(' %.6f | %2d | %2d | %.3f' % (lr, nl, hs, avg))
print(
" %.6f | %2d | %2d | %.3f"
% (lr, nl, hs, avg)
)

outputs = np.average(cv_outputs, axis=1)
return outputs
Expand All @@ -166,23 +188,36 @@ def eval_cv_brnns(self, lr, nl, hs):
the best validation loss from each fold of cross validation
"""

cv_losses = np.zeros(self.n_folds) - 1 # -1 so that it's obvious if something goes wrong
cv_losses = (
np.zeros(self.n_folds) - 1
) # -1 so that it's obvious if something goes wrong

for k in range(self.n_folds):
if self.dtype == 'sequence':
if self.dtype == "sequence":
# Use a many-to-one architecture
brnn_network = brnn_architecture.BRNN_MtO(self.input_size, hs, nl,
self.n_classes, self.device).to(self.device)
brnn_network = brnn_architecture.BRNN_MtO(
self.input_size, hs, nl, self.n_classes, self.device
).to(self.device)
else:
# Use a many-to-many architecture
brnn_network = brnn_architecture.BRNN_MtM(self.input_size, hs, nl,
self.n_classes, self.device).to(self.device)
brnn_network = brnn_architecture.BRNN_MtM(
self.input_size, hs, nl, self.n_classes, self.device
).to(self.device)

# Train network with this set of hyperparameters
train_losses, val_losses = train_network.train(brnn_network, self.cv_loaders[k][0],
self.cv_loaders[k][1], self.dtype, self.problem_type,
self.weights_file, stop_condition='iter', device=self.device,
learn_rate=lr, n_epochs=self.n_epochs, silent=True)
train_losses, val_losses = train_network.train(
brnn_network,
self.cv_loaders[k][0],
self.cv_loaders[k][1],
self.dtype,
self.problem_type,
self.weights_file,
stop_condition="iter",
device=self.device,
learn_rate=lr,
n_epochs=self.n_epochs,
silent=True,
)
# Take best val loss
best_val_loss = np.min(val_losses)
cv_losses[k] = best_val_loss
Expand Down Expand Up @@ -211,7 +246,7 @@ def initial_search(self, x):
for i in range(len(x)):

log_lr, nl, hs = x[i]
lr = 10**float(log_lr)
lr = 10 ** float(log_lr)
nl = int(nl)
hs = int(hs)

Expand All @@ -237,41 +272,66 @@ def optimize(self):
"""

# Initial hyperparameter search -- used to get noise estimate
x_init = np.array([[-3.0, 1, 20], [-3.0, 2, 20], [-3.0, 3, 20], [-3.0, 4, 20], [-3.0, 5, 20],
[-2.0, 2, 20], [-3.3, 2, 20], [-4.0, 2, 20], [-5.0, 2, 20],
[-3.0, 2, 5], [-3.0, 2, 15], [-3.0, 2, 35], [-3.0, 2, 50]])
x_init = np.array(
[
[-3.0, 1, 20],
[-3.0, 2, 20],
[-3.0, 3, 20],
[-3.0, 4, 20],
[-3.0, 5, 20],
[-2.0, 2, 20],
[-3.3, 2, 20],
[-4.0, 2, 20],
[-5.0, 2, 20],
[-3.0, 2, 5],
[-3.0, 2, 15],
[-3.0, 2, 35],
[-3.0, 2, 50],
]
)
y_init, noise = self.initial_search(x_init)

if self.silent is False:
print("\nInitial search results:")
print("lr\tnl\ths\toutput")
for i in range(len(x_init)):
print("%.5f\t%2d\t%2d\t%.4f" % (10**x_init[i][0], x_init[i][1], x_init[i][2], y_init[i][0]))
print(
"%.5f\t%2d\t%2d\t%.4f"
% (10 ** x_init[i][0], x_init[i][1], x_init[i][2], y_init[i][0])
)
print("Noise estimate:", noise)
print('\n')
print('Primary optimization:')
print('--------------------\n')
print('Learning rate | n_layers | hidden vector size | avg CV loss ')
print('======================================================================')

optimizer = BayesianOptimization(f=self.compute_cv_loss,
domain=self.bds,
model_type='GP',
acquisition_type='EI',
acquisition_jitter=0.05,
X=x_init,
Y=y_init,
noise_var=noise,
maximize=False)
print("\n")
print("Primary optimization:")
print("--------------------\n")
print(
"Learning rate | n_layers | hidden vector size | avg CV loss "
)
print(
"======================================================================"
)

optimizer = BayesianOptimization(
f=self.compute_cv_loss,
domain=self.bds,
model_type="GP",
acquisition_type="EI",
acquisition_jitter=0.05,
X=x_init,
Y=y_init,
noise_var=noise,
maximize=False,
)

optimizer.run_optimization(max_iter=self.max_iterations)

ins = optimizer.get_evaluations()[0]
outs = optimizer.get_evaluations()[1].flatten()

if self.silent is False:
print("\nThe optimal hyperparameters are:\nlr = %.5f\nnl = %d\nhs = %d" %
(10**optimizer.x_opt[0], optimizer.x_opt[1], optimizer.x_opt[2]))
print(
"\nThe optimal hyperparameters are:\nlr = %.5f\nnl = %d\nhs = %d"
% (10 ** optimizer.x_opt[0], optimizer.x_opt[1], optimizer.x_opt[2])
)
print()

return optimizer.x_opt
Loading