Add MPS support
agdiaz committed Aug 22, 2024
1 parent 6e09567 commit 1ca8c67
Showing 5 changed files with 844 additions and 408 deletions.
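
The headline change threads an Apple-silicon ("mps") device string through the optimizer alongside "cpu" and "cuda". As a minimal sketch of how a caller might choose among the three device strings (this helper is illustrative and not part of the commit):

```python
# Illustrative only -- not part of this commit.
import torch

def select_device() -> str:
    """Prefer CUDA, then Apple-silicon MPS, then fall back to CPU."""
    if torch.cuda.is_available():
        return "cuda"
    # torch.backends.mps.is_available() reports Metal support on Apple silicon
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"

device = select_device()  # later passed as the optimizer's `device` argument
```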
parrot/bayesian_optimization.py: 164 changes (112 additions, 52 deletions)
@@ -8,30 +8,29 @@
Question/comments/concerns? Raise an issue on github:
https://github.com/idptools/parrot

Licensed under the MIT license.
"""

import math

import numpy as np

try:
    import GPy
    import GPyOpt
    from GPyOpt.methods import BayesianOptimization
except ImportError:
-    print('Error importing GPy.')
-    print('  If trying to run parrot-optimize, make sure to use `pip install idptools-parrot[optimize]`')
+    print("Error importing GPy.")
+    print(
+        "  If trying to run parrot-optimize, make sure to use `pip install idptools-parrot[optimize]`"
+    )

-from parrot import train_network
-from parrot import brnn_architecture
+from parrot import brnn_architecture, train_network


class BayesianOptimizer(object):
    """A class for conducting Bayesian Optimization on a PyTorch RNN

    Sets up and runs GPy Bayesian Optimization in order to choose the best-
    performing hyperparameters for an RNN for a given machine learning task.
    Iteratively changes learning rate, hidden vector size, and the number of layers
    in the network, then trains and validates using 5-fold cross validation.
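
The indexing in `eval_cv_brnns` below (`self.cv_loaders[k][0]` for training, `[k][1]` for validation) implies that `cv_dataloaders` is a list holding one (train, validation) loader pair per fold. A minimal sketch of building that structure for a generic PyTorch dataset (the helper and its arguments are placeholders, not parrot's actual setup code):

```python
# Hypothetical setup -- parrot builds these loaders elsewhere; this only
# illustrates the [(train_loader, val_loader), ...] shape the class indexes.
import numpy as np
from sklearn.model_selection import KFold
from torch.utils.data import DataLoader, Subset

def make_cv_loaders(dataset, n_folds=5, batch_size=32):
    cv_loaders = []
    indices = np.arange(len(dataset))
    for train_idx, val_idx in KFold(n_splits=n_folds, shuffle=True).split(indices):
        train_loader = DataLoader(Subset(dataset, train_idx), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(Subset(dataset, val_idx), batch_size=batch_size)
        cv_loaders.append((train_loader, val_loader))
    return cv_loaders
```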
@@ -55,7 +54,7 @@ class BayesianOptimizer(object):
    weights_file : str
        Path to which the network weights will be saved during training
    device : str
-        'cpu' or 'cuda' depending on system hardware
+        'cpu', 'mps' or 'cuda' depending on system hardware
    max_iterations : int
        Maximum number of iterations to perform the optimization procedure
    silent : bool
@@ -64,8 +63,18 @@ class BayesianOptimizer(object):
        GPy-compatible bounds for each of the hyperparameters to be optimized
    """

-    def __init__(self, cv_dataloaders, input_size, n_epochs, n_classes,
-                 dtype, weights_file, max_iterations, device, silent):
+    def __init__(
+        self,
+        cv_dataloaders,
+        input_size,
+        n_epochs,
+        n_classes,
+        dtype,
+        weights_file,
+        max_iterations,
+        device,
+        silent,
+    ):
        """
        Parameters
        ----------
@@ -83,7 +92,7 @@ def __init__(self, cv_dataloaders, input_size, n_epochs, n_classes,
        weights_file : str
            Path to which the network weights will be saved during training
        device : str
-            'cpu' or 'cuda' depending on system hardware
+            'cpu', 'mps' or 'cuda' depending on system hardware
        max_iterations : int
            Maximum number of iterations to perform the optimization procedure
        silent : bool
@@ -96,19 +105,29 @@ def __init__(self, cv_dataloaders, input_size, n_epochs, n_classes,
        self.n_folds = len(cv_dataloaders)
        self.n_classes = n_classes
        if n_classes > 1:
-            self.problem_type = 'classification'
+            self.problem_type = "classification"
        else:
-            self.problem_type = 'regression'
+            self.problem_type = "regression"

        self.dtype = dtype
        self.weights_file = weights_file
        self.max_iterations = max_iterations
        self.device = device
        self.silent = silent

-        self.bds = [{'name': 'log_learning_rate', 'type': 'continuous', 'domain': (-5, -2)},  # 0.00001-0.01
-                    {'name': 'n_layers', 'type': 'discrete', 'domain': tuple(range(1, 6))},  # 1-5
-                    {'name': 'hidden_size', 'type': 'discrete', 'domain': tuple(range(5, 51))}]  # 5-50
+        self.bds = [
+            {
+                "name": "log_learning_rate",
+                "type": "continuous",
+                "domain": (-5, -2),
+            },  # 0.00001-0.01
+            {
+                "name": "n_layers",
+                "type": "discrete",
+                "domain": tuple(range(1, 6)),
+            },  # 1-5
+            {"name": "hidden_size", "type": "discrete", "domain": tuple(range(5, 51))},
+        ]  # 5-50

    def compute_cv_loss(self, hyperparameters):
        """Compute the average cross-val loss for a given set of hyperparameters
@@ -125,7 +144,7 @@ def compute_cv_loss(self, hyperparameters):
        Returns
        -------
        numpy float array
            an Nx1 numpy array of the average cross-val loss
            per set of input hyperparameters
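
GPyOpt calls the objective with a 2-D array of candidate points, one row per point with columns ordered as in `self.bds`, and expects an Nx1 array of objective values back; that contract is why `compute_cv_loss` loops over rows and returns per-row averages. A toy objective obeying the same contract (standalone illustration, not from the commit):

```python
import numpy as np

def toy_objective(X):
    # X has shape (N, 3): columns are log_lr, n_layers, hidden_size
    out = np.zeros((X.shape[0], 1))
    for i, (log_lr, nl, hs) in enumerate(X):
        lr = 10 ** float(log_lr)            # undo the log10 parameterization
        out[i, 0] = lr * int(nl) / int(hs)  # stand-in for the real CV loss
    return out
```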
"""

Expand All @@ -134,7 +153,7 @@ def compute_cv_loss(self, hyperparameters):
for i in range(len(hyperparameters)):

log_lr, nl, hs = hyperparameters[i]
lr = 10**float(log_lr)
lr = 10 ** float(log_lr)
nl = int(nl)
hs = int(hs)

@@ -143,7 +162,10 @@ def compute_cv_loss(self, hyperparameters):
            avg = np.average(cv_outputs[i])

            if self.silent is False:
-                print('  %.6f | %2d | %2d | %.3f' % (lr, nl, hs, avg))
+                print(
+                    "  %.6f | %2d | %2d | %.3f"
+                    % (lr, nl, hs, avg)
+                )

        outputs = np.average(cv_outputs, axis=1)
        return outputs
@@ -166,23 +188,36 @@ def eval_cv_brnns(self, lr, nl, hs):
            the best validation loss from each fold of cross validation
        """

-        cv_losses = np.zeros(self.n_folds) - 1  # -1 so that it's obvious if something goes wrong
+        cv_losses = (
+            np.zeros(self.n_folds) - 1
+        )  # -1 so that it's obvious if something goes wrong

        for k in range(self.n_folds):
-            if self.dtype == 'sequence':
+            if self.dtype == "sequence":
                # Use a many-to-one architecture
-                brnn_network = brnn_architecture.BRNN_MtO(self.input_size, hs, nl,
-                                                          self.n_classes, self.device).to(self.device)
+                brnn_network = brnn_architecture.BRNN_MtO(
+                    self.input_size, hs, nl, self.n_classes, self.device
+                ).to(self.device)
            else:
                # Use a many-to-many architecture
-                brnn_network = brnn_architecture.BRNN_MtM(self.input_size, hs, nl,
-                                                          self.n_classes, self.device).to(self.device)
+                brnn_network = brnn_architecture.BRNN_MtM(
+                    self.input_size, hs, nl, self.n_classes, self.device
+                ).to(self.device)

            # Train network with this set of hyperparameters
-            train_losses, val_losses = train_network.train(brnn_network, self.cv_loaders[k][0],
-                                                           self.cv_loaders[k][1], self.dtype, self.problem_type,
-                                                           self.weights_file, stop_condition='iter', device=self.device,
-                                                           learn_rate=lr, n_epochs=self.n_epochs, silent=True)
+            train_losses, val_losses = train_network.train(
+                brnn_network,
+                self.cv_loaders[k][0],
+                self.cv_loaders[k][1],
+                self.dtype,
+                self.problem_type,
+                self.weights_file,
+                stop_condition="iter",
+                device=self.device,
+                learn_rate=lr,
+                n_epochs=self.n_epochs,
+                silent=True,
+            )
            # Take best val loss
            best_val_loss = np.min(val_losses)
            cv_losses[k] = best_val_loss
@@ -211,7 +246,7 @@ def initial_search(self, x):
        for i in range(len(x)):

            log_lr, nl, hs = x[i]
-            lr = 10**float(log_lr)
+            lr = 10 ** float(log_lr)
            nl = int(nl)
            hs = int(hs)

@@ -237,41 +272,66 @@ def optimize(self):
        """

        # Initial hyperparameter search -- used to get noise estimate
-        x_init = np.array([[-3.0, 1, 20], [-3.0, 2, 20], [-3.0, 3, 20], [-3.0, 4, 20], [-3.0, 5, 20],
-                           [-2.0, 2, 20], [-3.3, 2, 20], [-4.0, 2, 20], [-5.0, 2, 20],
-                           [-3.0, 2, 5], [-3.0, 2, 15], [-3.0, 2, 35], [-3.0, 2, 50]])
+        x_init = np.array(
+            [
+                [-3.0, 1, 20],
+                [-3.0, 2, 20],
+                [-3.0, 3, 20],
+                [-3.0, 4, 20],
+                [-3.0, 5, 20],
+                [-2.0, 2, 20],
+                [-3.3, 2, 20],
+                [-4.0, 2, 20],
+                [-5.0, 2, 20],
+                [-3.0, 2, 5],
+                [-3.0, 2, 15],
+                [-3.0, 2, 35],
+                [-3.0, 2, 50],
+            ]
+        )
        y_init, noise = self.initial_search(x_init)

        if self.silent is False:
            print("\nInitial search results:")
            print("lr\tnl\ths\toutput")
            for i in range(len(x_init)):
-                print("%.5f\t%2d\t%2d\t%.4f" % (10**x_init[i][0], x_init[i][1], x_init[i][2], y_init[i][0]))
+                print(
+                    "%.5f\t%2d\t%2d\t%.4f"
+                    % (10 ** x_init[i][0], x_init[i][1], x_init[i][2], y_init[i][0])
+                )
            print("Noise estimate:", noise)
-            print('\n')
-            print('Primary optimization:')
-            print('--------------------\n')
-            print('Learning rate | n_layers | hidden vector size | avg CV loss ')
-            print('======================================================================')
+            print("\n")
+            print("Primary optimization:")
+            print("--------------------\n")
+            print(
+                "Learning rate | n_layers | hidden vector size | avg CV loss "
+            )
+            print(
+                "======================================================================"
+            )

-        optimizer = BayesianOptimization(f=self.compute_cv_loss,
-                                         domain=self.bds,
-                                         model_type='GP',
-                                         acquisition_type='EI',
-                                         acquisition_jitter=0.05,
-                                         X=x_init,
-                                         Y=y_init,
-                                         noise_var=noise,
-                                         maximize=False)
+        optimizer = BayesianOptimization(
+            f=self.compute_cv_loss,
+            domain=self.bds,
+            model_type="GP",
+            acquisition_type="EI",
+            acquisition_jitter=0.05,
+            X=x_init,
+            Y=y_init,
+            noise_var=noise,
+            maximize=False,
+        )

        optimizer.run_optimization(max_iter=self.max_iterations)

        ins = optimizer.get_evaluations()[0]
        outs = optimizer.get_evaluations()[1].flatten()

        if self.silent is False:
-            print("\nThe optimal hyperparameters are:\nlr = %.5f\nnl = %d\nhs = %d" %
-                  (10**optimizer.x_opt[0], optimizer.x_opt[1], optimizer.x_opt[2]))
+            print(
+                "\nThe optimal hyperparameters are:\nlr = %.5f\nnl = %d\nhs = %d"
+                % (10 ** optimizer.x_opt[0], optimizer.x_opt[1], optimizer.x_opt[2])
+            )
            print()

        return optimizer.x_opt
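
End to end, the class is used roughly as follows. This is a sketch under assumptions: the loader list, input size, and weights path are placeholders rather than values taken from the commit, and in practice the `parrot-optimize` entry point wires this up:

```python
from parrot.bayesian_optimization import BayesianOptimizer

optimizer = BayesianOptimizer(
    cv_dataloaders=cv_loaders,       # [(train_loader, val_loader), ...] per fold
    input_size=20,                   # e.g. one-hot amino-acid encoding (assumed)
    n_epochs=100,
    n_classes=1,                     # 1 selects regression, >1 classification
    dtype="sequence",                # "sequence" -> many-to-one, else many-to-many
    weights_file="optimize_weights.pt",
    max_iterations=50,
    device="mps",                    # the newly supported Apple-silicon backend
    silent=False,
)
best = optimizer.optimize()          # returns [log_lr, n_layers, hidden_size]
lr = 10 ** best[0]                   # x_opt stores log10 of the learning rate
```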