PyTorch Versions of TC1 and P1B2 Benchmarks #96

Open · wants to merge 2 commits into base: develop
142 changes: 142 additions & 0 deletions Pilot1/P1B2/p1b2_baseline_pytorch.py
@@ -0,0 +1,142 @@
from __future__ import print_function

import os
import pickle
import sys

import numpy as np
import torch

file_path = os.path.dirname(os.path.realpath(__file__))
lib_path2 = os.path.abspath(os.path.join(file_path, '..', '..', 'common'))
sys.path.append(lib_path2)
os.chdir(file_path)

import p1b2 as bmk
import candle
from torch_deps.p1b2_pytorch_model import P1B2Model
from torch_deps.random_seeding import seed_random_state

np.set_printoptions(precision=4)

def initialize_parameters(default_model='p1b2_default_model.txt'):

    # Build benchmark object
    p1b2Bmk = bmk.BenchmarkP1B2(bmk.file_path, default_model, 'pytorch',
                                prog='p1b2_baseline',
                                desc='Train Classifier - Pilot 1 Benchmark 2')

    print("Created P1B2 benchmark")

    # Initialize parameters
    gParameters = candle.finalize_parameters(p1b2Bmk)
    print("Parameters initialized")

    return gParameters


def run(params):

    args = candle.ArgumentStruct(**params)

    # Fill in defaults for optional arguments; getattr keeps any value
    # that was supplied on the command line or in the model file
    args.no_cuda = getattr(args, 'no_cuda', False)
    args.multi_gpu = getattr(args, 'multi_gpu', True)
    args.max_num_batches = getattr(args, 'max_num_batches', 1000)
    args.dry_run = getattr(args, 'dry_run', False)
    args.log_interval = getattr(args, 'log_interval', 10)
    args.classes = getattr(args, 'classes', 10)

    # Keras' categorical_crossentropy maps to a log_softmax output
    # activation followed by NLL loss in PyTorch
    if args.loss == 'categorical_crossentropy':
        args.out_activation = 'log_softmax'
        args.loss = 'nll'

    # Set up random seeds for reproducible and deterministic results
    seed = args.rng_seed
    candle.set_seed(seed)
    seed_random_state(seed)

    args.keras_defaults = candle.keras_default_config()

    # Construct extension to save validation results
    ext = bmk.extension_from_parameters(params, '.pytorch')

    candle.verify_path(params['save_path'])
    prefix = '{}{}'.format(params['save_path'], ext)
    logfile = params['logfile'] if params['logfile'] else prefix + '.log'
    candle.set_up_logger(logfile, bmk.logger, params['verbose'])
    bmk.logger.info('Params: {}'.format(params))

    args.tensorboard_dir = "tb/{}".format(ext)
    args.logger = bmk.logger

    # Autosave model
    model_name = params['model_name']
    args_filename = "{}.model.args".format(params['save_path'])
    args.model_autosave_filename = "{}.autosave.model.pth".format(params['save_path'])

    # CSV logging
    args.csv_filename = '{}{}_training.log'.format(params['save_path'], ext)

    # Computation device config (cuda or cpu)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # Save args to file so the model can be reconstructed later
    with open(args_filename, 'wb') as args_file:
        pickle.dump(args, args_file)

    modelP1B2 = P1B2Model(args, use_cuda, device)

    # Model summary
    bmk.logger.info('Model summary: {}'.format(modelP1B2.p1b2_net))

    modelP1B2.train()
    modelP1B2.print_final_stats()

    # Save model state dict
    model_filename = "{}.model_state_dict.pth".format(params['save_path'])
    if hasattr(modelP1B2.p1b2_net, 'module'):
        # Unwrap the DataParallel wrapper before saving
        torch.save(modelP1B2.p1b2_net.module.state_dict(), model_filename)
    else:
        torch.save(modelP1B2.p1b2_net.state_dict(), model_filename)

    # Reload args from file
    with open(args_filename, 'rb') as args_file:
        loaded_args = pickle.load(args_file)

    # Load weights into a freshly constructed model; pass the device
    # settings to match the constructor call above
    loaded_modelP1B2 = P1B2Model(loaded_args, use_cuda, device)
    loaded_modelP1B2.p1b2_net.load_state_dict(
        torch.load(model_filename, map_location=torch.device('cpu')))
    print("Loaded torch model from disk")

    # Evaluate the loaded model on validation and test data
    loaded_modelP1B2.p1b2_net.eval()
    val_acc, val_loss = loaded_modelP1B2.validation(0)

    print("Model State Dict Validation loss: %5.2f" % val_loss)
    print("Model State Dict Validation accuracy: %5.2f%%" % val_acc)

    print('Test data: ')
    test_acc, test_loss = loaded_modelP1B2.test()

    print("Model State Dict Test loss: %5.2f" % test_loss)
    print("Model State Dict Test accuracy: %5.2f%%" % test_acc)


def main():

    gParameters = initialize_parameters()
    run(gParameters)


if __name__ == '__main__':
    main()
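
A note on the loss remapping in run(): a log_softmax output layer followed by PyTorch's NLL loss computes the same criterion that Keras calls categorical_crossentropy (and the same value as cross-entropy applied to raw logits). A minimal, self-contained sketch of the equivalence:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)          # batch of 4 samples, 10 classes
target = torch.tensor([1, 0, 3, 9])  # integer class labels

# log_softmax output + NLL loss ...
nll = F.nll_loss(F.log_softmax(logits, dim=1), target)

# ... equals cross-entropy on the raw logits
ce = F.cross_entropy(logits, target)

assert torch.allclose(nll, ce)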


95 changes: 95 additions & 0 deletions Pilot1/P1B2/torch_deps/p1b2_classification_net.py
@@ -0,0 +1,95 @@
import torch
import torch.nn as nn

from pytorch_utils import build_activation
from torch_deps.weight_init import (basic_weight_init_he_uniform_relu,
                                    basic_weight_init_glorut_uniform)

class P1B2Net(nn.Module):

    def __init__(self,
                 layers: list,
                 activation: str,
                 out_activation: str,
                 dropout: float,
                 classes: int,
                 input_dim: list, ):

        super(P1B2Net, self).__init__()

        self.__p1b2_net = nn.Sequential()

        module_index = 0
        prev_dim = list(input_dim)

        # Define MLP architecture
        if layers is not None:
            if not isinstance(layers, list):
                layers = list(layers)

            # Hidden layers: dense -> activation -> (optional) dropout
            for i, layer in enumerate(layers):
                self.__p1b2_net.add_module(
                    'dense_%d' % module_index,
                    nn.Linear(prev_dim[0], layer, True))
                prev_dim[0] = layer

                self.__p1b2_net.add_module(
                    'activation_%d' % module_index,
                    build_activation(activation))
                if dropout:
                    self.__p1b2_net.add_module(
                        'dropout_%d' % module_index,
                        nn.Dropout(p=dropout))
                module_index += 1

        # Output layer: dense -> out_activation (e.g. log_softmax)
        self.__p1b2_net.add_module('dense_%d' % module_index,
                                   nn.Linear(prev_dim[0], classes))
        prev_dim[0] = classes
        module_index += 1

        self.__p1b2_net.add_module('activation_%d' % module_index,
                                   build_activation(out_activation, dim=1))

        # Weight initialization: He uniform for ReLU, Glorot uniform otherwise
        if activation == 'relu':
            self.__p1b2_net.apply(basic_weight_init_he_uniform_relu)
        else:
            self.__p1b2_net.apply(basic_weight_init_glorut_uniform)

    def forward(self, x):
        return self.__p1b2_net(x)
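
For reference, a minimal sketch of constructing the network directly. The layer sizes and input dimension below are illustrative placeholders, not the benchmark defaults, and assume build_activation understands 'relu' and 'log_softmax' as it does above:

import torch

net = P1B2Net(layers=[1024, 512, 256],
              activation='relu',
              out_activation='log_softmax',
              dropout=0.2,
              classes=10,
              input_dim=[9000])   # hypothetical feature count

x = torch.randn(32, 9000)   # batch of 32 feature vectors
log_probs = net(x)          # shape (32, 10): per-class log-probabilities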

113 changes: 113 additions & 0 deletions Pilot1/P1B2/torch_deps/p1b2_clf_func.py
@@ -0,0 +1,113 @@
import os

import torch
import torch.nn as nn
import torch.utils.data

from pytorch_utils import build_loss


def train_p1b2_clf(device: torch.device,
                   category_clf_net: nn.Module,
                   data_loader: torch.utils.data.DataLoader,
                   loss_type: str,
                   max_num_batches: int,
                   optimizer: torch.optim.Optimizer,
                   scheduler: torch.optim.lr_scheduler._LRScheduler,
                   epoch: int,
                   log_interval: int,
                   dry_run: bool = False, ):

    category_clf_net.train()
    pid = os.getpid()

    correct_category = 0
    train_loss = 0

    for batch_idx, (rnaseq, cl_category) in enumerate(data_loader):

        if batch_idx >= max_num_batches:
            break

        rnaseq, cl_category = rnaseq.to(device), cl_category.to(device)

        category_clf_net.zero_grad()

        out_category = category_clf_net(rnaseq)

        loss = build_loss(loss_type, out_category, cl_category)
        # Sum up batch loss, weighted by the actual batch size
        # (the last batch may be smaller than data_loader.batch_size)
        train_loss += rnaseq.size(0) * loss.item()
        loss.backward()

        optimizer.step()

        if batch_idx % log_interval == 0:
            print('{}\tTrain Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:5.5f}'.format(
                pid, epoch + 1, (batch_idx + 1) * len(rnaseq),
                len(data_loader.dataset),
                100. * (batch_idx + 1) / len(data_loader), loss.item()))
            if dry_run:
                break

        pred_category = out_category.max(1, keepdim=True)[1]
        correct_category += pred_category.eq(
            cl_category.view_as(pred_category)).sum().item()

    # Overall loss and accuracy
    train_loss /= len(data_loader.dataset)
    category_acc = 100. * correct_category / len(data_loader.dataset)

    print('\tP1B2 classification: '
          '\n\t\tTraining Loss: \t\t\t%5.5f '
          '\n\t\tTraining Accuracy: \t\t%5.2f%%'
          % (train_loss, category_acc))

    return category_acc, train_loss



def valid_p1b2_clf(device: torch.device,
                   category_clf_net: nn.Module,
                   data_loader: torch.utils.data.DataLoader,
                   loss_type: str, ):

    category_clf_net.eval()

    correct_category = 0
    test_loss = 0

    with torch.no_grad():
        for rnaseq, cl_category in data_loader:

            rnaseq, cl_category = rnaseq.to(device), cl_category.to(device)

            out_category = category_clf_net(rnaseq)

            loss = build_loss(loss_type, out_category, cl_category)
            # Sum up batch loss, weighted by the actual batch size
            # (the last batch may be smaller than data_loader.batch_size)
            test_loss += rnaseq.size(0) * loss.item()

            pred_category = out_category.max(1, keepdim=True)[1]
            correct_category += pred_category.eq(
                cl_category.view_as(pred_category)).sum().item()

    # Overall loss and accuracy
    test_loss /= len(data_loader.dataset)
    category_acc = 100. * correct_category / len(data_loader.dataset)

    print('\tP1B2 classification: '
          '\n\t\tValidation Loss: \t\t%5.5f '
          '\n\t\tValidation Accuracy: \t\t%5.2f%%'
          % (test_loss, category_acc))

    return category_acc, test_loss
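
A minimal sketch of how these two helpers fit into an epoch loop. The synthetic data, layer sizes, and SGD settings are illustrative placeholders; the benchmark's actual wiring lives in torch_deps/p1b2_pytorch_model.py:

import torch
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

from torch_deps.p1b2_classification_net import P1B2Net

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Synthetic stand-in data: 256 samples, 9000 features, 10 classes
x = torch.randn(256, 9000)
y = torch.randint(0, 10, (256,))
loader = DataLoader(TensorDataset(x, y), batch_size=32, shuffle=True)

net = P1B2Net(layers=[1024, 512], activation='relu',
              out_activation='log_softmax', dropout=0.2,
              classes=10, input_dim=[9000]).to(device)

optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10)

for epoch in range(5):
    train_p1b2_clf(device, net, loader, 'nll', max_num_batches=1000,
                   optimizer=optimizer, scheduler=scheduler,
                   epoch=epoch, log_interval=10)
    valid_p1b2_clf(device, net, loader, 'nll')
    scheduler.step()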