Commit: Topological trainer and config, closes #29

Showing 5 changed files with 461 additions and 0 deletions.
@@ -0,0 +1,80 @@
trainer: "TopologicalTrainer"
random_seed: 43

model:
  type: "efficientnet_b0"
  parameters:
    num_classes: 152
    pretrained: true

training:
  batch_size: 64
  epochs:
    initial: 30
    fine_tuning: 30
  loss_function:
    type: "CrossEntropyLoss"
    parameters: {}
  optimizer:
    type: "SGD"
    parameters:
      lr: 0.05
  learning_rates:
    initial: 0.05
    fine_tuning: 0.05
    final_fine_tuning: 0.05
  freeze_until_layer: "classifier.1.0.weight"

metrics:
  - type: "Accuracy"
  - type: "Precision"
  - type: "Recall"
  - type: "F1Score"

callbacks:
  CSVLogging:
    parameters:
      csv_path: "dynamically/set/by/date.csv"  # placeholder, overwritten at runtime
  Checkpoint:
    parameters:
      save_freq: 5
  EarlyStopping:
    parameters:
      monitor: "val_loss"
      patience: 5
      delta: 0
      verbose: true

data:
  name: "CarDataset"
  dataset_path: "./data/processed/DB_Marca_Modelo"
  test_size: 0.1
  val_size: 0.1
  transforms:
    - type: "Resize"
      parameters:
        size: [224, 224]
    # - type: "TrivialAugmentWide"
    #   parameters: {}
    - type: "ToTensor"
      parameters: {}
    - type: "Normalize"
      parameters:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
  eval_transforms:
    - type: "Resize"
      parameters:
        size: [224, 224]
    - type: "ToTensor"
      parameters: {}
    - type: "Normalize"
      parameters:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]

paths:
  model_path: "./outputs/models/"
  log_path: "./logs/"
  plot_path: "./outputs/figures/"
  checkpoint_path: "./outputs/checkpoints/"
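Each entry in the transforms lists above is a {type, parameters} pair. As a rough illustration of how such a list can be mapped onto torchvision (a minimal sketch assuming standard torchvision transform names; the repository's actual datasets.transformations.get_transforms may differ):

    # Hypothetical sketch; not the repository's get_transforms implementation.
    from torchvision import transforms as T

    def build_transforms(entries):
        # Look up each transform class by name and pass its parameters,
        # e.g. {"type": "Resize", "parameters": {"size": [224, 224]}}.
        ops = [getattr(T, e["type"])(**e.get("parameters", {})) for e in entries]
        return T.Compose(ops)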
@@ -0,0 +1,80 @@
trainer: "TopologicalTrainer"
random_seed: 43

model:
  type: "densenet121"
  parameters:
    num_classes: 34
    pretrained: true

training:
  batch_size: 32
  epochs:
    initial: 30
    fine_tuning: 30
  loss_function:
    type: "CrossEntropyLoss"
    parameters: {}
  optimizer:
    type: "SGD"
    parameters:
      lr: 0.01
  learning_rates:
    initial: 0.05
    fine_tuning: 0.05
    final_fine_tuning: 0.05
  freeze_until_layer: "classifier.0.weight"

metrics:
  - type: "Accuracy"
  - type: "Precision"
  - type: "Recall"
  - type: "F1Score"

callbacks:
  CSVLogging:
    parameters:
      csv_path: "dynamically/set/by/date.csv"  # placeholder, overwritten at runtime
  Checkpoint:
    parameters:
      save_freq: 5
  EarlyStopping:
    parameters:
      monitor: "val_loss"
      patience: 5
      delta: 0
      verbose: true

data:
  name: "CarDataset"
  dataset_path: "./data/processed/DB_Marca"
  test_size: 0.1
  val_size: 0.1
  transforms:
    - type: "Resize"
      parameters:
        size: [224, 224]
    # - type: "TrivialAugmentWide"
    #   parameters: {}
    - type: "ToTensor"
      parameters: {}
    - type: "Normalize"
      parameters:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]
  eval_transforms:
    - type: "Resize"
      parameters:
        size: [224, 224]
    - type: "ToTensor"
      parameters: {}
    - type: "Normalize"
      parameters:
        mean: [0.485, 0.456, 0.406]
        std: [0.229, 0.224, 0.225]

paths:
  model_path: "./outputs/models/"
  log_path: "./logs/"
  plot_path: "./outputs/figures/"
  checkpoint_path: "./outputs/checkpoints/"
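Both configs also set freeze_until_layer, naming the first parameter that should stay trainable during the initial stage. A minimal sketch of one way such a setting could be applied (an assumption about the mechanism; the trainer code itself is not part of this diff):

    def freeze_until(model, stop_name):
        # Freeze parameters in registration order until the named one is
        # reached; that parameter and everything after it stay trainable.
        for name, param in model.named_parameters():
            if name == stop_name:
                break
            param.requires_grad = False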
@@ -0,0 +1,180 @@
import torch
import yaml
import argparse
from datetime import datetime
from torch.utils.data import DataLoader, random_split
from datasets.dataset import get_dataset
from datasets.transformations import get_transforms
from utils.metrics import Accuracy, Precision, Recall, F1Score
from factories.model_factory import ModelFactory
from factories.loss_factory import LossFactory
from factories.optimizer_factory import OptimizerFactory
from factories.callback_factory import CallbackFactory
from trainers import get_trainer
from os import path


def main(config_path, model_path, alpha):
    """
    Train a model using the given configuration file.

    Args:
        config_path (str): Path to the configuration file.
        model_path (str): Path to the trained model file (.pth).
        alpha (float): Alpha value for the topological loss.
    """
    with open(config_path, 'r') as file:
        config = yaml.safe_load(file)

    # If CUDA is not available, stop execution
    if not torch.cuda.is_available():
        print("CUDA is not available. Exiting...")
        exit()
    device = torch.device("cuda")

    # Load and transform data
    transforms = get_transforms(config['data']['transforms'])
    eval_transforms = get_transforms(config['data']['eval_transforms'])
    data = get_dataset(config['data']['name'], config['data']['dataset_path'], train=True, transform=transforms)

    # Split data
    total_size = len(data)
    test_size = int(total_size * config['data']['test_size'])
    val_size = int(total_size * config['data']['val_size'])
    train_size = total_size - test_size - val_size
    assert train_size > 0 and val_size > 0 and test_size > 0, "One of the splits has zero or negative size."
    data_train, data_test = random_split(data, [train_size + val_size, test_size], generator=torch.Generator().manual_seed(config['random_seed']))
    data_train, data_val = random_split(data_train, [train_size, val_size], generator=torch.Generator().manual_seed(config['random_seed']))

    # Apply evaluation transforms to the validation and test datasets.
    # NOTE: all splits share the same underlying dataset object, so this
    # assignment also changes the transform seen by the training subset.
    data_test.dataset.transform = eval_transforms
    data_val.dataset.transform = eval_transforms

    # Data loaders using the given batch_size
    train_loader = DataLoader(data_train, batch_size=config['training']['batch_size'], shuffle=True)
    valid_loader = DataLoader(data_val, batch_size=config['training']['batch_size'], shuffle=False)
    test_loader = DataLoader(data_test, batch_size=config['training']['batch_size'], shuffle=False)

    # Model setup: initialize with 152 classes so the architecture matches
    # the pretrained checkpoint loaded below
    model_factory = ModelFactory()
    model = model_factory.create(config['model']['type'], num_classes=152, pretrained=True).to(device)

    # Load the pretrained model weights
    pretrained_dict = torch.load(model_path)
    model_dict = model.state_dict()

    # Remove pretrained classifier weights (since we are replacing the classifier)
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if 'classifier' not in k}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict, strict=False)

    # Reinitialize the classifier for the new number of classes
    num_ftrs = model.classifier[0].in_features

    model.classifier = torch.nn.Sequential(
        torch.nn.Dropout(p=0.2, inplace=True),
        torch.nn.Sequential(
            torch.nn.Linear(num_ftrs, 256),
            torch.nn.ReLU(),
            torch.nn.Dropout(p=0.4, inplace=False),
            torch.nn.Linear(256, config['model']['parameters']['num_classes'])
        )
    ).to(device)

    # Ensure the model has been updated correctly
    print("Updated model structure: ", model)

    # Loss setup
    loss_factory = LossFactory()
    criterion = loss_factory.create(config['training']['loss_function']['type'])

    # Optimizer setup with the given parameters
    optimizer_factory = OptimizerFactory()
    optimizer = optimizer_factory.create(config['training']['optimizer']['type'])
    optimizer_params = config['training']['optimizer']['parameters']
    print("Using optimizer: ", optimizer, " with params: ", optimizer_params)
    print("Batch size: ", config['training']['batch_size'])

    # Training stages setup
    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    model_dataset_time = f"TOP_{config['model']['type']}_{config['data']['name']}_{config['training']['optimizer']['type']}_{config['training']['batch_size']}_{current_time}"
    log_filename = path.join(config['paths']['log_path'], f"log_finetuning_{model_dataset_time}.csv")

    # Callbacks setup
    callbacks_config = config['callbacks']
    if "CSVLogging" in callbacks_config:
        callbacks_config["CSVLogging"]["parameters"]["csv_path"] = log_filename

    # Metrics and trainer setup
    metrics = [Accuracy(), Precision(), Recall(), F1Score()]
    trainer = get_trainer(config['trainer'], model=model, device=device)

    # Initial training stage
    print("Starting initial training stage with frozen layers...")
    trainer.build(
        criterion=criterion,
        optimizer_class=optimizer,
        optimizer_params=optimizer_params,
        metrics=metrics
    )

    callback_factory = CallbackFactory()
    callbacks = []
    for name, params in callbacks_config.items():
        if name == "Checkpoint":
            params["parameters"]["checkpoint_dir"] = path.join(config['paths']['checkpoint_path'], model_dataset_time)
            params["parameters"]["model"] = model
            params["parameters"]["optimizer"] = trainer.optimizer
            params["parameters"]["scheduler"] = trainer.scheduler

        callback = callback_factory.create(name, **params["parameters"])

        if name == "EarlyStopping":
            callback.set_model_and_optimizer(model, trainer.optimizer)

        callbacks.append(callback)

    kwargs = {'alpha': alpha}

    trainer.train(
        train_loader=train_loader,
        valid_loader=valid_loader,
        num_epochs=config['training']['epochs']['initial'],
        callbacks=callbacks,
        **kwargs
    )

    # Fine-tuning stage with all layers unfrozen
    # print("Unfreezing all layers for fine-tuning...")
    # trainer.unfreeze_all_layers()

    # optimizer_instance = trainer.optimizer
    # optimizer_factory.update(optimizer_instance, config['training']['learning_rates']['initial'])

    # print("Starting full model fine-tuning...")
    # trainer.train(
    #     train_loader=train_loader,
    #     valid_loader=valid_loader,
    #     num_epochs=config['training']['epochs']['fine_tuning'],
    #     callbacks=callbacks
    # )

    # Save model
    model_path = path.join(config['paths']['model_path'], f"{model_dataset_time}.pth")
    torch.save(model.state_dict(), model_path)

    # Evaluate
    trainer.evaluate(data_loader=test_loader)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Train a model using the given configuration file.')
    parser.add_argument('config_filename', type=str, help='Filename of the configuration file within the "config" directory')
    parser.add_argument('model_path', type=str, help='Path to the trained model file (.pth)')
    parser.add_argument('alpha', type=float, help='Alpha value for the topological loss')

    args = parser.parse_args()

    config_path = f"config/{args.config_filename}"

    main(config_path, args.model_path, args.alpha)
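For reference, a direct invocation of the script might look as follows; the config and checkpoint names are hypothetical placeholders, and the script prepends config/ to the first argument:

    python train_topological.py topological_config.yml ./outputs/models/pretrained.pth 0.7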
@@ -0,0 +1,31 @@
#!/bin/bash

#SBATCH --job-name=trainTopological     # Process name
#SBATCH --partition=dios                # Queue for execution
#SBATCH -w dionisio                     # Node to execute the job
#SBATCH --gres=gpu:1                    # Number of GPUs to use
#SBATCH --mail-type=END,FAIL            # Notifications for job done & fail
#SBATCH --mail-user=[email protected]   # Where to send notification

# Load necessary paths
export PATH="/opt/anaconda/anaconda3/bin:$PATH"
export PATH="/opt/anaconda/bin:$PATH"
export PYTHONPATH=$(dirname $(dirname "$0"))

# Set up the Conda environment
eval "$(conda shell.bash hook)"
conda activate tda-nn-analysis
export TFHUB_CACHE_DIR=.

# Check that the correct number of arguments was passed
if [ "$#" -ne 3 ]; then
    echo "Usage: $0 <config_file> <model_path> <alpha>"
    exit 1
fi

config_file=$1
model_path=$2
alpha=$3

# Call the Python script with the provided arguments
# (quoted so paths containing spaces survive word splitting)
python train_topological.py "$config_file" "$model_path" "$alpha"
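Assuming the launcher above is saved as train_topological.sh (its file name does not appear in this diff), a hypothetical Slurm submission would be:

    sbatch train_topological.sh topological_config.yml ./outputs/models/pretrained.pth 0.7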