From 792f6b99a2b26200ade982c624989b00ce612e04 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Wed, 13 Nov 2024 11:41:30 +0100 Subject: [PATCH 01/21] Hardcode the warmup process and the weight initialization --- luxonis_train/config/config.py | 2 + luxonis_train/models/luxonis_lightning.py | 121 +++++++++++++++++- .../backbones/efficientrep/efficientrep.py | 21 +++ luxonis_train/nodes/blocks/blocks.py | 12 ++ .../nodes/heads/efficient_bbox_head.py | 13 ++ .../nodes/necks/reppan_neck/reppan_neck.py | 13 ++ 6 files changed, 177 insertions(+), 5 deletions(-) diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index 67fdc8f0..7ed0c573 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -328,6 +328,8 @@ class CallbackConfig(BaseModelExtraForbid): class OptimizerConfig(BaseModelExtraForbid): name: str = "Adam" + apply_custom_lr: bool = False + warmup_epochs: int = 0 params: Params = {} diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 011c3983..600885bf 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -1,3 +1,4 @@ +import math from collections import defaultdict from collections.abc import Mapping from logging import getLogger @@ -5,6 +6,7 @@ from typing import Literal, cast import lightning.pytorch as pl +import numpy as np import torch from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary from lightning.pytorch.utilities import rank_zero_only # type: ignore @@ -131,6 +133,15 @@ def __init__( self._core = _core self.cfg = cfg + ## + self.max_stepnum = math.ceil( + len(self._core.loaders["train"]) / cfg.trainer.batch_size + ) + self.warmup_stepnum = max( + round(cfg.trainer.optimizer.warmup_epochs * self.max_stepnum), 1000 + ) + self.step = 0 + ## self.original_in_shapes = input_shapes self.image_source = cfg.loader.image_source self.dataset_metadata = dataset_metadata or DatasetMetadata() @@ -857,14 +868,66 @@ def configure_optimizers( list[torch.optim.Optimizer], list[torch.optim.lr_scheduler.LRScheduler], ]: - """Configures model optimizers and schedulers.""" + """Configures model optimizers and schedulers with optional + custom learning rates and warm-up logic.""" + cfg_optimizer = self.cfg.trainer.optimizer cfg_scheduler = self.cfg.trainer.scheduler - optim_params = cfg_optimizer.params | { - "params": filter(lambda p: p.requires_grad, self.parameters()), - } - optimizer = OPTIMIZERS.get(cfg_optimizer.name)(**optim_params) + apply_custom_lr = cfg_optimizer.apply_custom_lr + + if apply_custom_lr: + g_bnw, g_w, g_b = [], [], [] + for v in self.modules(): + if hasattr(v, "bias") and isinstance( + v.bias, torch.nn.Parameter + ): + g_b.append(v.bias) + if isinstance(v, torch.nn.BatchNorm2d): + g_bnw.append(v.weight) + elif hasattr(v, "weight") and isinstance( + v.weight, torch.nn.Parameter + ): + g_w.append(v.weight) + + # Create the optimizer with parameter groups + assert cfg_optimizer.name in [ + "SGD", + "Adam", + ], "ERROR: unknown optimizer, use SGD or Adam" + optimizer = torch.optim.SGD( + g_bnw, + lr=cfg_optimizer.params["lr"], + momentum=cfg_optimizer.params["momentum"], + nesterov=True, + ) + + optimizer.add_param_group( + { + "params": g_w, + "weight_decay": cfg_optimizer.params["weight_decay"], + } + ) + optimizer.add_param_group({"params": g_b}) + + lrf = 0.01 + self.lf = ( + lambda x: ( + (1 - math.cos(x * math.pi / self.cfg.trainer.epochs)) / 2 + ) + * (lrf - 1) + + 1 + ) + scheduler = 
torch.optim.lr_scheduler.LambdaLR( + optimizer, lr_lambda=self.lf + ) + return [optimizer], [scheduler] + + else: + optim_params = cfg_optimizer.params | { + "params": filter(lambda p: p.requires_grad, self.parameters()), + } + optimizer = OPTIMIZERS.get(cfg_optimizer.name)(**optim_params) def get_scheduler(scheduler_cfg, optimizer): scheduler_class = SCHEDULERS.get( @@ -895,6 +958,54 @@ def get_scheduler(scheduler_cfg, optimizer): return [optimizer], [scheduler] + def on_after_backward(self): + """Custom logic to adjust learning rates and momentum after + loss.backward.""" + # Call your custom logic here + self.custom_logic() + + def custom_logic(self): + """Custom logic to adjust learning rates and momentum after + loss.backward.""" + + # Increment step counter + self.step = ( + self.step % self.max_stepnum + ) # Reset step counter after each epoch + curr_step = self.step + self.max_stepnum * self.current_epoch + + # Warm-up phase adjustments + if curr_step <= self.warmup_stepnum: + optimizer = self.optimizers() + for k, param in enumerate(optimizer.param_groups): + warmup_bias_lr = ( + self.cfg.trainer.optimizer.params["warmup_bias_lr"] + if k == 2 + else 0.0 + ) + param["lr"] = np.interp( + curr_step, + [0, self.warmup_stepnum], + [ + warmup_bias_lr, + self.cfg.trainer.optimizer.params["lr"] + * self.lf(self.current_epoch), + ], + ) + if "momentum" in param: + param["momentum"] = np.interp( + curr_step, + [0, self.warmup_stepnum], + [ + self.cfg.trainer.optimizer.params[ + "warmup_momentum" + ], + self.cfg.trainer.optimizer.params["momentum"], + ], + ) + + self.step += 1 + def load_checkpoint(self, path: str | Path | None) -> None: """Loads checkpoint weights from provided path. diff --git a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py index d094da14..4357722d 100644 --- a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -125,9 +125,22 @@ def __init__( ) ) + self.initialize_weights() + if download_weights and var.weights_path: self.load_checkpoint(var.weights_path) + def initialize_weights(self): + for m in self.modules(): + t = type(m) + if t is nn.Conv2d: + pass + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True + def set_export_mode(self, mode: bool = True) -> None: """Reparametrizes instances of L{RepVGGBlock} in the network. 
@@ -143,6 +156,14 @@ def set_export_mode(self, mode: bool = True) -> None: module.reparametrize() def forward(self, inputs: Tensor) -> list[Tensor]: + # # Lets plot the input + # img_plt = inputs[0].cpu().numpy().transpose(1, 2, 0) + # # it was normalised with /255.0 so we have to denormalise it + # img_plt = img_plt * 255.0 + # import matplotlib.pyplot as plt + # plt.imshow(img_plt.astype(int)) + # plt.show(block=True) + outputs: list[Tensor] = [] x = self.repvgg_encoder(inputs) for block in self.blocks: diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 25bea7c5..2ac01805 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -56,6 +56,18 @@ def __init__(self, n_classes: int, in_channels: int): prior_prob = 1e-2 self._initialize_weights_and_biases(prior_prob) + self.initialize_weights() + + def initialize_weights(self): + for m in self.modules(): + t = type(m) + if t is nn.Conv2d: + pass + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]: out_feature = self.decoder(x) diff --git a/luxonis_train/nodes/heads/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py index c8500915..1394c8fa 100644 --- a/luxonis_train/nodes/heads/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -95,12 +95,25 @@ def __init__( f"output{i+1}_yolov6r2" for i in range(self.n_heads) ] + self.initialize_weights() + if download_weights: # TODO: Handle variants of head in a nicer way if self.in_channels == [32, 64, 128]: weights_path = "https://github.com/luxonis/luxonis-train/releases/download/v0.1.0-beta/efficientbbox_head_n_coco.ckpt" self.load_checkpoint(weights_path, strict=False) + def initialize_weights(self): + for m in self.modules(): + t = type(m) + if t is nn.Conv2d: + pass + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True + def forward( self, inputs: list[Tensor] ) -> tuple[list[Tensor], list[Tensor], list[Tensor]]: diff --git a/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py index e6b321be..73908e12 100644 --- a/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py @@ -165,9 +165,22 @@ def __init__( out_channels = channels_list_down_blocks[2 * i + 1] curr_n_repeats = n_repeats_down_blocks[i] + self.initialize_weights() + if download_weights and var.weights_path: self.load_checkpoint(var.weights_path) + def initialize_weights(self): + for m in self.modules(): + t = type(m) + if t is nn.Conv2d: + pass + elif t is nn.BatchNorm2d: + m.eps = 1e-3 + m.momentum = 0.03 + elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + m.inplace = True + def forward(self, inputs: list[Tensor]) -> list[Tensor]: x = inputs[-1] up_block_outs: list[Tensor] = [] From 08f219ecdfb1923ec064b415c4c031be9f303a31 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Thu, 14 Nov 2024 13:35:38 +0100 Subject: [PATCH 02/21] custom SGD idea --- luxonis_train/models/luxonis_lightning.py | 79 ++++++++++++----------- 1 file changed, 43 insertions(+), 36 deletions(-) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 600885bf..fd91691e 100644 --- 
a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -133,15 +133,6 @@ def __init__( self._core = _core self.cfg = cfg - ## - self.max_stepnum = math.ceil( - len(self._core.loaders["train"]) / cfg.trainer.batch_size - ) - self.warmup_stepnum = max( - round(cfg.trainer.optimizer.warmup_epochs * self.max_stepnum), 1000 - ) - self.step = 0 - ## self.original_in_shapes = input_shapes self.image_source = cfg.loader.image_source self.dataset_metadata = dataset_metadata or DatasetMetadata() @@ -877,40 +868,56 @@ def configure_optimizers( apply_custom_lr = cfg_optimizer.apply_custom_lr if apply_custom_lr: - g_bnw, g_w, g_b = [], [], [] - for v in self.modules(): - if hasattr(v, "bias") and isinstance( - v.bias, torch.nn.Parameter + assert cfg_optimizer.name == "SGD", ( + "Custom learning rates are supported only for SGD optimizer. " + f"Got {cfg_optimizer.name}." + ) + self.max_stepnum = math.ceil( + len(self._core.loaders["train"]) / self.cfg.trainer.batch_size + ) + self.warmup_stepnum = max( + round( + self.cfg.trainer.optimizer.warmup_epochs * self.max_stepnum + ), + 1000, + ) + self.step = 0 + batch_norm_weights, regular_weights, biases = [], [], [] + for module in self.modules(): + if hasattr(module, "bias") and isinstance( + module.bias, torch.nn.Parameter ): - g_b.append(v.bias) - if isinstance(v, torch.nn.BatchNorm2d): - g_bnw.append(v.weight) - elif hasattr(v, "weight") and isinstance( - v.weight, torch.nn.Parameter + biases.append(module.bias) + if isinstance(module, torch.nn.BatchNorm2d): + batch_norm_weights.append(module.weight) + elif hasattr(module, "weight") and isinstance( + module.weight, torch.nn.Parameter ): - g_w.append(v.weight) + regular_weights.append(module.weight) - # Create the optimizer with parameter groups - assert cfg_optimizer.name in [ - "SGD", - "Adam", - ], "ERROR: unknown optimizer, use SGD or Adam" optimizer = torch.optim.SGD( - g_bnw, + [ + { + "params": batch_norm_weights, + "lr": cfg_optimizer.params["lr"], + "momentum": cfg_optimizer.params["momentum"], + "nesterov": True, + }, + { + "params": regular_weights, + "weight_decay": cfg_optimizer.params["weight_decay"], + }, + {"params": biases}, + ], lr=cfg_optimizer.params["lr"], momentum=cfg_optimizer.params["momentum"], - nesterov=True, + nesterov=cfg_optimizer.params["nesterov"], ) - optimizer.add_param_group( - { - "params": g_w, - "weight_decay": cfg_optimizer.params["weight_decay"], - } + lrf = ( + self.cfg.trainer.optimizer.params["lre"] + / self.cfg.trainer.optimizer.params["lr"] ) - optimizer.add_param_group({"params": g_b}) - - lrf = 0.01 self.lf = ( lambda x: ( (1 - math.cos(x * math.pi / self.cfg.trainer.epochs)) / 2 @@ -961,8 +968,8 @@ def get_scheduler(scheduler_cfg, optimizer): def on_after_backward(self): """Custom logic to adjust learning rates and momentum after loss.backward.""" - # Call your custom logic here - self.custom_logic() + if self.cfg.trainer.optimizer.apply_custom_lr: + self.custom_logic() def custom_logic(self): """Custom logic to adjust learning rates and momentum after From dfea01d0cf28ccecfe1cc29354ace8cd72a62d2f Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Thu, 14 Nov 2024 13:43:23 +0100 Subject: [PATCH 03/21] moving warmup_epochs to optim params --- luxonis_train/models/luxonis_lightning.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index fd91691e..bd363153 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ 
b/luxonis_train/models/luxonis_lightning.py @@ -877,7 +877,8 @@ def configure_optimizers( ) self.warmup_stepnum = max( round( - self.cfg.trainer.optimizer.warmup_epochs * self.max_stepnum + self.cfg.trainer.optimizer.params["warmup_epochs"] + * self.max_stepnum ), 1000, ) From 58f7f3fdb526d47f3cf6c2e81b941ad99cb9de72 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Thu, 14 Nov 2024 13:47:10 +0100 Subject: [PATCH 04/21] remove warmup_epochs from config.py --- luxonis_train/config/config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index 7ed0c573..05480319 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -329,7 +329,6 @@ class CallbackConfig(BaseModelExtraForbid): class OptimizerConfig(BaseModelExtraForbid): name: str = "Adam" apply_custom_lr: bool = False - warmup_epochs: int = 0 params: Params = {} From 5c3a3a73df0bf1550039869eb25614fd7a141f44 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Thu, 14 Nov 2024 13:48:32 +0100 Subject: [PATCH 05/21] remove comments --- .../nodes/backbones/efficientrep/efficientrep.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py index 4357722d..34e21020 100644 --- a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -156,14 +156,6 @@ def set_export_mode(self, mode: bool = True) -> None: module.reparametrize() def forward(self, inputs: Tensor) -> list[Tensor]: - # # Lets plot the input - # img_plt = inputs[0].cpu().numpy().transpose(1, 2, 0) - # # it was normalised with /255.0 so we have to denormalise it - # img_plt = img_plt * 255.0 - # import matplotlib.pyplot as plt - # plt.imshow(img_plt.astype(int)) - # plt.show(block=True) - outputs: list[Tensor] = [] x = self.repvgg_encoder(inputs) for block in self.blocks: From cefe3a6946dc88f5b75c7c4c92985d0c4a27892c Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Thu, 14 Nov 2024 14:46:29 +0100 Subject: [PATCH 06/21] Custom scheduler and optimizer classes --- luxonis_train/config/config.py | 2 +- luxonis_train/models/luxonis_lightning.py | 125 +++--------------- luxonis_train/optimizers/custom_optimizers.py | 51 +++++++ luxonis_train/optimizers/optimizers.py | 3 + luxonis_train/schedulers/custom_schedulers.py | 79 +++++++++++ luxonis_train/schedulers/schedulers.py | 3 + 6 files changed, 159 insertions(+), 104 deletions(-) create mode 100644 luxonis_train/optimizers/custom_optimizers.py create mode 100644 luxonis_train/schedulers/custom_schedulers.py diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index 05480319..2a7f9d3b 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -328,7 +328,6 @@ class CallbackConfig(BaseModelExtraForbid): class OptimizerConfig(BaseModelExtraForbid): name: str = "Adam" - apply_custom_lr: bool = False params: Params = {} @@ -356,6 +355,7 @@ class TrainerConfig(BaseModelExtraForbid): profiler: Literal["simple", "advanced"] | None = None matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True + apply_custom_lr: bool = False seed: int | None = None n_validation_batches: PositiveInt | None = None diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index bd363153..2e5f775f 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ 
b/luxonis_train/models/luxonis_lightning.py @@ -6,7 +6,6 @@ from typing import Literal, cast import lightning.pytorch as pl -import numpy as np import torch from lightning.pytorch.callbacks import ModelCheckpoint, RichModelSummary from lightning.pytorch.utilities import rank_zero_only # type: ignore @@ -865,70 +864,32 @@ def configure_optimizers( cfg_optimizer = self.cfg.trainer.optimizer cfg_scheduler = self.cfg.trainer.scheduler - apply_custom_lr = cfg_optimizer.apply_custom_lr + if self.cfg.trainer.apply_custom_lr: + assert ( + cfg_optimizer.name == "TripleLRSGD" + ), "Custom learning rate is only supported for TripleLRSGD optimizer." + assert ( + cfg_scheduler.name == "TripleLRScheduler" + ), "Custom learning rate is only supported for TripleLRScheduler scheduler." - if apply_custom_lr: - assert cfg_optimizer.name == "SGD", ( - "Custom learning rates are supported only for SGD optimizer. " - f"Got {cfg_optimizer.name}." - ) - self.max_stepnum = math.ceil( + max_stepnum = math.ceil( len(self._core.loaders["train"]) / self.cfg.trainer.batch_size ) - self.warmup_stepnum = max( - round( - self.cfg.trainer.optimizer.params["warmup_epochs"] - * self.max_stepnum - ), - 1000, - ) - self.step = 0 - batch_norm_weights, regular_weights, biases = [], [], [] - for module in self.modules(): - if hasattr(module, "bias") and isinstance( - module.bias, torch.nn.Parameter - ): - biases.append(module.bias) - if isinstance(module, torch.nn.BatchNorm2d): - batch_norm_weights.append(module.weight) - elif hasattr(module, "weight") and isinstance( - module.weight, torch.nn.Parameter - ): - regular_weights.append(module.weight) - - optimizer = torch.optim.SGD( - [ - { - "params": batch_norm_weights, - "lr": cfg_optimizer.params["lr"], - "momentum": cfg_optimizer.params["momentum"], - "nesterov": True, - }, - { - "params": regular_weights, - "weight_decay": cfg_optimizer.params["weight_decay"], - }, - {"params": biases}, - ], - lr=cfg_optimizer.params["lr"], - momentum=cfg_optimizer.params["momentum"], - nesterov=cfg_optimizer.params["nesterov"], + custom_optimizer = OPTIMIZERS.get(cfg_optimizer.name)( + self, cfg_optimizer.params ) + optimizer = custom_optimizer.create_optimizer() - lrf = ( - self.cfg.trainer.optimizer.params["lre"] - / self.cfg.trainer.optimizer.params["lr"] - ) - self.lf = ( - lambda x: ( - (1 - math.cos(x * math.pi / self.cfg.trainer.epochs)) / 2 - ) - * (lrf - 1) - + 1 - ) - scheduler = torch.optim.lr_scheduler.LambdaLR( - optimizer, lr_lambda=self.lf + custom_scheduler = SCHEDULERS.get(cfg_scheduler.name)( + optimizer, + cfg_scheduler.params, + self.cfg.trainer.epochs, + max_stepnum, ) + scheduler = custom_scheduler.create_scheduler() + + self.custom_scheduler = custom_scheduler + return [optimizer], [scheduler] else: @@ -969,50 +930,8 @@ def get_scheduler(scheduler_cfg, optimizer): def on_after_backward(self): """Custom logic to adjust learning rates and momentum after loss.backward.""" - if self.cfg.trainer.optimizer.apply_custom_lr: - self.custom_logic() - - def custom_logic(self): - """Custom logic to adjust learning rates and momentum after - loss.backward.""" - - # Increment step counter - self.step = ( - self.step % self.max_stepnum - ) # Reset step counter after each epoch - curr_step = self.step + self.max_stepnum * self.current_epoch - - # Warm-up phase adjustments - if curr_step <= self.warmup_stepnum: - optimizer = self.optimizers() - for k, param in enumerate(optimizer.param_groups): - warmup_bias_lr = ( - self.cfg.trainer.optimizer.params["warmup_bias_lr"] - if k == 2 - else 
0.0 - ) - param["lr"] = np.interp( - curr_step, - [0, self.warmup_stepnum], - [ - warmup_bias_lr, - self.cfg.trainer.optimizer.params["lr"] - * self.lf(self.current_epoch), - ], - ) - if "momentum" in param: - param["momentum"] = np.interp( - curr_step, - [0, self.warmup_stepnum], - [ - self.cfg.trainer.optimizer.params[ - "warmup_momentum" - ], - self.cfg.trainer.optimizer.params["momentum"], - ], - ) - - self.step += 1 + if self.cfg.trainer.apply_custom_lr: + self.custom_scheduler.update_learning_rate(self.current_epoch) def load_checkpoint(self, path: str | Path | None) -> None: """Loads checkpoint weights from provided path. diff --git a/luxonis_train/optimizers/custom_optimizers.py b/luxonis_train/optimizers/custom_optimizers.py new file mode 100644 index 00000000..e000343f --- /dev/null +++ b/luxonis_train/optimizers/custom_optimizers.py @@ -0,0 +1,51 @@ +import torch + + +class TripleLRSGD: + def __init__(self, model: torch.nn.Module, params: dict) -> None: + """TripleLRSGD is a custom optimizer that separates weights into + batch norm weights, regular weights, and biases. + + @type model: torch.nn.Module + @param model: The model to be used + @type params: dict + @param params: The parameters to be used for the optimizer + """ + self.model = model + self.params = params + + def create_optimizer(self): + batch_norm_weights, regular_weights, biases = [], [], [] + + for module in self.model.modules(): + if hasattr(module, "bias") and isinstance( + module.bias, torch.nn.Parameter + ): + biases.append(module.bias) + if isinstance(module, torch.nn.BatchNorm2d): + batch_norm_weights.append(module.weight) + elif hasattr(module, "weight") and isinstance( + module.weight, torch.nn.Parameter + ): + regular_weights.append(module.weight) + + optimizer = torch.optim.SGD( + [ + { + "params": batch_norm_weights, + "lr": self.params["lr"], + "momentum": self.params["momentum"], + "nesterov": self.params["nesterov"], + }, + { + "params": regular_weights, + "weight_decay": self.params["weight_decay"], + }, + {"params": biases}, + ], + lr=self.params["lr"], + momentum=self.params["momentum"], + nesterov=self.params["nesterov"], + ) + + return optimizer diff --git a/luxonis_train/optimizers/optimizers.py b/luxonis_train/optimizers/optimizers.py index c2a4bf12..43ca80ff 100644 --- a/luxonis_train/optimizers/optimizers.py +++ b/luxonis_train/optimizers/optimizers.py @@ -2,6 +2,8 @@ from luxonis_train.utils.registry import OPTIMIZERS +from .custom_optimizers import TripleLRSGD + for optimizer in [ optim.Adadelta, optim.Adagrad, @@ -15,5 +17,6 @@ optim.RAdam, optim.RMSprop, optim.SGD, + TripleLRSGD, ]: OPTIMIZERS.register_module(module=optimizer) diff --git a/luxonis_train/schedulers/custom_schedulers.py b/luxonis_train/schedulers/custom_schedulers.py new file mode 100644 index 00000000..12719322 --- /dev/null +++ b/luxonis_train/schedulers/custom_schedulers.py @@ -0,0 +1,79 @@ +import math + +import numpy as np +import torch + + +class TripleLRScheduler: + def __init__( + self, + optimizer: torch.optim.Optimizer, + params: dict, + epochs: int, + max_stepnum: int, + ) -> None: + """TripleLRScheduler is a custom learning rate scheduler that + combines a cosine annealing. 
+ + @type optimizer: torch.optim.Optimizer + @param optimizer: The optimizer to be used + @type parmas: dict + @param parmas: The parameters to be used for the scheduler + @type epochs: int + @param epochs: The number of epochs to train for + @type max_stepnum: int + @param max_stepnum: The maximum number of steps to train for + """ + self.optimizer = optimizer + self.params = params + self.max_stepnum = max_stepnum + self.warmup_stepnum = max( + round(self.params["warmup_epochs"] * self.max_stepnum), 1000 + ) + self.step = 0 + self.lrf = self.params["lre"] / self.optimizer.defaults["lr"] + self.lf = ( + lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) + * (self.lrf - 1) + + 1 + ) + + def create_scheduler(self): + scheduler = torch.optim.lr_scheduler.LambdaLR( + self.optimizer, lr_lambda=self.lf + ) + return scheduler + + def update_learning_rate(self, current_epoch: int) -> None: + """Update the learning rate based on the current epoch. + + @type current_epoch: int + @param current_epoch: The current epoch + """ + self.step = self.step % self.max_stepnum + curr_step = self.step + self.max_stepnum * current_epoch + + if curr_step <= self.warmup_stepnum: + for k, param in enumerate(self.optimizer.param_groups): + warmup_bias_lr = ( + self.params["warmup_bias_lr"] if k == 2 else 0.0 + ) + param["lr"] = np.interp( + curr_step, + [0, self.warmup_stepnum], + [ + warmup_bias_lr, + self.optimizer.defaults["lr"] * self.lf(current_epoch), + ], + ) + if "momentum" in param: + self.optimizer.defaults["momentum"] = np.interp( + curr_step, + [0, self.warmup_stepnum], + [ + self.params["warmup_momentum"], + self.optimizer.defaults["momentum"], + ], + ) + + self.step += 1 diff --git a/luxonis_train/schedulers/schedulers.py b/luxonis_train/schedulers/schedulers.py index 488a7498..12f184e8 100644 --- a/luxonis_train/schedulers/schedulers.py +++ b/luxonis_train/schedulers/schedulers.py @@ -2,6 +2,8 @@ from luxonis_train.utils.registry import SCHEDULERS +from .custom_schedulers import TripleLRScheduler + for scheduler in [ lr_scheduler.LambdaLR, lr_scheduler.MultiplicativeLR, @@ -18,5 +20,6 @@ lr_scheduler.CyclicLR, lr_scheduler.OneCycleLR, lr_scheduler.CosineAnnealingWarmRestarts, + TripleLRScheduler, ]: SCHEDULERS.register_module(module=scheduler) From 00a2bd3f6eb1c398884ec4950d6bebbb072e3503 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Thu, 14 Nov 2024 15:02:05 +0100 Subject: [PATCH 07/21] default params --- luxonis_train/optimizers/custom_optimizers.py | 9 ++++++++- luxonis_train/schedulers/custom_schedulers.py | 13 ++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/luxonis_train/optimizers/custom_optimizers.py b/luxonis_train/optimizers/custom_optimizers.py index e000343f..f8e28bdb 100644 --- a/luxonis_train/optimizers/custom_optimizers.py +++ b/luxonis_train/optimizers/custom_optimizers.py @@ -12,7 +12,14 @@ def __init__(self, model: torch.nn.Module, params: dict) -> None: @param params: The parameters to be used for the optimizer """ self.model = model - self.params = params + self.params = { + "lr": 0.02, + "momentum": 0.937, + "weight_decay": 0.0005, + "nesterov": True, + } + if params: + self.params.update(params) def create_optimizer(self): batch_norm_weights, regular_weights, biases = [], [], [] diff --git a/luxonis_train/schedulers/custom_schedulers.py b/luxonis_train/schedulers/custom_schedulers.py index 12719322..f0736b06 100644 --- a/luxonis_train/schedulers/custom_schedulers.py +++ b/luxonis_train/schedulers/custom_schedulers.py @@ -24,8 +24,19 @@ def 
__init__( @type max_stepnum: int @param max_stepnum: The maximum number of steps to train for """ + if optimizer.__class__.__name__ != "SGD": + raise ValueError( + "TripleLRScheduler can only be used with the 'SGD' optimizer." + ) self.optimizer = optimizer - self.params = params + self.params = { + "warmup_epochs": 3, + "warmup_bias_lr": 0.1, + "warmup_momentum": 0.8, + "lre": 0.0002, + } + if params: + self.params.update(params) self.max_stepnum = max_stepnum self.warmup_stepnum = max( round(self.params["warmup_epochs"] * self.max_stepnum), 1000 From 147c1eef54aea5dbae21f883fc66cb71c35271b0 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 15 Nov 2024 13:03:30 +0100 Subject: [PATCH 08/21] code refactor; added training strategies --- luxonis_train/__init__.py | 1 + luxonis_train/callbacks/__init__.py | 3 + luxonis_train/callbacks/training_manager.py | 28 +++ luxonis_train/config/config.py | 7 +- luxonis_train/models/luxonis_lightning.py | 69 +++---- .../backbones/efficientrep/efficientrep.py | 11 +- luxonis_train/nodes/blocks/blocks.py | 11 +- .../nodes/heads/efficient_bbox_head.py | 11 +- .../nodes/necks/reppan_neck/reppan_neck.py | 11 +- luxonis_train/optimizers/custom_optimizers.py | 58 ------ luxonis_train/optimizers/optimizers.py | 3 - luxonis_train/schedulers/custom_schedulers.py | 90 --------- luxonis_train/schedulers/schedulers.py | 3 - luxonis_train/strategies/__init__.py | 5 + luxonis_train/strategies/base_strategy.py | 27 +++ luxonis_train/strategies/triple_lr_sgd.py | 172 ++++++++++++++++++ luxonis_train/utils/registry.py | 5 + 17 files changed, 297 insertions(+), 218 deletions(-) create mode 100644 luxonis_train/callbacks/training_manager.py delete mode 100644 luxonis_train/optimizers/custom_optimizers.py delete mode 100644 luxonis_train/schedulers/custom_schedulers.py create mode 100644 luxonis_train/strategies/__init__.py create mode 100644 luxonis_train/strategies/base_strategy.py create mode 100644 luxonis_train/strategies/triple_lr_sgd.py diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py index ac6e38a1..e9651769 100644 --- a/luxonis_train/__init__.py +++ b/luxonis_train/__init__.py @@ -10,6 +10,7 @@ from .nodes import * from .optimizers import * from .schedulers import * + from .strategies import * from .utils import * except ImportError as e: warnings.warn( diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index a3cf907c..7bea71a9 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -25,6 +25,7 @@ from .metadata_logger import MetadataLogger from .module_freezer import ModuleFreezer from .test_on_train_end import TestOnTrainEnd +from .training_manager import TrainingManager from .upload_checkpoint import UploadCheckpoint CALLBACKS.register_module(module=EarlyStopping) @@ -38,6 +39,7 @@ CALLBACKS.register_module(module=ModelPruning) CALLBACKS.register_module(module=GradCamCallback) CALLBACKS.register_module(module=EMACallback) +CALLBACKS.register_module(module=TrainingManager) __all__ = [ @@ -53,4 +55,5 @@ "GPUStatsMonitor", "GradCamCallback", "EMACallback", + "TrainingManager", ] diff --git a/luxonis_train/callbacks/training_manager.py b/luxonis_train/callbacks/training_manager.py new file mode 100644 index 00000000..9131fa84 --- /dev/null +++ b/luxonis_train/callbacks/training_manager.py @@ -0,0 +1,28 @@ +import pytorch_lightning as pl + +from luxonis_train.strategies.base_strategy import BaseTrainingStrategy + + +class TrainingManager(pl.Callback): + def __init__(self, 
strategy: BaseTrainingStrategy | None = None): + """Training manager callback that updates the parameters of the + training strategy. + + @type strategy: BaseTrainingStrategy + @param strategy: The strategy to be used. + """ + self.strategy = strategy + + def on_after_backward( + self, trainer: pl.Trainer, pl_module: pl.LightningModule + ): + """PyTorch Lightning hook that is called after the backward + pass. + + @type trainer: pl.Trainer + @param trainer: The trainer object. + @type pl_module: pl.LightningModule + @param pl_module: The pl_module object. + """ + if self.strategy is not None: + self.strategy.update_parameters(pl_module) diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index 2a7f9d3b..144cbd6b 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -336,6 +336,11 @@ class SchedulerConfig(BaseModelExtraForbid): params: Params = {} +class TrainingStrategyConfig(BaseModelExtraForbid): + name: str = "TripleLRSGDStrategy" + params: Params = {} + + class TrainerConfig(BaseModelExtraForbid): preprocessing: PreprocessingConfig = PreprocessingConfig() use_rich_progress_bar: bool = True @@ -355,7 +360,6 @@ class TrainerConfig(BaseModelExtraForbid): profiler: Literal["simple", "advanced"] | None = None matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True - apply_custom_lr: bool = False seed: int | None = None n_validation_batches: PositiveInt | None = None @@ -383,6 +387,7 @@ class TrainerConfig(BaseModelExtraForbid): optimizer: OptimizerConfig = OptimizerConfig() scheduler: SchedulerConfig = SchedulerConfig() + training_strategy: TrainingStrategyConfig = TrainingStrategyConfig() @model_validator(mode="after") def validate_deterministic(self) -> Self: diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 2e5f775f..7a7608a7 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -1,4 +1,3 @@ -import math from collections import defaultdict from collections.abc import Mapping from logging import getLogger @@ -26,7 +25,11 @@ combine_visualizations, get_denormalized_images, ) -from luxonis_train.callbacks import BaseLuxonisProgressBar, ModuleFreezer +from luxonis_train.callbacks import ( + BaseLuxonisProgressBar, + ModuleFreezer, + TrainingManager, +) from luxonis_train.config import AttachedModuleConfig, Config from luxonis_train.nodes import BaseNode from luxonis_train.utils import ( @@ -43,6 +46,7 @@ CALLBACKS, OPTIMIZERS, SCHEDULERS, + STRATEGIES, Registry, ) @@ -269,6 +273,16 @@ def __init__( self.load_checkpoint(self.cfg.model.weights) + if self.cfg.trainer.training_strategy.params: + self.training_strategy = STRATEGIES.get( + self.cfg.trainer.training_strategy.name + )( + pl_module=self, + params=self.cfg.trainer.training_strategy.params, + ) + else: + self.training_strategy = None + @property def core(self) -> "luxonis_train.core.LuxonisModel": """Returns the core model.""" @@ -850,6 +864,9 @@ def configure_callbacks(self) -> list[pl.Callback]: CALLBACKS.get(callback.name)(**callback.params) ) + if self.training_strategy is not None: + callbacks.append(TrainingManager(strategy=self.training_strategy)) + return callbacks def configure_optimizers( @@ -858,45 +875,17 @@ def configure_optimizers( list[torch.optim.Optimizer], list[torch.optim.lr_scheduler.LRScheduler], ]: - """Configures model optimizers and schedulers with optional - custom learning rates and warm-up logic.""" + """Configures 
model optimizers and schedulers.""" + if self.training_strategy is not None: + return self.training_strategy.configure_optimizers() cfg_optimizer = self.cfg.trainer.optimizer cfg_scheduler = self.cfg.trainer.scheduler - if self.cfg.trainer.apply_custom_lr: - assert ( - cfg_optimizer.name == "TripleLRSGD" - ), "Custom learning rate is only supported for TripleLRSGD optimizer." - assert ( - cfg_scheduler.name == "TripleLRScheduler" - ), "Custom learning rate is only supported for TripleLRScheduler scheduler." - - max_stepnum = math.ceil( - len(self._core.loaders["train"]) / self.cfg.trainer.batch_size - ) - custom_optimizer = OPTIMIZERS.get(cfg_optimizer.name)( - self, cfg_optimizer.params - ) - optimizer = custom_optimizer.create_optimizer() - - custom_scheduler = SCHEDULERS.get(cfg_scheduler.name)( - optimizer, - cfg_scheduler.params, - self.cfg.trainer.epochs, - max_stepnum, - ) - scheduler = custom_scheduler.create_scheduler() - - self.custom_scheduler = custom_scheduler - - return [optimizer], [scheduler] - - else: - optim_params = cfg_optimizer.params | { - "params": filter(lambda p: p.requires_grad, self.parameters()), - } - optimizer = OPTIMIZERS.get(cfg_optimizer.name)(**optim_params) + optim_params = cfg_optimizer.params | { + "params": filter(lambda p: p.requires_grad, self.parameters()), + } + optimizer = OPTIMIZERS.get(cfg_optimizer.name)(**optim_params) def get_scheduler(scheduler_cfg, optimizer): scheduler_class = SCHEDULERS.get( @@ -927,12 +916,6 @@ def get_scheduler(scheduler_cfg, optimizer): return [optimizer], [scheduler] - def on_after_backward(self): - """Custom logic to adjust learning rates and momentum after - loss.backward.""" - if self.cfg.trainer.apply_custom_lr: - self.custom_scheduler.update_learning_rate(self.current_epoch) - def load_checkpoint(self, path: str | Path | None) -> None: """Loads checkpoint weights from provided path. 
diff --git a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py index 34e21020..121ac1bc 100644 --- a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -132,13 +132,14 @@ def __init__( def initialize_weights(self): for m in self.modules(): - t = type(m) - if t is nn.Conv2d: + if isinstance(m, nn.Conv2d): pass - elif t is nn.BatchNorm2d: - m.eps = 1e-3 + elif isinstance(m, nn.BatchNorm2d): + m.eps = 0.001 m.momentum = 0.03 - elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + elif isinstance( + m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU) + ): m.inplace = True def set_export_mode(self, mode: bool = True) -> None: diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 2ac01805..9d63853f 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -60,13 +60,14 @@ def __init__(self, n_classes: int, in_channels: int): def initialize_weights(self): for m in self.modules(): - t = type(m) - if t is nn.Conv2d: + if isinstance(m, nn.Conv2d): pass - elif t is nn.BatchNorm2d: - m.eps = 1e-3 + elif isinstance(m, nn.BatchNorm2d): + m.eps = 0.001 m.momentum = 0.03 - elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + elif isinstance( + m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU) + ): m.inplace = True def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]: diff --git a/luxonis_train/nodes/heads/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py index 1394c8fa..0081c6ce 100644 --- a/luxonis_train/nodes/heads/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -105,13 +105,14 @@ def __init__( def initialize_weights(self): for m in self.modules(): - t = type(m) - if t is nn.Conv2d: + if isinstance(m, nn.Conv2d): pass - elif t is nn.BatchNorm2d: - m.eps = 1e-3 + elif isinstance(m, nn.BatchNorm2d): + m.eps = 0.001 m.momentum = 0.03 - elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + elif isinstance( + m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU) + ): m.inplace = True def forward( diff --git a/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py index 73908e12..9d02ddcf 100644 --- a/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py @@ -172,13 +172,14 @@ def __init__( def initialize_weights(self): for m in self.modules(): - t = type(m) - if t is nn.Conv2d: + if isinstance(m, nn.Conv2d): pass - elif t is nn.BatchNorm2d: - m.eps = 1e-3 + elif isinstance(m, nn.BatchNorm2d): + m.eps = 0.001 m.momentum = 0.03 - elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: + elif isinstance( + m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU) + ): m.inplace = True def forward(self, inputs: list[Tensor]) -> list[Tensor]: diff --git a/luxonis_train/optimizers/custom_optimizers.py b/luxonis_train/optimizers/custom_optimizers.py deleted file mode 100644 index f8e28bdb..00000000 --- a/luxonis_train/optimizers/custom_optimizers.py +++ /dev/null @@ -1,58 +0,0 @@ -import torch - - -class TripleLRSGD: - def __init__(self, model: torch.nn.Module, params: dict) -> None: - """TripleLRSGD is a custom optimizer that separates weights into - batch norm weights, regular weights, and biases. 
- - @type model: torch.nn.Module - @param model: The model to be used - @type params: dict - @param params: The parameters to be used for the optimizer - """ - self.model = model - self.params = { - "lr": 0.02, - "momentum": 0.937, - "weight_decay": 0.0005, - "nesterov": True, - } - if params: - self.params.update(params) - - def create_optimizer(self): - batch_norm_weights, regular_weights, biases = [], [], [] - - for module in self.model.modules(): - if hasattr(module, "bias") and isinstance( - module.bias, torch.nn.Parameter - ): - biases.append(module.bias) - if isinstance(module, torch.nn.BatchNorm2d): - batch_norm_weights.append(module.weight) - elif hasattr(module, "weight") and isinstance( - module.weight, torch.nn.Parameter - ): - regular_weights.append(module.weight) - - optimizer = torch.optim.SGD( - [ - { - "params": batch_norm_weights, - "lr": self.params["lr"], - "momentum": self.params["momentum"], - "nesterov": self.params["nesterov"], - }, - { - "params": regular_weights, - "weight_decay": self.params["weight_decay"], - }, - {"params": biases}, - ], - lr=self.params["lr"], - momentum=self.params["momentum"], - nesterov=self.params["nesterov"], - ) - - return optimizer diff --git a/luxonis_train/optimizers/optimizers.py b/luxonis_train/optimizers/optimizers.py index 43ca80ff..c2a4bf12 100644 --- a/luxonis_train/optimizers/optimizers.py +++ b/luxonis_train/optimizers/optimizers.py @@ -2,8 +2,6 @@ from luxonis_train.utils.registry import OPTIMIZERS -from .custom_optimizers import TripleLRSGD - for optimizer in [ optim.Adadelta, optim.Adagrad, @@ -17,6 +15,5 @@ optim.RAdam, optim.RMSprop, optim.SGD, - TripleLRSGD, ]: OPTIMIZERS.register_module(module=optimizer) diff --git a/luxonis_train/schedulers/custom_schedulers.py b/luxonis_train/schedulers/custom_schedulers.py deleted file mode 100644 index f0736b06..00000000 --- a/luxonis_train/schedulers/custom_schedulers.py +++ /dev/null @@ -1,90 +0,0 @@ -import math - -import numpy as np -import torch - - -class TripleLRScheduler: - def __init__( - self, - optimizer: torch.optim.Optimizer, - params: dict, - epochs: int, - max_stepnum: int, - ) -> None: - """TripleLRScheduler is a custom learning rate scheduler that - combines a cosine annealing. - - @type optimizer: torch.optim.Optimizer - @param optimizer: The optimizer to be used - @type parmas: dict - @param parmas: The parameters to be used for the scheduler - @type epochs: int - @param epochs: The number of epochs to train for - @type max_stepnum: int - @param max_stepnum: The maximum number of steps to train for - """ - if optimizer.__class__.__name__ != "SGD": - raise ValueError( - "TripleLRScheduler can only be used with the 'SGD' optimizer." - ) - self.optimizer = optimizer - self.params = { - "warmup_epochs": 3, - "warmup_bias_lr": 0.1, - "warmup_momentum": 0.8, - "lre": 0.0002, - } - if params: - self.params.update(params) - self.max_stepnum = max_stepnum - self.warmup_stepnum = max( - round(self.params["warmup_epochs"] * self.max_stepnum), 1000 - ) - self.step = 0 - self.lrf = self.params["lre"] / self.optimizer.defaults["lr"] - self.lf = ( - lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) - * (self.lrf - 1) - + 1 - ) - - def create_scheduler(self): - scheduler = torch.optim.lr_scheduler.LambdaLR( - self.optimizer, lr_lambda=self.lf - ) - return scheduler - - def update_learning_rate(self, current_epoch: int) -> None: - """Update the learning rate based on the current epoch. 
- - @type current_epoch: int - @param current_epoch: The current epoch - """ - self.step = self.step % self.max_stepnum - curr_step = self.step + self.max_stepnum * current_epoch - - if curr_step <= self.warmup_stepnum: - for k, param in enumerate(self.optimizer.param_groups): - warmup_bias_lr = ( - self.params["warmup_bias_lr"] if k == 2 else 0.0 - ) - param["lr"] = np.interp( - curr_step, - [0, self.warmup_stepnum], - [ - warmup_bias_lr, - self.optimizer.defaults["lr"] * self.lf(current_epoch), - ], - ) - if "momentum" in param: - self.optimizer.defaults["momentum"] = np.interp( - curr_step, - [0, self.warmup_stepnum], - [ - self.params["warmup_momentum"], - self.optimizer.defaults["momentum"], - ], - ) - - self.step += 1 diff --git a/luxonis_train/schedulers/schedulers.py b/luxonis_train/schedulers/schedulers.py index 12f184e8..488a7498 100644 --- a/luxonis_train/schedulers/schedulers.py +++ b/luxonis_train/schedulers/schedulers.py @@ -2,8 +2,6 @@ from luxonis_train.utils.registry import SCHEDULERS -from .custom_schedulers import TripleLRScheduler - for scheduler in [ lr_scheduler.LambdaLR, lr_scheduler.MultiplicativeLR, @@ -20,6 +18,5 @@ lr_scheduler.CyclicLR, lr_scheduler.OneCycleLR, lr_scheduler.CosineAnnealingWarmRestarts, - TripleLRScheduler, ]: SCHEDULERS.register_module(module=scheduler) diff --git a/luxonis_train/strategies/__init__.py b/luxonis_train/strategies/__init__.py new file mode 100644 index 00000000..5e3b5321 --- /dev/null +++ b/luxonis_train/strategies/__init__.py @@ -0,0 +1,5 @@ +from .triple_lr_sgd import TripleLRScheduler + +__all__ = [ + "TripleLRScheduler", +] diff --git a/luxonis_train/strategies/base_strategy.py b/luxonis_train/strategies/base_strategy.py new file mode 100644 index 00000000..caa952e6 --- /dev/null +++ b/luxonis_train/strategies/base_strategy.py @@ -0,0 +1,27 @@ +from abc import ABC, abstractmethod +from typing import Tuple + +import pytorch_lightning as pl +from luxonis_ml.utils.registry import AutoRegisterMeta +from torch.optim import Optimizer +from torch.optim.lr_scheduler import _LRScheduler + +from luxonis_train.utils.registry import STRATEGIES + + +class BaseTrainingStrategy( + ABC, + metaclass=AutoRegisterMeta, + register=False, + registry=STRATEGIES, +): + def __init__(self, pl_module: pl.LightningModule): + self.pl_module = pl_module + + @abstractmethod + def configure_optimizers(self) -> Tuple[_LRScheduler, Optimizer]: + pass + + @abstractmethod + def update_parameters(self, *args, **kwargs): + pass diff --git a/luxonis_train/strategies/triple_lr_sgd.py b/luxonis_train/strategies/triple_lr_sgd.py new file mode 100644 index 00000000..b04ddea5 --- /dev/null +++ b/luxonis_train/strategies/triple_lr_sgd.py @@ -0,0 +1,172 @@ +# strategies/triple_lr_sgd.py +import math + +import numpy as np +import pytorch_lightning as pl +import torch +from torch.optim import SGD +from torch.optim.lr_scheduler import LambdaLR +from torch.optim.optimizer import Optimizer + +from .base_strategy import BaseTrainingStrategy + + +class TripleLRScheduler: + def __init__( + self, + optimizer: torch.optim.Optimizer, + params: dict, + epochs: int, + max_stepnum: int, + ) -> None: + """TripleLRScheduler scheduler. + + @type optimizer: torch.optim.Optimizer + @param optimizer: The optimizer to be used. + @type params: dict + @param params: The parameters for the scheduler. + @type epochs: int + @param epochs: The number of epochs to train for. + @type max_stepnum: int + @param max_stepnum: The maximum number of steps to train for. 
+ """ + self.optimizer = optimizer + self.params = { + "warmup_epochs": 3, + "warmup_bias_lr": 0.1, + "warmup_momentum": 0.8, + "lre": 0.0002, + } + if params: + self.params.update(params) + self.max_stepnum = max_stepnum + self.warmup_stepnum = max( + round(self.params["warmup_epochs"] * self.max_stepnum), 1000 + ) + self.step = 0 + self.lrf = self.params["lre"] / self.optimizer.defaults["lr"] + self.lf = ( + lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) + * (self.lrf - 1) + + 1 + ) + + def create_scheduler(self): + scheduler = LambdaLR(self.optimizer, lr_lambda=self.lf) + return scheduler + + def update_learning_rate(self, current_epoch: int) -> None: + self.step = self.step % self.max_stepnum + curr_step = self.step + self.max_stepnum * current_epoch + + if curr_step <= self.warmup_stepnum: + for k, param in enumerate(self.optimizer.param_groups): + warmup_bias_lr = ( + self.params["warmup_bias_lr"] if k == 2 else 0.0 + ) + param["lr"] = np.interp( + curr_step, + [0, self.warmup_stepnum], + [ + warmup_bias_lr, + self.optimizer.defaults["lr"] * self.lf(current_epoch), + ], + ) + if "momentum" in param: + self.optimizer.defaults["momentum"] = np.interp( + curr_step, + [0, self.warmup_stepnum], + [ + self.params["warmup_momentum"], + self.optimizer.defaults["momentum"], + ], + ) + self.step += 1 + + +class TripleLRSGD: + def __init__(self, model: torch.nn.Module, params: dict) -> None: + """TripleLRSGD optimizer. + + @type model: torch.nn.Module + @param model: The model to be used. + @type params: dict + @param params: The parameters for the optimizer. + """ + self.model = model + self.params = { + "lr": 0.02, + "momentum": 0.937, + "weight_decay": 0.0005, + "nesterov": True, + } + if params: + self.params.update(params) + + def create_optimizer(self): + batch_norm_weights, regular_weights, biases = [], [], [] + + for module in self.model.modules(): + if hasattr(module, "bias") and isinstance( + module.bias, torch.nn.Parameter + ): + biases.append(module.bias) + if isinstance(module, torch.nn.BatchNorm2d): + batch_norm_weights.append(module.weight) + elif hasattr(module, "weight") and isinstance( + module.weight, torch.nn.Parameter + ): + regular_weights.append(module.weight) + + optimizer = SGD( + [ + { + "params": batch_norm_weights, + "lr": self.params["lr"], + "momentum": self.params["momentum"], + "nesterov": self.params["nesterov"], + }, + { + "params": regular_weights, + "weight_decay": self.params["weight_decay"], + }, + {"params": biases}, + ], + lr=self.params["lr"], + momentum=self.params["momentum"], + nesterov=self.params["nesterov"], + ) + + return optimizer + + +class TripleLRSGDStrategy(BaseTrainingStrategy): + def __init__(self, pl_module: pl.LightningModule, params: dict): + """TripleLRSGD strategy. + + @type pl_module: pl.LightningModule + @param pl_module: The pl_module to be used. + @type params: dict + @param params: The parameters for the strategy. 
+ """ + super().__init__(pl_module) + self.model = pl_module + self.params = params + self.cfg = self.model.cfg + + max_stepnum = math.ceil( + len(self.model._core.loaders["train"]) + / self.cfg.trainer.batch_size + ) + + self.optimizer = TripleLRSGD(self.model, params).create_optimizer() + self.scheduler = TripleLRScheduler( + self.optimizer, params, self.cfg.trainer.epochs, max_stepnum + ) + + def configure_optimizers(self) -> tuple[list[Optimizer], list[LambdaLR]]: + return [self.optimizer], [self.scheduler.create_scheduler()] + + def update_parameters(self, *args, **kwargs): + current_epoch = self.model.current_epoch + self.scheduler.update_learning_rate(current_epoch) diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index 8044f13c..4f413c7a 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -35,6 +35,11 @@ SCHEDULERS: Registry[type[LRScheduler]] = Registry(name="schedulers") """Registry for all schedulers.""" +STRATEGIES: Registry[type["lt.strategies.BaseTrainingStrategy"]] = Registry( + name="strategies" +) +"""Registry for all strategies.""" + VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer"]] = Registry( "visualizers" ) From 6e0c888c038dd0627b602cf26919df939e75c4a3 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 15 Nov 2024 13:18:26 +0100 Subject: [PATCH 09/21] use self.core --- luxonis_train/models/luxonis_lightning.py | 2 +- luxonis_train/strategies/triple_lr_sgd.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 7a7608a7..896c4410 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -865,7 +865,7 @@ def configure_callbacks(self) -> list[pl.Callback]: ) if self.training_strategy is not None: - callbacks.append(TrainingManager(strategy=self.training_strategy)) + callbacks.append(TrainingManager(strategy=self.training_strategy)) # type: ignore return callbacks diff --git a/luxonis_train/strategies/triple_lr_sgd.py b/luxonis_train/strategies/triple_lr_sgd.py index b04ddea5..33f7dfe3 100644 --- a/luxonis_train/strategies/triple_lr_sgd.py +++ b/luxonis_train/strategies/triple_lr_sgd.py @@ -155,8 +155,7 @@ def __init__(self, pl_module: pl.LightningModule, params: dict): self.cfg = self.model.cfg max_stepnum = math.ceil( - len(self.model._core.loaders["train"]) - / self.cfg.trainer.batch_size + len(self.model.core.loaders["train"]) / self.cfg.trainer.batch_size ) self.optimizer = TripleLRSGD(self.model, params).create_optimizer() From c0bdbfc30dd54f640a29c4743cf87abf6118e9cb Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 15 Nov 2024 14:12:31 +0100 Subject: [PATCH 10/21] type-check fix --- luxonis_train/strategies/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/luxonis_train/strategies/__init__.py b/luxonis_train/strategies/__init__.py index 5e3b5321..b83d7190 100644 --- a/luxonis_train/strategies/__init__.py +++ b/luxonis_train/strategies/__init__.py @@ -1,5 +1,7 @@ +from .base_strategy import BaseTrainingStrategy from .triple_lr_sgd import TripleLRScheduler __all__ = [ "TripleLRScheduler", + "BaseTrainingStrategy", ] From 0c3bbbe44d6a918297f9c264d5cdddeced43e825 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 15 Nov 2024 14:31:30 +0100 Subject: [PATCH 11/21] type-check fix --- luxonis_train/models/luxonis_lightning.py | 2 +- luxonis_train/strategies/base_strategy.py | 7 ++++--- 2 files changed, 5 
insertions(+), 4 deletions(-) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 896c4410..58cdb5a5 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -278,7 +278,7 @@ def __init__( self.cfg.trainer.training_strategy.name )( pl_module=self, - params=self.cfg.trainer.training_strategy.params, + params=self.cfg.trainer.training_strategy.params, # type: ignore ) else: self.training_strategy = None diff --git a/luxonis_train/strategies/base_strategy.py b/luxonis_train/strategies/base_strategy.py index caa952e6..8de6386d 100644 --- a/luxonis_train/strategies/base_strategy.py +++ b/luxonis_train/strategies/base_strategy.py @@ -1,10 +1,9 @@ from abc import ABC, abstractmethod -from typing import Tuple import pytorch_lightning as pl from luxonis_ml.utils.registry import AutoRegisterMeta from torch.optim import Optimizer -from torch.optim.lr_scheduler import _LRScheduler +from torch.optim.lr_scheduler import LRScheduler from luxonis_train.utils.registry import STRATEGIES @@ -19,7 +18,9 @@ def __init__(self, pl_module: pl.LightningModule): self.pl_module = pl_module @abstractmethod - def configure_optimizers(self) -> Tuple[_LRScheduler, Optimizer]: + def configure_optimizers( + self, + ) -> tuple[list[Optimizer], list[LRScheduler]]: pass @abstractmethod From dc1d468dc9a93db47c78fee3aaf156f7732f270d Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Fri, 15 Nov 2024 14:59:45 +0100 Subject: [PATCH 12/21] add docs --- configs/README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/configs/README.md b/configs/README.md index 8a9e1c01..e8281237 100644 --- a/configs/README.md +++ b/configs/README.md @@ -376,6 +376,37 @@ trainer: eta_min: 0 ``` +### Training Strategy + +Defines the training strategy to be used. Currently, only the `TripleLRSGDStrategy` is supported, but more strategies will be added in the future. + +| Key | Type | Default value | Description | +| ----------------- | ------- | ----------------------- | ---------------------------------------------- | +| `name` | `str` | `"TripleLRSGDStrategy"` | Name of the training strategy | +| `warmup_epochs` | `int` | `3` | Number of epochs for the warmup phase | +| `warmup_bias_lr` | `float` | `0.1` | Learning rate for bias during the warmup phase | +| `warmup_momentum` | `float` | `0.8` | Momentum value during the warmup phase | +| `lr` | `float` | `0.02` | Initial learning rate | +| `lre` | `float` | `0.0002` | End learning rate | +| `momentum` | `float` | `0.937` | Momentum for the optimizer | +| `weight_decay` | `float` | `0.0005` | Weight decay value | +| `nesterov` | `bool` | `true` | Use Nesterov momentum or not | + +**Example:** + +```yaml +training_strategy: + name: "TripleLRSGDStrategy" + warmup_epochs: 3 + warmup_bias_lr: 0.1 + warmup_momentum: 0.8 + lr: 0.02 + lre: 0.0002 + momentum: 0.937 + weight_decay: 0.0005 + nesterov: true +``` + ## Exporter Here you can define configuration for exporting. 
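
> Editor's note on the new "Training Strategy" section above: a complete `trainer` block wiring in the strategy might look like the minimal sketch below. It assumes the `training_strategy` section nests under `trainer` (it is a field of `TrainerConfig` in `luxonis_train/config/config.py`) and that the strategy options go under a `params` key (as `TrainingStrategyConfig` defines `name` and `params`); `optimizer` and `scheduler` are left unset because the strategy supplies both. The values shown are the defaults from the table above.

```yaml
trainer:
  training_strategy:
    name: "TripleLRSGDStrategy"
    params:
      warmup_epochs: 3
      warmup_bias_lr: 0.1
      warmup_momentum: 0.8
      lr: 0.02
      lre: 0.0002
      momentum: 0.937
      weight_decay: 0.0005
      nesterov: true
```
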
From 9dfd8da115136543a97f50682561f98f6e9ec282 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Sat, 16 Nov 2024 13:29:44 +0100 Subject: [PATCH 13/21] add docs and minor fix --- README.md | 2 ++ luxonis_train/config/config.py | 4 ++-- luxonis_train/models/luxonis_lightning.py | 10 +++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6d718afa..f6f82ed2 100644 --- a/README.md +++ b/README.md @@ -567,6 +567,7 @@ model.tune() - [**Callbacks**](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/callbacks/README.md): Allow custom code to be executed at different stages of training. - [**Optimizers**](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#optimizer): Control how the model's weights are updated. - [**Schedulers**](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#scheduler): Adjust the learning rate during training. +- [**Training Strategy**](https://github.com/luxonis/luxonis-train/blob/main/configs/README.md#training-strategy): Specify a custom combination of optimizer and scheduler to tailor the training process for specific use cases. **Creating Custom Components:** @@ -581,6 +582,7 @@ Registered components can be referenced in the config file. Custom components ne - **Callbacks** - [`lightning.pytorch.callbacks.Callback`](https://lightning.ai/docs/pytorch/stable/extensions/callbacks.html), requires manual registration to the `CALLBACKS` registry - **Optimizers** - [`torch.optim.Optimizer`](https://pytorch.org/docs/stable/optim.html#torch.optim.Optimizer), requires manual registration to the `OPTIMIZERS` registry - **Schedulers** - [`torch.optim.lr_scheduler.LRScheduler`](https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate), requires manual registration to the `SCHEDULERS` registry +- **Training Strategy** - [`BaseTrainingStrategy`](https://github.com/luxonis/luxonis-train/blob/main/luxonis_train/strategies/base_strategy.py) **Examples:** diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index 144cbd6b..d233c0eb 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -337,7 +337,7 @@ class SchedulerConfig(BaseModelExtraForbid): class TrainingStrategyConfig(BaseModelExtraForbid): - name: str = "TripleLRSGDStrategy" + name: str params: Params = {} @@ -387,7 +387,7 @@ class TrainerConfig(BaseModelExtraForbid): optimizer: OptimizerConfig = OptimizerConfig() scheduler: SchedulerConfig = SchedulerConfig() - training_strategy: TrainingStrategyConfig = TrainingStrategyConfig() + training_strategy: TrainingStrategyConfig | None = None @model_validator(mode="after") def validate_deterministic(self) -> Self: diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 58cdb5a5..429ee4e3 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -273,7 +273,15 @@ def __init__( self.load_checkpoint(self.cfg.model.weights) - if self.cfg.trainer.training_strategy.params: + if self.cfg.trainer.training_strategy is not None: + if self.cfg.trainer.optimizer is not None: + logger.warning( + "Training strategy is active; the specified optimizer will be ignored." + ) + if self.cfg.trainer.scheduler is not None: + logger.warning( + "Training strategy is active; the specified scheduler will be ignored." 
+ ) self.training_strategy = STRATEGIES.get( self.cfg.trainer.training_strategy.name )( From 75096dac50a066f1b7f9a06487bfc7ff622b1b4a Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Mon, 18 Nov 2024 13:22:13 +0100 Subject: [PATCH 14/21] fix failing tests --- tests/integration/test_simple.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index b29e0420..c4024914 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -69,10 +69,11 @@ def test_predefined_models( config_file = f"configs/{config_file}.yaml" opts |= { "loader.params.dataset_name": ( - cifar10_dataset.dataset_name + cifar10_dataset.identifier if "classification" in config_file - else coco_dataset.dataset_name + else coco_dataset.identifier ), + "trainer.epochs": 1, } model = LuxonisModel(config_file, opts) model.train() From 23dda47066ea8e3a33ce9769ad31842c08313e87 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Mon, 18 Nov 2024 16:39:56 +0100 Subject: [PATCH 15/21] modify the logic for assigning the optimizer and scheduler --- luxonis_train/config/config.py | 44 ++++++++++++++++------- luxonis_train/models/luxonis_lightning.py | 14 ++++---- 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index d233c0eb..da7ea603 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -327,12 +327,12 @@ class CallbackConfig(BaseModelExtraForbid): class OptimizerConfig(BaseModelExtraForbid): - name: str = "Adam" + name: str params: Params = {} class SchedulerConfig(BaseModelExtraForbid): - name: str = "ConstantLR" + name: str params: Params = {} @@ -385,8 +385,8 @@ class TrainerConfig(BaseModelExtraForbid): callbacks: list[CallbackConfig] = [] - optimizer: OptimizerConfig = OptimizerConfig() - scheduler: SchedulerConfig = SchedulerConfig() + optimizer: OptimizerConfig | None = None + scheduler: SchedulerConfig | None = None training_strategy: TrainingStrategyConfig | None = None @model_validator(mode="after") @@ -536,16 +536,34 @@ def smart_auto_populate(cls, instance: "Config") -> None: """Automatically populates config fields based on rules, with warnings.""" + # Rule: Set default optimizer and scheduler if training_strategy is not defined and optimizer and scheduler are None + if instance.trainer.training_strategy is None: + if instance.trainer.optimizer is None: + instance.trainer.optimizer = OptimizerConfig( + name="Adam", params={} + ) + logger.warning( + "Optimizer not specified. Automatically set to `Adam`." + ) + if instance.trainer.scheduler is None: + instance.trainer.scheduler = SchedulerConfig( + name="ConstantLR", params={} + ) + logger.warning( + "Scheduler not specified. Automatically set to `ConstantLR`." + ) + # Rule: CosineAnnealingLR should have T_max set to the number of epochs if not provided - scheduler = instance.trainer.scheduler - if ( - scheduler.name == "CosineAnnealingLR" - and "T_max" not in scheduler.params - ): - scheduler.params["T_max"] = instance.trainer.epochs - logger.warning( - "`T_max` was not set for `CosineAnnealingLR`. Automatically set `T_max` to number of epochs." - ) + if instance.trainer.scheduler is not None: + scheduler = instance.trainer.scheduler + if ( + scheduler.name == "CosineAnnealingLR" + and "T_max" not in scheduler.params + ): + scheduler.params["T_max"] = instance.trainer.epochs + logger.warning( + "`T_max` was not set for `CosineAnnealingLR`. 
Automatically set `T_max` to number of epochs." + ) # Rule: Mosaic4 should have out_width and out_height matching train_image_size if not provided for augmentation in instance.trainer.preprocessing.augmentations: diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 429ee4e3..57cc6cd5 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -274,13 +274,13 @@ def __init__( self.load_checkpoint(self.cfg.model.weights) if self.cfg.trainer.training_strategy is not None: - if self.cfg.trainer.optimizer is not None: - logger.warning( - "Training strategy is active; the specified optimizer will be ignored." - ) - if self.cfg.trainer.scheduler is not None: - logger.warning( - "Training strategy is active; the specified scheduler will be ignored." + if ( + self.cfg.trainer.optimizer is not None + or self.cfg.trainer.scheduler is not None + ): + raise ValueError( + "Training strategy is defined, but optimizer or scheduler is also defined. " + "Please remove optimizer and scheduler from the config." ) self.training_strategy = STRATEGIES.get( self.cfg.trainer.training_strategy.name From 1f16b463de127bcf77b16fad5dbed3910f14be11 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Mon, 18 Nov 2024 16:58:46 +0100 Subject: [PATCH 16/21] fix failing test --- tests/integration/test_simple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index c4024914..007338c4 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -77,7 +77,7 @@ def test_predefined_models( } model = LuxonisModel(config_file, opts) model.train() - model.test() + model.test(view="train") def test_multi_input(opts: dict[str, Any], infer_path: Path): From d308e3353179ad1f257959d2f595f34c3e514859 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Tue, 19 Nov 2024 06:04:51 +0100 Subject: [PATCH 17/21] fix type-check error --- luxonis_train/models/luxonis_lightning.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 57cc6cd5..08d0066f 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -890,6 +890,11 @@ def configure_optimizers( cfg_optimizer = self.cfg.trainer.optimizer cfg_scheduler = self.cfg.trainer.scheduler + if cfg_optimizer is None or cfg_scheduler is None: + raise ValueError( + "Optimizer and scheduler configuration must not be None." 
+ ) + optim_params = cfg_optimizer.params | { "params": filter(lambda p: p.requires_grad, self.parameters()), } From f5965c704948f2911bd800310c9d89dd726b19c7 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Tue, 19 Nov 2024 07:04:29 +0100 Subject: [PATCH 18/21] type-check fix --- tests/integration/test_simple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 007338c4..e32980f2 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -281,7 +281,7 @@ def test_smart_cfg_auto_populate( } model = LuxonisModel(config_file, opts) assert ( - model.cfg.trainer.scheduler.params["T_max"] == model.cfg.trainer.epochs + model.cfg.trainer.scheduler.params["T_max"] == model.cfg.trainer.epochs # type: ignore ) assert ( model.cfg.trainer.preprocessing.augmentations[0].params["out_width"] From 093feecf123db7b203e578049338d193e0f9e00e Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Tue, 19 Nov 2024 08:12:03 +0100 Subject: [PATCH 19/21] fix failing tests --- luxonis_train/config/config.py | 1 + 1 file changed, 1 insertion(+) diff --git a/luxonis_train/config/config.py b/luxonis_train/config/config.py index da7ea603..941cd649 100644 --- a/luxonis_train/config/config.py +++ b/luxonis_train/config/config.py @@ -517,6 +517,7 @@ def get_config( ) -> "Config": instance = super().get_config(cfg, overrides) if not isinstance(cfg, str): + cls.smart_auto_populate(instance) return instance fs = LuxonisFileSystem(cfg) if fs.is_mlflow: From 4c61f3823dcf97ad918b6bbd83e1003b6a595968 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Tue, 19 Nov 2024 11:59:32 +0100 Subject: [PATCH 20/21] clamp kpts values --- .../attached_modules/visualizers/keypoint_visualizer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index 8c7252ee..f9f4150e 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -57,6 +57,12 @@ def draw_predictions( prediction = predictions[i] mask = prediction[..., 2] < visibility_threshold visible_kpts = prediction[..., :2] * (~mask).unsqueeze(-1).float() + visible_kpts[..., 0] = torch.clamp( + visible_kpts[..., 0], 0, canvas.size(-1) - 1 + ) + visible_kpts[..., 1] = torch.clamp( + visible_kpts[..., 1], 0, canvas.size(-2) - 1 + ) viz[i] = draw_keypoints( canvas[i].clone(), visible_kpts[..., :2], From 2951676ad13a288be0057fd2e1344395b3a61765 Mon Sep 17 00:00:00 2001 From: Jernej Sabadin Date: Tue, 19 Nov 2024 13:07:37 +0100 Subject: [PATCH 21/21] initialize_weights configurable param --- luxonis_train/nodes/README.md | 59 ++++++++++--------- .../backbones/efficientrep/efficientrep.py | 6 +- luxonis_train/nodes/blocks/blocks.py | 13 ---- .../nodes/heads/efficient_bbox_head.py | 6 +- .../nodes/necks/reppan_neck/reppan_neck.py | 6 +- 5 files changed, 46 insertions(+), 44 deletions(-) diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index cef35029..7c7b53c4 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -82,16 +82,17 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). 
**Parameters:** -| Key | Type | Default value | Description | -| ------------------ | ----------------------------------------------------------------- | --------------------------- | -------------------------------------------------------------------------- | -| `variant` | `Literal["n", "nano", "s", "small", "m", "medium", "l", "large"]` | `"nano"` | Variant of the network | -| `channels_list` | `list[int]` | \[64, 128, 256, 512, 1024\] | List of number of channels for each block | -| `n_repeats` | `list[int]` | \[1, 6, 12, 18, 6\] | List of number of repeats of `RepVGGBlock` | -| `depth_mul` | `float` | `0.33` | Depth multiplier | -| `width_mul` | `float` | `0.25` | Width multiplier | -| `block` | `Literal["RepBlock", "CSPStackRepBlock"]` | `"RepBlock"` | Base block used | -| `csp_e` | `float` | `0.5` | Factor for intermediate channels when block is set to `"CSPStackRepBlock"` | -| `download_weights` | `bool` | `True` | If True download weights from COCO (if available for specified variant) | +| Key | Type | Default value | Description | +| -------------------- | ----------------------------------------------------------------- | --------------------------- | -------------------------------------------------------------------------- | +| `variant` | `Literal["n", "nano", "s", "small", "m", "medium", "l", "large"]` | `"nano"` | Variant of the network | +| `channels_list` | `list[int]` | \[64, 128, 256, 512, 1024\] | List of number of channels for each block | +| `n_repeats` | `list[int]` | \[1, 6, 12, 18, 6\] | List of number of repeats of `RepVGGBlock` | +| `depth_mul` | `float` | `0.33` | Depth multiplier | +| `width_mul` | `float` | `0.25` | Width multiplier | +| `block` | `Literal["RepBlock", "CSPStackRepBlock"]` | `"RepBlock"` | Base block used | +| `csp_e` | `float` | `0.5` | Factor for intermediate channels when block is set to `"CSPStackRepBlock"` | +| `download_weights` | `bool` | `True` | If True download weights from COCO (if available for specified variant) | +| `initialize_weights` | `bool` | `True` | If True, initialize weights. | ### RexNetV1_lite @@ -175,17 +176,18 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). **Parameters:** -| Key | Type | Default value | Description | -| ------------------ | ----------------------------------------------------------------- | -------------------------------- | ------------------------------------------------------------------------------- | -| `variant` | `Literal["n", "nano", "s", "small", "m", "medium", "l", "large"]` | `"nano"` | Variant of the network | -| `n_heads` | `Literal[2,3,4]` | `3` | Number of output heads. 
Should be same also on the connected head in most cases | -| `channels_list` | `list[int]` | `[256, 128, 128, 256, 256, 512]` | List of number of channels for each block | -| `n_repeats` | `list[int]` | `[12, 12, 12, 12]` | List of number of repeats of `RepVGGBlock` | -| `depth_mul` | `float` | `0.33` | Depth multiplier | -| `width_mul` | `float` | `0.25` | Width multiplier | -| `block` | `Literal["RepBlock", "CSPStackRepBlock"]` | `"RepBlock"` | Base block used | -| `csp_e` | `float` | `0.5` | Factor for intermediate channels when block is set to `"CSPStackRepBlock"` | -| `download_weights` | `bool` | `False` | If True download weights from COCO (if available for specified variant) | +| Key | Type | Default value | Description | +| -------------------- | ----------------------------------------------------------------- | -------------------------------- | ------------------------------------------------------------------------------- | +| `variant` | `Literal["n", "nano", "s", "small", "m", "medium", "l", "large"]` | `"nano"` | Variant of the network | +| `n_heads` | `Literal[2,3,4]` | `3` | Number of output heads. Should be same also on the connected head in most cases | +| `channels_list` | `list[int]` | `[256, 128, 128, 256, 256, 512]` | List of number of channels for each block | +| `n_repeats` | `list[int]` | `[12, 12, 12, 12]` | List of number of repeats of `RepVGGBlock` | +| `depth_mul` | `float` | `0.33` | Depth multiplier | +| `width_mul` | `float` | `0.25` | Width multiplier | +| `block` | `Literal["RepBlock", "CSPStackRepBlock"]` | `"RepBlock"` | Base block used | +| `csp_e` | `float` | `0.5` | Factor for intermediate channels when block is set to `"CSPStackRepBlock"` | +| `download_weights` | `bool` | `False` | If True download weights from COCO (if available for specified variant) | +| `initialize_weights` | `bool` | `True` | If True, initialize weights. | ## Heads @@ -217,13 +219,14 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). **Parameters:** -| Key | Type | Default value | Description | -| ------------------ | ------- | ------------- | --------------------------------------------------------------------- | -| `n_heads` | `bool` | `3` | Number of output heads | -| `conf_thres` | `float` | `0.25` | Confidence threshold for non-maxima-suppression (used for evaluation) | -| `iou_thres` | `float` | `0.45` | `IoU` threshold for non-maxima-suppression (used for evaluation) | -| `max_det` | `int` | `300` | Maximum number of detections retained after NMS | -| `download_weights` | `bool` | `False` | If True download weights from COCO | +| Key | Type | Default value | Description | +| -------------------- | ------- | ------------- | --------------------------------------------------------------------- | +| `n_heads` | `bool` | `3` | Number of output heads | +| `conf_thres` | `float` | `0.25` | Confidence threshold for non-maxima-suppression (used for evaluation) | +| `iou_thres` | `float` | `0.45` | `IoU` threshold for non-maxima-suppression (used for evaluation) | +| `max_det` | `int` | `300` | Maximum number of detections retained after NMS | +| `download_weights` | `bool` | `False` | If True download weights from COCO | +| `initialize_weights` | `bool` | `True` | If True, initialize weights. 
| ### `EfficientKeypointBBoxHead` diff --git a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py index 121ac1bc..340cc10a 100644 --- a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -30,6 +30,7 @@ def __init__( block: Literal["RepBlock", "CSPStackRepBlock"] | None = None, csp_e: float | None = None, download_weights: bool = True, + initialize_weights: bool = True, **kwargs: Any, ): """Implementation of the EfficientRep backbone. Supports the @@ -65,6 +66,8 @@ def __init__( overrides the variant value. @type download_weights: bool @param download_weights: If True download weights from COCO (if available for specified variant). Defaults to True. + @type initialize_weights: bool + @param initialize_weights: If True, initialize weights of the model. """ super().__init__(**kwargs) @@ -125,7 +128,8 @@ def __init__( ) ) - self.initialize_weights() + if initialize_weights: + self.initialize_weights() if download_weights and var.weights_path: self.load_checkpoint(var.weights_path) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 9d63853f..25bea7c5 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -56,19 +56,6 @@ def __init__(self, n_classes: int, in_channels: int): prior_prob = 1e-2 self._initialize_weights_and_biases(prior_prob) - self.initialize_weights() - - def initialize_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - pass - elif isinstance(m, nn.BatchNorm2d): - m.eps = 0.001 - m.momentum = 0.03 - elif isinstance( - m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU) - ): - m.inplace = True def forward(self, x: Tensor) -> tuple[Tensor, Tensor, Tensor]: out_feature = self.decoder(x) diff --git a/luxonis_train/nodes/heads/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py index 0081c6ce..531a294f 100644 --- a/luxonis_train/nodes/heads/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -30,6 +30,7 @@ def __init__( iou_thres: float = 0.45, max_det: int = 300, download_weights: bool = False, + initialize_weights: bool = True, **kwargs: Any, ): """Head for object detection. @@ -51,6 +52,8 @@ def __init__( @type download_weights: bool @param download_weights: If True download weights from COCO. Defaults to False. + @type initialize_weights: bool + @param initialize_weights: If True, initialize weights. """ super().__init__(**kwargs) @@ -95,7 +98,8 @@ def __init__( f"output{i+1}_yolov6r2" for i in range(self.n_heads) ] - self.initialize_weights() + if initialize_weights: + self.initialize_weights() if download_weights: # TODO: Handle variants of head in a nicer way diff --git a/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py index 9d02ddcf..2c95890a 100644 --- a/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck/reppan_neck.py @@ -27,6 +27,7 @@ def __init__( block: Literal["RepBlock", "CSPStackRepBlock"] | None = None, csp_e: float | None = None, download_weights: bool = False, + initialize_weights: bool = True, **kwargs: Any, ): """Implementation of the RepPANNeck module. Supports the version @@ -65,6 +66,8 @@ def __init__( overrides the variant value. 
@type download_weights: bool @param download_weights: If True download weights from COCO (if available for specified variant). Defaults to False. + @type initialize_weights: bool + @param initialize_weights: If True, initialize weights of the model. """ super().__init__(**kwargs) @@ -165,7 +168,8 @@ def __init__( out_channels = channels_list_down_blocks[2 * i + 1] curr_n_repeats = n_repeats_down_blocks[i] - self.initialize_weights() + if initialize_weights: + self.initialize_weights() if download_weights and var.weights_path: self.load_checkpoint(var.weights_path)
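With `initialize_weights` exposed as a node parameter, the hard-coded BatchNorm/activation adjustments can be switched off per node, for example when fine-tuning from externally trained weights. A hypothetical config snippet — the `model.nodes` layout follows the conventions in `configs/README.md`, and the model name is a placeholder:

```yaml
model:
  name: detection_example
  nodes:
    - name: EfficientRep
      params:
        download_weights: false
        # Skip the hard-coded BatchNorm/activation tweaks and keep PyTorch
        # defaults, e.g. when the weights come from an external checkpoint.
        initialize_weights: false
```

The same flag is documented for `RepPANNeck` and `EfficientBBoxHead` in the tables above.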