From 61381a1066c4db81d755cb1f689ae2e1ccbba7e5 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 6 Sep 2024 22:13:06 +0200 Subject: [PATCH 001/102] cleanup, tests --- .github/CODEOWNERS | 1 + .github/labeler.yaml | 32 + .github/workflows/ci.yaml | 127 +++ .github/workflows/tests.yaml | 126 --- .gitignore | 2 +- luxonis_train/__init__.py | 5 +- luxonis_train/__main__.py | 16 +- .../{utils => }/assigners/__init__.py | 0 .../{utils => }/assigners/atts_assigner.py | 15 +- .../{utils => }/assigners/tal_assigner.py | 0 luxonis_train/{utils => }/assigners/utils.py | 2 +- .../attached_modules/base_attached_module.py | 68 +- .../losses/adaptive_detection_loss.py | 36 +- .../losses/bce_with_logits.py | 13 +- .../attached_modules/losses/cross_entropy.py | 4 +- .../losses/efficient_keypoint_bbox_loss.py | 46 +- .../losses/implicit_keypoint_bbox_loss.py | 58 +- .../attached_modules/losses/keypoint_loss.py | 63 +- .../losses/sigmoid_focal_loss.py | 4 +- .../losses/smooth_bce_with_logits.py | 42 +- .../losses/softmax_focal_loss.py | 10 +- .../attached_modules/metrics/base_metric.py | 2 +- .../attached_modules/metrics/common.py | 27 +- .../metrics/mean_average_precision.py | 29 +- .../mean_average_precision_keypoints.py | 47 +- .../metrics/object_keypoint_similarity.py | 103 +-- .../visualizers/base_visualizer.py | 2 +- .../visualizers/bbox_visualizer.py | 3 +- .../visualizers/classification_visualizer.py | 8 +- .../visualizers/multi_visualizer.py | 4 +- .../visualizers/segmentation_visualizer.py | 3 +- .../attached_modules/visualizers/utils.py | 2 +- luxonis_train/callbacks/__init__.py | 8 + luxonis_train/callbacks/gpu_stats_monitor.py | 85 +- .../callbacks/luxonis_progress_bar.py | 6 +- luxonis_train/callbacks/metadata_logger.py | 2 +- luxonis_train/core/core.py | 33 +- luxonis_train/core/utils/archive_utils.py | 2 +- luxonis_train/core/utils/export_utils.py | 4 +- luxonis_train/core/utils/train_utils.py | 6 +- luxonis_train/{utils => }/loaders/__init__.py | 0 .../{utils 
=> }/loaders/base_loader.py | 72 +- .../loaders/luxonis_loader_torch.py | 0 luxonis_train/models/luxonis_lightning.py | 24 +- luxonis_train/models/luxonis_output.py | 3 +- .../base_predefined_model.py | 14 +- .../predefined_models/classification_model.py | 2 +- .../predefined_models/detection_model.py | 2 +- .../keypoint_detection_model.py | 2 +- .../predefined_models/segmentation_model.py | 2 +- luxonis_train/nodes/README.md | 2 +- luxonis_train/nodes/activations/__init__.py | 4 +- .../nodes/activations/activations.py | 11 - .../nodes/backbones/contextspatial.py | 89 +- luxonis_train/nodes/backbones/efficientnet.py | 50 +- .../nodes/backbones/efficientrep/__init__.py | 3 + .../{ => efficientrep}/efficientrep.py | 77 +- .../nodes/backbones/efficientrep/variants.py | 44 + luxonis_train/nodes/backbones/micronet.py | 842 ------------------ .../nodes/backbones/micronet/__init__.py | 3 + .../nodes/backbones/micronet/blocks.py | 489 ++++++++++ .../nodes/backbones/micronet/micronet.py | 61 ++ .../nodes/backbones/micronet/variants.py | 344 +++++++ luxonis_train/nodes/backbones/mobilenetv2.py | 57 +- .../nodes/backbones/mobileone/__init__.py | 3 + .../{mobileone.py => mobileone/blocks.py} | 171 +--- .../nodes/backbones/mobileone/mobileone.py | 197 ++++ .../nodes/backbones/mobileone/variants.py | 37 + .../nodes/backbones/repvgg/__init__.py | 3 + .../nodes/backbones/{ => repvgg}/repvgg.py | 123 ++- .../nodes/backbones/repvgg/variants.py | 31 + luxonis_train/nodes/backbones/resnet.py | 124 ++- luxonis_train/nodes/backbones/rexnetv1.py | 79 +- luxonis_train/nodes/base_node.py | 172 ++-- luxonis_train/nodes/blocks/blocks.py | 11 +- luxonis_train/nodes/heads/bisenet_head.py | 50 +- .../nodes/heads/classification_head.py | 11 +- .../nodes/heads/efficient_bbox_head.py | 51 +- .../heads/efficient_keypoint_bbox_head.py | 28 +- .../heads/implicit_keypoint_bbox_head.py | 41 +- .../nodes/heads/segmentation_head.py | 30 +- luxonis_train/nodes/necks/reppan_neck.py | 68 +- 
luxonis_train/optimizers/__init__.py | 1 + .../{utils => optimizers}/optimizers.py | 2 +- luxonis_train/schedulers/__init__.py | 1 + .../{utils => schedulers}/schedulers.py | 0 luxonis_train/utils/__init__.py | 58 +- .../utils/{boxutils.py => boundingbox.py} | 52 +- luxonis_train/utils/config.py | 4 +- luxonis_train/utils/dataset_metadata.py | 157 ++++ luxonis_train/utils/exceptions.py | 9 + luxonis_train/utils/general.py | 333 ++----- luxonis_train/utils/graph.py | 80 ++ luxonis_train/utils/keypoints.py | 81 ++ luxonis_train/utils/registry.py | 29 +- luxonis_train/utils/tracker.py | 6 +- luxonis_train/utils/types.py | 36 +- pyproject.toml | 29 + tests/configs/parking_lot_config.yaml | 75 +- tests/integration/conftest.py | 32 +- tests/integration/multi_input_modules.py | 17 +- tests/integration/overfit/conftest.py | 43 + tests/integration/overfit/test_detection.py | 93 ++ .../integration/overfit/test_segmentation.py | 126 +++ tests/integration/parking_lot.json | 87 +- tests/integration/test_sanity.py | 72 +- tests/unittests/__init__.py | 2 - tests/unittests/test_base_node.py | 44 + tests/unittests/test_core.py | 0 .../test_assigners/test_atts_assigner.py | 2 +- .../test_assigners/test_tal_assigner.py | 2 +- .../test_utils/test_assigners/test_utils.py | 6 +- tests/unittests/test_utils/test_boxutils.py | 2 +- .../test_loaders/test_base_loader.py | 4 +- 114 files changed, 3405 insertions(+), 2553 deletions(-) create mode 100644 .github/CODEOWNERS create mode 100644 .github/labeler.yaml create mode 100644 .github/workflows/ci.yaml delete mode 100644 .github/workflows/tests.yaml rename luxonis_train/{utils => }/assigners/__init__.py (100%) rename luxonis_train/{utils => }/assigners/atts_assigner.py (96%) rename luxonis_train/{utils => }/assigners/tal_assigner.py (100%) rename luxonis_train/{utils => }/assigners/utils.py (98%) rename luxonis_train/{utils => }/loaders/__init__.py (100%) rename luxonis_train/{utils => }/loaders/base_loader.py (73%) rename 
luxonis_train/{utils => }/loaders/luxonis_loader_torch.py (100%) create mode 100644 luxonis_train/nodes/backbones/efficientrep/__init__.py rename luxonis_train/nodes/backbones/{ => efficientrep}/efficientrep.py (63%) create mode 100644 luxonis_train/nodes/backbones/efficientrep/variants.py delete mode 100644 luxonis_train/nodes/backbones/micronet.py create mode 100644 luxonis_train/nodes/backbones/micronet/__init__.py create mode 100644 luxonis_train/nodes/backbones/micronet/blocks.py create mode 100644 luxonis_train/nodes/backbones/micronet/micronet.py create mode 100644 luxonis_train/nodes/backbones/micronet/variants.py create mode 100644 luxonis_train/nodes/backbones/mobileone/__init__.py rename luxonis_train/nodes/backbones/{mobileone.py => mobileone/blocks.py} (60%) create mode 100644 luxonis_train/nodes/backbones/mobileone/mobileone.py create mode 100644 luxonis_train/nodes/backbones/mobileone/variants.py create mode 100644 luxonis_train/nodes/backbones/repvgg/__init__.py rename luxonis_train/nodes/backbones/{ => repvgg}/repvgg.py (50%) create mode 100644 luxonis_train/nodes/backbones/repvgg/variants.py create mode 100644 luxonis_train/optimizers/__init__.py rename luxonis_train/{utils => optimizers}/optimizers.py (92%) create mode 100644 luxonis_train/schedulers/__init__.py rename luxonis_train/{utils => schedulers}/schedulers.py (100%) rename luxonis_train/utils/{boxutils.py => boundingbox.py} (94%) create mode 100644 luxonis_train/utils/dataset_metadata.py create mode 100644 luxonis_train/utils/exceptions.py create mode 100644 luxonis_train/utils/graph.py create mode 100644 luxonis_train/utils/keypoints.py create mode 100644 tests/integration/overfit/conftest.py create mode 100644 tests/integration/overfit/test_detection.py create mode 100644 tests/integration/overfit/test_segmentation.py create mode 100644 tests/unittests/test_base_node.py create mode 100644 tests/unittests/test_core.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 
100644 index 00000000..a6eef919 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @luxonis/ML-Reviewers diff --git a/.github/labeler.yaml b/.github/labeler.yaml new file mode 100644 index 00000000..33749bd5 --- /dev/null +++ b/.github/labeler.yaml @@ -0,0 +1,32 @@ +tests: + - changed-files: + - any-glob-to-any-file: 'tests/*' + - head-branch: + - 'test/*' + - 'tests/*' + +DevOps: + - changed-files: + - any-glob-to-any-file: '.github/*' + +CLI: + - changed-files: + - any-glob-to-any-file: '**/__main__.py' + +release: + - base-branch: 'main' + +enhancement: + - head-branch: + - 'feature/*' + - 'feat/*' + - 'enhancement/*' + +fix: + - head-branch: + - 'fix/*' + - 'bug/*' + - 'hotfix/*' + - 'issue/*' + - 'bugfix/*' + - 'patch/*' diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..67328122 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,127 @@ +name: Tests + +on: + pull_request: + branches: [ dev, main ] + paths: + - 'luxonis_train/**/**.py' + - 'tests/**/**.py' + - .github/workflows/ci.yaml + +permissions: + pull-requests: write + contents: write + checks: write + +jobs: + assigner: + runs-on: ubuntu-latest + steps: + - name: Auto-assign + uses: toshimaru/auto-author-assign@v2.1.1 + + labeler: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Labeler + uses: actions/labeler@v5 + with: + configuration-path: .github/labeler.yaml + + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Run pre-commit + uses: pre-commit/action@v3.0.1 + + docs: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Install dependencies + run: | + pip install pydoctor + curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" + 
+ - name: Build docs + run: python gen-docs.py luxonis_ml + + tests: + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macOS-latest] + + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y pandoc + pip install -e .[dev] + + - name: Authenticate to Google Cloud + id: google-auth + uses: google-github-actions/auth@v2 + with: + credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} + create_credentials_file: true + export_environment_variables: true + token_format: access_token + + - name: Run pytest + uses: pavelzw/pytest-action@v2 + env: + LUXONISML_BUCKET: luxonis-test-bucket + PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 + with: + emoji: false + custom-arguments: --cov luxonis_train --cov-report json --junit-xml pytest.xml + + - name: Generate coverage report + if: matrix.os == 'ubuntu-latest' + uses: orgoro/coverage@v3.1 + with: + coverageFile: coverage.xml + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Update Coverage Badge + uses: we-cli/coverage-badge-action@main + + - name: Push changes + if: matrix.os == 'ubuntu-latest' + uses: ad-m/github-push-action@master + with: + branch: ${{ github.head_ref }} + + - name: Create Test Report + uses: EnricoMi/publish-unit-test-result-action@v2 + if: matrix.os == 'ubuntu-latest' + with: + files: pytest.xml diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml deleted file mode 100644 index a0999d9b..00000000 --- a/.github/workflows/tests.yaml +++ /dev/null @@ -1,126 +0,0 @@ -name: Tests - -on: - pull_request: - branches: [ dev, main ] - paths: - - 'luxonis_train/**/**.py' - - 'tests/**/**.py' - - .github/workflows/tests.yaml - -jobs: - run_tests: - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, 
windows-latest] - version: ['3.10'] - - runs-on: ${{ matrix.os }} - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.version }} - cache: pip - - - name: Install dependencies [Ubuntu] - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt update - sudo apt install -y pandoc - pip install -e .[dev] - - - name: Install dependencies [Windows] - if: matrix.os == 'windows-latest' - run: pip install -e .[dev] - - - name: Install dependencies [macOS] - if: matrix.os == 'macOS-latest' - run: pip install -e .[dev] - - - name: Authenticate to Google Cloud - id: google-auth - uses: google-github-actions/auth@v2 - with: - credentials_json: ${{ secrets.GOOGLE_APPLICATION_CREDENTIALS }} - create_credentials_file: true - export_environment_variables: true - token_format: access_token - - - name: Run tests with coverage [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: pytest tests --cov=luxonis_train --cov-report xml --junit-xml pytest.xml - - - name: Run tests [Windows, macOS] - env: - PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 - if: matrix.os != 'ubuntu-latest' || matrix.version != '3.10' - run: pytest tests --junit-xml pytest.xml - - - name: Generate coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: coverage-badge -o media/coverage_badge.svg -f - - - name: Generate coverage report [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - uses: orgoro/coverage@v3.1 - with: - coverageFile: coverage.xml - token: ${{ secrets.GITHUB_TOKEN }} - - - name: Commit coverage badge [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - run: | - git config --global user.name 'GitHub Actions' - git config --global user.email 'actions@github.com' - git diff --quiet media/coverage_badge.svg || { - git add media/coverage_badge.svg - git commit -m 
"[Automated] Updated coverage badge" - } - - - name: Push changes [Ubuntu] - if: matrix.os == 'ubuntu-latest' && matrix.version == '3.10' - uses: ad-m/github-push-action@master - with: - branch: ${{ github.head_ref }} - - - name: Upload Test Results - if: always() - uses: actions/upload-artifact@v4 - with: - name: Test Results [${{ matrix.os }}] (Python ${{ matrix.version }}) - path: pytest.xml - retention-days: 10 - if-no-files-found: error - - publish-test-results: - name: "Publish Tests Results" - needs: run_tests - runs-on: ubuntu-latest - permissions: - checks: write - pull-requests: write - if: always() - - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Download Artifacts - uses: actions/download-artifact@v4 - with: - path: artifacts - - - name: Publish Test Results - uses: EnricoMi/publish-unit-test-result-action@v2 - with: - files: "artifacts/**/*.xml" diff --git a/.gitignore b/.gitignore index 7f182cf4..03ba884c 100644 --- a/.gitignore +++ b/.gitignore @@ -152,5 +152,5 @@ mlartifacts mlruns wandb tests/_data -tests/integration/_test-output +tests/integration/save-directory data diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py index 60d8d501..52f18281 100644 --- a/luxonis_train/__init__.py +++ b/luxonis_train/__init__.py @@ -1,7 +1,8 @@ from .attached_modules import * from .core import * +from .loaders import * from .models import * from .nodes import * +from .optimizers import * +from .schedulers import * from .utils import * - -__version__ = "0.0.1" diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 454e9525..3351f067 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -115,13 +115,26 @@ def inspect( case_sensitive=False, ), ] = "train", # type: ignore + size_multiplier: Annotated[ + float, + typer.Option( + ..., + "--size-multiplier", + "-s", + help=( + "Multiplier for the image size. 
" + "By default the images are shown in their original size." + ), + show_default=False, + ), + ] = 1.0, opts: OptsType = None, ): """Inspect dataset.""" from lightning.pytorch import seed_everything from luxonis_ml.data.__main__ import inspect as lxml_inspect - from luxonis_train.utils.config import Config + from luxonis_train.utils import Config cfg = Config.get_config(config, opts) if cfg.trainer.seed is not None: @@ -144,6 +157,7 @@ def inspect( name=cfg.loader.params["dataset_name"], view=[view], aug_config=f.name, + size_multiplier=size_multiplier, ) diff --git a/luxonis_train/utils/assigners/__init__.py b/luxonis_train/assigners/__init__.py similarity index 100% rename from luxonis_train/utils/assigners/__init__.py rename to luxonis_train/assigners/__init__.py diff --git a/luxonis_train/utils/assigners/atts_assigner.py b/luxonis_train/assigners/atts_assigner.py similarity index 96% rename from luxonis_train/utils/assigners/atts_assigner.py rename to luxonis_train/assigners/atts_assigner.py index 9a0466da..4fea425c 100644 --- a/luxonis_train/utils/assigners/atts_assigner.py +++ b/luxonis_train/assigners/atts_assigner.py @@ -108,10 +108,9 @@ def forward( ) # Soft label with IoU - if pred_bboxes is not None: - ious = batch_iou(gt_bboxes, pred_bboxes) * mask_pos - ious = ious.max(dim=-2)[0].unsqueeze(-1) - assigned_scores *= ious + ious = batch_iou(gt_bboxes, pred_bboxes) * mask_pos + ious = ious.max(dim=-2)[0].unsqueeze(-1) + assigned_scores *= ious out_mask_positive = mask_pos_sum.bool() @@ -145,8 +144,8 @@ def _select_topk_candidates( """ mask_gt = mask_gt.repeat(1, 1, self.topk).bool() level_distances = torch.split(distances, n_level_bboxes, dim=-1) - is_in_topk_list = [] - topk_idxs = [] + is_in_topk_list: list[Tensor] = [] + topk_idxs: list[Tensor] = [] start_idx = 0 for per_level_distances, per_level_boxes in zip( level_distances, n_level_bboxes @@ -167,9 +166,7 @@ def _select_topk_candidates( is_in_topk_list.append(is_in_topk.to(distances.dtype)) 
start_idx = end_idx - is_in_topk_list = torch.cat(is_in_topk_list, dim=-1) - topk_idxs = torch.cat(topk_idxs, dim=-1) - return is_in_topk_list, topk_idxs + return torch.cat(is_in_topk_list, dim=-1), torch.cat(topk_idxs, dim=-1) def _get_positive_samples( self, diff --git a/luxonis_train/utils/assigners/tal_assigner.py b/luxonis_train/assigners/tal_assigner.py similarity index 100% rename from luxonis_train/utils/assigners/tal_assigner.py rename to luxonis_train/assigners/tal_assigner.py diff --git a/luxonis_train/utils/assigners/utils.py b/luxonis_train/assigners/utils.py similarity index 98% rename from luxonis_train/utils/assigners/utils.py rename to luxonis_train/assigners/utils.py index fadf5f8e..8987fc59 100644 --- a/luxonis_train/utils/assigners/utils.py +++ b/luxonis_train/assigners/utils.py @@ -2,7 +2,7 @@ import torch.nn.functional as F from torch import Tensor -from luxonis_train.utils.boxutils import bbox_iou +from luxonis_train.utils import bbox_iou def candidates_in_gt( diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index 17a4c277..b5c6747d 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -1,13 +1,14 @@ import logging from abc import ABC -from typing import Generic +from typing import Any, Generic +from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta from torch import Tensor, nn from typing_extensions import TypeVarTuple, Unpack from luxonis_train.nodes import BaseNode -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils import IncompatibleException, Labels, Packet logger = logging.getLogger(__name__) @@ -53,7 +54,7 @@ class BaseAttachedModule( supported_labels: list[LabelType | tuple[LabelType, ...]] | None = None - def __init__(self, *, node: BaseNode | None = None): + def __init__(self, *, 
node: BaseNode[Any, Any] | None = None): super().__init__() self._node = node self._epoch = 0 @@ -68,10 +69,18 @@ def __init__(self, *, node: BaseNode | None = None): self._required_labels = required_labels break else: + module_supported = [ + label.value + if isinstance(label, LabelType) + else f"({' + '.join(label)})" + for label in self.supported_labels + ] + module_supported = f"[{', '.join(module_supported)}]" + node_supported = [task.value for task in self.node.tasks] raise ValueError( - f"Module {self.name} supports labels {self.supported_labels}, " - f"but is connected to node {self.node.name} which does not support any of them. " - f"{self.node.name} supports {list(self.node_tasks.keys())}." + f"Module '{self.name}' requires one of the following labels or combinations of labels: {module_supported}, " + f"but is connected to node '{self.node.name}' which does not support any of them. " + f"{self.node.name} supports {node_supported}." ) @property @@ -79,7 +88,7 @@ def name(self) -> str: return self.__class__.__name__ @property - def node(self) -> BaseNode: + def node(self) -> BaseNode[Any, Any]: """Reference to the node that this module is attached to. @type: L{BaseNode} @@ -104,9 +113,7 @@ def node_tasks(self) -> dict[LabelType, str]: raise ValueError("Node must have the `tasks` attribute specified.") return self.node._tasks - def get_label( - self, labels: Labels, label_type: LabelType | None = None - ) -> tuple[Tensor, LabelType]: + def get_label(self, labels: Labels, label_type: LabelType | None = None) -> Tensor: """Extracts a specific label from the labels dictionary. 
If the label type is not provided, the first label that matches the @@ -114,11 +121,11 @@ def get_label( Example:: >>> # supported_labels = [LabelType.SEGMENTATION] - >>> labels = {"segmentation": ..., "boundingbox": ...} + >>> labels = {"segmentation": seg_tensor, "boundingbox": bbox_tensor} >>> get_label(labels) - (..., LabelType.SEGMENTATION) # returns the first matching label + seg_tensor # returns the first matching label >>> get_label(labels, LabelType.BOUNDINGBOX) - (..., LabelType.BOUNDINGBOX) # returns the bounding box label + bbox_tensor # returns the bounding box label >>> get_label(labels, LabelType.CLASSIFICATION) IncompatibleException: Label 'classification' is missing from the dataset. @@ -130,9 +137,14 @@ def get_label( @raises NotImplementedError: If the module requires multiple labels. For such cases, the `prepare` method should be overridden. - @rtype: tuple[Tensor, LabelType] - @return: Extracted label and its type. + @rtype: Tensor + @return: Extracted label """ + return self._get_label(labels, label_type)[0] + + def _get_label( + self, labels: Labels, label_type: LabelType | None = None + ) -> tuple[Tensor, LabelType]: if label_type is None: if len(self.required_labels) == 1: label_type = self.required_labels[0] @@ -153,6 +165,7 @@ def get_label( for label, label_type in labels.values(): if label_type == self.required_labels[0]: return label, label_type + raise IncompatibleException.from_missing_task( self.required_labels[0].value, list(labels.keys()), self.name ) @@ -252,19 +265,18 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: set(self.supported_labels) & set(self.node._tasks) ) x = self.get_input_tensors(inputs) - label, label_type = self.get_label(labels) + label, label_type = self._get_label(labels) if label_type in [LabelType.CLASSIFICATION, LabelType.SEGMENTATION]: - if isinstance(x, list): - if len(x) == 1: - x = x[0] - else: - logger.warning( - f"Module {self.name} expects a single tensor as input, 
" - f"but got {len(x)} tensors. Using the last tensor. " - f"If this is not the desired behavior, please override the " - "`prepare` method of the attached module or the `wrap` " - f"method of {self.node.name}." - ) - x = x[-1] + if len(x) == 1: + x = x[0] + else: + logger.warning( + f"Module {self.name} expects a single tensor as input, " + f"but got {len(x)} tensors. Using the last tensor. " + f"If this is not the desired behavior, please override the " + "`prepare` method of the attached module or the `wrap` " + f"method of {self.node.name}." + ) + x = x[-1] return x, label # type: ignore diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index 6a28bff9..a0c21eb2 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -1,32 +1,33 @@ -from typing import Literal, cast +import logging +from typing import Any, Literal, cast import torch import torch.nn.functional as F +from luxonis_ml.data import LabelType from torch import Tensor, nn from torchvision.ops import box_convert +from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner from luxonis_train.nodes import EfficientBBoxHead -from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner -from luxonis_train.utils.boxutils import ( - IoUType, +from luxonis_train.utils import ( + IncompatibleException, + Labels, + Packet, anchors_for_fpn_features, compute_iou_loss, dist2bbox, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils.boundingbox import IoUType from .base_loss import BaseLoss +logger = logging.getLogger(__name__) + class AdaptiveDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]): node: EfficientBBoxHead supported_labels = [LabelType.BOUNDINGBOX] - class NodePacket(Packet[Tensor]): - features: list[Tensor] - 
class_scores: Tensor - distributions: Tensor - def __init__( self, n_warmup_epochs: int = 4, @@ -34,7 +35,7 @@ def __init__( reduction: Literal["sum", "mean"] = "mean", class_loss_weight: float = 1.0, iou_loss_weight: float = 2.5, - **kwargs, + **kwargs: Any, ): """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications }. It combines IoU based bbox regression loss and varifocal loss @@ -51,8 +52,6 @@ def __init__( @param class_loss_weight: Weight of classification loss. @type iou_loss_weight: float @param iou_loss_weight: Weight of IoU loss. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. """ super().__init__(**kwargs) @@ -86,15 +85,15 @@ def __init__( self.gt_bboxes_scale = None def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = self.get_input_tensors(outputs, "features") - pred_scores = self.get_input_tensors(outputs, "class_scores")[0] - pred_distri = self.get_input_tensors(outputs, "distributions")[0] + feats = self.get_input_tensors(inputs, "features") + pred_scores = self.get_input_tensors(inputs, "class_scores")[0] + pred_distri = self.get_input_tensors(inputs, "distributions")[0] batch_size = pred_scores.shape[0] device = pred_scores.device - target = self.get_label(labels)[0] + target = self.get_label(labels) if self.gt_bboxes_scale is None: self.gt_bboxes_scale = torch.tensor( [ @@ -142,7 +141,6 @@ def prepare( pred_bboxes.detach() * self.stride_tensor, ) else: - # TODO: log change of assigner (once common Logger) ( assigned_labels, assigned_bboxes, diff --git a/luxonis_train/attached_modules/losses/bce_with_logits.py b/luxonis_train/attached_modules/losses/bce_with_logits.py index 442a89c3..86f45c3d 100644 --- a/luxonis_train/attached_modules/losses/bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/bce_with_logits.py @@ -1,4 +1,4 @@ 
-from typing import Literal +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -15,7 +15,7 @@ def __init__( weight: list[float] | None = None, reduction: Literal["none", "mean", "sum"] = "mean", pos_weight: Tensor | None = None, - **kwargs, + **kwargs: Any, ): """This loss combines a L{nn.Sigmoid} layer and the L{nn.BCELoss} in one single class. This version is more numerically stable than using a plain C{Sigmoid} @@ -53,6 +53,15 @@ def __init__( ) def forward(self, predictions: Tensor, target: Tensor) -> Tensor: + """Computes the BCE loss from logits. + + @type predictions: Tensor + @param predictions: Network predictions of shape (N, C, H, W) + @type target: Tensor + @param target: A tensor of shape (N, C, H, W). + @rtype: Tensor + @return: A scalar tensor. + """ if predictions.shape != target.shape: raise RuntimeError( f"Target tensor dimension ({target.shape}) and preds tensor " diff --git a/luxonis_train/attached_modules/losses/cross_entropy.py b/luxonis_train/attached_modules/losses/cross_entropy.py index 05a0f524..af545e9a 100644 --- a/luxonis_train/attached_modules/losses/cross_entropy.py +++ b/luxonis_train/attached_modules/losses/cross_entropy.py @@ -1,5 +1,5 @@ from logging import getLogger -from typing import Literal +from typing import Any, Literal import torch import torch.nn as nn @@ -24,7 +24,7 @@ def __init__( ignore_index: int = -100, reduction: Literal["none", "mean", "sum"] = "mean", label_smoothing: float = 0.0, - **kwargs, + **kwargs: Any, ): super().__init__(**kwargs) diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py index 2e6621de..04a9d641 100644 --- a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -1,23 +1,24 @@ -from typing import Literal, cast +from typing import Any, Literal, cast import torch 
import torch.nn.functional as F +from luxonis_ml.data import LabelType from torch import Tensor, nn from torchvision.ops import box_convert -from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( - get_area_factor, - get_sigmas, -) +from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner from luxonis_train.nodes import EfficientKeypointBBoxHead -from luxonis_train.utils.assigners import ATSSAssigner, TaskAlignedAssigner -from luxonis_train.utils.boxutils import ( - IoUType, +from luxonis_train.utils import ( + IncompatibleException, + Labels, + Packet, anchors_for_fpn_features, compute_iou_loss, dist2bbox, + get_sigmas, + get_with_default, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet +from luxonis_train.utils.boundingbox import IoUType from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss @@ -46,7 +47,7 @@ def __init__( vis_kpts_loss_weight: float = 1.0, sigmas: list[float] | None = None, area_factor: float | None = None, - **kwargs, + **kwargs: Any, ): """BBox loss adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications }. It combines IoU based bbox regression loss and varifocal loss @@ -55,7 +56,7 @@ def __init__( @type n_warmup_epochs: int @param n_warmup_epochs: Number of epochs where ATSS assigner is used, after that we switch to TAL assigner. - @type iou_type: L{IoUType} + @type iou_type: Literal["none", "giou", "diou", "ciou", "siou"] @param iou_type: IoU type used for bbox regression loss. @type reduction: Literal["sum", "mean"] @param reduction: Reduction type for loss. @@ -71,8 +72,6 @@ def __init__( @param sigmas: Sigmas used in KeypointLoss for OKS metric. If None then use COCO ones if possible or default ones. Defaults to C{None}. @type area_factor: float | None @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. 
- @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. """ super().__init__(**kwargs) @@ -88,14 +87,15 @@ def __init__( self.grid_cell_size = self.node.grid_cell_size self.grid_cell_offset = self.node.grid_cell_offset self.original_img_size = self.node.original_in_shape[1:] - self.n_heads = self.node.n_heads self.n_kps = self.node.n_keypoints self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([viz_pw])) self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=self.n_kps, class_name=self.name + sigmas=sigmas, n_keypoints=self.n_kps, caller_name=self.name + ) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 ) - self.area_factor = get_area_factor(area_factor, class_name=self.name) self.n_warmup_epochs = n_warmup_epochs self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) @@ -110,18 +110,18 @@ def __init__( self.vis_kpts_loss_weight = vis_kpts_loss_weight def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: - feats = self.get_input_tensors(outputs, "features") - pred_scores = self.get_input_tensors(outputs, "class_scores")[0] - pred_distri = self.get_input_tensors(outputs, "distributions")[0] - pred_kpts = self.get_input_tensors(outputs, "keypoints_raw")[0] + feats = self.get_input_tensors(inputs, "features") + pred_scores = self.get_input_tensors(inputs, "class_scores")[0] + pred_distri = self.get_input_tensors(inputs, "distributions")[0] + pred_kpts = self.get_input_tensors(inputs, "keypoints_raw")[0] batch_size = pred_scores.shape[0] device = pred_scores.device - target_kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + target_kpts = self.get_label(labels, LabelType.KEYPOINTS) + target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX) n_kpts = 
(target_kpts.shape[1] - 2) // 3 gt_bboxes_scale = torch.tensor( diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index d174c555..fd73b36a 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -1,17 +1,21 @@ -from typing import cast +import logging +from typing import Any, cast import torch +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import box_convert from luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss from luxonis_train.nodes import ImplicitKeypointBBoxHead -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + IncompatibleException, + Labels, + Packet, compute_iou_loss, match_to_anchor, process_bbox_predictions, ) -from luxonis_train.utils.types import IncompatibleException, Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss @@ -25,6 +29,8 @@ list[Tensor], ] +logger = logging.getLogger(__name__) + class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): node: ImplicitKeypointBBoxHead @@ -47,7 +53,7 @@ def __init__( anchor_threshold: float = 4.0, bias: float = 0.5, balance: list[float] | None = None, - **kwargs, + **kwargs: Any, ): """Joint loss for keypoint and box predictions for cases where the keypoints and boxes are inherently linked. @@ -102,16 +108,16 @@ def __init__( self.anchors = self.node.anchors self.balance = balance or [4.0, 1.0, 0.4] if len(self.balance) < self.num_heads: - raise ValueError( + logger.warning( f"Balance list must have at least {self.num_heads} elements." + "Filling the rest with 1.0." 
) + self.balance += [1.0] * (self.num_heads - len(self.balance)) self.min_objectness_iou = min_objectness_iou self.bbox_weight = bbox_loss_weight self.class_weight = class_loss_weight self.objectness_weight = objectness_loss_weight - self.kpt_visibility_weight = keypoint_visibility_loss_weight - self.keypoint_regression_loss_weight = keypoint_regression_loss_weight self.anchor_threshold = anchor_threshold self.bias = bias @@ -126,6 +132,8 @@ def __init__( bce_power=viz_pw, sigmas=sigmas, area_factor=area_factor, + regression_loss_weight=keypoint_regression_loss_weight, + visibility_loss_weight=keypoint_visibility_loss_weight, ) self.positive_smooth_const = 1 - 0.5 * label_smoothing @@ -153,19 +161,20 @@ def prepare( """ predictions = self.get_input_tensors(outputs, "features") - kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - boxes = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + kpt_label = self.get_label(labels, LabelType.KEYPOINTS) + bbox_label = self.get_label(labels, LabelType.BOUNDINGBOX) - nkpts = (kpts.shape[1] - 2) // 3 - targets = torch.zeros((len(boxes), nkpts * 3 + self.box_offset + 1)) - targets[:, :2] = boxes[:, :2] + targets = torch.zeros( + (kpt_label.shape[0], self.n_keypoints * 3 + self.box_offset + 1) + ) + targets[:, :2] = kpt_label[:, :2] targets[:, 2 : self.box_offset + 1] = box_convert( - boxes[:, 2:], "xywh", "cxcywh" + bbox_label[:, 2:], "xywh", "cxcywh" ) - targets[:, self.box_offset + 1 :: 3] = kpts[:, 2::3] # insert kp x coordinates - targets[:, self.box_offset + 2 :: 3] = kpts[:, 3::3] # insert kp y coordinates - targets[:, self.box_offset + 3 :: 3] = kpts[:, 4::3] # insert kp visibility + # insert keypoints + for i in range(1, 4): + targets[:, self.box_offset + i :: 3] = kpt_label[:, i + 1 :: 3] n_targets = targets.shape[0] @@ -280,13 +289,8 @@ def forward( kpt_target.to(device), area.to(device), ) - - sub_losses["kpt_regression"] += ( - kpt_sublosses["regression"] * self.keypoint_regression_loss_weight - ) - 
sub_losses["kpt_visibility"] += ( - kpt_sublosses["visibility"] * self.kpt_visibility_weight - ) + for name, kpt_subloss in kpt_sublosses.items(): + sub_losses[name] += kpt_subloss obj_targets[index] = (self.min_objectness_iou) + ( 1 - self.min_objectness_iou @@ -295,11 +299,9 @@ def forward( if self.n_classes > 1: sub_losses["class"] += ( self.class_loss.forward( - [ - pred_subset[ - :, - self.box_offset : self.box_offset + self.n_classes, - ] + pred_subset[ + :, + self.box_offset : self.box_offset + self.n_classes, ], class_target, ) diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py index d5ca278f..fecf40ce 100644 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ b/luxonis_train/attached_modules/losses/keypoint_loss.py @@ -1,17 +1,20 @@ +from typing import Any + import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.attached_modules.metrics.object_keypoint_similarity import ( - get_area_factor, +from luxonis_train.utils import ( get_sigmas, + get_with_default, + process_keypoints_predictions, ) -from luxonis_train.utils.boxutils import process_keypoints_predictions -from luxonis_train.utils.types import Labels, LabelType, Packet from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss +# TODO: Make it work on its own class KeypointLoss(BaseLoss[Tensor, Tensor]): supported_labels = [LabelType.KEYPOINTS] @@ -21,7 +24,9 @@ def __init__( bce_power: float = 1.0, sigmas: list[float] | None = None, area_factor: float | None = None, - **kwargs, + regression_loss_weight: float = 1.0, + visibility_loss_weight: float = 1.0, + **kwargs: Any, ): """Keypoint based loss that is computed from OKS-based regression and visibility loss. @@ -35,19 +40,22 @@ def __init__( @type area_factor: float | None @param area_factor: Factor by which we multiply bbox area. If None then use default one. Defaults to C{None}. 
+ @type regression_loss_weight: float + @param regression_loss_weight: Weight of regression loss. Defaults to C{1.0}. + @type visibility_loss_weight: float + @param visibility_loss_weight: Weight of visibility loss. Defaults to C{1.0}. """ super().__init__(**kwargs) self.b_cross_entropy = BCEWithLogitsLoss( pos_weight=torch.tensor([bce_power]), **kwargs ) - self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=n_keypoints, class_name=self.name + self.sigmas = get_sigmas(sigmas, n_keypoints, caller_name=self.name) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 ) - self.area_factor = get_area_factor(area_factor, class_name=self.name) - - def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Tensor, Tensor]: - return torch.cat(inputs["keypoints"], dim=0), self.get_label(labels)[0] + self.regression_loss_weight = regression_loss_weight + self.visibility_loss_weight = visibility_loss_weight def forward( self, prediction: Tensor, target: Tensor, area: Tensor @@ -65,29 +73,34 @@ def forward( @return: A tuple containing the total loss tensor of shape C{[1,]} and a dictionary with the regression loss and visibility loss tensors. 
""" - device = prediction.device - sigmas = self.sigmas.to(device) + sigmas = self.sigmas.to(prediction.device) pred_x, pred_y, pred_v = process_keypoints_predictions(prediction) - gt_x = target[:, 0::3] - gt_y = target[:, 1::3] - gt_v = (target[:, 2::3] > 0).float() + target_x = target[:, 0::3] + target_y = target[:, 1::3] + target_visibility = (target[:, 2::3] > 0).float() - visibility_loss = self.b_cross_entropy.forward(pred_v, gt_v) + visibility_loss = ( + self.b_cross_entropy.forward(pred_v, target_visibility) + * self.visibility_loss_weight + ) scales = area * self.area_factor - d = (gt_x - pred_x) ** 2 + (gt_y - pred_y) ** 2 - e = d / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 + distance = (target_x - pred_x) ** 2 + (target_y - pred_y) ** 2 + normalized_distance = ( + distance / (2 * sigmas**2) / (scales.view(-1, 1) + 1e-9) / 2 + ) - regression_loss_unreduced = 1 - torch.exp(-e) - regression_loss_reduced = (regression_loss_unreduced * gt_v).sum(dim=1) / ( - gt_v.sum(dim=1) + 1e-9 + regression_loss = 1 - torch.exp(-normalized_distance) + regression_loss = (regression_loss * target_visibility).sum(dim=1) / ( + target_visibility.sum(dim=1) + 1e-9 ) - regression_loss = regression_loss_reduced.mean() + regression_loss = regression_loss.mean() + regression_loss *= self.regression_loss_weight total_loss = regression_loss + visibility_loss return total_loss, { - "regression": regression_loss, - "visibility": visibility_loss, + "kpt_regression": regression_loss, + "kpt_visibility": visibility_loss, } diff --git a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py index f3affc74..7915cdce 100644 --- a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py +++ b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal from luxonis_ml.data import LabelType from torch import Tensor @@ -15,7 +15,7 @@ def 
__init__( alpha: float = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", - **kwargs, + **kwargs: Any, ): """Focal loss from U{Focal Loss for Dense Object Detection }. diff --git a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py index ac976428..62a9335f 100644 --- a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -17,7 +17,7 @@ def __init__( bce_pow: float = 1.0, weight: list[float] | None = None, reduction: Literal["mean", "sum", "none"] = "mean", - **kwargs, + **kwargs: Any, ): """BCE with logits loss and label smoothing. @@ -36,12 +36,10 @@ def __init__( C{reduce} are in the process of being deprecated, and in the meantime, specifying either of those two args will override C{reduction}. Defaults to C{'mean'}. - @type kwargs: dict - @param kwargs: Additional arguments to pass to L{BaseLoss}. """ super().__init__(**kwargs) - self.negative_smooth_const = 1.0 - 0.5 * label_smoothing - self.positive_smooth_const = 0.5 * label_smoothing + self.positive_smooth_const = 1.0 - label_smoothing + self.negative_smooth_const = label_smoothing self.criterion = BCEWithLogitsLoss( pos_weight=torch.tensor( [bce_pow], @@ -50,24 +48,26 @@ def __init__( reduction=reduction, ) - def forward(self, predictions: list[Tensor], target: Tensor) -> Tensor: + def forward(self, predictions: Tensor, target: Tensor) -> Tensor: """Computes the BCE loss with label smoothing. - @type predictions: list[Tensor] - @param predictions: List of tensors of shape (N, n_classes), containing the - predicted class scores. 
+ @type predictions: Tensor + @param predictions: Network predictions of shape (N, C, H, W) @type target: Tensor - @param target: A tensor of shape (N,), containing the ground-truth class labels + @param target: A tensor of shape (N, C, H, W). @rtype: Tensor @return: A scalar tensor. """ - prediction = predictions[0] - smoothed_target = torch.full_like( - prediction, - self.negative_smooth_const, - device=prediction.device, - ) - smoothed_target[ - torch.arange(target.shape[0]), target - ] = self.positive_smooth_const - return self.criterion.forward(prediction, smoothed_target) + if predictions.shape != target.shape: + raise RuntimeError( + f"Target tensor dimension ({target.shape}) and predictions tensor " + f"dimension ({predictions.shape}) should be the same." + ) + + if self.negative_smooth_const != 0.0: + target = ( + target * self.positive_smooth_const + + (1 - target) * self.negative_smooth_const + ) + + return self.criterion(predictions, target) diff --git a/luxonis_train/attached_modules/losses/softmax_focal_loss.py b/luxonis_train/attached_modules/losses/softmax_focal_loss.py index 14f32e54..f347421f 100644 --- a/luxonis_train/attached_modules/losses/softmax_focal_loss.py +++ b/luxonis_train/attached_modules/losses/softmax_focal_loss.py @@ -1,6 +1,4 @@ -# TODO: document - -from typing import Literal +from typing import Any, Literal import torch from luxonis_ml.data import LabelType @@ -11,6 +9,7 @@ from .cross_entropy import CrossEntropyLoss +# TODO: Make focal losses support multi-class tasks class SoftmaxFocalLoss(BaseLoss[Tensor, Tensor]): supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] @@ -19,9 +18,10 @@ def __init__( alpha: float | list[float] = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", - **kwargs, + **kwargs: Any, ): - """Focal loss implementation for multi-class/multi-label tasks using Softmax. + """Focal loss implementation for binary classification and segmentation tasks + using Softmax. 
@type alpha: float | list[float] @param alpha: Weighting factor for the rare class. Defaults to C{0.25}. diff --git a/luxonis_train/attached_modules/metrics/base_metric.py b/luxonis_train/attached_modules/metrics/base_metric.py index b2e456c9..ed232d26 100644 --- a/luxonis_train/attached_modules/metrics/base_metric.py +++ b/luxonis_train/attached_modules/metrics/base_metric.py @@ -5,8 +5,8 @@ from typing_extensions import TypeVarTuple, Unpack from luxonis_train.attached_modules import BaseAttachedModule +from luxonis_train.utils import Labels, Packet from luxonis_train.utils.registry import METRICS -from luxonis_train.utils.types import Labels, Packet Ts = TypeVarTuple("Ts") diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/common.py index 97e8a7ec..a678d54e 100644 --- a/luxonis_train/attached_modules/metrics/common.py +++ b/luxonis_train/attached_modules/metrics/common.py @@ -1,4 +1,5 @@ import logging +from typing import Any import torchmetrics from luxonis_ml.data import LabelType @@ -9,8 +10,10 @@ logger = logging.getLogger(__name__) -class TorchMetricWrapper(BaseMetric): - def __init__(self, **kwargs): +class TorchMetricWrapper(BaseMetric[Tensor]): + Metric: type[torchmetrics.Metric] + + def __init__(self, **kwargs: Any): super().__init__(node=kwargs.pop("node", None)) task = kwargs.get("task") @@ -38,27 +41,29 @@ def __init__(self, **kwargs): if self._task == "multiclass": if "num_classes" not in kwargs: - if self.node is None: + try: + kwargs["num_classes"] = self.node.n_classes + except RuntimeError as e: raise ValueError( "Either `node` or `num_classes` must be provided to " "multiclass torchmetrics." 
- ) - kwargs["num_classes"] = self.node.n_classes - elif self._task == "multilabel": + ) from e + else: if "num_labels" not in kwargs: - if self.node is None: + try: + kwargs["num_labels"] = self.node.n_classes + except RuntimeError as e: raise ValueError( "Either `node` or `num_labels` must be provided to " "multilabel torchmetrics." - ) - kwargs["num_labels"] = self.node.n_classes + ) from e self.metric = self.Metric(**kwargs) - def update(self, preds, target, *args, **kwargs) -> None: + def update(self, preds: Tensor, target: Tensor) -> None: if self._task in ["multiclass"]: target = target.argmax(dim=1) - self.metric.update(preds, target, *args, **kwargs) + self.metric.update(preds, target) def compute(self) -> Tensor: return self.metric.compute() diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index ffdf5e22..ea64afd0 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -1,13 +1,18 @@ +from typing import Any + import torchmetrics.detection as detection +from luxonis_ml.data import LabelType from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet from .base_metric import BaseMetric -class MeanAveragePrecision(BaseMetric): +class MeanAveragePrecision( + BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] +): """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object detection predictions. 
@@ -17,7 +22,7 @@ class MeanAveragePrecision(BaseMetric): supported_labels = [LabelType.BOUNDINGBOX] - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): super().__init__(**kwargs) self.metric = detection.MeanAveragePrecision() @@ -29,10 +34,10 @@ def update( self.metric.update(outputs, labels) def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - box_label = self.get_label(labels)[0] - output_nms = self.get_input_tensors(outputs) + box_label = self.get_label(labels) + output_nms = self.get_input_tensors(inputs) image_size = self.node.original_in_shape[1:] @@ -59,11 +64,21 @@ def reset(self) -> None: self.metric.reset() def compute(self) -> tuple[Tensor, dict[str, Tensor]]: - metric_dict = self.metric.compute() + metric_dict: dict[str, Tensor] = self.metric.compute() del metric_dict["classes"] del metric_dict["map_per_class"] del metric_dict["mar_100_per_class"] + for key in list(metric_dict.keys()): + if "map" in key: + map = metric_dict[key] + mar_key = key.replace("map", "mar") + if mar_key in metric_dict: + mar = metric_dict[mar_key] + metric_dict[key.replace("map", "f1")] = ( + 2 * (map * mar) / (map + mar) + ) + map = metric_dict.pop("map") return map, metric_dict diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 0d558b43..e424d2dd 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -3,21 +3,20 @@ from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.attached_modules.metrics.object_keypoint_similarity 
import ( - get_area_factor, - get_sigmas, -) -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet, get_sigmas, get_with_default from .base_metric import BaseMetric -class MeanAveragePrecisionKeypoints(BaseMetric): +class MeanAveragePrecisionKeypoints( + BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] +): """Mean Average Precision metric for keypoints. Uses C{OKS} as IoU measure. @@ -66,15 +65,15 @@ def __init__( @param max_dets: Maximum number of detections to be considered per image. Defaults to C{20}. @type box_format: Literal["xyxy", "xywh", "cxcywh"] @param box_format: Input bbox format. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseMetric}. """ super().__init__(**kwargs) self.n_keypoints = self.node.n_keypoints - self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.name) - self.area_factor = get_area_factor(area_factor, self.name) + self.sigmas = get_sigmas(sigmas, self.n_keypoints, caller_name=self.name) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) self.max_dets = max_dets allowed_box_formats = ("xyxy", "xywh", "cxcywh") @@ -95,10 +94,12 @@ def __init__( self.add_state("groundtruth_crowds", default=[], dist_reduce_fx=None) self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) - def prepare(self, outputs: Packet[Tensor], labels: Labels): + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: assert self.node.tasks is not None - kpts = self.get_label(labels, LabelType.KEYPOINTS)[0] - boxes = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + kpts = self.get_label(labels, LabelType.KEYPOINTS) + boxes = self.get_label(labels, LabelType.BOUNDINGBOX) nkpts = (kpts.shape[1] - 2) // 3 label = torch.zeros((len(boxes), nkpts * 3 + 6)) @@ -108,12 +109,12 @@ def prepare(self, outputs: Packet[Tensor], labels: Labels): 
label[:, 7::3] = kpts[:, 3::3] # y label[:, 8::3] = kpts[:, 4::3] # visiblity - output_list_kpt_map = [] - label_list_kpt_map = [] + output_list_kpt_map: list[dict[str, Tensor]] = [] + label_list_kpt_map: list[dict[str, Tensor]] = [] image_size = self.node.original_in_shape[1:] - output_kpts = self.get_input_tensors(outputs, LabelType.KEYPOINTS) - output_bboxes = self.get_input_tensors(outputs, LabelType.BOUNDINGBOX) + output_kpts = self.get_input_tensors(inputs, LabelType.KEYPOINTS) + output_bboxes = self.get_input_tensors(inputs, LabelType.BOUNDINGBOX) for i in range(len(output_kpts)): output_list_kpt_map.append( { @@ -258,16 +259,16 @@ def _get_coco_format( Format is defined at U{https://cocodataset.org/#format-data}. """ - images = [] - annotations = [] + images: list[dict[str, int]] = [] + annotations: list[dict[str, Any]] = [] annotation_id = 1 # has to start with 1, otherwise COCOEval results are wrong for image_id, (image_boxes, image_kpts, image_labels) in enumerate( zip(boxes, keypoints, labels) ): - image_boxes_list = image_boxes.cpu().tolist() - image_kpts_list = image_kpts.cpu().tolist() - image_labels_list = image_labels.cpu().tolist() + image_boxes_list: list[list[float]] = image_boxes.cpu().tolist() + image_kpts_list: list[list[float]] = image_kpts.cpu().tolist() + image_labels_list: list[int] = image_labels.cpu().tolist() images.append({"id": image_id}) @@ -315,6 +316,8 @@ def _get_coco_format( if scores is not None: score = scores[image_id][k].cpu().tolist() + # `tolist` returns a number for scalar tensors, + # the name is misleading if not isinstance(score, float): raise ValueError( f"Invalid input score of sample {image_id}, element {k}" diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index 4cbd1cac..77c05ea4 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ 
b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -1,11 +1,13 @@ import logging +from typing import Any import torch +from luxonis_ml.data import LabelType from scipy.optimize import linear_sum_assignment from torch import Tensor from torchvision.ops import box_convert -from luxonis_train.utils.types import Labels, LabelType, Packet +from luxonis_train.utils import Labels, Packet, get_sigmas, get_with_default from .base_metric import BaseMetric @@ -33,7 +35,7 @@ def __init__( sigmas: list[float] | None = None, area_factor: float | None = None, use_cocoeval_oks: bool = True, - **kwargs, + **kwargs: Any, ) -> None: """Object Keypoint Similarity metric for evaluating keypoint predictions. @@ -51,14 +53,16 @@ def __init__( """ super().__init__(**kwargs) - if n_keypoints is None and self.node is None: + if n_keypoints is None and self._node is None: raise ValueError( f"Either `n_keypoints` or `node` must be provided to {self.name}." ) self.n_keypoints = n_keypoints or self.node.n_keypoints - self.sigmas = get_sigmas(sigmas, self.n_keypoints, self.name) - self.area_factor = get_area_factor(area_factor, self.name) + self.sigmas = get_sigmas(sigmas, self.n_keypoints, caller_name=self.name) + self.area_factor = get_with_default( + area_factor, "bbox area scaling", self.name, default=0.53 + ) self.use_cocoeval_oks = use_cocoeval_oks self.add_state("pred_keypoints", default=[], dist_reduce_fx=None) @@ -66,11 +70,11 @@ def __init__( self.add_state("groundtruth_scales", default=[], dist_reduce_fx=None) def prepare( - self, outputs: Packet[Tensor], labels: Labels + self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: assert self.node.tasks is not None - kpts_labels = self.get_label(labels, LabelType.KEYPOINTS)[0] - bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX)[0] + kpts_labels = self.get_label(labels, LabelType.KEYPOINTS) + bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX) 
num_keypoints = (kpts_labels.shape[1] - 2) // 3 label = torch.zeros((len(bbox_labels), num_keypoints * 3 + 6)) label[:, :2] = bbox_labels[:, :2] @@ -84,7 +88,7 @@ def prepare( image_size = self.node.original_in_shape[1:] for i, pred_kpt in enumerate( - self.get_input_tensors(outputs, LabelType.KEYPOINTS) + self.get_input_tensors(inputs, LabelType.KEYPOINTS) ): output_list_oks.append({"keypoints": pred_kpt}) @@ -129,11 +133,11 @@ def update( width and height are unnormalized. """ for item in preds: - keypoints = fix_empty_tensors(item["keypoints"]) + keypoints = self._fix_empty_tensors(item["keypoints"]) self.pred_keypoints.append(keypoints) for item in target: - keypoints = fix_empty_tensors(item["keypoints"]) + keypoints = self._fix_empty_tensors(item["keypoints"]) self.groundtruth_keypoints.append(keypoints) self.groundtruth_scales.append(item["scales"]) @@ -166,6 +170,13 @@ def compute(self) -> Tensor: return final_oks + @staticmethod + def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: + """Empty tensors can cause problems in DDP mode, this methods corrects them.""" + if input_tensor.numel() == 0 and input_tensor.ndim == 1: + return input_tensor.unsqueeze(0) + return input_tensor + def compute_oks( pred: Tensor, @@ -211,73 +222,3 @@ def compute_oks( return (torch.exp(-oks) * kpt_mask[:, None]).sum(-1) / ( kpt_mask.sum(-1)[:, None] + eps ) - - -def fix_empty_tensors(input_tensor: Tensor) -> Tensor: - """Empty tensors can cause problems in DDP mode, this methods corrects them.""" - if input_tensor.numel() == 0 and input_tensor.ndim == 1: - return input_tensor.unsqueeze(0) - return input_tensor - - -def get_sigmas( - sigmas: list[float] | None, n_keypoints: int, class_name: str | None -) -> Tensor: - """Validate and set the sigma values.""" - if sigmas is not None: - if len(sigmas) == n_keypoints: - return torch.tensor(sigmas, dtype=torch.float32) - else: - error_msg = "The length of the sigmas list must be the same as the number of keypoints." 
- if class_name: - error_msg = f"[{class_name}] {error_msg}" - raise ValueError(error_msg) - else: - if n_keypoints == 17: - warn_msg = "Default COCO sigmas are being used." - if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return torch.tensor( - [ - 0.026, - 0.025, - 0.025, - 0.035, - 0.035, - 0.079, - 0.079, - 0.072, - 0.072, - 0.062, - 0.062, - 0.107, - 0.107, - 0.087, - 0.087, - 0.089, - 0.089, - ], - dtype=torch.float32, - ) - else: - warn_msg = "Default sigma of 0.04 is being used for each keypoint." - if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return torch.tensor([0.04] * n_keypoints, dtype=torch.float32) - - -def get_area_factor(area_factor: float | None, class_name: str | None) -> float: - """Set the default area factor if not defined.""" - factor = 0.53 - if area_factor is None: - warn_msg = ( - f"Default area_factor of {factor} is being used for bbox area scaling." - ) - if class_name: - warn_msg = f"[{class_name}] {warn_msg}" - logger.warning(warn_msg) - return factor - else: - return area_factor diff --git a/luxonis_train/attached_modules/visualizers/base_visualizer.py b/luxonis_train/attached_modules/visualizers/base_visualizer.py index 5fa6db62..2690f04c 100644 --- a/luxonis_train/attached_modules/visualizers/base_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/base_visualizer.py @@ -4,8 +4,8 @@ from typing_extensions import TypeVarTuple, Unpack from luxonis_train.attached_modules import BaseAttachedModule +from luxonis_train.utils import Labels, Packet from luxonis_train.utils.registry import VISUALIZERS -from luxonis_train.utils.types import Labels, Packet Ts = TypeVarTuple("Ts") diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index df3ac933..44595ea6 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ 
b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -1,10 +1,9 @@ import logging import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import LabelType - from .base_visualizer import BaseVisualizer from .utils import Color, draw_bounding_box_labels, draw_bounding_boxes, get_color diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index 20a5710e..343ab3cb 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -39,14 +39,14 @@ def _get_class_name(self, pred: Tensor) -> str: return self.node.class_names[idx] def _generate_plot(self, prediction: Tensor, width: int, height: int) -> Tensor: - prediction = prediction.softmax(-1).detach().cpu().numpy() + pred = prediction.softmax(-1).detach().cpu().numpy() fig, ax = plt.subplots(figsize=(width / 100, height / 100)) - ax.bar(np.arange(len(prediction)), prediction) - ax.set_xticks(np.arange(len(prediction))) + ax.bar(np.arange(len(pred)), pred) + ax.set_xticks(np.arange(len(pred))) if self.node.class_names is not None: ax.set_xticklabels(self.node.class_names, rotation=90) else: - ax.set_xticklabels(np.arange(1, len(prediction) + 1)) + ax.set_xticklabels(np.arange(1, len(pred) + 1)) ax.set_ylim(0, 1) ax.set_xlabel("Class") ax.set_ylabel("Probability") diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index c7925ecc..85dfec47 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -29,10 +29,10 @@ def __init__(self, visualizers: list[Kwargs], **kwargs): self.visualizers.append(visualizer) def prepare( - self, output: Packet[Tensor], label: Labels, idx: int = 0 + 
self, inputs: Packet[Tensor], label: Labels, idx: int = 0 ) -> tuple[Packet[Tensor], Labels]: self._idx = idx - return output, label + return inputs, label def forward( self, diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index 85b93ce1..a95511e7 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -1,10 +1,9 @@ import logging import torch +from luxonis_ml.data import LabelType from torch import Tensor -from luxonis_train.utils.types import LabelType - from .base_visualizer import BaseVisualizer from .utils import Color, draw_segmentation_labels, get_color, seg_output_to_bool diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index c55b12ce..d5603bda 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -19,7 +19,7 @@ draw_segmentation_masks, ) -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config Color = str | tuple[int, int, int] """Color type alias. 
diff --git a/luxonis_train/callbacks/__init__.py b/luxonis_train/callbacks/__init__.py index 4c7f7824..95f860a1 100644 --- a/luxonis_train/callbacks/__init__.py +++ b/luxonis_train/callbacks/__init__.py @@ -1,9 +1,13 @@ from lightning.pytorch.callbacks import ( DeviceStatsMonitor, EarlyStopping, + GradientAccumulationScheduler, LearningRateMonitor, ModelCheckpoint, + ModelPruning, RichModelSummary, + StochasticWeightAveraging, + Timer, ) from luxonis_train.utils.registry import CALLBACKS @@ -26,6 +30,10 @@ CALLBACKS.register_module(module=ModelCheckpoint) CALLBACKS.register_module(module=RichModelSummary) CALLBACKS.register_module(module=DeviceStatsMonitor) +CALLBACKS.register_module(module=GradientAccumulationScheduler) +CALLBACKS.register_module(module=StochasticWeightAveraging) +CALLBACKS.register_module(module=Timer) +CALLBACKS.register_module(module=ModelPruning) __all__ = [ diff --git a/luxonis_train/callbacks/gpu_stats_monitor.py b/luxonis_train/callbacks/gpu_stats_monitor.py index 9479d4d2..32983bde 100644 --- a/luxonis_train/callbacks/gpu_stats_monitor.py +++ b/luxonis_train/callbacks/gpu_stats_monitor.py @@ -27,11 +27,11 @@ import pytorch_lightning as pl import torch -from lightning.pytorch.accelerators import CUDAAccelerator # type: ignore -from pytorch_lightning.utilities import rank_zero_only -from pytorch_lightning.utilities.exceptions import ( - MisconfigurationException, # type: ignore +from lightning.pytorch.accelerators.cuda import CUDAAccelerator +from lightning_fabric.utilities.exceptions import ( + MisconfigurationException, # noqa: F401 ) +from pytorch_lightning.utilities import rank_zero_only from pytorch_lightning.utilities.parsing import AttributeDict from pytorch_lightning.utilities.types import STEP_OUTPUT @@ -40,49 +40,6 @@ @CALLBACKS.register_module() class GPUStatsMonitor(pl.Callback): - """Automatically monitors and logs GPU stats during training stage. 
- C{GPUStatsMonitor} is a callback and in order to use it you need to assign a logger - in the C{Trainer}. - - Args: - memory_utilization: Set to C{True} to monitor used, free and percentage of memory - utilization at the start and end of each step. Default: C{True}. - gpu_utilization: Set to C{True} to monitor percentage of GPU utilization - at the start and end of each step. Default: C{True}. - intra_step_time: Set to C{True} to monitor the time of each step. Default: {False}. - inter_step_time: Set to C{True} to monitor the time between the end of one step - and the start of the next step. Default: C{False}. - fan_speed: Set to C{True} to monitor percentage of fan speed. Default: C{False}. - temperature: Set to C{True} to monitor the memory and gpu temperature in degree Celsius. - Default: C{False}. - - Raises: - MisconfigurationException: - If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger. - - Example:: - - >>> from pytorch_lightning import Trainer - >>> from pytorch_lightning.callbacks import GPUStatsMonitor - >>> gpu_stats = GPUStatsMonitor() # doctest: +SKIP - >>> trainer = Trainer(callbacks=[gpu_stats]) # doctest: +SKIP - - GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: - - - **fan.speed** – The fan speed value is the percent of maximum speed that the device's fan is currently - intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. - If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. - Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. - - **memory.used** – Total memory allocated by active contexts. - - **memory.free** – Total free memory. - - **utilization.gpu** – Percent of time over the past sample period during which one or more kernels was - executing on the GPU. 
The sample period may be between 1 second and 1/6 second depending on the product. - - **utilization.memory** – Percent of time over the past sample period during which global (device) memory was - being read or written. The sample period may be between 1 second and 1/6 second depending on the product. - - **temperature.gpu** – Core GPU temperature, in degrees C. - - **temperature.memory** – HBM memory temperature, in degrees C. - """ - def __init__( self, memory_utilization: bool = True, @@ -92,6 +49,40 @@ def __init__( fan_speed: bool = False, temperature: bool = False, ): + """Automatically monitors and logs GPU stats during training stage. + C{GPUStatsMonitor} is a callback and in order to use it you need to assign a + logger in the C{Trainer}. + + GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: + + - **fan.speed** – The fan speed value is the percent of maximum speed that the device's fan is currently + intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. + If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. + Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. + - **memory.used** – Total memory allocated by active contexts. + - **memory.free** – Total free memory. + - **utilization.gpu** – Percent of time over the past sample period during which one or more kernels was + executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product. + - **utilization.memory** – Percent of time over the past sample period during which global (device) memory was + being read or written. The sample period may be between 1 second and 1/6 second depending on the product. + - **temperature.gpu** – Core GPU temperature, in degrees C. + - **temperature.memory** – HBM memory temperature, in degrees C. 
+ + @type memory_utilization: bool + @param memory_utilization: Set to C{True} to monitor used, free and percentage of memory utilization at the start and end of each step. Defaults to C{True}. + @type gpu_utilization: bool + @param gpu_utilization: Set to C{True} to monitor percentage of GPU utilization at the start and end of each step. Defaults to C{True}. + @type intra_step_time: bool + @param intra_step_time: Set to C{True} to monitor the time of each step. Defaults to {False}. + @type inter_step_time: bool + @param inter_step_time: Set to C{True} to monitor the time between the end of one step and the start of the next step. Defaults to C{False}. + @type fan_speed: bool + @param fan_speed: Set to C{True} to monitor percentage of fan speed. Defaults to C{False}. + @type temperature: bool + @param temperature: Set to C{True} to monitor the memory and gpu temperature in degree Celsius. Defaults to C{False}. + @raises MisconfigurationException: If NVIDIA driver is not installed, not running on GPUs, or C{Trainer} has no logger. 
+ """ + super().__init__() if shutil.which("nvidia-smi") is None: diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index d14fcf08..fc09b26a 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -14,7 +14,6 @@ class BaseLuxonisProgressBar(ABC, ProgressBar): def get_metrics( self, trainer: pl.Trainer, pl_module: pl.LightningModule ) -> dict[str, int | str | float | dict[str, float]]: - # NOTE: there might be a cleaner way of doing this items = super().get_metrics(trainer, pl_module) items.pop("v_num", None) if trainer.training and pl_module.training_step_outputs: @@ -140,10 +139,7 @@ def print_table( rich_table.add_column(key_name, style="magenta") rich_table.add_column(value_name, style="white") for name, value in table.items(): - if isinstance(value, float): - rich_table.add_row(name, f"{value:.5f}") - else: - rich_table.add_row(name, str(value)) + rich_table.add_row(name, f"{value:.5f}") self.console.print(rich_table) def print_results( diff --git a/luxonis_train/callbacks/metadata_logger.py b/luxonis_train/callbacks/metadata_logger.py index 45ff8717..f4fa851f 100644 --- a/luxonis_train/callbacks/metadata_logger.py +++ b/luxonis_train/callbacks/metadata_logger.py @@ -6,7 +6,7 @@ import yaml import luxonis_train -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config from luxonis_train.utils.registry import CALLBACKS diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 16953062..34dfca75 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -3,7 +3,7 @@ import threading from logging import getLogger from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Mapping, overload import lightning.pytorch as pl import lightning_utilities.core.rank_zero as rank_zero_module @@ -19,12 +19,10 @@ from luxonis_train.attached_modules.visualizers 
import get_unnormalized_images from luxonis_train.callbacks import LuxonisRichProgressBar, LuxonisTQDMProgressBar +from luxonis_train.loaders import BaseLoaderTorch, collate_fn from luxonis_train.models import LuxonisLightningModule -from luxonis_train.utils.config import Config -from luxonis_train.utils.general import DatasetMetadata -from luxonis_train.utils.loaders import BaseLoaderTorch, collate_fn +from luxonis_train.utils import Config, DatasetMetadata, LuxonisTrackerPL from luxonis_train.utils.registry import LOADERS -from luxonis_train.utils.tracker import LuxonisTrackerPL from .utils.export_utils import ( blobconverter_export, @@ -174,7 +172,6 @@ def __init__( self.error_message = None self.dataset_metadata = DatasetMetadata.from_loader(self.loaders["train"]) - self.dataset_metadata.set_loader(self.pytorch_loaders["train"]) self.cfg.save_data(osp.join(self.run_save_dir, "config.yaml")) @@ -224,7 +221,7 @@ def train( LuxonisFileSystem.download(resume_weights, self.run_save_dir) ) - def graceful_exit(signum: int, _): + def graceful_exit(signum: int, _): # pragma: no cover logger.info(f"{signal.Signals(signum).name} received, stopping training...") ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") self.pl_trainer.save_checkpoint(ckpt_path) @@ -349,15 +346,33 @@ def export( if self.cfg.exporter.upload_url is not None: LuxonisFileSystem.upload(f.name, self.cfg.exporter.upload_url) + @overload def test( - self, new_thread: bool = False, view: Literal["train", "test", "val"] = "val" + self, + new_thread: Literal[False] = ..., + view: Literal["train", "test", "val"] = "val", + ) -> Mapping[str, float]: + ... + + @overload + def test( + self, + new_thread: Literal[True] = ..., + view: Literal["train", "test", "val"] = "val", ) -> None: + ... + + def test( + self, new_thread: bool = False, view: Literal["train", "test", "val"] = "val" + ) -> Mapping[str, float] | None: """Runs testing. 
@type new_thread: bool @param new_thread: Runs testing in a new thread if set to True. @type view: Literal["train", "test", "val"] @param view: Which view to run the testing on. Defauls to "val". + @rtype: Mapping[str, float] | None + @return: If new_thread is False, returns a dictionary test results. """ if view not in self.pytorch_loaders: @@ -367,7 +382,7 @@ def test( loader = self.pytorch_loaders[view] if not new_thread: - self.pl_trainer.test(self.lightning_module, loader) + return self.pl_trainer.test(self.lightning_module, loader)[0] else: self.thread = threading.Thread( target=self.pl_trainer.test, diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index 72cdefc7..bf2432f5 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -15,7 +15,7 @@ ImplementedHeads, ImplementedHeadsIsSoxtmaxed, ) -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config logger = logging.getLogger(__name__) diff --git a/luxonis_train/core/utils/export_utils.py b/luxonis_train/core/utils/export_utils.py index 3b34a912..eb3e2775 100644 --- a/luxonis_train/core/utils/export_utils.py +++ b/luxonis_train/core/utils/export_utils.py @@ -100,7 +100,7 @@ def blobconverter_export( logger.info("Converting ONNX to .blob") - optimizer_params = [] + optimizer_params: list[str] = [] if scale_values: optimizer_params.append(f"--scale_values={scale_values}") if mean_values: @@ -111,7 +111,7 @@ def blobconverter_export( blob_path = blobconverter.from_onnx( model=onnx_path, optimizer_params=optimizer_params, - data_type=cfg.data_type, + data_type=cfg.data_type.upper(), shaves=cfg.blobconverter.shaves, version=cfg.blobconverter.version, use_cache=False, diff --git a/luxonis_train/core/utils/train_utils.py b/luxonis_train/core/utils/train_utils.py index 3a45a85b..1197000b 100644 --- a/luxonis_train/core/utils/train_utils.py +++ b/luxonis_train/core/utils/train_utils.py @@ -1,9 
+1,11 @@ +from typing import Any + import lightning.pytorch as pl -from luxonis_train.utils.config import Config +from luxonis_train.utils import Config -def create_trainer(cfg: Config, **kwargs) -> pl.Trainer: +def create_trainer(cfg: Config, **kwargs: Any) -> pl.Trainer: """Creates Pytorch Lightning trainer. @type cfg: Config diff --git a/luxonis_train/utils/loaders/__init__.py b/luxonis_train/loaders/__init__.py similarity index 100% rename from luxonis_train/utils/loaders/__init__.py rename to luxonis_train/loaders/__init__.py diff --git a/luxonis_train/utils/loaders/base_loader.py b/luxonis_train/loaders/base_loader.py similarity index 73% rename from luxonis_train/utils/loaders/base_loader.py rename to luxonis_train/loaders/base_loader.py index 5e884955..78607a29 100644 --- a/luxonis_train/utils/loaders/base_loader.py +++ b/luxonis_train/loaders/base_loader.py @@ -1,13 +1,13 @@ from abc import ABC, abstractmethod import torch -from luxonis_ml.data import Augmentations +from luxonis_ml.data import Augmentations, LabelType from luxonis_ml.utils.registry import AutoRegisterMeta from torch import Size, Tensor from torch.utils.data import Dataset from luxonis_train.utils.registry import LOADERS -from luxonis_train.utils.types import Labels, LabelType +from luxonis_train.utils.types import Labels LuxonisLoaderTorchOutput = tuple[dict[str, Tensor], Labels] """LuxonisLoaderTorchOutput is a tuple of source tensors and corresponding labels.""" @@ -38,6 +38,8 @@ def image_source(self) -> str: """Name of the input image group. Example: 'image' + + @type: str """ if self._image_source is None: raise ValueError("image_source is not set") @@ -47,39 +49,46 @@ def image_source(self) -> str: @abstractmethod def input_shapes(self) -> dict[str, Size]: """ - Shape of each loader group (sub-element), WITHOUT batch dimension. + Shape (c, h, w) of each loader group (sub-element), WITHOUT batch dimension. Examples: - 1. 
Single image input:: - { - 'image': torch.Size([3, 224, 224]), - } - - 2. Image and segmentation input:: - { - 'image': torch.Size([3, 224, 224]), - 'segmentation': torch.Size([1, 224, 224]), - } - - 3. Left image, right image and disparity input:: - { - 'left': torch.Size([3, 224, 224]), - 'right': torch.Size([3, 224, 224]), - 'disparity': torch.Size([1, 224, 224]), - } - - 4. Image, keypoints, and point cloud input:: - { - 'image': torch.Size([3, 224, 224]), - 'keypoints': torch.Size([17, 2]), - 'point_cloud': torch.Size([20000, 3]), - } - - @rtype: dict[str, Size] - @return: A dictionary mapping group names to their shapes. + 1. Single image input:: + { + 'image': torch.Size([3, 224, 224]), + } + + 2. Image and segmentation input:: + { + 'image': torch.Size([3, 224, 224]), + 'segmentation': torch.Size([1, 224, 224]), + } + + 3. Left image, right image and disparity input:: + { + 'left': torch.Size([3, 224, 224]), + 'right': torch.Size([3, 224, 224]), + 'disparity': torch.Size([1, 224, 224]), + } + + 4. Image, keypoints, and point cloud input:: + { + 'image': torch.Size([3, 224, 224]), + 'keypoints': torch.Size([17, 2]), + 'point_cloud': torch.Size([20000, 3]), + } + + @type: dict[str, Size] """ ... + @property + def input_shape(self) -> Size: + """Shape (c, h, w) of the input tensor, WITHOUT batch dimension. 
+ + @type: torch.Size + """ + return self.input_shapes[self.image_source] + @abstractmethod def __len__(self) -> int: """Returns length of the dataset.""" @@ -131,9 +140,8 @@ def collate_fn( inputs, labels = zip(*batch) out_inputs = {k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys()} - out_labels = {task: {} for task in labels[0].keys()} - out_labels = {} + out_labels: Labels = {} for task in labels[0].keys(): label_type = labels[0][task][1] diff --git a/luxonis_train/utils/loaders/luxonis_loader_torch.py b/luxonis_train/loaders/luxonis_loader_torch.py similarity index 100% rename from luxonis_train/utils/loaders/luxonis_loader_torch.py rename to luxonis_train/loaders/luxonis_loader_torch.py diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index a3671dac..c021e5b3 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -1,6 +1,7 @@ from collections import defaultdict from collections.abc import Mapping from logging import getLogger +from pathlib import Path from typing import Literal, cast import lightning.pytorch as pl @@ -22,16 +23,19 @@ combine_visualizations, get_unnormalized_images, ) -from luxonis_train.callbacks import ( - BaseLuxonisProgressBar, - ModuleFreezer, -) +from luxonis_train.callbacks import BaseLuxonisProgressBar, ModuleFreezer from luxonis_train.nodes import BaseNode +from luxonis_train.utils import ( + DatasetMetadata, + Kwargs, + Labels, + LuxonisTrackerPL, + Packet, + to_shape_packet, + traverse_graph, +) from luxonis_train.utils.config import AttachedModuleConfig, Config -from luxonis_train.utils.general import DatasetMetadata, to_shape_packet, traverse_graph from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry -from luxonis_train.utils.tracker import LuxonisTrackerPL -from luxonis_train.utils.types import Kwargs, Labels, Packet from .luxonis_output import LuxonisOutput @@ -723,7 +727,8 @@ def 
configure_callbacks(self) -> list[pl.Callback]: def configure_optimizers( self, ) -> tuple[ - list[torch.optim.Optimizer], list[torch.optim.lr_scheduler._LRScheduler] + list[torch.optim.Optimizer], + list[torch.optim.lr_scheduler._LRScheduler], ]: """Configures model optimizers and schedulers.""" cfg_optimizer = self.cfg.trainer.optimizer @@ -739,7 +744,7 @@ def configure_optimizers( return [optimizer], [scheduler] - def load_checkpoint(self, path: str | None) -> None: + def load_checkpoint(self, path: str | Path | None) -> None: """Loads checkpoint weights from provided path. Loads the checkpoints gracefully, ignoring keys that are not found in the model @@ -751,6 +756,7 @@ def load_checkpoint(self, path: str | None) -> None: if path is None: return + path = str(path) checkpoint = torch.load(path, map_location=self.device) if "state_dict" not in checkpoint: diff --git a/luxonis_train/models/luxonis_output.py b/luxonis_train/models/luxonis_output.py index d69943fc..3cf59329 100644 --- a/luxonis_train/models/luxonis_output.py +++ b/luxonis_train/models/luxonis_output.py @@ -3,8 +3,7 @@ from torch import Tensor -from luxonis_train.utils.general import to_shape_packet -from luxonis_train.utils.types import Packet +from luxonis_train.utils import Packet, to_shape_packet @dataclass diff --git a/luxonis_train/models/predefined_models/base_predefined_model.py b/luxonis_train/models/predefined_models/base_predefined_model.py index 33ababdc..08cca4ee 100644 --- a/luxonis_train/models/predefined_models/base_predefined_model.py +++ b/luxonis_train/models/predefined_models/base_predefined_model.py @@ -1,4 +1,4 @@ -from abc import ABC, abstractproperty +from abc import ABC, abstractmethod from luxonis_ml.utils.registry import AutoRegisterMeta @@ -17,19 +17,23 @@ class BasePredefinedModel( registry=MODELS, register=False, ): - @abstractproperty + @property + @abstractmethod def nodes(self) -> list[ModelNodeConfig]: ... 
- @abstractproperty + @property + @abstractmethod def losses(self) -> list[LossModuleConfig]: ... - @abstractproperty + @property + @abstractmethod def metrics(self) -> list[MetricModuleConfig]: ... - @abstractproperty + @property + @abstractmethod def visualizers(self) -> list[AttachedModuleConfig]: ... diff --git a/luxonis_train/models/predefined_models/classification_model.py b/luxonis_train/models/predefined_models/classification_model.py index c9d782eb..d1253e4d 100644 --- a/luxonis_train/models/predefined_models/classification_model.py +++ b/luxonis_train/models/predefined_models/classification_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel diff --git a/luxonis_train/models/predefined_models/detection_model.py b/luxonis_train/models/predefined_models/detection_model.py index e9db4462..d6cd4520 100644 --- a/luxonis_train/models/predefined_models/detection_model.py +++ b/luxonis_train/models/predefined_models/detection_model.py @@ -1,12 +1,12 @@ from dataclasses import dataclass, field +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel diff --git a/luxonis_train/models/predefined_models/keypoint_detection_model.py b/luxonis_train/models/predefined_models/keypoint_detection_model.py index 588911c6..04c7a643 100644 --- a/luxonis_train/models/predefined_models/keypoint_detection_model.py +++ b/luxonis_train/models/predefined_models/keypoint_detection_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from 
luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel diff --git a/luxonis_train/models/predefined_models/segmentation_model.py b/luxonis_train/models/predefined_models/segmentation_model.py index b5e81f76..d1076239 100644 --- a/luxonis_train/models/predefined_models/segmentation_model.py +++ b/luxonis_train/models/predefined_models/segmentation_model.py @@ -1,13 +1,13 @@ from dataclasses import dataclass, field from typing import Literal +from luxonis_train.utils import Kwargs from luxonis_train.utils.config import ( AttachedModuleConfig, LossModuleConfig, MetricModuleConfig, ModelNodeConfig, ) -from luxonis_train.utils.types import Kwargs from .base_predefined_model import BasePredefinedModel diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index 2f147e23..ffd0217d 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -210,6 +210,6 @@ Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). | Key | Type | Default value | Description | | ----------- | ----------- | ------------- | -------------------------------------------------- | | n_keypoints | int \| None | None | Number of keypoints. 
| -| n_heads | int | 3 | Number of output heads | +| num_heads | int | 3 | Number of output heads | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | | iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | diff --git a/luxonis_train/nodes/activations/__init__.py b/luxonis_train/nodes/activations/__init__.py index 37aea0fc..0d3d1e0b 100644 --- a/luxonis_train/nodes/activations/__init__.py +++ b/luxonis_train/nodes/activations/__init__.py @@ -1,3 +1,3 @@ -from .activations import HSigmoid, HSwish +from .activations import HSigmoid -__all__ = ["HSigmoid", "HSwish"] +__all__ = ["HSigmoid"] diff --git a/luxonis_train/nodes/activations/activations.py b/luxonis_train/nodes/activations/activations.py index f3abedd6..93703a1c 100644 --- a/luxonis_train/nodes/activations/activations.py +++ b/luxonis_train/nodes/activations/activations.py @@ -10,14 +10,3 @@ def __init__(self): def forward(self, x: Tensor) -> Tensor: return self.relu(x + 3) / 6 - - -class HSwish(nn.Module): - def __init__(self): - """H-Swish activation function from U{Searching for MobileNetV3 - }.""" - super().__init__() - self.sigmoid = HSigmoid() - - def forward(self, x: Tensor) -> Tensor: - return x * self.sigmoid(x) diff --git a/luxonis_train/nodes/backbones/contextspatial.py b/luxonis_train/nodes/backbones/contextspatial.py index 2cac4b81..55de77e1 100644 --- a/luxonis_train/nodes/backbones/contextspatial.py +++ b/luxonis_train/nodes/backbones/contextspatial.py @@ -1,9 +1,3 @@ -"""Implementation of Context Spatial backbone. - -Source: U{BiseNetV1} -""" - - from torch import Tensor, nn from torch.nn import functional as F @@ -13,21 +7,43 @@ ConvModule, FeatureFusionBlock, ) +from luxonis_train.utils import Kwargs from luxonis_train.utils.registry import NODES class ContextSpatial(BaseNode[Tensor, list[Tensor]]): - def __init__(self, context_backbone: str = "MobileNetV2", **kwargs): - """Context spatial backbone. - TODO: Add more documentation. 
+ def __init__( + self, + context_backbone: str | nn.Module = "MobileNetV2", + backbone_kwargs: Kwargs | None = None, + **kwargs, + ): + """Context Spatial backbone introduced in BiseNetV1. + + Source: U{BiseNetV1} + @see: U{BiseNetv1: Bilateral Segmentation Network for + Real-time Semantic Segmentation + } @type context_backbone: str - @param context_backbone: Backbone used. Defaults to C{MobileNetV2}. + @param context_backbone: Backbone used in the context path. + Can be either a string or a C{torch.nn.Module}. + If a string argument is used, it has to be a name of a module + stored in the L{NODES} registry. Defaults to C{MobileNetV2}. + + @type backbone_kwargs: dict + @param backbone_kwargs: Keyword arguments for the backbone. + Only used when the C{context_backbone} argument is a string. """ super().__init__(**kwargs) - self.context_path = ContextPath(NODES.get(context_backbone)(**kwargs)) + if isinstance(context_backbone, str): + backbone_kwargs = backbone_kwargs or {} + backbone_kwargs |= kwargs + context_backbone = NODES.get(context_backbone)(**backbone_kwargs) + + self.context_path = ContextPath(context_backbone) self.spatial_path = SpatialPath(3, 128) self.ffm = FeatureFusionBlock(256, 256) @@ -35,22 +51,41 @@ def forward(self, inputs: Tensor) -> list[Tensor]: spatial_out = self.spatial_path(inputs) context16, _ = self.context_path(inputs) fm_fuse = self.ffm(spatial_out, context16) - outs = [fm_fuse] - return outs + return [fm_fuse] class SpatialPath(nn.Module): def __init__(self, in_channels: int, out_channels: int): super().__init__() intermediate_channels = 64 - self.conv_7x7 = ConvModule(in_channels, intermediate_channels, 7, 2, 3) + self.conv_7x7 = ConvModule( + in_channels, + intermediate_channels, + kernel_size=7, + stride=2, + padding=3, + ) self.conv_3x3_1 = ConvModule( - intermediate_channels, intermediate_channels, 3, 2, 1 + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + padding=1, ) self.conv_3x3_2 = ConvModule( 
- intermediate_channels, intermediate_channels, 3, 2, 1 + intermediate_channels, + intermediate_channels, + kernel_size=3, + stride=2, + padding=1, + ) + self.conv_1x1 = ConvModule( + intermediate_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, ) - self.conv_1x1 = ConvModule(intermediate_channels, out_channels, 1, 1, 0) def forward(self, x: Tensor) -> Tensor: x = self.conv_7x7(x) @@ -60,7 +95,7 @@ def forward(self, x: Tensor) -> Tensor: class ContextPath(nn.Module): - def __init__(self, backbone: BaseNode): + def __init__(self, backbone: nn.Module): super().__init__() self.backbone = backbone @@ -70,15 +105,16 @@ def __init__(self, backbone: BaseNode): self.refine16 = ConvModule(128, 128, 3, 1, 1) self.refine32 = ConvModule(128, 128, 3, 1, 1) - def forward(self, x: Tensor) -> list[Tensor]: - *_, down16, down32 = self.backbone.forward(x) + def forward(self, x: Tensor) -> tuple[Tensor, Tensor]: + *_, down16, down32 = self.backbone(x) if not hasattr(self, "arm16"): self.arm16 = AttentionRefinmentBlock(down16.shape[1], 128) self.arm32 = AttentionRefinmentBlock(down32.shape[1], 128) self.global_context = nn.Sequential( - nn.AdaptiveAvgPool2d(1), ConvModule(down32.shape[1], 128, 1, 1, 0) + nn.AdaptiveAvgPool2d(1), + ConvModule(down32.shape[1], 128, 1, 1, 0), ) arm_down16 = self.arm16(down16) @@ -86,15 +122,18 @@ def forward(self, x: Tensor) -> list[Tensor]: global_down32 = self.global_context(down32) global_down32 = F.interpolate( - global_down32, size=down32.size()[2:], mode="bilinear", align_corners=True + global_down32, + size=down32.shape[2:], + mode="bilinear", + align_corners=True, ) - arm_down32 = arm_down32 + global_down32 + arm_down32 += global_down32 arm_down32 = self.up32(arm_down32) arm_down32 = self.refine32(arm_down32) - arm_down16 = arm_down16 + arm_down32 + arm_down16 += arm_down32 arm_down16 = self.up16(arm_down16) arm_down16 = self.refine16(arm_down16) - return [arm_down16, arm_down32] + return arm_down16, arm_down32 diff --git 
a/luxonis_train/nodes/backbones/efficientnet.py b/luxonis_train/nodes/backbones/efficientnet.py index e560bc5f..1c7120eb 100644 --- a/luxonis_train/nodes/backbones/efficientnet.py +++ b/luxonis_train/nodes/backbones/efficientnet.py @@ -1,11 +1,7 @@ -"""Implementation of the EfficientNet backbone. - -Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} -@license: U{Apache 2.0} -""" +from typing import Any import torch -from torch import Tensor, nn +from torch import Tensor from luxonis_train.nodes.base_node import BaseNode @@ -13,33 +9,49 @@ class EfficientNet(BaseNode[Tensor, list[Tensor]]): attach_index: int = -1 - def __init__(self, download_weights: bool = False, **kwargs): + def __init__( + self, + download_weights: bool = False, + out_indices: list[int] | None = None, + **kwargs: Any, + ): """EfficientNet backbone. + EfficientNet is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a compound coefficient. Unlike conventional practice that arbitrary scales these factors, the EfficientNet scaling method uniformly scales network width, depth, and resolution with a set of fixed scaling coefficients. + + Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} + + @license: U{Apache-2.0 + } + + @see: U{https://paperswithcode.com/method/efficientnet} + @see: U{EfficientNet: Rethinking Model Scaling for + Convolutional Neural Networks + } @type download_weights: bool @param download_weights: If C{True} download weights from imagenet. Defaults to C{False}. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [0, 1, 2, 4, 6]. 
""" super().__init__(**kwargs) - efficientnet_lite0_model = torch.hub.load( + self.backbone = torch.hub.load( # type: ignore "rwightman/gen-efficientnet-pytorch", "efficientnet_lite0", pretrained=download_weights, ) - efficientnet_lite0_model.classifier = nn.Identity() - self.out_indices = [0, 1, 2, 4, 6] - efficientnet_lite0_model.bn2 = nn.Identity() - efficientnet_lite0_model.conv_head = nn.Identity() - self.backbone = efficientnet_lite0_model - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - x = self.backbone.conv_stem(x) + self.out_indices = out_indices or [0, 1, 2, 4, 6] + + def forward(self, inputs: Tensor) -> list[Tensor]: + x = self.backbone.conv_stem(inputs) x = self.backbone.bn1(x) x = self.backbone.act1(x) - for i, m in enumerate(self.backbone.blocks): - x = m(x) + + outs: list[Tensor] = [] + + for i, layer in enumerate(self.backbone.blocks): + x = layer(x) if i in self.out_indices: outs.append(x) diff --git a/luxonis_train/nodes/backbones/efficientrep/__init__.py b/luxonis_train/nodes/backbones/efficientrep/__init__.py new file mode 100644 index 00000000..51ff264a --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/__init__.py @@ -0,0 +1,3 @@ +from .efficientrep import EfficientRep + +__all__ = ["EfficientRep"] diff --git a/luxonis_train/nodes/backbones/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py similarity index 63% rename from luxonis_train/nodes/backbones/efficientrep.py rename to luxonis_train/nodes/backbones/efficientrep/efficientrep.py index be558620..717b9bd5 100644 --- a/luxonis_train/nodes/backbones/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -1,11 +1,5 @@ -"""Implementation of the EfficientRep backbone. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. 
-""" - import logging -from typing import Literal +from typing import Any from torch import Tensor, nn @@ -15,25 +9,40 @@ RepVGGBlock, SpatialPyramidPoolingBlock, ) -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible + +from .variants import VariantLiteral, get_variant logger = logging.getLogger(__name__) class EfficientRep(BaseNode[Tensor, list[Tensor]]): + in_channels: int + def __init__( self, - variant: Literal["s", "n", "m", "l"] = "n", + variant: VariantLiteral = "nano", channels_list: list[int] | None = None, num_repeats: list[int] | None = None, - depth_mul: float = 0.33, - width_mul: float = 0.25, - **kwargs, + depth_mul: float | None = None, + width_mul: float | None = None, + **kwargs: Any, ): - """EfficientRep backbone. - - @type variant: Literal["s", "n", "m", "l"] - @param variant: EfficientRep variant. Defaults to "n". + """Implementation of the EfficientRep backbone. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications + }. + + @type variant: Literal["n", "nano", "s", "small", "m", "medium", "l", "large"] + @param variant: EfficientRep variant. Defaults to "nano". + The variant determines the depth and width multipliers. + The depth multiplier determines the number of blocks in each stage and the width multiplier determines the number of channels. + The following variants are available: + - "n" or "nano" (default): depth_multiplier=0.33, width_multiplier=0.25 + - "s" or "small": depth_multiplier=0.33, width_multiplier=0.50 + - "m" or "medium": depth_multiplier=0.60, width_multiplier=0.75 + - "l" or "large": depth_multiplier=1.0, width_multiplier=1.0 @type channels_list: list[int] | None @param channels_list: List of number of channels for each block. If unspecified, defaults to [64, 128, 256, 512, 1024]. @@ -41,23 +50,15 @@ def __init__( @param num_repeats: List of number of repeats of RepVGGBlock. If unspecified, defaults to [1, 6, 12, 18, 6]. 
@type depth_mul: float - @param depth_mul: Depth multiplier. Depending on the variant, defaults to 0.33. + @param depth_mul: Depth multiplier. If provided, overrides the variant value. @type width_mul: float - @param width_mul: Width multiplier. Depending on the variant, defaults to 0.25. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + @param width_mul: Width multiplier. If provided, overrides the variant value. """ super().__init__(**kwargs) - if variant not in EFFICIENTREP_VARIANTS: - raise ValueError( - f"EfficientRep model variant should be in {list(EFFICIENTREP_VARIANTS.keys())}" - ) - - ( - depth_mul, - width_mul, - ) = EFFICIENTREP_VARIANTS[variant] + var = get_variant(variant) + depth_mul = depth_mul or var.depth_multiplier + width_mul = width_mul or var.width_multiplier channels_list = channels_list or [64, 128, 256, 512, 1024] num_repeats = num_repeats or [1, 6, 12, 18, 6] @@ -66,12 +67,8 @@ def __init__( (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats ] - in_channels = self.in_channels - if not isinstance(in_channels, int): - raise ValueError("EfficientRep module expects only one input.") - self.repvgg_encoder = RepVGGBlock( - in_channels=in_channels, + in_channels=self.in_channels, out_channels=channels_list[0], kernel_size=3, stride=2, @@ -111,23 +108,15 @@ def set_export_mode(self, mode: bool = True) -> None: """ super().set_export_mode(mode) if self.export: - logger.info("Reparametrizing EfficientRep.") + logger.info("Reparametrizing 'EfficientRep'.") for module in self.modules(): if isinstance(module, RepVGGBlock): module.reparametrize() def forward(self, inputs: Tensor) -> list[Tensor]: - outputs = [] + outputs: list[Tensor] = [] x = self.repvgg_encoder(inputs) for block in self.blocks: x = block(x) outputs.append(x) return outputs - - -EFFICIENTREP_VARIANTS = { - "n": (0.33, 0.25), - "s": (0.33, 0.50), - "m": (0.60, 0.75), - "l": (1.0, 1.0), -} diff --git 
a/luxonis_train/nodes/backbones/efficientrep/variants.py b/luxonis_train/nodes/backbones/efficientrep/variants.py new file mode 100644 index 00000000..2c4df9bc --- /dev/null +++ b/luxonis_train/nodes/backbones/efficientrep/variants.py @@ -0,0 +1,44 @@ +from typing import Literal, TypeAlias + +from pydantic import BaseModel + +VariantLiteral: TypeAlias = Literal[ + "n", "nano", "s", "small", "m", "medium", "l", "large" +] + + +class EfficientRepVariant(BaseModel): + depth_multiplier: float + width_multiplier: float + + +def get_variant(variant: VariantLiteral) -> EfficientRepVariant: + variants = { + "n": EfficientRepVariant( + depth_multiplier=0.33, + width_multiplier=0.25, + ), + "s": EfficientRepVariant( + depth_multiplier=0.33, + width_multiplier=0.50, + ), + "m": EfficientRepVariant( + depth_multiplier=0.60, + width_multiplier=0.75, + ), + "l": EfficientRepVariant( + depth_multiplier=1.0, + width_multiplier=1.0, + ), + } + variants["nano"] = variants["n"] + variants["small"] = variants["s"] + variants["medium"] = variants["m"] + variants["large"] = variants["l"] + + if variant not in variants: + raise ValueError( + f"EfficientRep variant should be one of " + f"{list(variants.keys())}, got '{variant}'." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/micronet.py b/luxonis_train/nodes/backbones/micronet.py deleted file mode 100644 index 074dce2a..00000000 --- a/luxonis_train/nodes/backbones/micronet.py +++ /dev/null @@ -1,842 +0,0 @@ -from typing import Literal - -import torch -from torch import Tensor, nn - -from luxonis_train.nodes.activations import HSigmoid, HSwish -from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import ConvModule - - -class MicroNet(BaseNode[Tensor, list[Tensor]]): - """ - - TODO: DOCS - """ - - def __init__(self, variant: Literal["M1", "M2", "M3"] = "M1", **kwargs): - """MicroNet backbone. - - @type variant: Literal["M1", "M2", "M3"] - @param variant: Model variant to use. 
Defaults to "M1". - """ - super().__init__(**kwargs) - - if variant not in MICRONET_VARIANTS_SETTINGS: - raise ValueError( - f"MicroNet model variant should be in {list(MICRONET_VARIANTS_SETTINGS.keys())}" - ) - - self.inplanes = 64 - ( - in_channels, - stem_groups, - _, - init_a, - init_b, - out_indices, - channels, - cfgs, - ) = MICRONET_VARIANTS_SETTINGS[variant] - self.out_indices = out_indices - self.channels = channels - - self.features = nn.ModuleList([Stem(3, 2, stem_groups)]) - - for ( - stride, - out_channels, - kernel_size, - c1, - c2, - g1, - g2, - _, - g3, - g4, - y1, - y2, - y3, - r, - ) in cfgs: - self.features.append( - MicroBlock( - in_channels, - out_channels, - kernel_size, - stride, - (c1, c2), - (g1, g2), - (g3, g4), - (y1, y2, y3), - r, - init_a, - init_b, - ) - ) - in_channels = out_channels - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for m in self.features: - x = m(x) - outs.append(x) - return outs - - -class MicroBlock(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - kernel_size: int = 3, - stride: int = 1, - t1: tuple[int, int] = (2, 2), - gs1: tuple[int, int] = (0, 6), - groups_1x1: tuple[int, int] = (1, 1), - dy: tuple[int, int, int] = (2, 0, 1), - r: int = 1, - init_a: tuple[float, float] = (1.0, 1.0), - init_b: tuple[float, float] = (0.0, 0.0), - ): - super().__init__() - - self.identity = stride == 1 and in_channels == out_channels - y1, y2, y3 = dy - g1, g2 = groups_1x1 - reduction = 8 * r - intermediate_channels = in_channels * t1[0] * t1[1] - - if gs1[0] == 0: - self.layers = nn.Sequential( - DepthSpatialSepConv(in_channels, t1, kernel_size, stride), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y2 == 2 else False, - gs1[1], - reduction, - ) - if y2 > 0 - else nn.ReLU6(True), - ChannelShuffle(gs1[1]), - ChannelShuffle(intermediate_channels // 2) - if y2 != 0 - else nn.Sequential(), - ConvModule( - in_channels=intermediate_channels, - 
out_channels=out_channels, - kernel_size=1, - groups=g1, - activation=nn.Identity(), - ), - DYShiftMax( - out_channels, - out_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - g2, - reduction // 2, - ) - if y3 > 0 - else nn.Sequential(), - ChannelShuffle(g2), - ChannelShuffle(out_channels // 2) - if out_channels % 2 == 0 and y3 != 0 - else nn.Sequential(), - ) - elif g2 == 0: - self.layers = nn.Sequential( - ConvModule( - in_channels=in_channels, - out_channels=intermediate_channels, - kernel_size=1, - groups=gs1[0], - activation=nn.Identity(), - ), - DYShiftMax( - intermediate_channels, - intermediate_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - gs1[1], - reduction, - ) - if y3 > 0 - else nn.Sequential(), - ) - else: - self.layers = nn.Sequential( - ConvModule( - in_channels=in_channels, - out_channels=intermediate_channels, - kernel_size=1, - groups=gs1[0], - activation=nn.Identity(), - ), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y1 == 2 else False, - gs1[1], - reduction, - ) - if y1 > 0 - else nn.ReLU6(True), - ChannelShuffle(gs1[1]), - DepthSpatialSepConv(intermediate_channels, (1, 1), kernel_size, stride), - nn.Sequential(), - DYShiftMax( - intermediate_channels, - intermediate_channels, - init_a, - init_b, - True if y2 == 2 else False, - gs1[1], - reduction, - True, - ) - if y2 > 0 - else nn.ReLU6(True), - ChannelShuffle(intermediate_channels // 4) - if y1 != 0 and y2 != 0 - else nn.Sequential() - if y1 == 0 and y2 == 0 - else ChannelShuffle(intermediate_channels // 2), - ConvModule( - in_channels=intermediate_channels, - out_channels=out_channels, - kernel_size=1, - groups=g1, - activation=nn.Identity(), - ), - DYShiftMax( - out_channels, - out_channels, - (1.0, 0.0), - (0.0, 0.0), - False, - g2, - reduction=reduction // 2 - if out_channels < intermediate_channels - else reduction, - ) - if y3 > 0 - else nn.Sequential(), - ChannelShuffle(g2), - ChannelShuffle(out_channels // 2) if y3 != 0 else 
nn.Sequential(), - ) - - def forward(self, inputs: Tensor) -> Tensor: - out = self.layers(inputs) - if self.identity: - out += inputs - return out - - -class ChannelShuffle(nn.Module): - def __init__(self, groups: int): - super().__init__() - self.groups = groups - - def forward(self, x: Tensor) -> Tensor: - b, c, h, w = x.size() - channels_per_group = c // self.groups - x = x.view(b, self.groups, channels_per_group, h, w) - x = torch.transpose(x, 1, 2).contiguous() - out = x.view(b, -1, h, w) - return out - - -class DYShiftMax(nn.Module): - def __init__( - self, - in_channels: int, - out_channels: int, - init_a: tuple[float, float] = (0.0, 0.0), - init_b: tuple[float, float] = (0.0, 0.0), - act_relu: bool = True, - g: int = 6, - reduction: int = 4, - expansion: bool = False, - ): - super().__init__() - self.exp: Literal[2, 4] = 4 if act_relu else 2 - self.init_a = init_a - self.init_b = init_b - self.out_channels = out_channels - - self.avg_pool = nn.Sequential(nn.Sequential(), nn.AdaptiveAvgPool2d(1)) - - squeeze = self._make_divisible(in_channels // reduction, 4) - - self.fc = nn.Sequential( - nn.Linear(in_channels, squeeze), - nn.ReLU(True), - nn.Linear(squeeze, out_channels * self.exp), - HSigmoid(), - ) - - if g != 1 and expansion: - g = in_channels // g - - gc = in_channels // g - index = Tensor(range(in_channels)).view(1, in_channels, 1, 1) - index = index.view(1, g, gc, 1, 1) - indexgs = torch.split(index, [1, g - 1], dim=1) - indexgs = torch.cat([indexgs[1], indexgs[0]], dim=1) - indexs = torch.split(indexgs, [1, gc - 1], dim=2) - indexs = torch.cat([indexs[1], indexs[0]], dim=2) - self.index = indexs.view(in_channels).long() - - def forward(self, x: Tensor) -> Tensor: - B, C, _, _ = x.shape - x_out = x - - y = self.avg_pool(x).view(B, C) - y = self.fc(y).view(B, -1, 1, 1) - y = (y - 0.5) * 4.0 - - x2 = x_out[:, self.index, :, :] - - if self.exp == 4: - a1, b1, a2, b2 = torch.split(y, self.out_channels, dim=1) - - a1 = a1 + self.init_a[0] - a2 = a2 + 
self.init_b[1] - b1 = b1 + self.init_b[0] - b2 = b2 + self.init_b[1] - - z1 = x_out * a1 + x2 * b1 - z2 = x_out * a2 + x2 * b2 - - out = torch.max(z1, z2) - - elif self.exp == 2: - a1, b1 = torch.split(y, self.out_channels, dim=1) - a1 = a1 + self.init_a[0] - b1 = b1 + self.init_b[0] - out = x_out * a1 + x2 * b1 - else: - raise RuntimeError("Expansion should be 2 or 4.") - - return out - - def _make_divisible(self, v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class SwishLinear(nn.Module): - def __init__(self, in_channels: int, out_channels: int): - super().__init__() - self.linear = nn.Sequential( - nn.Linear(in_channels, out_channels), nn.BatchNorm1d(out_channels), HSwish() - ) - - def forward(self, x: Tensor) -> Tensor: - return self.linear(x) - - -class SpatialSepConvSF(nn.Module): - def __init__( - self, in_channels: int, outs: tuple[int, int], kernel_size: int, stride: int - ): - super().__init__() - out_channels1, out_channels2 = outs - self.conv = nn.Sequential( - nn.Conv2d( - in_channels, - out_channels1, - (kernel_size, 1), - (stride, 1), - (kernel_size // 2, 0), - bias=False, - ), - nn.BatchNorm2d(out_channels1), - nn.Conv2d( - out_channels1, - out_channels1 * out_channels2, - (1, kernel_size), - (1, stride), - (0, kernel_size // 2), - groups=out_channels1, - bias=False, - ), - nn.BatchNorm2d(out_channels1 * out_channels2), - ChannelShuffle(out_channels1), - ) - - def forward(self, x: Tensor) -> Tensor: - return self.conv(x) - - -class Stem(nn.Module): - def __init__(self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4)): - super().__init__() - self.stem = nn.Sequential( - SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) - ) - - def forward(self, x: Tensor) -> Tensor: - return self.stem(x) - - -class 
DepthSpatialSepConv(nn.Module): - def __init__( - self, in_channels: int, expand: tuple[int, int], kernel_size: int, stride: int - ): - super().__init__() - exp1, exp2 = expand - intermediate_channels = in_channels * exp1 - out_channels = in_channels * exp1 * exp2 - - self.conv = nn.Sequential( - nn.Conv2d( - in_channels, - intermediate_channels, - (kernel_size, 1), - (stride, 1), - (kernel_size // 2, 0), - groups=in_channels, - bias=False, - ), - nn.BatchNorm2d(intermediate_channels), - nn.Conv2d( - intermediate_channels, - out_channels, - (1, kernel_size), - (1, stride), - (0, kernel_size // 2), - groups=intermediate_channels, - bias=False, - ), - nn.BatchNorm2d(out_channels), - ) - - def forward(self, x: Tensor) -> Tensor: - return self.conv(x) - - -MICRONET_VARIANTS_SETTINGS = { - "M1": [ - 6, # stem_ch - [3, 2], # stem_groups - 960, # out_ch - [1.0, 1.0], # init_a - [0.0, 0.0], # init_b - [1, 2, 4, 7], # out indices - [8, 16, 32, 576], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [2, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1], - [2, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1], - [ - 2, - 16, - 5, - 2, - 2, - 0, - 16, - 16, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 4, - 4, - 32, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 2, - 64, - 5, - 1, - 6, - 8, - 8, - 64, - 8, - 8, - 2, - 2, - 1, - 1, - ], - [ - 1, - 96, - 3, - 1, - 6, - 8, - 8, - 96, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2], # 96->96(4,24)->576 - ], - ], - "M2": [ - 8, - [4, 2], - 1024, - [1.0, 1.0], - [0.0, 0.0], - [1, 3, 6, 9], - [12, 24, 64, 768], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [ - 2, - 12, - 3, - 2, - 2, - 0, - 8, - 12, - 4, - 4, - 2, - 0, - 1, - 1, - ], - [ - 2, - 16, - 3, - 2, - 2, - 0, - 12, - 16, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 1, - 24, - 3, - 2, - 2, - 0, - 16, - 24, - 4, - 4, - 2, - 2, - 1, - 1, - ], - [ - 2, - 32, - 5, - 1, - 6, - 6, - 6, - 32, - 4, - 4, - 2, - 2, - 1, - 1, - ], - 
[ - 1, - 32, - 5, - 1, - 6, - 8, - 8, - 32, - 4, - 4, - 2, - 2, - 1, - 2, - ], - [ - 1, - 64, - 5, - 1, - 6, - 8, - 8, - 64, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [ - 2, - 96, - 5, - 1, - 6, - 8, - 8, - 96, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [ - 1, - 128, - 3, - 1, - 6, - 12, - 12, - 128, - 8, - 8, - 2, - 2, - 1, - 2, - ], - [1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2], - ], - ], - "M3": [ - 12, - [4, 3], - 1024, - [1.0, 0.5], - [0.0, 0.5], - [1, 3, 8, 12], - [16, 24, 80, 864], - [ - # s, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r - [ - 2, - 16, - 3, - 2, - 2, - 0, - 12, - 16, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 2, - 24, - 3, - 2, - 2, - 0, - 16, - 24, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 1, - 24, - 3, - 2, - 2, - 0, - 24, - 24, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 2, - 32, - 5, - 1, - 6, - 6, - 6, - 32, - 4, - 4, - 0, - 2, - 0, - 1, - ], - [ - 1, - 32, - 5, - 1, - 6, - 8, - 8, - 32, - 4, - 4, - 0, - 2, - 0, - 2, - ], - [ - 1, - 64, - 5, - 1, - 6, - 8, - 8, - 48, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 1, - 80, - 5, - 1, - 6, - 8, - 8, - 80, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 1, - 80, - 5, - 1, - 6, - 10, - 10, - 80, - 8, - 8, - 0, - 2, - 0, - 2, - ], - [ - 2, - 120, - 5, - 1, - 6, - 10, - 10, - 120, - 10, - 10, - 0, - 2, - 0, - 2, - ], - [ - 1, - 120, - 5, - 1, - 6, - 12, - 12, - 120, - 10, - 10, - 0, - 2, - 0, - 2, - ], - [ - 1, - 144, - 3, - 1, - 6, - 12, - 12, - 144, - 12, - 12, - 0, - 2, - 0, - 2, - ], - [1, 864, 3, 1, 6, 12, 12, 0, 0, 0, 0, 2, 0, 2], - ], - ], -} diff --git a/luxonis_train/nodes/backbones/micronet/__init__.py b/luxonis_train/nodes/backbones/micronet/__init__.py new file mode 100644 index 00000000..5b41ece3 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/__init__.py @@ -0,0 +1,3 @@ +from .micronet import MicroNet + +__all__ = ["MicroNet"] diff --git a/luxonis_train/nodes/backbones/micronet/blocks.py b/luxonis_train/nodes/backbones/micronet/blocks.py new file mode 100644 index 00000000..de77b4b2 --- 
/dev/null +++ b/luxonis_train/nodes/backbones/micronet/blocks.py @@ -0,0 +1,489 @@ +from typing import Literal + +import torch +from torch import Tensor, nn + +from luxonis_train.nodes.activations import HSigmoid +from luxonis_train.nodes.blocks import ConvModule + + +class MicroBlock(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: int = 3, + stride: int = 1, + expansion_ratios: tuple[int, int] = (2, 2), + groups_1: tuple[int, int] = (0, 6), + groups_2: tuple[int, int] = (1, 1), + use_dynamic_shift: tuple[int, int, int] = (2, 0, 1), + reduction_factor: int = 1, + init_a: tuple[float, float] = (1.0, 1.0), + init_b: tuple[float, float] = (0.0, 0.0), + ): + """ + MicroBlock: The basic building block of MicroNet. + + This block implements the Micro-Factorized Convolution and Dynamic Shift-Max activation. + It can be configured to use different combinations of these components based on the network design. + + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type kernel_size: int + @param kernel_size: Size of the convolution kernel. Defaults to 3. + @type stride: int + @param stride: Stride of the convolution. Defaults to 1. + @type expansion_ratios: tuple[int, int] + @param expansion_ratios: Expansion ratios for the intermediate channels. Defaults to (2, 2). + @type groups_1: tuple[int, int] + @param groups_1: Groups for the first set of convolutions. Defaults to (0, 6). + @type groups_2: tuple[int, int] + @param groups_2: Groups for the second set of convolutions. Defaults to (1, 1). + @type use_dynamic_shift: tuple[int, int, int] + @param use_dynamic_shift: Flags to use Dynamic Shift-Max in different positions. Defaults to (2, 0, 1). + @type reduction_factor: int + @param reduction_factor: Reduction factor for the squeeze-and-excitation-like operation. Defaults to 1. 
+ @type init_a: tuple[float, float] + @param init_a: Initialization parameters for Dynamic Shift-Max. Defaults to (1.0, 1.0). + @type init_b: tuple[float, float] + @param init_b: Initialization parameters for Dynamic Shift-Max. Defaults to (0.0, 0.0). + """ + super().__init__() + + self.use_residual = stride == 1 and in_channels == out_channels + self.expansion_ratios = expansion_ratios + use_dy1, use_dy2, use_dy3 = use_dynamic_shift + group1, group2 = groups_2 + reduction = 8 * reduction_factor + intermediate_channels = in_channels * expansion_ratios[0] * expansion_ratios[1] + + if groups_1[0] == 0: + self.layers = self._create_lite_block( + in_channels, + out_channels, + intermediate_channels, + kernel_size, + stride, + groups_1[1], + group1, + group2, + use_dy2, + use_dy3, + reduction, + init_a, + init_b, + ) + elif group2 == 0: + self.layers = self._create_transition_block( + in_channels, + intermediate_channels, + groups_1[0], + groups_1[1], + use_dy3, + reduction, + ) + else: + self.layers = self._create_full_block( + in_channels, + out_channels, + intermediate_channels, + kernel_size, + stride, + groups_1, + group1, + group2, + use_dy1, + use_dy2, + use_dy3, + reduction, + init_a, + init_b, + ) + + def _create_lite_block( + self, + in_channels: int, + out_channels: int, + intermediate_channels: int, + kernel_size: int, + stride: int, + group1: int, + group2: int, + group3: int, + use_dy2: int, + use_dy3: int, + reduction: int, + init_a: tuple[float, float], + init_b: tuple[float, float], + ) -> nn.Sequential: + return nn.Sequential( + DepthSpatialSepConv( + in_channels, self.expansion_ratios, kernel_size, stride + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy2 == 2 else False, + group1, + reduction, + ) + if use_dy2 > 0 + else nn.ReLU6(True), + ChannelShuffle(group1), + ChannelShuffle(intermediate_channels // 2) + if use_dy2 != 0 + else nn.Sequential(), + ConvModule( + 
in_channels=intermediate_channels, + out_channels=out_channels, + kernel_size=1, + groups=group2, + activation=nn.Identity(), + ), + DYShiftMax( + out_channels, + out_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group3, + reduction // 2, + ) + if use_dy3 > 0 + else nn.Sequential(), + ChannelShuffle(group3), + ChannelShuffle(out_channels // 2) + if out_channels % 2 == 0 and use_dy3 != 0 + else nn.Sequential(), + ) + + def _create_transition_block( + self, + in_channels: int, + intermediate_channels: int, + group1: int, + group2: int, + use_dy3: int, + reduction: int, + ) -> nn.Sequential: + return nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=intermediate_channels, + kernel_size=1, + groups=group1, + activation=nn.Identity(), + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group2, + reduction, + ) + if use_dy3 > 0 + else nn.Sequential(), + ) + + def _create_full_block( + self, + in_channels: int, + out_channels: int, + intermediate_channels: int, + kernel_size: int, + stride: int, + groups_1: tuple[int, int], + group1: int, + group2: int, + use_dy1: int, + use_dy2: int, + use_dy3: int, + reduction: int, + init_a: tuple[float, float], + init_b: tuple[float, float], + ) -> nn.Sequential: + return nn.Sequential( + ConvModule( + in_channels=in_channels, + out_channels=intermediate_channels, + kernel_size=1, + groups=groups_1[0], + activation=nn.Identity(), + ), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy1 == 2 else False, + groups_1[1], + reduction, + ) + if use_dy1 > 0 + else nn.ReLU6(True), + ChannelShuffle(groups_1[1]), + DepthSpatialSepConv(intermediate_channels, (1, 1), kernel_size, stride), + DYShiftMax( + intermediate_channels, + intermediate_channels, + init_a, + init_b, + True if use_dy2 == 2 else False, + groups_1[1], + reduction, + True, + ) + if use_dy2 > 0 + else nn.ReLU6(True), + 
ChannelShuffle(intermediate_channels // 4) + if use_dy1 != 0 and use_dy2 != 0 + else nn.Sequential() + if use_dy1 == 0 and use_dy2 == 0 + else ChannelShuffle(intermediate_channels // 2), + ConvModule( + in_channels=intermediate_channels, + out_channels=out_channels, + kernel_size=1, + groups=group1, + activation=nn.Identity(), + ), + DYShiftMax( + out_channels, + out_channels, + (1.0, 0.0), + (0.0, 0.0), + False, + group2, + reduction=reduction // 2 + if out_channels < intermediate_channels + else reduction, + ) + if use_dy3 > 0 + else nn.Sequential(), + ChannelShuffle(group2), + ChannelShuffle(out_channels // 2) if use_dy3 != 0 else nn.Sequential(), + ) + + def forward(self, inputs: Tensor) -> Tensor: + out = self.layers(inputs) + if self.use_residual: + out += inputs + return out + + +class ChannelShuffle(nn.Module): + def __init__(self, groups: int): + """Shuffle the channels of the input tensor. + + This operation is used to mix information between groups after grouped + convolutions. + + @type groups: int + @param groups: Number of groups to divide the channels into before shuffling. + """ + + super().__init__() + self.groups = groups + + def forward(self, x: Tensor) -> Tensor: + batch_size, channels, height, width = x.size() + channels_per_group = channels // self.groups + x = x.view(batch_size, self.groups, channels_per_group, height, width) + x = torch.transpose(x, 1, 2).contiguous() + out = x.view(batch_size, -1, height, width) + return out + + +class DYShiftMax(nn.Module): + def __init__( + self, + in_channels: int, + out_channels: int, + init_a: tuple[float, float] = (0.0, 0.0), + init_b: tuple[float, float] = (0.0, 0.0), + use_relu: bool = True, + groups: int = 6, + reduction: int = 4, + expansion: bool = False, + ): + """Dynamic Shift-Max activation function. + + This module implements the Dynamic Shift-Max operation, which adaptively fuses + and selects channel information based on the input. 
+ + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type init_a: tuple[float, float] + @param init_a: Initial values for the 'a' parameters. Defaults to (0.0, 0.0). + @type init_b: tuple[float, float] + @param init_b: Initial values for the 'b' parameters. Defaults to (0.0, 0.0). + @type use_relu: bool + @param use_relu: Whether to use ReLU activation. Defaults to True. + @type groups: int + @param groups: Number of groups for channel shuffling. Defaults to 6. + @type reduction: int + @param reduction: Reduction factor for the squeeze operation. Defaults to 4. + @type expansion: bool + @param expansion: Whether to use expansion in grouping. Defaults to False. + """ + super().__init__() + self.exp: Literal[2, 4] = 4 if use_relu else 2 + self.init_a = init_a + self.init_b = init_b + self.out_channels = out_channels + + self.avg_pool = nn.AdaptiveAvgPool2d(1) + + squeeze_channels = self._make_divisible(in_channels // reduction, 4) + + self.fc = nn.Sequential( + nn.Linear(in_channels, squeeze_channels), + nn.ReLU(True), + nn.Linear(squeeze_channels, out_channels * self.exp), + HSigmoid(), + ) + + if groups != 1 and expansion: + groups = in_channels // groups + + channels_per_group = in_channels // groups + index = torch.arange(in_channels).view(1, in_channels, 1, 1) + index = index.view(1, groups, channels_per_group, 1, 1) + index_groups = torch.split(index, [1, groups - 1], dim=1) + index_groups = torch.cat([index_groups[1], index_groups[0]], dim=1) + index_splits = torch.split(index_groups, [1, channels_per_group - 1], dim=2) + index_splits = torch.cat([index_splits[1], index_splits[0]], dim=2) + self.index = index_splits.view(in_channels).long() + + def forward(self, x: Tensor) -> Tensor: + batch_size, channels, _, _ = x.shape + x_out = x + + y = self.avg_pool(x).view(batch_size, channels) + y = self.fc(y).view(batch_size, -1, 1, 1) + y = (y - 0.5) * 4.0 + + x2 = 
x_out[:, self.index, :, :] + + if self.exp == 4: + a1, b1, a2, b2 = torch.split(y, self.out_channels, dim=1) + + a1 = a1 + self.init_a[0] + a2 = a2 + self.init_b[1] + b1 = b1 + self.init_b[0] + b2 = b2 + self.init_b[1] + + z1 = x_out * a1 + x2 * b1 + z2 = x_out * a2 + x2 * b2 + + out = torch.max(z1, z2) + + elif self.exp == 2: + a1, b1 = torch.split(y, self.out_channels, dim=1) + a1 = a1 + self.init_a[0] + b1 = b1 + self.init_b[0] + out = x_out * a1 + x2 * b1 + else: + raise RuntimeError("Expansion should be 2 or 4.") + + return out + + def _make_divisible( + self, value: int, divisor: int, min_value: int | None = None + ) -> int: + if min_value is None: + min_value = divisor + new_v = max(min_value, int(value + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_v < 0.9 * value: + new_v += divisor + return new_v + + +class SpatialSepConvSF(nn.Module): + def __init__( + self, in_channels: int, outs: tuple[int, int], kernel_size: int, stride: int + ): + super().__init__() + out_channels1, out_channels2 = outs + self.conv = nn.Sequential( + nn.Conv2d( + in_channels, + out_channels1, + kernel_size=(kernel_size, 1), + stride=(stride, 1), + padding=(kernel_size // 2, 0), + bias=False, + ), + nn.BatchNorm2d(out_channels1), + nn.Conv2d( + out_channels1, + out_channels1 * out_channels2, + kernel_size=(1, kernel_size), + stride=(1, stride), + padding=(0, kernel_size // 2), + groups=out_channels1, + bias=False, + ), + nn.BatchNorm2d(out_channels1 * out_channels2), + ChannelShuffle(out_channels1), + ) + + def forward(self, x: Tensor) -> Tensor: + return self.conv(x) + + +class Stem(nn.Module): + def __init__(self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4)): + super().__init__() + self.stem = nn.Sequential( + SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) + ) + + def forward(self, x: Tensor) -> Tensor: + return self.stem(x) + + +class DepthSpatialSepConv(nn.Module): + def __init__( + 
self, in_channels: int, expand: tuple[int, int], kernel_size: int, stride: int + ): + super().__init__() + exp1, exp2 = expand + intermediate_channels = in_channels * exp1 + out_channels = in_channels * exp1 * exp2 + + self.conv = nn.Sequential( + nn.Conv2d( + in_channels, + intermediate_channels, + (kernel_size, 1), + (stride, 1), + padding=(kernel_size // 2, 0), + groups=in_channels, + bias=False, + ), + nn.BatchNorm2d(intermediate_channels), + nn.Conv2d( + intermediate_channels, + out_channels, + (1, kernel_size), + (1, stride), + padding=(0, kernel_size // 2), + groups=intermediate_channels, + bias=False, + ), + nn.BatchNorm2d(out_channels), + ) + + def forward(self, x: Tensor) -> Tensor: + return self.conv(x) diff --git a/luxonis_train/nodes/backbones/micronet/micronet.py b/luxonis_train/nodes/backbones/micronet/micronet.py new file mode 100644 index 00000000..32f51d09 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/micronet.py @@ -0,0 +1,61 @@ +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + +from .blocks import MicroBlock, Stem +from .variants import get_variant + + +class MicroNet(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + variant: Literal["M1", "M2", "M3"] = "M1", + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """MicroNet backbone. + + This class creates the full MicroNet architecture based on the specified + variant. It consists of a stem layer followed by multiple MicroBlocks. + + @type variant: Literal["M1", "M2", "M3"] + @param variant: Model variant to use. Defaults to "M1". + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. If provided, overrides the + variant value. 
+ """ + super().__init__(**kwargs) + + var = get_variant(variant) + self.out_indices = out_indices or var.out_indices + in_channels = var.stem_channels + + self.layers = nn.ModuleList([Stem(3, 2, var.stem_groups)]) + + for bc in var.block_configs: + self.layers.append( + MicroBlock( + in_channels, + bc.out_channels, + bc.kernel_size, + bc.stride, + bc.expand_ratio, + bc.groups_1, + bc.groups_2, + bc.dy_shifts, + bc.reduction_factor, + var.init_a, + var.init_b, + ) + ) + in_channels = bc.out_channels + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, layer in enumerate(self.layers): + inputs = layer(inputs) + if i in self.out_indices: + outs.append(inputs) + return outs diff --git a/luxonis_train/nodes/backbones/micronet/variants.py b/luxonis_train/nodes/backbones/micronet/variants.py new file mode 100644 index 00000000..48ca6164 --- /dev/null +++ b/luxonis_train/nodes/backbones/micronet/variants.py @@ -0,0 +1,344 @@ +from typing import Literal + +from pydantic import BaseModel + + +class MicroBlockConfig(BaseModel): + stride: int + out_channels: int + kernel_size: int + expand_ratio: tuple[int, int] + groups_1: tuple[int, int] + groups_2: tuple[int, int] + dy_shifts: tuple[int, int, int] + reduction_factor: int + + +class MicroNetVariant(BaseModel): + stem_channels: int + stem_groups: tuple[int, int] + init_a: tuple[float, float] + init_b: tuple[float, float] + out_indices: list[int] + block_configs: list[MicroBlockConfig] + + +M1 = MicroNetVariant( + stem_channels=6, + stem_groups=(3, 2), + init_a=(1.0, 1.0), + init_b=(0.0, 0.0), + out_indices=[1, 2, 4, 7], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=8, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 6), + groups_2=(2, 2), + dy_shifts=(2, 0, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 8), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), 
+ MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=5, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(4, 4), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=96, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=576, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(0, 0), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + ], +) + +M2 = MicroNetVariant( + stem_channels=8, + stem_groups=(4, 2), + init_a=(1.0, 1.0), + init_b=(0.0, 0.0), + out_indices=[1, 3, 6, 9], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=12, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 8), + groups_2=(4, 4), + dy_shifts=(2, 0, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 12), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(6, 6), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(4, 4), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + 
out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=2, + out_channels=96, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=128, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(8, 8), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=768, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(16, 16), + groups_2=(0, 0), + dy_shifts=(2, 2, 1), + reduction_factor=2, + ), + ], +) + +M3 = MicroNetVariant( + stem_channels=12, + stem_groups=(4, 3), + init_a=(1.0, 0.5), + init_b=(0.0, 0.5), + out_indices=[1, 3, 8, 12], + block_configs=[ + MicroBlockConfig( + stride=2, + out_channels=16, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 12), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 16), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=24, + kernel_size=3, + expand_ratio=(2, 2), + groups_1=(0, 24), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=2, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(6, 6), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=1, + ), + MicroBlockConfig( + stride=1, + out_channels=32, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(4, 4), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=64, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=2, + out_channels=80, + kernel_size=5, + 
expand_ratio=(1, 6), + groups_1=(8, 8), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=80, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(10, 10), + groups_2=(8, 8), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=120, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(10, 10), + groups_2=(10, 10), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=120, + kernel_size=5, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(10, 10), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=144, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(12, 12), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + MicroBlockConfig( + stride=1, + out_channels=864, + kernel_size=3, + expand_ratio=(1, 6), + groups_1=(12, 12), + groups_2=(0, 0), + dy_shifts=(0, 2, 0), + reduction_factor=2, + ), + ], +) + + +def get_variant(variant: Literal["M1", "M2", "M3"]) -> MicroNetVariant: + variants = {"M1": M1, "M2": M2, "M3": M3} + if variant not in variants: + raise ValueError( + "MicroNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/mobilenetv2.py b/luxonis_train/nodes/backbones/mobilenetv2.py index 48161835..8de19854 100644 --- a/luxonis_train/nodes/backbones/mobilenetv2.py +++ b/luxonis_train/nodes/backbones/mobilenetv2.py @@ -1,44 +1,51 @@ -"""MobileNetV2 backbone. - -TODO: source? -""" +from typing import Any import torchvision -from torch import Tensor, nn +from torch import Tensor from luxonis_train.nodes.base_node import BaseNode class MobileNetV2(BaseNode[Tensor, list[Tensor]]): - """Implementation of the MobileNetV2 backbone. 
- - TODO: add more info - """ - - def __init__(self, download_weights: bool = False, **kwargs): - """Constructor of the MobileNetV2 backbone. + def __init__( + self, + download_weights: bool = False, + out_indices: list[int] | None = None, + **kwargs: Any, + ): + """MobileNetV2 backbone. + + This class implements the MobileNetV2 model as described in: + U{MobileNetV2: Inverted Residuals and Linear Bottlenecks } by Sandler I{et al.} + + The network consists of an initial fully convolutional layer, followed by + 19 bottleneck residual blocks, and a final 1x1 convolution. It can be used + as a feature extractor for tasks like image classification, object detection, + and semantic segmentation. + + Key features: + - Inverted residual structure with linear bottlenecks + - Depth-wise separable convolutions for efficiency + - Configurable width multiplier and input resolution @type download_weights: bool @param download_weights: If True download weights from imagenet. Defaults to False. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [3, 6, 13, 18]. 
""" super().__init__(**kwargs) - mobilenet_v2 = torchvision.models.mobilenet_v2( + self.backbone = torchvision.models.mobilenet_v2( weights="DEFAULT" if download_weights else None ) - mobilenet_v2.classifier = nn.Identity() - self.out_indices = [3, 6, 13, 18] - self.channels = [24, 32, 96, 1280] - self.backbone = mobilenet_v2 - - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] - for i, module in enumerate(self.backbone.features): - x = module(x) + self.out_indices = out_indices or [3, 6, 13, 18] + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + for i, layer in enumerate(self.backbone.features): + inputs = layer(inputs) if i in self.out_indices: - outs.append(x) + outs.append(inputs) return outs diff --git a/luxonis_train/nodes/backbones/mobileone/__init__.py b/luxonis_train/nodes/backbones/mobileone/__init__.py new file mode 100644 index 00000000..a6e573aa --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/__init__.py @@ -0,0 +1,3 @@ +from .mobileone import MobileOne + +__all__ = ["MobileOne"] diff --git a/luxonis_train/nodes/backbones/mobileone.py b/luxonis_train/nodes/backbones/mobileone/blocks.py similarity index 60% rename from luxonis_train/nodes/backbones/mobileone.py rename to luxonis_train/nodes/backbones/mobileone/blocks.py index 2d460fd0..17b9d223 100644 --- a/luxonis_train/nodes/backbones/mobileone.py +++ b/luxonis_train/nodes/backbones/mobileone/blocks.py @@ -4,170 +4,12 @@ @license: U{Apple} """ - -from typing import Literal - import torch from torch import Tensor, nn -from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule, SqueezeExciteBlock -class MobileOne(BaseNode[Tensor, list[Tensor]]): - """Implementation of MobileOne backbone. 
- - TODO: add more details - """ - - in_channels: int - - VARIANTS_SETTINGS: dict[str, dict] = { - "s0": {"width_multipliers": (0.75, 1.0, 1.0, 2.0), "num_conv_branches": 4}, - "s1": {"width_multipliers": (1.5, 1.5, 2.0, 2.5)}, - "s2": {"width_multipliers": (1.5, 2.0, 2.5, 4.0)}, - "s3": {"width_multipliers": (2.0, 2.5, 3.0, 4.0)}, - "s4": {"width_multipliers": (3.0, 3.5, 3.5, 4.0), "use_se": True}, - } - - def __init__(self, variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", **kwargs): - """Constructor for the MobileOne module. - - @type variant: Literal["s0", "s1", "s2", "s3", "s4"] - @param variant: Specifies which variant of the MobileOne network to use. For - details, see TODO. Defaults to "s0". - """ - super().__init__(**kwargs) - - if variant not in MobileOne.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"MobileOne model variant should be in {list(MobileOne.VARIANTS_SETTINGS.keys())}" - ) - - variant_params = MobileOne.VARIANTS_SETTINGS[variant] - # TODO: make configurable - self.width_multipliers = variant_params["width_multipliers"] - self.num_conv_branches = variant_params.get("num_conv_branches", 1) - self.num_blocks_per_stage = [2, 8, 10, 1] - self.use_se = variant_params.get("use_se", False) - - self.in_planes = min(64, int(64 * self.width_multipliers[0])) - - self.stage0 = MobileOneBlock( - in_channels=self.in_channels, - out_channels=self.in_planes, - kernel_size=3, - stride=2, - padding=1, - ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * self.width_multipliers[0]), - self.num_blocks_per_stage[0], - num_se_blocks=0, - ) - self.stage2 = self._make_stage( - int(128 * self.width_multipliers[1]), - self.num_blocks_per_stage[1], - num_se_blocks=0, - ) - self.stage3 = self._make_stage( - int(256 * self.width_multipliers[2]), - self.num_blocks_per_stage[2], - num_se_blocks=int(self.num_blocks_per_stage[2] // 2) if self.use_se else 0, - ) - self.stage4 = self._make_stage( - int(512 * self.width_multipliers[3]), - 
self.num_blocks_per_stage[3], - num_se_blocks=self.num_blocks_per_stage[3] if self.use_se else 0, - ) - - def forward(self, inputs: Tensor) -> list[Tensor]: - outs = [] - x = self.stage0(inputs) - outs.append(x) - x = self.stage1(x) - outs.append(x) - x = self.stage2(x) - outs.append(x) - x = self.stage3(x) - outs.append(x) - x = self.stage4(x) - outs.append(x) - - return outs - - def export_mode(self, export: bool = True) -> None: - """Sets the module to export mode. - - Reparameterizes the model to obtain a plain CNN-like structure for inference. - TODO: add more details - - @warning: The reparametrization is destructive and cannot be reversed! - - @type export: bool - @param export: Whether to set the export mode to True or False. Defaults to True. - """ - if export: - for module in self.modules(): - if hasattr(module, "reparameterize"): - module.reparameterize() - - def _make_stage(self, planes: int, num_blocks: int, num_se_blocks: int): - """Build a stage of MobileOne model. - - @type planes: int - @param planes: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks in this stage. - @type num_se_blocks: int - @param num_se_blocks: Number of SE blocks in this stage. - @rtype: nn.Sequential - @return: A stage of MobileOne model. - """ - # Get strides for all layers - strides = [2] + [1] * (num_blocks - 1) - blocks = [] - for ix, stride in enumerate(strides): - use_se = False - if num_se_blocks > num_blocks: - raise ValueError( - "Number of SE blocks cannot " "exceed number of layers." 
- ) - if ix >= (num_blocks - num_se_blocks): - use_se = True - - # Depthwise conv - blocks.append( - MobileOneBlock( - in_channels=self.in_planes, - out_channels=self.in_planes, - kernel_size=3, - stride=stride, - padding=1, - groups=self.in_planes, - use_se=use_se, - num_conv_branches=self.num_conv_branches, - ) - ) - # Pointwise conv - blocks.append( - MobileOneBlock( - in_channels=self.in_planes, - out_channels=planes, - kernel_size=1, - stride=1, - padding=0, - groups=1, - use_se=use_se, - num_conv_branches=self.num_conv_branches, - ) - ) - self.in_planes = planes - self.cur_layer_idx += 1 - return nn.Sequential(*blocks) - - class MobileOneBlock(nn.Module): """MobileOne building block. @@ -220,13 +62,14 @@ def __init__( self.inference_mode = False # Check if SE-ReLU is requested + self.se: nn.Module if use_se: self.se = SqueezeExciteBlock( in_channels=out_channels, intermediate_channels=int(out_channels * 0.0625), ) else: - self.se = nn.Identity() # type: ignore + self.se = nn.Identity() self.activation = nn.ReLU() # Re-parameterizable skip connection @@ -237,7 +80,7 @@ def __init__( ) # Re-parameterizable conv branches - rbr_conv = list() + rbr_conv: list[nn.Module] = [] for _ in range(self.num_conv_branches): rbr_conv.append( ConvModule( @@ -315,10 +158,10 @@ def reparameterize(self): # Delete un-used branches for para in self.parameters(): para.detach_() - self.__delattr__("rbr_conv") - self.__delattr__("rbr_scale") + del self.rbr_conv + del self.rbr_scale if hasattr(self, "rbr_skip"): - self.__delattr__("rbr_skip") + del self.rbr_skip self.inference_mode = True @@ -356,7 +199,7 @@ def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: bias_final = bias_conv + bias_scale + bias_identity return kernel_final, bias_final - def _fuse_bn_tensor(self, branch) -> tuple[Tensor, Tensor]: + def _fuse_bn_tensor(self, branch: nn.Module) -> tuple[Tensor, Tensor]: """Method to fuse batchnorm layer with preceeding conv layer. 
Reference: U{https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py#L95} diff --git a/luxonis_train/nodes/backbones/mobileone/mobileone.py b/luxonis_train/nodes/backbones/mobileone/mobileone.py new file mode 100644 index 00000000..55104f92 --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/mobileone.py @@ -0,0 +1,197 @@ +"""MobileOne backbone. + +Source: U{} +@license: U{Apple} +""" + +import logging +from typing import Any, Literal + +from torch import Tensor, nn + +from luxonis_train.nodes.base_node import BaseNode + +from .blocks import MobileOneBlock +from .variants import get_variant + +logger = logging.getLogger(__name__) + + +class MobileOne(BaseNode[Tensor, list[Tensor]]): + in_channels: int + + def __init__( + self, + variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", + width_multipliers: tuple[float, float, float, float] | None = None, + num_conv_branches: int | None = None, + use_se: bool | None = None, + **kwargs: Any, + ): + """MobileOne: An efficient CNN backbone for mobile devices. + + The architecture focuses on reducing memory access costs and improving parallelism + while allowing aggressive parameter scaling for better representation capacity. + Different variants (S0-S4) offer various accuracy-latency tradeoffs. 
+ + Key features: + - Designed for low latency on mobile while maintaining high accuracy + - Uses re-parameterizable branches during training that get folded at inference + - Employs trivial over-parameterization branches for improved accuracy + - Simple feed-forward structure at inference with no branches/skip connections + - Variants achieve <1ms inference time on iPhone 12 with up to 75.9% top-1 ImageNet accuracy + - Outperforms other efficient architectures like MobileNets on image classification, + object detection and semantic segmentation tasks + - Uses only basic operators available across platforms (no custom activations) + + + Reference: U{MobileOne: An Improved One millisecond Mobile Backbone + } + + @type variant: Literal["s0", "s1", "s2", "s3", "s4"] + @param variant: Specifies which variant of the MobileOne network to use. Defaults to "s0". + Each variant specifies a predefined set of values for: + - width multipliers - A tuple of 4 float values specifying the width multipliers for each stage of the network. If the use of SE blocks is disabled, the last two values are ignored. + - number of convolution branches - An integer specifying the number of linear convolution branches in MobileOne block. + - use of SE blocks - A boolean specifying whether to use SE blocks in the network. + + The variants are as follows: + - s0 (default): width_multipliers=(0.75, 1.0, 1.0, 2.0), num_conv_branches=4, use_se=False + - s1: width_multipliers=(1.5, 1.5, 2.0, 2.5), num_conv_branches=1, use_se=False + - s2: width_multipliers=(1.5, 2.0, 2.5, 4.0), num_conv_branches=1, use_se=False + - s3: width_multipliers=(2.0, 2.5, 3.0, 4.0), num_conv_branches=1, use_se=False + - s4: width_multipliers=(3.0, 3.5, 3.5, 4.0), num_conv_branches=1, use_se=True + + @type width_multipliers: tuple[float, float, float, float] | None + @param width_multipliers: Width multipliers for each stage. If provided, overrides the variant values. 
+ @type num_conv_branches: int | None + @param num_conv_branches: Number of linear convolution branches in MobileOne block. If provided, overrides the variant values. + @type use_se: bool | None + @param use_se: Whether to use SE blocks in the network. If provided, overrides the variant value. + """ + super().__init__(**kwargs) + + var = get_variant(variant) + + width_multipliers = width_multipliers or var.width_multipliers + use_se = var.use_se if use_se is None else use_se + self.num_blocks_per_stage = [2, 8, 10, 1] + self.num_conv_branches = num_conv_branches or var.num_conv_branches + + self.in_planes = min(64, int(64 * width_multipliers[0])) + + self.stage0 = MobileOneBlock( + in_channels=self.in_channels, + out_channels=self.in_planes, + kernel_size=3, + stride=2, + padding=1, + ) + self.cur_layer_idx = 1 + self.stage1 = self._make_stage( + int(64 * width_multipliers[0]), + self.num_blocks_per_stage[0], + num_se_blocks=0, + ) + self.stage2 = self._make_stage( + int(128 * width_multipliers[1]), + self.num_blocks_per_stage[1], + num_se_blocks=0, + ) + self.stage3 = self._make_stage( + int(256 * width_multipliers[2]), + self.num_blocks_per_stage[2], + num_se_blocks=self.num_blocks_per_stage[2] // 2 if use_se else 0, + ) + self.stage4 = self._make_stage( + int(512 * width_multipliers[3]), + self.num_blocks_per_stage[3], + num_se_blocks=self.num_blocks_per_stage[3] if use_se else 0, + ) + + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] + x = self.stage0(inputs) + outs.append(x) + x = self.stage1(x) + outs.append(x) + x = self.stage2(x) + outs.append(x) + x = self.stage3(x) + outs.append(x) + x = self.stage4(x) + outs.append(x) + + return outs + + def set_export_mode(self, mode: bool = True) -> None: + """Sets the module to export mode. + + Reparameterizes the model to obtain a plain CNN-like structure for inference. + TODO: add more details + + @warning: The reparametrization is destructive and cannot be reversed!
+ + @type mode: bool + @param mode: Whether to set the export mode to True or False. Defaults to True. + """ + super().set_export_mode(mode) + if self.export: + logger.info("Reparametrizing 'MobileOne'.") + for module in self.modules(): + if hasattr(module, "reparameterize"): + module.reparameterize() + + def _make_stage(self, planes: int, num_blocks: int, num_se_blocks: int): + """Build a stage of MobileOne model. + + @type planes: int + @param planes: Number of output channels. + @type num_blocks: int + @param num_blocks: Number of blocks in this stage. + @type num_se_blocks: int + @param num_se_blocks: Number of SE blocks in this stage. + @rtype: nn.Sequential + @return: A stage of MobileOne model. + """ + # Get strides for all layers + strides = [2] + [1] * (num_blocks - 1) + blocks: list[nn.Module] = [] + for ix, stride in enumerate(strides): + use_se = False + if num_se_blocks > num_blocks: + raise ValueError( + "Number of SE blocks cannot " "exceed number of layers." + ) + if ix >= (num_blocks - num_se_blocks): + use_se = True + + # Depthwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=self.in_planes, + kernel_size=3, + stride=stride, + padding=1, + groups=self.in_planes, + use_se=use_se, + num_conv_branches=self.num_conv_branches, + ) + ) + # Pointwise conv + blocks.append( + MobileOneBlock( + in_channels=self.in_planes, + out_channels=planes, + kernel_size=1, + stride=1, + padding=0, + groups=1, + use_se=use_se, + num_conv_branches=self.num_conv_branches, + ) + ) + self.in_planes = planes + self.cur_layer_idx += 1 + return nn.Sequential(*blocks) diff --git a/luxonis_train/nodes/backbones/mobileone/variants.py b/luxonis_train/nodes/backbones/mobileone/variants.py new file mode 100644 index 00000000..a3f65c9b --- /dev/null +++ b/luxonis_train/nodes/backbones/mobileone/variants.py @@ -0,0 +1,37 @@ +from typing import Literal + +from pydantic import BaseModel + + +class MobileOneVariant(BaseModel): +
width_multipliers: tuple[float, float, float, float] + num_conv_branches: int = 1 + use_se: bool = False + + +def get_variant(variant: Literal["s0", "s1", "s2", "s3", "s4"]) -> MobileOneVariant: + variants = { + "s0": MobileOneVariant( + width_multipliers=(0.75, 1.0, 1.0, 2.0), + num_conv_branches=4, + ), + "s1": MobileOneVariant( + width_multipliers=(1.5, 1.5, 2.0, 2.5), + ), + "s2": MobileOneVariant( + width_multipliers=(1.5, 2.0, 2.5, 4.0), + ), + "s3": MobileOneVariant( + width_multipliers=(2.0, 2.5, 3.0, 4.0), + ), + "s4": MobileOneVariant( + width_multipliers=(3.0, 3.5, 3.5, 4.0), + use_se=True, + ), + } + if variant not in variants: + raise ValueError( + "MobileOne model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/repvgg/__init__.py b/luxonis_train/nodes/backbones/repvgg/__init__.py new file mode 100644 index 00000000..61a5a4fc --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/__init__.py @@ -0,0 +1,3 @@ +from .repvgg import RepVGG + +__all__ = ["RepVGG"] diff --git a/luxonis_train/nodes/backbones/repvgg.py b/luxonis_train/nodes/backbones/repvgg/repvgg.py similarity index 50% rename from luxonis_train/nodes/backbones/repvgg.py rename to luxonis_train/nodes/backbones/repvgg/repvgg.py index c536c78e..5fc0b4af 100644 --- a/luxonis_train/nodes/backbones/repvgg.py +++ b/luxonis_train/nodes/backbones/repvgg/repvgg.py @@ -1,81 +1,68 @@ import logging -from typing import Literal +from collections import defaultdict +from typing import Any, Literal import torch.utils.checkpoint as checkpoint from torch import Tensor, nn +from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import RepVGGBlock -from ..base_node import BaseNode +from .variants import get_variant logger = logging.getLogger(__name__) -class RepVGG(BaseNode): - """Implementation of RepVGG backbone. - - Source: U{https://github.com/DingXiaoH/RepVGG} - @license: U{MIT}. 
- - @todo: technical documentation - """ - +class RepVGG(BaseNode[Tensor, list[Tensor]]): in_channels: int attach_index: int = -1 - VARIANTS_SETTINGS = { - "A0": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [0.75, 0.75, 0.75, 2.5], - }, - "A1": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [1, 1, 1, 2.5], - }, - "A2": { - "num_blocks": [2, 4, 14, 1], - "width_multiplier": [1.5, 1.5, 1.5, 2.75], - }, - } - def __init__( self, variant: Literal["A0", "A1", "A2"] = "A0", - num_blocks: list[int] | None = None, - width_multiplier: list[float] | None = None, + num_blocks: tuple[int, int, int, int] | None = None, + width_multiplier: tuple[float, float, float, float] | None = None, override_groups_map: dict[int, int] | None = None, use_se: bool = False, use_checkpoint: bool = False, - **kwargs, + **kwargs: Any, ): - """Constructor for the RepVGG module. + """RepVGG backbone. + + RepVGG is a VGG-style convolutional architecture. + + - Simple feed-forward topology without any branching. + - 3x3 convolutions and ReLU activations. + - No automatic search, manual refinement or compound scaling. + + @license: U{MIT + }. + + @see: U{https://github.com/DingXiaoH/RepVGG} + @see: U{https://paperswithcode.com/method/repvgg} + @see: U{RepVGG: Making VGG-style ConvNets Great Again + } + @type variant: Literal["A0", "A1", "A2"] @param variant: RepVGG model variant. Defaults to "A0". @type override_groups_map: dict[int, int] | None - @param override_groups_map: Dictionary mapping layer index to number of groups. + @param override_groups_map: Dictionary mapping layer index to number of groups. The layers are indexed starting from 0. @type use_se: bool @param use_se: Whether to use Squeeze-and-Excitation blocks. @type use_checkpoint: bool @param use_checkpoint: Whether to use checkpointing. - @type num_blocks: list[int] | None + @type num_blocks: tuple[int, int, int, int] | None @param num_blocks: Number of blocks in each stage. 
- @type width_multiplier: list[float] | None + @type width_multiplier: tuple[float, float, float, float] | None @param width_multiplier: Width multiplier for each stage. """ super().__init__(**kwargs) - if variant not in self.VARIANTS_SETTINGS.keys(): - raise ValueError( - f"RepVGG model variant should be one of " - f"{list(self.VARIANTS_SETTINGS.keys())}." - ) + var = get_variant(variant) - num_blocks = num_blocks or self.VARIANTS_SETTINGS[variant]["num_blocks"] - width_multiplier = ( - width_multiplier or self.VARIANTS_SETTINGS[variant]["width_multiplier"] - ) - self.override_groups_map = override_groups_map or {} - assert 0 not in self.override_groups_map + num_blocks = num_blocks or var.num_blocks + width_multiplier = width_multiplier or var.width_multiplier + override_groups_map = defaultdict(lambda: 1, override_groups_map or {}) self.use_se = use_se self.use_checkpoint = use_checkpoint @@ -88,38 +75,37 @@ def __init__( padding=1, use_se=self.use_se, ) - self.cur_layer_idx = 1 - self.stage1 = self._make_stage( - int(64 * width_multiplier[0]), num_blocks[0], stride=2 - ) - self.stage2 = self._make_stage( - int(128 * width_multiplier[1]), num_blocks[1], stride=2 - ) - self.stage3 = self._make_stage( - int(256 * width_multiplier[2]), num_blocks[2], stride=2 - ) - self.stage4 = self._make_stage( - int(512 * width_multiplier[3]), num_blocks[3], stride=2 + self.blocks = nn.ModuleList( + [ + block + for i in range(4) + for block in self._make_stage( + int(2**i * 64 * width_multiplier[i]), + num_blocks[i], + stride=2, + groups=override_groups_map[i], + ) + ] ) self.gap = nn.AdaptiveAvgPool2d(output_size=1) def forward(self, inputs: Tensor) -> list[Tensor]: - outputs = [] + outputs: list[Tensor] = [] out = self.stage0(inputs) - for stage in (self.stage1, self.stage2, self.stage3, self.stage4): - for block in stage: - if self.use_checkpoint: - out = checkpoint.checkpoint(block, out) - else: - out = block(out) - outputs.append(out) + for block in self.blocks: + if 
self.use_checkpoint: + out = checkpoint.checkpoint(block, out) + else: + out = block(out) + outputs.append(out) # type: ignore return outputs - def _make_stage(self, planes: int, num_blocks: int, stride: int): + def _make_stage( + self, planes: int, num_blocks: int, stride: int, groups: int + ) -> nn.ModuleList: strides = [stride] + [1] * (num_blocks - 1) - blocks = [] + blocks: list[nn.Module] = [] for stride in strides: - cur_groups = self.override_groups_map.get(self.cur_layer_idx, 1) blocks.append( RepVGGBlock( in_channels=self.in_planes, @@ -127,12 +113,11 @@ def _make_stage(self, planes: int, num_blocks: int, stride: int): kernel_size=3, stride=stride, padding=1, - groups=cur_groups, + groups=groups, use_se=self.use_se, ) ) self.in_planes = planes - self.cur_layer_idx += 1 return nn.ModuleList(blocks) def set_export_mode(self, mode: bool = True) -> None: diff --git a/luxonis_train/nodes/backbones/repvgg/variants.py b/luxonis_train/nodes/backbones/repvgg/variants.py new file mode 100644 index 00000000..de13a854 --- /dev/null +++ b/luxonis_train/nodes/backbones/repvgg/variants.py @@ -0,0 +1,31 @@ +from typing import Literal + +from pydantic import BaseModel + + +class RepVGGVariant(BaseModel): + num_blocks: tuple[int, int, int, int] + width_multiplier: tuple[float, float, float, float] + + +def get_variant(variant: Literal["A0", "A1", "A2"]) -> RepVGGVariant: + variants = { + "A0": RepVGGVariant( + num_blocks=(2, 4, 14, 1), + width_multiplier=(0.75, 0.75, 0.75, 2.5), + ), + "A1": RepVGGVariant( + num_blocks=(2, 4, 14, 1), + width_multiplier=(1, 1, 1, 2.5), + ), + "A2": RepVGGVariant( + num_blocks=(2, 4, 14, 1), + width_multiplier=(1.5, 1.5, 1.5, 2.75), + ), + } + if variant not in variants: + raise ValueError( + f"RepVGG variant should be one of " + f"{list(variants.keys())}, got '{variant}'." 
+ ) + return variants[variant] diff --git a/luxonis_train/nodes/backbones/resnet.py b/luxonis_train/nodes/backbones/resnet.py index e4228410..36656e82 100644 --- a/luxonis_train/nodes/backbones/resnet.py +++ b/luxonis_train/nodes/backbones/resnet.py @@ -1,55 +1,94 @@ -"""ResNet backbone. - -Source: U{https://pytorch.org/vision/main/models/resnet.html} -@license: U{PyTorch} -""" -from typing import Literal +from typing import Any, Literal import torchvision -from torch import Tensor, nn +from torch import Tensor +from torchvision.models import ResNet as TorchResNet -from ..base_node import BaseNode +from luxonis_train.nodes.base_node import BaseNode class ResNet(BaseNode[Tensor, list[Tensor]]): def __init__( self, variant: Literal["18", "34", "50", "101", "152"] = "18", - channels_list: list[int] | None = None, download_weights: bool = False, - **kwargs, + zero_init_residual: bool = False, + groups: int = 1, + width_per_group: int = 64, + replace_stride_with_dilation: tuple[bool, bool, bool] = (False, False, False), + **kwargs: Any, ): - """Implementation of the ResNetX backbone. + """ResNet backbone. + + Implements the backbone of a ResNet (Residual Network) architecture. + + ResNet is designed to address the vanishing gradient problem in deep neural networks + by introducing skip connections. These connections allow the network to learn + residual functions with reference to the layer inputs, enabling training of much + deeper networks. + + This backbone can be used as a feature extractor for various computer vision tasks + such as image classification, object detection, and semantic segmentation. It + provides a robust set of features that can be fine-tuned for specific applications. - TODO: add more info + The architecture consists of stacked residual blocks, each containing convolutional + layers, batch normalization, and ReLU activations. The skip connections can be + either identity mappings or projections, depending on the block type. 
+ Source: U{https://pytorch.org/vision/main/models/resnet.html} + + @license: U{PyTorch} + + @param variant: ResNet variant, determining the depth and structure of the network. Options are: + - "18": 18 layers, uses basic blocks, smaller model suitable for simpler tasks. + - "34": 34 layers, uses basic blocks, good balance of depth and computation. + - "50": 50 layers, introduces bottleneck blocks, deeper feature extraction. + - "101": 101 layers, uses bottleneck blocks, high capacity for complex tasks. + - "152": 152 layers, deepest variant, highest capacity but most computationally intensive. + The number in each variant represents the total number of weighted layers. + Deeper networks generally offer higher accuracy but require more computation. @type variant: Literal["18", "34", "50", "101", "152"] - @param variant: ResNet variant. Defaults to "18". - @type channels_list: list[int] | None - @param channels_list: List of channels to return. - If unset, defaults to [64, 128, 256, 512]. + @default variant: "18" @type download_weights: bool - @param download_weights: If True download weights from imagenet. + @param download_weights: If True download weights trained on imagenet. Defaults to False. + @type zero_init_residual: bool + @param zero_init_residual: Zero-initialize the last BN in each residual branch, + so that the residual branch starts with zeros, and each residual block behaves like an identity. + This improves the model by 0.2~0.3% according to U{Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour }. Defaults to C{False}. + + @type groups: int + @param groups: Number of groups for each block. + Defaults to 1. Can be set to a different value only + for ResNet-50, ResNet-101, and ResNet-152. + The width of the convolutional blocks is computed as + C{int(in_channels * (width_per_group / 64.0)) * groups} + + @type width_per_group: int + @param width_per_group: Number of channels per group. + Defaults to 64. 
Can be set to a different value only + for ResNet-50, ResNet-101, and ResNet-152. + The width of the convolutional blocks is computed as + C{int(in_channels * (width_per_group / 64.0)) * groups} + + @type replace_stride_with_dilation: tuple[bool, bool, bool] + @param replace_stride_with_dilation: Tuple of booleans where each + indicates if the 2x2 strides should be replaced with a dilated convolution instead. + Defaults to (False, False, False). Can be set to a different value only for ResNet-50, ResNet-101, and ResNet-152. """ super().__init__(**kwargs) - - if variant not in RESNET_VARIANTS: - raise ValueError( - f"ResNet model variant should be in {list(RESNET_VARIANTS.keys())}" - ) - - self.backbone = RESNET_VARIANTS[variant]( - weights="DEFAULT" if download_weights else None + self.backbone = self._get_backbone( + variant, + weights="DEFAULT" if download_weights else None, + zero_init_residual=zero_init_residual, + groups=groups, + width_per_group=width_per_group, + replace_stride_with_dilation=replace_stride_with_dilation, ) - self.backbone.fc = nn.Identity() - - self.channels_list = channels_list or [64, 128, 256, 512] - def forward(self, inputs: Tensor) -> list[Tensor]: - outs = [] + outs: list[Tensor] = [] x = self.backbone.conv1(inputs) x = self.backbone.bn1(x) x = self.backbone.relu(x) @@ -66,11 +105,20 @@ def forward(self, inputs: Tensor) -> list[Tensor]: return outs - -RESNET_VARIANTS = { - "18": torchvision.models.resnet18, - "34": torchvision.models.resnet34, - "50": torchvision.models.resnet50, - "101": torchvision.models.resnet101, - "152": torchvision.models.resnet152, -} + @staticmethod + def _get_backbone( + variant: Literal["18", "34", "50", "101", "152"], **kwargs: Any + ) -> TorchResNet: + variants = { + "18": torchvision.models.resnet18, + "34": torchvision.models.resnet34, + "50": torchvision.models.resnet50, + "101": torchvision.models.resnet101, + "152": torchvision.models.resnet152, + } + if variant not in variants: + raise ValueError( + 
"ResNet model variant should be in " + f"{list(variants.keys())}, got {variant}." + ) + return variants[variant](**kwargs) diff --git a/luxonis_train/nodes/backbones/rexnetv1.py b/luxonis_train/nodes/backbones/rexnetv1.py index 6d23857e..e03110b3 100644 --- a/luxonis_train/nodes/backbones/rexnetv1.py +++ b/luxonis_train/nodes/backbones/rexnetv1.py @@ -1,15 +1,11 @@ -"""Implementation of the ReXNetV1 backbone. - -Source: U{https://github.com/clovaai/rexnet} -@license: U{MIT} -""" +from typing import Any import torch from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible class ReXNetV1_lite(BaseNode[Tensor, list[Tensor]]): @@ -21,10 +17,33 @@ def __init__( final_ch: int = 164, multiplier: float = 1.0, kernel_sizes: int | list[int] = 3, - **kwargs, + out_indices: list[int] | None = None, + **kwargs: Any, ): - """ReXNetV1_lite backbone. + """ReXNetV1 (Rank Expansion Networks) backbone, lite version. + + ReXNet proposes a new approach to designing lightweight CNN architectures by: + + - Studying proper channel dimension expansion at the layer level using rank analysis + - Searching for effective channel configurations across the entire network + - Parameterizing channel dimensions as a linear function of network depth + + Key aspects: + + - Uses inverted bottleneck blocks similar to MobileNetV2 + - Employs a linear parameterization of channel dimensions across blocks + - Replaces ReLU6 with SiLU (Swish-1) activation in certain layers + - Incorporates Squeeze-and-Excitation modules + ReXNet achieves state-of-the-art performance among lightweight models on ImageNet + classification and transfers well to tasks like object detection and fine-grained classification. + + Source: U{https://github.com/clovaai/rexnet} + + @license: U{MIT + } + @copyright: 2021-present NAVER Corp. 
+ @see U{Rethinking Channel Dimensions for Efficient Model Design } @type fix_head_stem: bool @param fix_head_stem: Whether to multiply head stem. Defaults to False. @type divisible_value: int @@ -37,30 +56,30 @@ def __init__( @param multiplier: Channel dimension multiplier. Defaults to 1.0. @type kernel_sizes: int | list[int] @param kernel_sizes: Kernel size for each block. Defaults to 3. + @type out_indices: list[int] | None + @param out_indices: Indices of the output layers. Defaults to [1, 4, 10, 17]. """ super().__init__(**kwargs) - self.out_indices = [1, 4, 10, 17] - self.channels = [16, 48, 112, 184] layers = [1, 2, 2, 3, 3, 5] strides = [1, 2, 2, 2, 1, 2] + self.num_convblocks = sum(layers) + self.out_indices = out_indices or [1, 4, 10, 17] + kernel_sizes = ( [kernel_sizes] * 6 if isinstance(kernel_sizes, int) else kernel_sizes ) - strides = sum( - [ - [element] + [1] * (layers[idx] - 1) - for idx, element in enumerate(strides) - ], - [], - ) + strides = [ + s if i == 0 else 1 + for layer, s in zip(layers, strides) + for i in range(layer) + ] ts = [1] * layers[0] + [6] * sum(layers[1:]) - kernel_sizes = sum( - [[element] * layers[idx] for idx, element in enumerate(kernel_sizes)], [] - ) - self.num_convblocks = sum(layers[:]) + kernel_sizes = [ + ks for ks, layer in zip(kernel_sizes, layers) for _ in range(layer) + ] features: list[nn.Module] = [] inplanes = input_ch / multiplier if multiplier < 1.0 else input_ch @@ -69,8 +88,8 @@ def __init__( int(round(first_channel * multiplier)), divisible_value ) - in_channels_group = [] - channels_group = [] + in_channels_group: list[int] = [] + channels_group: list[int] = [] features.append( ConvModule( @@ -121,12 +140,12 @@ def __init__( ) self.features = nn.Sequential(*features) - def forward(self, x: Tensor) -> list[Tensor]: - outs = [] + def forward(self, inputs: Tensor) -> list[Tensor]: + outs: list[Tensor] = [] for i, module in enumerate(self.features): - x = module(x) + inputs = module(inputs) if i in
self.out_indices: - outs.append(x) + outs.append(inputs) return outs @@ -138,14 +157,12 @@ def __init__( t: int, kernel_size: int = 3, stride: int = 1, - **kwargs, ): - super(LinearBottleneck, self).__init__(**kwargs) - self.conv_shortcut = None + super().__init__() self.use_shortcut = stride == 1 and in_channels <= channels self.in_channels = in_channels self.out_channels = channels - out = [] + out: list[nn.Module] = [] if t != 1: dw_channels = in_channels * t out.append( diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 9db45316..bfb28701 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -1,25 +1,26 @@ import inspect +import logging from abc import ABC, abstractmethod +from contextlib import suppress from typing import Generic, TypeVar +from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta -from pydantic import BaseModel, ValidationError from torch import Size, Tensor, nn +from typeguard import TypeCheckError, check_type -from luxonis_train.utils.general import DatasetMetadata, validate_packet -from luxonis_train.utils.registry import NODES -from luxonis_train.utils.types import ( +from luxonis_train.utils import ( AttachIndexType, - FeaturesProtocol, + DatasetMetadata, IncompatibleException, - LabelType, Packet, ) +from luxonis_train.utils.registry import NODES ForwardOutputT = TypeVar("ForwardOutputT") ForwardInputT = TypeVar("ForwardInputT") -__all__ = ["BaseNode"] +logger = logging.getLogger(__name__) class BaseNode( @@ -41,13 +42,10 @@ class BaseNode( of lists of tensors. Each key in the dictionary represents a different output from the previous node. Input to the node is a list of L{Packet}s, output is a single L{Packet}. - Each node can define a list of L{BaseProtocol}s that the inputs must conform to. - L{BaseProtocol} is a pydantic model that defines the structure of the input. 
- When the node is called, the inputs are validated against the protocols and - then sent to the L{unwrap} method. The C{unwrap} method should return a valid - input to the L{forward} method. Outputs of the C{forward} method are then - send to L{weap} method, which wraps the output into a C{Packet}, which is the - output of the node. + When the node is called, the inputs are sent to the L{unwrap} method. + The C{unwrap} method should return a valid input to the L{forward} method. + Outputs of the C{forward} method are then sent to the L{wrap} method, + which wraps the output into a C{Packet}. The wrapped C{Packet} is the final output of the node. The L{run} method combines the C{unwrap}, C{forward} and C{wrap} methods together with input validation. @@ -55,13 +53,12 @@ class BaseNode( When subclassing, the following methods should be implemented: - L{forward}: Forward pass of the module. - L{unwrap}: Optional. Unwraps the inputs from the input packet. - The default implementation expects a single input with `features` key. + The default implementation expects a single input with `features` key. - L{wrap}: Optional. Wraps the output of the forward pass - into a `Packet[Tensor]`. The default implementation expects wraps the output - of the forward pass into a packet with either "features" or the task name as the key. + into a `Packet[Tensor]`. The default implementation wraps the output + of the forward pass into a packet with either "features" or the task name as the key. Additionally, the following class attributes can be defined: - - L{input_protocols}: List of input protocols used to validate inputs to the node. - L{attach_index}: Index of previous output that this node attaches to. - L{tasks}: Dictionary of tasks that the node supports. @@ -94,32 +91,6 @@ def wrap(output: Tensor) -> Packet[Tensor]: # by the attached modules.
return {"classification": [output]} - @type input_shapes: list[Packet[Size]] | None - @param input_shapes: List of input shapes for the module. - - @type original_in_shape: Size | None - @param original_in_shape: Original input shape of the model. Some - nodes won't function if not provided. - - @type dataset_metadata: L{DatasetMetadata} | None - @param dataset_metadata: Metadata of the dataset. - Some nodes won't function if not provided. - - @type n_classes: int | None - @param n_classes: Number of classes in the dataset. Provide only - in case `dataset_metadata` is not provided. Defaults to None. - - @type in_sizes: Size | list[Size] | None - @param in_sizes: List of input sizes for the node. - Provide only in case the `input_shapes` were not provided. - - @type _tasks: dict[LabelType, str] | None - @param _tasks: Dictionary of tasks that the node supports. Overrides the - class L{tasks} attribute. Shouldn't be provided by the user in most cases. - - @type input_protocols: list[type[BaseModel]] - @ivar input_protocols: List of input protocols used to validate inputs to the node. - Defaults to [L{FeaturesProtocol}]. @type attach_index: AttachIndexType @ivar attach_index: Index of previous output that this node attaches to. @@ -135,7 +106,6 @@ class L{tasks} attribute. Shouldn't be provided by the user in most cases. Only needs to be defined for head nodes. """ - input_protocols: list[type[BaseModel]] = [FeaturesProtocol] attach_index: AttachIndexType tasks: list[LabelType] | dict[LabelType, str] | None = None @@ -148,10 +118,50 @@ def __init__( n_classes: int | None = None, n_keypoints: int | None = None, in_sizes: Size | list[Size] | None = None, + attach_index: AttachIndexType | None = None, _tasks: dict[LabelType, str] | None = None, ): + """Constructor for the BaseNode. + + @type input_shapes: list[Packet[Size]] | None + @param input_shapes: List of input shapes for the module. 
+ + @type original_in_shape: Size | None + @param original_in_shape: Original input shape of the model. Some + nodes won't function if not provided. + + @type dataset_metadata: L{DatasetMetadata} | None + @param dataset_metadata: Metadata of the dataset. + Some nodes won't function if not provided. + + @type n_classes: int | None + @param n_classes: Number of classes in the dataset. Provide only + in case `dataset_metadata` is not provided. Defaults to None. + + @type in_sizes: Size | list[Size] | None + @param in_sizes: List of input sizes for the node. + Provide only in case the `input_shapes` were not provided. + + @type attach_index: AttachIndexType + @param attach_index: Index of previous output that this node attaches to. + Can be a single integer to specify a single output, a tuple of + two or three integers to specify a range of outputs or `"all"` to + specify all outputs. Defaults to "all". Python indexing conventions apply. If provided as a constructor argument, overrides the class attribute. + + + @type _tasks: dict[LabelType, str] | None + @param _tasks: Dictionary of tasks that the node supports. Overrides the + class L{tasks} attribute. Shouldn't be provided by the user in most cases. + """ super().__init__() + if attach_index is not None: + logger.warning( + f"Node {self.name} overrides `attach_index` " + f"by setting it to '{attach_index}'. " + "Make sure this is intended." 
+ ) + self.attach_index = attach_index self._tasks = None if _tasks is not None: self._tasks = _tasks @@ -180,15 +190,36 @@ def __init__( self._epoch = 0 self._in_sizes = in_sizes + self._check_type_overrides() + @staticmethod def _process_tasks( tasks: dict[LabelType, str] | list[LabelType], ) -> dict[LabelType, str]: if isinstance(tasks, dict): return tasks - if isinstance(tasks, list): + else: return {task: task.value for task in tasks} + def _check_type_overrides(self) -> None: + properties = [] + for name, value in inspect.getmembers(self.__class__): + if isinstance(value, property): + properties.append(name) + for name, typ in self.__annotations__.items(): + if name in properties: + with suppress(ValueError): + value = getattr(self, name) + try: + check_type(value, typ) + except TypeCheckError as e: + raise IncompatibleException( + f"Node '{self.name}' specifies the type of the property `{name}` as `{typ}`, " + f"but received `{type(value)}`. " + f"This may indicate that the '{self.name}' node is " + "not compatible with its predecessor." + ) from e + def get_task_name(self, task: LabelType) -> str: """Gets the name of a task for a particular C{LabelType}. @@ -329,7 +360,11 @@ def input_shapes(self) -> list[Packet[Size]]: @property def original_in_shape(self) -> Size: - """Getter for the original input shape.""" + """Getter for the original input shape as [N, H, W]. + + @type: Size + @raises ValueError: If the C{original_in_shape} is C{None}. + """ if self._original_in_shape is None: raise self._non_set_error("original_in_shape") return self._original_in_shape @@ -381,10 +416,7 @@ def in_sizes(self) -> Size | list[Size]: f"Feature field is missing in {self.name}. " "The default implementation of `in_sizes` cannot be used." 
) - shapes = self.get_attached(self.input_shapes[0]["features"]) - if isinstance(shapes, list) and len(shapes) == 1: - return shapes[0] - return shapes + return self.get_attached(self.input_shapes[0]["features"]) @property def in_channels(self) -> int | list[int]: @@ -504,15 +536,16 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: @return: Wrapped output. """ - match output: - case Tensor() as out: - outputs = [out] - case list(tensors) if all(isinstance(t, Tensor) for t in tensors): - outputs = tensors - case _: - raise IncompatibleException( - "Default `wrap` expects a single tensor or a list of tensors." - ) + if isinstance(output, Tensor): + outputs = [output] + elif isinstance(output, (list, tuple)) and all( + isinstance(t, Tensor) for t in output + ): + outputs = list(output) + else: + raise IncompatibleException( + "Default `wrap` expects a single tensor or a list of tensors." + ) try: task = self.task except ValueError: @@ -522,8 +555,6 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: """Combines the forward pass with the wrapping and unwrapping of the inputs. - Additionally validates the inputs against `input_protocols`. - @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the module. @@ -533,7 +564,7 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: @raises IncompatibleException: If the inputs are not compatible with the node. 
""" - unwrapped = self.unwrap(self.validate(inputs)) + unwrapped = self.unwrap(inputs) outputs = self(unwrapped) wrapped = self.wrap(outputs) str_tasks = [task.value for task in self._tasks] if self._tasks else [] @@ -543,21 +574,6 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: wrapped[self.get_task_name(LabelType(key))] = value return wrapped - def validate(self, data: list[Packet[Tensor]]) -> list[Packet[Tensor]]: - """Validates the inputs against `input_protocols`.""" - if len(data) != len(self.input_protocols): - raise IncompatibleException( - f"Node {self.name} expects {len(self.input_protocols)} inputs, " - f"but got {len(data)} inputs instead." - ) - try: - return [ - validate_packet(d, protocol) - for d, protocol in zip(data, self.input_protocols) - ] - except ValidationError as e: - raise IncompatibleException.from_validation_error(e, self.name) from e - T = TypeVar("T", Tensor, Size) def get_attached(self, lst: list[T]) -> list[T] | T: diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 0e0a4ad2..ea7c8290 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -1,6 +1,3 @@ -# TODO: cleanup, document -# Check if some blocks could be merged togetner. 
- import math from typing import TypeVar @@ -318,12 +315,12 @@ def reparametrize(self) -> None: ) self.rbr_reparam.weight.data = kernel # type: ignore self.rbr_reparam.bias.data = bias # type: ignore - self.__delattr__("rbr_dense") - self.__delattr__("rbr_1x1") + del self.rbr_dense + del self.rbr_1x1 if hasattr(self, "rbr_identity"): - self.__delattr__("rbr_identity") + del self.rbr_identity if hasattr(self, "id_tensor"): - self.__delattr__("id_tensor") + del self.id_tensor def _get_equivalent_kernel_bias(self) -> tuple[Tensor, Tensor]: """Derives the equivalent kernel and bias in a DIFFERENTIABLE way.""" diff --git a/luxonis_train/nodes/heads/bisenet_head.py b/luxonis_train/nodes/heads/bisenet_head.py index 3fef7584..dd6e6333 100644 --- a/luxonis_train/nodes/heads/bisenet_head.py +++ b/luxonis_train/nodes/heads/bisenet_head.py @@ -1,31 +1,28 @@ -"""BiSeNet segmentation head. - -Adapted from U{https://github.com/taveraantonio/BiseNetv1}. -License: NOT SPECIFIED. -""" - +from typing import Any +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType, Packet +from luxonis_train.utils import infer_upscale_factor class BiSeNetHead(BaseNode[Tensor, Tensor]): in_height: int + in_width: int in_channels: int tasks: list[LabelType] = [LabelType.SEGMENTATION] - def __init__( - self, - intermediate_channels: int = 64, - **kwargs, - ): + def __init__(self, intermediate_channels: int = 64, **kwargs: Any): """BiSeNet segmentation head. - TODO: Add more documentation. + + Source: U{BiseNetV1} + @license: NOT SPECIFIED. + @see: U{BiseNetv1: Bilateral Segmentation Network for + Real-time Semantic Segmentation + } @type intermediate_channels: int @param intermediate_channels: How many intermediate channels to use. 
@@ -33,17 +30,28 @@ def __init__( """ super().__init__(**kwargs) - original_height = self.original_in_shape[1] - upscale_factor = 2 ** infer_upscale_factor(self.in_height, original_height) + h, w = self.original_in_shape[1:] + upscale_factor = 2 ** infer_upscale_factor( + (self.in_height, self.in_width), (h, w) + ) out_channels = self.n_classes * upscale_factor * upscale_factor - self.conv_3x3 = ConvModule(self.in_channels, intermediate_channels, 3, 1, 1) - self.conv_1x1 = nn.Conv2d(intermediate_channels, out_channels, 1, 1, 0) + self.conv_3x3 = ConvModule( + self.in_channels, + intermediate_channels, + kernel_size=3, + stride=1, + padding=1, + ) + self.conv_1x1 = nn.Conv2d( + intermediate_channels, + out_channels, + kernel_size=1, + stride=1, + padding=0, + ) self.upscale = nn.PixelShuffle(upscale_factor) - def wrap(self, output: Tensor) -> Packet[Tensor]: - return {"segmentation": [output]} - def forward(self, inputs: Tensor) -> Tensor: x = self.conv_3x3(inputs) x = self.conv_1x1(x) diff --git a/luxonis_train/nodes/heads/classification_head.py b/luxonis_train/nodes/heads/classification_head.py index 07b3d72b..93b5c684 100644 --- a/luxonis_train/nodes/heads/classification_head.py +++ b/luxonis_train/nodes/heads/classification_head.py @@ -1,3 +1,5 @@ +from typing import Any + from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode @@ -8,13 +10,12 @@ class ClassificationHead(BaseNode[Tensor, Tensor]): in_channels: int tasks: list[LabelType] = [LabelType.CLASSIFICATION] - def __init__( - self, - dropout_rate: float = 0.2, - **kwargs, - ): + def __init__(self, dropout_rate: float = 0.2, **kwargs: Any): """Simple classification head. + Consists of a global average pooling layer followed by a dropout layer and a + single linear layer. + @type dropout_rate: float @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults to C{0.2}. 
diff --git a/luxonis_train/nodes/heads/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py index 5607a2a8..11be28cb 100644 --- a/luxonis_train/nodes/heads/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -1,22 +1,20 @@ -"""Head for object detection. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. -""" - -from typing import Literal +import logging +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import EfficientDecoupledBlock -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, anchors_for_fpn_features, dist2bbox, non_max_suppression, ) -from luxonis_train.utils.types import LabelType, Packet + +logger = logging.getLogger(__name__) class EfficientBBoxHead( @@ -27,32 +25,30 @@ class EfficientBBoxHead( def __init__( self, - n_heads: Literal[2, 3, 4] = 3, + num_heads: Literal[2, 3, 4] = 3, conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object detection. - TODO: add more documentation - - @type n_heads: Literal[2,3,4] - @param n_heads: Number of output heads. Defaults to 3. - ***Note:*** Should be same also on neck in most cases. - + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial + Applications }. + @type num_heads: Literal[2,3,4] + @param num_heads: Number of output heads. Defaults to 3. B{Note:} Should be same + also on neck in most cases. @type conf_thres: float @param conf_thres: Threshold for confidence. Defaults to C{0.25}. - @type iou_thres: float @param iou_thres: Threshold for IoU. Defaults to C{0.45}. - @type max_det: int - @param max_det: Maximum number of detections retained after NMS. Defaults to C{300}. + @param max_det: Maximum number of detections retained after NMS. 
Defaults to + C{300}. """ super().__init__(**kwargs) - self.n_heads = n_heads + self.num_heads = num_heads self.conf_thres = conf_thres self.iou_thres = iou_thres @@ -63,7 +59,14 @@ def __init__( self.grid_cell_size = 5.0 self.heads = nn.ModuleList() - for i in range(self.n_heads): + if len(self.in_channels) < self.num_heads: + logger.warning( + f"Head '{self.name}' was set to use {self.num_heads} heads, " + f"but received only {len(self.in_channels)} inputs. " + f"Changing number of heads to {len(self.in_channels)}." + ) + self.num_heads = len(self.in_channels) + for i in range(self.num_heads): curr_head = EfficientDecoupledBlock( n_classes=self.n_classes, in_channels=self.in_channels[i], @@ -92,7 +95,7 @@ def wrap( features, cls_score_list, reg_distri_list = output if self.export: - outputs = [] + outputs: list[Tensor] = [] for out_cls, out_reg in zip(cls_score_list, reg_distri_list, strict=True): conf, _ = out_cls.max(1, keepdim=True) out = torch.cat([out_reg, conf, out_cls], dim=1) @@ -127,7 +130,7 @@ def _fit_stride_to_num_heads(self): stride = torch.tensor( [ self.original_in_shape[1] / x[2] # type: ignore - for x in self.in_sizes[: self.n_heads] + for x in self.in_sizes[: self.num_heads] ], dtype=torch.int, ) diff --git a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py index 03d29296..fffd361c 100644 --- a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py @@ -1,15 +1,16 @@ -from typing import Literal +from typing import Any, Literal import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, anchors_for_fpn_features, dist2bbox, non_max_suppression, ) -from luxonis_train.utils.types import LabelType, Packet from .efficient_bbox_head import EfficientBBoxHead @@ 
-19,20 +20,20 @@ class EfficientKeypointBBoxHead(EfficientBBoxHead): def __init__( self, - n_heads: Literal[2, 3, 4] = 3, + num_heads: Literal[2, 3, 4] = 3, conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object and keypoint detection. Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications}. - @param n_heads: Number of output heads. Defaults to C{3}. + @param num_heads: Number of output heads. Defaults to C{3}. B{Note:} Should be same also on neck in most cases. - @type n_heads: int + @type num_heads: int @param conf_thres: Threshold for confidence. Defaults to C{0.25}. @type conf_thres: float @@ -44,7 +45,7 @@ def __init__( @type max_det: int """ super().__init__( - n_heads=n_heads, + num_heads=num_heads, conf_thres=conf_thres, iou_thres=iou_thres, max_det=max_det, @@ -77,7 +78,7 @@ def forward( ) kpt_list: list[Tensor] = [] - for i in range(self.n_heads): + for i in range(self.num_heads): kpt_pred = self.kpt_layers[i](inputs[i]) kpt_list.append(kpt_pred) @@ -89,12 +90,12 @@ def wrap( features, cls_score_list, reg_distri_list, kpt_list = output bs = features[0].shape[0] if self.export: - outputs = [] + outputs: list[Tensor] = [] for out_cls, out_reg, out_kpts in zip( cls_score_list, reg_distri_list, kpt_list, strict=True ): - chunks = out_kpts.split(3, dim=1) - modified_chunks = [] + chunks = torch.split(out_kpts, 3, dim=1) + modified_chunks: list[Tensor] = [] for chunk in chunks: x = chunk[:, 0:1, :, :] y = chunk[:, 1:2, :, :] @@ -105,6 +106,7 @@ def wrap( out = torch.cat([out_reg, out_cls, out_kpts_modified], dim=1) outputs.append(out) return {"outputs": outputs} + cls_tensor = torch.cat( [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 ).permute(0, 2, 1) @@ -143,7 +145,7 @@ def wrap( "keypoints_raw": [kpt_tensor], } - def _dist2kpts(self, kpts): + def _dist2kpts(self, kpts: Tensor) -> Tensor: """Decodes keypoints.""" y = kpts.clone() 
diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py index 0ca995c5..b74fb240 100644 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py @@ -1,24 +1,26 @@ import logging import math -from typing import cast +from typing import Any, cast import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import KeypointBlock, LearnableMulAddConv -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( + Packet, non_max_suppression, process_bbox_predictions, process_keypoints_predictions, ) -from luxonis_train.utils.types import LabelType, Packet logger = logging.getLogger(__name__) -class ImplicitKeypointBBoxHead(BaseNode): - tasks: list[LabelType] = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] +class ImplicitKeypointBBoxHead(BaseNode[list[Tensor], tuple[list[Tensor], Tensor]]): + tasks = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] + in_channels: list[int] def __init__( self, @@ -28,7 +30,7 @@ def __init__( conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, - **kwargs, + **kwargs: Any, ): """Head for object and keypoint detection. @@ -53,16 +55,23 @@ def __init__( """ super().__init__(**kwargs) - if anchors is None: - logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors(num_heads) - logger.info(f"Anchors generated. Best possible recall: {recall:.2f}") - self.conf_thres = conf_thres self.iou_thres = iou_thres self.max_det = max_det self.num_heads = num_heads + if len(self.in_channels) < self.num_heads: + logger.warning( + f"Head '{self.name}' was set to use {self.num_heads} heads, " + f"but received only {len(self.in_channels)} inputs. " + f"Changing number of heads to {len(self.in_channels)}." 
+ ) + self.num_heads = len(self.in_channels) + + if anchors is None: + logger.info("No anchors provided, generating them automatically.") + anchors, recall = self.dataset_metadata.autogenerate_anchors(self.num_heads) + logger.info(f"Anchors generated. Best possible recall: {recall:.2f}") self.box_offset = 5 self.n_det_out = self.n_classes + self.box_offset @@ -74,9 +83,7 @@ def __init__( self.anchors = torch.tensor(anchors).float().view(self.num_heads, -1, 2) self.anchor_grid = self.anchors.clone().view(self.num_heads, 1, -1, 1, 1, 2) - self.channel_list, self.stride = self._fit_to_num_heads( - cast(list[int], self.in_channels) - ) + self.channel_list, self.stride = self._fit_to_num_heads(self.in_channels) self.learnable_mul_add_conv = nn.ModuleList( LearnableMulAddConv( @@ -139,8 +146,8 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: return features, torch.cat(predictions, dim=1) - def wrap(self, outputs: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: - features, predictions = outputs + def wrap(self, output: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: + features, predictions = output if self.export: return {"boxes_and_keypoints": [predictions]} @@ -200,7 +207,7 @@ def _infer_bbox( ) return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - def _fit_to_num_heads(self, channel_list: list): + def _fit_to_num_heads(self, channel_list: list[int]) -> tuple[list[int], Tensor]: out_channel_list = channel_list[: self.num_heads] stride = torch.tensor( [ diff --git a/luxonis_train/nodes/heads/segmentation_head.py b/luxonis_train/nodes/heads/segmentation_head.py index 1b29df7b..19f87f3b 100644 --- a/luxonis_train/nodes/heads/segmentation_head.py +++ b/luxonis_train/nodes/heads/segmentation_head.py @@ -1,37 +1,31 @@ -"""Implementation of a basic segmentation head. 
+from typing import Any -Adapted from: U{https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py} -@license: U{BSD-3 } -""" - -import torch.nn as nn -from torch import Tensor +from luxonis_ml.data import LabelType +from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import UpBlock -from luxonis_train.utils.general import infer_upscale_factor -from luxonis_train.utils.types import LabelType +from luxonis_train.utils import infer_upscale_factor class SegmentationHead(BaseNode[Tensor, Tensor]): in_height: int + in_width: int in_channels: int + tasks: list[LabelType] = [LabelType.SEGMENTATION] - def __init__(self, **kwargs): + def __init__(self, **kwargs: Any): """Basic segmentation FCN head. - Note that it doesn't ensure that ouptut is same size as input. - - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. + Adapted from: U{https://github.com/pytorch/vision/blob/main/torchvision/models/segmentation/fcn.py} + @license: U{BSD-3 } """ super().__init__(**kwargs) + h, w = self.original_in_shape[1:] + num_up = infer_upscale_factor((self.in_height, self.in_width), (h, w)) - original_height = self.original_in_shape[1] - num_up = infer_upscale_factor(self.in_height, original_height, strict=False) - - modules = [] + modules: list[nn.Module] = [] in_channels = self.in_channels for _ in range(int(num_up)): modules.append( diff --git a/luxonis_train/nodes/necks/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck.py index bd05f083..a2bf668e 100644 --- a/luxonis_train/nodes/necks/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck.py @@ -1,21 +1,15 @@ -"""Implementation of the RepPANNeck module. - -Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial -Applications}. -It has the balance of feature fusion ability and hardware efficiency. 
-""" - - -from typing import Literal, cast +from typing import Any, Literal from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import RepDownBlock, RepUpBlock -from luxonis_train.utils.general import make_divisible +from luxonis_train.utils import make_divisible class RepPANNeck(BaseNode[list[Tensor], list[Tensor]]): + in_channels: list[int] + def __init__( self, num_heads: Literal[2, 3, 4] = 3, @@ -23,23 +17,27 @@ def __init__( num_repeats: list[int] | None = None, depth_mul: float = 0.33, width_mul: float = 0.25, - **kwargs, + **kwargs: Any, ): - """Constructor for the RepPANNeck module. + """Implementation of the RepPANNeck module. + + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications}. + It has the balance of feature fusion ability and hardware efficiency. @type num_heads: Literal[2,3,4] - @param num_heads: Number of output heads. Defaults to 3. ***Note: Should be same - also on head in most cases.*** + @param num_heads: Number of output heads. Defaults to 3. B{Note: Should be same + also on head in most cases.} @type channels_list: list[int] | None - @param channels_list: List of number of channels for each block. Defaults to - C{[256, 128, 128, 256, 256, 512]}. + @param channels_list: List of number of channels for each block. + Defaults to C{[256, 128, 128, 256, 256, 512]}. @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. Defaults to C{[12, - 12, 12, 12]}. + @param num_repeats: List of number of repeats of RepVGGBlock. + Defaults to C{[12, 12, 12, 12]}. @type depth_mul: float - @param depth_mul: Depth multiplier. Defaults to 0.33. + @param depth_mul: Depth multiplier. Defaults to C{0.33}. @type width_mul: float - @param width_mul: Width multiplier. Defaults to 0.25. + @param width_mul: Width multiplier. Defaults to C{0.25}. 
""" super().__init__(**kwargs) @@ -57,9 +55,9 @@ def __init__( self.up_blocks = nn.ModuleList() - in_channels = cast(list[int], self.in_channels)[-1] + in_channels = self.in_channels[-1] out_channels = channels_list[0] - in_channels_next = cast(list[int], self.in_channels)[-2] + in_channels_next = self.in_channels[-2] curr_num_repeats = num_repeats[0] up_out_channel_list = [in_channels] # used in DownBlocks @@ -78,7 +76,7 @@ def __init__( in_channels = out_channels out_channels = channels_list[i] - in_channels_next = cast(list[int], self.in_channels)[-1 - (i + 1)] + in_channels_next = self.in_channels[-1 - (i + 1)] curr_num_repeats = num_repeats[i] self.down_blocks = nn.ModuleList() @@ -110,17 +108,16 @@ def __init__( curr_num_repeats = num_repeats_down_blocks[i] def forward(self, inputs: list[Tensor]) -> list[Tensor]: - x0 = inputs[-1] - up_block_outs = [] - for i, up_block in enumerate(self.up_blocks): - conv_out, x0 = up_block(x0, inputs[-1 - (i + 1)]) + x = inputs[-1] + up_block_outs: list[Tensor] = [] + for up_block, input_ in zip(self.up_blocks, inputs[-2::-1], strict=False): + conv_out, x = up_block(x, input_) up_block_outs.append(conv_out) - up_block_outs.reverse() - outs = [x0] - for i, down_block in enumerate(self.down_blocks): - x0 = down_block(x0, up_block_outs[i]) - outs.append(x0) + outs = [x] + for down_block, up_out in zip(self.down_blocks, reversed(up_block_outs)): + x = down_block(x, up_out) + outs.append(x) return outs def _fit_to_num_heads( @@ -130,11 +127,11 @@ def _fit_to_num_heads( Also scales the numbers based on offset """ - if self.num_heads == 3: - ... 
- elif self.num_heads == 2: + if self.num_heads == 2: channels_list = [channels_list[0], channels_list[4], channels_list[5]] num_repeats = [num_repeats[0], num_repeats[3]] + elif self.num_heads == 3: + return channels_list, num_repeats elif self.num_heads == 4: channels_list = [ channels_list[0], @@ -158,6 +155,7 @@ def _fit_to_num_heads( else: raise ValueError( f"Specified number of heads ({self.num_heads}) not supported." + " The number of heads should be 2, 3 or 4." ) return channels_list, num_repeats diff --git a/luxonis_train/optimizers/__init__.py b/luxonis_train/optimizers/__init__.py new file mode 100644 index 00000000..acd73792 --- /dev/null +++ b/luxonis_train/optimizers/__init__.py @@ -0,0 +1 @@ +from .optimizers import * diff --git a/luxonis_train/utils/optimizers.py b/luxonis_train/optimizers/optimizers.py similarity index 92% rename from luxonis_train/utils/optimizers.py rename to luxonis_train/optimizers/optimizers.py index 7583cef9..c2a4bf12 100644 --- a/luxonis_train/utils/optimizers.py +++ b/luxonis_train/optimizers/optimizers.py @@ -1,4 +1,4 @@ -from torch import optim +import torch.optim as optim from luxonis_train.utils.registry import OPTIMIZERS diff --git a/luxonis_train/schedulers/__init__.py b/luxonis_train/schedulers/__init__.py new file mode 100644 index 00000000..99bcd9d9 --- /dev/null +++ b/luxonis_train/schedulers/__init__.py @@ -0,0 +1 @@ +from .schedulers import * diff --git a/luxonis_train/utils/schedulers.py b/luxonis_train/schedulers/schedulers.py similarity index 100% rename from luxonis_train/utils/schedulers.py rename to luxonis_train/schedulers/schedulers.py diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 609304c3..30654ffc 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -1,5 +1,53 @@ -from .assigners import * -from .config import * -from .loaders import * -from .optimizers import * -from .schedulers import * +from .boundingbox import ( +
anchors_for_fpn_features, + anchors_from_dataset, + bbox2dist, + bbox_iou, + compute_iou_loss, + dist2bbox, + match_to_anchor, + non_max_suppression, + process_bbox_predictions, +) +from .config import Config +from .dataset_metadata import DatasetMetadata +from .exceptions import IncompatibleException +from .general import ( + get_with_default, + infer_upscale_factor, + make_divisible, + to_shape_packet, +) +from .graph import is_acyclic, traverse_graph +from .keypoints import get_sigmas, process_keypoints_predictions +from .tracker import LuxonisTrackerPL +from .types import AttachIndexType, Kwargs, Labels, Packet + +__all__ = [ + "Config", + "AttachIndexType", + "Kwargs", + "Labels", + "Packet", + "IncompatibleException", + "DatasetMetadata", + "make_divisible", + "infer_upscale_factor", + "to_shape_packet", + "get_with_default", + "LuxonisTrackerPL", + "registry", + "match_to_anchor", + "dist2bbox", + "bbox2dist", + "bbox_iou", + "non_max_suppression", + "anchors_from_dataset", + "anchors_for_fpn_features", + "process_bbox_predictions", + "compute_iou_loss", + "process_keypoints_predictions", + "get_sigmas", + "is_acyclic", + "traverse_graph", +] diff --git a/luxonis_train/utils/boxutils.py b/luxonis_train/utils/boundingbox.py similarity index 94% rename from luxonis_train/utils/boxutils.py rename to luxonis_train/utils/boundingbox.py index 3a206c75..2a3998ac 100644 --- a/luxonis_train/utils/boxutils.py +++ b/luxonis_train/utils/boundingbox.py @@ -1,12 +1,10 @@ -"""This module contains various utility functions for working with bounding boxes.""" - import math from typing import Literal, TypeAlias import torch +from luxonis_ml.data import LabelType from scipy.cluster.vq import kmeans from torch import Tensor -from torch.utils.data import DataLoader from torchvision.ops import ( batched_nms, box_convert, @@ -15,24 +13,11 @@ generalized_box_iou, ) -from luxonis_train.utils.types import LabelType +from luxonis_train.loaders import BaseLoaderTorch IoUType: TypeAlias 
= Literal["none", "giou", "diou", "ciou", "siou"] BBoxFormatType: TypeAlias = Literal["xyxy", "xywh", "cxcywh"] -__all__ = [ - "anchors_for_fpn_features", - "anchors_from_dataset", - "bbox2dist", - "bbox_iou", - "compute_iou_loss", - "dist2bbox", - "match_to_anchor", - "non_max_suppression", - "process_bbox_predictions", - "process_keypoints_predictions", -] - def match_to_anchor( targets: Tensor, @@ -409,7 +394,7 @@ def non_max_suppression( def anchors_from_dataset( - loader: DataLoader, + loader: BaseLoaderTorch, n_anchors: int = 9, n_generations: int = 1000, ratio_threshold: float = 4.0, @@ -432,18 +417,13 @@ def anchors_from_dataset( @return: Proposed anchors and the best possible recall. """ - widths = [] - inputs = None - for inp, labels in loader: + widths: list[Tensor] = [] + for _, labels in loader: for tensor, label_type in labels.values(): if label_type == LabelType.BOUNDINGBOX: curr_wh = tensor[:, 4:] widths.append(curr_wh) - inputs = inp - assert inputs is not None, "No inputs found in data loader" - _, _, h, w = inputs[ - loader.dataset.image_source # type: ignore - ].shape # assuming all images are same size + _, h, w = loader.input_shape img_size = torch.tensor([w, h]) wh = torch.vstack(widths) * img_size @@ -595,26 +575,6 @@ def anchors_for_fpn_features( ) -def process_keypoints_predictions(keypoints: Tensor) -> tuple[Tensor, Tensor, Tensor]: - """Extracts x, y and visibility from keypoints predictions. - - @type keypoints: Tensor - @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 - and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. - - @rtype: tuple[Tensor, Tensor, Tensor] - @return: x, y and visibility tensors. 
- """ - x = keypoints[..., ::3] * 2.0 - 0.5 - y = keypoints[..., 1::3] * 2.0 - 0.5 - visibility = keypoints[..., 2::3] - return ( - x, - y, - visibility, - ) - - def process_bbox_predictions( bbox: Tensor, anchor: Tensor ) -> tuple[Tensor, Tensor, Tensor]: diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 44c00637..f6cb5ac2 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -122,7 +122,7 @@ def check_predefined_model(self) -> Self: @model_validator(mode="after") def check_graph(self) -> Self: - from luxonis_train.utils.general import is_acyclic + from luxonis_train.utils import is_acyclic graph = {node.alias or node.name: node.inputs for node in self.nodes} if not is_acyclic(graph): @@ -147,7 +147,7 @@ def check_unique_names(self) -> Self: ("metrics", self.metrics), ("visualizers", self.visualizers), ]: - names = set() + names: set[str] = set() for obj in objects: obj: AttachedModuleConfig name = obj.alias or obj.name diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py new file mode 100644 index 00000000..6464b69a --- /dev/null +++ b/luxonis_train/utils/dataset_metadata.py @@ -0,0 +1,157 @@ +from luxonis_train.loaders import BaseLoaderTorch +from luxonis_train.utils import anchors_from_dataset + + +class DatasetMetadata: + """Metadata about the dataset.""" + + def __init__( + self, + *, + classes: dict[str, list[str]] | None = None, + n_keypoints: dict[str, int] | None = None, + loader: BaseLoaderTorch | None = None, + ): + """An object containing metadata about the dataset. Used to infer the number of + classes, number of keypoints, I{etc.} instead of passing them as arguments to + the model. + + @type classes: dict[str, list[str]] | None + @param classes: Dictionary mapping tasks to lists of class names. + @type n_keypoints: dict[str, int] | None + @param n_keypoints: Dictionary mapping tasks to the number of keypoints. 
+ @type loader: BaseLoaderTorch | None + @param loader: Dataset loader. + """ + self._classes = classes or {} + self._n_keypoints = n_keypoints or {} + self.loader = loader + + @property + def classes(self) -> dict[str, list[str]]: + """Dictionary mapping label types to lists of class names. + + @type: dict[str, list[str]] + @raises ValueError: If classes were not provided during initialization. + """ + if self._classes is None: + raise ValueError( + "Trying to access `classes`, but they were not " + "provided during initialization." + ) + return self._classes + + def n_classes(self, task: str | None) -> int: + """Gets the number of classes for the specified task. + + @type task: str | None + @param task: Task to get the number of classes for. + @rtype: int + @return: Number of classes for the specified label type. + @raises ValueError: If the dataset loader was not provided during + initialization. + @raises ValueError: If the dataset contains different number of classes for + different label types. + """ + if task is not None: + if task not in self.classes: + raise ValueError(f"Task '{task}' is not present in the dataset.") + return len(self.classes[task]) + n_classes = len(list(self.classes.values())[0]) + for classes in self.classes.values(): + if len(classes) != n_classes: + raise ValueError( + "The dataset contains different number of classes for different tasks." + ) + return n_classes + + def n_keypoints(self, task: str | None) -> int: + """Gets the number of keypoints for the specified task. + + @type task: str | None + @param task: Task to get the number of keypoints for. + @rtype: int + @return: Number of keypoints for the specified label type. + @raises ValueError: If the dataset loader was not provided during initialization + or if the dataset does not contain the specified task.
+ """ + if task is not None: + if task not in self._n_keypoints: + raise ValueError(f"Task '{task}' is not present in the dataset.") + return self._n_keypoints[task] + if len(self._n_keypoints) > 1: + raise ValueError( + "The dataset specifies multiple keypoint tasks, " + "please specify the 'task' argument to get the number of keypoints." + ) + return next(iter(self._n_keypoints.values())) + + def class_names(self, task: str | None) -> list[str]: + """Gets the class names for the specified task. + + @type task: str | None + @param task: Task to get the class names for. + @rtype: list[str] + @return: List of class names for the specified label type. + @raises ValueError: If the dataset loader was not provided during + initialization. + @raises ValueError: If the dataset contains different class names for different + label types. + """ + if task is not None: + if task not in self.classes: + raise ValueError(f"Task type {task} is not present in the dataset.") + return self.classes[task] + class_names = list(self.classes.values())[0] + for classes in self.classes.values(): + if classes != class_names: + raise ValueError( + "The dataset contains different class names for different tasks." + ) + return class_names + + def autogenerate_anchors(self, num_heads: int) -> tuple[list[list[float]], float]: + """Automatically generates anchors for the provided dataset. + + @type num_heads: int + @param num_heads: Number of heads to generate anchors for. + @rtype: tuple[list[list[float]], float] + @return: List of anchors in [-1,6] format and recall of the anchors. + @raises ValueError: If the dataset loader was not provided during + initialization. + """ + if self.loader is None: + raise ValueError( + "Cannot generate anchors without a dataset loader. " + "Please provide a dataset loader to the constructor " + "or call `set_loader` method." 
+ ) + + proposed_anchors, recall = anchors_from_dataset( + self.loader, n_anchors=num_heads * 3 + ) + return proposed_anchors.reshape(-1, 6).tolist(), recall + + def set_loader(self, loader: BaseLoaderTorch) -> None: + """Sets the dataset loader. + + @type loader: BaseLoaderTorch + @param loader: Dataset loader. + """ + self.loader = loader + + @classmethod + def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": + """Creates a L{DatasetMetadata} object from a dataset loader. + + @type loader: BaseLoaderTorch + @param loader: Loader to create the metadata from. + @rtype: DatasetMetadata + @return: Instance of L{DatasetMetadata} created from the provided loader. + """ + classes = loader.get_classes() + n_keypoints = loader.get_n_keypoints() + + instance = cls(classes=classes, n_keypoints=n_keypoints) + instance.set_loader(loader) + return instance diff --git a/luxonis_train/utils/exceptions.py b/luxonis_train/utils/exceptions.py new file mode 100644 index 00000000..6621e4eb --- /dev/null +++ b/luxonis_train/utils/exceptions.py @@ -0,0 +1,9 @@ +class IncompatibleException(Exception): + """Raised when two parts of the model are incompatible with each other.""" + + @classmethod + def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str): + return cls( + f"{class_name} requires '{task}' label, but it was not found in " + f"the label dictionary. Available labels: {present_tasks}."
+ ) diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 5ae3b43f..b5899b3e 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -1,272 +1,129 @@ import logging import math -from copy import deepcopy -from typing import Generator, TypeVar +from typing import TypeVar -from pydantic import BaseModel from torch import Size, Tensor -from torch.utils.data import DataLoader -from luxonis_train.utils.boxutils import anchors_from_dataset -from luxonis_train.utils.loaders import BaseLoaderTorch from luxonis_train.utils.types import Packet +logger = logging.getLogger(__name__) -class DatasetMetadata: - """Metadata about the dataset.""" - def __init__( - self, - *, - classes: dict[str, list[str]] | None = None, - n_keypoints: dict[str, int] | None = None, - loader: DataLoader | None = None, - ): - """An object containing metadata about the dataset. Used to infer the number of - classes, number of keypoints, I{etc.} instead of passing them as arguments to - the model. - - @type classes: dict[str, list[str]] | None - @param classes: Dictionary mapping tasks to lists of class names. - @type n_keypoints: dict[str, int] | None - @param n_keypoints: Dictionary mapping tasks to the number of keypoints. - @type loader: DataLoader | None - @param loader: Dataset loader. - """ - self._classes = classes or {} - self._n_keypoints = n_keypoints or {} - self._loader = loader +def make_divisible(x: int | float, divisor: int) -> int: + """Upward revision the value x to make it evenly divisible by the divisor. - @property - def classes(self) -> dict[str, list[str]]: - """Dictionary mapping label types to lists of class names. + Equivalent to M{ceil(x / divisor) * divisor}. - @type: dict[str, list[str]] - @raises ValueError: If classes were not provided during initialization. - """ - if self._classes is None: - raise ValueError( - "Trying to access `classes`, byt they were not" - "provided during initialization." 
- ) - return self._classes + @type x: int | float + @param x: Value to be revised. + @type divisor: int + @param divisor: Divisor. + @rtype: int + @return: Revised value. + """ + return math.ceil(x / divisor) * divisor - def n_classes(self, task: str | None) -> int: - """Gets the number of classes for the specified task. - @type task: str | None - @param task: Task to get the number of classes for. - @rtype: int - @return: Number of classes for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different number of classes for - different label types. - """ - if task is not None: - if task not in self.classes: - raise ValueError(f"Task '{task}' is not present in the dataset.") - return len(self.classes[task]) - n_classes = len(list(self.classes.values())[0]) - for classes in self.classes.values(): - if len(classes) != n_classes: - raise ValueError( - "The dataset contains different number of classes for different tasks." - ) - return n_classes +def infer_upscale_factor( + in_size: tuple[int, int] | int, orig_size: tuple[int, int] | int +) -> int: + """Infer the upscale factor from the input shape and the original shape. + + @type in_size: tuple[int, int] | int + @param in_size: Input shape as a tuple of (height, width) or just one of them. + @type orig_size: tuple[int, int] | int + @param orig_size: Original shape as a tuple of (height, width) or just one of them. + @rtype: int + @return: Upscale factor. + @raise ValueError: If the C{in_size} cannot be upscaled to the C{orig_size}. This + can happen if the upscale factors are not integers or are different. 
+ """ - def n_keypoints(self, task: str | None) -> int: - if task is not None: - if task not in self._n_keypoints: - raise ValueError(f"Task '{task}' is not present in the dataset.") - return self._n_keypoints[task] - if len(self._n_keypoints) > 1: + def _infer_upscale_factor(in_size: int, orig_size: int) -> int | float: + factor = math.log2(orig_size) - math.log2(in_size) + if abs(round(factor) - factor) < 1e-6: + return int(round(factor)) + return factor + + if isinstance(in_size, int): + in_size = (in_size, in_size) + if isinstance(orig_size, int): + orig_size = (orig_size, orig_size) + in_height, in_width = in_size + orig_height, orig_width = orig_size + + width_factor = _infer_upscale_factor(in_width, orig_width) + height_factor = _infer_upscale_factor(in_height, orig_height) + + match (width_factor, height_factor): + case (int(wf), int(hf)) if wf == hf: + return wf + case (int(wf), int(hf)): raise ValueError( - "The dataset specifies multiple keypoint tasks, " - "please specify the 'task' argument to get the number of keypoints." + f"Width and height upscale factors are different. " + f"Width: {wf}, height: {hf}." ) - return next(iter(self._n_keypoints.values())) - - def class_names(self, task: str | None) -> list[str]: - """Gets the class names for the specified task. - - @type task: str | None - @param task: Task to get the class names for. - @rtype: list[str] - @return: List of class names for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different class names for different - label types. - """ - if task is not None: - if task not in self.classes: - raise ValueError(f"Task type {task} is not present in the dataset.") - return self.classes[task] - class_names = list(self.classes.values())[0] - for classes in self.classes.values(): - if classes != class_names: - raise ValueError( - "The dataset contains different class names for different tasks." 
- ) - return class_names - - def autogenerate_anchors(self, n_heads: int) -> tuple[list[list[float]], float]: - """Automatically generates anchors for the provided dataset. - - @type n_heads: int - @param n_heads: Number of heads to generate anchors for. - @rtype: tuple[list[list[float]], float] - @return: List of anchors in [-1,6] format and recall of the anchors. - @raises ValueError: If the dataset loader was not provided during - initialization. - """ - if self.loader is None: + case (int(wf), float(hf)): raise ValueError( - "Cannot generate anchors without a dataset loader. " - "Please provide a dataset loader to the constructor " - "or call `set_loader` method." + f"Width upscale factor is an integer, but height upscale factor is not. " + f"Width: {wf}, height: {hf}." ) - - proposed_anchors, recall = anchors_from_dataset( - self.loader, n_anchors=n_heads * 3 - ) - return proposed_anchors.reshape(-1, 6).tolist(), recall - - def set_loader(self, loader: DataLoader) -> None: - """Sets the dataset loader. - - @type loader: DataLoader - @param loader: Dataset loader. - """ - self.loader = loader - - @classmethod - def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": - """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. - - @type dataset: LuxonisDataset - @param dataset: Dataset to create the metadata from. - @rtype: DatasetMetadata - @return: Instance of L{DatasetMetadata} created from the provided dataset. 
- """ - classes = loader.get_classes() - n_keypoints = loader.get_n_keypoints() - - return cls(classes=classes, n_keypoints=n_keypoints) - - -def make_divisible(x: int | float, divisor: int) -> int: - """Upward revision the value x to make it evenly divisible by the divisor.""" - return math.ceil(x / divisor) * divisor - - -def infer_upscale_factor( - in_height: int, orig_height: int, strict: bool = True, warn: bool = True -) -> int: - """Infer the upscale factor from the input height and original height.""" - num_up = math.log2(orig_height) - math.log2(in_height) - if abs(round(num_up) - num_up) < 1e-6: - return int(round(num_up)) - elif not strict: - if warn: - logging.getLogger(__name__).warning( - f"Upscale factor is not an integer: {num_up}. " - "Output shape will not be the same as input shape." + case (float(wf), int(hf)): + raise ValueError( + f"Height upscale factor is an integer, but width upscale factor is not. " + f"Width: {wf}, height: {hf}." + ) + case (float(wf), float(hf)): + raise ValueError( + "Width and height upscale factors are not integers. " + f"Width: {wf}, height: {hf}." ) - return round(num_up) - else: - raise ValueError( - f"Upscale factor is not an integer: {num_up}. " - "Output shape will not be the same as input shape." - ) + case _: + raise ValueError("Unexpected error.") def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: + """Converts a packet of tensors to a packet of shapes. Used for debugging purposes. + + @type packet: Packet[Tensor] + @param packet: Packet of tensors. + @rtype: Packet[Size] + @return: Packet of shapes. + """ shape_packet: Packet[Size] = {} for name, value in packet.items(): shape_packet[name] = [x.shape for x in value] return shape_packet -def is_acyclic(graph: dict[str, list[str]]) -> bool: - """Tests if graph is acyclic. - - @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). 
- @rtype: bool - @return: True if graph is acyclic, False otherwise. - """ - graph = graph.copy() - - def dfs(node: str, visited: set[str], recursion_stack: set[str]): - visited.add(node) - recursion_stack.add(node) - - for predecessor in graph.get(node, []): - if predecessor in recursion_stack: - return True - if predecessor not in visited: - if dfs(predecessor, visited, recursion_stack): - return True - - recursion_stack.remove(node) - return False - - visited: set[str] = set() - recursion_stack: set[str] = set() - - for node in graph.keys(): - if node not in visited: - if dfs(node, visited, recursion_stack): - return False - - return True - - -def validate_packet(data: Packet[Tensor], protocol: type[BaseModel]) -> Packet[Tensor]: - return protocol(**data).model_dump() - - T = TypeVar("T") -# TEST: -def traverse_graph( - graph: dict[str, list[str]], nodes: dict[str, T] -) -> Generator[tuple[str, T, list[str], list[str]], None, None]: - """Traverses the graph in topological order. - - @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). - @type nodes: dict[str, T] - @param nodes: Dictionary mapping node names to node objects. - @rtype: Generator[tuple[str, T, list[str], list[str]], None, None] - @return: Generator of tuples containing node name, node object, node dependencies - and unprocessed nodes. - @raises RuntimeError: If the graph is malformed. +def get_with_default( + value: T | None, action_name: str, caller_name: str | None = None, *, default: T +) -> T: + """Returns value if it is not None, otherwise returns the default value and logs + an info message. + + @type value: T | None + @param value: Value to return. + @type action_name: str + @param action_name: Name of the action for which the default value is being used. + Used for logging. + @type caller_name: str | None + @param caller_name: Name of the caller function. Used for logging.
+ @type default: T + @param default: Default value to return if C{value} is C{None}. + @rtype: T + @return: C{value} if it is not C{None}, otherwise C{default}. """ - unprocessed_nodes = sorted( - set(nodes.keys()) - ) # sort the set to allow reproducibility - processed: set[str] = set() + if value is not None: + return value - graph = deepcopy(graph) - while unprocessed_nodes: - unprocessed_nodes_copy = unprocessed_nodes.copy() - for node_name in unprocessed_nodes_copy: - node_dependencies = graph[node_name] - if not node_dependencies or all( - dependency in processed for dependency in node_dependencies - ): - yield node_name, nodes[node_name], node_dependencies, unprocessed_nodes - processed.add(node_name) - unprocessed_nodes.remove(node_name) + msg = f"Default value of {default} is being used for {action_name}." - if unprocessed_nodes_copy == unprocessed_nodes: - raise RuntimeError( - "Malformed graph. " - "Please check that all nodes are connected in a directed acyclic graph." - ) + if caller_name: + msg = f"[{caller_name}] {msg}" + + logger.info(msg) + return default diff --git a/luxonis_train/utils/graph.py b/luxonis_train/utils/graph.py new file mode 100644 index 00000000..849f440f --- /dev/null +++ b/luxonis_train/utils/graph.py @@ -0,0 +1,80 @@ +from copy import deepcopy +from typing import Generator, TypeVar + + +def is_acyclic(graph: dict[str, list[str]]) -> bool: + """Tests if graph is acyclic. + + @type graph: dict[str, list[str]] + @param graph: Graph in a format of a dictionary of predecessors. Keys are node + names, values are inputs to the node (list of node names). + @rtype: bool + @return: True if graph is acyclic, False otherwise.
+ """ + graph = graph.copy() + + def dfs(node: str, visited: set[str], recursion_stack: set[str]): + visited.add(node) + recursion_stack.add(node) + + for predecessor in graph.get(node, []): + if predecessor in recursion_stack: + return True + if predecessor not in visited: + if dfs(predecessor, visited, recursion_stack): + return True + + recursion_stack.remove(node) + return False + + visited: set[str] = set() + recursion_stack: set[str] = set() + + for node in graph.keys(): + if node not in visited: + if dfs(node, visited, recursion_stack): + return False + + return True + + +T = TypeVar("T") + + +def traverse_graph( + graph: dict[str, list[str]], nodes: dict[str, T] +) -> Generator[tuple[str, T, list[str], list[str]], None, None]: + """Traverses the graph in topological order. + + @type graph: dict[str, list[str]] + @param graph: Graph in a format of a dictionary of predecessors. Keys are node + names, values are inputs to the node (list of node names). + @type nodes: dict[str, T] + @param nodes: Dictionary mapping node names to node objects. + @rtype: Generator[tuple[str, T, list[str], list[str]], None, None] + @return: Generator of tuples containing node name, node object, node dependencies + and unprocessed nodes. + @raises RuntimeError: If the graph is malformed. + """ + unprocessed_nodes = sorted( + set(nodes.keys()) + ) # sort the set to allow reproducibility + processed: set[str] = set() + + graph = deepcopy(graph) + while unprocessed_nodes: + unprocessed_nodes_copy = unprocessed_nodes.copy() + for node_name in unprocessed_nodes_copy: + node_dependencies = graph[node_name] + if not node_dependencies or all( + dependency in processed for dependency in node_dependencies + ): + yield node_name, nodes[node_name], node_dependencies, unprocessed_nodes + processed.add(node_name) + unprocessed_nodes.remove(node_name) + + if unprocessed_nodes_copy == unprocessed_nodes: + raise RuntimeError( + "Malformed graph. 
" + "Please check that all nodes are connected in a directed acyclic graph." + ) diff --git a/luxonis_train/utils/keypoints.py b/luxonis_train/utils/keypoints.py new file mode 100644 index 00000000..b3740e44 --- /dev/null +++ b/luxonis_train/utils/keypoints.py @@ -0,0 +1,81 @@ +import logging + +import torch +from torch import Tensor + +logger = logging.getLogger(__name__) + + +def process_keypoints_predictions(keypoints: Tensor) -> tuple[Tensor, Tensor, Tensor]: + """Extracts x, y and visibility from keypoints predictions. + + @type keypoints: Tensor + @param keypoints: Keypoints predictions. The last dimension must be divisible by 3 + and is expected to be in format [x1, y1, v1, x2, y2, v2, ...]. + + @rtype: tuple[Tensor, Tensor, Tensor] + @return: x, y and visibility tensors. + """ + x = keypoints[..., ::3] * 2.0 - 0.5 + y = keypoints[..., 1::3] * 2.0 - 0.5 + visibility = keypoints[..., 2::3] + return x, y, visibility + + +def get_sigmas( + sigmas: list[float] | None, n_keypoints: int, caller_name: str | None +) -> Tensor: + """Validate or create sigma values for each keypoint. + + @type sigmas: list[float] | None + @param sigmas: List of sigmas for each keypoint. If C{None}, then default sigmas are + used. + @type n_keypoints: int + @param n_keypoints: Number of keypoints. + @type caller_name: str | None + @param caller_name: Name of the caller function. Used for logging. + @rtype: Tensor + @return: Tensor of sigmas. + """ + if sigmas is not None: + if len(sigmas) == n_keypoints: + return torch.tensor(sigmas, dtype=torch.float32) + else: + error_msg = "The length of the sigmas list must be the same as the number of keypoints." + if caller_name: + error_msg = f"[{caller_name}] {error_msg}" + raise ValueError(error_msg) + else: + if n_keypoints == 17: + msg = "Default COCO sigmas are being used." 
+ if caller_name: + msg = f"[{caller_name}] {msg}" + logger.warning(msg) + return torch.tensor( + [ + 0.026, + 0.025, + 0.025, + 0.035, + 0.035, + 0.079, + 0.079, + 0.072, + 0.072, + 0.062, + 0.062, + 0.107, + 0.107, + 0.087, + 0.087, + 0.089, + 0.089, + ], + dtype=torch.float32, + ) + else: + msg = "Default sigma of 0.04 is being used for each keypoint." + if caller_name: + msg = f"[{caller_name}] {msg}" + logger.info(msg) + return torch.tensor([0.04] * n_keypoints, dtype=torch.float32) diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index 2222ecbd..f2006378 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -1,46 +1,43 @@ """This module implements a metaclass for automatic registration of classes.""" +from typing import Any + import lightning.pytorch as pl -import torch from luxonis_ml.utils.registry import Registry +from torch.optim.lr_scheduler import _LRScheduler +from torch.optim.optimizer import Optimizer -import luxonis_train +import luxonis_train as lt CALLBACKS: Registry[type[pl.Callback]] = Registry(name="callbacks") """Registry for all callbacks.""" -LOADERS: Registry[type["luxonis_train.utils.loaders.BaseLoaderTorch"]] = Registry( - name="loaders" -) +LOADERS: Registry[type["lt.utils.loaders.BaseLoaderTorch"]] = Registry(name="loaders") """Registry for all loaders.""" -LOSSES: Registry[type["luxonis_train.attached_modules.BaseLoss"]] = Registry( +LOSSES: Registry[type["lt.attached_modules.BaseLoss[Any, Any]"]] = Registry( name="losses" ) """Registry for all losses.""" -METRICS: Registry[type["luxonis_train.attached_modules.BaseMetric"]] = Registry( +METRICS: Registry[type["lt.attached_modules.BaseMetric[Any, Any]"]] = Registry( name="metrics" ) """Registry for all metrics.""" -MODELS: Registry[type["luxonis_train.models.BasePredefinedModel"]] = Registry( - name="models" -) +MODELS: Registry[type["lt.models.BasePredefinedModel"]] = Registry(name="models") """Registry for all models.""" 
-NODES: Registry[type["luxonis_train.nodes.BaseNode"]] = Registry(name="nodes") +NODES: Registry[type["lt.nodes.BaseNode[Any, Any]"]] = Registry(name="nodes") """Registry for all nodes.""" -OPTIMIZERS: Registry[type[torch.optim.Optimizer]] = Registry(name="optimizers") +OPTIMIZERS: Registry[type[Optimizer]] = Registry(name="optimizers") """Registry for all optimizers.""" -SCHEDULERS: Registry[type[torch.optim.lr_scheduler._LRScheduler]] = Registry( - name="schedulers" -) +SCHEDULERS: Registry[type[_LRScheduler]] = Registry(name="schedulers") """Registry for all schedulers.""" -VISUALIZERS: Registry[type["luxonis_train.visualizers.BaseVisualizer"]] = Registry( +VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer[Any, Any]"]] = Registry( "visualizers" ) """Registry for all visualizers.""" diff --git a/luxonis_train/utils/tracker.py b/luxonis_train/utils/tracker.py index 4df76edd..1c4a42e7 100644 --- a/luxonis_train/utils/tracker.py +++ b/luxonis_train/utils/tracker.py @@ -1,3 +1,5 @@ +from typing import Any + from lightning.pytorch.loggers.logger import Logger from lightning.pytorch.utilities import rank_zero_only # type: ignore from luxonis_ml.tracker import LuxonisTracker @@ -6,7 +8,7 @@ class LuxonisTrackerPL(LuxonisTracker, Logger): """Implementation of LuxonisTracker that is compatible with PytorchLightning.""" - def __init__(self, *, _auto_finalize: bool = True, **kwargs): + def __init__(self, *, _auto_finalize: bool = True, **kwargs: Any): """ @type _auto_finalize: bool @param _auto_finalize: If True, the run will be finalized automatically when the training ends. 
@@ -21,7 +23,7 @@ def __init__(self, *, _auto_finalize: bool = True, **kwargs): self.finalize = self._finalize @rank_zero_only - def _finalize(self, status: str = "success") -> None: + def _finalize(self, status: str = "success") -> None: # pragma: no cover """Finalizes current run.""" if self.is_tensorboard: self.experiment["tensorboard"].flush() diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py index 84b8e019..158cf185 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -1,12 +1,14 @@ -from typing import Annotated, Any, Literal, TypeVar +from typing import Any, Literal, TypeVar from luxonis_ml.data import LabelType -from pydantic import BaseModel, Field, ValidationError from torch import Size, Tensor Kwargs = dict[str, Any] -OutputTypes = Literal["boundingbox", "class", "keypoints", "segmentation", "features"] +"""Kwargs is a dictionary containing keyword arguments.""" + Labels = dict[str, tuple[Tensor, LabelType]] +"""Labels is a dictionary containing a tuple of tensors and their corresponding label +type.""" AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int] """AttachIndexType is used to specify to which output of the prevoius node does the @@ -22,31 +24,3 @@ It is used to pass data between different nodes of the network graph. """ - - -class IncompatibleException(Exception): - """Raised when two parts of the model are incompatible with each other.""" - - @classmethod - def from_validation_error(cls, val_error: ValidationError, class_name: str): - return cls( - f"{class_name} received an input not conforming to the protocol. " - f"Validation error: {val_error.errors(include_input=False, include_url=False)}." - ) - - @classmethod - def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str): - return cls( - f"{class_name} requires '{task}' label, but it was not found in " - f"the label dictionary. Available labels: {present_tasks}." 
- ) - - -class BaseProtocol(BaseModel): - class Config: - arbitrary_types_allowed = True - extra = "forbid" - - -class FeaturesProtocol(BaseProtocol): - features: Annotated[list[Tensor], Field(min_length=1)] diff --git a/pyproject.toml b/pyproject.toml index 2093e25b..5ff79282 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,3 +54,32 @@ ignore_missing_imports = true [tool.pyright] typeCheckingMode = "basic" +reportMissingTypeStubs = "none" +reportPrivateImportUsage = "none" +reportPrivateUsage = "none" +reportIncompatibleVariableOverride = "none" +reportIncompatibleMethodOverride = "none" +reportUnnecessaryIsInstance = "none" + + +[tool.coverage.run] +omit = [ + "**/__main__.py", + "**/gpu_stats_monitor.py" +] + +[tool.coverage.report] +exclude_also = [ + "def __repr__", + "def __rich_repr__", + "def __str__", + "raise AssertionError", + "raise NotImplementedError", + "except ImportError", + "@abstractmethod", + "@overload", + "exit", + "cv2\\.imshow", + "cv2\\.waitKey", + "logger\\.", +] diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index ae9f8069..de8a0c68 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -3,54 +3,26 @@ model: name: parking_lot_model nodes: - - name: ReXNetV1_lite - alias: rexnet-detection-backbone - - name: EfficientRep - alias: efficient-detection-backbone - params: - channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] - depth_mul: 0.33 - width_mul: 0.33 + alias: backbone - name: RepPANNeck - alias: efficient-detection-neck + alias: neck inputs: - - efficient-detection-backbone - params: - channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] - depth_mul: 0.33 - width_mul: 0.33 - - - name: MicroNet - alias: color-segmentation-backbone - - - name: MobileOne - alias: brand-segmentation-backbone - - - name: MobileNetV2 - alias: vehicle-type-segmentation-backbone - - - name: ContextSpatial - alias: 
context-brand-segmentation-backbone + - backbone - name: EfficientBBoxHead alias: bbox-head inputs: - - efficient-detection-neck + - neck - name: ImplicitKeypointBBoxHead alias: car-detection-head inputs: - - rexnet-detection-backbone + - neck task: keypoints: car-keypoints boundingbox: car-boundingbox - params: - conf_thres: 0.25 - iou_thres: 0.45 - name: EfficientKeypointBBoxHead alias: motorbike-detection-head @@ -58,40 +30,31 @@ model: keypoints: motorbike-keypoints boundingbox: motorbike-boundingbox inputs: - - efficient-detection-neck - params: - conf_thres: 0.25 - iou_thres: 0.45 - - - name: BiSeNetHead - alias: context-brand-segmentation-head - task: brand_segmentation - inputs: - - context-brand-segmentation-backbone + - neck - name: SegmentationHead alias: color-segmentation-head - task: color_segmentation + task: color-segmentation inputs: - - color-segmentation-backbone + - neck - name: SegmentationHead alias: any-vehicle-segmentation-head - task: vehicle_segmentation + task: vehicle-segmentation inputs: - - vehicle-type-segmentation-backbone + - neck - name: BiSeNetHead alias: brand-segmentation-head - task: brand_segmentation + task: brand-segmentation inputs: - - brand-segmentation-backbone + - neck - name: BiSeNetHead alias: vehicle-type-segmentation-head - task: vehicle_type_segmentation + task: vehicle_type-segmentation inputs: - - vehicle-type-segmentation-backbone + - neck losses: - name: AdaptiveDetectionLoss @@ -100,12 +63,8 @@ model: attached_to: any-vehicle-segmentation-head - name: CrossEntropyLoss attached_to: vehicle-type-segmentation-head - - name: CrossEntropyLoss - attached_to: context-brand-segmentation-head - name: CrossEntropyLoss attached_to: color-segmentation-head - - name: SoftmaxFocalLoss - attached_to: brand-segmentation-head - name: ImplicitKeypointBBoxLoss attached_to: car-detection-head - name: EfficientKeypointBBoxLoss @@ -127,8 +86,6 @@ model: attached_to: vehicle-type-segmentation-head - name: Precision attached_to: 
brand-segmentation-head - - name: Recall - attached_to: context-brand-segmentation-head visualizers: - name: MultiVisualizer @@ -160,9 +117,6 @@ model: - name: SegmentationVisualizer alias: vehicle-segmentation-visualizer attached_to: any-vehicle-segmentation-head - - name: SegmentationVisualizer - alias: context-brand-segmentation-visualizer - attached_to: context-brand-segmentation-head - name: SegmentationVisualizer alias: brand-segmentation-visualizer attached_to: brand-segmentation-head @@ -214,6 +168,5 @@ trainer: callbacks: - name: ExportOnTrainEnd - - name: TestOnTrainEnd - name: ArchiveOnTrainEnd diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 9b24271b..b53abb16 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -4,6 +4,7 @@ import cv2 import gdown +import numpy as np import pytest import torchvision from luxonis_ml.data import LuxonisDataset @@ -17,10 +18,13 @@ environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" -@pytest.fixture +@pytest.fixture(scope="session") def parking_lot_dataset() -> LuxonisDataset: url = "gs://luxonis-test-bucket/luxonis-ml-test-data/D1_ParkingSlotTest" - base_path = LuxonisFileSystem.download(url, WORK_DIR) + base_path = WORK_DIR / "D1_ParkingSlotTest" + if not base_path.exists(): + base_path = LuxonisFileSystem.download(url, WORK_DIR) + mask_brand_path = base_path / "mask_brand" mask_color_path = base_path / "mask_color" kpt_mask_path = base_path / "keypoints_mask_vehicle" @@ -28,7 +32,7 @@ def parking_lot_dataset() -> LuxonisDataset: def generator(): filenames: dict[int, Path] = {} for base_path in [kpt_mask_path, mask_brand_path, mask_color_path]: - for sequence_path in list(sorted(base_path.glob("sequence.*"))): + for sequence_path in base_path.glob("sequence.*"): frame_data = sequence_path / "step0.frame_data.json" with open(frame_data) as f: data = json.load(f)["captures"][0] @@ -122,11 +126,11 @@ def generator(): for inst in 
vehicle_type_segmentation["instances"] } if base_path == kpt_mask_path: - task = "vehicle_type_segmentation" + task = "vehicle_type-segmentation" elif base_path == mask_brand_path: - task = "brand_segmentation" + task = "brand-segmentation" else: - task = "color_segmentation" + task = "color-segmentation" for class_, mask_ in rgb_to_bool_masks( mask, classes, add_background_class=True ): @@ -145,21 +149,22 @@ def generator(): "annotation": { "type": "mask", "class": "vehicle", - "task": "vehicle_segmentation", + "task": "vehicle-segmentation", "mask": mask.astype(bool)[..., 0] | mask.astype(bool)[..., 1] | mask.astype(bool)[..., 2], }, } - dataset = LuxonisDataset("__D1ParkingSLot-test", delete_existing=True) + dataset = LuxonisDataset("_ParkingLot", delete_existing=True) dataset.add(generator()) + np.random.seed(42) dataset.make_splits() return dataset -@pytest.fixture(scope="session", autouse=True) -def create_coco_dataset(): +@pytest.fixture(scope="session") +def coco_dataset() -> LuxonisDataset: dataset_name = "coco_test" url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" output_zip = WORK_DIR / "COCO_people_subset.zip" @@ -170,11 +175,11 @@ def create_coco_dataset(): parser = LuxonisParser( str(output_zip), dataset_name=dataset_name, delete_existing=True ) - parser.parse(random_split=True) + return parser.parse(random_split=True) -@pytest.fixture(scope="session", autouse=True) -def create_cifar10_dataset(): +@pytest.fixture(scope="session") +def cifar10_dataset() -> LuxonisDataset: dataset = LuxonisDataset("cifar10_test", delete_existing=True) output_folder = WORK_DIR / "cifar10" output_folder.mkdir(parents=True, exist_ok=True) @@ -210,3 +215,4 @@ def CIFAR10_subset_generator(): dataset.add(CIFAR10_subset_generator()) dataset.make_splits() + return dataset diff --git a/tests/integration/multi_input_modules.py b/tests/integration/multi_input_modules.py index dbc5a449..5cb32225 100644 --- a/tests/integration/multi_input_modules.py +++ 
b/tests/integration/multi_input_modules.py @@ -1,9 +1,10 @@ import torch +from luxonis_ml.data import LabelType from torch import Tensor, nn +from luxonis_train.loaders import BaseLoaderTorch from luxonis_train.nodes import BaseNode -from luxonis_train.utils.loaders import BaseLoaderTorch -from luxonis_train.utils.types import FeaturesProtocol, LabelType, Packet +from luxonis_train.utils import Packet class CustomMultiInputLoader(BaseLoaderTorch): @@ -61,28 +62,27 @@ def unwrap(self, inputs: list[dict[str, list[Tensor]]]): class FullBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 4 + ... class RGBDBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 + ... class PointcloudBackbone(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] + ... class FusionNeck(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 + ... class FusionNeck2(MultiInputTestBaseNode): - input_protocols = [FeaturesProtocol] * 3 + ... class CustomSegHead1(MultiInputTestBaseNode): tasks = {LabelType.SEGMENTATION: "segmentation"} - input_protocols = [FeaturesProtocol] def __init__(self, **kwargs): super().__init__(**kwargs) @@ -98,7 +98,6 @@ def forward(self, inputs: Tensor): class CustomSegHead2(MultiInputTestBaseNode): tasks = {LabelType.SEGMENTATION: "segmentation"} - input_protocols = [FeaturesProtocol] * 3 def __init__(self, **kwargs): super().__init__(**kwargs) diff --git a/tests/integration/overfit/conftest.py b/tests/integration/overfit/conftest.py new file mode 100644 index 00000000..08fc9eca --- /dev/null +++ b/tests/integration/overfit/conftest.py @@ -0,0 +1,43 @@ +import multiprocessing as mp +import os +from typing import Any + +import pytest + +LUXONIS_TRAIN_OVERFIT = os.getenv("LUXONIS_TRAIN_OVERFIT") or False + +if LUXONIS_TRAIN_OVERFIT: + EPOCHS = 200 +else: + EPOCHS = 1 + + +@pytest.fixture +def config() -> dict[str, Any]: + return { + "tracker": { + "save_directory": "tests/integration/save-directory", + 
}, + "loader": { + "train_view": "val", + "params": { + "dataset_name": "_ParkingLot", + }, + }, + "trainer": { + "batch_size": 4, + "epochs": EPOCHS, + "num_workers": mp.cpu_count(), + "validation_interval": EPOCHS, + "save_top_k": 0, + "preprocessing": { + "train_image_size": [256, 320], + "keep_aspect_ratio": False, + "normalize": {"active": True}, + }, + "callbacks": [ + {"name": "ExportOnTrainEnd"}, + {"name": "ArchiveOnTrainEnd"}, + ], + }, + } diff --git a/tests/integration/overfit/test_detection.py b/tests/integration/overfit/test_detection.py new file mode 100644 index 00000000..28250e0d --- /dev/null +++ b/tests/integration/overfit/test_detection.py @@ -0,0 +1,93 @@ +import os +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel +from luxonis_train.nodes.backbones import __all__ as BACKBONES + +LUXONIS_TRAIN_OVERFIT = os.getenv("LUXONIS_TRAIN_OVERFIT") or False + + +def get_opts(backbone: str) -> dict[str, Any]: + return { + "model": { + "nodes": [ + { + "name": backbone, + }, + { + "name": "EfficientBBoxHead", + "inputs": [backbone], + }, + { + "name": "EfficientKeypointBBoxHead", + "task": { + "keypoints": "car-keypoints", + "boundingbox": "car-boundingbox", + }, + "inputs": [backbone], + }, + { + "name": "ImplicitKeypointBBoxHead", + "task": { + "keypoints": "car-keypoints", + "boundingbox": "car-boundingbox", + }, + "inputs": [backbone], + }, + ], + "losses": [ + { + "name": "AdaptiveDetectionLoss", + "attached_to": "EfficientBBoxHead", + }, + { + "name": "EfficientKeypointBBoxLoss", + "attached_to": "EfficientKeypointBBoxHead", + }, + { + "name": "ImplicitKeypointBBoxLoss", + "attached_to": "ImplicitKeypointBBoxHead", + }, + ], + "metrics": [ + { + "name": "MeanAveragePrecision", + "attached_to": "EfficientBBoxHead", + }, + { + "name": "MeanAveragePrecisionKeypoints", + "alias": "EfficientKeypointBBoxHead-MaP", + "attached_to": "EfficientKeypointBBoxHead", + }, + { + 
"name": "MeanAveragePrecisionKeypoints", + "alias": "ImplicitKeypointBBoxHead-MaP", + "attached_to": "ImplicitKeypointBBoxHead", + }, + ], + } + } + + +def train_and_test(config: dict[str, Any], opts: dict[str, Any]): + model = LuxonisModel(config, opts) + model.train() + results = model.test(view="val") + if LUXONIS_TRAIN_OVERFIT: + for name, value in results.items(): + if "/map_50" in name or "/kpt_map_medium" in name: + assert value > 0.8, f"{name} = {value} (expected > 0.8)" + + +@pytest.mark.parametrize("backbone", BACKBONES) +def test_backbones( + backbone: str, + config: dict[str, Any], + parking_lot_dataset: LuxonisDataset, +): + opts = get_opts(backbone) + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + train_and_test(config, opts) diff --git a/tests/integration/overfit/test_segmentation.py b/tests/integration/overfit/test_segmentation.py new file mode 100644 index 00000000..40013eeb --- /dev/null +++ b/tests/integration/overfit/test_segmentation.py @@ -0,0 +1,126 @@ +import os +from typing import Any + +import pytest +from luxonis_ml.data import LuxonisDataset + +from luxonis_train.core import LuxonisModel +from luxonis_train.nodes.backbones import __all__ as BACKBONES + +LUXONIS_TRAIN_OVERFIT = os.getenv("LUXONIS_TRAIN_OVERFIT") or False + + +def get_opts(backbone: str) -> dict[str, Any]: + opts = { + "model": { + "nodes": [ + { + "name": backbone, + }, + { + "name": "SegmentationHead", + "alias": "seg-color-segmentation", + "task": "color-segmentation", + "inputs": [backbone], + }, + { + "name": "BiSeNetHead", + "alias": "bi-color-segmentation", + "task": "color-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "BiSeNetHead", + "alias": "bi-vehicle-segmentation", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": 
"seg-vehicle-segmentation-2", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + { + "name": "SegmentationHead", + "alias": "seg-vehicle-segmentation-3", + "task": "vehicle-segmentation", + "inputs": [backbone], + }, + ], + "losses": [ + { + "name": "CrossEntropyLoss", + "attached_to": "seg-color-segmentation", + }, + { + "name": "CrossEntropyLoss", + "attached_to": "bi-color-segmentation", + }, + { + "name": "BCEWithLogitsLoss", + "attached_to": "seg-vehicle-segmentation", + }, + { + "name": "SigmoidFocalLoss", + "attached_to": "bi-vehicle-segmentation", + "params": {"alpha": 0.5, "gamma": 1.0}, + }, + { + "name": "SoftmaxFocalLoss", + "attached_to": "seg-vehicle-segmentation-2", + "params": {"alpha": 0.5, "gamma": 1.0}, + }, + { + "name": "SmoothBCEWithLogitsLoss", + "attached_to": "seg-vehicle-segmentation-3", + "params": {"label_smoothing": 0.1}, + }, + ], + "metrics": [], + } + } + aliases = [head["alias"] for head in opts["model"]["nodes"][1:]] + for alias in aliases: + opts["model"]["metrics"].extend( + [ + { + "name": "JaccardIndex", + "alias": f"JaccardIndex_{alias}", + "attached_to": alias, + }, + { + "name": "F1Score", + "alias": f"F1Score_{alias}", + "attached_to": alias, + }, + ] + ) + return opts + + +def train_and_test(config: dict[str, Any], opts: dict[str, Any]): + model = LuxonisModel(config, opts) + model.train() + results = model.test(view="val") + if LUXONIS_TRAIN_OVERFIT: + for name, value in results.items(): + if "metric" in name: + assert value > 0.8, f"{name} = {value} (expected > 0.8)" + + +@pytest.mark.parametrize("backbone", BACKBONES) +def test_backbones( + backbone: str, + config: dict[str, Any], + parking_lot_dataset: LuxonisDataset, +): + opts = get_opts(backbone) + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier + train_and_test(config, opts) diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json index d9599642..5800dd14 100644 --- a/tests/integration/parking_lot.json 
+++ b/tests/integration/parking_lot.json @@ -36,7 +36,7 @@ ], "outputs": [ { - "name": "any-vehicle-segmentation-head/vehicle_segmentation/0", + "name": "any-vehicle-segmentation-head/vehicle-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -80,7 +80,7 @@ "layout": "NCHW" }, { - "name": "brand-segmentation-head/brand_segmentation/0", + "name": "brand-segmentation-head/brand-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -95,13 +95,13 @@ "dtype": "float32", "shape": [ 1, - 66240, + 5040, 24 ], "layout": "NCD" }, { - "name": "color-segmentation-head/color_segmentation/0", + "name": "color-segmentation-head/color-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -111,17 +111,6 @@ ], "layout": "NCHW" }, - { - "name": "context-brand-segmentation-head/brand_segmentation/0", - "dtype": "float32", - "shape": [ - 1, - 23, - 256, - 320 - ], - "layout": "NCHW" - }, { "name": "motorbike-detection-head/outputs/0", "dtype": "float32", @@ -156,7 +145,7 @@ "layout": "NCDE" }, { - "name": "vehicle-type-segmentation-head/vehicle_type_segmentation/0", + "name": "vehicle-type-segmentation-head/vehicle_type-segmentation/0", "dtype": "float32", "shape": [ 1, @@ -227,42 +216,6 @@ "motorbike-detection-head/outputs/2" ] }, - { - "parser": "SegmentationParser", - "metadata": { - "postprocessor_path": null, - "classes": [ - "background", - "chrysler", - "bmw", - "ducati", - "dodge", - "ferrari", - "infiniti", - "land-rover", - "roll-royce", - "saab", - "Kawasaki", - "moto", - "truimph", - "alfa-romeo", - "harley", - "honda", - "jeep", - "aprilia", - "piaggio", - "yamaha", - "buick", - "pontiac", - "isuzu" - ], - "n_classes": 23, - "is_softmax": false - }, - "outputs": [ - "context-brand-segmentation-head/brand_segmentation/0" - ] - }, { "parser": "SegmentationParser", "metadata": { @@ -277,7 +230,7 @@ "is_softmax": false }, "outputs": [ - "color-segmentation-head/color_segmentation/0" + "color-segmentation-head/color-segmentation/0" ] }, { @@ -291,7 +244,7 @@ "is_softmax": 
false }, "outputs": [ - "any-vehicle-segmentation-head/vehicle_segmentation/0" + "any-vehicle-segmentation-head/vehicle-segmentation/0" ] }, { @@ -300,34 +253,34 @@ "postprocessor_path": null, "classes": [ "background", + "alfa-romeo", "chrysler", "bmw", - "ducati", - "dodge", + "harley", "ferrari", + "honda", "infiniti", "land-rover", "roll-royce", - "saab", "Kawasaki", "moto", - "truimph", - "alfa-romeo", - "harley", - "honda", - "jeep", - "aprilia", "piaggio", + "ducati", + "isuzu", + "jeep", + "truimph", "yamaha", - "buick", + "dodge", + "saab", + "aprilia", "pontiac", - "isuzu" + "buick" ], "n_classes": 23, "is_softmax": false }, "outputs": [ - "brand-segmentation-head/brand_segmentation/0" + "brand-segmentation-head/brand-segmentation/0" ] }, { @@ -343,7 +296,7 @@ "is_softmax": false }, "outputs": [ - "vehicle-type-segmentation-head/vehicle_type_segmentation/0" + "vehicle-type-segmentation-head/vehicle_type-segmentation/0" ] } ] diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index 5afa385b..8c02a1b5 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -11,13 +11,14 @@ from luxonis_train.core import LuxonisModel -TEST_OUTPUT = Path("tests/integration/_test-output") -INFER_PATH = Path("tests/integration/_infer_save_dir") +TEST_OUTPUT = Path("tests/integration/save-directory") +INFER_PATH = Path("tests/integration/infer-save-directory") ONNX_PATH = Path("tests/integration/_model.onnx") STUDY_PATH = Path("study_local.db") OPTS = { "trainer.epochs": 1, + # "trainer.seed": 42, "trainer.batch_size": 1, "trainer.validation_interval": 1, "trainer.callbacks": "[]", @@ -47,14 +48,20 @@ def clear_files(): "segmentation_model", "detection_model", "keypoint_bbox_model", - "resnet_model", - "coco_model", - "efficient_coco_model", ], ) -def test_simple_models(config_file: str): +def test_predefined_models( + config_file: str, + coco_dataset: LuxonisDataset, + cifar10_dataset: LuxonisDataset, +): config_file 
= f"configs/{config_file}.yaml" - model = LuxonisModel(config_file, opts=OPTS) + opts = deepcopy(OPTS) | { + "loader.params.dataset_name": cifar10_dataset.dataset_name + if config_file == "classification_model" + else coco_dataset.dataset_name, + } + model = LuxonisModel(config_file, opts=opts) model.train() model.test() model.export() @@ -73,7 +80,6 @@ def test_simple_models(config_file: str): .with_suffix(".onnx.tar.xz") .exists() ) - del model def test_multi_input(): @@ -89,7 +95,6 @@ def test_multi_input(): assert not INFER_PATH.exists() model.infer(view="val", save_dir=INFER_PATH) assert INFER_PATH.exists() - del model def test_custom_tasks(parking_lot_dataset: LuxonisDataset, subtests): @@ -119,13 +124,10 @@ def test_custom_tasks(parking_lot_dataset: LuxonisDataset, subtests): del generated_config["model"]["heads"][1]["metadata"]["anchors"] assert generated_config == correct_archive_config - del model - def test_parsing_loader(): model = LuxonisModel("tests/configs/segmentation_parse_loader.yaml") model.train() - del model @pytest.mark.skipif(sys.platform == "win32", reason="Tuning not supported on Windows") @@ -133,4 +135,48 @@ def test_tuner(): model = LuxonisModel("configs/example_tuning.yaml", opts=OPTS) model.tune() assert STUDY_PATH.exists() - del model + + +def test_callbacks(coco_dataset: LuxonisDataset): + config_file = "tests/configs/parking_lot_config.yaml" + opts = deepcopy(OPTS) + del opts["trainer.callbacks"] + opts |= { + "trainer.use_rich_progress_bar": False, + "trainer.callbacks": [ + { + "name": "MetadataLogger", + "params": { + "hyperparams": ["trainer.epochs", "trainer.batch_size"], + }, + }, + {"name": "TestOnTrainEnd"}, + {"name": "UploadCheckpoint"}, + { + "name": "ExportOnTrainEnd", + }, + {"name": "ArchiveOnTrainEnd"}, + ], + } + opts["loader.params.dataset_name"] = coco_dataset.identifier + model = LuxonisModel(config_file, opts=opts) + model.train() + + +def test_freezing(coco_dataset: LuxonisDataset): + config_file = 
"configs/segmentation_model.yaml" + opts = deepcopy(OPTS) + opts |= { + "model.predefined_model.params": { + "head_params": { + "freezing": { + "active": True, + "unfreeze_after": 2, + }, + } + } + } + opts["trainer.epochs"] = 3 + opts["loader.params.dataset_name"] = coco_dataset.identifier + model = LuxonisModel(config_file, opts=opts) + model.train() diff --git a/tests/unittests/__init__.py b/tests/unittests/__init__.py index f9269fdf..e69de29b 100644 --- a/tests/unittests/__init__.py +++ b/tests/unittests/__init__.py @@ -1,2 +0,0 @@ -# import warnings -# warnings.filterwarnings("module", category=DeprecationWarning) diff --git a/tests/unittests/test_base_node.py b/tests/unittests/test_base_node.py new file mode 100644 index 00000000..5d3529e0 --- /dev/null +++ b/tests/unittests/test_base_node.py @@ -0,0 +1,44 @@ +import pytest + +from luxonis_train.nodes import AttachIndexType, BaseNode + + +@pytest.mark.parametrize( + ("attach_index", "expected"), + [ + (-1, 5), + (0, 1), + ("all", [1, 2, 3, 4, 5]), + ((0, 2), [1, 2]), + ((0, 4, 2), [1, 3]), + ((-1, -3, -1), [5, 4]), + ((4, 2), [5, 4]), + ((-1, -3), [5, 4]), + ((-4, 4), [2, 3, 4]), + ((1, -1), [2, 3, 4]), + ], +) +def test_attach_index(attach_index: AttachIndexType, expected: list[int] | int): + lst = [1, 2, 3, 4, 5] + + class DummyBaseNode: + attach_index: AttachIndexType + + DummyBaseNode.attach_index = attach_index + + assert BaseNode.get_attached(DummyBaseNode, lst) == expected # type: ignore + + +def test_attach_index_error(): + lst = [1, 2, 3, 4, 5] + + class DummyBaseNode: + attach_index: AttachIndexType + + with pytest.raises(ValueError): + DummyBaseNode.attach_index = 10 + BaseNode.get_attached(DummyBaseNode, lst) # type: ignore + + with pytest.raises(ValueError): + DummyBaseNode.attach_index = "none" # type: ignore + BaseNode.get_attached(DummyBaseNode, lst) # type: ignore diff --git a/tests/unittests/test_core.py b/tests/unittests/test_core.py new file mode 100644 index 00000000..e69de29b diff --git 
a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py b/tests/unittests/test_utils/test_assigners/test_atts_assigner.py index a3801ebb..f6af0c01 100644 --- a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py +++ b/tests/unittests/test_utils/test_assigners/test_atts_assigner.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.assigners.atts_assigner import ATSSAssigner +from luxonis_train.assigners import ATSSAssigner def test_init(): diff --git a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py b/tests/unittests/test_utils/test_assigners/test_tal_assigner.py index 8f291615..a06e58ec 100644 --- a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py +++ b/tests/unittests/test_utils/test_assigners/test_tal_assigner.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.assigners.tal_assigner import TaskAlignedAssigner +from luxonis_train.assigners import TaskAlignedAssigner def test_init(): diff --git a/tests/unittests/test_utils/test_assigners/test_utils.py b/tests/unittests/test_utils/test_assigners/test_utils.py index bf849e25..1b071327 100644 --- a/tests/unittests/test_utils/test_assigners/test_utils.py +++ b/tests/unittests/test_utils/test_assigners/test_utils.py @@ -1,10 +1,6 @@ import torch -from luxonis_train.utils.assigners.utils import ( - batch_iou, - candidates_in_gt, - fix_collisions, -) +from luxonis_train.assigners.utils import batch_iou, candidates_in_gt, fix_collisions def test_fix_collisions(): diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 2cb3df24..0a53d1b1 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -1,6 +1,6 @@ import torch -from luxonis_train.utils.boxutils import ( +from luxonis_train.utils import ( anchors_for_fpn_features, bbox2dist, bbox_iou, diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py 
b/tests/unittests/test_utils/test_loaders/test_base_loader.py index 0209c192..c07a5618 100644 --- a/tests/unittests/test_utils/test_loaders/test_base_loader.py +++ b/tests/unittests/test_utils/test_loaders/test_base_loader.py @@ -1,8 +1,8 @@ import pytest import torch +from luxonis_ml.data import LabelType -from luxonis_train.utils.loaders import collate_fn -from luxonis_train.utils.types import LabelType +from luxonis_train.loaders import collate_fn @pytest.mark.parametrize( From 0c17be82caf4b03d4da39c4dee4093ab150bbe65 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 6 Sep 2024 23:02:17 +0200 Subject: [PATCH 002/102] graph tests --- luxonis_train/utils/graph.py | 30 ++++++---- tests/unittests/test_utils/test_graph.py | 75 ++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 12 deletions(-) create mode 100644 tests/unittests/test_utils/test_graph.py diff --git a/luxonis_train/utils/graph.py b/luxonis_train/utils/graph.py index 849f440f..41ca95d2 100644 --- a/luxonis_train/utils/graph.py +++ b/luxonis_train/utils/graph.py @@ -1,8 +1,10 @@ from copy import deepcopy -from typing import Generator, TypeVar +from typing import Iterator, TypeAlias, TypeVar +Graph: TypeAlias = dict[str, list[str]] -def is_acyclic(graph: dict[str, list[str]]) -> bool: + +def is_acyclic(graph: Graph) -> bool: """Tests if graph is acyclic. @type graph: dict[str, list[str]] @@ -42,8 +44,8 @@ def dfs(node: str, visited: set[str], recursion_stack: set[str]): def traverse_graph( - graph: dict[str, list[str]], nodes: dict[str, T] -) -> Generator[tuple[str, T, list[str], list[str]], None, None]: + graph: Graph, nodes: dict[str, T] +) -> Iterator[tuple[str, T, list[str], list[str]]]: """Traverses the graph in topological order. @type graph: dict[str, list[str]] @@ -51,14 +53,13 @@ def traverse_graph( names, values are inputs to the node (list of node names). @type nodes: dict[str, T] @param nodes: Dictionary mapping node names to node objects. 
- @rtype: Generator[tuple[str, T, list[str], list[str]], None, None] - @return: Generator of tuples containing node name, node object, node dependencies - and unprocessed nodes. + @rtype: Iterator[tuple[str, T, list[str], list[str]]] + @return: Iterator of tuples containing node name, node object, node dependencies and + unprocessed nodes. @raises RuntimeError: If the graph is malformed. """ - unprocessed_nodes = sorted( - set(nodes.keys()) - ) # sort the set to allow reproducibility + # sort the set to allow reproducibility + unprocessed_nodes = sorted(set(nodes.keys())) processed: set[str] = set() graph = deepcopy(graph) @@ -69,9 +70,14 @@ def traverse_graph( if not node_dependencies or all( dependency in processed for dependency in node_dependencies ): - yield node_name, nodes[node_name], node_dependencies, unprocessed_nodes - processed.add(node_name) unprocessed_nodes.remove(node_name) + yield ( + node_name, + nodes[node_name], + node_dependencies, + unprocessed_nodes.copy(), + ) + processed.add(node_name) if unprocessed_nodes_copy == unprocessed_nodes: raise RuntimeError( diff --git a/tests/unittests/test_utils/test_graph.py b/tests/unittests/test_utils/test_graph.py new file mode 100644 index 00000000..ae308d6a --- /dev/null +++ b/tests/unittests/test_utils/test_graph.py @@ -0,0 +1,75 @@ +import pytest + +from luxonis_train.utils.graph import Graph, is_acyclic, traverse_graph + + +@pytest.mark.parametrize( + ("graph", "acyclic"), + [ + ({}, True), + ({"a": []}, True), + ({"a": ["b"], "b": ["a"]}, False), + ({"a": ["b"], "b": []}, True), + ({"a": ["b"], "b": ["c"], "c": ["a"]}, False), + ({"a": ["b"], "b": ["c"], "c": []}, True), + ({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, True), + ({"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": ["a"]}, False), + ], +) +def test_acyclic(graph: Graph, acyclic: bool): + assert is_acyclic(graph) == acyclic + + +@pytest.mark.parametrize( + ("graph", "nodes", "expected"), + [ + ({}, {}, []), + ( + {"a": []}, + {"a": 1}, 
+ [("a", 1, [], [])], + ), + ( + {"a": ["b"], "b": []}, + {"a": 1, "b": 2}, + [("b", 2, [], ["a"]), ("a", 1, ["b"], [])], + ), + ( + {"a": ["b"], "b": ["c"], "c": []}, + {"a": 1, "b": 2, "c": 3}, + [("c", 3, [], ["a", "b"]), ("b", 2, ["c"], ["a"]), ("a", 1, ["b"], [])], + ), + ( + {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, + {"a": 1, "b": 2, "c": 3, "d": 4}, + [ + ("d", 4, [], ["a", "b", "c"]), + ("b", 2, ["d"], ["a", "c"]), + ("c", 3, ["d"], ["a"]), + ("a", 1, ["b", "c"], []), + ], + ), + ], +) +def test_traverse( + graph: Graph, + nodes: dict[str, int], + expected: list[tuple[str, int, list[str], list[str]]], +): + result = list(traverse_graph(graph, nodes)) + assert result == expected + + +@pytest.mark.parametrize( + ("graph", "nodes"), + [ + ({"a": ["b"], "b": ["a"]}, {"a": 1, "b": 2}), + ( + {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": ["a"]}, + {"a": 1, "b": 2, "c": 3, "d": 4}, + ), + ], +) +def test_traverse_fail(graph: Graph, nodes: dict[str, int]): + with pytest.raises(RuntimeError): + list(traverse_graph(graph, nodes)) From f11bf62de6f38a7dc60917fe4d2f70ee45839c5b Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 6 Sep 2024 23:07:10 +0200 Subject: [PATCH 003/102] moved tests --- .../{test_utils => }/test_assigners/test_atts_assigner.py | 0 .../{test_utils => }/test_assigners/test_tal_assigner.py | 0 tests/unittests/{test_utils => }/test_assigners/test_utils.py | 0 tests/unittests/{test_utils => }/test_loaders/test_base_loader.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename tests/unittests/{test_utils => }/test_assigners/test_atts_assigner.py (100%) rename tests/unittests/{test_utils => }/test_assigners/test_tal_assigner.py (100%) rename tests/unittests/{test_utils => }/test_assigners/test_utils.py (100%) rename tests/unittests/{test_utils => }/test_loaders/test_base_loader.py (100%) diff --git a/tests/unittests/test_utils/test_assigners/test_atts_assigner.py b/tests/unittests/test_assigners/test_atts_assigner.py 
similarity index 100% rename from tests/unittests/test_utils/test_assigners/test_atts_assigner.py rename to tests/unittests/test_assigners/test_atts_assigner.py diff --git a/tests/unittests/test_utils/test_assigners/test_tal_assigner.py b/tests/unittests/test_assigners/test_tal_assigner.py similarity index 100% rename from tests/unittests/test_utils/test_assigners/test_tal_assigner.py rename to tests/unittests/test_assigners/test_tal_assigner.py diff --git a/tests/unittests/test_utils/test_assigners/test_utils.py b/tests/unittests/test_assigners/test_utils.py similarity index 100% rename from tests/unittests/test_utils/test_assigners/test_utils.py rename to tests/unittests/test_assigners/test_utils.py diff --git a/tests/unittests/test_utils/test_loaders/test_base_loader.py b/tests/unittests/test_loaders/test_base_loader.py similarity index 100% rename from tests/unittests/test_utils/test_loaders/test_base_loader.py rename to tests/unittests/test_loaders/test_base_loader.py From f217191ff36b8d28e29420fdf4c5659dca6fbb3c Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 6 Sep 2024 23:07:20 +0200 Subject: [PATCH 004/102] fixed dataset in callback test --- tests/integration/test_sanity.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_sanity.py b/tests/integration/test_sanity.py index 8c02a1b5..317407cc 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_sanity.py @@ -137,7 +137,7 @@ def test_tuner(): assert STUDY_PATH.exists() -def test_callbacks(coco_dataset: LuxonisDataset): +def test_callbacks(parking_lot_dataset: LuxonisDataset): config_file = "tests/configs/parking_lot_config.yaml" opts = deepcopy(OPTS) del opts["trainer.callbacks"] @@ -158,7 +158,7 @@ def test_callbacks(coco_dataset: LuxonisDataset): {"name": "ArchiveOnTrainEnd"}, ], } - opts["loader.params.dataset_name"] = coco_dataset.identifier + opts["loader.params.dataset_name"] = parking_lot_dataset.identifier model = 
LuxonisModel(config_file, opts=opts) model.train() From b04d919dab3bb2c1685b2755a61cb0e333d067bf Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 6 Sep 2024 23:08:37 +0200 Subject: [PATCH 005/102] added Graph type --- luxonis_train/models/luxonis_lightning.py | 3 ++- luxonis_train/utils/graph.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index c021e5b3..d3c7ac38 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -35,6 +35,7 @@ traverse_graph, ) from luxonis_train.utils.config import AttachedModuleConfig, Config +from luxonis_train.utils.graph import Graph from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry from .luxonis_output import LuxonisOutput @@ -127,7 +128,7 @@ def __init__( self.image_source = cfg.loader.image_source self.dataset_metadata = dataset_metadata or DatasetMetadata() self.frozen_nodes: list[tuple[nn.Module, int]] = [] - self.graph: dict[str, list[str]] = {} + self.graph: Graph = {} self.loader_input_shapes: dict[str, dict[str, Size]] = {} self.node_input_sources: dict[str, list[str]] = defaultdict(list) self.loss_weights: dict[str, float] = {} diff --git a/luxonis_train/utils/graph.py b/luxonis_train/utils/graph.py index 41ca95d2..1f2f043a 100644 --- a/luxonis_train/utils/graph.py +++ b/luxonis_train/utils/graph.py @@ -2,6 +2,10 @@ from typing import Iterator, TypeAlias, TypeVar Graph: TypeAlias = dict[str, list[str]] +"""Graph in a format of a dictionary of predecessors. + +Keys are node names, values are inputs to the node (list of node names). 
+""" def is_acyclic(graph: Graph) -> bool: From 97ea600c94df094641a6908ebb4b11dcfcdfd1bb Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Sat, 7 Sep 2024 03:44:56 +0200 Subject: [PATCH 006/102] increased coverage --- .../visualizers/multi_visualizer.py | 2 +- luxonis_train/core/core.py | 15 ++--- luxonis_train/core/utils/train_utils.py | 21 +++--- luxonis_train/utils/config.py | 12 ++++ luxonis_train/utils/general.py | 4 +- tests/integration/__init__.py | 0 tests/integration/conftest.py | 60 ++++++++++++++++- tests/integration/overfit/conftest.py | 43 ------------ .../{overfit => }/test_detection.py | 12 ++-- .../{overfit => }/test_segmentation.py | 11 ++-- .../{test_sanity.py => test_simple.py} | 65 ++++++++++--------- tests/unittests/test_assigners/__init__.py | 0 tests/unittests/test_loaders/__init__.py | 0 tests/unittests/test_utils/test_general.py | 44 +++++++++++++ 14 files changed, 179 insertions(+), 110 deletions(-) create mode 100644 tests/integration/__init__.py delete mode 100644 tests/integration/overfit/conftest.py rename tests/integration/{overfit => }/test_detection.py (93%) rename tests/integration/{overfit => }/test_segmentation.py (95%) rename tests/integration/{test_sanity.py => test_simple.py} (78%) create mode 100644 tests/unittests/test_assigners/__init__.py create mode 100644 tests/unittests/test_loaders/__init__.py create mode 100644 tests/unittests/test_utils/test_general.py diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index 85dfec47..79c9ebdf 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -49,5 +49,5 @@ def forward( label_canvas = label_viz prediction_canvas = prediction_viz case _: - raise NotImplementedError + raise NotImplementedError("Unexpected return type from visualizer.") return label_canvas, prediction_canvas diff --git 
a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 34dfca75..16e8013f 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -87,10 +87,8 @@ def __init__( # NOTE: overriding logger in pl so it uses our logger to log device info rank_zero_module.log = logger - deterministic = False if self.cfg.trainer.seed is not None: pl.seed_everything(self.cfg.trainer.seed, workers=True) - deterministic = True self.train_augmentations = Augmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, @@ -113,9 +111,8 @@ def __init__( ) self.pl_trainer = create_trainer( - self.cfg, + self.cfg.trainer, logger=self.tracker, - deterministic=deterministic, callbacks=LuxonisRichProgressBar() if self.cfg.trainer.use_rich_progress_bar else LuxonisTQDMProgressBar(), @@ -465,16 +462,12 @@ def _objective(trial: optuna.trial.Trial) -> float: pruner_callback = PyTorchLightningPruningCallback(trial, monitor="val/loss") callbacks.append(pruner_callback) - deterministic = False - if self.cfg.trainer.seed: + + if self.cfg.trainer.seed is not None: pl.seed_everything(cfg.trainer.seed, workers=True) - deterministic = True pl_trainer = create_trainer( - cfg, - logger=child_tracker, - callbacks=callbacks, - deterministic=deterministic, + cfg.trainer, logger=child_tracker, callbacks=callbacks ) try: diff --git a/luxonis_train/core/utils/train_utils.py b/luxonis_train/core/utils/train_utils.py index 1197000b..2e188fa6 100644 --- a/luxonis_train/core/utils/train_utils.py +++ b/luxonis_train/core/utils/train_utils.py @@ -2,10 +2,10 @@ import lightning.pytorch as pl -from luxonis_train.utils import Config +from luxonis_train.utils.config import TrainerConfig -def create_trainer(cfg: Config, **kwargs: Any) -> pl.Trainer: +def create_trainer(cfg: TrainerConfig, **kwargs: Any) -> pl.Trainer: """Creates Pytorch Lightning trainer. @type cfg: Config @@ -15,13 +15,14 @@ def create_trainer(cfg: Config, **kwargs: Any) -> pl.Trainer: @return: Pytorch Lightning trainer. 
""" return pl.Trainer( - accelerator=cfg.trainer.accelerator, - devices=cfg.trainer.devices, - strategy=cfg.trainer.strategy, - max_epochs=cfg.trainer.epochs, - accumulate_grad_batches=cfg.trainer.accumulate_grad_batches, - check_val_every_n_epoch=cfg.trainer.validation_interval, - num_sanity_val_steps=cfg.trainer.num_sanity_val_steps, - profiler=cfg.trainer.profiler, + accelerator=cfg.accelerator, + devices=cfg.devices, + strategy=cfg.strategy, + max_epochs=cfg.epochs, + accumulate_grad_batches=cfg.accumulate_grad_batches, + check_val_every_n_epoch=cfg.validation_interval, + num_sanity_val_steps=cfg.num_sanity_val_steps, + profiler=cfg.profiler, + deterministic=cfg.deterministic, **kwargs, ) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index f6cb5ac2..3ec605bb 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -274,6 +274,7 @@ class TrainerConfig(BaseModelExtraForbid): verbose: bool = True seed: int | None = None + deterministic: bool | Literal["warn"] | None = None batch_size: PositiveInt = 32 accumulate_grad_batches: PositiveInt = 1 use_weighted_sampler: bool = False @@ -291,6 +292,17 @@ class TrainerConfig(BaseModelExtraForbid): optimizer: OptimizerConfig = OptimizerConfig() scheduler: SchedulerConfig = SchedulerConfig() + @model_validator(mode="after") + def validate_deterministic(self) -> Self: + if self.seed is not None and self.deterministic is None: + logger.warning( + "Setting `trainer.deterministic` to True because `trainer.seed` is set." + "This can cause certain layers to fail. " + "In such cases, set `trainer.deterministic` to `'warn'`." 
+ ) + self.deterministic = True + return self + @model_validator(mode="after") def check_num_workes_platform(self) -> Self: if ( diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index b5899b3e..238e6080 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -78,8 +78,8 @@ def _infer_upscale_factor(in_size: int, orig_size: int) -> int | float: "Width and height upscale factors are not integers. " f"Width: {wf}, height: {hf}." ) - case _: - raise ValueError("Unexpected error.") + + raise NotImplementedError(f"Unexpected case: {width_factor}, {height_factor}") def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index b53abb16..2e7338f2 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,6 +1,10 @@ import json +import multiprocessing as mp +import os +import shutil from collections import defaultdict from pathlib import Path +from typing import Any import cv2 import gdown @@ -13,9 +17,25 @@ from luxonis_ml.utils import LuxonisFileSystem, environ WORK_DIR = Path("tests", "data") -WORK_DIR.mkdir(parents=True, exist_ok=True) -environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" + +@pytest.fixture(scope="session") +def test_output_dir() -> Path: + return Path("tests/integration/save-directory") + + +@pytest.fixture(scope="session", autouse=True) +def setup(test_output_dir: Path): + WORK_DIR.mkdir(parents=True, exist_ok=True) + shutil.rmtree(WORK_DIR / "luxonisml", ignore_errors=True) + shutil.rmtree(test_output_dir, ignore_errors=True) + environ.LUXONISML_BASE_PATH = WORK_DIR / "luxonisml" + test_output_dir.mkdir(exist_ok=True) + + +@pytest.fixture +def train_overfit() -> bool: + return bool(os.getenv("LUXONIS_TRAIN_OVERFIT")) @pytest.fixture(scope="session") @@ -216,3 +236,39 @@ def 
CIFAR10_subset_generator(): dataset.add(CIFAR10_subset_generator()) dataset.make_splits() return dataset + + +@pytest.fixture +def config(train_overfit: bool) -> dict[str, Any]: + if train_overfit: + epochs = 200 + else: + epochs = 1 + + return { + "tracker": { + "save_directory": "tests/integration/save-directory", + }, + "loader": { + "train_view": "val", + "params": { + "dataset_name": "_ParkingLot", + }, + }, + "trainer": { + "batch_size": 4, + "epochs": epochs, + "num_workers": mp.cpu_count(), + "validation_interval": epochs, + "save_top_k": 0, + "preprocessing": { + "train_image_size": [256, 320], + "keep_aspect_ratio": False, + "normalize": {"active": True}, + }, + "callbacks": [ + {"name": "ExportOnTrainEnd"}, + {"name": "ArchiveOnTrainEnd"}, + ], + }, + } diff --git a/tests/integration/overfit/conftest.py b/tests/integration/overfit/conftest.py deleted file mode 100644 index 08fc9eca..00000000 --- a/tests/integration/overfit/conftest.py +++ /dev/null @@ -1,43 +0,0 @@ -import multiprocessing as mp -import os -from typing import Any - -import pytest - -LUXONIS_TRAIN_OVERFIT = os.getenv("LUXONIS_TRAIN_OVERFIT") or False - -if LUXONIS_TRAIN_OVERFIT: - EPOCHS = 200 -else: - EPOCHS = 1 - - -@pytest.fixture -def config() -> dict[str, Any]: - return { - "tracker": { - "save_directory": "tests/integration/save-directory", - }, - "loader": { - "train_view": "val", - "params": { - "dataset_name": "_ParkingLot", - }, - }, - "trainer": { - "batch_size": 4, - "epochs": EPOCHS, - "num_workers": mp.cpu_count(), - "validation_interval": EPOCHS, - "save_top_k": 0, - "preprocessing": { - "train_image_size": [256, 320], - "keep_aspect_ratio": False, - "normalize": {"active": True}, - }, - "callbacks": [ - {"name": "ExportOnTrainEnd"}, - {"name": "ArchiveOnTrainEnd"}, - ], - }, - } diff --git a/tests/integration/overfit/test_detection.py b/tests/integration/test_detection.py similarity index 93% rename from tests/integration/overfit/test_detection.py rename to 
tests/integration/test_detection.py index 28250e0d..fb184b6f 100644 --- a/tests/integration/overfit/test_detection.py +++ b/tests/integration/test_detection.py @@ -1,4 +1,3 @@ -import os from typing import Any import pytest @@ -7,8 +6,6 @@ from luxonis_train.core import LuxonisModel from luxonis_train.nodes.backbones import __all__ as BACKBONES -LUXONIS_TRAIN_OVERFIT = os.getenv("LUXONIS_TRAIN_OVERFIT") or False - def get_opts(backbone: str) -> dict[str, Any]: return { @@ -46,6 +43,7 @@ def get_opts(backbone: str) -> dict[str, Any]: { "name": "EfficientKeypointBBoxLoss", "attached_to": "EfficientKeypointBBoxHead", + "params": {"area_factor": 0.5}, }, { "name": "ImplicitKeypointBBoxLoss", @@ -72,11 +70,15 @@ def get_opts(backbone: str) -> dict[str, Any]: } -def train_and_test(config: dict[str, Any], opts: dict[str, Any]): +def train_and_test( + config: dict[str, Any], + opts: dict[str, Any], + train_overfit: bool = False, +): model = LuxonisModel(config, opts) model.train() results = model.test(view="val") - if LUXONIS_TRAIN_OVERFIT: + if train_overfit: for name, value in results.items(): if "/map_50" in name or "/kpt_map_medium" in name: assert value > 0.8, f"{name} = {value} (expected > 0.8)" diff --git a/tests/integration/overfit/test_segmentation.py b/tests/integration/test_segmentation.py similarity index 95% rename from tests/integration/overfit/test_segmentation.py rename to tests/integration/test_segmentation.py index 40013eeb..86e952e0 100644 --- a/tests/integration/overfit/test_segmentation.py +++ b/tests/integration/test_segmentation.py @@ -1,4 +1,3 @@ -import os from typing import Any import pytest @@ -7,8 +6,6 @@ from luxonis_train.core import LuxonisModel from luxonis_train.nodes.backbones import __all__ as BACKBONES -LUXONIS_TRAIN_OVERFIT = os.getenv("LUXONIS_TRAIN_OVERFIT") or False - def get_opts(backbone: str) -> dict[str, Any]: opts = { @@ -105,11 +102,15 @@ def get_opts(backbone: str) -> dict[str, Any]: return opts -def train_and_test(config: 
dict[str, Any], opts: dict[str, Any]): +def train_and_test( + config: dict[str, Any], + opts: dict[str, Any], + train_overfit: bool = False, +): model = LuxonisModel(config, opts) model.train() results = model.test(view="val") - if LUXONIS_TRAIN_OVERFIT: + if train_overfit: for name, value in results.items(): if "metric" in name: assert value > 0.8, f"{name} = {value} (expected > 0.8)" diff --git a/tests/integration/test_sanity.py b/tests/integration/test_simple.py similarity index 78% rename from tests/integration/test_sanity.py rename to tests/integration/test_simple.py index 317407cc..59b224f7 100644 --- a/tests/integration/test_sanity.py +++ b/tests/integration/test_simple.py @@ -4,37 +4,35 @@ import tarfile from copy import deepcopy from pathlib import Path +from typing import Any import pytest from luxonis_ml.data import LuxonisDataset -from multi_input_modules import * from luxonis_train.core import LuxonisModel -TEST_OUTPUT = Path("tests/integration/save-directory") +from .multi_input_modules import * + INFER_PATH = Path("tests/integration/infer-save-directory") ONNX_PATH = Path("tests/integration/_model.onnx") STUDY_PATH = Path("study_local.db") -OPTS = { - "trainer.epochs": 1, - # "trainer.seed": 42, - "trainer.batch_size": 1, - "trainer.validation_interval": 1, - "trainer.callbacks": "[]", - "tracker.save_directory": str(TEST_OUTPUT), - "tuner.n_trials": 4, -} - -@pytest.fixture(scope="session", autouse=True) -def manage_out_dir(): - shutil.rmtree(TEST_OUTPUT, ignore_errors=True) - TEST_OUTPUT.mkdir(exist_ok=True) +@pytest.fixture +def opts(test_output_dir: Path) -> dict[str, Any]: + return { + "trainer.epochs": 1, + "trainer.batch_size": 1, + "trainer.validation_interval": 1, + "trainer.callbacks": "[]", + "tracker.save_directory": str(test_output_dir), + "tuner.n_trials": 4, + } @pytest.fixture(scope="function", autouse=True) def clear_files(): + # todo yield STUDY_PATH.unlink(missing_ok=True) ONNX_PATH.unlink(missing_ok=True) @@ -51,17 +49,18 @@ def 
clear_files(): ], ) def test_predefined_models( + opts: dict[str, Any], config_file: str, coco_dataset: LuxonisDataset, cifar10_dataset: LuxonisDataset, ): config_file = f"configs/{config_file}.yaml" - opts = deepcopy(OPTS) | { + opts |= { "loader.params.dataset_name": cifar10_dataset.dataset_name if config_file == "classification_model" else coco_dataset.dataset_name, } - model = LuxonisModel(config_file, opts=opts) + model = LuxonisModel(config_file, opts) model.train() model.test() model.export() @@ -82,9 +81,9 @@ def test_predefined_models( ) -def test_multi_input(): +def test_multi_input(opts: dict[str, Any]): config_file = "configs/example_multi_input.yaml" - model = LuxonisModel(config_file, opts=OPTS) + model = LuxonisModel(config_file, opts) model.train() model.test(view="val") @@ -97,14 +96,16 @@ def test_multi_input(): assert INFER_PATH.exists() -def test_custom_tasks(parking_lot_dataset: LuxonisDataset, subtests): +def test_custom_tasks( + opts: dict[str, Any], parking_lot_dataset: LuxonisDataset, subtests +): config_file = "tests/configs/parking_lot_config.yaml" - opts = deepcopy(OPTS) | { + opts |= { "loader.params.dataset_name": parking_lot_dataset.dataset_name, "trainer.batch_size": 2, } del opts["trainer.callbacks"] - model = LuxonisModel(config_file, opts=opts) + model = LuxonisModel(config_file, opts) model.train() archive_path = Path( model.run_save_dir, "archive", model.cfg.model.name @@ -131,18 +132,20 @@ def test_parsing_loader(): @pytest.mark.skipif(sys.platform == "win32", reason="Tuning not supported on Windows") -def test_tuner(): - model = LuxonisModel("configs/example_tuning.yaml", opts=OPTS) +def test_tuner(opts: dict[str, Any]): + model = LuxonisModel("configs/example_tuning.yaml", opts) model.tune() assert STUDY_PATH.exists() -def test_callbacks(parking_lot_dataset: LuxonisDataset): +def test_callbacks(opts: dict[str, Any], parking_lot_dataset: LuxonisDataset): config_file = "tests/configs/parking_lot_config.yaml" - opts = 
deepcopy(OPTS) + opts = deepcopy(opts) del opts["trainer.callbacks"] opts |= { "trainer.use_rich_progress_bar": False, + "trainer.seed": 42, + "trainer.deterministic": "warn", "trainer.callbacks": [ { "name": "MetadataLogger", @@ -159,13 +162,13 @@ def test_callbacks(parking_lot_dataset: LuxonisDataset): ], } opts["loader.params.dataset_name"] = parking_lot_dataset.identifier - model = LuxonisModel(config_file, opts=opts) + model = LuxonisModel(config_file, opts) model.train() -def test_freezing(coco_dataset: LuxonisDataset): +def test_freezing(opts: dict[str, Any], coco_dataset: LuxonisDataset): config_file = "configs/segmentation_model.yaml" - opts = deepcopy(OPTS) + opts = deepcopy(opts) opts |= { "model.predefined_model.params": { "head_params": { @@ -178,5 +181,5 @@ def test_freezing(coco_dataset: LuxonisDataset): } opts["trainer.epochs"] = 3 opts["loader.params.dataset_name"] = coco_dataset.identifier - model = LuxonisModel(config_file, opts=opts) + model = LuxonisModel(config_file, opts) model.train() diff --git a/tests/unittests/test_assigners/__init__.py b/tests/unittests/test_assigners/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_loaders/__init__.py b/tests/unittests/test_loaders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_utils/test_general.py b/tests/unittests/test_utils/test_general.py new file mode 100644 index 00000000..7f13f796 --- /dev/null +++ b/tests/unittests/test_utils/test_general.py @@ -0,0 +1,44 @@ +import pytest + +from luxonis_train.utils.general import infer_upscale_factor + + +@pytest.mark.parametrize( + ("in_size", "orig_size", "expected"), + [ + ((1, 1), (1, 1), 0), + ((1, 1), (2, 2), 1), + ((2, 2), (1, 1), -1), + ((2, 2), (4, 4), 1), + ((4, 4), (2, 2), -1), + ((4, 4), (8, 8), 1), + ((8, 8), (4, 4), -1), + ((2, 2), (16, 16), 3), + ((16, 16), (4, 4), -2), + (4, 8, 1), + ], +) +def test_infer_upscale_factor( + in_size: tuple[int, int] | int, + 
orig_size: tuple[int, int] | int, + expected: int, +): + assert infer_upscale_factor(in_size, orig_size) == expected + + +@pytest.mark.parametrize( + ("in_size", "orig_size"), + [ + ((1, 1), (2, 1)), + ((1, 1), (1, 2)), + ((2, 3), (16, 16)), + ((3, 2), (16, 16)), + ((3, 3), (16, 16)), + ], +) +def test_infer_upscale_factor_fail( + in_size: tuple[int, int] | int, + orig_size: tuple[int, int] | int, +): + with pytest.raises(ValueError): + infer_upscale_factor(in_size, orig_size) From 23238eda2a34fb8249be9dc4005e3754b5a2c815 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 02:26:50 +0200 Subject: [PATCH 007/102] 95 coverage --- .../attached_modules/base_attached_module.py | 68 ++++---- .../losses/implicit_keypoint_bbox_loss.py | 1 + .../callbacks/archive_on_train_end.py | 4 +- .../callbacks/export_on_train_end.py | 2 +- .../callbacks/luxonis_progress_bar.py | 2 +- luxonis_train/callbacks/metadata_logger.py | 20 ++- luxonis_train/callbacks/needs_checkpoint.py | 3 +- luxonis_train/core/utils/export_utils.py | 4 +- luxonis_train/nodes/base_node.py | 6 +- luxonis_train/utils/boundingbox.py | 21 ++- luxonis_train/utils/dataset_metadata.py | 66 +++----- luxonis_train/utils/keypoints.py | 6 +- tests/integration/test_simple.py | 27 +++- tests/unittests/test_base_attached_module.py | 152 ++++++++++++++++++ .../test_callbacks/test_needs_checkpoint.py | 6 + .../test_loaders/test_base_loader.py | 74 +++++---- tests/unittests/test_utils/test_boxutils.py | 48 ++++-- .../test_utils/test_dataset_metadata.py | 53 ++++++ tests/unittests/test_utils/test_keypoints.py | 21 +++ 19 files changed, 432 insertions(+), 152 deletions(-) create mode 100644 tests/unittests/test_base_attached_module.py create mode 100644 tests/unittests/test_callbacks/test_needs_checkpoint.py create mode 100644 tests/unittests/test_utils/test_dataset_metadata.py create mode 100644 tests/unittests/test_utils/test_keypoints.py diff --git a/luxonis_train/attached_modules/base_attached_module.py 
b/luxonis_train/attached_modules/base_attached_module.py index b5c6747d..ea964ff0 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -59,25 +59,33 @@ def __init__(self, *, node: BaseNode[Any, Any] | None = None): self._node = node self._epoch = 0 - self._required_labels: tuple[LabelType, ...] | None = None - if self._node and self.supported_labels and self.node.tasks: + self.required_labels: list[LabelType] = [] + if self._node and self.supported_labels: + module_supported = [ + label.value + if isinstance(label, LabelType) + else f"({' + '.join(label)})" + for label in self.supported_labels + ] + module_supported = f"[{', '.join(module_supported)}]" + if not self.node.tasks: + raise IncompatibleException( + f"Module '{self.name}' requires one of the following " + f"labels or combinations of labels: {module_supported}, " + f"but is connected to node '{self.node.name}' which does not specify any tasks." + ) node_tasks = set(self.node.tasks) for required_labels in self.supported_labels: if isinstance(required_labels, LabelType): - required_labels = (required_labels,) + required_labels = [required_labels] + else: + required_labels = list(required_labels) if set(required_labels) <= node_tasks: - self._required_labels = required_labels + self.required_labels = required_labels break else: - module_supported = [ - label.value - if isinstance(label, LabelType) - else f"({' + '.join(label)})" - for label in self.supported_labels - ] - module_supported = f"[{', '.join(module_supported)}]" node_supported = [task.value for task in self.node.tasks] - raise ValueError( + raise IncompatibleException( f"Module '{self.name}' requires one of the following labels or combinations of labels: {module_supported}, " f"but is connected to node '{self.node.name}' which does not support any of them. " f"{self.node.name} supports {node_supported}." 
@@ -101,16 +109,10 @@ def node(self) -> BaseNode[Any, Any]: ) return self._node - @property - def required_labels(self) -> tuple[LabelType, ...]: - if self._required_labels is None: - raise ValueError(f"{self.name} does not require any labels.") - return self._required_labels - @property def node_tasks(self) -> dict[LabelType, str]: if self.node._tasks is None: - raise ValueError("Node must have the `tasks` attribute specified.") + raise RuntimeError("Node must have the `tasks` attribute specified.") return self.node._tasks def get_label(self, labels: Labels, label_type: LabelType | None = None) -> Tensor: @@ -157,17 +159,9 @@ def _get_label( ) return labels[task_name] - if len(self.required_labels) > 1: - raise NotImplementedError( - f"{self.name} requires multiple labels. You must provide the " - "`label_type` argument to extract the desired label." - ) - for label, label_type in labels.values(): - if label_type == self.required_labels[0]: - return label, label_type - - raise IncompatibleException.from_missing_task( - self.required_labels[0].value, list(labels.keys()), self.name + raise ValueError( + f"{self.name} requires multiple labels. You must provide the " + "`label_type` argument to extract the desired label." ) def get_input_tensors( @@ -203,18 +197,20 @@ def get_input_tensors( if task_type is not None: if isinstance(task_type, LabelType): if task_type not in self.node_tasks: - raise ValueError( + raise IncompatibleException( f"Task {task_type.value} is not supported by the node " f"{self.node.name}." ) return inputs[self.node_tasks[task_type]] else: if task_type not in inputs: - raise ValueError(f"Task {task_type} is not present in the inputs.") + raise IncompatibleException( + f"Task {task_type} is not present in the inputs." + ) return inputs[task_type] if len(self.required_labels) > 1: - raise NotImplementedError( + raise ValueError( f"{self.name} requires multiple labels, " "you must provide the `task_type` argument to extract the desired input." 
) @@ -244,18 +240,18 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: @raises IncompatibleException: If the inputs are not compatible with the module. """ if self.node._tasks is None: - raise ValueError( + raise RuntimeError( f"{self.node.name} must have the `tasks` attribute specified " f"for {self.name} to make use of the default `prepare` method." ) if self.supported_labels is None: - raise ValueError( + raise RuntimeError( f"{self.name} must have the `supported_labels` attribute " "specified in order to use the default `prepare` method." ) if len(self.supported_labels) > 1: if len(self.node._tasks) > 1: - raise NotImplementedError( + raise RuntimeError( f"{self.name} supports more than one label type" f"and is connected to {self.node.name} node " "which is a multi-task node. The default `prepare` " diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index fd73b36a..f12235c9 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -32,6 +32,7 @@ logger = logging.getLogger(__name__) +# TODO: BROKEN! 
class ImplicitKeypointBBoxLoss(BaseLoss[list[Tensor], KeypointTargetType]): node: ImplicitKeypointBBoxHead supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] diff --git a/luxonis_train/callbacks/archive_on_train_end.py b/luxonis_train/callbacks/archive_on_train_end.py index d9e7b298..30949e4e 100644 --- a/luxonis_train/callbacks/archive_on_train_end.py +++ b/luxonis_train/callbacks/archive_on_train_end.py @@ -26,12 +26,12 @@ def on_train_end( """ path = self.get_checkpoint(pl_module) - if path is None: + if path is None: # pragma: no cover logger.warning("Skipping model archiving.") return onnx_path = pl_module.core._exported_models.get("onnx") - if onnx_path is None: + if onnx_path is None: # pragma: no cover logger.error( "Model executable not found. " "Make sure to run exporter callback before archiver callback. " diff --git a/luxonis_train/callbacks/export_on_train_end.py b/luxonis_train/callbacks/export_on_train_end.py index 261c4ef6..e727e81f 100644 --- a/luxonis_train/callbacks/export_on_train_end.py +++ b/luxonis_train/callbacks/export_on_train_end.py @@ -25,7 +25,7 @@ def on_train_end( @param pl_module: Pytorch Lightning module. """ path = self.get_checkpoint(pl_module) - if path is None: + if path is None: # pragma: no cover logger.warning("Skipping model export.") return diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index fc09b26a..93c6c0a6 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -106,7 +106,7 @@ def __init__(self): @property def console(self) -> Console: - if self._console is None: + if self._console is None: # pragma: no cover raise RuntimeError( "Console is not initialized for the `LuxonisRichProgressBar`. " "Consider setting `tracker.use_rich_progress_bar` to `False` in the configuration." 
diff --git a/luxonis_train/callbacks/metadata_logger.py b/luxonis_train/callbacks/metadata_logger.py index f4fa851f..f04cb575 100644 --- a/luxonis_train/callbacks/metadata_logger.py +++ b/luxonis_train/callbacks/metadata_logger.py @@ -31,22 +31,30 @@ def on_fit_start( hparams = {key: cfg.get(key) for key in self.hyperparams} - # try to get luxonis-ml and luxonis-train git commit hashes (if installed as editable) luxonis_ml_hash = self._get_editable_package_git_hash("luxonis_ml") - if luxonis_ml_hash: + if luxonis_ml_hash: # pragma: no cover hparams["luxonis_ml"] = luxonis_ml_hash luxonis_train_hash = self._get_editable_package_git_hash("luxonis_train") - if luxonis_train_hash: + if luxonis_train_hash: # pragma: no cover hparams["luxonis_train"] = luxonis_train_hash pl_module.logger.log_hyperparams(hparams) - # also save metadata locally - with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w+") as f: + with open(osp.join(pl_module.save_dir, "metadata.yaml"), "w") as f: yaml.dump(hparams, f, default_flow_style=False) @staticmethod - def _get_editable_package_git_hash(package_name: str) -> str | None: + def _get_editable_package_git_hash( + package_name: str, + ) -> str | None: # pragma: no cover + """Get git hash of an editable package. + + @type package_name: str + @param package_name: Name of the package. + @rtype: str or None + @return: Git hash of the package or None if the package is not installed in + editable mode. 
+ """ try: distribution = pkg_resources.get_distribution(package_name) package_location = osp.join(distribution.location, package_name) diff --git a/luxonis_train/callbacks/needs_checkpoint.py b/luxonis_train/callbacks/needs_checkpoint.py index 30355e82..ad6d80e4 100644 --- a/luxonis_train/callbacks/needs_checkpoint.py +++ b/luxonis_train/callbacks/needs_checkpoint.py @@ -40,7 +40,8 @@ def _get_checkpoint( ) return path - def _get_other_type(self, checkpoint_type: str) -> str: + @staticmethod + def _get_other_type(checkpoint_type: str) -> str: if checkpoint_type == "loss": return "metric" return "loss" diff --git a/luxonis_train/core/utils/export_utils.py b/luxonis_train/core/utils/export_utils.py index eb3e2775..b4863f1b 100644 --- a/luxonis_train/core/utils/export_utils.py +++ b/luxonis_train/core/utils/export_utils.py @@ -42,7 +42,7 @@ def try_onnx_simplify(onnx_path: str) -> None: model_onnx = onnx.load(onnx_path) onnx_model, check = onnxsim.simplify(model_onnx) if not check: - raise RuntimeError("ONNX simplify failed.") + raise RuntimeError("ONNX simplify failed.") # pragma: no cover onnx.save(onnx_model, onnx_path) logger.info(f"ONNX model saved to {onnx_path}") @@ -52,7 +52,7 @@ def try_onnx_simplify(onnx_path: str) -> None: "`onnxsim` not installed. Skipping ONNX model simplification. " "Ensure `onnxsim` is installed in your environment." ) - except RuntimeError: + except RuntimeError: # pragma: no cover logger.error( "Failed to simplify ONNX model. Proceeding without simplification." ) diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index bfb28701..2b7c5f81 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -273,7 +273,7 @@ def get_class_names(self, task: LabelType) -> list[str]: @rtype: list[str] @return: Class names for the task. 
""" - return self.dataset_metadata.class_names(self.get_task_name(task)) + return self.dataset_metadata.classes(self.get_task_name(task)) @property def n_keypoints(self) -> int: @@ -337,10 +337,10 @@ def class_names(self) -> list[str]: "the `BaseNode.dataset_metadata.class_names` method manually." ) elif len(self._tasks) == 1: - return self.dataset_metadata.class_names(self.task) + return self.dataset_metadata.classes(self.task) else: class_names = [ - self.dataset_metadata.class_names(self.get_task_name(task)) + self.dataset_metadata.classes(self.get_task_name(task)) for task in self._tasks ] if all(set(names) == set(class_names[0]) for names in class_names): diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py index 2a3998ac..6c5ee1d4 100644 --- a/luxonis_train/utils/boundingbox.py +++ b/luxonis_train/utils/boundingbox.py @@ -163,8 +163,21 @@ def bbox_iou( @param bbox2: Second set of bboxes [M, 4]. @type bbox_format: BBoxFormatType @param bbox_format: Input bbox format. Defaults to "xyxy". - @type iou_type: IoUType + @type iou_type: Literal["none", "giou", "diou", "ciou", "siou"] @param iou_type: IoU type. Defaults to "none". + Possible values are: + - "none": standard IoU + - "giou": Generalized IoU + - "diou": Distance IoU + - "ciou": Complete IoU. Introduced in U{ + Enhancing Geometric Factors in Model Learning and + Inference for Object Detection and Instance + Segmentation}. + Implementation adapted from torchvision C{complete_box_iou} + with improved stability. + - "siou": Soft IoU. Introduced in U{ + SIoU Loss: More Powerful Learning for Bounding Box + Regression}. @type element_wise: bool @param element_wise: If True returns element wise IoUs. Defaults to False. 
@rtype: Tensor @@ -182,9 +195,6 @@ def bbox_iou( elif iou_type == "diou": iou = distance_box_iou(bbox1, bbox2) elif iou_type == "ciou": - # CIoU from `Enhancing Geometric Factors in Model Learning and Inference for - # Object Detection and Instance Segmentation`, https://arxiv.org/pdf/2005.03572.pdf. - # Implementation adapted from torchvision complete_box_iou with added eps for stability eps = 1e-7 iou = bbox_iou(bbox1, bbox2, iou_type="none") @@ -203,9 +213,6 @@ def bbox_iou( iou = diou - alpha * v elif iou_type == "siou": - # SIoU from `SIoU Loss: More Powerful Learning for Bounding Box Regression`, - # https://arxiv.org/pdf/2205.12740.pdf - eps = 1e-7 bbox1_xywh = box_convert(bbox1, in_fmt="xyxy", out_fmt="xywh") w1, h1 = bbox1_xywh[:, 2], bbox1_xywh[:, 3] diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py index 6464b69a..c9ec2e27 100644 --- a/luxonis_train/utils/dataset_metadata.py +++ b/luxonis_train/utils/dataset_metadata.py @@ -27,21 +27,7 @@ def __init__( self._n_keypoints = n_keypoints or {} self._loader = loader - @property - def classes(self) -> dict[str, list[str]]: - """Dictionary mapping label types to lists of class names. - - @type: dict[str, list[str]] - @raises ValueError: If classes were not provided during initialization. - """ - if self._classes is None: - raise ValueError( - "Trying to access `classes`, byt they were not" - "provided during initialization." - ) - return self._classes - - def n_classes(self, task: str | None) -> int: + def n_classes(self, task: str | None = None) -> int: """Gets the number of classes for the specified task. @type task: str | None @@ -54,18 +40,19 @@ def n_classes(self, task: str | None) -> int: different label types. 
""" if task is not None: - if task not in self.classes: + if task not in self._classes: raise ValueError(f"Task '{task}' is not present in the dataset.") - return len(self.classes[task]) - n_classes = len(list(self.classes.values())[0]) - for classes in self.classes.values(): + return len(self._classes[task]) + n_classes = len(list(self._classes.values())[0]) + for classes in self._classes.values(): if len(classes) != n_classes: raise ValueError( "The dataset contains different number of classes for different tasks." + "Please specify the 'task' argument to get the number of classes." ) return n_classes - def n_keypoints(self, task: str | None) -> int: + def n_keypoints(self, task: str | None = None) -> int: """Gets the number of keypoints for the specified task. @type task: str | None @@ -79,14 +66,16 @@ def n_keypoints(self, task: str | None) -> int: if task not in self._n_keypoints: raise ValueError(f"Task '{task}' is not present in the dataset.") return self._n_keypoints[task] - if len(self._n_keypoints) > 1: - raise ValueError( - "The dataset specifies multiple keypoint tasks, " - "please specify the 'task' argument to get the number of keypoints." - ) - return next(iter(self._n_keypoints.values())) + n_keypoints = next(iter(self._n_keypoints.values())) + for n in self._n_keypoints.values(): + if n != n_keypoints: + raise ValueError( + "The dataset contains different number of keypoints for different tasks." + "Please specify the 'task' argument to get the number of keypoints." + ) + return n_keypoints - def class_names(self, task: str | None) -> list[str]: + def classes(self, task: str | None = None) -> list[str]: """Gets the class names for the specified task. @type task: str | None @@ -99,11 +88,11 @@ def class_names(self, task: str | None) -> list[str]: label types. 
""" if task is not None: - if task not in self.classes: + if task not in self._classes: raise ValueError(f"Task type {task} is not present in the dataset.") - return self.classes[task] - class_names = list(self.classes.values())[0] - for classes in self.classes.values(): + return self._classes[task] + class_names = list(self._classes.values())[0] + for classes in self._classes.values(): if classes != class_names: raise ValueError( "The dataset contains different class names for different tasks." @@ -120,7 +109,7 @@ def autogenerate_anchors(self, num_heads: int) -> tuple[list[list[float]], float @raises ValueError: If the dataset loader was not provided during initialization. """ - if self.loader is None: + if self._loader is None: raise ValueError( "Cannot generate anchors without a dataset loader. " "Please provide a dataset loader to the constructor " @@ -128,18 +117,10 @@ def autogenerate_anchors(self, num_heads: int) -> tuple[list[list[float]], float ) proposed_anchors, recall = anchors_from_dataset( - self.loader, n_anchors=num_heads * 3 + self._loader, n_anchors=num_heads * 3 ) return proposed_anchors.reshape(-1, 6).tolist(), recall - def set_loader(self, loader: BaseLoaderTorch) -> None: - """Sets the dataset loader. - - @type loader: DataLoader - @param loader: Dataset loader. - """ - self.loader = loader - @classmethod def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": """Creates a L{DatasetMetadata} object from a L{LuxonisDataset}. 
@@ -152,6 +133,5 @@ def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": classes = loader.get_classes() n_keypoints = loader.get_n_keypoints() - instance = cls(classes=classes, n_keypoints=n_keypoints) - instance.set_loader(loader) + instance = cls(classes=classes, n_keypoints=n_keypoints, loader=loader) return instance diff --git a/luxonis_train/utils/keypoints.py b/luxonis_train/utils/keypoints.py index b3740e44..613e3fb8 100644 --- a/luxonis_train/utils/keypoints.py +++ b/luxonis_train/utils/keypoints.py @@ -16,14 +16,14 @@ def process_keypoints_predictions(keypoints: Tensor) -> tuple[Tensor, Tensor, Te @rtype: tuple[Tensor, Tensor, Tensor] @return: x, y and visibility tensors. """ - x = keypoints[..., ::3] * 2.0 - 0.5 - y = keypoints[..., 1::3] * 2.0 - 0.5 + x = keypoints[..., ::3] + y = keypoints[..., 1::3] visibility = keypoints[..., 2::3] return x, y, visibility def get_sigmas( - sigmas: list[float] | None, n_keypoints: int, caller_name: str | None + sigmas: list[float] | None, n_keypoints: int, caller_name: str | None = None ) -> Tensor: """Validate or create sigma values for each keypoint. 
diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 59b224f7..0a537ab1 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -131,8 +131,23 @@ def test_parsing_loader(): model.train() -@pytest.mark.skipif(sys.platform == "win32", reason="Tuning not supported on Windows") -def test_tuner(opts: dict[str, Any]): +@pytest.mark.skipif( + sys.platform == "win32", + reason="Tuning not supported on Windows", +) +def test_tuner(opts: dict[str, Any], coco_dataset: LuxonisDataset): + opts["tuner.params"] = { + "trainer.optimizer.name_categorical": ["Adam", "SGD"], + "trainer.optimizer.params.lr_float": [0.0001, 0.001], + "trainer.batch_size_int": [4, 16, 4], + "trainer.preprocessing.augmentations_subset": [ + ["Defocus", "Sharpen", "Flip", "Normalize", "invalid"], + 2, + ], + "model.losses.0.weight_uniform": [0.1, 0.9], + "model.nodes.0.freezing.unfreeze_after_loguniform": [0.1, 0.9], + } + opts["loader.params.dataset_name"] = coco_dataset.identifier model = LuxonisModel("configs/example_tuning.yaml", opts) model.tune() assert STUDY_PATH.exists() @@ -158,8 +173,14 @@ def test_callbacks(opts: dict[str, Any], parking_lot_dataset: LuxonisDataset): { "name": "ExportOnTrainEnd", }, - {"name": "ArchiveOnTrainEnd"}, + { + "name": "ArchiveOnTrainEnd", + "params": {"preferred_checkpoint": "loss"}, + }, ], + "exporter.scale_values": [0.5, 0.5, 0.5], + "exporter.mean_values": [0.5, 0.5, 0.5], + "exporter.blobconverter.active": True, } opts["loader.params.dataset_name"] = parking_lot_dataset.identifier model = LuxonisModel(config_file, opts) diff --git a/tests/unittests/test_base_attached_module.py b/tests/unittests/test_base_attached_module.py new file mode 100644 index 00000000..96956d82 --- /dev/null +++ b/tests/unittests/test_base_attached_module.py @@ -0,0 +1,152 @@ +import pytest +from luxonis_ml.data import LabelType + +from luxonis_train import BaseLoss, BaseNode +from luxonis_train.utils.exceptions import 
IncompatibleException + + +class DummyBackbone(BaseNode): + def forward(self, _): + ... + + +class DummySegmentationHead(BaseNode): + tasks = [LabelType.SEGMENTATION] + + def forward(self, _): + ... + + +class DummyBBoxHead(BaseNode): + tasks = [LabelType.BOUNDINGBOX] + + def forward(self, _): + ... + + +class DummyDetectionHead(BaseNode): + tasks = [LabelType.BOUNDINGBOX, LabelType.KEYPOINTS] + + def forward(self, _): + ... + + +class DummyLoss(BaseLoss): + supported_labels = [ + LabelType.SEGMENTATION, + (LabelType.KEYPOINTS, LabelType.BOUNDINGBOX), + ] + + def forward(self, _): + ... + + +class NoLabelLoss(BaseLoss): + def forward(self, _): + ... + + +@pytest.fixture +def labels(): + return { + "segmentation": ("segmentation", LabelType.SEGMENTATION), + "keypoints": ("keypoints", LabelType.KEYPOINTS), + "boundingbox": ("boundingbox", LabelType.BOUNDINGBOX), + "classification": ("classification", LabelType.CLASSIFICATION), + } + + +@pytest.fixture +def inputs(): + return { + "features": ["features"], + "segmentation": ["segmentation"], + } + + +def test_valid_properties(): + head = DummySegmentationHead() + loss = DummyLoss(node=head) + no_labels_loss = NoLabelLoss(node=head) + assert loss.node == head + assert loss.node_tasks == {LabelType.SEGMENTATION: "segmentation"} + assert loss.required_labels == [LabelType.SEGMENTATION] + assert no_labels_loss.node == head + assert no_labels_loss.node_tasks == {LabelType.SEGMENTATION: "segmentation"} + assert no_labels_loss.required_labels == [] + + +def test_invalid_properties(): + backbone = DummyBackbone() + with pytest.raises(IncompatibleException): + DummyLoss(node=backbone) + with pytest.raises(IncompatibleException): + DummyLoss(node=DummyBBoxHead()) + with pytest.raises(RuntimeError): + _ = DummyLoss().node + with pytest.raises(RuntimeError): + _ = NoLabelLoss(node=backbone).node_tasks + + +def test_get_label(labels): + seg_head = DummySegmentationHead() + det_head = DummyDetectionHead() + seg_loss = 
DummyLoss(node=seg_head) + assert seg_loss.get_label(labels) == "segmentation" + assert seg_loss.get_label(labels, LabelType.SEGMENTATION) == "segmentation" + + del labels["segmentation"] + labels["segmentation-task"] = ("segmentation", LabelType.SEGMENTATION) + + with pytest.raises(IncompatibleException): + seg_loss.get_label(labels) + + det_loss = DummyLoss(node=det_head) + assert det_loss.get_label(labels, LabelType.KEYPOINTS) == "keypoints" + assert det_loss.get_label(labels, LabelType.BOUNDINGBOX) == "boundingbox" + + with pytest.raises(ValueError): + det_loss.get_label(labels) + + with pytest.raises(ValueError): + det_loss.get_label(labels, LabelType.SEGMENTATION) + + +def test_input_tensors(inputs): + seg_head = DummySegmentationHead() + seg_loss = DummyLoss(node=seg_head) + assert seg_loss.get_input_tensors(inputs) == ["segmentation"] + assert seg_loss.get_input_tensors(inputs, "segmentation") == ["segmentation"] + assert seg_loss.get_input_tensors(inputs, LabelType.SEGMENTATION) == [ + "segmentation" + ] + + with pytest.raises(IncompatibleException): + seg_loss.get_input_tensors(inputs, LabelType.KEYPOINTS) + with pytest.raises(IncompatibleException): + seg_loss.get_input_tensors(inputs, "keypoints") + + det_head = DummyDetectionHead() + det_loss = DummyLoss(node=det_head) + with pytest.raises(ValueError): + det_loss.get_input_tensors(inputs) + + +def test_prepare(inputs, labels): + backbone = DummyBackbone() + seg_head = DummySegmentationHead() + seg_loss = DummyLoss(node=seg_head) + det_head = DummyDetectionHead() + + assert seg_loss.prepare(inputs, labels) == ("segmentation", "segmentation") + inputs["segmentation"].append("segmentation2") + assert seg_loss.prepare(inputs, labels) == ("segmentation2", "segmentation") + + with pytest.raises(RuntimeError): + NoLabelLoss(node=backbone).prepare(inputs, labels) + + with pytest.raises(RuntimeError): + NoLabelLoss(node=seg_head).prepare(inputs, labels) + + with pytest.raises(RuntimeError): + 
DummyLoss(node=det_head).prepare(inputs, labels) diff --git a/tests/unittests/test_callbacks/test_needs_checkpoint.py b/tests/unittests/test_callbacks/test_needs_checkpoint.py new file mode 100644 index 00000000..bd296dea --- /dev/null +++ b/tests/unittests/test_callbacks/test_needs_checkpoint.py @@ -0,0 +1,6 @@ +from luxonis_train.callbacks.needs_checkpoint import NeedsCheckpoint + + +def test_other_type(): + assert NeedsCheckpoint._get_other_type("loss") == "metric" + assert NeedsCheckpoint._get_other_type("metric") == "loss" diff --git a/tests/unittests/test_loaders/test_base_loader.py b/tests/unittests/test_loaders/test_base_loader.py index c07a5618..abf8df72 100644 --- a/tests/unittests/test_loaders/test_base_loader.py +++ b/tests/unittests/test_loaders/test_base_loader.py @@ -1,6 +1,7 @@ import pytest import torch from luxonis_ml.data import LabelType +from torch import Size from luxonis_train.loaders import collate_fn @@ -8,31 +9,31 @@ @pytest.mark.parametrize( "input_names_and_shapes", [ - [("features", torch.Size([3, 224, 224]))], + [("features", Size([3, 224, 224]))], [ - ("features", torch.Size([3, 224, 224])), - ("segmentation", torch.Size([1, 224, 224])), + ("features", Size([3, 224, 224])), + ("segmentation", Size([1, 224, 224])), ], [ - ("features", torch.Size([3, 224, 224])), - ("segmentation", torch.Size([1, 224, 224])), - ("disparity", torch.Size([1, 224, 224])), + ("features", Size([3, 224, 224])), + ("segmentation", Size([1, 224, 224])), + ("disparity", Size([1, 224, 224])), ], [ - ("features", torch.Size([3, 224, 224])), - ("pointcloud", torch.Size([1000, 3])), + ("features", Size([3, 224, 224])), + ("pointcloud", Size([1000, 3])), ], [ - ("features", torch.Size([3, 224, 224])), - ("pointcloud", torch.Size([1000, 3])), - ("foobar", torch.Size([2, 3, 4, 5, 6])), + ("features", Size([3, 224, 224])), + ("pointcloud", Size([1000, 3])), + ("foobar", Size([2, 3, 4, 5, 6])), ], ], ) @pytest.mark.parametrize("batch_size", [1, 2]) -def 
test_collate_fn(input_names_and_shapes, batch_size): - # Mock batch data - +def test_collate_fn( + input_names_and_shapes: list[tuple[str, Size]], batch_size: int, subtests +): def build_batch_element(): inputs = {} for name, shape in input_names_and_shapes: @@ -42,28 +43,47 @@ def build_batch_element(): "classification": ( torch.randint(0, 2, (2,), dtype=torch.int64), LabelType.CLASSIFICATION, - ) + ), + "segmentation": ( + torch.randint(0, 2, (1, 224, 224), dtype=torch.int64), + LabelType.SEGMENTATION, + ), + "keypoints": ( + torch.rand(1, 52, dtype=torch.float32), + LabelType.KEYPOINTS, + ), + "boundingbox": ( + torch.rand(1, 5, dtype=torch.float32), + LabelType.BOUNDINGBOX, + ), } return inputs, labels batch = [build_batch_element() for _ in range(batch_size)] - # Call collate_fn inputs, annotations = collate_fn(batch) # type: ignore - # Check images tensor - assert inputs["features"].shape == (batch_size, 3, 224, 224) - assert inputs["features"].dtype == torch.float32 - - # Check annotations - assert "classification" in annotations - assert annotations["classification"][0].shape == (batch_size, 2) - assert annotations["classification"][0].dtype == torch.int64 + with subtests.test("inputs"): + assert inputs["features"].shape == (batch_size, 3, 224, 224) + assert inputs["features"].dtype == torch.float32 + with subtests.test("classification"): + assert "classification" in annotations + assert annotations["classification"][0].shape == (batch_size, 2) + assert annotations["classification"][0].dtype == torch.int64 -# TODO: test also segmentation, boundingbox and keypoint + with subtests.test("segmentation"): + assert "segmentation" in annotations + assert annotations["segmentation"][0].shape == (batch_size, 1, 224, 224) + assert annotations["segmentation"][0].dtype == torch.int64 + with subtests.test("keypoints"): + assert "keypoints" in annotations + assert annotations["keypoints"][0].shape == (batch_size, 53) + assert annotations["keypoints"][0].dtype == 
torch.float32 -if __name__ == "__main__": - pytest.main() + with subtests.test("boundingbox"): + assert "boundingbox" in annotations + assert annotations["boundingbox"][0].shape == (batch_size, 6) + assert annotations["boundingbox"][0].dtype == torch.float32 diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 0a53d1b1..fb2c6f73 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -1,13 +1,14 @@ +import pytest import torch -from luxonis_train.utils import ( +from luxonis_train.utils.boundingbox import ( + IoUType, anchors_for_fpn_features, bbox2dist, bbox_iou, compute_iou_loss, dist2bbox, process_bbox_predictions, - process_keypoints_predictions, ) @@ -44,6 +45,8 @@ def test_dist2bbox(): bbox = dist2bbox(distance, anchor_points) assert bbox.shape == distance.shape + with pytest.raises(ValueError): + dist2bbox(distance, anchor_points, out_format="invalid") # type: ignore def test_bbox2dist(): @@ -56,15 +59,34 @@ def test_bbox2dist(): assert distance.shape == bbox.shape -def test_bbox_iou(): +@pytest.mark.parametrize("iou_type", ["none", "giou", "diou", "ciou", "siou"]) +def test_bbox_iou(iou_type: IoUType): for format in ["xyxy", "cxcywh", "xywh"]: bbox1 = generate_random_bboxes(5, 640, 640, format) - bbox2 = generate_random_bboxes(8, 640, 640, format) - - iou = bbox_iou(bbox1, bbox2) - - assert iou.shape == (5, 8) - assert iou.min() >= 0 and iou.max() <= 1 + if iou_type == "siou": + bbox2 = generate_random_bboxes(5, 640, 640, format) + else: + bbox2 = generate_random_bboxes(8, 640, 640, format) + + iou = bbox_iou( + bbox1, + bbox2, + bbox_format=format, # type: ignore + iou_type=iou_type, + ) + + assert iou.shape == (bbox1.shape[0], bbox2.shape[0]) + if iou_type == "none": + min = 0 + elif iou_type == "ciou": + min = -1.1 + else: + min = -1 + assert iou.min() >= min and iou.max() <= 1 + + if iou_type == "none": + with pytest.raises(ValueError): + 
bbox_iou(bbox1, bbox2, iou_type="invalid") # type: ignore def test_compute_iou_loss(): @@ -93,14 +115,6 @@ def test_process_bbox_predictions(): assert out_bbox_tail.shape == (10, 4) -def test_process_keypoints_predictions(): - keypoints = torch.rand(10, 15) # 5 keypoints * 3 (x, y, visibility) - - x, y, visibility = process_keypoints_predictions(keypoints) - - assert x.shape == y.shape == visibility.shape == (10, 5) - - def test_anchors_for_fpn_features(): features = [torch.rand(1, 256, 14, 14), torch.rand(1, 256, 28, 28)] strides = torch.tensor([8, 16]) diff --git a/tests/unittests/test_utils/test_dataset_metadata.py b/tests/unittests/test_utils/test_dataset_metadata.py new file mode 100644 index 00000000..a6c9edda --- /dev/null +++ b/tests/unittests/test_utils/test_dataset_metadata.py @@ -0,0 +1,53 @@ +import pytest + +from luxonis_train.utils import DatasetMetadata + + +@pytest.fixture +def metadata(): + return DatasetMetadata( + classes={ + "color-segmentation": ["car", "person"], + "detection": ["car", "person"], + }, + n_keypoints={"color-segmentation": 0, "detection": 0}, + ) + + +def test_n_classes(metadata): + assert metadata.n_classes("color-segmentation") == 2 + assert metadata.n_classes("detection") == 2 + assert metadata.n_classes() == 2 + with pytest.raises(ValueError): + metadata.n_classes("segmentation") + metadata._classes["segmentation"] = ["car", "person", "tree"] + with pytest.raises(ValueError): + metadata.n_classes() + + +def test_n_keypoints(metadata): + assert metadata.n_keypoints("color-segmentation") == 0 + assert metadata.n_keypoints("detection") == 0 + assert metadata.n_keypoints() == 0 + with pytest.raises(ValueError): + metadata.n_keypoints("segmentation") + metadata._n_keypoints["segmentation"] = 1 + with pytest.raises(ValueError): + metadata.n_keypoints() + + +def test_class_names(metadata): + assert metadata.classes("color-segmentation") == ["car", "person"] + assert metadata.classes("detection") == ["car", "person"] + assert 
metadata.classes() == ["car", "person"] + with pytest.raises(ValueError): + metadata.classes("segmentation") + metadata._classes["segmentation"] = ["car", "person", "tree"] + with pytest.raises(ValueError): + metadata.classes() + + +def test_no_loader(): + metadata = DatasetMetadata() + with pytest.raises(ValueError): + metadata.autogenerate_anchors(3) diff --git a/tests/unittests/test_utils/test_keypoints.py b/tests/unittests/test_utils/test_keypoints.py new file mode 100644 index 00000000..f14c4e37 --- /dev/null +++ b/tests/unittests/test_utils/test_keypoints.py @@ -0,0 +1,21 @@ +import pytest +import torch + +from luxonis_train.utils.keypoints import get_sigmas, process_keypoints_predictions + + +def test_get_sigmas(): + sigmas = [0.1, 0.2, 0.3] + pytest.approx(get_sigmas(sigmas, 3).tolist(), sigmas) + with pytest.raises(ValueError): + get_sigmas(sigmas, 2) + assert len(get_sigmas(None, 17)) == 17 + assert len(get_sigmas(None, 5)) == 5 + + +def test_process_keypoints_predictions(): + keypoints = torch.tensor([[0.1, 0.2, 1.0, 0.4, 0.5, 0.0]]) + x, y, visibility = process_keypoints_predictions(keypoints) + pytest.approx(x[0].tolist(), [0.1, 0.4]) + pytest.approx(y[0].tolist(), [0.2, 0.5]) + pytest.approx(visibility[0].tolist(), [1.0, 0.0]) From 1ace2eb31ef8b11e8d296b103d0878b06fee5d10 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 04:01:16 +0200 Subject: [PATCH 008/102] improved coverage --- luxonis_train/core/core.py | 18 ++-- luxonis_train/core/utils/archive_utils.py | 8 +- tests/__init__.py | 0 tests/configs/archive_config.yaml | 29 ++++++ tests/integration/conftest.py | 3 +- tests/integration/test_segmentation.py | 7 ++ tests/integration/test_simple.py | 35 ++++---- .../test_assigners/test_tal_assigner.py | 88 ++++++------------- tests/unittests/test_callbacks/__init__.py | 0 .../test_losses/test_bce_with_logits_loss.py | 5 -- 10 files changed, 94 insertions(+), 99 deletions(-) create mode 100644 tests/__init__.py create mode 100644 
tests/configs/archive_config.yaml create mode 100644 tests/unittests/test_callbacks/__init__.py diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 16e8013f..d0071c6a 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -78,6 +78,7 @@ def __init__( self.cfg.tracker.save_directory, self.tracker.run_name ) self.log_file = osp.join(self.run_save_dir, "luxonis_train.log") + self.error_message = None # NOTE: to add the file handler (we only get the save dir now, # but we want to use the logger before) @@ -90,6 +91,14 @@ def __init__( if self.cfg.trainer.seed is not None: pl.seed_everything(self.cfg.trainer.seed, workers=True) + self.pl_trainer = create_trainer( + self.cfg.trainer, + logger=self.tracker, + callbacks=LuxonisRichProgressBar() + if self.cfg.trainer.use_rich_progress_bar + else LuxonisTQDMProgressBar(), + ) + self.train_augmentations = Augmentations( image_size=self.cfg.trainer.preprocessing.train_image_size, augmentations=[ @@ -110,14 +119,6 @@ def __init__( only_normalize=True, ) - self.pl_trainer = create_trainer( - self.cfg.trainer, - logger=self.tracker, - callbacks=LuxonisRichProgressBar() - if self.cfg.trainer.use_rich_progress_bar - else LuxonisTQDMProgressBar(), - ) - self.loaders: dict[str, BaseLoaderTorch] = {} for view in ["train", "val", "test"]: loader_name = self.cfg.loader.name @@ -166,7 +167,6 @@ def __init__( ) for view in ["train", "val", "test"] } - self.error_message = None self.dataset_metadata = DatasetMetadata.from_loader(self.loaders["train"]) diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index bf2432f5..a27c8f90 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -63,7 +63,7 @@ def _from_onnx_dtype(dtype: int) -> DataType: TensorProto.FLOAT: "float32", TensorProto.FLOAT16: "float16", } - if dtype not in dtype_map: + if dtype not in dtype_map: # pragma: no cover raise 
ValueError(f"Unsupported ONNX data type: `{dtype}`") return DataType(dtype_map[dtype]) @@ -72,7 +72,7 @@ def _from_onnx_dtype(dtype: int) -> DataType: def _load_onnx_model(onnx_path: Path) -> onnx.ModelProto: try: return onnx.load(str(onnx_path)) - except Exception as e: + except Exception as e: # pragma: no cover raise ValueError(f"Failed to load ONNX model: `{onnx_path}`") from e @@ -116,7 +116,7 @@ def _get_classes( node_task = "segmentation" case "ImplicitKeypointBBoxHead" | "EfficientKeypointBBoxHead": node_task = "keypoints" - case _: + case _: # pragma: no cover raise ValueError("Node does not map to a default task.") return classes.get(node_task, []) @@ -161,7 +161,7 @@ def _get_head_specific_parameters( parameters["conf_threshold"] = head_node.conf_thres parameters["max_det"] = head_node.max_det parameters["n_keypoints"] = head_node.n_keypoints - else: + else: # pragma: no cover raise ValueError("Unknown head name") return parameters diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/configs/archive_config.yaml b/tests/configs/archive_config.yaml new file mode 100644 index 00000000..f7d8ae6f --- /dev/null +++ b/tests/configs/archive_config.yaml @@ -0,0 +1,29 @@ + +model: + name: archive_test + nodes: + - name: EfficientRep + + - name: EfficientBBoxHead + inputs: + - EfficientRep + + - name: EfficientKeypointBBoxHead + inputs: + - EfficientRep + + - name: ImplicitKeypointBBoxHead + inputs: + - EfficientRep + + - name: SegmentationHead + inputs: + - EfficientRep + + - name: BiSeNetHead + inputs: + - EfficientRep + + - name: ClassificationHead + inputs: + - EfficientRep diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 2e7338f2..e9242f4f 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -241,7 +241,7 @@ def CIFAR10_subset_generator(): @pytest.fixture def config(train_overfit: bool) -> dict[str, Any]: if train_overfit: - epochs = 200 
+ epochs = 100 else: epochs = 1 @@ -268,7 +268,6 @@ def config(train_overfit: bool) -> dict[str, Any]: }, "callbacks": [ {"name": "ExportOnTrainEnd"}, - {"name": "ArchiveOnTrainEnd"}, ], }, } diff --git a/tests/integration/test_segmentation.py b/tests/integration/test_segmentation.py index 86e952e0..c24e6fb9 100644 --- a/tests/integration/test_segmentation.py +++ b/tests/integration/test_segmentation.py @@ -81,6 +81,7 @@ def get_opts(backbone: str) -> dict[str, Any]: }, ], "metrics": [], + "visualizers": [], } } aliases = [head["alias"] for head in opts["model"]["nodes"][1:]] @@ -99,6 +100,12 @@ def get_opts(backbone: str) -> dict[str, Any]: }, ] ) + opts["model"]["visualizers"].append( + { + "name": "SegmentationVisualizer", + "attached_to": alias, + } + ) return opts diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 0a537ab1..aeed3ad9 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -63,22 +63,6 @@ def test_predefined_models( model = LuxonisModel(config_file, opts) model.train() model.test() - model.export() - assert ( - Path(model.run_save_dir, "export", model.cfg.model.name) - .with_suffix(".onnx") - .exists() - ) - model.archive() - assert ( - Path( - model.run_save_dir, - "archive", - model.cfg.archiver.name or model.cfg.model.name, - ) - .with_suffix(".onnx.tar.xz") - .exists() - ) def test_multi_input(opts: dict[str, Any]): @@ -135,7 +119,7 @@ def test_parsing_loader(): sys.platform == "win32", reason="Tuning not supported on Windows", ) -def test_tuner(opts: dict[str, Any], coco_dataset: LuxonisDataset): +def test_tune(opts: dict[str, Any], coco_dataset: LuxonisDataset): opts["tuner.params"] = { "trainer.optimizer.name_categorical": ["Adam", "SGD"], "trainer.optimizer.params.lr_float": [0.0001, 0.001], @@ -153,6 +137,23 @@ def test_tuner(opts: dict[str, Any], coco_dataset: LuxonisDataset): assert STUDY_PATH.exists() +def test_archive(coco_dataset: LuxonisDataset): + opts = { + 
"loader.params.dataset_name": coco_dataset.identifier, + } + model = LuxonisModel("tests/configs/archive_config.yaml", opts) + model.archive() + assert ( + Path( + model.run_save_dir, + "archive", + model.cfg.archiver.name or model.cfg.model.name, + ) + .with_suffix(".onnx.tar.xz") + .exists() + ) + + def test_callbacks(opts: dict[str, Any], parking_lot_dataset: LuxonisDataset): config_file = "tests/configs/parking_lot_config.yaml" opts = deepcopy(opts) diff --git a/tests/unittests/test_assigners/test_tal_assigner.py b/tests/unittests/test_assigners/test_tal_assigner.py index a06e58ec..1f63b42b 100644 --- a/tests/unittests/test_assigners/test_tal_assigner.py +++ b/tests/unittests/test_assigners/test_tal_assigner.py @@ -13,13 +13,11 @@ def test_init(): def test_forward(): - # Constants for clarity batch_size = 10 num_anchors = 100 num_max_boxes = 5 num_classes = 80 - # Initialize the TaskAlignedAssigner assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) # Create mock inputs @@ -30,83 +28,60 @@ def test_forward(): gt_bboxes = torch.zeros(batch_size, num_max_boxes, 4) # no gt bboxes mask_gt = torch.rand(batch_size, num_max_boxes, 1) - # Call the forward method labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( pred_scores, pred_bboxes, anchor_points, gt_labels, gt_bboxes, mask_gt ) - # Assert the expected outcomes assert labels.shape == (batch_size, num_anchors) - assert labels.unique().tolist() == [ - num_classes - ] # All labels should be num_classes as there are no GT boxes assert bboxes.shape == (batch_size, num_anchors, 4) - assert torch.equal( - bboxes, torch.zeros_like(bboxes) - ) # All bboxes should be zero as there are no GT boxes - assert ( - scores.shape - == ( - batch_size, - num_anchors, - num_classes, - ) - ) # TODO: We have this in doc string: Returns: ... 
assigned scores of shape [bs, n_anchors, 1], - # it returns tensor of shape [bs, n_anchors, n_classes] instead - assert torch.equal( - scores, torch.zeros_like(scores) - ) # All scores should be zero as there are no GT boxes + assert scores.shape == ( + batch_size, + num_anchors, + num_classes, + ) assert mask.shape == (batch_size, num_anchors) - assert torch.equal( - mask, torch.zeros_like(mask) - ) # All mask values should be zero as there are no GT boxes assert assigned_gt_idx.shape == (batch_size, num_anchors) - assert torch.equal( - assigned_gt_idx, torch.zeros_like(assigned_gt_idx) - ) # All assigned_gt_idx values should be zero as there are no GT boxes + + # Labels should be `num_classes` as there are no GT boxes + assert labels.unique().tolist() == [num_classes] + + # All results should be zero as there are no GT boxes + assert torch.equal(bboxes, torch.zeros_like(bboxes)) + assert torch.equal(scores, torch.zeros_like(scores)) + assert torch.equal(mask, torch.zeros_like(mask)) + assert torch.equal(assigned_gt_idx, torch.zeros_like(assigned_gt_idx)) def test_get_alignment_metric(): - # Create mock inputs - bs = 2 # batch size + batch_size = 2 n_anchors = 5 n_max_boxes = 3 n_classes = 80 - pred_scores = torch.rand( - bs, n_anchors, n_classes - ) # TODO: Same issue: works with n_classes instead of 1, change it in the doc string in the method itself!!! 
- pred_bboxes = torch.rand(bs, n_anchors, 4) - gt_labels = torch.randint(0, n_classes, (bs, n_max_boxes, 1)) - gt_bboxes = torch.rand(bs, n_max_boxes, 4) + pred_scores = torch.rand(batch_size, n_anchors, n_classes) + pred_bboxes = torch.rand(batch_size, n_anchors, 4) + gt_labels = torch.randint(0, n_classes, (batch_size, n_max_boxes, 1)) + gt_bboxes = torch.rand(batch_size, n_max_boxes, 4) - # Initialize the TaskAlignedAssigner assigner = TaskAlignedAssigner( n_classes=n_classes, topk=13, alpha=1.0, beta=6.0, eps=1e-9 ) assigner.bs = pred_scores.size(0) assigner.n_max_boxes = gt_bboxes.size(1) - # Call the method align_metric, overlaps = assigner._get_alignment_metric( pred_scores, pred_bboxes, gt_labels, gt_bboxes ) - # Assert the expected outcomes - assert align_metric.shape == (bs, n_max_boxes, n_anchors) - assert overlaps.shape == (bs, n_max_boxes, n_anchors) + assert align_metric.shape == (batch_size, n_max_boxes, n_anchors) + assert overlaps.shape == (batch_size, n_max_boxes, n_anchors) assert align_metric.dtype == torch.float32 assert overlaps.dtype == torch.float32 - assert (align_metric >= 0).all() and ( - align_metric <= 1 - ).all() # Alignment metric should be in the range [0, 1] - assert (overlaps >= 0).all() and ( - overlaps <= 1 - ).all() # IoU should be in the range [0, 1] + assert align_metric.min() >= 0 and align_metric.max() <= 1 + assert overlaps.min() >= 0 and overlaps.max() <= 1 def test_select_topk_candidates(): - # Constants for the test batch_size = 2 num_max_boxes = 3 num_anchors = 5 @@ -115,51 +90,40 @@ def test_select_topk_candidates(): metrics = torch.rand(batch_size, num_max_boxes, num_anchors) mask_gt = torch.rand(batch_size, num_max_boxes, 1) - # Initialize the TaskAlignedAssigner assigner = TaskAlignedAssigner(n_classes=80, topk=topk) - # Call the method - is_in_topk = assigner._select_topk_candidates( - metrics, - ) + is_in_topk = assigner._select_topk_candidates(metrics) topk_mask = mask_gt.repeat([1, 1, topk]).bool() assert 
torch.equal( assigner._select_topk_candidates(metrics), assigner._select_topk_candidates(metrics, topk_mask=topk_mask), ) - # Assert the expected outcomes assert is_in_topk.shape == (batch_size, num_max_boxes, num_anchors) assert is_in_topk.dtype == torch.float32 - # Check that each ground truth has at most 'topk' anchors selected - assert (is_in_topk.sum(dim=-1) <= topk).all() + assert is_in_topk.sum(dim=-1).max() <= topk def test_get_final_assignments(): - # Constants for the test batch_size = 2 num_max_boxes = 3 num_anchors = 5 num_classes = 80 - # Mock inputs gt_labels = torch.randint(0, num_classes, (batch_size, num_max_boxes, 1)) gt_bboxes = torch.rand(batch_size, num_max_boxes, 4) assigned_gt_idx = torch.randint(0, num_max_boxes, (batch_size, num_anchors)) mask_pos_sum = torch.randint(0, 2, (batch_size, num_anchors)) - # Initialize the TaskAlignedAssigner assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) assigner.bs = batch_size # Set batch size assigner.n_max_boxes = gt_bboxes.size(1) - # Call the method assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) - # Assert the expected outcomes assert assigned_labels.shape == (batch_size, num_anchors) assert assigned_bboxes.shape == (batch_size, num_anchors, 4) assert assigned_scores.shape == (batch_size, num_anchors, num_classes) - assert (assigned_labels >= 0).all() and (assigned_labels <= num_classes).all() + assert assigned_labels.min() >= 0 and assigned_labels.max() <= num_classes diff --git a/tests/unittests/test_callbacks/__init__.py b/tests/unittests/test_callbacks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unittests/test_losses/test_bce_with_logits_loss.py b/tests/unittests/test_losses/test_bce_with_logits_loss.py index 27871019..600cac59 100644 --- a/tests/unittests/test_losses/test_bce_with_logits_loss.py +++ b/tests/unittests/test_losses/test_bce_with_logits_loss.py @@ 
-1,4 +1,3 @@ -import pytest import torch from luxonis_train.attached_modules.losses import BCEWithLogitsLoss @@ -55,7 +54,3 @@ def test_weights(): loss_weight = loss_fn_weight.forward(predictions, targets) loss_no_weight = loss_fn_no_weight.forward(predictions, targets) assert loss_weight != loss_no_weight - - -if __name__ == "__main__": - pytest.main() From 94f2cf6b853940b2ed8e2ea56494585370ea9450 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 05:57:31 +0200 Subject: [PATCH 009/102] updated tests --- tests/integration/conftest.py | 1 + tests/integration/test_simple.py | 5 ++ tests/unittests/test_base_node.py | 63 +++++++++++++++++-- tests/unittests/test_blocks.py | 15 +++++ tests/unittests/test_core.py | 0 .../test_losses/test_bce_with_logits_loss.py | 7 +++ tests/unittests/test_utils/test_boxutils.py | 4 +- 7 files changed, 87 insertions(+), 8 deletions(-) create mode 100644 tests/unittests/test_blocks.py delete mode 100644 tests/unittests/test_core.py diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index e9242f4f..2374c3a9 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -269,5 +269,6 @@ def config(train_overfit: bool) -> dict[str, Any]: "callbacks": [ {"name": "ExportOnTrainEnd"}, ], + "matmul_precision": "medium", }, } diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index aeed3ad9..7d3587c4 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -8,6 +8,7 @@ import pytest from luxonis_ml.data import LuxonisDataset +from luxonis_ml.utils import environ from luxonis_train.core import LuxonisModel @@ -110,6 +111,10 @@ def test_custom_tasks( assert generated_config == correct_archive_config +@pytest.mark.skipif( + environ.GOOGLE_APPLICATION_CREDENTIALS is None, + reason="GCP credentials not set", +) def test_parsing_loader(): model = LuxonisModel("tests/configs/segmentation_parse_loader.yaml") model.train() diff --git 
a/tests/unittests/test_base_node.py b/tests/unittests/test_base_node.py index 5d3529e0..8d581f25 100644 --- a/tests/unittests/test_base_node.py +++ b/tests/unittests/test_base_node.py @@ -1,6 +1,22 @@ import pytest +import torch +from torch import Size, Tensor from luxonis_train.nodes import AttachIndexType, BaseNode +from luxonis_train.utils import Packet +from luxonis_train.utils.exceptions import IncompatibleException + + +class DummyNode(BaseNode, register=False): + def forward(self, _): + ... + + +@pytest.fixture +def packet() -> Packet[Tensor]: + return { + "features": [torch.rand(3, 224, 224)], + } @pytest.mark.parametrize( @@ -32,13 +48,50 @@ class DummyBaseNode: def test_attach_index_error(): lst = [1, 2, 3, 4, 5] - class DummyBaseNode: + class DummyNode(BaseNode, register=False): attach_index: AttachIndexType with pytest.raises(ValueError): - DummyBaseNode.attach_index = 10 - BaseNode.get_attached(DummyBaseNode, lst) # type: ignore + DummyNode.attach_index = 10 + BaseNode.get_attached(DummyNode, lst) # type: ignore with pytest.raises(ValueError): - DummyBaseNode.attach_index = "none" # type: ignore - BaseNode.get_attached(DummyBaseNode, lst) # type: ignore + DummyNode.attach_index = "none" # type: ignore + BaseNode.get_attached(DummyNode, lst) # type: ignore + + +def test_invalid(packet: Packet[Tensor]): + node = DummyNode() + with pytest.raises(RuntimeError): + _ = node.input_shapes + with pytest.raises(RuntimeError): + _ = node.original_in_shape + with pytest.raises(RuntimeError): + _ = node.dataset_metadata + with pytest.raises(RuntimeError): + node.unwrap([packet, packet]) + with pytest.raises(RuntimeError): + node.wrap({"inp": torch.rand(3, 224, 224)}) + + +def tets_in_sizes(): + node = DummyNode( + input_shapes=[{"features": [Size((3, 224, 224)) for _ in range(3)]}] + ) + assert node.in_sizes == [Size((3, 224, 224)) for _ in range(3)] + node = DummyNode(in_sizes=Size((3, 224, 224))) + assert node.in_sizes == Size((3, 224, 224)) + with 
pytest.raises(RuntimeError): + node = DummyNode(input_shapes=[{"feats": [Size((3, 224, 224))]}]) + _ = node.in_sizes + + +def test_check_type_override(): + class DummyNode(BaseNode, register=False): + in_channels: int + + def forward(self, _): + ... + + with pytest.raises(IncompatibleException): + DummyNode(input_shapes=[{"features": [Size((3, 224, 224)) for _ in range(3)]}]) diff --git a/tests/unittests/test_blocks.py b/tests/unittests/test_blocks.py new file mode 100644 index 00000000..8b6110d4 --- /dev/null +++ b/tests/unittests/test_blocks.py @@ -0,0 +1,15 @@ +import torch + +from luxonis_train.nodes.blocks import SqueezeExciteBlock, autopad + + +def test_autopad(): + assert autopad(1, 2) == 2 + assert autopad(2) == 1 + assert autopad((2, 4)) == (1, 2) + + +def test_squeeze_excite_block(): + se_block = SqueezeExciteBlock(64, 32) + x = torch.rand(1, 64, 224, 224) + assert se_block(x).shape == (1, 64, 224, 224) diff --git a/tests/unittests/test_core.py b/tests/unittests/test_core.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unittests/test_losses/test_bce_with_logits_loss.py b/tests/unittests/test_losses/test_bce_with_logits_loss.py index 600cac59..778ce302 100644 --- a/tests/unittests/test_losses/test_bce_with_logits_loss.py +++ b/tests/unittests/test_losses/test_bce_with_logits_loss.py @@ -1,3 +1,4 @@ +import pytest import torch from luxonis_train.attached_modules.losses import BCEWithLogitsLoss @@ -54,3 +55,9 @@ def test_weights(): loss_weight = loss_fn_weight.forward(predictions, targets) loss_no_weight = loss_fn_no_weight.forward(predictions, targets) assert loss_weight != loss_no_weight + + +def test_invalid(): + loss_fn = BCEWithLogitsLoss() + with pytest.raises(RuntimeError): + loss_fn.forward(torch.rand(10, 10), torch.rand(15, 15)) diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index fb2c6f73..35f3dedc 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ 
b/tests/unittests/test_utils/test_boxutils.py @@ -78,10 +78,8 @@ def test_bbox_iou(iou_type: IoUType): assert iou.shape == (bbox1.shape[0], bbox2.shape[0]) if iou_type == "none": min = 0 - elif iou_type == "ciou": - min = -1.1 else: - min = -1 + min = -1.5 assert iou.min() >= min and iou.max() <= 1 if iou_type == "none": From 35ccacae7557162f25418affc9bea4b694f42cb1 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 05:59:34 +0200 Subject: [PATCH 010/102] updated docs --- .../attached_modules/base_attached_module.py | 20 ++++--- luxonis_train/core/core.py | 43 ++++++------- .../nodes/backbones/efficientrep/variants.py | 2 +- .../nodes/backbones/micronet/variants.py | 2 +- .../nodes/backbones/mobileone/variants.py | 2 +- .../nodes/backbones/repvgg/variants.py | 2 +- luxonis_train/nodes/base_node.py | 60 +++++++++++-------- luxonis_train/utils/dataset_metadata.py | 29 +++++---- 8 files changed, 87 insertions(+), 73 deletions(-) diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index ea964ff0..a53ce4c1 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -135,12 +135,12 @@ def get_label(self, labels: Labels, label_type: LabelType | None = None) -> Tens @param labels: Labels from the dataset. @type label_type: LabelType | None @param label_type: Type of the label to extract. - @raises IncompatibleException: If the label is not found in the labels dictionary. - @raises NotImplementedError: If the module requires multiple labels. For such cases, - the `prepare` method should be overridden. @rtype: Tensor @return: Extracted label + + @raises ValueError: If the module requires multiple labels and the C{label_type} is not provided. + @raises IncompatibleException: If the label is not found in the labels dictionary. 
""" return self._get_label(labels, label_type)[0] @@ -188,10 +188,10 @@ def get_input_tensors( @rtype: list[Tensor] @return: Extracted input tensors - @raises ValueError: If the task type is not supported by the node or if the task - is not present in the inputs. + @raises IncompatibleException: If the task type is not supported by the node. + @raises IncompatibleException: If the task is not present in the inputs. - @raises NotImplementedError: If the module requires multiple labels. + @raises ValueError: If the module requires multiple labels. For such cases, the `prepare` method should be overridden. """ if task_type is not None: @@ -236,8 +236,12 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: >>> loss.forward(*loss.prepare(outputs, labels)) - @raises NotImplementedError: If the module requires multiple labels. - @raises IncompatibleException: If the inputs are not compatible with the module. + @raises RuntimeError: If the module requires multiple labels and + is connected to a multi-task node. In this case, the default + implementation cannot be used and the C{prepare} method should be overridden. + + @raises RuntimeError: If the C{tasks} attribute is not set on the node. + @raises RuntimeError: If the C{supported_labels} attribute is not set on the module. 
""" if self.node._tasks is None: raise RuntimeError( diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index d0071c6a..02596e63 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -16,6 +16,7 @@ from luxonis_ml.nn_archive import ArchiveGenerator from luxonis_ml.nn_archive.config import CONFIG_VERSION from luxonis_ml.utils import LuxonisFileSystem, reset_logging, setup_logging +from typeguard import typechecked from luxonis_train.attached_modules.visualizers import get_unnormalized_images from luxonis_train.callbacks import LuxonisRichProgressBar, LuxonisTQDMProgressBar @@ -188,7 +189,7 @@ def _train(self, resume: str | None, *args, **kwargs): status = "success" try: self.pl_trainer.fit(*args, ckpt_path=resume, **kwargs) - except Exception as e: + except Exception as e: # pragma: no cover logger.exception("Encountered an exception during training.") status = "failed" raise e @@ -242,7 +243,7 @@ def graceful_exit(signum: int, _): # pragma: no cover logger.info("Training finished") logger.info(f"Checkpoints saved in: {self.run_save_dir}") - else: + else: # pragma: no cover # Every time exception happens in the Thread, this hook will activate def thread_exception_hook(args): self.error_message = str(args.exc_value) @@ -333,14 +334,14 @@ def export( for path in self._exported_models.values(): if self.cfg.exporter.upload_to_run: self.tracker.upload_artifact(path, typ="export") - if self.cfg.exporter.upload_url is not None: + if self.cfg.exporter.upload_url is not None: # pragma: no cover LuxonisFileSystem.upload(path, self.cfg.exporter.upload_url) with open(export_path.with_suffix(".yaml"), "w") as f: yaml.dump(modelconverter_config, f) if self.cfg.exporter.upload_to_run: self.tracker.upload_artifact(f.name, name=f.name, typ="export") - if self.cfg.exporter.upload_url is not None: + if self.cfg.exporter.upload_url is not None: # pragma: no cover LuxonisFileSystem.upload(f.name, self.cfg.exporter.upload_url) @overload @@ -359,8 
+360,9 @@ def test( ) -> None: ... + @typechecked def test( - self, new_thread: bool = False, view: Literal["train", "test", "val"] = "val" + self, new_thread: bool = False, view: Literal["train", "val", "test"] = "val" ) -> Mapping[str, float] | None: """Runs testing. @@ -372,15 +374,11 @@ def test( @return: If new_thread is False, returns a dictionary test results. """ - if view not in self.pytorch_loaders: - raise ValueError( - f"View {view} is not valid. Valid views are: 'train', 'val', 'test'." - ) loader = self.pytorch_loaders[view] if not new_thread: return self.pl_trainer.test(self.lightning_module, loader)[0] - else: + else: # pragma: no cover self.thread = threading.Thread( target=self.pl_trainer.test, args=(self.lightning_module, loader), @@ -388,7 +386,12 @@ def test( ) self.thread.start() - def infer(self, view: str = "val", save_dir: str | Path | None = None) -> None: + @typechecked + def infer( + self, + view: Literal["train", "val", "test"] = "val", + save_dir: str | Path | None = None, + ) -> None: """Runs inference. @type view: str @@ -400,10 +403,6 @@ def infer(self, view: str = "val", save_dir: str | Path | None = None) -> None: """ self.lightning_module.eval() - if view not in self.pytorch_loaders: - raise ValueError( - f"View {view} is not valid. Valid views are: 'train', 'val', 'test'." 
- ) for inputs, labels in self.pytorch_loaders[view]: images = get_unnormalized_images(self.cfg, inputs) outputs = self.lightning_module.forward( @@ -436,11 +435,13 @@ def _objective(trial: optuna.trial.Trial) -> float: curr_params["model.predefined_model"] = None cfg_copy = self.cfg.model_copy(deep=True) + # manually remove Normalize so it doesn't + # get duplicated when creating new cfg instance cfg_copy.trainer.preprocessing.augmentations = [ a for a in cfg_copy.trainer.preprocessing.augmentations if a.name != "Normalize" - ] # manually remove Normalize so it doesn't duplicate it when creating new cfg instance + ] cfg = Config.get_config(cfg_copy.model_dump(), curr_params) child_tracker.log_hyperparams(curr_params) @@ -482,7 +483,7 @@ def _objective(trial: optuna.trial.Trial) -> float: except optuna.TrialPruned as e: logger.info(e) - if "val/loss" not in pl_trainer.callback_metrics: + if "val/loss" not in pl_trainer.callback_metrics: # pragma: no cover raise ValueError( "No validation loss found. " "This can happen if `TestOnTrainEnd` callback is used." 
@@ -506,7 +507,7 @@ def _objective(trial: optuna.trial.Trial) -> float: is_sweep=False, **tracker_params, ) - if self.parent_tracker.is_mlflow: + if self.parent_tracker.is_mlflow: # pragma: no cover # Experiment needs to be interacted with to create actual MLFlow run self.parent_tracker.experiment["mlflow"].active_run() @@ -522,7 +523,7 @@ def _objective(trial: optuna.trial.Trial) -> float: if cfg_tuner.storage.active: if cfg_tuner.storage.storage_type == "local": storage = "sqlite:///study_local.db" - else: + else: # pragma: no cover storage = "postgresql://{}:{}@{}:{}/{}".format( self.cfg.ENVIRON.POSTGRES_USER, self.cfg.ENVIRON.POSTGRES_PASSWORD, @@ -547,7 +548,7 @@ def _objective(trial: optuna.trial.Trial) -> float: self.parent_tracker.log_hyperparams(study.best_params) - if self.cfg.tracker.is_wandb: + if self.cfg.tracker.is_wandb: # pragma: no cover # If wandb used then init parent tracker separately at the end wandb_parent_tracker = LuxonisTrackerPL( rank=rank_zero_only.rank, @@ -649,7 +650,7 @@ def _mult(lst: list[float | int]) -> list[float]: logger.info(f"NN Archive saved to {archive_path}") - if self.cfg.archiver.upload_url is not None: + if self.cfg.archiver.upload_url is not None: # pragma: no cover LuxonisFileSystem.upload(archive_path, self.cfg.archiver.upload_url) if self.cfg.archiver.upload_to_run: diff --git a/luxonis_train/nodes/backbones/efficientrep/variants.py b/luxonis_train/nodes/backbones/efficientrep/variants.py index 2c4df9bc..7ced749e 100644 --- a/luxonis_train/nodes/backbones/efficientrep/variants.py +++ b/luxonis_train/nodes/backbones/efficientrep/variants.py @@ -36,7 +36,7 @@ def get_variant(variant: VariantLiteral) -> EfficientRepVariant: variants["medium"] = variants["m"] variants["large"] = variants["l"] - if variant not in variants: + if variant not in variants: # pragma: no cover raise ValueError( f"EfficientRep variant should be one of " f"{list(variants.keys())}, got '{variant}'." 
diff --git a/luxonis_train/nodes/backbones/micronet/variants.py b/luxonis_train/nodes/backbones/micronet/variants.py index 48ca6164..22a8d552 100644 --- a/luxonis_train/nodes/backbones/micronet/variants.py +++ b/luxonis_train/nodes/backbones/micronet/variants.py @@ -336,7 +336,7 @@ class MicroNetVariant(BaseModel): def get_variant(variant: Literal["M1", "M2", "M3"]) -> MicroNetVariant: variants = {"M1": M1, "M2": M2, "M3": M3} - if variant not in variants: + if variant not in variants: # pragma: no cover raise ValueError( "MicroNet model variant should be in " f"{list(variants.keys())}, got {variant}." diff --git a/luxonis_train/nodes/backbones/mobileone/variants.py b/luxonis_train/nodes/backbones/mobileone/variants.py index a3f65c9b..64e5a2a6 100644 --- a/luxonis_train/nodes/backbones/mobileone/variants.py +++ b/luxonis_train/nodes/backbones/mobileone/variants.py @@ -29,7 +29,7 @@ def get_variant(variant: Literal["s0", "s1", "s2", "s3", "s4"]) -> MobileOneVari use_se=True, ), } - if variant not in variants: + if variant not in variants: # pragma: no cover raise ValueError( "MobileOne model variant should be in " f"{list(variants.keys())}, got {variant}." diff --git a/luxonis_train/nodes/backbones/repvgg/variants.py b/luxonis_train/nodes/backbones/repvgg/variants.py index de13a854..27fd7fb1 100644 --- a/luxonis_train/nodes/backbones/repvgg/variants.py +++ b/luxonis_train/nodes/backbones/repvgg/variants.py @@ -23,7 +23,7 @@ def get_variant(variant: Literal["A0", "A1", "A2"]) -> RepVGGVariant: width_multiplier=(1.5, 1.5, 1.5, 2.75), ), } - if variant not in variants: + if variant not in variants: # pragma: no cover raise ValueError( f"RepVGG variant should be one of " f"{list(variants.keys())}, got '{variant}'." 
diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 2b7c5f81..8dcecd07 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -353,7 +353,12 @@ def class_names(self) -> list[str]: @property def input_shapes(self) -> list[Packet[Size]]: - """Getter for the input shapes.""" + """Getter for the input shapes. + + @type: list[Packet[Size]] + @raises RuntimeError: If the C{input_shapes} were not set during initialization. + """ + if self._input_shapes is None: raise self._non_set_error("input_shapes") return self._input_shapes @@ -363,7 +368,8 @@ def original_in_shape(self) -> Size: """Getter for the original input shape as [N, H, W]. @type: Size - @raises ValueError: If the C{original_in_shape} is C{None}. + @raises RuntimeError: If the C{original_in_shape} were not set during + initialization. """ if self._original_in_shape is None: raise self._non_set_error("original_in_shape") @@ -374,10 +380,11 @@ def dataset_metadata(self) -> DatasetMetadata: """Getter for the dataset metadata. @type: L{DatasetMetadata} - @raises ValueError: If the C{dataset_metadata} is C{None}. + @raises RuntimeError: If the C{dataset_metadata} were not set during + initialization. """ if self._dataset_metadata is None: - raise ValueError( + raise RuntimeError( f"{self._non_set_error('dataset_metadata')}" "Either provide `dataset_metadata` or `n_classes`." ) @@ -404,7 +411,7 @@ def in_sizes(self) -> Size | list[Size]: >>> in_sizes = [Size(64, 128, 128), Size(3, 224, 224)] @type: Size | list[Size] - @raises IncompatibleException: If the C{input_shapes} are too complicated for + @raises RuntimeError: If the C{input_shapes} are too complicated for the default implementation. """ if self._in_sizes is not None: @@ -412,7 +419,7 @@ def in_sizes(self) -> Size | list[Size]: features = self.input_shapes[0].get("features") if features is None: - raise IncompatibleException( + raise RuntimeError( f"Feature field is missing in {self.name}. 
" "The default implementation of `in_sizes` cannot be used." ) @@ -428,8 +435,8 @@ def in_channels(self) -> int | list[int]: otherwise returns a single value. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for the default + implementation of C{in_sizes}. """ return self._get_nth_size(-3) @@ -441,8 +448,8 @@ def in_height(self) -> int | list[int]: sufficiently simple. Otherwise the `input_shapes` should be used directly. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for + the default implementation of C{in_sizes}. """ return self._get_nth_size(-2) @@ -454,8 +461,8 @@ def in_width(self) -> int | list[int]: sufficiently simple. Otherwise the `input_shapes` should be used directly. @type: int | list[int] - @raises IncompatibleException: If the C{input_shapes} are too complicated for - the default implementation. + @raises RuntimeError: If the C{input_shapes} are too complicated for + the default implementation of C{in_sizes}. """ return self._get_nth_size(-1) @@ -488,9 +495,11 @@ def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: @param inputs: Inputs to the node. @rtype: ForwardInputT @return: Prepared inputs, ready to be passed to the L{forward} method. + @raises RuntimeError: If the number of inputs is not equal to 1. In such cases + the method has to be overridden. """ if len(inputs) > 1: - raise IncompatibleException( + raise RuntimeError( f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead." "If the node expects multiple inputs, the `unwrap` method should be overridden." ) @@ -534,6 +543,9 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: @rtype: L{Packet}[Tensor] @return: Wrapped output. 
+ + @raises RuntimeError: If the output is not a tensor or a list of tensors. + In such cases the method has to be overridden. """ if isinstance(output, Tensor): @@ -543,7 +555,7 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: ): outputs = list(output) else: - raise IncompatibleException( + raise RuntimeError( "Default `wrap` expects a single tensor or a list of tensors." ) try: @@ -562,7 +574,7 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: @return: Outputs of the module as a dictionary of list of tensors: `{"features": [Tensor, ...], "segmentation": [Tensor]}` - @raises IncompatibleException: If the inputs are not compatible with the node. + @raises RuntimeError: If default L{wrap} or L{unwrap} methods are not sufficient. """ unwrapped = self.unwrap(inputs) outputs = self(unwrapped) @@ -579,18 +591,16 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: def get_attached(self, lst: list[T]) -> list[T] | T: """Gets the attached elements from a list. - This method is used to get the attached elements from a list based on - the `attach_index` attribute. + This method is used to get the attached elements from a list based on the + C{attach_index} attribute. @type lst: list[T] - @param lst: List to get the attached elements from. Can be either - a list of tensors or a list of sizes. - + @param lst: List to get the attached elements from. Can be either a list of + tensors or a list of sizes. @rtype: list[T] | T - @return: Attached elements. If `attach_index` is set to `"all"` or is a slice, + @return: Attached elements. If C{attach_index} is set to C{"all"} or is a slice, returns a list of attached elements. - - @raises ValueError: If the `attach_index` is invalid. + @raises ValueError: If the C{attach_index} is invalid. 
""" def _normalize_index(index: int) -> int: @@ -633,8 +643,8 @@ def _get_nth_size(self, idx: int) -> int | list[int]: case list(sizes): return [size[idx] for size in sizes] - def _non_set_error(self, name: str) -> ValueError: - return ValueError( + def _non_set_error(self, name: str) -> RuntimeError: + return RuntimeError( f"{self.name} is trying to access `{name}`, " "but it was not set during initialization. " ) diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py index c9ec2e27..aa1fbfec 100644 --- a/luxonis_train/utils/dataset_metadata.py +++ b/luxonis_train/utils/dataset_metadata.py @@ -34,10 +34,9 @@ def n_classes(self, task: str | None = None) -> int: @param task: Task to get the number of classes for. @rtype: int @return: Number of classes for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different number of classes for - different label types. + @raises ValueError: If the C{task} is not present in the dataset. + @raises RuntimeError: If the C{task} was not provided and the dataset contains + different number of classes for different label types. """ if task is not None: if task not in self._classes: @@ -46,7 +45,7 @@ def n_classes(self, task: str | None = None) -> int: n_classes = len(list(self._classes.values())[0]) for classes in self._classes.values(): if len(classes) != n_classes: - raise ValueError( + raise RuntimeError( "The dataset contains different number of classes for different tasks." "Please specify the 'task' argument to get the number of classes." ) @@ -59,8 +58,9 @@ def n_keypoints(self, task: str | None = None) -> int: @param task: Task to get the number of keypoints for. @rtype: int @return: Number of keypoints for the specified label type. - @raises ValueError: If the dataset loader was not provided during initialization - or if the dataset does not contain the specified task. 
+ @raises ValueError: If the C{task} is not present in the dataset. + @raises RuntimeError: If the C{task} was not provided and the dataset contains + different number of keypoints for different label types. """ if task is not None: if task not in self._n_keypoints: @@ -69,7 +69,7 @@ def n_keypoints(self, task: str | None = None) -> int: n_keypoints = next(iter(self._n_keypoints.values())) for n in self._n_keypoints.values(): if n != n_keypoints: - raise ValueError( + raise RuntimeError( "The dataset contains different number of keypoints for different tasks." "Please specify the 'task' argument to get the number of keypoints." ) @@ -82,10 +82,9 @@ def classes(self, task: str | None = None) -> list[str]: @param task: Task to get the class names for. @rtype: list[str] @return: List of class names for the specified label type. - @raises ValueError: If the dataset loader was not provided during - initialization. - @raises ValueError: If the dataset contains different class names for different - label types. + @raises ValueError: If the C{task} is not present in the dataset. + @raises RuntimeError: If the C{task} was not provided and the dataset contains + different class names for different label types. """ if task is not None: if task not in self._classes: @@ -94,7 +93,7 @@ def classes(self, task: str | None = None) -> list[str]: class_names = list(self._classes.values())[0] for classes in self._classes.values(): if classes != class_names: - raise ValueError( + raise RuntimeError( "The dataset contains different class names for different tasks." ) return class_names @@ -106,11 +105,11 @@ def autogenerate_anchors(self, num_heads: int) -> tuple[list[list[float]], float @param num_heads: Number of heads to generate anchors for. @rtype: tuple[list[list[float]], float] @return: List of anchors in [-1,6] format and recall of the anchors. 
- @raises ValueError: If the dataset loader was not provided during + @raises RuntimeError: If the dataset loader was not provided during initialization. """ if self._loader is None: - raise ValueError( + raise RuntimeError( "Cannot generate anchors without a dataset loader. " "Please provide a dataset loader to the constructor " "or call `set_loader` method." From 164435d1106699661583bb26fbaf8635b2c7949e Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 06:34:14 +0200 Subject: [PATCH 011/102] renamed --- luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py index b74fb240..0b8baa2b 100644 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py @@ -176,10 +176,10 @@ def _build_predictions( self, feat: Tensor, anchor_grid: Tensor, grid: Tensor, stride: Tensor ) -> Tensor: batch_size = feat.shape[0] - x_bbox = feat[..., : self.box_offset + self.n_classes] - x_keypoints = feat[..., self.box_offset + self.n_classes :] + bbox = feat[..., : self.box_offset + self.n_classes] + keypoints = feat[..., self.box_offset + self.n_classes :] - box_cxcy, box_wh, box_tail = process_bbox_predictions(x_bbox, anchor_grid) + box_cxcy, box_wh, box_tail = process_bbox_predictions(bbox, anchor_grid) grid = grid.to(box_cxcy.device) stride = stride.to(box_cxcy.device) box_cxcy = (box_cxcy + grid) * stride @@ -187,7 +187,7 @@ def _build_predictions( grid_x = grid[..., 0:1] grid_y = grid[..., 1:2] - kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(x_keypoints) + kpt_x, kpt_y, kpt_vis = process_keypoints_predictions(keypoints) kpt_x = (kpt_x + grid_x) * stride kpt_y = (kpt_y + grid_y) * stride kpt_vis_sig = kpt_vis.sigmoid() From f4982dfa383e1704edce7ac109ab73c5889abf10 Mon Sep 17 00:00:00 2001 From: Martin 
Kozlovsky Date: Mon, 9 Sep 2024 06:34:20 +0200 Subject: [PATCH 012/102] changed log message --- luxonis_train/utils/config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 3ec605bb..9d5a903e 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -91,10 +91,11 @@ def check_main_metric(self) -> Self: name = metric.alias or metric.name logger.info(f"Setting '{name}' as main metric.") else: - logger.error( + logger.warning( + "[Ignore if using predefined model] " "No metrics specified. " "This is likely unintended unless " - "the configuration is not used for training." + "the configuration is not used for training. " ) return self From c5b9d1623b657dd06ec612bf13d786fa80da0ddb Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 06:43:06 +0200 Subject: [PATCH 013/102] changed default keypoint model head --- .../models/predefined_models/keypoint_detection_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/models/predefined_models/keypoint_detection_model.py b/luxonis_train/models/predefined_models/keypoint_detection_model.py index 04c7a643..29a18c19 100644 --- a/luxonis_train/models/predefined_models/keypoint_detection_model.py +++ b/luxonis_train/models/predefined_models/keypoint_detection_model.py @@ -21,7 +21,7 @@ class KeypointDetectionModel(BasePredefinedModel): loss_params: Kwargs = field(default_factory=dict) head_type: Literal[ "ImplicitKeypointBBoxHead", "EfficientKeypointBBoxHead" - ] = "ImplicitKeypointBBoxHead" + ] = "EfficientKeypointBBoxHead" kpt_visualizer_params: Kwargs = field(default_factory=dict) bbox_visualizer_params: Kwargs = field(default_factory=dict) bbox_task_name: str | None = None From e8f93314e4c78a992a1e03e9d77ba918f7f9efad Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 07:00:15 +0200 Subject: [PATCH 014/102] removed Any generic parameters from 
base classes --- .../attached_modules/base_attached_module.py | 6 +++--- luxonis_train/utils/registry.py | 15 ++++----------- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index a53ce4c1..b423fc0d 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -1,6 +1,6 @@ import logging from abc import ABC -from typing import Any, Generic +from typing import Generic from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta @@ -54,7 +54,7 @@ class BaseAttachedModule( supported_labels: list[LabelType | tuple[LabelType, ...]] | None = None - def __init__(self, *, node: BaseNode[Any, Any] | None = None): + def __init__(self, *, node: BaseNode | None = None): super().__init__() self._node = node self._epoch = 0 @@ -96,7 +96,7 @@ def name(self) -> str: return self.__class__.__name__ @property - def node(self) -> BaseNode[Any, Any]: + def node(self) -> BaseNode: """Reference to the node that this module is attached to. 
@type: L{BaseNode} diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index f2006378..ff5ec843 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -1,6 +1,5 @@ """This module implements a metaclass for automatic registration of classes.""" -from typing import Any import lightning.pytorch as pl from luxonis_ml.utils.registry import Registry @@ -15,20 +14,16 @@ LOADERS: Registry[type["lt.utils.loaders.BaseLoaderTorch"]] = Registry(name="loaders") """Registry for all loaders.""" -LOSSES: Registry[type["lt.attached_modules.BaseLoss[Any, Any]"]] = Registry( - name="losses" -) +LOSSES: Registry[type["lt.attached_modules.BaseLoss"]] = Registry(name="losses") """Registry for all losses.""" -METRICS: Registry[type["lt.attached_modules.BaseMetric[Any, Any]"]] = Registry( - name="metrics" -) +METRICS: Registry[type["lt.attached_modules.BaseMetric"]] = Registry(name="metrics") """Registry for all metrics.""" MODELS: Registry[type["lt.models.BasePredefinedModel"]] = Registry(name="models") """Registry for all models.""" -NODES: Registry[type["lt.nodes.BaseNode[Any, Any]"]] = Registry(name="nodes") +NODES: Registry[type["lt.nodes.BaseNode"]] = Registry(name="nodes") """Registry for all nodes.""" OPTIMIZERS: Registry[type[Optimizer]] = Registry(name="optimizers") @@ -37,7 +32,5 @@ SCHEDULERS: Registry[type[_LRScheduler]] = Registry(name="schedulers") """Registry for all schedulers.""" -VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer[Any, Any]"]] = Registry( - "visualizers" -) +VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer"]] = Registry("visualizers") """Registry for all visualizers.""" From a94c1d650190b913f1d714470ceb52b372437979 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 07:59:10 +0200 Subject: [PATCH 015/102] removed multiclass from focal loss until it is fixed --- .../losses/softmax_focal_loss.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) 
diff --git a/luxonis_train/attached_modules/losses/softmax_focal_loss.py b/luxonis_train/attached_modules/losses/softmax_focal_loss.py index f347421f..5caf5d69 100644 --- a/luxonis_train/attached_modules/losses/softmax_focal_loss.py +++ b/luxonis_train/attached_modules/losses/softmax_focal_loss.py @@ -1,3 +1,4 @@ +import logging from typing import Any, Literal import torch @@ -8,14 +9,16 @@ from .cross_entropy import CrossEntropyLoss +logger = logging.getLogger(__name__) -# TODO: Make focal losses support multi-class tasks + +# TODO: Add support for multi-class tasks class SoftmaxFocalLoss(BaseLoss[Tensor, Tensor]): supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] def __init__( self, - alpha: float | list[float] = 0.25, + alpha: float = 0.25, gamma: float = 2.0, reduction: Literal["none", "mean", "sum"] = "mean", **kwargs: Any, @@ -23,7 +26,7 @@ def __init__( """Focal loss implementation for binary classification and segmentation tasks using Softmax. - @type alpha: float | list[float] + @type alpha: float @param alpha: Weighting factor for the rare class. Defaults to C{0.25}. @type gamma: float @param gamma: Focusing parameter. Defaults to C{2.0}. 
@@ -40,13 +43,7 @@ def __init__( def forward(self, predictions: Tensor, target: Tensor) -> Tensor: ce_loss = self.ce_criterion.forward(predictions, target) pt = torch.exp(-ce_loss) - loss = ce_loss * ((1 - pt) ** self.gamma) - - if isinstance(self.alpha, float) and self.alpha >= 0: - loss = self.alpha * loss - elif isinstance(self.alpha, list): - alpha_t = torch.tensor(self.alpha)[target] - loss = alpha_t * loss + loss = ce_loss * ((1 - pt) ** self.gamma) * self.alpha if self.reduction == "mean": loss = loss.mean() From a4b49428294a842159b10a75a3590c496fd57c2d Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 07:59:28 +0200 Subject: [PATCH 016/102] cross entropy cleanup --- luxonis_train/attached_modules/losses/cross_entropy.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/luxonis_train/attached_modules/losses/cross_entropy.py b/luxonis_train/attached_modules/losses/cross_entropy.py index af545e9a..e1858686 100644 --- a/luxonis_train/attached_modules/losses/cross_entropy.py +++ b/luxonis_train/attached_modules/losses/cross_entropy.py @@ -9,7 +9,6 @@ from .base_loss import BaseLoss logger = getLogger(__name__) -was_logged = False class CrossEntropyLoss(BaseLoss[Tensor, Tensor]): @@ -34,19 +33,19 @@ def __init__( reduction=reduction, label_smoothing=label_smoothing, ) + self._was_logged = False def forward(self, preds: Tensor, target: Tensor) -> Tensor: - global was_logged if preds.ndim == target.ndim: ch_dim = 1 if preds.ndim > 1 else 0 if preds.shape[ch_dim] == 1: - if not was_logged: + if not self._was_logged: logger.warning( "`CrossEntropyLoss` expects at least 2 classes. " "Attempting to fix by adding a dummy channel. " "If you want to be sure, use `BCEWithLogitsLoss` instead." 
) - was_logged = True + self._was_logged = True preds = torch.cat([torch.zeros_like(preds), preds], dim=ch_dim) if target.shape[ch_dim] == 1: target = torch.cat([1 - target, target], dim=ch_dim) From c963dd1f580ad1781341a25f1528d160f0e5634a Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 08:06:55 +0200 Subject: [PATCH 017/102] updated gpu monitor config --- luxonis_train/callbacks/gpu_stats_monitor.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/luxonis_train/callbacks/gpu_stats_monitor.py b/luxonis_train/callbacks/gpu_stats_monitor.py index 32983bde..244e8a6d 100644 --- a/luxonis_train/callbacks/gpu_stats_monitor.py +++ b/luxonis_train/callbacks/gpu_stats_monitor.py @@ -55,18 +55,18 @@ def __init__( GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: - - **fan.speed** – The fan speed value is the percent of maximum speed that the device's fan is currently + - C{fan.speed} – The fan speed value is the percent of maximum speed that the device's fan is currently intended to run at. It ranges from 0 to 100 %. Note: The reported speed is the intended fan speed. If the fan is physically blocked and unable to spin, this output will not match the actual fan speed. Many parts do not report fan speeds because they rely on cooling via fans in the surrounding enclosure. - - **memory.used** – Total memory allocated by active contexts. - - **memory.free** – Total free memory. - - **utilization.gpu** – Percent of time over the past sample period during which one or more kernels was + - C{memory.used} – Total memory allocated by active contexts. + - C{memory.free} – Total free memory. + - C{utilization.gpu} – Percent of time over the past sample period during which one or more kernels was executing on the GPU. The sample period may be between 1 second and 1/6 second depending on the product. 
- - **utilization.memory** – Percent of time over the past sample period during which global (device) memory was + - C{utilization.memory} – Percent of time over the past sample period during which global (device) memory was being read or written. The sample period may be between 1 second and 1/6 second depending on the product. - - **temperature.gpu** – Core GPU temperature, in degrees C. - - **temperature.memory** – HBM memory temperature, in degrees C. + - C{temperature.gpu} – Core GPU temperature, in degrees C. + - C{temperature.memory} – HBM memory temperature, in degrees C. @type memory_utilization: bool @param memory_utilization: Set to C{True} to monitor used, free and percentage of memory utilization at the start and end of each step. Defaults to C{True}. From e61692d6f0a897aa95d2fe21042e1a46c048f494 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 08:16:12 +0200 Subject: [PATCH 018/102] fixed docs requirements --- .github/workflows/ci.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 67328122..09d74854 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -54,6 +54,8 @@ jobs: - name: Install dependencies run: | + sudo apt update + sudo apt install -y pandoc pip install pydoctor curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" @@ -81,10 +83,7 @@ jobs: cache: pip - name: Install dependencies - run: | - sudo apt update - sudo apt install -y pandoc - pip install -e .[dev] + run: pip install -e .[dev] - name: Authenticate to Google Cloud id: google-auth From d60f4a2b112bdcc861589b31e74ff91e360e5fec Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 08:21:28 +0200 Subject: [PATCH 019/102] updated ci --- .github/workflows/ci.yaml | 41 +++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git 
a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 09d74854..feee8945 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -4,8 +4,8 @@ on: pull_request: branches: [ dev, main ] paths: - - 'luxonis_train/**/**.py' - - 'tests/**/**.py' + - 'luxonis_train/**' + - 'tests/**' - .github/workflows/ci.yaml permissions: @@ -63,6 +63,9 @@ jobs: run: python gen-docs.py luxonis_ml tests: + needs: + - pre-commit + - docs strategy: fail-fast: false matrix: @@ -101,26 +104,40 @@ jobs: PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 with: emoji: false - custom-arguments: --cov luxonis_train --cov-report json --junit-xml pytest.xml + custom-arguments: --cov luxonis_train --cov-report xml --junit-xml pytest.xml - - name: Generate coverage report + - name: Create Test Report + uses: EnricoMi/publish-unit-test-result-action@v2 + if: matrix.os == 'ubuntu-latest' + with: + files: pytest.xml + + - name: Generate coverage badge + uses: tj-actions/coverage-badge-py@v2 if: matrix.os == 'ubuntu-latest' + with: + output: media/coverage_badge.svg + + - name: Generate coverage report uses: orgoro/coverage@v3.1 + if: matrix.os == 'ubuntu-latest' with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} - - name: Update Coverage Badge - uses: we-cli/coverage-badge-action@main + - name: Commit coverage badge + if: matrix.os == 'ubuntu-latest' + run: | + git config --global user.name 'GitHub Actions' + git config --global user.email 'actions@github.com' + git diff --quiet media/coverage_badge.svg || { + git add media/coverage_badge.svg + git commit -m "[Automated] Updated coverage badge" + } - name: Push changes - if: matrix.os == 'ubuntu-latest' uses: ad-m/github-push-action@master + if: matrix.os == 'ubuntu-latest' with: branch: ${{ github.head_ref }} - - name: Create Test Report - uses: EnricoMi/publish-unit-test-result-action@v2 - if: matrix.os == 'ubuntu-latest' - with: - files: pytest.xml From 67fef3b4a3231b43be866a6e2d88a7c0658502a6 Mon Sep 17 00:00:00 2001 
From: Martin Kozlovsky Date: Mon, 9 Sep 2024 08:23:05 +0200 Subject: [PATCH 020/102] fixed docs generation --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index feee8945..9184e0db 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -60,7 +60,7 @@ jobs: curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" - name: Build docs - run: python gen-docs.py luxonis_ml + run: python gen-docs.py luxonis_train tests: needs: From a66bf2299a49cc5a563459cf8a886b872ac4e2be Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 08:24:22 +0200 Subject: [PATCH 021/102] removed old workflows --- .github/workflows/docs.yaml | 26 -------------------------- .github/workflows/pre-commit.yaml | 13 ------------- 2 files changed, 39 deletions(-) delete mode 100644 .github/workflows/docs.yaml delete mode 100644 .github/workflows/pre-commit.yaml diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml deleted file mode 100644 index f3c69761..00000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,26 +0,0 @@ -name: Docs - -on: - pull_request: - branches: [ dev, main ] - paths: - - 'luxonis_train/**' - - .github/workflows/docs.yaml - -jobs: - docs: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - ref: ${{ github.head_ref }} - - - name: Install dependencies - run: | - pip install pydoctor - curl -L "https://raw.githubusercontent.com/luxonis/python-api-analyzer-to-json/main/gen-docs.py" -o "gen-docs.py" - - - name: Build docs - run: | - python gen-docs.py luxonis_train diff --git a/.github/workflows/pre-commit.yaml b/.github/workflows/pre-commit.yaml deleted file mode 100644 index ce6b816b..00000000 --- a/.github/workflows/pre-commit.yaml +++ /dev/null @@ -1,13 +0,0 @@ -name: pre-commit - -on: - pull_request: - branches: [dev, main] - 
-jobs: - pre-commit: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v3 - - uses: pre-commit/action@v3.0.0 From ba6f3a9607d1b74cdf86bb674e6b48f6dac7d213 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 08:28:11 +0200 Subject: [PATCH 022/102] added pytest-md to requirements-dev.txt --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index 7f915575..0b939aa3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,3 +4,4 @@ pre-commit>=3.2.1 opencv-stubs>=0.0.8 pytest-cov>=4.1.0 pytest-subtests>=0.12.1 +pytest-md>=0.2.0 From 3ab973ffe537329f6a99c23c593e3807772aaf37 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 08:45:59 +0200 Subject: [PATCH 023/102] fixed dataset metadata tests --- tests/unittests/test_utils/test_dataset_metadata.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unittests/test_utils/test_dataset_metadata.py b/tests/unittests/test_utils/test_dataset_metadata.py index a6c9edda..8dba11a8 100644 --- a/tests/unittests/test_utils/test_dataset_metadata.py +++ b/tests/unittests/test_utils/test_dataset_metadata.py @@ -21,7 +21,7 @@ def test_n_classes(metadata): with pytest.raises(ValueError): metadata.n_classes("segmentation") metadata._classes["segmentation"] = ["car", "person", "tree"] - with pytest.raises(ValueError): + with pytest.raises(RuntimeError): metadata.n_classes() @@ -32,7 +32,7 @@ def test_n_keypoints(metadata): with pytest.raises(ValueError): metadata.n_keypoints("segmentation") metadata._n_keypoints["segmentation"] = 1 - with pytest.raises(ValueError): + with pytest.raises(RuntimeError): metadata.n_keypoints() @@ -43,11 +43,11 @@ def test_class_names(metadata): with pytest.raises(ValueError): metadata.classes("segmentation") metadata._classes["segmentation"] = ["car", "person", "tree"] - with pytest.raises(ValueError): + with 
pytest.raises(RuntimeError): metadata.classes() def test_no_loader(): metadata = DatasetMetadata() - with pytest.raises(ValueError): + with pytest.raises(RuntimeError): metadata.autogenerate_anchors(3) From f11f728bd92c35ed408cafc70a5df43a7c128470 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 09:24:29 +0200 Subject: [PATCH 024/102] preserving class order --- tests/integration/conftest.py | 2 +- tests/integration/parking_lot.json | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 2374c3a9..dc26b9c4 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -52,7 +52,7 @@ def parking_lot_dataset() -> LuxonisDataset: def generator(): filenames: dict[int, Path] = {} for base_path in [kpt_mask_path, mask_brand_path, mask_color_path]: - for sequence_path in base_path.glob("sequence.*"): + for sequence_path in sorted(list(base_path.glob("sequence.*"))): frame_data = sequence_path / "step0.frame_data.json" with open(frame_data) as f: data = json.load(f)["captures"][0] diff --git a/tests/integration/parking_lot.json b/tests/integration/parking_lot.json index 5800dd14..0059241e 100644 --- a/tests/integration/parking_lot.json +++ b/tests/integration/parking_lot.json @@ -253,28 +253,28 @@ "postprocessor_path": null, "classes": [ "background", - "alfa-romeo", "chrysler", "bmw", - "harley", + "ducati", + "dodge", "ferrari", - "honda", "infiniti", "land-rover", "roll-royce", + "saab", "Kawasaki", "moto", - "piaggio", - "ducati", - "isuzu", - "jeep", "truimph", - "yamaha", - "dodge", - "saab", + "alfa-romeo", + "harley", + "honda", + "jeep", "aprilia", + "piaggio", + "yamaha", + "buick", "pontiac", - "buick" + "isuzu" ], "n_classes": 23, "is_softmax": false From 1fba74aecc3cdeac4807ebe3412e706c3b23ac75 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 09:26:16 +0200 Subject: [PATCH 025/102] ci pytest hotfix 
--- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 9184e0db..93092712 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -104,7 +104,7 @@ jobs: PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 with: emoji: false - custom-arguments: --cov luxonis_train --cov-report xml --junit-xml pytest.xml + custom-arguments: --cov luxonis_train --cov-report xml --junit-xml pytest.xml -k test_custom_tasks - name: Create Test Report uses: EnricoMi/publish-unit-test-result-action@v2 From 263da2cb573a5eb0b6746f5b2d40cd726f97a7cb Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Mon, 9 Sep 2024 09:37:00 +0200 Subject: [PATCH 026/102] removed macos --- .github/workflows/ci.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 93092712..86a25702 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -69,7 +69,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, windows-latest, macOS-latest] + os: [ubuntu-latest, windows-latest] runs-on: ${{ matrix.os }} @@ -104,7 +104,7 @@ jobs: PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 with: emoji: false - custom-arguments: --cov luxonis_train --cov-report xml --junit-xml pytest.xml -k test_custom_tasks + custom-arguments: --cov luxonis_train --cov-report xml --junit-xml pytest.xml - name: Create Test Report uses: EnricoMi/publish-unit-test-result-action@v2 From 25862015d558e11cd3321c16566dc151b2e393d7 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 9 Sep 2024 07:40:47 +0000 Subject: [PATCH 027/102] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 8e21255a..12876e69 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 84% 
- 84% + 78% + 78% From ac398e0a806bd2528c073197fb6308eadcc382d1 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Mon, 9 Sep 2024 08:08:51 +0000 Subject: [PATCH 028/102] [Automated] Updated coverage badge --- media/coverage_badge.svg | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 12876e69..ee07d4c2 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -9,13 +9,13 @@ - + coverage coverage - 78% - 78% + 96% + 96% From 1902d33666855b29b2eb90ca4f478a2d01936cbd Mon Sep 17 00:00:00 2001 From: Nikita Date: Mon, 9 Sep 2024 17:52:33 +0000 Subject: [PATCH 029/102] feature: add DDRNet segmentation model --- configs/ddrnet_segmentation_model.yaml | 52 ++ .../models/predefined_models/__init__.py | 2 + .../ddrnet_segmentation_model.py | 82 +++ luxonis_train/nodes/backbones/__init__.py | 2 + luxonis_train/nodes/backbones/ddrnet.py | 529 ++++++++++++++++++ luxonis_train/nodes/heads/__init__.py | 2 + .../nodes/heads/ddrnet_segmentation_head.py | 60 ++ 7 files changed, 729 insertions(+) create mode 100644 configs/ddrnet_segmentation_model.yaml create mode 100644 luxonis_train/models/predefined_models/ddrnet_segmentation_model.py create mode 100644 luxonis_train/nodes/backbones/ddrnet.py create mode 100644 luxonis_train/nodes/heads/ddrnet_segmentation_head.py diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml new file mode 100644 index 00000000..5586c665 --- /dev/null +++ b/configs/ddrnet_segmentation_model.yaml @@ -0,0 +1,52 @@ +# Example configuration for training a predefined segmentation model + +model: + name: ddrnet_segmentation + predefined_model: + name: DDRNetSegmentationModel + params: + backbone: DDRNet + task: multiclass + backbone_params: + use_aux_heads: True # set to False to disable auxiliary heads (for export) + + head_params: + in_planes: 128 + num_classes: 80 # number of classes + attach_index: 0 + aux_head_params: # 
ignored if use_aux_heads is False + in_planes: 64 + num_classes: 80 # number of classes + attach_index: 1 + + +loader: + params: + dataset_name: coco_test + +trainer: + preprocessing: + train_image_size: [&height 256, &width 320] # [512, 512] + keep_aspect_ratio: False + normalize: + active: True + + batch_size: 4 # 32 + epochs: &epochs 1 # 500 + num_workers: 4 + validation_interval: 10 + num_log_images: 8 + + callbacks: + - name: ExportOnTrainEnd + - name: TestOnTrainEnd + + optimizer: + name: SGD + params: + lr: 0.01 + momentum: 0.9 + weight_decay: 0.0005 + + scheduler: + name: PolynomialLR diff --git a/luxonis_train/models/predefined_models/__init__.py b/luxonis_train/models/predefined_models/__init__.py index 0e8fe8c0..76df2a4c 100644 --- a/luxonis_train/models/predefined_models/__init__.py +++ b/luxonis_train/models/predefined_models/__init__.py @@ -3,6 +3,7 @@ from .detection_model import DetectionModel from .keypoint_detection_model import KeypointDetectionModel from .segmentation_model import SegmentationModel +from .ddrnet_segmentation_model import DDRNetSegmentationModel __all__ = [ "BasePredefinedModel", @@ -10,4 +11,5 @@ "DetectionModel", "KeypointDetectionModel", "ClassificationModel", + "DDRNetSegmentationModel", ] diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py new file mode 100644 index 00000000..3548e616 --- /dev/null +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -0,0 +1,82 @@ +from dataclasses import dataclass, field +from typing import Literal + +from luxonis_train.utils.config import ( + AttachedModuleConfig, + LossModuleConfig, + MetricModuleConfig, + ModelNodeConfig, +) +from luxonis_train.utils.types import Kwargs + +from .segmentation_model import SegmentationModel + + +@dataclass +class DDRNetSegmentationModel(SegmentationModel): + backbone: str = "DDRNet" + task: Literal["binary", "multiclass"] = "binary" + 
backbone_params: Kwargs = field(default_factory=dict) + head_params: Kwargs = field(default_factory=dict) + aux_head_params: Kwargs = field(default_factory=dict) + loss_params: Kwargs = field(default_factory=dict) + visualizer_params: Kwargs = field(default_factory=dict) + task_name: str | None = None + + @property + def nodes(self) -> list[ModelNodeConfig]: + node_list = [ + ModelNodeConfig( + name=self.backbone, + alias="ddrnet_backbone", + freezing=self.backbone_params.pop("freezing", {}), + params=self.backbone_params, + ), + ModelNodeConfig( + name="DDRNetSegmentationHead", + alias="segmentation_head", + inputs=["ddrnet_backbone"], + freezing=self.head_params.pop("freezing", {}), + params=self.head_params, + task=self.task_name, + ), + ] + if self.backbone_params.get("use_aux_heads", False): + node_list.append( + ModelNodeConfig( + name="DDRNetSegmentationHead", + alias="aux_segmentation_head", + inputs=["ddrnet_backbone"], + freezing=self.aux_head_params.pop("freezing", {}), + params=self.aux_head_params, + task=self.task_name, + ) + ) + return node_list + + @property + def losses(self) -> list[LossModuleConfig]: + loss_list = [ + LossModuleConfig( + name="BCEWithLogitsLoss" + if self.task == "binary" + else "CrossEntropyLoss", + alias="segmentation_loss", + attached_to="segmentation_head", + params=self.loss_params, + weight=1.0, + ), + ] + if self.backbone_params.get("use_aux_heads", False): + loss_list.append( + LossModuleConfig( + name="BCEWithLogitsLoss" + if self.task == "binary" + else "CrossEntropyLoss", + alias="aux_segmentation_loss", + attached_to="aux_segmentation_head", + params=self.loss_params, + weight=0.4, + ) + ) + return loss_list diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py index 9463124b..e9e11ca4 100644 --- a/luxonis_train/nodes/backbones/__init__.py +++ b/luxonis_train/nodes/backbones/__init__.py @@ -7,6 +7,7 @@ from .repvgg import RepVGG from .resnet import ResNet from .rexnetv1 
import ReXNetV1_lite +from .ddrnet import DDRNet __all__ = [ "ContextSpatial", @@ -18,4 +19,5 @@ "ReXNetV1_lite", "RepVGG", "ResNet", + "DDRNet", ] diff --git a/luxonis_train/nodes/backbones/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet.py new file mode 100644 index 00000000..c2bd3011 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet.py @@ -0,0 +1,529 @@ +"""DDRNet backbone. + +Adapted from: U{https://github.com/Deci-AI/super-gradients/blob/master/src/super_gradients/training/models/segmentation_models/ddrnet.py} +Original source: U{https://github.com/ydhongHIT/DDRNet} +Paper: U{https://arxiv.org/pdf/2101.06085.pdf} +@license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} +""" +from typing import Literal +from abc import ABC +from typing import Optional, Callable, Union, List, Tuple, Dict + + +import torchvision +import torch +from torch import Tensor, nn +from torch.nn import functional as F + +from ..base_node import BaseNode + +def ConvBN(in_channels: int, out_channels: int, kernel_size: int, bias=True, stride=1, padding=0, add_relu=False): + seq = [nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, stride=stride, padding=padding), nn.BatchNorm2d(out_channels)] + if add_relu: + seq.append(nn.ReLU(inplace=True)) + return nn.Sequential(*seq) + + +def _make_layer(block, in_planes, planes, num_blocks, stride=1, expansion=1): + layers = [] + layers.append(block(in_planes, planes, stride, final_relu=num_blocks > 1, expansion=expansion)) + in_planes = planes * expansion + if num_blocks > 1: + for i in range(1, num_blocks): + if i == (num_blocks - 1): + layers.append(block(in_planes, planes, stride=1, final_relu=False, expansion=expansion)) + else: + layers.append(block(in_planes, planes, stride=1, final_relu=True, expansion=expansion)) + + return nn.Sequential(*layers) + +def drop_path(x, drop_prob: float = 0.0, scale_by_keep: bool = True): + """ + Drop paths (Stochastic Depth) per sample (when applied in main path 
of residual blocks). + """ + + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + +class DropPath(nn.Module): + """ + Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + + Intended usage of this block is the following: + + >>> class ResNetBlock(nn.Module): + >>> def __init__(self, ..., drop_path_rate:float): + >>> self.drop_path = DropPath(drop_path_rate) + >>> + >>> def forward(self, x): + >>> return x + self.drop_path(self.conv_bn_act(x)) + + Code taken from TIMM (https://github.com/rwightman/pytorch-image-models) + Apache License 2.0 + """ + + def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): + """ + + :param drop_prob: Probability of zeroing out individual vector (channel dimension) of each feature map + :param scale_by_keep: Whether to scale the output by the keep probability. Enable by default and helps to + keep output mean & std in the same range as w/o drop path. 
+ """ + super(DropPath, self).__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def forward(self, x): + if self.drop_prob == 0.0 or not self.training: + return x + + return drop_path(x, self.drop_prob, self.scale_by_keep) + + def extra_repr(self): + return f"drop_prob={round(self.drop_prob,3):0.3f}" + + +class BasicResNetBlock(nn.Module): + def __init__(self, in_planes, planes, stride=1, expansion=1, final_relu=True, droppath_prob=0.0): + super(BasicResNetBlock, self).__init__() + self.expansion = expansion + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.final_relu = final_relu + + self.drop_path = DropPath(drop_prob=droppath_prob) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(self.expansion * planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out = self.drop_path(out) + out += self.shortcut(x) + if self.final_relu: + out = F.relu(out) + return out + +class Bottleneck(nn.Module): + def __init__(self, in_planes, planes, stride=1, expansion=4, final_relu=True, droppath_prob=0.0): + super(Bottleneck, self).__init__() + self.expansion = expansion + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(self.expansion * planes) + self.final_relu = final_relu + + self.drop_path = 
DropPath(drop_prob=droppath_prob) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(self.expansion * planes) + ) + + def forward(self, x): + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + + out = self.drop_path(out) + + out += self.shortcut(x) + + if self.final_relu: + out = F.relu(out) + + return out + + +class DAPPMBranch(nn.Module): + def __init__(self, kernel_size: int, stride: int, in_planes: int, branch_planes: int, inter_mode: str = "bilinear"): + """ + A DAPPM branch + :param kernel_size: the kernel size for the average pooling + when stride=0 this parameter is omitted and AdaptiveAvgPool2d over all the input is performed + :param stride: stride for the average pooling + when stride=0: an AdaptiveAvgPool2d over all the input is performed (output is 1x1) + when stride=1: no average pooling is performed + when stride>1: average polling is performed (scaling the input down and up again) + :param in_planes: + :param branch_planes: width after the the first convolution + :param inter_mode: interpolation mode for upscaling + """ + + super().__init__() + down_list = [] + if stride == 0: + # when stride is 0 average pool all the input to 1x1 + down_list.append(nn.AdaptiveAvgPool2d((1, 1))) + elif stride == 1: + # when stride id 1 no average pooling is used + pass + else: + down_list.append(nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=stride)) + + down_list.append(nn.BatchNorm2d(in_planes)) + down_list.append(nn.ReLU(inplace=True)) + down_list.append(nn.Conv2d(in_planes, branch_planes, kernel_size=1, bias=False)) + + self.down_scale = nn.Sequential(*down_list) + self.up_scale = UpscaleOnline(inter_mode) + + if stride != 1: + self.process = nn.Sequential( + nn.BatchNorm2d(branch_planes), + 
nn.ReLU(inplace=True), + nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + ) + + def forward(self, x): + """ + All branches of the DAPPM but the first one receive the output of the previous branch as a second input + :param x: in branch 0 - the original input of the DAPPM. in other branches - a list containing the original + input and the output of the previous branch. + """ + + if isinstance(x, list): + output_of_prev_branch = x[1] + x = x[0] + else: + output_of_prev_branch = None + + in_width = x.shape[-1] + in_height = x.shape[-2] + out = self.down_scale(x) + out = self.up_scale(out, output_height=in_height, output_width=in_width) + + if output_of_prev_branch is not None: + out = self.process(out + output_of_prev_branch) + + return out + + +class DAPPM(nn.Module): + def __init__(self, in_planes: int, branch_planes: int, out_planes: int, kernel_sizes: list, strides: list, inter_mode: str = "bilinear"): + super().__init__() + + assert len(kernel_sizes) == len(strides), "len of kernel_sizes and strides must be the same" + self.branches = nn.ModuleList() + for kernel_size, stride in zip(kernel_sizes, strides): + self.branches.append(DAPPMBranch(kernel_size=kernel_size, stride=stride, in_planes=in_planes, branch_planes=branch_planes, inter_mode=inter_mode)) + + self.compression = nn.Sequential( + nn.BatchNorm2d(branch_planes * len(self.branches)), + nn.ReLU(inplace=True), + nn.Conv2d(branch_planes * len(self.branches), out_planes, kernel_size=1, bias=False), + ) + self.shortcut = nn.Sequential( + nn.BatchNorm2d(in_planes), + nn.ReLU(inplace=True), + nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False), + ) + + def forward(self, x): + x_list = [] + for i, branch in enumerate(self.branches): + if i == 0: + x_list.append(branch(x)) + else: + x_list.append(branch([x, x_list[i - 1]])) + + out = self.compression(torch.cat(x_list, 1)) + self.shortcut(x) + return out + + +class UpscaleOnline(nn.Module): + """ + In some cases the required 
scale/size for the scaling is known only when the input is received. + This class support such cases. only the interpolation mode is set in advance. + """ + + def __init__(self, mode="bilinear"): + super().__init__() + self.mode = mode + + def forward(self, x, output_height: int, output_width: int): + return F.interpolate(x, size=[output_height, output_width], mode=self.mode) + + +class DDRBackBoneBase(nn.Module, ABC): + """A base class defining functions that must be supported by DDRBackBones""" + + def validate_backbone_attributes(self): + expected_attributes = ["stem", "layer1", "layer2", "layer3", "layer4", "input_channels"] + for attribute in expected_attributes: + assert hasattr(self, attribute), f"Invalid backbone - attribute '{attribute}' is missing" + + def get_backbone_output_number_of_channels(self): + """Return a dictionary of the shapes of each output of the backbone to determine the in_channels of the + skip and compress layers""" + output_shapes = {} + x = torch.randn(1, self.input_channels, 320, 320) + x = self.stem(x) + x = self.layer1(x) + x = self.layer2(x) + output_shapes["layer2"] = x.shape[1] + for layer in self.layer3: + x = layer(x) + output_shapes["layer3"] = x.shape[1] + x = self.layer4(x) + output_shapes["layer4"] = x.shape[1] + return output_shapes + + +class BasicDDRBackBone(DDRBackBoneBase): + def __init__(self, block: nn.Module.__class__, width: int, layers: list, input_channels: int, layer3_repeats: int = 1): + super().__init__() + self.input_channels = input_channels + self.stem = nn.Sequential( + ConvBN(in_channels=input_channels, out_channels=width, kernel_size=3, stride=2, padding=1, add_relu=True), + ConvBN(in_channels=width, out_channels=width, kernel_size=3, stride=2, padding=1, add_relu=True), + ) + self.layer1 = _make_layer(block=block, in_planes=width, planes=width, num_blocks=layers[0]) + self.layer2 = _make_layer(block=block, in_planes=width, planes=width * 2, num_blocks=layers[1], stride=2) + self.layer3 = nn.ModuleList( 
+ [_make_layer(block=block, in_planes=width * 2, planes=width * 4, num_blocks=layers[2], stride=2)] + + [_make_layer(block=block, in_planes=width * 4, planes=width * 4, num_blocks=layers[2], stride=1) for _ in range(layer3_repeats - 1)] + ) + self.layer4 = _make_layer(block=block, in_planes=width * 4, planes=width * 8, num_blocks=layers[3], stride=2) + + def replace_input_channels(self, in_channels: int, compute_new_weights_fn: Optional[Callable[[nn.Module, int], nn.Module]] = None): + from super_gradients.modules.weight_replacement_utils import replace_conv2d_input_channels + + self.stem[0][0] = replace_conv2d_input_channels(conv=self.stem[0][0], in_channels=in_channels, fn=compute_new_weights_fn) + self.input_channels = self.get_input_channels() + + def get_input_channels(self) -> int: + return self.stem[0][0].in_channels + + +class DDRNet(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + #backbone: DDRBackBoneBase.__class__, + use_aux_heads: bool = True, + upscale_module: nn.Module = UpscaleOnline(), + highres_planes: int = 64, + spp_width: int = 128, + #head_width: int, + ssp_inter_mode: str = "bilinear", + segmentation_inter_mode: str = "bilinear", + block: nn.Module.__class__ = BasicResNetBlock, + skip_block: nn.Module.__class__ = BasicResNetBlock, + layer5_block: nn.Module.__class__ = Bottleneck, + layer5_bottleneck_expansion: int = 2, + #classification_mode=False, + spp_kernel_sizes: list = [1, 5, 9, 17, 0], + spp_strides: list = [1, 2, 4, 8, 0], + layer3_repeats: int = 1, + planes: int = 32, + layers: list = [2, 2, 2, 2, 1, 2, 2, 1], + input_channels: int = 3, + **kwargs, + ): + """ + + :param upscale_module: upscale to use in the backbone (DAPPM and Segmentation head are using bilinear interpolation) + :param highres_planes: number of channels in the high resolution net + :param ssp_inter_mode: the interpolation used in the SPP block + :param segmentation_inter_mode: the interpolation used in the segmentation head + :param skip_block: allows 
specifying a different block (from 'block') for the skip layer + :param layer5_block: type of block to use in layer5 and layer5_skip + :param layer5_bottleneck_expansion: determines the expansion rate for Bottleneck block + :param spp_kernel_sizes: list of kernel sizes for the spp module pooling + :param spp_strides: list of strides for the spp module pooling + :param layer3_repeats: number of times to repeat the 3rd stage of ddr model, including the paths interchange + modules. + """ + + super().__init__(**kwargs) + #self.use_aux_heads = use_aux_heads + self._use_aux_heads = use_aux_heads + self.upscale = upscale_module + self.ssp_inter_mode = ssp_inter_mode + self.segmentation_inter_mode = segmentation_inter_mode + self.block = block + self.skip_block = skip_block + self.relu = nn.ReLU(inplace=False) + #self.classification_mode = classification_mode + self.layer3_repeats = layer3_repeats + self.planes = planes + self.layers = layers + self.backbone_layers, self.additional_layers = self.layers[:4], self.layers[4:] + self.input_channels = input_channels + + self._backbone: DDRBackBoneBase = BasicDDRBackBone( + block=self.block, + width=self.planes, + layers=self.backbone_layers, + input_channels=self.input_channels, + layer3_repeats=self.layer3_repeats, + ) + self._backbone.validate_backbone_attributes() + out_chan_backbone = self._backbone.get_backbone_output_number_of_channels() + + # Repeat r-times layer4 + self.compression3, self.down3, self.layer3_skip = nn.ModuleList(), nn.ModuleList(), nn.ModuleList() + for i in range(layer3_repeats): + self.compression3.append(ConvBN(in_channels=out_chan_backbone["layer3"], out_channels=highres_planes, kernel_size=1, bias=False)) + self.down3.append(ConvBN(in_channels=highres_planes, out_channels=out_chan_backbone["layer3"], kernel_size=3, stride=2, padding=1, bias=False)) + self.layer3_skip.append( + _make_layer( + in_planes=out_chan_backbone["layer2"] if i == 0 else highres_planes, + planes=highres_planes, + 
block=skip_block, + num_blocks=self.additional_layers[1], + ) + ) + + self.compression4 = ConvBN(in_channels=out_chan_backbone["layer4"], out_channels=highres_planes, kernel_size=1, bias=False) + + self.down4 = nn.Sequential( + ConvBN(in_channels=highres_planes, out_channels=highres_planes * 2, kernel_size=3, stride=2, padding=1, bias=False, add_relu=True), + ConvBN(in_channels=highres_planes * 2, out_channels=out_chan_backbone["layer4"], kernel_size=3, stride=2, padding=1, bias=False), + ) + self.layer4_skip = _make_layer(block=skip_block, in_planes=highres_planes, planes=highres_planes, num_blocks=self.additional_layers[2]) + self.layer5_skip = _make_layer( + block=layer5_block, in_planes=highres_planes, planes=highres_planes, num_blocks=self.additional_layers[3], expansion=layer5_bottleneck_expansion + ) + + + self.layer5 = _make_layer( + block=layer5_block, + in_planes=out_chan_backbone["layer4"], + planes=out_chan_backbone["layer4"], + num_blocks=self.additional_layers[0], + stride=2, + expansion=layer5_bottleneck_expansion, + ) + + self.spp = DAPPM( + in_planes=out_chan_backbone["layer4"] * layer5_bottleneck_expansion, + branch_planes=spp_width, + out_planes=highres_planes * layer5_bottleneck_expansion, + inter_mode=self.ssp_inter_mode, + kernel_sizes=spp_kernel_sizes, + strides=spp_strides, + ) + + self.highres_planes = highres_planes + self.layer5_bottleneck_expansion = layer5_bottleneck_expansion + #self.head_width = head_width + self.init_params() + + @property + def backbone(self): + """ + Create a fake backbone module to load backbone pre-trained weights. 
+ """ + return nn.Sequential( + Dict( + [ + ("_backbone", self._backbone), + ("compression3", self.compression3), + ("compression4", self.compression4), + ("down3", self.down3), + ("down4", self.down4), + ("layer3_skip", self.layer3_skip), + ("layer4_skip", self.layer4_skip), + ("layer4_skip", self.layer4_skip), + ("layer5_skip", self.layer5_skip), + ] + ) + ) + + def forward(self, x: Tensor) -> List[Tensor]: + width_output = x.shape[-1] // 8 + height_output = x.shape[-2] // 8 + + x = self._backbone.stem(x) + x = self._backbone.layer1(x) + x = self._backbone.layer2(self.relu(x)) + + # Repeat layer 3 + x_skip = x + for i in range(self.layer3_repeats): + out_layer3 = self._backbone.layer3[i](self.relu(x)) + out_layer3_skip = self.layer3_skip[i](self.relu(x_skip)) + + x = out_layer3 + self.down3[i](self.relu(out_layer3_skip)) + x_skip = out_layer3_skip + self.upscale(self.compression3[i](self.relu(out_layer3)), height_output, width_output) + + # save for auxiliary head + if self._use_aux_heads: + x_extra = x_skip + + out_layer4 = self._backbone.layer4(self.relu(x)) + out_layer4_skip = self.layer4_skip(self.relu(x_skip)) + + x = out_layer4 + self.down4(self.relu(out_layer4_skip)) + x_skip = out_layer4_skip + self.upscale(self.compression4(self.relu(out_layer4)), height_output, width_output) + + out_layer5_skip = self.layer5_skip(self.relu(x_skip)) + + # if self.classification_mode: + # x_skip = self.high_to_low_fusion(self.relu(out_layer5_skip)) + # x = self.layer5(self.relu(x)) + # x = self.average_pool(x + x_skip) + # x = self.fc(x.squeeze()) + # return x + # else: + x = self.upscale(self.spp(self.layer5(self.relu(x))), height_output, width_output) + + x = x + out_layer5_skip + + if self._use_aux_heads: + return [x, x_extra] + else: + return [x] + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif 
isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + + @property + def use_aux_heads(self): + return self._use_aux_heads + + @use_aux_heads.setter + def use_aux_heads(self, use_aux: bool): + """ + public setter for self._use_aux_heads, called every time an assignment to self.use_aux_heads is applied. + if use_aux is False, `_remove_auxiliary_heads` is called to delete auxiliary and detail heads. + if use_aux is True, and self._use_aux_heads was already set to False a ValueError is raised, recreating + aux and detail heads outside init method is not allowed, and the module should be recreated. + """ + if use_aux is True and self._use_aux_heads is False: + raise ValueError( + "Cant turn use_aux_heads from False to True. Try initiating the module again with" + " `use_aux_heads=True` or initiating the auxiliary heads modules manually." + ) + if not use_aux: + self._remove_auxiliary_heads() + self._use_aux_heads = use_aux + + def prep_model_for_conversion(self, input_size: Union[tuple, list] = None, **kwargs): + # set to false and delete auxiliary and detail heads modules. 
+ self.use_aux_heads = False + + def _remove_auxiliary_heads(self): + if hasattr(self, "seghead_extra"): + del self.seghead_extra diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py index 28b5e8ca..3c1f2103 100644 --- a/luxonis_train/nodes/heads/__init__.py +++ b/luxonis_train/nodes/heads/__init__.py @@ -4,6 +4,7 @@ from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead from .segmentation_head import SegmentationHead +from .ddrnet_segmentation_head import DDRNetSegmentationHead __all__ = [ "BiSeNetHead", @@ -12,4 +13,5 @@ "EfficientKeypointBBoxHead", "ImplicitKeypointBBoxHead", "SegmentationHead", + "DDRNetSegmentationHead", ] diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py new file mode 100644 index 00000000..69de631a --- /dev/null +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -0,0 +1,60 @@ +"""DDRNet segmentation head. 
+ +Adapted from: U{https://github.com/Deci-AI/super-gradients/blob/master/src/super_gradients/training/models/segmentation_models/ddrnet.py} +Original source: U{https://github.com/ydhongHIT/DDRNet} +Paper: U{https://arxiv.org/pdf/2101.06085.pdf} +@license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} +""" + +import torch.nn as nn +from torch import Tensor + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import UpBlock +from luxonis_train.utils.general import infer_upscale_factor +from luxonis_train.utils.types import LabelType + + +class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): + in_height: int + in_channels: int + tasks: list[LabelType] = [LabelType.SEGMENTATION] + + def __init__(self, num_classes: int, in_planes: int = 128, inter_planes: int = 64, scale_factor: int = 8, inter_mode: str = "bilinear", attach_index=0, **kwargs): + """ + Last stage of the segmentation network. + Reduces the number of output planes (usually to num_classes) while increasing the size by scale_factor + :param in_planes: width of input + :param inter_planes: width of internal conv. must be a multiple of scale_factor^2 when inter_mode=pixel_shuffle + :param num_classes: output width + :param scale_factor: scaling factor + :param inter_mode: one of nearest, linear, bilinear, bicubic, trilinear, area or pixel_shuffle. 
+ when set to pixel_shuffle, an nn.PixelShuffle will be used for scaling + """ + self.attach_index = attach_index + + super().__init__(**kwargs) + + if inter_mode == "pixel_shuffle": + assert inter_planes % (scale_factor ^ 2) == 0, "when using pixel_shuffle, inter_planes must be a multiple of scale_factor^2" + + self.bn1 = nn.BatchNorm2d(in_planes) + self.conv1 = nn.Conv2d(in_planes, inter_planes, kernel_size=3, padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(inter_planes) + self.relu = nn.ReLU(inplace=True) + + if inter_mode == "pixel_shuffle": + self.conv2 = nn.Conv2d(inter_planes, inter_planes, kernel_size=1, padding=0, bias=True) + self.upscale = nn.PixelShuffle(scale_factor) + else: + self.conv2 = nn.Conv2d(inter_planes, num_classes, kernel_size=1, padding=0, bias=True) + self.upscale = nn.Upsample(scale_factor=scale_factor, mode=inter_mode) + + self.scale_factor = scale_factor + + def forward(self, x): + x = self.conv1(self.relu(self.bn1(x))) + out = self.conv2(self.relu(self.bn2(x))) + out = self.upscale(out) + + return out From 5be53abdcab8292798befa7de8b724e66f3c220c Mon Sep 17 00:00:00 2001 From: Nikita Date: Mon, 9 Sep 2024 23:33:20 +0000 Subject: [PATCH 030/102] refactor: refactoring and improving docstrings --- configs/ddrnet_segmentation_model.yaml | 4 +- .../models/predefined_models/__init__.py | 2 +- .../ddrnet_segmentation_model.py | 2 - luxonis_train/nodes/backbones/__init__.py | 2 +- luxonis_train/nodes/backbones/ddrnet.py | 755 +++++++++++++----- luxonis_train/nodes/heads/__init__.py | 2 +- .../nodes/heads/ddrnet_segmentation_head.py | 58 +- 7 files changed, 606 insertions(+), 219 deletions(-) diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index 5586c665..60c7e696 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -1,11 +1,11 @@ -# Example configuration for training a predefined segmentation model +# DDRNet-23-slim model for segmentation +# Refer to 
here for optimal hyperparameters for this model: https://github.com/Deci-AI/super-gradients/blob/4797c974c7c445d12e2575c468848d9c3e04becd/src/super_gradients/recipes/cityscapes_ddrnet.yaml#L4 model: name: ddrnet_segmentation predefined_model: name: DDRNetSegmentationModel params: - backbone: DDRNet task: multiclass backbone_params: use_aux_heads: True # set to False to disable auxiliary heads (for export) diff --git a/luxonis_train/models/predefined_models/__init__.py b/luxonis_train/models/predefined_models/__init__.py index 76df2a4c..c52e359d 100644 --- a/luxonis_train/models/predefined_models/__init__.py +++ b/luxonis_train/models/predefined_models/__init__.py @@ -1,9 +1,9 @@ from .base_predefined_model import BasePredefinedModel from .classification_model import ClassificationModel +from .ddrnet_segmentation_model import DDRNetSegmentationModel from .detection_model import DetectionModel from .keypoint_detection_model import KeypointDetectionModel from .segmentation_model import SegmentationModel -from .ddrnet_segmentation_model import DDRNetSegmentationModel __all__ = [ "BasePredefinedModel", diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py index 3548e616..b8f73d61 100644 --- a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -2,9 +2,7 @@ from typing import Literal from luxonis_train.utils.config import ( - AttachedModuleConfig, LossModuleConfig, - MetricModuleConfig, ModelNodeConfig, ) from luxonis_train.utils.types import Kwargs diff --git a/luxonis_train/nodes/backbones/__init__.py b/luxonis_train/nodes/backbones/__init__.py index e9e11ca4..aad94198 100644 --- a/luxonis_train/nodes/backbones/__init__.py +++ b/luxonis_train/nodes/backbones/__init__.py @@ -1,4 +1,5 @@ from .contextspatial import ContextSpatial +from .ddrnet import DDRNet from .efficientnet import 
EfficientNet from .efficientrep import EfficientRep from .micronet import MicroNet @@ -7,7 +8,6 @@ from .repvgg import RepVGG from .resnet import ResNet from .rexnetv1 import ReXNetV1_lite -from .ddrnet import DDRNet __all__ = [ "ContextSpatial", diff --git a/luxonis_train/nodes/backbones/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet.py index c2bd3011..f9d80f53 100644 --- a/luxonis_train/nodes/backbones/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet.py @@ -5,45 +5,126 @@ Paper: U{https://arxiv.org/pdf/2101.06085.pdf} @license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} """ -from typing import Literal from abc import ABC -from typing import Optional, Callable, Union, List, Tuple, Dict +from typing import Dict, Type - -import torchvision import torch from torch import Tensor, nn from torch.nn import functional as F from ..base_node import BaseNode -def ConvBN(in_channels: int, out_channels: int, kernel_size: int, bias=True, stride=1, padding=0, add_relu=False): - seq = [nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias, stride=stride, padding=padding), nn.BatchNorm2d(out_channels)] + +def ConvBN( + in_channels: int, + out_channels: int, + kernel_size: int, + bias: bool = True, + stride: int = 1, + padding: int = 0, + add_relu: bool = False, +) -> nn.Sequential: + """A convolutional layer followed by batch normalization. + + @type in_channels: int + @param in_channels: Number of input channels. + @type out_channels: int + @param out_channels: Number of output channels. + @type kernel_size: int + @param kernel_size: Size of the convolutional kernel. + @type bias: bool + @param bias: Whether to include a bias term. Defaults to True. + @type stride: int + @param stride: Stride for the convolution. Defaults to 1. + @type padding: int + @param padding: Padding for the convolution. Defaults to 0. + @type add_relu: bool + @param add_relu: Whether to add a ReLU activation. Defaults to False. 
+ @return: A sequential layer with Conv2D, BatchNorm, and optional ReLU. + """ + seq: list[nn.Module] = [ + nn.Conv2d( + in_channels, + out_channels, + kernel_size=kernel_size, + bias=bias, + stride=stride, + padding=padding, + ), + nn.BatchNorm2d(out_channels), + ] + if add_relu: seq.append(nn.ReLU(inplace=True)) + return nn.Sequential(*seq) -def _make_layer(block, in_planes, planes, num_blocks, stride=1, expansion=1): - layers = [] - layers.append(block(in_planes, planes, stride, final_relu=num_blocks > 1, expansion=expansion)) +def _make_layer( + block: Type[nn.Module], + in_planes: int, + planes: int, + num_blocks: int, + stride: int = 1, + expansion: int = 1, +) -> nn.Sequential: + """Creates a sequential layer consisting of a series of blocks. + + @type block: Type[nn.Module] + @param block: The block class to be used. + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of output channels. + @type num_blocks: int + @param num_blocks: Number of blocks in the layer. + @type stride: int + @param stride: Stride for the first block. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the block. Defaults to 1. + @return: A sequential container of the blocks. 
+ """ + layers: list[nn.Module] = [] + + layers.append( + block(in_planes, planes, stride, final_relu=num_blocks > 1, expansion=expansion) + ) + in_planes = planes * expansion + if num_blocks > 1: for i in range(1, num_blocks): - if i == (num_blocks - 1): - layers.append(block(in_planes, planes, stride=1, final_relu=False, expansion=expansion)) - else: - layers.append(block(in_planes, planes, stride=1, final_relu=True, expansion=expansion)) + final_relu = i != (num_blocks - 1) + layers.append( + block( + in_planes, + planes, + stride=1, + final_relu=final_relu, + expansion=expansion, + ) + ) return nn.Sequential(*layers) -def drop_path(x, drop_prob: float = 0.0, scale_by_keep: bool = True): - """ - Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - """ +def drop_path(x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True) -> Tensor: + """Drop paths (Stochastic Depth) per sample when applied in the main path of + residual blocks. + + @type x: Tensor + @param x: Input tensor. + @type drop_prob: float + @param drop_prob: Probability of dropping a path. Defaults to 0.0. + @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep probability. Defaults + to True. + @return: Tensor with dropped paths based on the provided drop probability. + """ keep_prob = 1 - drop_prob - shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets + shape = (x.shape[0],) + (1,) * ( + x.ndim - 1 + ) # Supports tensors of different dimensions random_tensor = x.new_empty(shape).bernoulli_(keep_prob) if keep_prob > 0.0 and scale_by_keep: random_tensor.div_(keep_prob) @@ -51,50 +132,81 @@ def drop_path(x, drop_prob: float = 0.0, scale_by_keep: bool = True): class DropPath(nn.Module): - """ - Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + """Drop paths (Stochastic Depth) per sample, when applied in the main path of + residual blocks. 
- Intended usage of this block is the following: + Intended usage of this block is as follows: >>> class ResNetBlock(nn.Module): - >>> def __init__(self, ..., drop_path_rate:float): + >>> def __init__(self, ..., drop_path_rate: float): >>> self.drop_path = DropPath(drop_path_rate) >>> >>> def forward(self, x): >>> return x + self.drop_path(self.conv_bn_act(x)) - Code taken from TIMM (https://github.com/rwightman/pytorch-image-models) - Apache License 2.0 + Code taken from TIMM (https://github.com/rwightman/pytorch-image-models), Apache License 2.0. """ def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): + """Initializes the DropPath module. + + @type drop_prob: float + @param drop_prob: Probability of zeroing out individual vectors (channel + dimension) of each feature map. Defaults to 0.0. + @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep probability. + Enabled by default to maintain output mean & std in the same range as + without DropPath. Defaults to True. """ - - :param drop_prob: Probability of zeroing out individual vector (channel dimension) of each feature map - :param scale_by_keep: Whether to scale the output by the keep probability. Enable by default and helps to - keep output mean & std in the same range as w/o drop path. 
- """ - super(DropPath, self).__init__() + super().__init__() self.drop_prob = drop_prob self.scale_by_keep = scale_by_keep - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: if self.drop_prob == 0.0 or not self.training: return x - return drop_path(x, self.drop_prob, self.scale_by_keep) - def extra_repr(self): - return f"drop_prob={round(self.drop_prob,3):0.3f}" + def extra_repr(self) -> str: + return f"drop_prob={round(self.drop_prob, 3):0.3f}" class BasicResNetBlock(nn.Module): - def __init__(self, in_planes, planes, stride=1, expansion=1, final_relu=True, droppath_prob=0.0): - super(BasicResNetBlock, self).__init__() + def __init__( + self, + in_planes: int, + planes: int, + stride: int = 1, + expansion: int = 1, + final_relu: bool = True, + droppath_prob: float = 0.0, + ): + """A basic residual block for ResNet. + + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of output channels. + @type stride: int + @param stride: Stride for the convolutional layers. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the output channels. Defaults to 1. + @type final_relu: bool + @param final_relu: Whether to apply a ReLU activation after the residual + addition. Defaults to True. + @type droppath_prob: float + @param droppath_prob: Drop path probability for stochastic depth. Defaults to + 0.0. 
+ """ + super().__init__() self.expansion = expansion - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.conv1 = nn.Conv2d( + in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=1, padding=1, bias=False + ) self.bn2 = nn.BatchNorm2d(planes) self.final_relu = final_relu @@ -102,10 +214,17 @@ def __init__(self, in_planes, planes, stride=1, expansion=1, final_relu=True, dr self.shortcut = nn.Sequential() if stride != 1 or in_planes != self.expansion * planes: self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(self.expansion * planes) + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(self.expansion * planes), ) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: out = F.relu(self.bn1(self.conv1(x))) out = self.bn2(self.conv2(out)) out = self.drop_path(out) @@ -114,15 +233,45 @@ def forward(self, x): out = F.relu(out) return out + class Bottleneck(nn.Module): - def __init__(self, in_planes, planes, stride=1, expansion=4, final_relu=True, droppath_prob=0.0): - super(Bottleneck, self).__init__() + def __init__( + self, + in_planes: int, + planes: int, + stride: int = 1, + expansion: int = 4, + final_relu: bool = True, + droppath_prob: float = 0.0, + ): + """A bottleneck block for ResNet. + + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of intermediate channels. + @type stride: int + @param stride: Stride for the second convolutional layer. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the output channels. Defaults to 4. 
+ @type final_relu: bool + @param final_relu: Whether to apply a ReLU activation after the residual + addition. Defaults to True. + @type droppath_prob: float + @param droppath_prob: Drop path probability for stochastic depth. Defaults to + 0.0. + """ + super().__init__() self.expansion = expansion self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, self.expansion * planes, kernel_size=1, bias=False) + self.conv3 = nn.Conv2d( + planes, self.expansion * planes, kernel_size=1, bias=False + ) self.bn3 = nn.BatchNorm2d(self.expansion * planes) self.final_relu = final_relu @@ -130,16 +279,22 @@ def __init__(self, in_planes, planes, stride=1, expansion=4, final_relu=True, dr self.shortcut = nn.Sequential() if stride != 1 or in_planes != self.expansion * planes: self.shortcut = nn.Sequential( - nn.Conv2d(in_planes, self.expansion * planes, kernel_size=1, stride=stride, bias=False), nn.BatchNorm2d(self.expansion * planes) + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(self.expansion * planes), ) - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: out = F.relu(self.bn1(self.conv1(x))) out = F.relu(self.bn2(self.conv2(out))) out = self.bn3(self.conv3(out)) out = self.drop_path(out) - out += self.shortcut(x) if self.final_relu: @@ -149,30 +304,40 @@ def forward(self, x): class DAPPMBranch(nn.Module): - def __init__(self, kernel_size: int, stride: int, in_planes: int, branch_planes: int, inter_mode: str = "bilinear"): - """ - A DAPPM branch - :param kernel_size: the kernel size for the average pooling - when stride=0 this parameter is omitted and AdaptiveAvgPool2d over all the input 
is performed - :param stride: stride for the average pooling - when stride=0: an AdaptiveAvgPool2d over all the input is performed (output is 1x1) - when stride=1: no average pooling is performed - when stride>1: average polling is performed (scaling the input down and up again) - :param in_planes: - :param branch_planes: width after the the first convolution - :param inter_mode: interpolation mode for upscaling + def __init__( + self, + kernel_size: int, + stride: int, + in_planes: int, + branch_planes: int, + inter_mode: str = "bilinear", + ): + """A DAPPM branch. + + @type kernel_size: int + @param kernel_size: The kernel size for the average pooling. When stride=0, this + parameter is omitted, and AdaptiveAvgPool2d over all the input is performed. + @type stride: int + @param stride: Stride for the average pooling. When stride=0, an + AdaptiveAvgPool2d over all the input is performed (output is 1x1). When + stride=1, no average pooling is performed. When stride>1, average pooling is + performed (scaling the input down and up again). + @type in_planes: int + @param in_planes: Number of input channels. + @type branch_planes: int + @param branch_planes: Width after the first convolution. + @type inter_mode: str + @param inter_mode: Interpolation mode for upscaling. Defaults to "bilinear". 
""" - super().__init__() + down_list = [] if stride == 0: - # when stride is 0 average pool all the input to 1x1 down_list.append(nn.AdaptiveAvgPool2d((1, 1))) - elif stride == 1: - # when stride id 1 no average pooling is used - pass - else: - down_list.append(nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=stride)) + elif stride > 1: + down_list.append( + nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=stride) + ) down_list.append(nn.BatchNorm2d(in_planes)) down_list.append(nn.ReLU(inplace=True)) @@ -185,16 +350,20 @@ def __init__(self, kernel_size: int, stride: int, in_planes: int, branch_planes: self.process = nn.Sequential( nn.BatchNorm2d(branch_planes), nn.ReLU(inplace=True), - nn.Conv2d(branch_planes, branch_planes, kernel_size=3, padding=1, bias=False), + nn.Conv2d( + branch_planes, branch_planes, kernel_size=3, padding=1, bias=False + ), ) - def forward(self, x): - """ - All branches of the DAPPM but the first one receive the output of the previous branch as a second input - :param x: in branch 0 - the original input of the DAPPM. in other branches - a list containing the original - input and the output of the previous branch. - """ + def forward(self, x: Tensor) -> Tensor: + """Process input through the DAPPM branch. + @type x: Tensor or list[Tensor] + @param x: In branch 0 - the original input of the DAPPM. In other branches - a list containing the original + input and the output of the previous branch. + + @return: Processed output tensor. 
+ """ + if isinstance(x, list): output_of_prev_branch = x[1] x = x[0] @@ -213,18 +382,58 @@ def forward(self, x): class DAPPM(nn.Module): - def __init__(self, in_planes: int, branch_planes: int, out_planes: int, kernel_sizes: list, strides: list, inter_mode: str = "bilinear"): + def __init__( + self, + in_planes: int, + branch_planes: int, + out_planes: int, + kernel_sizes: list[int], + strides: list[int], + inter_mode: str = "bilinear", + ): + """DAPPM (Deep Aggregation Pyramid Pooling Module). + + @type in_planes: int + @param in_planes: Number of input channels. + @type branch_planes: int + @param branch_planes: Width after the first convolution in each branch. + @type out_planes: int + @param out_planes: Number of output channels. + @type kernel_sizes: list[int] + @param kernel_sizes: List of kernel sizes for each branch. + @type strides: list[int] + @param strides: List of strides for each branch. + @type inter_mode: str + @param inter_mode: Interpolation mode for upscaling. Defaults to "bilinear". 
+ """ super().__init__() - assert len(kernel_sizes) == len(strides), "len of kernel_sizes and strides must be the same" - self.branches = nn.ModuleList() - for kernel_size, stride in zip(kernel_sizes, strides): - self.branches.append(DAPPMBranch(kernel_size=kernel_size, stride=stride, in_planes=in_planes, branch_planes=branch_planes, inter_mode=inter_mode)) + assert len(kernel_sizes) == len( + strides + ), "len of kernel_sizes and strides must be the same" + + self.branches = nn.ModuleList( + [ + DAPPMBranch( + kernel_size=kernel_size, + stride=stride, + in_planes=in_planes, + branch_planes=branch_planes, + inter_mode=inter_mode, + ) + for kernel_size, stride in zip(kernel_sizes, strides) + ] + ) self.compression = nn.Sequential( nn.BatchNorm2d(branch_planes * len(self.branches)), nn.ReLU(inplace=True), - nn.Conv2d(branch_planes * len(self.branches), out_planes, kernel_size=1, bias=False), + nn.Conv2d( + branch_planes * len(self.branches), + out_planes, + kernel_size=1, + bias=False, + ), ) self.shortcut = nn.Sequential( nn.BatchNorm2d(in_planes), @@ -232,124 +441,270 @@ def __init__(self, in_planes: int, branch_planes: int, out_planes: int, kernel_s nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False), ) - def forward(self, x): - x_list = [] - for i, branch in enumerate(self.branches): - if i == 0: - x_list.append(branch(x)) - else: - x_list.append(branch([x, x_list[i - 1]])) + def forward(self, x: Tensor) -> Tensor: + """Forward pass through the DAPPM module. - out = self.compression(torch.cat(x_list, 1)) + self.shortcut(x) + @type x: Tensor + @param x: Input tensor. + @return: Output tensor after processing through all branches and compression. + """ + x_list = [self.branches[0](x)] + + for i in range(1, len(self.branches)): + x_list.append(self.branches[i]([x, x_list[i - 1]])) + + out = self.compression(torch.cat(x_list, dim=1)) + self.shortcut(x) return out class UpscaleOnline(nn.Module): + """Upscale tensor to a specified size during the forward pass. 
+ + This class supports cases where the required scale/size is only known when the input + is received. Only the interpolation mode is set in advance. """ - In some cases the required scale/size for the scaling is known only when the input is received. - This class support such cases. only the interpolation mode is set in advance. - """ - def __init__(self, mode="bilinear"): + def __init__(self, mode: str = "bilinear"): + """Initialize UpscaleOnline with the interpolation mode. + + @type mode: str + @param mode: Interpolation mode for resizing. Defaults to "bilinear". + """ super().__init__() self.mode = mode - def forward(self, x, output_height: int, output_width: int): + def forward(self, x: Tensor, output_height: int, output_width: int) -> Tensor: + """Upscale the input tensor to the specified height and width. + + @type x: Tensor + @param x: Input tensor to be upscaled. + @type output_height: int + @param output_height: Desired height of the output tensor. + @type output_width: int + @param output_width: Desired width of the output tensor. + @return: Upscaled tensor. + """ return F.interpolate(x, size=[output_height, output_width], mode=self.mode) class DDRBackBoneBase(nn.Module, ABC): - """A base class defining functions that must be supported by DDRBackBones""" + """Base class defining functions that must be supported by DDRBackBones.""" + + def validate_backbone_attributes(self) -> None: + """Validate the existence of required backbone attributes. - def validate_backbone_attributes(self): - expected_attributes = ["stem", "layer1", "layer2", "layer3", "layer4", "input_channels"] + Ensures that the following attributes are present: "stem", "layer1", "layer2", + "layer3", "layer4", "input_channels". 
+ """ + expected_attributes = [ + "stem", + "layer1", + "layer2", + "layer3", + "layer4", + "input_channels", + ] for attribute in expected_attributes: - assert hasattr(self, attribute), f"Invalid backbone - attribute '{attribute}' is missing" + assert hasattr( + self, attribute + ), f"Invalid backbone - attribute '{attribute}' is missing" + + def get_backbone_output_number_of_channels(self) -> dict[str, int]: + """Determine the number of output channels for each layer of the backbone. - def get_backbone_output_number_of_channels(self): - """Return a dictionary of the shapes of each output of the backbone to determine the in_channels of the - skip and compress layers""" + Returns a dictionary with keys "layer2", "layer3", "layer4" and their respective + number of output channels. + + @return: Dictionary of output channel counts for each layer. + """ output_shapes = {} x = torch.randn(1, self.input_channels, 320, 320) x = self.stem(x) x = self.layer1(x) x = self.layer2(x) output_shapes["layer2"] = x.shape[1] + for layer in self.layer3: x = layer(x) output_shapes["layer3"] = x.shape[1] + x = self.layer4(x) output_shapes["layer4"] = x.shape[1] + return output_shapes class BasicDDRBackBone(DDRBackBoneBase): - def __init__(self, block: nn.Module.__class__, width: int, layers: list, input_channels: int, layer3_repeats: int = 1): + def __init__( + self, + block: Type[nn.Module], + width: int, + layers: list[int], + input_channels: int, + layer3_repeats: int = 1, + ): + """Initialize the BasicDDRBackBone with specified parameters. + + @type block: Type[nn.Module] + @param block: The block class to use for layers. + @type width: int + @param width: Width of the feature maps. + @type layers: list[int] + @param layers: Number of blocks in each layer. + @type input_channels: int + @param input_channels: Number of input channels. + @type layer3_repeats: int + @param layer3_repeats: Number of repeats for layer3. Defaults to 1. 
+ """ super().__init__() self.input_channels = input_channels + self.stem = nn.Sequential( - ConvBN(in_channels=input_channels, out_channels=width, kernel_size=3, stride=2, padding=1, add_relu=True), - ConvBN(in_channels=width, out_channels=width, kernel_size=3, stride=2, padding=1, add_relu=True), + ConvBN( + in_channels=input_channels, + out_channels=width, + kernel_size=3, + stride=2, + padding=1, + add_relu=True, + ), + ConvBN( + in_channels=width, + out_channels=width, + kernel_size=3, + stride=2, + padding=1, + add_relu=True, + ), ) - self.layer1 = _make_layer(block=block, in_planes=width, planes=width, num_blocks=layers[0]) - self.layer2 = _make_layer(block=block, in_planes=width, planes=width * 2, num_blocks=layers[1], stride=2) - self.layer3 = nn.ModuleList( - [_make_layer(block=block, in_planes=width * 2, planes=width * 4, num_blocks=layers[2], stride=2)] - + [_make_layer(block=block, in_planes=width * 4, planes=width * 4, num_blocks=layers[2], stride=1) for _ in range(layer3_repeats - 1)] + + self.layer1 = _make_layer( + block=block, + in_planes=width, + planes=width, + num_blocks=layers[0], ) - self.layer4 = _make_layer(block=block, in_planes=width * 4, planes=width * 8, num_blocks=layers[3], stride=2) - def replace_input_channels(self, in_channels: int, compute_new_weights_fn: Optional[Callable[[nn.Module, int], nn.Module]] = None): - from super_gradients.modules.weight_replacement_utils import replace_conv2d_input_channels + self.layer2 = _make_layer( + block=block, + in_planes=width, + planes=width * 2, + num_blocks=layers[1], + stride=2, + ) - self.stem[0][0] = replace_conv2d_input_channels(conv=self.stem[0][0], in_channels=in_channels, fn=compute_new_weights_fn) - self.input_channels = self.get_input_channels() + self.layer3 = nn.ModuleList( + [ + _make_layer( + block=block, + in_planes=width * 2, + planes=width * 4, + num_blocks=layers[2], + stride=2, + ) + ] + + [ + _make_layer( + block=block, + in_planes=width * 4, + planes=width * 4, + 
num_blocks=layers[2], + stride=1, + ) + for _ in range(layer3_repeats - 1) + ] + ) - def get_input_channels(self) -> int: - return self.stem[0][0].in_channels + self.layer4 = _make_layer( + block=block, + in_planes=width * 4, + planes=width * 8, + num_blocks=layers[3], + stride=2, + ) class DDRNet(BaseNode[Tensor, list[Tensor]]): def __init__( self, - #backbone: DDRBackBoneBase.__class__, use_aux_heads: bool = True, - upscale_module: nn.Module = UpscaleOnline(), + upscale_module: nn.Module = None, highres_planes: int = 64, spp_width: int = 128, - #head_width: int, ssp_inter_mode: str = "bilinear", segmentation_inter_mode: str = "bilinear", - block: nn.Module.__class__ = BasicResNetBlock, - skip_block: nn.Module.__class__ = BasicResNetBlock, - layer5_block: nn.Module.__class__ = Bottleneck, + block: Type[nn.Module] = BasicResNetBlock, + skip_block: Type[nn.Module] = BasicResNetBlock, + layer5_block: Type[nn.Module] = Bottleneck, layer5_bottleneck_expansion: int = 2, - #classification_mode=False, - spp_kernel_sizes: list = [1, 5, 9, 17, 0], - spp_strides: list = [1, 2, 4, 8, 0], + spp_kernel_sizes: list[int] = None, + spp_strides: list[int] = None, layer3_repeats: int = 1, planes: int = 32, - layers: list = [2, 2, 2, 2, 1, 2, 2, 1], + layers: list[int] = None, input_channels: int = 3, **kwargs, ): + """Initialize the DDRNet with specified parameters. + + @type use_aux_heads: bool + @param use_aux_heads: Whether to use auxiliary heads. Defaults to True. + @type upscale_module: nn.Module + @param upscale_module: Module for upscaling (e.g., bilinear interpolation). + Defaults to UpscaleOnline(). + @type highres_planes: int + @param highres_planes: Number of channels in the high resolution net. Defaults + to 64. + @type spp_width: int + @param spp_width: Width of the branches in the SPP block. Defaults to 128. + @type ssp_inter_mode: str + @param ssp_inter_mode: Interpolation mode for the SPP block. Defaults to + "bilinear". 
+ @type segmentation_inter_mode: str + @param segmentation_inter_mode: Interpolation mode for the segmentation head. + Defaults to "bilinear". + @type block: Type[nn.Module] + @param block: Type of block to use in the backbone. Defaults to + BasicResNetBlock. + @type skip_block: Type[nn.Module] + @param skip_block: Type of block for skip connections. Defaults to + BasicResNetBlock. + @type layer5_block: Type[nn.Module] + @param layer5_block: Type of block for layer5 and layer5_skip. Defaults to + Bottleneck. + @type layer5_bottleneck_expansion: int + @param layer5_bottleneck_expansion: Expansion factor for Bottleneck block in + layer5. Defaults to 2. + @type spp_kernel_sizes: list[int] + @param spp_kernel_sizes: Kernel sizes for the SPP module pooling. Defaults to + [1, 5, 9, 17, 0]. + @type spp_strides: list[int] + @param spp_strides: Strides for the SPP module pooling. Defaults to [1, 2, 4, 8, + 0]. + @type layer3_repeats: int + @param layer3_repeats: Number of times to repeat the 3rd stage. Defaults to 1. + @type planes: int + @param planes: Base number of channels. Defaults to 32. + @type layers: list[int] + @param layers: Number of blocks in each layer of the backbone. Defaults to [2, + 2, 2, 2, 1, 2, 2, 1]. + @type input_channels: int + @param input_channels: Number of input channels. Defaults to 3. + @type kwargs: Any + @param kwargs: Additional arguments to pass to L{BaseNode}. 
""" - :param upscale_module: upscale to use in the backbone (DAPPM and Segmentation head are using bilinear interpolation) - :param highres_planes: number of channels in the high resolution net - :param ssp_inter_mode: the interpolation used in the SPP block - :param segmentation_inter_mode: the interpolation used in the segmentation head - :param skip_block: allows specifying a different block (from 'block') for the skip layer - :param layer5_block: type of block to use in layer5 and layer5_skip - :param layer5_bottleneck_expansion: determines the expansion rate for Bottleneck block - :param spp_kernel_sizes: list of kernel sizes for the spp module pooling - :param spp_strides: list of strides for the spp module pooling - :param layer3_repeats: number of times to repeat the 3rd stage of ddr model, including the paths interchange - modules. - """ + if upscale_module is None: + upscale_module = UpscaleOnline() + if spp_kernel_sizes is None: + spp_kernel_sizes = [1, 5, 9, 17, 0] + if spp_strides is None: + spp_strides = [1, 2, 4, 8, 0] + if layers is None: + layers = [2, 2, 2, 2, 1, 2, 2, 1] super().__init__(**kwargs) - #self.use_aux_heads = use_aux_heads + self._use_aux_heads = use_aux_heads self.upscale = upscale_module self.ssp_inter_mode = ssp_inter_mode @@ -357,7 +712,6 @@ def __init__( self.block = block self.skip_block = skip_block self.relu = nn.ReLU(inplace=False) - #self.classification_mode = classification_mode self.layer3_repeats = layer3_repeats self.planes = planes self.layers = layers @@ -374,11 +728,29 @@ def __init__( self._backbone.validate_backbone_attributes() out_chan_backbone = self._backbone.get_backbone_output_number_of_channels() - # Repeat r-times layer4 - self.compression3, self.down3, self.layer3_skip = nn.ModuleList(), nn.ModuleList(), nn.ModuleList() + # Define layers for layer 3 + self.compression3 = nn.ModuleList() + self.down3 = nn.ModuleList() + self.layer3_skip = nn.ModuleList() for i in range(layer3_repeats): - 
self.compression3.append(ConvBN(in_channels=out_chan_backbone["layer3"], out_channels=highres_planes, kernel_size=1, bias=False)) - self.down3.append(ConvBN(in_channels=highres_planes, out_channels=out_chan_backbone["layer3"], kernel_size=3, stride=2, padding=1, bias=False)) + self.compression3.append( + ConvBN( + in_channels=out_chan_backbone["layer3"], + out_channels=highres_planes, + kernel_size=1, + bias=False, + ) + ) + self.down3.append( + ConvBN( + in_channels=highres_planes, + out_channels=out_chan_backbone["layer3"], + kernel_size=3, + stride=2, + padding=1, + bias=False, + ) + ) self.layer3_skip.append( _make_layer( in_planes=out_chan_backbone["layer2"] if i == 0 else highres_planes, @@ -388,18 +760,47 @@ def __init__( ) ) - self.compression4 = ConvBN(in_channels=out_chan_backbone["layer4"], out_channels=highres_planes, kernel_size=1, bias=False) + self.compression4 = ConvBN( + in_channels=out_chan_backbone["layer4"], + out_channels=highres_planes, + kernel_size=1, + bias=False, + ) self.down4 = nn.Sequential( - ConvBN(in_channels=highres_planes, out_channels=highres_planes * 2, kernel_size=3, stride=2, padding=1, bias=False, add_relu=True), - ConvBN(in_channels=highres_planes * 2, out_channels=out_chan_backbone["layer4"], kernel_size=3, stride=2, padding=1, bias=False), + ConvBN( + in_channels=highres_planes, + out_channels=highres_planes * 2, + kernel_size=3, + stride=2, + padding=1, + bias=False, + add_relu=True, + ), + ConvBN( + in_channels=highres_planes * 2, + out_channels=out_chan_backbone["layer4"], + kernel_size=3, + stride=2, + padding=1, + bias=False, + ), + ) + + self.layer4_skip = _make_layer( + block=skip_block, + in_planes=highres_planes, + planes=highres_planes, + num_blocks=self.additional_layers[2], ) - self.layer4_skip = _make_layer(block=skip_block, in_planes=highres_planes, planes=highres_planes, num_blocks=self.additional_layers[2]) self.layer5_skip = _make_layer( - block=layer5_block, in_planes=highres_planes, planes=highres_planes, 
num_blocks=self.additional_layers[3], expansion=layer5_bottleneck_expansion + block=layer5_block, + in_planes=highres_planes, + planes=highres_planes, + num_blocks=self.additional_layers[3], + expansion=layer5_bottleneck_expansion, ) - self.layer5 = _make_layer( block=layer5_block, in_planes=out_chan_backbone["layer4"], @@ -420,14 +821,11 @@ def __init__( self.highres_planes = highres_planes self.layer5_bottleneck_expansion = layer5_bottleneck_expansion - #self.head_width = head_width self.init_params() @property def backbone(self): - """ - Create a fake backbone module to load backbone pre-trained weights. - """ + """Create a fake backbone module to load backbone pre-trained weights.""" return nn.Sequential( Dict( [ @@ -438,13 +836,12 @@ def backbone(self): ("down4", self.down4), ("layer3_skip", self.layer3_skip), ("layer4_skip", self.layer4_skip), - ("layer4_skip", self.layer4_skip), ("layer5_skip", self.layer5_skip), ] ) ) - def forward(self, x: Tensor) -> List[Tensor]: + def forward(self, x: Tensor) -> list[Tensor]: width_output = x.shape[-1] // 8 height_output = x.shape[-2] // 8 @@ -459,9 +856,11 @@ def forward(self, x: Tensor) -> List[Tensor]: out_layer3_skip = self.layer3_skip[i](self.relu(x_skip)) x = out_layer3 + self.down3[i](self.relu(out_layer3_skip)) - x_skip = out_layer3_skip + self.upscale(self.compression3[i](self.relu(out_layer3)), height_output, width_output) + x_skip = out_layer3_skip + self.upscale( + self.compression3[i](self.relu(out_layer3)), height_output, width_output + ) - # save for auxiliary head + # Save for auxiliary head if self._use_aux_heads: x_extra = x_skip @@ -469,18 +868,15 @@ def forward(self, x: Tensor) -> List[Tensor]: out_layer4_skip = self.layer4_skip(self.relu(x_skip)) x = out_layer4 + self.down4(self.relu(out_layer4_skip)) - x_skip = out_layer4_skip + self.upscale(self.compression4(self.relu(out_layer4)), height_output, width_output) + x_skip = out_layer4_skip + self.upscale( + self.compression4(self.relu(out_layer4)), 
height_output, width_output + ) out_layer5_skip = self.layer5_skip(self.relu(x_skip)) - # if self.classification_mode: - # x_skip = self.high_to_low_fusion(self.relu(out_layer5_skip)) - # x = self.layer5(self.relu(x)) - # x = self.average_pool(x + x_skip) - # x = self.fc(x.squeeze()) - # return x - # else: - x = self.upscale(self.spp(self.layer5(self.relu(x))), height_output, width_output) + x = self.upscale( + self.spp(self.layer5(self.relu(x))), height_output, width_output + ) x = x + out_layer5_skip @@ -498,32 +894,3 @@ def init_params(self): elif isinstance(m, nn.BatchNorm2d): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) - - @property - def use_aux_heads(self): - return self._use_aux_heads - - @use_aux_heads.setter - def use_aux_heads(self, use_aux: bool): - """ - public setter for self._use_aux_heads, called every time an assignment to self.use_aux_heads is applied. - if use_aux is False, `_remove_auxiliary_heads` is called to delete auxiliary and detail heads. - if use_aux is True, and self._use_aux_heads was already set to False a ValueError is raised, recreating - aux and detail heads outside init method is not allowed, and the module should be recreated. - """ - if use_aux is True and self._use_aux_heads is False: - raise ValueError( - "Cant turn use_aux_heads from False to True. Try initiating the module again with" - " `use_aux_heads=True` or initiating the auxiliary heads modules manually." - ) - if not use_aux: - self._remove_auxiliary_heads() - self._use_aux_heads = use_aux - - def prep_model_for_conversion(self, input_size: Union[tuple, list] = None, **kwargs): - # set to false and delete auxiliary and detail heads modules. 
- self.use_aux_heads = False - - def _remove_auxiliary_heads(self): - if hasattr(self, "seghead_extra"): - del self.seghead_extra diff --git a/luxonis_train/nodes/heads/__init__.py b/luxonis_train/nodes/heads/__init__.py index 3c1f2103..e188f188 100644 --- a/luxonis_train/nodes/heads/__init__.py +++ b/luxonis_train/nodes/heads/__init__.py @@ -1,10 +1,10 @@ from .bisenet_head import BiSeNetHead from .classification_head import ClassificationHead +from .ddrnet_segmentation_head import DDRNetSegmentationHead from .efficient_bbox_head import EfficientBBoxHead from .efficient_keypoint_bbox_head import EfficientKeypointBBoxHead from .implicit_keypoint_bbox_head import ImplicitKeypointBBoxHead from .segmentation_head import SegmentationHead -from .ddrnet_segmentation_head import DDRNetSegmentationHead __all__ = [ "BiSeNetHead", diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index 69de631a..71afab88 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -10,8 +10,6 @@ from torch import Tensor from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import UpBlock -from luxonis_train.utils.general import infer_upscale_factor from luxonis_train.utils.types import LabelType @@ -20,39 +18,63 @@ class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): in_channels: int tasks: list[LabelType] = [LabelType.SEGMENTATION] - def __init__(self, num_classes: int, in_planes: int = 128, inter_planes: int = 64, scale_factor: int = 8, inter_mode: str = "bilinear", attach_index=0, **kwargs): - """ - Last stage of the segmentation network. - Reduces the number of output planes (usually to num_classes) while increasing the size by scale_factor - :param in_planes: width of input - :param inter_planes: width of internal conv. 
must be a multiple of scale_factor^2 when inter_mode=pixel_shuffle - :param num_classes: output width - :param scale_factor: scaling factor - :param inter_mode: one of nearest, linear, bilinear, bicubic, trilinear, area or pixel_shuffle. - when set to pixel_shuffle, an nn.PixelShuffle will be used for scaling + def __init__( + self, + num_classes: int, + in_planes: int = 128, + inter_planes: int = 64, + scale_factor: int = 8, + inter_mode: str = "bilinear", + attach_index: int = 0, + **kwargs, + ): + """Last stage of the segmentation network. + + @type num_classes: int + @param num_classes: Output width. + @type in_planes: int + @param in_planes: Width of input. Defaults to 128. + @type inter_planes: int + @param inter_planes: Width of internal conv. Must be a multiple of + scale_factor^2 when inter_mode is pixel_shuffle. Defaults to 64. + @type scale_factor: int + @param scale_factor: Scaling factor. Defaults to 8. + @type inter_mode: str + @param inter_mode: Upsampling method. One of nearest, linear, bilinear, bicubic, + trilinear, area or pixel_shuffle. If pixel_shuffle is set, nn.PixelShuffle + is used for scaling. Defaults to "bilinear". + @type attach_index: int + @param attach_index: Index at which to attach. Defaults to 0. """ self.attach_index = attach_index - super().__init__(**kwargs) if inter_mode == "pixel_shuffle": - assert inter_planes % (scale_factor ^ 2) == 0, "when using pixel_shuffle, inter_planes must be a multiple of scale_factor^2" + assert ( + inter_planes % (scale_factor**2) == 0 + ), "When using pixel_shuffle, inter_planes must be a multiple of scale_factor^2." 
self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d(in_planes, inter_planes, kernel_size=3, padding=1, bias=False) + self.conv1 = nn.Conv2d( + in_planes, inter_planes, kernel_size=3, padding=1, bias=False + ) self.bn2 = nn.BatchNorm2d(inter_planes) self.relu = nn.ReLU(inplace=True) if inter_mode == "pixel_shuffle": - self.conv2 = nn.Conv2d(inter_planes, inter_planes, kernel_size=1, padding=0, bias=True) + self.conv2 = nn.Conv2d( + inter_planes, inter_planes, kernel_size=1, padding=0, bias=True + ) self.upscale = nn.PixelShuffle(scale_factor) else: - self.conv2 = nn.Conv2d(inter_planes, num_classes, kernel_size=1, padding=0, bias=True) + self.conv2 = nn.Conv2d( + inter_planes, num_classes, kernel_size=1, padding=0, bias=True + ) self.upscale = nn.Upsample(scale_factor=scale_factor, mode=inter_mode) self.scale_factor = scale_factor - def forward(self, x): + def forward(self, x: Tensor) -> Tensor: x = self.conv1(self.relu(self.bn1(x))) out = self.conv2(self.relu(self.bn2(x))) out = self.upscale(out) From 8f35ca1bddc4233ef1decdec4e673f871ad46bed Mon Sep 17 00:00:00 2001 From: Nikita Date: Mon, 9 Sep 2024 23:56:15 +0000 Subject: [PATCH 031/102] feature: improve default arguments handling --- configs/ddrnet_segmentation_model.yaml | 11 +---------- .../ddrnet_segmentation_model.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index 60c7e696..63196673 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -6,19 +6,10 @@ model: predefined_model: name: DDRNetSegmentationModel params: + num_classes: 80 task: multiclass backbone_params: use_aux_heads: True # set to False to disable auxiliary heads (for export) - - head_params: - in_planes: 128 - num_classes: 80 # number of classes - attach_index: 0 - aux_head_params: # ignored if use_aux_heads is False - in_planes: 64 - num_classes: 80 # number of 
classes - attach_index: 1 - loader: params: diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py index b8f73d61..5bfbe1a0 100644 --- a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -13,6 +13,9 @@ @dataclass class DDRNetSegmentationModel(SegmentationModel): backbone: str = "DDRNet" + num_classes: int = 1 + highres_planes: int = 64 + layer5_bottleneck_expansion: int = 2 task: Literal["binary", "multiclass"] = "binary" backbone_params: Kwargs = field(default_factory=dict) head_params: Kwargs = field(default_factory=dict) @@ -23,6 +26,21 @@ class DDRNetSegmentationModel(SegmentationModel): @property def nodes(self) -> list[ModelNodeConfig]: + self.backbone_params.update({"highres_planes": self.highres_planes}) + self.backbone_params.update( + {"layer5_bottleneck_expansion": self.layer5_bottleneck_expansion} + ) + + self.head_params.update( + {"in_planes": self.highres_planes * self.layer5_bottleneck_expansion} + ) + self.head_params.update({"num_classes": self.num_classes}) + self.head_params.update({"attach_index": 0}) + + self.aux_head_params.update({"in_planes": self.highres_planes}) + self.aux_head_params.update({"num_classes": self.num_classes}) + self.aux_head_params.update({"attach_index": 1}) + node_list = [ ModelNodeConfig( name=self.backbone, From f4cef9f8c27c7371b85415413680d2f5b239f46a Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Tue, 10 Sep 2024 00:34:50 +0000 Subject: [PATCH 032/102] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 8e21255a..bab3ea39 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 84% - 84% + 81% + 81% From 
5753851e7808d275cd5d331623dceab94c3ecbd2 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 10 Sep 2024 14:44:37 +0000 Subject: [PATCH 033/102] fix: use ConvModule --- luxonis_train/nodes/backbones/ddrnet.py | 74 ++++++------------------- 1 file changed, 18 insertions(+), 56 deletions(-) diff --git a/luxonis_train/nodes/backbones/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet.py index f9d80f53..a9b00ab1 100644 --- a/luxonis_train/nodes/backbones/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet.py @@ -12,52 +12,8 @@ from torch import Tensor, nn from torch.nn import functional as F -from ..base_node import BaseNode - - -def ConvBN( - in_channels: int, - out_channels: int, - kernel_size: int, - bias: bool = True, - stride: int = 1, - padding: int = 0, - add_relu: bool = False, -) -> nn.Sequential: - """A convolutional layer followed by batch normalization. - - @type in_channels: int - @param in_channels: Number of input channels. - @type out_channels: int - @param out_channels: Number of output channels. - @type kernel_size: int - @param kernel_size: Size of the convolutional kernel. - @type bias: bool - @param bias: Whether to include a bias term. Defaults to True. - @type stride: int - @param stride: Stride for the convolution. Defaults to 1. - @type padding: int - @param padding: Padding for the convolution. Defaults to 0. - @type add_relu: bool - @param add_relu: Whether to add a ReLU activation. Defaults to False. - @return: A sequential layer with Conv2D, BatchNorm, and optional ReLU. 
- """ - seq: list[nn.Module] = [ - nn.Conv2d( - in_channels, - out_channels, - kernel_size=kernel_size, - bias=bias, - stride=stride, - padding=padding, - ), - nn.BatchNorm2d(out_channels), - ] - - if add_relu: - seq.append(nn.ReLU(inplace=True)) - - return nn.Sequential(*seq) +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ConvModule def _make_layer( @@ -560,21 +516,23 @@ def __init__( self.input_channels = input_channels self.stem = nn.Sequential( - ConvBN( + ConvModule( in_channels=input_channels, out_channels=width, kernel_size=3, stride=2, padding=1, - add_relu=True, + bias=True, + activation=nn.ReLU(inplace=True), ), - ConvBN( + ConvModule( in_channels=width, out_channels=width, kernel_size=3, stride=2, padding=1, - add_relu=True, + bias=True, + activation=nn.ReLU(inplace=True), ), ) @@ -734,21 +692,23 @@ def __init__( self.layer3_skip = nn.ModuleList() for i in range(layer3_repeats): self.compression3.append( - ConvBN( + ConvModule( in_channels=out_chan_backbone["layer3"], out_channels=highres_planes, kernel_size=1, bias=False, + activation=nn.Identity(), ) ) self.down3.append( - ConvBN( + ConvModule( in_channels=highres_planes, out_channels=out_chan_backbone["layer3"], kernel_size=3, stride=2, padding=1, bias=False, + activation=nn.Identity(), ) ) self.layer3_skip.append( @@ -760,30 +720,32 @@ def __init__( ) ) - self.compression4 = ConvBN( + self.compression4 = ConvModule( in_channels=out_chan_backbone["layer4"], out_channels=highres_planes, kernel_size=1, bias=False, + activation=nn.Identity(), ) self.down4 = nn.Sequential( - ConvBN( + ConvModule( in_channels=highres_planes, out_channels=highres_planes * 2, kernel_size=3, stride=2, padding=1, bias=False, - add_relu=True, + activation=nn.ReLU(inplace=True), ), - ConvBN( + ConvModule( in_channels=highres_planes * 2, out_channels=out_chan_backbone["layer4"], kernel_size=3, stride=2, padding=1, bias=False, + activation=nn.Identity(), ), ) From 
d730046d1abd1d84c2d2fcea568dac1ba14d1652 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Tue, 10 Sep 2024 19:50:25 +0200 Subject: [PATCH 034/102] added type-checking to the CI --- .github/workflows/ci.yaml | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 86a25702..efee6205 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -7,6 +7,8 @@ on: - 'luxonis_train/**' - 'tests/**' - .github/workflows/ci.yaml + - '!**/*.md' + - '!luxonis_train/__main__.py' permissions: pull-requests: write @@ -62,10 +64,36 @@ jobs: - name: Build docs run: python gen-docs.py luxonis_train - tests: + type-check: needs: - pre-commit - docs + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: pip + + - name: Install dependencies + run: pip install . 
+ + - uses: jakebailey/pyright-action@v2 + with: + level: warning + working-directory: luxonis_train + warnings: true + python-version: '3.10' + + tests: + needs: + - type-check strategy: fail-fast: false matrix: @@ -104,7 +132,7 @@ jobs: PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 with: emoji: false - custom-arguments: --cov luxonis_train --cov-report xml --junit-xml pytest.xml + custom-arguments: --junit-xml pytest.xml - name: Create Test Report uses: EnricoMi/publish-unit-test-result-action@v2 @@ -119,11 +147,13 @@ jobs: output: media/coverage_badge.svg - name: Generate coverage report - uses: orgoro/coverage@v3.1 + uses: orgoro/coverage@v3.2 if: matrix.os == 'ubuntu-latest' with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} + thresholdAll: 90 + thresholdNew: 80 - name: Commit coverage badge if: matrix.os == 'ubuntu-latest' From 1ecac10b6f558ee2c22e85718bbd73151643967f Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Tue, 10 Sep 2024 19:50:43 +0200 Subject: [PATCH 035/102] added pytest configuration --- pyproject.toml | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5ff79282..7388702e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,10 +48,6 @@ select = ["E4", "E7", "E9", "F", "W", "B", "I"] [tool.docformatter] black = true -[tool.mypy] -python_version = "3.10" -ignore_missing_imports = true - [tool.pyright] typeCheckingMode = "basic" reportMissingTypeStubs = "none" @@ -61,6 +57,9 @@ reportIncompatibleVariableOverride = "none" reportIncompatibleMethodOverride = "none" reportUnnecessaryIsInstance = "none" +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "--cov=luxonis_train --cov-report=term --cov-report=html --cov-report=xml --disable-warnings" [tool.coverage.run] omit = [ @@ -73,12 +72,12 @@ exclude_also = [ "def __repr__", "def __rich_repr__", "def __str__", - "raise AssertionError", + "assert", "raise NotImplementedError", "except ImportError", 
"@abstractmethod", "@overload", - "exit", + "exit\\(\\)", "cv2\\.imshow", "cv2\\.waitKey", "logger\\.", From 2094de8ae6bd79bbf977c03cacc87b0986d9c1a0 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Tue, 10 Sep 2024 19:51:00 +0200 Subject: [PATCH 036/102] fixed type issues --- luxonis_train/utils/__init__.py | 1 - luxonis_train/utils/registry.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/luxonis_train/utils/__init__.py b/luxonis_train/utils/__init__.py index 30654ffc..c47d3d33 100644 --- a/luxonis_train/utils/__init__.py +++ b/luxonis_train/utils/__init__.py @@ -36,7 +36,6 @@ "to_shape_packet", "get_with_default", "LuxonisTrackerPL", - "registry", "match_to_anchor", "dist2bbox", "bbox2dist", diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index ff5ec843..c41a26cd 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -11,7 +11,7 @@ CALLBACKS: Registry[type[pl.Callback]] = Registry(name="callbacks") """Registry for all callbacks.""" -LOADERS: Registry[type["lt.utils.loaders.BaseLoaderTorch"]] = Registry(name="loaders") +LOADERS: Registry[type["lt.loaders.BaseLoaderTorch"]] = Registry(name="loaders") """Registry for all loaders.""" LOSSES: Registry[type["lt.attached_modules.BaseLoss"]] = Registry(name="losses") From 6e6f397d3abfa8e94316520fd1cfa46aef6c17f5 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Tue, 10 Sep 2024 19:51:20 +0200 Subject: [PATCH 037/102] updated CONTRIBUTING.md --- CONTRIBUTING.md | 24 ++++++++++++++---------- luxonis_train/core/core.py | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d113518b..14923406 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -43,16 +43,24 @@ To verify that your documentation is formatted correctly, follow these steps: ## Tests We use [pytest](https://docs.pytest.org/en/stable/) for testing. -The tests are located in the `tests` directory. 
You can run the tests locally with: +The tests are located in the `tests` directory. You can run the tests locally by running: ```bash -pytest tests --cov=luxonis_train +pytest ``` -This command will run all tests and print a coverage report. The coverage report -is only informational for now, but we may enforce a minimum coverage in the future. +in the root directory. -**If a new feature is added, a new test should be added to cover it.** +This command will run all tests and print a coverage report. + +> \[!TIP\] +> It will also generate an HTML coverage report in the `htmlcov` directory +> if you want to inspect the coverage in more detail, open `htmlcov/index.html` in a browser. + +> \[!IMPORTANT\] +> If a new feature is added, a new test should be added to cover it. +> The minimum overall test coverage for a PR to be merged is 90%. +> The minimum coverage for new files is 80%. ## GitHub Actions @@ -72,9 +80,5 @@ Successful tests are required for merging a PR. 1. Make changes in a new branch. 1. Test your changes locally. 1. Commit (pre-commit hook will run). -1. Push to your branch and create a pull request. Always request a review from: - - [Martin Kozlovský](https://github.com/kozlov721) - - [Matija Teršek](https://github.com/tersekmatija) - - [Conor Simmons](https://github.com/conorsim) -1. Any other relevant team members can be added as reviewers as well. +1. Push to your branch and create a pull request. 1. The team will review and merge your PR. 
diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 02596e63..4f1762a8 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -227,7 +227,7 @@ def graceful_exit(signum: int, _): # pragma: no cover ckpt_path, typ="checkpoints", name="resume.ckpt" ) self.tracker._finalize(status="failed") - exit(0) + exit() signal.signal(signal.SIGTERM, graceful_exit) From 77c07a44d0fccef1b9d6c27159c8503b82c450e9 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Tue, 10 Sep 2024 20:20:17 +0200 Subject: [PATCH 038/102] renamed module to torchmetrics --- luxonis_train/attached_modules/metrics/__init__.py | 2 +- .../attached_modules/metrics/{common.py => torchmetrics.py} | 0 luxonis_train/models/luxonis_lightning.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename luxonis_train/attached_modules/metrics/{common.py => torchmetrics.py} (100%) diff --git a/luxonis_train/attached_modules/metrics/__init__.py b/luxonis_train/attached_modules/metrics/__init__.py index 9e73e4ac..b1dc40ea 100644 --- a/luxonis_train/attached_modules/metrics/__init__.py +++ b/luxonis_train/attached_modules/metrics/__init__.py @@ -1,8 +1,8 @@ from .base_metric import BaseMetric -from .common import Accuracy, F1Score, JaccardIndex, Precision, Recall from .mean_average_precision import MeanAveragePrecision from .mean_average_precision_keypoints import MeanAveragePrecisionKeypoints from .object_keypoint_similarity import ObjectKeypointSimilarity +from .torchmetrics import Accuracy, F1Score, JaccardIndex, Precision, Recall __all__ = [ "Accuracy", diff --git a/luxonis_train/attached_modules/metrics/common.py b/luxonis_train/attached_modules/metrics/torchmetrics.py similarity index 100% rename from luxonis_train/attached_modules/metrics/common.py rename to luxonis_train/attached_modules/metrics/torchmetrics.py diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index d3c7ac38..3c88b357 100644 --- 
a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -18,7 +18,7 @@ BaseMetric, BaseVisualizer, ) -from luxonis_train.attached_modules.metrics.common import TorchMetricWrapper +from luxonis_train.attached_modules.metrics.torchmetrics import TorchMetricWrapper from luxonis_train.attached_modules.visualizers import ( combine_visualizations, get_unnormalized_images, From da8430e7702dcd1ee4286fd9a5855448c35ba0da Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Tue, 10 Sep 2024 21:45:46 +0200 Subject: [PATCH 039/102] added more base_node tests --- luxonis_train/nodes/base_node.py | 12 +++++-- tests/unittests/test_base_node.py | 59 ++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 4 deletions(-) diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 8dcecd07..13e3ca4f 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -244,12 +244,18 @@ def name(self) -> str: @property def task(self) -> str: - """Getter for the task.""" + """Getter for the task. + + @type: str + @raises RuntimeError: If the node doesn't define any tasks. + @raises RuntimeError: If the node defines more than one task. In that case, use + the L{get_task_name} method. + """ if not self._tasks: - raise ValueError(f"{self.name} does not have any tasks defined.") + raise RuntimeError(f"{self.name} does not have any tasks defined.") if len(self._tasks) > 1: - raise ValueError( + raise RuntimeError( f"Node {self.name} has multiple tasks defined. " "Use `get_task_name` method instead." 
) diff --git a/tests/unittests/test_base_node.py b/tests/unittests/test_base_node.py index 8d581f25..47955699 100644 --- a/tests/unittests/test_base_node.py +++ b/tests/unittests/test_base_node.py @@ -1,9 +1,10 @@ import pytest import torch +from luxonis_ml.data import LabelType from torch import Size, Tensor from luxonis_train.nodes import AttachIndexType, BaseNode -from luxonis_train.utils import Packet +from luxonis_train.utils import DatasetMetadata, Packet from luxonis_train.utils.exceptions import IncompatibleException @@ -95,3 +96,59 @@ def forward(self, _): with pytest.raises(IncompatibleException): DummyNode(input_shapes=[{"features": [Size((3, 224, 224)) for _ in range(3)]}]) + + +def test_tasks(): + class DummyHead(DummyNode): + tasks = [LabelType.CLASSIFICATION] + + class DummyMultiHead(DummyNode): + tasks = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + + dummy_head = DummyHead() + dummy_node = DummyNode() + dummy_multi_head = DummyMultiHead(n_keypoints=4) + assert dummy_head.get_task_name(LabelType.CLASSIFICATION) == "classification" + assert dummy_head.task == "classification" + with pytest.raises(ValueError): + dummy_head.get_task_name(LabelType.SEGMENTATION) + + with pytest.raises(ValueError): + dummy_node.get_task_name(LabelType.SEGMENTATION) + + with pytest.raises(RuntimeError): + _ = dummy_node.task + + with pytest.raises(RuntimeError): + _ = dummy_multi_head.task + + metadata = DatasetMetadata( + classes={ + "segmentation": ["car", "person", "dog"], + "classification": ["car-class", "person-class"], + }, + n_keypoints={"color-segmentation": 0, "detection": 0}, + ) + + dummy_multi_head._dataset_metadata = metadata + assert dummy_multi_head.get_class_names(LabelType.SEGMENTATION) == [ + "car", + "person", + "dog", + ] + assert dummy_multi_head.get_class_names(LabelType.CLASSIFICATION) == [ + "car-class", + "person-class", + ] + assert dummy_multi_head.get_n_classes(LabelType.SEGMENTATION) == 3 + assert 
dummy_multi_head.get_n_classes(LabelType.CLASSIFICATION) == 2 + assert dummy_multi_head.n_keypoints == 4 + with pytest.raises(ValueError): + _ = dummy_head.n_keypoints + with pytest.raises(ValueError): + _ = dummy_node.n_keypoints + + dummy_head = DummyHead(n_classes=5) + assert dummy_head.n_classes == 5 + with pytest.raises(ValueError): + _ = dummy_multi_head.n_classes From a8a5cb2fe486d1cbe9129ced99728ffcc5467cd0 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 10 Sep 2024 20:56:30 +0000 Subject: [PATCH 040/102] refactor: new node structure --- .../ddrnet_segmentation_model.py | 6 - .../nodes/backbones/ddrnet/__init__.py | 3 + .../backbones/{ddrnet.py => ddrnet/blocks.py} | 474 ++++-------------- .../nodes/backbones/ddrnet/ddrnet.py | 295 +++++++++++ .../nodes/heads/ddrnet_segmentation_head.py | 38 +- 5 files changed, 419 insertions(+), 397 deletions(-) create mode 100644 luxonis_train/nodes/backbones/ddrnet/__init__.py rename luxonis_train/nodes/backbones/{ddrnet.py => ddrnet/blocks.py} (64%) create mode 100644 luxonis_train/nodes/backbones/ddrnet/ddrnet.py diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py index 5bfbe1a0..03daea1e 100644 --- a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -16,13 +16,7 @@ class DDRNetSegmentationModel(SegmentationModel): num_classes: int = 1 highres_planes: int = 64 layer5_bottleneck_expansion: int = 2 - task: Literal["binary", "multiclass"] = "binary" - backbone_params: Kwargs = field(default_factory=dict) - head_params: Kwargs = field(default_factory=dict) aux_head_params: Kwargs = field(default_factory=dict) - loss_params: Kwargs = field(default_factory=dict) - visualizer_params: Kwargs = field(default_factory=dict) - task_name: str | None = None @property def nodes(self) -> list[ModelNodeConfig]: diff --git 
a/luxonis_train/nodes/backbones/ddrnet/__init__.py b/luxonis_train/nodes/backbones/ddrnet/__init__.py new file mode 100644 index 00000000..ef2f869e --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/__init__.py @@ -0,0 +1,3 @@ +from .ddrnet import DDRNet + +__all__ = ["DDRNet"] \ No newline at end of file diff --git a/luxonis_train/nodes/backbones/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py similarity index 64% rename from luxonis_train/nodes/backbones/ddrnet.py rename to luxonis_train/nodes/backbones/ddrnet/blocks.py index a9b00ab1..07eca586 100644 --- a/luxonis_train/nodes/backbones/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -1,4 +1,4 @@ -"""DDRNet backbone. +"""DDRNet blocks. Adapted from: U{https://github.com/Deci-AI/super-gradients/blob/master/src/super_gradients/training/models/segmentation_models/ddrnet.py} Original source: U{https://github.com/ydhongHIT/DDRNet} @@ -16,77 +16,6 @@ from luxonis_train.nodes.blocks import ConvModule -def _make_layer( - block: Type[nn.Module], - in_planes: int, - planes: int, - num_blocks: int, - stride: int = 1, - expansion: int = 1, -) -> nn.Sequential: - """Creates a sequential layer consisting of a series of blocks. - - @type block: Type[nn.Module] - @param block: The block class to be used. - @type in_planes: int - @param in_planes: Number of input channels. - @type planes: int - @param planes: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks in the layer. - @type stride: int - @param stride: Stride for the first block. Defaults to 1. - @type expansion: int - @param expansion: Expansion factor for the block. Defaults to 1. - @return: A sequential container of the blocks. 
- """ - layers: list[nn.Module] = [] - - layers.append( - block(in_planes, planes, stride, final_relu=num_blocks > 1, expansion=expansion) - ) - - in_planes = planes * expansion - - if num_blocks > 1: - for i in range(1, num_blocks): - final_relu = i != (num_blocks - 1) - layers.append( - block( - in_planes, - planes, - stride=1, - final_relu=final_relu, - expansion=expansion, - ) - ) - - return nn.Sequential(*layers) - - -def drop_path(x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True) -> Tensor: - """Drop paths (Stochastic Depth) per sample when applied in the main path of - residual blocks. - - @type x: Tensor - @param x: Input tensor. - @type drop_prob: float - @param drop_prob: Probability of dropping a path. Defaults to 0.0. - @type scale_by_keep: bool - @param scale_by_keep: Whether to scale the output by the keep probability. Defaults - to True. - @return: Tensor with dropped paths based on the provided drop probability. - """ - keep_prob = 1 - drop_prob - shape = (x.shape[0],) + (1,) * ( - x.ndim - 1 - ) # Supports tensors of different dimensions - random_tensor = x.new_empty(shape).bernoulli_(keep_prob) - if keep_prob > 0.0 and scale_by_keep: - random_tensor.div_(keep_prob) - return x * random_tensor - - class DropPath(nn.Module): """Drop paths (Stochastic Depth) per sample, when applied in the main path of residual blocks. @@ -442,55 +371,7 @@ def forward(self, x: Tensor, output_height: int, output_width: int) -> Tensor: """ return F.interpolate(x, size=[output_height, output_width], mode=self.mode) - -class DDRBackBoneBase(nn.Module, ABC): - """Base class defining functions that must be supported by DDRBackBones.""" - - def validate_backbone_attributes(self) -> None: - """Validate the existence of required backbone attributes. - - Ensures that the following attributes are present: "stem", "layer1", "layer2", - "layer3", "layer4", "input_channels". 
- """ - expected_attributes = [ - "stem", - "layer1", - "layer2", - "layer3", - "layer4", - "input_channels", - ] - for attribute in expected_attributes: - assert hasattr( - self, attribute - ), f"Invalid backbone - attribute '{attribute}' is missing" - - def get_backbone_output_number_of_channels(self) -> dict[str, int]: - """Determine the number of output channels for each layer of the backbone. - - Returns a dictionary with keys "layer2", "layer3", "layer4" and their respective - number of output channels. - - @return: Dictionary of output channel counts for each layer. - """ - output_shapes = {} - x = torch.randn(1, self.input_channels, 320, 320) - x = self.stem(x) - x = self.layer1(x) - x = self.layer2(x) - output_shapes["layer2"] = x.shape[1] - - for layer in self.layer3: - x = layer(x) - output_shapes["layer3"] = x.shape[1] - - x = self.layer4(x) - output_shapes["layer4"] = x.shape[1] - - return output_shapes - - -class BasicDDRBackBone(DDRBackBoneBase): +class BasicDDRBackBone(nn.Module): def __init__( self, block: Type[nn.Module], @@ -581,278 +462,115 @@ def __init__( stride=2, ) + def validate_backbone_attributes(self) -> None: + """Validate the existence of required backbone attributes. -class DDRNet(BaseNode[Tensor, list[Tensor]]): - def __init__( - self, - use_aux_heads: bool = True, - upscale_module: nn.Module = None, - highres_planes: int = 64, - spp_width: int = 128, - ssp_inter_mode: str = "bilinear", - segmentation_inter_mode: str = "bilinear", - block: Type[nn.Module] = BasicResNetBlock, - skip_block: Type[nn.Module] = BasicResNetBlock, - layer5_block: Type[nn.Module] = Bottleneck, - layer5_bottleneck_expansion: int = 2, - spp_kernel_sizes: list[int] = None, - spp_strides: list[int] = None, - layer3_repeats: int = 1, - planes: int = 32, - layers: list[int] = None, - input_channels: int = 3, - **kwargs, - ): - """Initialize the DDRNet with specified parameters. - - @type use_aux_heads: bool - @param use_aux_heads: Whether to use auxiliary heads. 
Defaults to True. - @type upscale_module: nn.Module - @param upscale_module: Module for upscaling (e.g., bilinear interpolation). - Defaults to UpscaleOnline(). - @type highres_planes: int - @param highres_planes: Number of channels in the high resolution net. Defaults - to 64. - @type spp_width: int - @param spp_width: Width of the branches in the SPP block. Defaults to 128. - @type ssp_inter_mode: str - @param ssp_inter_mode: Interpolation mode for the SPP block. Defaults to - "bilinear". - @type segmentation_inter_mode: str - @param segmentation_inter_mode: Interpolation mode for the segmentation head. - Defaults to "bilinear". - @type block: Type[nn.Module] - @param block: Type of block to use in the backbone. Defaults to - BasicResNetBlock. - @type skip_block: Type[nn.Module] - @param skip_block: Type of block for skip connections. Defaults to - BasicResNetBlock. - @type layer5_block: Type[nn.Module] - @param layer5_block: Type of block for layer5 and layer5_skip. Defaults to - Bottleneck. - @type layer5_bottleneck_expansion: int - @param layer5_bottleneck_expansion: Expansion factor for Bottleneck block in - layer5. Defaults to 2. - @type spp_kernel_sizes: list[int] - @param spp_kernel_sizes: Kernel sizes for the SPP module pooling. Defaults to - [1, 5, 9, 17, 0]. - @type spp_strides: list[int] - @param spp_strides: Strides for the SPP module pooling. Defaults to [1, 2, 4, 8, - 0]. - @type layer3_repeats: int - @param layer3_repeats: Number of times to repeat the 3rd stage. Defaults to 1. - @type planes: int - @param planes: Base number of channels. Defaults to 32. - @type layers: list[int] - @param layers: Number of blocks in each layer of the backbone. Defaults to [2, - 2, 2, 2, 1, 2, 2, 1]. - @type input_channels: int - @param input_channels: Number of input channels. Defaults to 3. - @type kwargs: Any - @param kwargs: Additional arguments to pass to L{BaseNode}. 
+ Ensures that the following attributes are present: "stem", "layer1", "layer2", + "layer3", "layer4", "input_channels". """ + expected_attributes = [ + "stem", + "layer1", + "layer2", + "layer3", + "layer4", + "input_channels", + ] + for attribute in expected_attributes: + assert hasattr( + self, attribute + ), f"Invalid backbone - attribute '{attribute}' is missing" - if upscale_module is None: - upscale_module = UpscaleOnline() - if spp_kernel_sizes is None: - spp_kernel_sizes = [1, 5, 9, 17, 0] - if spp_strides is None: - spp_strides = [1, 2, 4, 8, 0] - if layers is None: - layers = [2, 2, 2, 2, 1, 2, 2, 1] - - super().__init__(**kwargs) - - self._use_aux_heads = use_aux_heads - self.upscale = upscale_module - self.ssp_inter_mode = ssp_inter_mode - self.segmentation_inter_mode = segmentation_inter_mode - self.block = block - self.skip_block = skip_block - self.relu = nn.ReLU(inplace=False) - self.layer3_repeats = layer3_repeats - self.planes = planes - self.layers = layers - self.backbone_layers, self.additional_layers = self.layers[:4], self.layers[4:] - self.input_channels = input_channels - - self._backbone: DDRBackBoneBase = BasicDDRBackBone( - block=self.block, - width=self.planes, - layers=self.backbone_layers, - input_channels=self.input_channels, - layer3_repeats=self.layer3_repeats, - ) - self._backbone.validate_backbone_attributes() - out_chan_backbone = self._backbone.get_backbone_output_number_of_channels() - - # Define layers for layer 3 - self.compression3 = nn.ModuleList() - self.down3 = nn.ModuleList() - self.layer3_skip = nn.ModuleList() - for i in range(layer3_repeats): - self.compression3.append( - ConvModule( - in_channels=out_chan_backbone["layer3"], - out_channels=highres_planes, - kernel_size=1, - bias=False, - activation=nn.Identity(), - ) - ) - self.down3.append( - ConvModule( - in_channels=highres_planes, - out_channels=out_chan_backbone["layer3"], - kernel_size=3, - stride=2, - padding=1, - bias=False, - activation=nn.Identity(), - ) 
- ) - self.layer3_skip.append( - _make_layer( - in_planes=out_chan_backbone["layer2"] if i == 0 else highres_planes, - planes=highres_planes, - block=skip_block, - num_blocks=self.additional_layers[1], - ) - ) + def get_backbone_output_number_of_channels(self) -> dict[str, int]: + """Determine the number of output channels for each layer of the backbone. - self.compression4 = ConvModule( - in_channels=out_chan_backbone["layer4"], - out_channels=highres_planes, - kernel_size=1, - bias=False, - activation=nn.Identity(), - ) + Returns a dictionary with keys "layer2", "layer3", "layer4" and their respective + number of output channels. - self.down4 = nn.Sequential( - ConvModule( - in_channels=highres_planes, - out_channels=highres_planes * 2, - kernel_size=3, - stride=2, - padding=1, - bias=False, - activation=nn.ReLU(inplace=True), - ), - ConvModule( - in_channels=highres_planes * 2, - out_channels=out_chan_backbone["layer4"], - kernel_size=3, - stride=2, - padding=1, - bias=False, - activation=nn.Identity(), - ), - ) + @return: Dictionary of output channel counts for each layer. 
+ """ + output_shapes = {} + x = torch.randn(1, self.input_channels, 320, 320) + x = self.stem(x) + x = self.layer1(x) + x = self.layer2(x) + output_shapes["layer2"] = x.shape[1] - self.layer4_skip = _make_layer( - block=skip_block, - in_planes=highres_planes, - planes=highres_planes, - num_blocks=self.additional_layers[2], - ) - self.layer5_skip = _make_layer( - block=layer5_block, - in_planes=highres_planes, - planes=highres_planes, - num_blocks=self.additional_layers[3], - expansion=layer5_bottleneck_expansion, - ) + for layer in self.layer3: + x = layer(x) + output_shapes["layer3"] = x.shape[1] - self.layer5 = _make_layer( - block=layer5_block, - in_planes=out_chan_backbone["layer4"], - planes=out_chan_backbone["layer4"], - num_blocks=self.additional_layers[0], - stride=2, - expansion=layer5_bottleneck_expansion, - ) + x = self.layer4(x) + output_shapes["layer4"] = x.shape[1] - self.spp = DAPPM( - in_planes=out_chan_backbone["layer4"] * layer5_bottleneck_expansion, - branch_planes=spp_width, - out_planes=highres_planes * layer5_bottleneck_expansion, - inter_mode=self.ssp_inter_mode, - kernel_sizes=spp_kernel_sizes, - strides=spp_strides, - ) + return output_shapes - self.highres_planes = highres_planes - self.layer5_bottleneck_expansion = layer5_bottleneck_expansion - self.init_params() - - @property - def backbone(self): - """Create a fake backbone module to load backbone pre-trained weights.""" - return nn.Sequential( - Dict( - [ - ("_backbone", self._backbone), - ("compression3", self.compression3), - ("compression4", self.compression4), - ("down3", self.down3), - ("down4", self.down4), - ("layer3_skip", self.layer3_skip), - ("layer4_skip", self.layer4_skip), - ("layer5_skip", self.layer5_skip), - ] - ) - ) +def _make_layer( + block: Type[nn.Module], + in_planes: int, + planes: int, + num_blocks: int, + stride: int = 1, + expansion: int = 1, +) -> nn.Sequential: + """Creates a sequential layer consisting of a series of blocks. 
- def forward(self, x: Tensor) -> list[Tensor]: - width_output = x.shape[-1] // 8 - height_output = x.shape[-2] // 8 + @type block: Type[nn.Module] + @param block: The block class to be used. + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of output channels. + @type num_blocks: int + @param num_blocks: Number of blocks in the layer. + @type stride: int + @param stride: Stride for the first block. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the block. Defaults to 1. + @return: A sequential container of the blocks. + """ + layers: list[nn.Module] = [] - x = self._backbone.stem(x) - x = self._backbone.layer1(x) - x = self._backbone.layer2(self.relu(x)) + layers.append( + block(in_planes, planes, stride, final_relu=num_blocks > 1, expansion=expansion) + ) - # Repeat layer 3 - x_skip = x - for i in range(self.layer3_repeats): - out_layer3 = self._backbone.layer3[i](self.relu(x)) - out_layer3_skip = self.layer3_skip[i](self.relu(x_skip)) + in_planes = planes * expansion - x = out_layer3 + self.down3[i](self.relu(out_layer3_skip)) - x_skip = out_layer3_skip + self.upscale( - self.compression3[i](self.relu(out_layer3)), height_output, width_output + if num_blocks > 1: + for i in range(1, num_blocks): + final_relu = i != (num_blocks - 1) + layers.append( + block( + in_planes, + planes, + stride=1, + final_relu=final_relu, + expansion=expansion, + ) ) - # Save for auxiliary head - if self._use_aux_heads: - x_extra = x_skip - - out_layer4 = self._backbone.layer4(self.relu(x)) - out_layer4_skip = self.layer4_skip(self.relu(x_skip)) - - x = out_layer4 + self.down4(self.relu(out_layer4_skip)) - x_skip = out_layer4_skip + self.upscale( - self.compression4(self.relu(out_layer4)), height_output, width_output - ) - - out_layer5_skip = self.layer5_skip(self.relu(x_skip)) + return nn.Sequential(*layers) - x = self.upscale( - self.spp(self.layer5(self.relu(x))), height_output, 
width_output - ) - x = x + out_layer5_skip +def drop_path(x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True) -> Tensor: + """Drop paths (Stochastic Depth) per sample when applied in the main path of + residual blocks. - if self._use_aux_heads: - return [x, x_extra] - else: - return [x] - - def init_params(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") - if m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) + @type x: Tensor + @param x: Input tensor. + @type drop_prob: float + @param drop_prob: Probability of dropping a path. Defaults to 0.0. + @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep probability. Defaults + to True. + @return: Tensor with dropped paths based on the provided drop probability. + """ + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * ( + x.ndim - 1 + ) + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor \ No newline at end of file diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py new file mode 100644 index 00000000..1d8ddbf3 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -0,0 +1,295 @@ +"""DDRNet backbone. 
+ +Adapted from: U{https://github.com/Deci-AI/super-gradients/blob/master/src/super_gradients/training/models/segmentation_models/ddrnet.py} +Original source: U{https://github.com/ydhongHIT/DDRNet} +Paper: U{https://arxiv.org/pdf/2101.06085.pdf} +@license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} +""" +from abc import ABC +from typing import Dict, Type + +import torch +from torch import Tensor, nn +from torch.nn import functional as F + +from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ConvModule +from luxonis_train.nodes.heads import DDRNetSegmentationHead + +from .blocks import BasicResNetBlock, Bottleneck, UpscaleOnline, BasicDDRBackBone, DAPPM, _make_layer + + +class DDRNet(BaseNode[Tensor, list[Tensor]]): + def __init__( + self, + use_aux_heads: bool = True, + upscale_module: nn.Module = None, + highres_planes: int = 64, + spp_width: int = 128, + ssp_inter_mode: str = "bilinear", + segmentation_inter_mode: str = "bilinear", + block: Type[nn.Module] = BasicResNetBlock, + skip_block: Type[nn.Module] = BasicResNetBlock, + layer5_block: Type[nn.Module] = Bottleneck, + layer5_bottleneck_expansion: int = 2, + spp_kernel_sizes: list[int] = None, + spp_strides: list[int] = None, + layer3_repeats: int = 1, + planes: int = 32, + layers: list[int] = None, + input_channels: int = 3, + **kwargs, + ): + """Initialize the DDRNet with specified parameters. + + @type use_aux_heads: bool + @param use_aux_heads: Whether to use auxiliary heads. Defaults to True. + @type upscale_module: nn.Module + @param upscale_module: Module for upscaling (e.g., bilinear interpolation). + Defaults to UpscaleOnline(). + @type highres_planes: int + @param highres_planes: Number of channels in the high resolution net. Defaults + to 64. + @type spp_width: int + @param spp_width: Width of the branches in the SPP block. Defaults to 128. + @type ssp_inter_mode: str + @param ssp_inter_mode: Interpolation mode for the SPP block. 
Defaults to + "bilinear". + @type segmentation_inter_mode: str + @param segmentation_inter_mode: Interpolation mode for the segmentation head. + Defaults to "bilinear". + @type block: Type[nn.Module] + @param block: Type of block to use in the backbone. Defaults to + BasicResNetBlock. + @type skip_block: Type[nn.Module] + @param skip_block: Type of block for skip connections. Defaults to + BasicResNetBlock. + @type layer5_block: Type[nn.Module] + @param layer5_block: Type of block for layer5 and layer5_skip. Defaults to + Bottleneck. + @type layer5_bottleneck_expansion: int + @param layer5_bottleneck_expansion: Expansion factor for Bottleneck block in + layer5. Defaults to 2. + @type spp_kernel_sizes: list[int] + @param spp_kernel_sizes: Kernel sizes for the SPP module pooling. Defaults to + [1, 5, 9, 17, 0]. + @type spp_strides: list[int] + @param spp_strides: Strides for the SPP module pooling. Defaults to [1, 2, 4, 8, + 0]. + @type layer3_repeats: int + @param layer3_repeats: Number of times to repeat the 3rd stage. Defaults to 1. + @type planes: int + @param planes: Base number of channels. Defaults to 32. + @type layers: list[int] + @param layers: Number of blocks in each layer of the backbone. Defaults to [2, + 2, 2, 2, 1, 2, 2, 1]. + @type input_channels: int + @param input_channels: Number of input channels. Defaults to 3. + @type kwargs: Any + @param kwargs: Additional arguments to pass to L{BaseNode}. 
+ """ + + if upscale_module is None: + upscale_module = UpscaleOnline() + if spp_kernel_sizes is None: + spp_kernel_sizes = [1, 5, 9, 17, 0] + if spp_strides is None: + spp_strides = [1, 2, 4, 8, 0] + if layers is None: + layers = [2, 2, 2, 2, 1, 2, 2, 1] + + super().__init__(**kwargs) + + self._use_aux_heads = use_aux_heads + self.upscale = upscale_module + self.ssp_inter_mode = ssp_inter_mode + self.segmentation_inter_mode = segmentation_inter_mode + self.block = block + self.skip_block = skip_block + self.relu = nn.ReLU(inplace=False) + self.layer3_repeats = layer3_repeats + self.planes = planes + self.layers = layers + self.backbone_layers, self.additional_layers = self.layers[:4], self.layers[4:] + self.input_channels = input_channels + + self._backbone: DDRBackBoneBase = BasicDDRBackBone( + block=self.block, + width=self.planes, + layers=self.backbone_layers, + input_channels=self.input_channels, + layer3_repeats=self.layer3_repeats, + ) + self._backbone.validate_backbone_attributes() + out_chan_backbone = self._backbone.get_backbone_output_number_of_channels() + + # Define layers for layer 3 + self.compression3 = nn.ModuleList() + self.down3 = nn.ModuleList() + self.layer3_skip = nn.ModuleList() + for i in range(layer3_repeats): + self.compression3.append( + ConvModule( + in_channels=out_chan_backbone["layer3"], + out_channels=highres_planes, + kernel_size=1, + bias=False, + activation=nn.Identity(), + ) + ) + self.down3.append( + ConvModule( + in_channels=highres_planes, + out_channels=out_chan_backbone["layer3"], + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.Identity(), + ) + ) + self.layer3_skip.append( + _make_layer( + in_planes=out_chan_backbone["layer2"] if i == 0 else highres_planes, + planes=highres_planes, + block=skip_block, + num_blocks=self.additional_layers[1], + ) + ) + + self.compression4 = ConvModule( + in_channels=out_chan_backbone["layer4"], + out_channels=highres_planes, + kernel_size=1, + bias=False, + 
activation=nn.Identity(), + ) + + self.down4 = nn.Sequential( + ConvModule( + in_channels=highres_planes, + out_channels=highres_planes * 2, + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.ReLU(inplace=True), + ), + ConvModule( + in_channels=highres_planes * 2, + out_channels=out_chan_backbone["layer4"], + kernel_size=3, + stride=2, + padding=1, + bias=False, + activation=nn.Identity(), + ), + ) + + self.layer4_skip = _make_layer( + block=skip_block, + in_planes=highres_planes, + planes=highres_planes, + num_blocks=self.additional_layers[2], + ) + self.layer5_skip = _make_layer( + block=layer5_block, + in_planes=highres_planes, + planes=highres_planes, + num_blocks=self.additional_layers[3], + expansion=layer5_bottleneck_expansion, + ) + + self.layer5 = _make_layer( + block=layer5_block, + in_planes=out_chan_backbone["layer4"], + planes=out_chan_backbone["layer4"], + num_blocks=self.additional_layers[0], + stride=2, + expansion=layer5_bottleneck_expansion, + ) + + self.spp = DAPPM( + in_planes=out_chan_backbone["layer4"] * layer5_bottleneck_expansion, + branch_planes=spp_width, + out_planes=highres_planes * layer5_bottleneck_expansion, + inter_mode=self.ssp_inter_mode, + kernel_sizes=spp_kernel_sizes, + strides=spp_strides, + ) + + self.highres_planes = highres_planes + self.layer5_bottleneck_expansion = layer5_bottleneck_expansion + self.init_params() + + @property + def backbone(self): + """Create a fake backbone module to load backbone pre-trained weights.""" + return nn.Sequential( + Dict( + [ + ("_backbone", self._backbone), + ("compression3", self.compression3), + ("compression4", self.compression4), + ("down3", self.down3), + ("down4", self.down4), + ("layer3_skip", self.layer3_skip), + ("layer4_skip", self.layer4_skip), + ("layer5_skip", self.layer5_skip), + ] + ) + ) + + def forward(self, x: Tensor) -> list[Tensor]: + width_output = x.shape[-1] // 8 + height_output = x.shape[-2] // 8 + + x = self._backbone.stem(x) + x = 
self._backbone.layer1(x) + x = self._backbone.layer2(self.relu(x)) + + # Repeat layer 3 + x_skip = x + for i in range(self.layer3_repeats): + out_layer3 = self._backbone.layer3[i](self.relu(x)) + out_layer3_skip = self.layer3_skip[i](self.relu(x_skip)) + + x = out_layer3 + self.down3[i](self.relu(out_layer3_skip)) + x_skip = out_layer3_skip + self.upscale( + self.compression3[i](self.relu(out_layer3)), height_output, width_output + ) + + # Save for auxiliary head + if self._use_aux_heads: + x_extra = x_skip + + out_layer4 = self._backbone.layer4(self.relu(x)) + out_layer4_skip = self.layer4_skip(self.relu(x_skip)) + + x = out_layer4 + self.down4(self.relu(out_layer4_skip)) + x_skip = out_layer4_skip + self.upscale( + self.compression4(self.relu(out_layer4)), height_output, width_output + ) + + out_layer5_skip = self.layer5_skip(self.relu(x_skip)) + + x = self.upscale( + self.spp(self.layer5(self.relu(x))), height_output, width_output + ) + + x = x + out_layer5_skip + + if self._use_aux_heads: + return [x, x_extra] + else: + return [x] + + def init_params(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + if m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index 71afab88..10a0c51a 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -10,6 +10,7 @@ from torch import Tensor from luxonis_train.nodes.base_node import BaseNode +from luxonis_train.nodes.blocks import ConvModule from luxonis_train.utils.types import LabelType @@ -48,35 +49,46 @@ def __init__( """ self.attach_index = attach_index super().__init__(**kwargs) + self.scale_factor = scale_factor if inter_mode == 
"pixel_shuffle": assert ( inter_planes % (scale_factor**2) == 0 ), "When using pixel_shuffle, inter_planes must be a multiple of scale_factor^2." - self.bn1 = nn.BatchNorm2d(in_planes) - self.conv1 = nn.Conv2d( - in_planes, inter_planes, kernel_size=3, padding=1, bias=False + self.conv1 = ConvModule( + in_planes, + inter_planes, + kernel_size=3, + padding=1, + bias=False, + activation=nn.ReLU(inplace=True), ) - self.bn2 = nn.BatchNorm2d(inter_planes) - self.relu = nn.ReLU(inplace=True) if inter_mode == "pixel_shuffle": - self.conv2 = nn.Conv2d( - inter_planes, inter_planes, kernel_size=1, padding=0, bias=True + self.conv2 = ConvModule( + inter_planes, + inter_planes, + kernel_size=1, + padding=0, + bias=True, + activation=nn.Identity(), ) self.upscale = nn.PixelShuffle(scale_factor) else: - self.conv2 = nn.Conv2d( - inter_planes, num_classes, kernel_size=1, padding=0, bias=True + self.conv2 = ConvModule( + inter_planes, + num_classes, + kernel_size=1, + padding=0, + bias=True, + activation=nn.Identity(), ) self.upscale = nn.Upsample(scale_factor=scale_factor, mode=inter_mode) - self.scale_factor = scale_factor - def forward(self, x: Tensor) -> Tensor: - x = self.conv1(self.relu(self.bn1(x))) - out = self.conv2(self.relu(self.bn2(x))) + x = self.conv1(x) + out = self.conv2(x) out = self.upscale(out) return out From fb8734a3ef3c7174dc2ea10006759087a25f33e8 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 10 Sep 2024 21:12:03 +0000 Subject: [PATCH 041/102] style: remove redundant comments --- configs/ddrnet_segmentation_model.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index 63196673..0fa36ecc 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -17,13 +17,13 @@ loader: trainer: preprocessing: - train_image_size: [&height 256, &width 320] # [512, 512] + train_image_size: [&height 256, &width 320] 
keep_aspect_ratio: False normalize: active: True - batch_size: 4 # 32 - epochs: &epochs 1 # 500 + batch_size: 4 + epochs: &epochs 500 num_workers: 4 validation_interval: 10 num_log_images: 8 From fbd6f373886732f08522fb33b3dbaf6a93941987 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 10 Sep 2024 21:21:38 +0000 Subject: [PATCH 042/102] style: formatting --- .../ddrnet_segmentation_model.py | 1 - luxonis_train/nodes/backbones/ddrnet/__init__.py | 2 +- luxonis_train/nodes/backbones/ddrnet/blocks.py | 12 +++++------- luxonis_train/nodes/backbones/ddrnet/ddrnet.py | 15 +++++++++------ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py index 03daea1e..a3fb60da 100644 --- a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -1,5 +1,4 @@ from dataclasses import dataclass, field -from typing import Literal from luxonis_train.utils.config import ( LossModuleConfig, diff --git a/luxonis_train/nodes/backbones/ddrnet/__init__.py b/luxonis_train/nodes/backbones/ddrnet/__init__.py index ef2f869e..8ecc5814 100644 --- a/luxonis_train/nodes/backbones/ddrnet/__init__.py +++ b/luxonis_train/nodes/backbones/ddrnet/__init__.py @@ -1,3 +1,3 @@ from .ddrnet import DDRNet -__all__ = ["DDRNet"] \ No newline at end of file +__all__ = ["DDRNet"] diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index 07eca586..87f54118 100644 --- a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -5,14 +5,12 @@ Paper: U{https://arxiv.org/pdf/2101.06085.pdf} @license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} """ -from abc import ABC -from typing import Dict, Type +from typing import Type import torch from torch import Tensor, nn 
from torch.nn import functional as F -from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule @@ -371,6 +369,7 @@ def forward(self, x: Tensor, output_height: int, output_width: int) -> Tensor: """ return F.interpolate(x, size=[output_height, output_width], mode=self.mode) + class BasicDDRBackBone(nn.Module): def __init__( self, @@ -505,6 +504,7 @@ def get_backbone_output_number_of_channels(self) -> dict[str, int]: return output_shapes + def _make_layer( block: Type[nn.Module], in_planes: int, @@ -567,10 +567,8 @@ def drop_path(x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True) -> @return: Tensor with dropped paths based on the provided drop probability. """ keep_prob = 1 - drop_prob - shape = (x.shape[0],) + (1,) * ( - x.ndim - 1 - ) + shape = (x.shape[0],) + (1,) * (x.ndim - 1) random_tensor = x.new_empty(shape).bernoulli_(keep_prob) if keep_prob > 0.0 and scale_by_keep: random_tensor.div_(keep_prob) - return x * random_tensor \ No newline at end of file + return x * random_tensor diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index 1d8ddbf3..00e7cae9 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -5,18 +5,21 @@ Paper: U{https://arxiv.org/pdf/2101.06085.pdf} @license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} """ -from abc import ABC from typing import Dict, Type -import torch from torch import Tensor, nn -from torch.nn import functional as F from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule -from luxonis_train.nodes.heads import DDRNetSegmentationHead -from .blocks import BasicResNetBlock, Bottleneck, UpscaleOnline, BasicDDRBackBone, DAPPM, _make_layer +from .blocks import ( + DAPPM, + BasicDDRBackBone, + BasicResNetBlock, + Bottleneck, + UpscaleOnline, + _make_layer, +) class DDRNet(BaseNode[Tensor, 
list[Tensor]]): @@ -113,7 +116,7 @@ def __init__( self.backbone_layers, self.additional_layers = self.layers[:4], self.layers[4:] self.input_channels = input_channels - self._backbone: DDRBackBoneBase = BasicDDRBackBone( + self._backbone = BasicDDRBackBone( block=self.block, width=self.planes, layers=self.backbone_layers, From f962512e4a1ea34d131f1523a76771971c446817 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 10 Sep 2024 21:24:45 +0000 Subject: [PATCH 043/102] test: add ddrnet_segmentation_model test --- tests/integration/test_simple.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 7d3587c4..15c94319 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -47,6 +47,7 @@ def clear_files(): "segmentation_model", "detection_model", "keypoint_bbox_model", + "ddrnet_segmentation_model" ], ) def test_predefined_models( From aa1852f8f17c80a8e5e4e213dc143ed334986ec4 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 10 Sep 2024 21:25:07 +0000 Subject: [PATCH 044/102] style: formatting --- tests/integration/test_simple.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 15c94319..efc8a02e 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -47,7 +47,7 @@ def clear_files(): "segmentation_model", "detection_model", "keypoint_bbox_model", - "ddrnet_segmentation_model" + "ddrnet_segmentation_model", ], ) def test_predefined_models( From 6ead2715a5ff9d7feb22e8d04d54b55db35417d4 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Wed, 11 Sep 2024 01:11:12 +0200 Subject: [PATCH 045/102] fixed type issues --- .github/workflows/ci.yaml | 2 +- luxonis_train/__main__.py | 2 +- luxonis_train/callbacks/metadata_logger.py | 2 ++ luxonis_train/nodes/backbones/efficientnet.py | 4 ++-- 4 files changed, 6 insertions(+), 4 deletions(-) diff --git 
a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index efee6205..299dd65e 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -82,7 +82,7 @@ jobs: cache: pip - name: Install dependencies - run: pip install . + run: pip install .[dev] - uses: jakebailey/pyright-action@v2 with: diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 3351f067..7740f1d5 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -180,7 +180,7 @@ def archive( def version_callback(value: bool): if value: - typer.echo(f"LuxonisTrain Version: {version(__package__)}") + typer.echo(f"LuxonisTrain Version: {version('luxonis_train')}") raise typer.Exit() diff --git a/luxonis_train/callbacks/metadata_logger.py b/luxonis_train/callbacks/metadata_logger.py index f04cb575..3186a666 100644 --- a/luxonis_train/callbacks/metadata_logger.py +++ b/luxonis_train/callbacks/metadata_logger.py @@ -57,6 +57,8 @@ def _get_editable_package_git_hash( """ try: distribution = pkg_resources.get_distribution(package_name) + if distribution.location is None: + return None package_location = osp.join(distribution.location, package_name) # remove any additional folders in path (e.g. 
"/src") diff --git a/luxonis_train/nodes/backbones/efficientnet.py b/luxonis_train/nodes/backbones/efficientnet.py index 1c7120eb..f18d883f 100644 --- a/luxonis_train/nodes/backbones/efficientnet.py +++ b/luxonis_train/nodes/backbones/efficientnet.py @@ -1,7 +1,7 @@ from typing import Any import torch -from torch import Tensor +from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode @@ -36,7 +36,7 @@ def __init__( """ super().__init__(**kwargs) - self.backbone = torch.hub.load( # type: ignore + self.backbone: nn.Module = torch.hub.load( # type: ignore "rwightman/gen-efficientnet-pytorch", "efficientnet_lite0", pretrained=download_weights, From 43cd6232f5d5b7764ce0ce1a8ded8e44be49a304 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Wed, 11 Sep 2024 01:22:40 +0200 Subject: [PATCH 046/102] changed pyright settings --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 299dd65e..ec04c572 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -87,9 +87,9 @@ jobs: - uses: jakebailey/pyright-action@v2 with: level: warning - working-directory: luxonis_train warnings: true python-version: '3.10' + project: pyproject.toml tests: needs: From b12f1bdc52369b55fdea8cbfd8556643bf65be66 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Wed, 11 Sep 2024 01:27:31 +0200 Subject: [PATCH 047/102] editable install --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ec04c572..590583f0 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -82,7 +82,7 @@ jobs: cache: pip - name: Install dependencies - run: pip install .[dev] + run: pip install -e .[dev] - uses: jakebailey/pyright-action@v2 with: From 7bdb2582c63692a43f0c11f98969d32c50996bdf Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Wed, 11 Sep 2024 
01:39:21 +0200 Subject: [PATCH 048/102] added pyright version --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 590583f0..3176f245 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -86,6 +86,7 @@ jobs: - uses: jakebailey/pyright-action@v2 with: + version: '1.1.380' level: warning warnings: true python-version: '3.10' From 74dc6d0e65d1d227e7054d8d0c0d066d27c41e5e Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Wed, 11 Sep 2024 01:39:26 +0200 Subject: [PATCH 049/102] fixed task error --- luxonis_train/nodes/base_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 13e3ca4f..a1101479 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -566,7 +566,7 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: ) try: task = self.task - except ValueError: + except RuntimeError: task = "features" return {task: outputs} From 242726eaa3286782999f079bf901ab32112c3a1d Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Wed, 11 Sep 2024 03:24:56 +0200 Subject: [PATCH 050/102] removed toc plugin --- .github/workflows/ci.yaml | 7 ++++--- .pre-commit-config.yaml | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3176f245..8b3b389d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -84,7 +84,8 @@ jobs: - name: Install dependencies run: pip install -e .[dev] - - uses: jakebailey/pyright-action@v2 + - name: Type check + uses: jakebailey/pyright-action@v2 with: version: '1.1.380' level: warning @@ -153,8 +154,8 @@ jobs: with: coverageFile: coverage.xml token: ${{ secrets.GITHUB_TOKEN }} - thresholdAll: 90 - thresholdNew: 80 + thresholdAll: 0.9 + thresholdNew: 0.8 - name: Commit coverage badge if: matrix.os == 'ubuntu-latest' diff --git 
a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3f95fc26..6226370c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -28,4 +28,3 @@ repos: - id: mdformat additional_dependencies: - mdformat-gfm - - mdformat-toc From 811c020c9471da19c44f03655f271273d2f202af Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Wed, 11 Sep 2024 03:25:12 +0200 Subject: [PATCH 051/102] small docs fixes --- luxonis_train/attached_modules/base_attached_module.py | 2 +- .../metrics/mean_average_precision_keypoints.py | 2 +- luxonis_train/nodes/backbones/efficientnet.py | 2 +- luxonis_train/nodes/base_node.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index b423fc0d..413de0ae 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -232,7 +232,7 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: @rtype: tuple[Unpack[Ts]] @return: Prepared inputs. Should allow the following usage with the - L{forward} method: + L{forward} method:: >>> loss.forward(*loss.prepare(outputs, labels)) diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index e424d2dd..3ccaea5d 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -52,7 +52,7 @@ def __init__( Adapted from: U{https://github.com/Lightning-AI/torchmetrics/blob/v1.0.1/src/ torchmetrics/detection/mean_ap.py}. - @license: Apache-2.0 License + @license: Apache License, Version 2.0 @type num_keypoints: int @param num_keypoints: Number of keypoints. 
diff --git a/luxonis_train/nodes/backbones/efficientnet.py b/luxonis_train/nodes/backbones/efficientnet.py index f18d883f..7744236a 100644 --- a/luxonis_train/nodes/backbones/efficientnet.py +++ b/luxonis_train/nodes/backbones/efficientnet.py @@ -21,7 +21,7 @@ def __init__( Source: U{https://github.com/rwightman/gen-efficientnet-pytorch} - @license: U{Apache-2.0 + @license: U{Apache License, Version 2.0 } @see: U{https://paperswithcode.com/method/efficientnet} diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index a1101479..bd2aa6db 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -406,7 +406,7 @@ def in_sizes(self) -> Size | list[Size]: In case `in_sizes` were provided during initialization, they are returned directly. - Example: + Example:: >>> input_shapes = [{"features": [Size(64, 128, 128), Size(3, 224, 224)]}] >>> attach_index = -1 From bcb749732468a75243ba01d76d1d5a92e4b8796e Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Wed, 11 Sep 2024 03:25:41 +0200 Subject: [PATCH 052/102] updated CONTRIBUTING.md --- CONTRIBUTING.md | 58 +++++++++++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 14923406..d3636e13 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,17 +1,30 @@ -# Contributing to LuxonisTrain +# Contributing to LuxonisTrain **This guide is intended for our internal development team.** It outlines our workflow and standards for contributing to this project. 
-## Table of Contents +## Table Of Contents +- [Requirements](#requirements) - [Pre-commit Hooks](#pre-commit-hooks) - [Documentation](#documentation) +- [Type Checking](#type-checking) - [Editor Support](#editor-support) - [Tests](#tests) - [GitHub Actions](#github-actions) - [Making and Reviewing Changes](#making-and-reviewing-changes) -- [Notes](#notes) + +## Requirements + +Install the development dependencies by running `pip install -r requirements-dev.txt` or installing the package with the `dev` extra: + +```bash +pip install -e .[dev] +``` + +> \[!NOTE\] +> This will install the package in editable mode (`-e`), +> so you can make changes to the code and run them immediately. ## Pre-commit Hooks @@ -34,6 +47,14 @@ To verify that your documentation is formatted correctly, follow these steps: - **NOTE:** If the script fails, it might not give the specific error message. In that case, you can run the script for each file individually until you find the one that is causing the error. +## Type Checking + +The codebase is type-checked using [pyright](https://github.com/microsoft/pyright) `v1.1.380`. To run type checking, use the following command in the root project directory: + +```bash +pyright --warnings --level warning --pythonversion 3.10 luxonis_train +``` + ### Editor Support - **PyCharm** - built in support for generating `epytext` docstrings @@ -43,19 +64,13 @@ To verify that your documentation is formatted correctly, follow these steps: ## Tests We use [pytest](https://docs.pytest.org/en/stable/) for testing. -The tests are located in the `tests` directory. You can run the tests locally by running: - -```bash -pytest -``` - -in the root directory. +The tests are located in the `tests` directory. You can run the tests locally by running `pytest` in the root directory. This command will run all tests and print a coverage report. 
> \[!TIP\] -> It will also generate an HTML coverage report in the `htmlcov` directory -> if you want to inspect the coverage in more detail, open `htmlcov/index.html` in a browser. +> This will also generate an HTML coverage report in the `htmlcov` directory. +> If you want to inspect the coverage in more detail, open `htmlcov/index.html` in a browser. > \[!IMPORTANT\] > If a new feature is added, a new test should be added to cover it. @@ -65,17 +80,18 @@ This command will run all tests and print a coverage report. ## GitHub Actions Our GitHub Actions workflow is run when a new PR is opened. -It first checks that the pre-commit hook passes and that the documentation builds successfully. -The tests are run only if the pre-commit hook and documentation build pass. -Successful tests are required for merging a PR. -1. Checks and tests are run automatically when you open a pull request. -1. For the tests to run, the [pre-commit](#pre-commit-hooks) hook must pass and - the [documentation](#documentation) must be built successfully. -1. Review the GitHub Actions output if your PR fails. -1. Fix any issues to ensure that all checks and tests pass. +1. First, the [pre-commit](#pre-commit-hooks) hooks must pass and the [documentation](#documentation) must be built successfully. +1. Next, the [type checking](#type-checking) is run. +1. If all previous checks pass, the [tests](#tests) are run. + +> \[!TIP\] +> Review the GitHub Actions output if your PR fails. + +> \[!IMPORTANT\] +> Successful completion of all the workflow checks is required for merging a PR. -## Making and Reviewing Changes +## Making and Submitting Changes 1. Make changes in a new branch. 1. Test your changes locally. 
From 282212cc54f9d302803b6d93e9d8d1eb9de68a9e Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 12 Sep 2024 18:59:21 +0200 Subject: [PATCH 053/102] fixed version attribute --- luxonis_train/__init__.py | 3 +++ pyproject.toml | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/luxonis_train/__init__.py b/luxonis_train/__init__.py index 52f18281..ebc4a719 100644 --- a/luxonis_train/__init__.py +++ b/luxonis_train/__init__.py @@ -1,3 +1,6 @@ +__version__ = "0.0.1" + + from .attached_modules import * from .core import * from .loaders import * diff --git a/pyproject.toml b/pyproject.toml index 7388702e..97635f36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,5 @@ [project] name = "luxonis-train" -version = "0.0.1" description = "Luxonis training framework for seamless training of various neural networks." readme = "README.md" requires-python = ">=3.10" @@ -8,7 +7,7 @@ license = { file = "LICENSE" } authors = [{ name = "Luxonis", email = "support@luxonis.com" }] maintainers = [{ name = "Luxonis", email = "support@luxonis.com" }] keywords = ["ml", "training", "luxonis", "oak"] -dynamic = ["dependencies", "optional-dependencies"] +dynamic = ["dependencies", "optional-dependencies", "version"] classifiers = [ "License :: OSI Approved :: Apache Software License", "Development Status :: 3 - Alpha", @@ -35,6 +34,7 @@ where = ["."] [tool.setuptools.dynamic] dependencies = { file = ["requirements.txt"] } optional-dependencies = { dev = { file = ["requirements-dev.txt"] } } +version = {attr = "luxonis_train.__version__"} [tool.ruff] target-version = "py310" From 7bfa0bc4f8ac9cdc73d4269405e3446e72f0f526 Mon Sep 17 00:00:00 2001 From: Nikita Date: Thu, 12 Sep 2024 21:23:00 +0000 Subject: [PATCH 054/102] refactor: restructure functions and edit args --- configs/ddrnet_segmentation_model.yaml | 2 +- .../ddrnet_segmentation_model.py | 6 +- .../nodes/backbones/ddrnet/blocks.py | 226 +----------------- .../nodes/backbones/ddrnet/ddrnet.py | 39 
++- luxonis_train/nodes/blocks/__init__.py | 8 + luxonis_train/nodes/blocks/blocks.py | 226 ++++++++++++++++++ .../nodes/heads/ddrnet_segmentation_head.py | 39 ++- 7 files changed, 273 insertions(+), 273 deletions(-) diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index 0fa36ecc..a8239dbf 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -6,7 +6,7 @@ model: predefined_model: name: DDRNetSegmentationModel params: - num_classes: 80 + n_classes: 80 task: multiclass backbone_params: use_aux_heads: True # set to False to disable auxiliary heads (for export) diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py index a3fb60da..9082a541 100644 --- a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -12,7 +12,7 @@ @dataclass class DDRNetSegmentationModel(SegmentationModel): backbone: str = "DDRNet" - num_classes: int = 1 + n_classes: int = 1 highres_planes: int = 64 layer5_bottleneck_expansion: int = 2 aux_head_params: Kwargs = field(default_factory=dict) @@ -27,11 +27,11 @@ def nodes(self) -> list[ModelNodeConfig]: self.head_params.update( {"in_planes": self.highres_planes * self.layer5_bottleneck_expansion} ) - self.head_params.update({"num_classes": self.num_classes}) + self.head_params.update({"n_classes": self.n_classes}) self.head_params.update({"attach_index": 0}) self.aux_head_params.update({"in_planes": self.highres_planes}) - self.aux_head_params.update({"num_classes": self.num_classes}) + self.aux_head_params.update({"n_classes": self.n_classes}) self.aux_head_params.update({"attach_index": 1}) node_list = [ diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index 87f54118..5a01c61a 100644 --- 
a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -9,181 +9,8 @@ import torch from torch import Tensor, nn -from torch.nn import functional as F -from luxonis_train.nodes.blocks import ConvModule - - -class DropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample, when applied in the main path of - residual blocks. - - Intended usage of this block is as follows: - - >>> class ResNetBlock(nn.Module): - >>> def __init__(self, ..., drop_path_rate: float): - >>> self.drop_path = DropPath(drop_path_rate) - >>> - >>> def forward(self, x): - >>> return x + self.drop_path(self.conv_bn_act(x)) - - Code taken from TIMM (https://github.com/rwightman/pytorch-image-models), Apache License 2.0. - """ - - def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): - """Initializes the DropPath module. - - @type drop_prob: float - @param drop_prob: Probability of zeroing out individual vectors (channel - dimension) of each feature map. Defaults to 0.0. - @type scale_by_keep: bool - @param scale_by_keep: Whether to scale the output by the keep probability. - Enabled by default to maintain output mean & std in the same range as - without DropPath. Defaults to True. - """ - super().__init__() - self.drop_prob = drop_prob - self.scale_by_keep = scale_by_keep - - def forward(self, x: Tensor) -> Tensor: - if self.drop_prob == 0.0 or not self.training: - return x - return drop_path(x, self.drop_prob, self.scale_by_keep) - - def extra_repr(self) -> str: - return f"drop_prob={round(self.drop_prob, 3):0.3f}" - - -class BasicResNetBlock(nn.Module): - def __init__( - self, - in_planes: int, - planes: int, - stride: int = 1, - expansion: int = 1, - final_relu: bool = True, - droppath_prob: float = 0.0, - ): - """A basic residual block for ResNet. - - @type in_planes: int - @param in_planes: Number of input channels. - @type planes: int - @param planes: Number of output channels. 
- @type stride: int - @param stride: Stride for the convolutional layers. Defaults to 1. - @type expansion: int - @param expansion: Expansion factor for the output channels. Defaults to 1. - @type final_relu: bool - @param final_relu: Whether to apply a ReLU activation after the residual - addition. Defaults to True. - @type droppath_prob: float - @param droppath_prob: Drop path probability for stochastic depth. Defaults to - 0.0. - """ - super().__init__() - self.expansion = expansion - self.conv1 = nn.Conv2d( - in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False - ) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d( - planes, planes, kernel_size=3, stride=1, padding=1, bias=False - ) - self.bn2 = nn.BatchNorm2d(planes) - self.final_relu = final_relu - - self.drop_path = DropPath(drop_prob=droppath_prob) - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion * planes: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_planes, - self.expansion * planes, - kernel_size=1, - stride=stride, - bias=False, - ), - nn.BatchNorm2d(self.expansion * planes), - ) - - def forward(self, x: Tensor) -> Tensor: - out = F.relu(self.bn1(self.conv1(x))) - out = self.bn2(self.conv2(out)) - out = self.drop_path(out) - out += self.shortcut(x) - if self.final_relu: - out = F.relu(out) - return out - - -class Bottleneck(nn.Module): - def __init__( - self, - in_planes: int, - planes: int, - stride: int = 1, - expansion: int = 4, - final_relu: bool = True, - droppath_prob: float = 0.0, - ): - """A bottleneck block for ResNet. - - @type in_planes: int - @param in_planes: Number of input channels. - @type planes: int - @param planes: Number of intermediate channels. - @type stride: int - @param stride: Stride for the second convolutional layer. Defaults to 1. - @type expansion: int - @param expansion: Expansion factor for the output channels. Defaults to 4. 
- @type final_relu: bool - @param final_relu: Whether to apply a ReLU activation after the residual - addition. Defaults to True. - @type droppath_prob: float - @param droppath_prob: Drop path probability for stochastic depth. Defaults to - 0.0. - """ - super().__init__() - self.expansion = expansion - self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d( - planes, planes, kernel_size=3, stride=stride, padding=1, bias=False - ) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d( - planes, self.expansion * planes, kernel_size=1, bias=False - ) - self.bn3 = nn.BatchNorm2d(self.expansion * planes) - self.final_relu = final_relu - - self.drop_path = DropPath(drop_prob=droppath_prob) - self.shortcut = nn.Sequential() - if stride != 1 or in_planes != self.expansion * planes: - self.shortcut = nn.Sequential( - nn.Conv2d( - in_planes, - self.expansion * planes, - kernel_size=1, - stride=stride, - bias=False, - ), - nn.BatchNorm2d(self.expansion * planes), - ) - - def forward(self, x: Tensor) -> Tensor: - out = F.relu(self.bn1(self.conv1(x))) - out = F.relu(self.bn2(self.conv2(out))) - out = self.bn3(self.conv3(out)) - - out = self.drop_path(out) - out += self.shortcut(x) - - if self.final_relu: - out = F.relu(out) - - return out +from luxonis_train.nodes.blocks import ConvModule, UpscaleOnline class DAPPMBranch(nn.Module): @@ -340,36 +167,6 @@ def forward(self, x: Tensor) -> Tensor: return out -class UpscaleOnline(nn.Module): - """Upscale tensor to a specified size during the forward pass. - - This class supports cases where the required scale/size is only known when the input - is received. Only the interpolation mode is set in advance. - """ - - def __init__(self, mode: str = "bilinear"): - """Initialize UpscaleOnline with the interpolation mode. - - @type mode: str - @param mode: Interpolation mode for resizing. Defaults to "bilinear". 
- """ - super().__init__() - self.mode = mode - - def forward(self, x: Tensor, output_height: int, output_width: int) -> Tensor: - """Upscale the input tensor to the specified height and width. - - @type x: Tensor - @param x: Input tensor to be upscaled. - @type output_height: int - @param output_height: Desired height of the output tensor. - @type output_width: int - @param output_width: Desired width of the output tensor. - @return: Upscaled tensor. - """ - return F.interpolate(x, size=[output_height, output_width], mode=self.mode) - - class BasicDDRBackBone(nn.Module): def __init__( self, @@ -551,24 +348,3 @@ def _make_layer( ) return nn.Sequential(*layers) - - -def drop_path(x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True) -> Tensor: - """Drop paths (Stochastic Depth) per sample when applied in the main path of - residual blocks. - - @type x: Tensor - @param x: Input tensor. - @type drop_prob: float - @param drop_prob: Probability of dropping a path. Defaults to 0.0. - @type scale_by_keep: bool - @param scale_by_keep: Whether to scale the output by the keep probability. Defaults - to True. - @return: Tensor with dropped paths based on the provided drop probability. - """ - keep_prob = 1 - drop_prob - shape = (x.shape[0],) + (1,) * (x.ndim - 1) - random_tensor = x.new_empty(shape).bernoulli_(keep_prob) - if keep_prob > 0.0 and scale_by_keep: - random_tensor.div_(keep_prob) - return x * random_tensor diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index 00e7cae9..4ff1f72c 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -1,23 +1,18 @@ -"""DDRNet backbone. 
- -Adapted from: U{https://github.com/Deci-AI/super-gradients/blob/master/src/super_gradients/training/models/segmentation_models/ddrnet.py} -Original source: U{https://github.com/ydhongHIT/DDRNet} -Paper: U{https://arxiv.org/pdf/2101.06085.pdf} -@license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} -""" from typing import Dict, Type from torch import Tensor, nn from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import ConvModule +from luxonis_train.nodes.blocks import ( + BasicResNetBlock, + Bottleneck, + ConvModule, + UpscaleOnline, +) from .blocks import ( DAPPM, BasicDDRBackBone, - BasicResNetBlock, - Bottleneck, - UpscaleOnline, _make_layer, ) @@ -40,11 +35,16 @@ def __init__( layer3_repeats: int = 1, planes: int = 32, layers: list[int] = None, - input_channels: int = 3, **kwargs, ): - """Initialize the DDRNet with specified parameters. + """DDRNet backbone. + @see: U{Adapted from } + @see: U{Original code } + @see: U{Paper } + @license: U{Apache License, Version 2.0 } @type use_aux_heads: bool @param use_aux_heads: Whether to use auxiliary heads. Defaults to True. @type upscale_module: nn.Module @@ -86,8 +86,6 @@ def __init__( @type layers: list[int] @param layers: Number of blocks in each layer of the backbone. Defaults to [2, 2, 2, 2, 1, 2, 2, 1]. - @type input_channels: int - @param input_channels: Number of input channels. Defaults to 3. @type kwargs: Any @param kwargs: Additional arguments to pass to L{BaseNode}. 
""" @@ -114,13 +112,12 @@ def __init__( self.planes = planes self.layers = layers self.backbone_layers, self.additional_layers = self.layers[:4], self.layers[4:] - self.input_channels = input_channels self._backbone = BasicDDRBackBone( block=self.block, width=self.planes, layers=self.backbone_layers, - input_channels=self.input_channels, + input_channels=self.in_channels, layer3_repeats=self.layer3_repeats, ) self._backbone.validate_backbone_attributes() @@ -243,11 +240,11 @@ def backbone(self): ) ) - def forward(self, x: Tensor) -> list[Tensor]: - width_output = x.shape[-1] // 8 - height_output = x.shape[-2] // 8 + def forward(self, inputs: Tensor) -> list[Tensor]: + width_output = inputs.shape[-1] // 8 + height_output = inputs.shape[-2] // 8 - x = self._backbone.stem(x) + x = self._backbone.stem(inputs) x = self._backbone.layer1(x) x = self._backbone.layer2(self.relu(x)) diff --git a/luxonis_train/nodes/blocks/__init__.py b/luxonis_train/nodes/blocks/__init__.py index a87c336e..52c3408e 100644 --- a/luxonis_train/nodes/blocks/__init__.py +++ b/luxonis_train/nodes/blocks/__init__.py @@ -1,7 +1,10 @@ from .blocks import ( AttentionRefinmentBlock, + BasicResNetBlock, BlockRepeater, + Bottleneck, ConvModule, + DropPath, EfficientDecoupledBlock, FeatureFusionBlock, KeypointBlock, @@ -14,6 +17,7 @@ SpatialPyramidPoolingBlock, SqueezeExciteBlock, UpBlock, + UpscaleOnline, autopad, ) @@ -34,4 +38,8 @@ "LearnableMulAddConv", "KeypointBlock", "RepUpBlock", + "BasicResNetBlock", + "Bottleneck", + "UpscaleOnline", + "DropPath", ] diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index ea7c8290..1aebfbae 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -3,6 +3,7 @@ import numpy as np import torch +import torch.nn.functional as F from torch import Tensor, nn from luxonis_train.nodes.activations import HSigmoid @@ -708,3 +709,228 @@ def autopad(kernel_size: T, padding: T | None = None) -> T: 
if isinstance(kernel_size, int): return kernel_size // 2 return tuple(x // 2 for x in kernel_size) + + +class BasicResNetBlock(nn.Module): + def __init__( + self, + in_planes: int, + planes: int, + stride: int = 1, + expansion: int = 1, + final_relu: bool = True, + droppath_prob: float = 0.0, + ): + """A basic residual block for ResNet. + + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of output channels. + @type stride: int + @param stride: Stride for the convolutional layers. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the output channels. Defaults to 1. + @type final_relu: bool + @param final_relu: Whether to apply a ReLU activation after the residual + addition. Defaults to True. + @type droppath_prob: float + @param droppath_prob: Drop path probability for stochastic depth. Defaults to + 0.0. + """ + super().__init__() + self.expansion = expansion + self.conv1 = nn.Conv2d( + in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=1, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.final_relu = final_relu + + self.drop_path = DropPath(drop_prob=droppath_prob) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(self.expansion * planes), + ) + + def forward(self, x: Tensor) -> Tensor: + out = F.relu(self.bn1(self.conv1(x))) + out = self.bn2(self.conv2(out)) + out = self.drop_path(out) + out += self.shortcut(x) + if self.final_relu: + out = F.relu(out) + return out + + +class Bottleneck(nn.Module): + def __init__( + self, + in_planes: int, + planes: int, + stride: int = 1, + expansion: int = 4, + final_relu: bool = True, + 
droppath_prob: float = 0.0, + ): + """A bottleneck block for ResNet. + + @type in_planes: int + @param in_planes: Number of input channels. + @type planes: int + @param planes: Number of intermediate channels. + @type stride: int + @param stride: Stride for the second convolutional layer. Defaults to 1. + @type expansion: int + @param expansion: Expansion factor for the output channels. Defaults to 4. + @type final_relu: bool + @param final_relu: Whether to apply a ReLU activation after the residual + addition. Defaults to True. + @type droppath_prob: float + @param droppath_prob: Drop path probability for stochastic depth. Defaults to + 0.0. + """ + super().__init__() + self.expansion = expansion + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes) + self.conv2 = nn.Conv2d( + planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + ) + self.bn2 = nn.BatchNorm2d(planes) + self.conv3 = nn.Conv2d( + planes, self.expansion * planes, kernel_size=1, bias=False + ) + self.bn3 = nn.BatchNorm2d(self.expansion * planes) + self.final_relu = final_relu + + self.drop_path = DropPath(drop_prob=droppath_prob) + self.shortcut = nn.Sequential() + if stride != 1 or in_planes != self.expansion * planes: + self.shortcut = nn.Sequential( + nn.Conv2d( + in_planes, + self.expansion * planes, + kernel_size=1, + stride=stride, + bias=False, + ), + nn.BatchNorm2d(self.expansion * planes), + ) + + def forward(self, x: Tensor) -> Tensor: + out = F.relu(self.bn1(self.conv1(x))) + out = F.relu(self.bn2(self.conv2(out))) + out = self.bn3(self.conv3(out)) + + out = self.drop_path(out) + out += self.shortcut(x) + + if self.final_relu: + out = F.relu(out) + + return out + + +class UpscaleOnline(nn.Module): + """Upscale tensor to a specified size during the forward pass. + + This class supports cases where the required scale/size is only known when the input + is received. Only the interpolation mode is set in advance. 
+ """ + + def __init__(self, mode: str = "bilinear"): + """Initialize UpscaleOnline with the interpolation mode. + + @type mode: str + @param mode: Interpolation mode for resizing. Defaults to "bilinear". + """ + super().__init__() + self.mode = mode + + def forward(self, x: Tensor, output_height: int, output_width: int) -> Tensor: + """Upscale the input tensor to the specified height and width. + + @type x: Tensor + @param x: Input tensor to be upscaled. + @type output_height: int + @param output_height: Desired height of the output tensor. + @type output_width: int + @param output_width: Desired width of the output tensor. + @return: Upscaled tensor. + """ + return F.interpolate(x, size=[output_height, output_width], mode=self.mode) + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample, when applied in the main path of + residual blocks. + + Intended usage of this block is as follows: + + >>> class ResNetBlock(nn.Module): + >>> def __init__(self, ..., drop_path_rate: float): + >>> self.drop_path = DropPath(drop_path_rate) + >>> + >>> def forward(self, x): + >>> return x + self.drop_path(self.conv_bn_act(x)) + + @see U{Original code (TIMM) } + @license: U{Apache License 2.0 } + """ + + def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): + """Initializes the DropPath module. + + @type drop_prob: float + @param drop_prob: Probability of zeroing out individual vectors (channel + dimension) of each feature map. Defaults to 0.0. + @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep probability. + Enabled by default to maintain output mean & std in the same range as + without DropPath. Defaults to True. 
+ """ + super().__init__() + self.drop_prob = drop_prob + self.scale_by_keep = scale_by_keep + + def drop_path( + self, x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True + ) -> Tensor: + """Drop paths (Stochastic Depth) per sample when applied in the main path of + residual blocks. + + @type x: Tensor + @param x: Input tensor. + @type drop_prob: float + @param drop_prob: Probability of dropping a path. Defaults to 0.0. + @type scale_by_keep: bool + @param scale_by_keep: Whether to scale the output by the keep probability. + Defaults to True. + @return: Tensor with dropped paths based on the provided drop probability. + """ + keep_prob = 1 - drop_prob + shape = (x.shape[0],) + (1,) * (x.ndim - 1) + random_tensor = x.new_empty(shape).bernoulli_(keep_prob) + if keep_prob > 0.0 and scale_by_keep: + random_tensor.div_(keep_prob) + return x * random_tensor + + def forward(self, x: Tensor) -> Tensor: + if self.drop_prob == 0.0 or not self.training: + return x + return self.drop_path(x, self.drop_prob, self.scale_by_keep) + + def extra_repr(self) -> str: + return f"drop_prob={round(self.drop_prob, 3):0.3f}" diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index 10a0c51a..9eca0e29 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -1,11 +1,3 @@ -"""DDRNet segmentation head. 
- -Adapted from: U{https://github.com/Deci-AI/super-gradients/blob/master/src/super_gradients/training/models/segmentation_models/ddrnet.py} -Original source: U{https://github.com/ydhongHIT/DDRNet} -Paper: U{https://arxiv.org/pdf/2101.06085.pdf} -@license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} -""" - import torch.nn as nn from torch import Tensor @@ -16,23 +8,26 @@ class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): in_height: int - in_channels: int + n_classes: int tasks: list[LabelType] = [LabelType.SEGMENTATION] + attach_index: int = 0 def __init__( self, - num_classes: int, in_planes: int = 128, inter_planes: int = 64, scale_factor: int = 8, inter_mode: str = "bilinear", - attach_index: int = 0, **kwargs, ): - """Last stage of the segmentation network. + """DDRNet segmentation head. - @type num_classes: int - @param num_classes: Output width. + @see: U{Adapted from } + @see: U{Original code } + @see: U{Paper } + @license: U{Apache License, Version 2.0 } @type in_planes: int @param in_planes: Width of input. Defaults to 128. @type inter_planes: int @@ -44,17 +39,15 @@ def __init__( @param inter_mode: Upsampling method. One of nearest, linear, bilinear, bicubic, trilinear, area or pixel_shuffle. If pixel_shuffle is set, nn.PixelShuffle is used for scaling. Defaults to "bilinear". - @type attach_index: int - @param attach_index: Index at which to attach. Defaults to 0. """ - self.attach_index = attach_index super().__init__(**kwargs) self.scale_factor = scale_factor if inter_mode == "pixel_shuffle": - assert ( - inter_planes % (scale_factor**2) == 0 - ), "When using pixel_shuffle, inter_planes must be a multiple of scale_factor^2." + if inter_planes % (scale_factor**2) != 0: + raise ValueError( + "When using pixel_shuffle, inter_planes must be a multiple of scale_factor^2." 
+ ) self.conv1 = ConvModule( in_planes, @@ -78,7 +71,7 @@ def __init__( else: self.conv2 = ConvModule( inter_planes, - num_classes, + self.n_classes, kernel_size=1, padding=0, bias=True, @@ -86,8 +79,8 @@ def __init__( ) self.upscale = nn.Upsample(scale_factor=scale_factor, mode=inter_mode) - def forward(self, x: Tensor) -> Tensor: - x = self.conv1(x) + def forward(self, inputs: Tensor) -> Tensor: + x = self.conv1(inputs) out = self.conv2(x) out = self.upscale(out) From d30440890cffc5ecdd8b71783377180d902eac6b Mon Sep 17 00:00:00 2001 From: Nikita Date: Thu, 12 Sep 2024 23:25:10 +0000 Subject: [PATCH 055/102] feature: disable aux head during export --- .../nodes/heads/ddrnet_segmentation_head.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index 9eca0e29..d907fa79 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -1,3 +1,5 @@ +import logging + import torch.nn as nn from torch import Tensor @@ -5,6 +7,8 @@ from luxonis_train.nodes.blocks import ConvModule from luxonis_train.utils.types import LabelType +logger = logging.getLogger(__name__) + class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): in_height: int @@ -85,3 +89,18 @@ def forward(self, inputs: Tensor) -> Tensor: out = self.upscale(out) return out + + def set_export_mode(self, mode: bool = True) -> None: + """Sets the module to export mode. + + Replaces the forward method with an identity function when in export mode. + + @warning: The replacement is destructive and cannot be undone. + @type mode: bool + @param mode: Whether to set the export mode to True or False. Defaults to True. 
+ """ + super().set_export_mode(mode) + if self.export and self.attach_index != 0: + logger.info("Removing the auxiliary head.") + + self.forward = lambda x: x From eda3ead01d8208972b37342371736b1cf9412ada Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 04:38:04 +0200 Subject: [PATCH 056/102] renamed workflow --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 8b3b389d..033ef804 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,4 +1,4 @@ -name: Tests +name: CI on: pull_request: From 2b48cbc7e1645d4eb11e8d384f24a3decd2c0854 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 07:21:47 +0200 Subject: [PATCH 057/102] simplified getting node properties from attached module --- .../attached_modules/base_attached_module.py | 62 ++++++++++++++++++- .../losses/implicit_keypoint_bbox_loss.py | 8 --- .../metrics/mean_average_precision.py | 2 +- .../mean_average_precision_keypoints.py | 6 +- .../metrics/object_keypoint_similarity.py | 11 +--- .../attached_modules/metrics/torchmetrics.py | 6 +- .../visualizers/bbox_visualizer.py | 2 +- .../visualizers/classification_visualizer.py | 8 +-- 8 files changed, 70 insertions(+), 35 deletions(-) diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index 413de0ae..b0cd952d 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -1,10 +1,11 @@ import logging from abc import ABC +from contextlib import suppress from typing import Generic from luxonis_ml.data import LabelType from luxonis_ml.utils.registry import AutoRegisterMeta -from torch import Tensor, nn +from torch import Size, Tensor, nn from typing_extensions import TypeVarTuple, Unpack from luxonis_train.nodes import BaseNode @@ -90,6 +91,7 @@ def __init__(self, *, 
node: BaseNode | None = None): f"but is connected to node '{self.node.name}' which does not support any of them. " f"{self.node.name} supports {node_supported}." ) + self._check_node_type_override() @property def name(self) -> str: @@ -109,8 +111,50 @@ def node(self) -> BaseNode: ) return self._node + @property + def n_keypoints(self) -> int: + """Getter for the number of keypoints. + + @type: int + @raises ValueError: If the number of keypoints cannot be determined. + """ + return self.node.n_keypoints + + @property + def n_classes(self) -> int: + """Getter for the number of classes. + + @type: int + @raises ValueError: If the number of classes cannot be determined. + @raises ValueError: If the number of classes is different for different tasks. + In that case, use the C{node.get_n_classes} method. + """ + return self.node.n_classes + + @property + def original_in_shape(self) -> Size: + """Getter for the original input shape as [N, H, W]. + + @type: Size + """ + return self.node.original_in_shape + + @property + def class_names(self) -> list[str]: + """Getter for the class names. + + @type: list[str] + @raises ValueError: If the class names cannot be determined. + """ + return self.node.class_names + @property def node_tasks(self) -> dict[LabelType, str]: + """Getter for the tasks of the attached node. + + @type: dict[LabelType, str] + @raises RuntimeError: If the node does not have the `tasks` attribute set. + """ if self.node._tasks is None: raise RuntimeError("Node must have the `tasks` attribute specified.") return self.node._tasks @@ -254,7 +298,7 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: "specified in order to use the default `prepare` method." 
) if len(self.supported_labels) > 1: - if len(self.node._tasks) > 1: + if len(self.node_tasks) > 1: raise RuntimeError( f"{self.name} supports more than one label type" f"and is connected to {self.node.name} node " @@ -262,7 +306,7 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: "implementation cannot be used in this case." ) self.supported_labels = list( - set(self.supported_labels) & set(self.node._tasks) + set(self.supported_labels) & set(self.node_tasks) ) x = self.get_input_tensors(inputs) label, label_type = self._get_label(labels) @@ -280,3 +324,15 @@ def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: x = x[-1] return x, label # type: ignore + + def _check_node_type_override(self) -> None: + if "node" not in self.__annotations__: + return + + node_type = self.__annotations__["node"] + with suppress(RuntimeError): + if not isinstance(self.node, node_type): + raise IncompatibleException( + f"Module '{self.name}' is attached to the '{self.node.name}' node, " + f"but '{self.name}' is only compatible with nodes of type '{node_type.__name__}'." + ) diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index f12235c9..1c6156d1 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -9,7 +9,6 @@ from luxonis_train.attached_modules.losses.keypoint_loss import KeypointLoss from luxonis_train.nodes import ImplicitKeypointBBoxHead from luxonis_train.utils import ( - IncompatibleException, Labels, Packet, compute_iou_loss, @@ -96,13 +95,6 @@ def __init__( super().__init__(**kwargs) - if not isinstance(self.node, ImplicitKeypointBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `ImplicitKeypointBBoxHead`." 
- ) - self.n_classes = self.node.n_classes - self.n_keypoints = self.node.n_keypoints self.n_anchors = self.node.n_anchors self.num_heads = self.node.num_heads self.box_offset = self.node.box_offset diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index ea64afd0..cc479d76 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -39,7 +39,7 @@ def prepare( box_label = self.get_label(labels) output_nms = self.get_input_tensors(inputs) - image_size = self.node.original_in_shape[1:] + image_size = self.original_in_shape[1:] output_list: list[dict[str, Tensor]] = [] label_list: list[dict[str, Tensor]] = [] diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 3ccaea5d..5006a49d 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -54,8 +54,6 @@ def __init__( @license: Apache License, Version 2.0 - @type num_keypoints: int - @param num_keypoints: Number of keypoints. @type sigmas: list[float] | None @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then use COCO if possible otherwise defaults. Defaults to C{None}. 
@@ -68,8 +66,6 @@ def __init__( """ super().__init__(**kwargs) - self.n_keypoints = self.node.n_keypoints - self.sigmas = get_sigmas(sigmas, self.n_keypoints, caller_name=self.name) self.area_factor = get_with_default( area_factor, "bbox area scaling", self.name, default=0.53 @@ -111,7 +107,7 @@ def prepare( output_list_kpt_map: list[dict[str, Tensor]] = [] label_list_kpt_map: list[dict[str, Tensor]] = [] - image_size = self.node.original_in_shape[1:] + image_size = self.original_in_shape[1:] output_kpts = self.get_input_tensors(inputs, LabelType.KEYPOINTS) output_bboxes = self.get_input_tensors(inputs, LabelType.BOUNDINGBOX) diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index 77c05ea4..e0b1d475 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -39,8 +39,6 @@ def __init__( ) -> None: """Object Keypoint Similarity metric for evaluating keypoint predictions. - @type n_keypoints: int - @param n_keypoints: Number of keypoints. @type sigmas: list[float] | None @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then use COCO if possible otherwise defaults. Defaults to C{None}. @@ -53,12 +51,6 @@ def __init__( """ super().__init__(**kwargs) - if n_keypoints is None and self._node is None: - raise ValueError( - f"Either `n_keypoints` or `node` must be provided to {self.name}." 
- ) - self.n_keypoints = n_keypoints or self.node.n_keypoints - self.sigmas = get_sigmas(sigmas, self.n_keypoints, caller_name=self.name) self.area_factor = get_with_default( area_factor, "bbox area scaling", self.name, default=0.53 @@ -72,7 +64,6 @@ def __init__( def prepare( self, inputs: Packet[Tensor], labels: Labels ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: - assert self.node.tasks is not None kpts_labels = self.get_label(labels, LabelType.KEYPOINTS) bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX) num_keypoints = (kpts_labels.shape[1] - 2) // 3 @@ -85,7 +76,7 @@ def prepare( output_list_oks = [] label_list_oks = [] - image_size = self.node.original_in_shape[1:] + image_size = self.original_in_shape[1:] for i, pred_kpt in enumerate( self.get_input_tensors(inputs, LabelType.KEYPOINTS) diff --git a/luxonis_train/attached_modules/metrics/torchmetrics.py b/luxonis_train/attached_modules/metrics/torchmetrics.py index a678d54e..5bddc0b9 100644 --- a/luxonis_train/attached_modules/metrics/torchmetrics.py +++ b/luxonis_train/attached_modules/metrics/torchmetrics.py @@ -17,7 +17,7 @@ def __init__(self, **kwargs: Any): super().__init__(node=kwargs.pop("node", None)) task = kwargs.get("task") - if self.node.n_classes > 1: + if self.n_classes > 1: if task == "binary": raise ValueError( f"Task type set to '{task}', but the dataset has more than 1 class. 
" @@ -42,7 +42,7 @@ def __init__(self, **kwargs: Any): if self._task == "multiclass": if "num_classes" not in kwargs: try: - kwargs["num_classes"] = self.node.n_classes + kwargs["num_classes"] = self.n_classes except RuntimeError as e: raise ValueError( "Either `node` or `num_classes` must be provided to " @@ -51,7 +51,7 @@ def __init__(self, **kwargs: Any): else: if "num_labels" not in kwargs: try: - kwargs["num_labels"] = self.node.n_classes + kwargs["num_labels"] = self.n_classes except RuntimeError as e: raise ValueError( "Either `node` or `num_labels` must be provided to " diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index 44595ea6..b2c8f411 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -50,7 +50,7 @@ def __init__( labels = {i: label for i, label in enumerate(labels)} self.bbox_labels = labels or { - i: label for i, label in enumerate(self.node.class_names) + i: label for i, label in enumerate(self.class_names) } if colors is None: diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index 343ab3cb..c048872d 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -34,17 +34,17 @@ def __init__( def _get_class_name(self, pred: Tensor) -> str: idx = int((pred.argmax()).item()) - if self.node.class_names is None: + if self.class_names is None: return str(idx) - return self.node.class_names[idx] + return self.class_names[idx] def _generate_plot(self, prediction: Tensor, width: int, height: int) -> Tensor: pred = prediction.softmax(-1).detach().cpu().numpy() fig, ax = plt.subplots(figsize=(width / 100, height / 100)) ax.bar(np.arange(len(pred)), pred) 
ax.set_xticks(np.arange(len(pred))) - if self.node.class_names is not None: - ax.set_xticklabels(self.node.class_names, rotation=90) + if self.class_names is not None: + ax.set_xticklabels(self.class_names, rotation=90) else: ax.set_xticklabels(np.arange(1, len(pred) + 1)) ax.set_ylim(0, 1) From cab43739df977de251b5f16b4a17aa3fbd953ef3 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 07:22:11 +0200 Subject: [PATCH 058/102] increased stacklevel --- luxonis_train/utils/general.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 238e6080..29a59c4c 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -125,5 +125,5 @@ def get_with_default( if caller_name: msg = f"[{caller_name}] {msg}" - logger.info(msg) + logger.info(msg, stacklevel=2) return default From 40a683f685dec6fd5942b94480b7b081e0ae8524 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 07:22:23 +0200 Subject: [PATCH 059/102] added property docs --- luxonis_train/nodes/base_node.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index bd2aa6db..aba30049 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -283,7 +283,11 @@ def get_class_names(self, task: LabelType) -> list[str]: @property def n_keypoints(self) -> int: - """Getter for the number of keypoints.""" + """Getter for the number of keypoints. + + @type: int + @raises ValueError: If the number of keypoints cannot be determined. + """ if self._n_keypoints is not None: return self._n_keypoints @@ -304,7 +308,13 @@ def n_keypoints(self) -> int: @property def n_classes(self) -> int: - """Getter for the number of classes.""" + """Getter for the number of classes. + + @type: int + @raises ValueError: If the number of classes cannot be determined. 
+ @raises ValueError: If the number of classes is different for different tasks. + In that case, use the L{get_n_classes} method. + """ if self._n_classes is not None: return self._n_classes @@ -333,7 +343,11 @@ def n_classes(self) -> int: @property def class_names(self) -> list[str]: - """Getter for the class names.""" + """Getter for the class names. + + @type: list[str] + @raises ValueError: If the class names cannot be determined. + """ if not self._tasks: raise ValueError( f"{self.name} does not have any tasks defined, " From 3376b3cb96dc331e9614d8d87a0c9bf2e1e78c9c Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 07:22:31 +0200 Subject: [PATCH 060/102] fixed incorrect parameter name --- .../models/predefined_models/keypoint_detection_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/models/predefined_models/keypoint_detection_model.py b/luxonis_train/models/predefined_models/keypoint_detection_model.py index 29a18c19..670b00b1 100644 --- a/luxonis_train/models/predefined_models/keypoint_detection_model.py +++ b/luxonis_train/models/predefined_models/keypoint_detection_model.py @@ -50,7 +50,7 @@ def nodes(self) -> list[ModelNodeConfig]: task = {} if self.bbox_task_name is not None: - task["bbox"] = self.bbox_task_name + task["boundingbox"] = self.bbox_task_name if self.kpt_task_name is not None: task["keypoints"] = self.kpt_task_name From 9c8d692c1ed0765f84ba6cc4c5bd4387f60f50f9 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 07:22:53 +0200 Subject: [PATCH 061/102] simplified efficient bbox and keypoint losses --- .../losses/adaptive_detection_loss.py | 165 +++++++------ .../losses/efficient_keypoint_bbox_loss.py | 225 +++++------------- 2 files changed, 155 insertions(+), 235 deletions(-) diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index a0c21eb2..e460eee4 100644 --- 
a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -10,7 +10,6 @@ from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner from luxonis_train.nodes import EfficientBBoxHead from luxonis_train.utils import ( - IncompatibleException, Labels, Packet, anchors_for_fpn_features, @@ -28,6 +27,12 @@ class AdaptiveDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Ten node: EfficientBBoxHead supported_labels = [LabelType.BOUNDINGBOX] + anchors: Tensor + anchor_points: Tensor + n_anchors_list: list[int] + stride_tensor: Tensor + gt_bboxes_scale: Tensor + def __init__( self, n_warmup_epochs: int = 4, @@ -55,18 +60,17 @@ def __init__( """ super().__init__(**kwargs) - if not isinstance(self.node, EfficientBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `EfficientBBoxHead`." - ) + # if not isinstance(self.node, EfficientBBoxHead): + # raise IncompatibleException( + # f"Loss `{self.name}` is only " + # "compatible with nodes of type `EfficientBBoxHead`." 
+ # ) self.iou_type: IoUType = iou_type self.reduction = reduction - self.n_classes = self.node.n_classes self.stride = self.node.stride self.grid_cell_size = self.node.grid_cell_size self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[1:] + self.original_img_size = self.original_in_shape[1:] self.n_warmup_epochs = n_warmup_epochs self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) @@ -78,11 +82,7 @@ def __init__( self.class_loss_weight = class_loss_weight self.iou_loss_weight = iou_loss_weight - self.anchors = None - self.anchor_points = None - self.n_anchors_list = None - self.stride_tensor = None - self.gt_bboxes_scale = None + self._logged_assigner_change = False def prepare( self, inputs: Packet[Tensor], labels: Labels @@ -90,71 +90,33 @@ def prepare( feats = self.get_input_tensors(inputs, "features") pred_scores = self.get_input_tensors(inputs, "class_scores")[0] pred_distri = self.get_input_tensors(inputs, "distributions")[0] - batch_size = pred_scores.shape[0] - device = pred_scores.device target = self.get_label(labels) - if self.gt_bboxes_scale is None: - self.gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - ( - self.anchors, - self.anchor_points, - self.n_anchors_list, - self.stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) - self.anchor_points_strided = self.anchor_points / self.stride_tensor - target = self._preprocess_target(target, batch_size) + batch_size = pred_scores.shape[0] + + self._init_parameters(feats) + + target = self._preprocess_bbox_target(target, batch_size) pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) gt_labels = target[:, :, :1] gt_xyxy = target[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - if 
self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - _, - ) = self.atts_assigner( - self.anchors, - self.n_anchors_list, - gt_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * self.stride_tensor, - ) - else: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - _, - ) = self.tal_assigner( - pred_scores.detach(), - pred_bboxes.detach() * self.stride_tensor, - self.anchor_points, - gt_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + _, + ) = self._run_assigner( + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) return ( pred_bboxes, @@ -196,7 +158,60 @@ def forward( return loss, sub_losses - def _preprocess_target(self, target: Tensor, batch_size: int): + def _init_parameters(self, features: list[Tensor]): + if not hasattr(self, "gt_bboxes_scale"): + self.gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=features[0].device, + ) + ( + self.anchors, + self.anchor_points, + self.n_anchors_list, + self.stride_tensor, + ) = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + self.anchor_points_strided = self.anchor_points / self.stride_tensor + + def _run_assigner( + self, + gt_labels: Tensor, + gt_xyxy: Tensor, + mask_gt: Tensor, + pred_bboxes: Tensor, + pred_scores: Tensor, + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + if self._epoch < self.n_warmup_epochs: + return self.atts_assigner( + self.anchors, + self.n_anchors_list, + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes.detach() * self.stride_tensor, + ) + else: + self._log_assigner_change() + return self.tal_assigner( + pred_scores.detach(), + pred_bboxes.detach() * self.stride_tensor, + self.anchor_points, + gt_labels, + gt_xyxy, + 
mask_gt, + ) + + def _preprocess_bbox_target(self, target: Tensor, batch_size: int) -> Tensor: """Preprocess target in shape [batch_size, N, 5] where N is maximum number of instances in one image.""" sample_ids, counts = cast( @@ -212,6 +227,16 @@ def _preprocess_target(self, target: Tensor, batch_size: int): out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") return out_target + def _log_assigner_change(self): + if self._logged_assigner_change: + return + + logger.info( + f"Switching to Task Aligned Assigner after {self.n_warmup_epochs} warmup epochs.", + stacklevel=2, + ) + self._logged_assigner_change = True + class VarifocalLoss(nn.Module): def __init__(self, alpha: float = 0.75, gamma: float = 2.0): diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py index 04a9d641..e9fba8b6 100644 --- a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -1,18 +1,15 @@ -from typing import Any, Literal, cast +from typing import Any, Literal import torch import torch.nn.functional as F from luxonis_ml.data import LabelType -from torch import Tensor, nn -from torchvision.ops import box_convert +from torch import Tensor -from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner +from luxonis_train.attached_modules.losses import AdaptiveDetectionLoss from luxonis_train.nodes import EfficientKeypointBBoxHead from luxonis_train.utils import ( - IncompatibleException, Labels, Packet, - anchors_for_fpn_features, compute_iou_loss, dist2bbox, get_sigmas, @@ -20,27 +17,21 @@ ) from luxonis_train.utils.boundingbox import IoUType -from .base_loss import BaseLoss from .bce_with_logits import BCEWithLogitsLoss -class EfficientKeypointBBoxLoss( - BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] -): +class EfficientKeypointBBoxLoss(AdaptiveDetectionLoss): node: 
EfficientKeypointBBoxHead supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] - class NodePacket(Packet[Tensor]): - features: list[Tensor] - class_scores: Tensor - distributions: Tensor + gt_kpts_scale: Tensor def __init__( self, n_warmup_epochs: int = 4, iou_type: IoUType = "giou", reduction: Literal["sum", "mean"] = "mean", - class_bbox_loss_weight: float = 1.0, + class_loss_weight: float = 1.0, iou_loss_weight: float = 2.5, viz_pw: float = 1.0, regr_kpts_loss_weight: float = 1.5, @@ -60,8 +51,8 @@ def __init__( @param iou_type: IoU type used for bbox regression loss. @type reduction: Literal["sum", "mean"] @param reduction: Reduction type for loss. - @type class_bbox_loss_weight: float - @param class_bbox_loss_weight: Weight of classification loss for bounding boxes. + @type class_loss_weight: float + @param class_loss_weight: Weight of classification loss for bounding boxes. @type regr_kpts_loss_weight: float @param regr_kpts_loss_weight: Weight of regression loss for keypoints. @type vis_kpts_loss_weight: float @@ -73,39 +64,24 @@ def __init__( @type area_factor: float | None @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. """ - super().__init__(**kwargs) - - if not isinstance(self.node, EfficientKeypointBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `EfficientKeypointBBoxHead`." 
- ) - self.iou_type: IoUType = iou_type - self.reduction = reduction - self.n_classes = self.node.n_classes - self.stride = self.node.stride - self.grid_cell_size = self.node.grid_cell_size - self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[1:] - self.n_kps = self.node.n_keypoints + super().__init__( + n_warmup_epochs=n_warmup_epochs, + iou_type=iou_type, + reduction=reduction, + class_loss_weight=class_loss_weight, + iou_loss_weight=iou_loss_weight, + **kwargs, + ) self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([viz_pw])) self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=self.n_kps, caller_name=self.name + sigmas=sigmas, + n_keypoints=self.n_keypoints, + caller_name=self.name, ) self.area_factor = get_with_default( area_factor, "bbox area scaling", self.name, default=0.53 ) - - self.n_warmup_epochs = n_warmup_epochs - self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) - self.tal_assigner = TaskAlignedAssigner( - topk=13, n_classes=self.n_classes, alpha=1.0, beta=6.0 - ) - - self.varifocal_loss = VarifocalLoss() - self.class_bbox_loss_weight = class_bbox_loss_weight - self.iou_loss_weight = iou_loss_weight self.regr_kpts_loss_weight = regr_kpts_loss_weight self.vis_kpts_loss_weight = vis_kpts_loss_weight @@ -117,102 +93,59 @@ def prepare( pred_distri = self.get_input_tensors(inputs, "distributions")[0] pred_kpts = self.get_input_tensors(inputs, "keypoints_raw")[0] - batch_size = pred_scores.shape[0] - device = pred_scores.device - target_kpts = self.get_label(labels, LabelType.KEYPOINTS) target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX) + + batch_size = pred_scores.shape[0] n_kpts = (target_kpts.shape[1] - 2) // 3 - gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - gt_kpts_scale = torch.tensor( - [ - self.original_img_size[1], - 
self.original_img_size[0], - ], - device=device, - ) - ( - anchors, - anchor_points, - n_anchors_list, - stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) + self._init_parameters(feats) - anchor_points_strided = anchor_points / stride_tensor - pred_bboxes = dist2bbox(pred_distri, anchor_points_strided) + pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) pred_kpts = self.dist2kpts_noscale( - anchor_points_strided, pred_kpts.view(batch_size, -1, n_kpts, 3) + self.anchor_points_strided, + pred_kpts.view( + batch_size, + -1, + n_kpts, + 3, + ), ) - target_bbox = self._preprocess_bbox_target( - target_bbox, batch_size, gt_bboxes_scale - ) + target_bbox = self._preprocess_bbox_target(target_bbox, batch_size) gt_bbox_labels = target_bbox[:, :, :1] gt_xyxy = target_bbox[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - - if self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - assigned_gt_idx, - ) = self.atts_assigner( - anchors, - n_anchors_list, - gt_bbox_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * stride_tensor, - ) - else: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - assigned_gt_idx, - ) = self.tal_assigner( - pred_scores.detach(), - pred_bboxes.detach() * stride_tensor, - anchor_points, - gt_bbox_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + assigned_gt_idx, + ) = self._run_assigner( + gt_bbox_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) batched_kpts = self._preprocess_kpts_target( - target_kpts, batch_size, gt_kpts_scale + target_kpts, batch_size, self.gt_kpts_scale ) assigned_gt_idx_expanded = assigned_gt_idx.unsqueeze(-1).unsqueeze(-1) selected_keypoints = batched_kpts.gather( - 1, assigned_gt_idx_expanded.expand(-1, -1, self.n_kps, 3) + 1, 
assigned_gt_idx_expanded.expand(-1, -1, self.n_keypoints, 3) ) xy_components = selected_keypoints[:, :, :, :2] - normalized_xy = xy_components / stride_tensor.view(1, -1, 1, 1) + normalized_xy = xy_components / self.stride_tensor.view(1, -1, 1, 1) selected_keypoints = torch.cat( (normalized_xy, selected_keypoints[:, :, :, 2:]), dim=-1 ) gt_kpt = selected_keypoints[mask_positive] pred_kpts = pred_kpts[mask_positive] - assigned_bboxes = assigned_bboxes / stride_tensor + assigned_bboxes = assigned_bboxes / self.stride_tensor area = ( assigned_bboxes[mask_positive][:, 0] - assigned_bboxes[mask_positive][:, 2] @@ -273,7 +206,7 @@ def forward( )[0] loss = ( - self.class_bbox_loss_weight * loss_cls + self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + regression_loss * self.regr_kpts_loss_weight + visibility_loss * self.vis_kpts_loss_weight @@ -288,25 +221,6 @@ def forward( return loss, sub_losses - def _preprocess_bbox_target( - self, bbox_target: Tensor, batch_size: int, scale_tensor: Tensor - ) -> Tensor: - """Preprocess target bboxes in shape [batch_size, N, 5] where N is maximum - number of instances in one image.""" - sample_ids, counts = cast( - tuple[Tensor, Tensor], - torch.unique(bbox_target[:, 0].int(), return_counts=True), - ) - c_max = int(counts.max()) if counts.numel() > 0 else 0 - out_target = torch.zeros(batch_size, c_max, 5, device=bbox_target.device) - out_target[:, :, 0] = -1 - for id, count in zip(sample_ids, counts): - out_target[id, :count] = bbox_target[bbox_target[:, 0] == id][:, 1:] - - scaled_target = out_target[:, :, 1:5] * scale_tensor - out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") - return out_target - def _preprocess_kpts_target( self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor ) -> Tensor: @@ -316,13 +230,13 @@ def _preprocess_kpts_target( _, counts = torch.unique(kpts_target[:, 0].int(), return_counts=True) max_kpts = int(counts.max()) if counts.numel() > 0 else 0 batched_keypoints = 
torch.zeros( - (batch_size, max_kpts, self.n_kps, 3), device=kpts_target.device + (batch_size, max_kpts, self.n_keypoints, 3), device=kpts_target.device ) for i in range(batch_size): keypoints_i = kpts_target[kpts_target[:, 0] == i] scaled_keypoints_i = keypoints_i[:, 2:].clone() batched_keypoints[i, : keypoints_i.shape[0]] = scaled_keypoints_i.view( - -1, self.n_kps, 3 + -1, self.n_keypoints, 3 ) batched_keypoints[i, :, :, :2] *= scale_tensor[:2] @@ -341,32 +255,13 @@ def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: adj_kpts[..., 1] += y_adj return adj_kpts - -class VarifocalLoss(nn.Module): - def __init__(self, alpha: float = 0.75, gamma: float = 2.0): - """Varifocal Loss is a loss function for training a dense object detector to predict - the IoU-aware classification score, inspired by focal loss. - Code is adapted from: U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models/losses.py} - - @type alpha: float - @param alpha: alpha parameter in focal loss, default is 0.75. - @type gamma: float - @param gamma: gamma parameter in focal loss, default is 2.0. 
- """ - - super().__init__() - - self.alpha = alpha - self.gamma = gamma - - def forward( - self, pred_score: Tensor, target_score: Tensor, label: Tensor - ) -> Tensor: - weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label - ) - ce_loss = F.binary_cross_entropy( - pred_score.float(), target_score.float(), reduction="none" + def _init_parameters(self, features: list[Tensor]): + device = features[0].device + super()._init_parameters(features) + self.gt_kpts_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, ) - loss = (ce_loss * weight).sum() - return loss From 0455bf7d91a39a2a20d198b2436baf04bc8bf160 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 07:22:53 +0200 Subject: [PATCH 062/102] simplified efficient bbox and keypoint losses --- .../losses/adaptive_detection_loss.py | 160 +++++++------ .../losses/efficient_keypoint_bbox_loss.py | 225 +++++------------- 2 files changed, 150 insertions(+), 235 deletions(-) diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index a0c21eb2..3cf6af28 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -10,7 +10,6 @@ from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner from luxonis_train.nodes import EfficientBBoxHead from luxonis_train.utils import ( - IncompatibleException, Labels, Packet, anchors_for_fpn_features, @@ -28,6 +27,12 @@ class AdaptiveDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Ten node: EfficientBBoxHead supported_labels = [LabelType.BOUNDINGBOX] + anchors: Tensor + anchor_points: Tensor + n_anchors_list: list[int] + stride_tensor: Tensor + gt_bboxes_scale: Tensor + def __init__( self, n_warmup_epochs: int = 4, @@ -55,18 +60,12 @@ def __init__( """ super().__init__(**kwargs) - 
if not isinstance(self.node, EfficientBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `EfficientBBoxHead`." - ) self.iou_type: IoUType = iou_type self.reduction = reduction - self.n_classes = self.node.n_classes self.stride = self.node.stride self.grid_cell_size = self.node.grid_cell_size self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[1:] + self.original_img_size = self.original_in_shape[1:] self.n_warmup_epochs = n_warmup_epochs self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) @@ -78,11 +77,7 @@ def __init__( self.class_loss_weight = class_loss_weight self.iou_loss_weight = iou_loss_weight - self.anchors = None - self.anchor_points = None - self.n_anchors_list = None - self.stride_tensor = None - self.gt_bboxes_scale = None + self._logged_assigner_change = False def prepare( self, inputs: Packet[Tensor], labels: Labels @@ -90,71 +85,33 @@ def prepare( feats = self.get_input_tensors(inputs, "features") pred_scores = self.get_input_tensors(inputs, "class_scores")[0] pred_distri = self.get_input_tensors(inputs, "distributions")[0] - batch_size = pred_scores.shape[0] - device = pred_scores.device target = self.get_label(labels) - if self.gt_bboxes_scale is None: - self.gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - ( - self.anchors, - self.anchor_points, - self.n_anchors_list, - self.stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) - self.anchor_points_strided = self.anchor_points / self.stride_tensor - target = self._preprocess_target(target, batch_size) + batch_size = pred_scores.shape[0] + + self._init_parameters(feats) + + target = self._preprocess_bbox_target(target, batch_size) pred_bboxes = 
dist2bbox(pred_distri, self.anchor_points_strided) gt_labels = target[:, :, :1] gt_xyxy = target[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - if self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - _, - ) = self.atts_assigner( - self.anchors, - self.n_anchors_list, - gt_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * self.stride_tensor, - ) - else: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - _, - ) = self.tal_assigner( - pred_scores.detach(), - pred_bboxes.detach() * self.stride_tensor, - self.anchor_points, - gt_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + _, + ) = self._run_assigner( + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) return ( pred_bboxes, @@ -196,7 +153,60 @@ def forward( return loss, sub_losses - def _preprocess_target(self, target: Tensor, batch_size: int): + def _init_parameters(self, features: list[Tensor]): + if not hasattr(self, "gt_bboxes_scale"): + self.gt_bboxes_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + self.original_img_size[1], + self.original_img_size[0], + ], + device=features[0].device, + ) + ( + self.anchors, + self.anchor_points, + self.n_anchors_list, + self.stride_tensor, + ) = anchors_for_fpn_features( + features, + self.stride, + self.grid_cell_size, + self.grid_cell_offset, + multiply_with_stride=True, + ) + self.anchor_points_strided = self.anchor_points / self.stride_tensor + + def _run_assigner( + self, + gt_labels: Tensor, + gt_xyxy: Tensor, + mask_gt: Tensor, + pred_bboxes: Tensor, + pred_scores: Tensor, + ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor]: + if self._epoch < self.n_warmup_epochs: + return self.atts_assigner( + self.anchors, + self.n_anchors_list, + gt_labels, + gt_xyxy, + mask_gt, + pred_bboxes.detach() * self.stride_tensor, + ) + else: + 
self._log_assigner_change() + return self.tal_assigner( + pred_scores.detach(), + pred_bboxes.detach() * self.stride_tensor, + self.anchor_points, + gt_labels, + gt_xyxy, + mask_gt, + ) + + def _preprocess_bbox_target(self, target: Tensor, batch_size: int) -> Tensor: """Preprocess target in shape [batch_size, N, 5] where N is maximum number of instances in one image.""" sample_ids, counts = cast( @@ -212,6 +222,16 @@ def _preprocess_target(self, target: Tensor, batch_size: int): out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") return out_target + def _log_assigner_change(self): + if self._logged_assigner_change: + return + + logger.info( + f"Switching to Task Aligned Assigner after {self.n_warmup_epochs} warmup epochs.", + stacklevel=2, + ) + self._logged_assigner_change = True + class VarifocalLoss(nn.Module): def __init__(self, alpha: float = 0.75, gamma: float = 2.0): diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py index 04a9d641..e9fba8b6 100644 --- a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -1,18 +1,15 @@ -from typing import Any, Literal, cast +from typing import Any, Literal import torch import torch.nn.functional as F from luxonis_ml.data import LabelType -from torch import Tensor, nn -from torchvision.ops import box_convert +from torch import Tensor -from luxonis_train.assigners import ATSSAssigner, TaskAlignedAssigner +from luxonis_train.attached_modules.losses import AdaptiveDetectionLoss from luxonis_train.nodes import EfficientKeypointBBoxHead from luxonis_train.utils import ( - IncompatibleException, Labels, Packet, - anchors_for_fpn_features, compute_iou_loss, dist2bbox, get_sigmas, @@ -20,27 +17,21 @@ ) from luxonis_train.utils.boundingbox import IoUType -from .base_loss import BaseLoss from .bce_with_logits import 
BCEWithLogitsLoss -class EfficientKeypointBBoxLoss( - BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] -): +class EfficientKeypointBBoxLoss(AdaptiveDetectionLoss): node: EfficientKeypointBBoxHead supported_labels = [(LabelType.BOUNDINGBOX, LabelType.KEYPOINTS)] - class NodePacket(Packet[Tensor]): - features: list[Tensor] - class_scores: Tensor - distributions: Tensor + gt_kpts_scale: Tensor def __init__( self, n_warmup_epochs: int = 4, iou_type: IoUType = "giou", reduction: Literal["sum", "mean"] = "mean", - class_bbox_loss_weight: float = 1.0, + class_loss_weight: float = 1.0, iou_loss_weight: float = 2.5, viz_pw: float = 1.0, regr_kpts_loss_weight: float = 1.5, @@ -60,8 +51,8 @@ def __init__( @param iou_type: IoU type used for bbox regression loss. @type reduction: Literal["sum", "mean"] @param reduction: Reduction type for loss. - @type class_bbox_loss_weight: float - @param class_bbox_loss_weight: Weight of classification loss for bounding boxes. + @type class_loss_weight: float + @param class_loss_weight: Weight of classification loss for bounding boxes. @type regr_kpts_loss_weight: float @param regr_kpts_loss_weight: Weight of regression loss for keypoints. @type vis_kpts_loss_weight: float @@ -73,39 +64,24 @@ def __init__( @type area_factor: float | None @param area_factor: Factor by which we multiply bbox area which is used in KeypointLoss. If None then use default one. Defaults to C{None}. """ - super().__init__(**kwargs) - - if not isinstance(self.node, EfficientKeypointBBoxHead): - raise IncompatibleException( - f"Loss `{self.name}` is only " - "compatible with nodes of type `EfficientKeypointBBoxHead`." 
- ) - self.iou_type: IoUType = iou_type - self.reduction = reduction - self.n_classes = self.node.n_classes - self.stride = self.node.stride - self.grid_cell_size = self.node.grid_cell_size - self.grid_cell_offset = self.node.grid_cell_offset - self.original_img_size = self.node.original_in_shape[1:] - self.n_kps = self.node.n_keypoints + super().__init__( + n_warmup_epochs=n_warmup_epochs, + iou_type=iou_type, + reduction=reduction, + class_loss_weight=class_loss_weight, + iou_loss_weight=iou_loss_weight, + **kwargs, + ) self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([viz_pw])) self.sigmas = get_sigmas( - sigmas=sigmas, n_keypoints=self.n_kps, caller_name=self.name + sigmas=sigmas, + n_keypoints=self.n_keypoints, + caller_name=self.name, ) self.area_factor = get_with_default( area_factor, "bbox area scaling", self.name, default=0.53 ) - - self.n_warmup_epochs = n_warmup_epochs - self.atts_assigner = ATSSAssigner(topk=9, n_classes=self.n_classes) - self.tal_assigner = TaskAlignedAssigner( - topk=13, n_classes=self.n_classes, alpha=1.0, beta=6.0 - ) - - self.varifocal_loss = VarifocalLoss() - self.class_bbox_loss_weight = class_bbox_loss_weight - self.iou_loss_weight = iou_loss_weight self.regr_kpts_loss_weight = regr_kpts_loss_weight self.vis_kpts_loss_weight = vis_kpts_loss_weight @@ -117,102 +93,59 @@ def prepare( pred_distri = self.get_input_tensors(inputs, "distributions")[0] pred_kpts = self.get_input_tensors(inputs, "keypoints_raw")[0] - batch_size = pred_scores.shape[0] - device = pred_scores.device - target_kpts = self.get_label(labels, LabelType.KEYPOINTS) target_bbox = self.get_label(labels, LabelType.BOUNDINGBOX) + + batch_size = pred_scores.shape[0] n_kpts = (target_kpts.shape[1] - 2) // 3 - gt_bboxes_scale = torch.tensor( - [ - self.original_img_size[1], - self.original_img_size[0], - self.original_img_size[1], - self.original_img_size[0], - ], - device=device, - ) - gt_kpts_scale = torch.tensor( - [ - self.original_img_size[1], - 
self.original_img_size[0], - ], - device=device, - ) - ( - anchors, - anchor_points, - n_anchors_list, - stride_tensor, - ) = anchors_for_fpn_features( - feats, - self.stride, - self.grid_cell_size, - self.grid_cell_offset, - multiply_with_stride=True, - ) + self._init_parameters(feats) - anchor_points_strided = anchor_points / stride_tensor - pred_bboxes = dist2bbox(pred_distri, anchor_points_strided) + pred_bboxes = dist2bbox(pred_distri, self.anchor_points_strided) pred_kpts = self.dist2kpts_noscale( - anchor_points_strided, pred_kpts.view(batch_size, -1, n_kpts, 3) + self.anchor_points_strided, + pred_kpts.view( + batch_size, + -1, + n_kpts, + 3, + ), ) - target_bbox = self._preprocess_bbox_target( - target_bbox, batch_size, gt_bboxes_scale - ) + target_bbox = self._preprocess_bbox_target(target_bbox, batch_size) gt_bbox_labels = target_bbox[:, :, :1] gt_xyxy = target_bbox[:, :, 1:] mask_gt = (gt_xyxy.sum(-1, keepdim=True) > 0).float() - - if self._epoch < self.n_warmup_epochs: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - assigned_gt_idx, - ) = self.atts_assigner( - anchors, - n_anchors_list, - gt_bbox_labels, - gt_xyxy, - mask_gt, - pred_bboxes.detach() * stride_tensor, - ) - else: - ( - assigned_labels, - assigned_bboxes, - assigned_scores, - mask_positive, - assigned_gt_idx, - ) = self.tal_assigner( - pred_scores.detach(), - pred_bboxes.detach() * stride_tensor, - anchor_points, - gt_bbox_labels, - gt_xyxy, - mask_gt, - ) + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + mask_positive, + assigned_gt_idx, + ) = self._run_assigner( + gt_bbox_labels, + gt_xyxy, + mask_gt, + pred_bboxes, + pred_scores, + ) batched_kpts = self._preprocess_kpts_target( - target_kpts, batch_size, gt_kpts_scale + target_kpts, batch_size, self.gt_kpts_scale ) assigned_gt_idx_expanded = assigned_gt_idx.unsqueeze(-1).unsqueeze(-1) selected_keypoints = batched_kpts.gather( - 1, assigned_gt_idx_expanded.expand(-1, -1, self.n_kps, 3) + 1, 
assigned_gt_idx_expanded.expand(-1, -1, self.n_keypoints, 3) ) xy_components = selected_keypoints[:, :, :, :2] - normalized_xy = xy_components / stride_tensor.view(1, -1, 1, 1) + normalized_xy = xy_components / self.stride_tensor.view(1, -1, 1, 1) selected_keypoints = torch.cat( (normalized_xy, selected_keypoints[:, :, :, 2:]), dim=-1 ) gt_kpt = selected_keypoints[mask_positive] pred_kpts = pred_kpts[mask_positive] - assigned_bboxes = assigned_bboxes / stride_tensor + assigned_bboxes = assigned_bboxes / self.stride_tensor area = ( assigned_bboxes[mask_positive][:, 0] - assigned_bboxes[mask_positive][:, 2] @@ -273,7 +206,7 @@ def forward( )[0] loss = ( - self.class_bbox_loss_weight * loss_cls + self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + regression_loss * self.regr_kpts_loss_weight + visibility_loss * self.vis_kpts_loss_weight @@ -288,25 +221,6 @@ def forward( return loss, sub_losses - def _preprocess_bbox_target( - self, bbox_target: Tensor, batch_size: int, scale_tensor: Tensor - ) -> Tensor: - """Preprocess target bboxes in shape [batch_size, N, 5] where N is maximum - number of instances in one image.""" - sample_ids, counts = cast( - tuple[Tensor, Tensor], - torch.unique(bbox_target[:, 0].int(), return_counts=True), - ) - c_max = int(counts.max()) if counts.numel() > 0 else 0 - out_target = torch.zeros(batch_size, c_max, 5, device=bbox_target.device) - out_target[:, :, 0] = -1 - for id, count in zip(sample_ids, counts): - out_target[id, :count] = bbox_target[bbox_target[:, 0] == id][:, 1:] - - scaled_target = out_target[:, :, 1:5] * scale_tensor - out_target[..., 1:] = box_convert(scaled_target, "xywh", "xyxy") - return out_target - def _preprocess_kpts_target( self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor ) -> Tensor: @@ -316,13 +230,13 @@ def _preprocess_kpts_target( _, counts = torch.unique(kpts_target[:, 0].int(), return_counts=True) max_kpts = int(counts.max()) if counts.numel() > 0 else 0 batched_keypoints = 
torch.zeros( - (batch_size, max_kpts, self.n_kps, 3), device=kpts_target.device + (batch_size, max_kpts, self.n_keypoints, 3), device=kpts_target.device ) for i in range(batch_size): keypoints_i = kpts_target[kpts_target[:, 0] == i] scaled_keypoints_i = keypoints_i[:, 2:].clone() batched_keypoints[i, : keypoints_i.shape[0]] = scaled_keypoints_i.view( - -1, self.n_kps, 3 + -1, self.n_keypoints, 3 ) batched_keypoints[i, :, :, :2] *= scale_tensor[:2] @@ -341,32 +255,13 @@ def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: adj_kpts[..., 1] += y_adj return adj_kpts - -class VarifocalLoss(nn.Module): - def __init__(self, alpha: float = 0.75, gamma: float = 2.0): - """Varifocal Loss is a loss function for training a dense object detector to predict - the IoU-aware classification score, inspired by focal loss. - Code is adapted from: U{https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/models/losses.py} - - @type alpha: float - @param alpha: alpha parameter in focal loss, default is 0.75. - @type gamma: float - @param gamma: gamma parameter in focal loss, default is 2.0. 
- """ - - super().__init__() - - self.alpha = alpha - self.gamma = gamma - - def forward( - self, pred_score: Tensor, target_score: Tensor, label: Tensor - ) -> Tensor: - weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label - ) - ce_loss = F.binary_cross_entropy( - pred_score.float(), target_score.float(), reduction="none" + def _init_parameters(self, features: list[Tensor]): + device = features[0].device + super()._init_parameters(features) + self.gt_kpts_scale = torch.tensor( + [ + self.original_img_size[1], + self.original_img_size[0], + ], + device=device, ) - loss = (ce_loss * weight).sum() - return loss From 37008670fa1ad28a0f5dd8df5dd785b4e90368d7 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 07:32:58 +0200 Subject: [PATCH 063/102] fixed docs --- luxonis_train/attached_modules/losses/bce_with_logits.py | 4 ++-- .../attached_modules/losses/smooth_bce_with_logits.py | 4 ++-- luxonis_train/utils/general.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/luxonis_train/attached_modules/losses/bce_with_logits.py b/luxonis_train/attached_modules/losses/bce_with_logits.py index 86f45c3d..8fbff5d0 100644 --- a/luxonis_train/attached_modules/losses/bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/bce_with_logits.py @@ -56,9 +56,9 @@ def forward(self, predictions: Tensor, target: Tensor) -> Tensor: """Computes the BCE loss from logits. @type predictions: Tensor - @param predictions: Network predictions of shape (N, C, H, W) + @param predictions: Network predictions of shape (N, C, ...) @type target: Tensor - @param target: A tensor of shape (N, C, H, W). + @param target: A tensor of the same shape as predictions. @rtype: Tensor @return: A scalar tensor. 
""" diff --git a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py index 62a9335f..77071e06 100644 --- a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py @@ -52,9 +52,9 @@ def forward(self, predictions: Tensor, target: Tensor) -> Tensor: """Computes the BCE loss with label smoothing. @type predictions: Tensor - @param predictions: Network predictions of shape (N, C, H, W) + @param predictions: Network predictions of shape (N, C, ...) @type target: Tensor - @param target: A tensor of shape (N, C, H, W). + @param target: A tensor of the same shape as predictions. @rtype: Tensor @return: A scalar tensor. """ diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index 29a59c4c..a1a2cbce 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -102,8 +102,8 @@ def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: def get_with_default( value: T | None, action_name: str, caller_name: str | None = None, *, default: T ) -> T: - """Returns value if it is not None, otherwise returns the default valueand log an - info. + """Returns value if it is not C{None}, otherwise returns the default value and log + an info. @type value: T | None @param value: Value to return. 
From e9b0c6cb3f4b79c975d7bca976d2aaaef67ab907 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 08:06:39 +0200 Subject: [PATCH 064/102] consistend variable naming --- configs/README.md | 6 +- configs/classification_model.yaml | 4 +- configs/coco_model.yaml | 10 +-- configs/detection_model.yaml | 4 +- configs/efficient_coco_model.yaml | 10 +-- configs/example_export.yaml | 4 +- configs/example_multi_input.yaml | 4 +- configs/example_tuning.yaml | 2 +- configs/keypoint_bbox_model.yaml | 4 +- configs/resnet_model.yaml | 4 +- configs/segmentation_model.yaml | 4 +- luxonis_train/assigners/tal_assigner.py | 4 +- .../losses/implicit_keypoint_bbox_loss.py | 10 +-- .../mean_average_precision_keypoints.py | 4 +- .../metrics/object_keypoint_similarity.py | 4 +- luxonis_train/core/core.py | 2 +- luxonis_train/core/utils/train_utils.py | 2 +- luxonis_train/models/luxonis_lightning.py | 4 +- luxonis_train/nodes/README.md | 12 +-- .../backbones/efficientrep/efficientrep.py | 14 ++-- .../nodes/backbones/mobileone/blocks.py | 14 ++-- .../nodes/backbones/mobileone/mobileone.py | 56 ++++++------- .../nodes/backbones/mobileone/variants.py | 4 +- .../nodes/backbones/repvgg/repvgg.py | 18 ++-- .../nodes/backbones/repvgg/variants.py | 8 +- luxonis_train/nodes/backbones/rexnetv1.py | 6 +- luxonis_train/nodes/blocks/blocks.py | 24 +++--- .../nodes/heads/efficient_bbox_head.py | 22 ++--- .../heads/efficient_keypoint_bbox_head.py | 10 +-- .../heads/implicit_keypoint_bbox_head.py | 30 +++---- .../nodes/heads/segmentation_head.py | 4 +- luxonis_train/nodes/necks/reppan_neck.py | 82 +++++++++---------- luxonis_train/utils/boundingbox.py | 4 +- luxonis_train/utils/config.py | 14 ++-- luxonis_train/utils/dataset_metadata.py | 8 +- tests/configs/parking_lot_config.yaml | 6 +- tests/configs/segmentation_parse_loader.yaml | 4 +- tests/integration/conftest.py | 2 +- .../test_assigners/test_tal_assigner.py | 70 ++++++++-------- tests/unittests/test_utils/test_boxutils.py | 14 ++-- 
40 files changed, 252 insertions(+), 260 deletions(-) diff --git a/configs/README.md b/configs/README.md index a85d5221..96444f66 100644 --- a/configs/README.md +++ b/configs/README.md @@ -147,16 +147,16 @@ Here you can change everything related to actual training of the model. | accumulate_grad_batches | int | 1 | number of batches for gradient accumulation | | use_weighted_sampler | bool | False | bool if use WeightedRandomSampler for training, only works with classification tasks | | epochs | int | 100 | number of training epochs | -| num_workers | int | 2 | number of workers for data loading | +| n_workers | int | 2 | number of workers for data loading | | train_metrics_interval | int | -1 | frequency of computing metrics on train data, -1 if don't perform | | validation_interval | int | 1 | frequency of computing metrics on validation data | -| num_log_images | int | 4 | maximum number of images to visualize and log | +| n_log_images | int | 4 | maximum number of images to visualize and log | | skip_last_batch | bool | True | whether to skip last batch while training | | accelerator | Literal\["auto", "cpu", "gpu"\] | "auto" | What accelerator to use for training. | | devices | int \| list\[int\] \| str | "auto" | Either specify how many devices to use (int), list specific devices, or use "auto" for automatic configuration based on the selected accelerator | | matmul_precision | Literal\["medium", "high", "highest"\] \| None | None | Sets the internal precision of float32 matrix multiplications. | | strategy | Literal\["auto", "ddp"\] | "auto" | What strategy to use for training. | -| num_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | +| n_sanity_val_steps | int | 2 | Number of sanity validation steps performed before training. | | profiler | Literal\["simple", "advanced"\] \| None | None | PL profiler for GPU/CPU/RAM utilization analysis | | verbose | bool | True | Print all intermediate results to console. 
| diff --git a/configs/classification_model.yaml b/configs/classification_model.yaml index be5a5006..4db7a9b1 100644 --- a/configs/classification_model.yaml +++ b/configs/classification_model.yaml @@ -25,9 +25,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/coco_model.yaml b/configs/coco_model.yaml index 9af25feb..23516bea 100644 --- a/configs/coco_model.yaml +++ b/configs/coco_model.yaml @@ -7,7 +7,7 @@ model: - name: EfficientRep params: channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] + n_repeats: [1, 6, 12, 18, 6] depth_mul: 0.33 width_mul: 0.33 @@ -16,7 +16,7 @@ model: - EfficientRep params: channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] + n_repeats: [12, 12, 12, 12] depth_mul: 0.33 width_mul: 0.33 @@ -108,16 +108,16 @@ trainer: devices: auto strategy: auto - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 profiler: null verbose: True batch_size: 4 accumulate_grad_batches: 1 epochs: &epochs 200 - num_workers: 8 + n_workers: 8 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 skip_last_batch: True log_sub_losses: True save_top_k: 3 diff --git a/configs/detection_model.yaml b/configs/detection_model.yaml index 45c3431e..7bc87eef 100644 --- a/configs/detection_model.yaml +++ b/configs/detection_model.yaml @@ -20,9 +20,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/configs/efficient_coco_model.yaml b/configs/efficient_coco_model.yaml index 64aa48e0..f2c9db5d 100644 --- a/configs/efficient_coco_model.yaml +++ b/configs/efficient_coco_model.yaml @@ -5,7 +5,7 @@ model: - name: EfficientRep params: channels_list: [64, 128, 256, 512, 1024] - num_repeats: [1, 6, 12, 18, 6] + n_repeats: 
[1, 6, 12, 18, 6] depth_mul: 0.33 width_mul: 0.33 @@ -14,7 +14,7 @@ model: - EfficientRep params: channels_list: [256, 128, 128, 256, 256, 512] - num_repeats: [12, 12, 12, 12] + n_repeats: [12, 12, 12, 12] depth_mul: 0.33 width_mul: 0.33 @@ -91,14 +91,14 @@ loader: trainer: - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 batch_size: 4 accumulate_grad_batches: 1 epochs: 200 - num_workers: 4 + n_workers: 4 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 save_top_k: 3 preprocessing: diff --git a/configs/example_export.yaml b/configs/example_export.yaml index f86f1dfa..51f768dc 100644 --- a/configs/example_export.yaml +++ b/configs/example_export.yaml @@ -22,9 +22,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 optimizer: name: SGD diff --git a/configs/example_multi_input.yaml b/configs/example_multi_input.yaml index d185f37e..9632ed43 100644 --- a/configs/example_multi_input.yaml +++ b/configs/example_multi_input.yaml @@ -97,9 +97,9 @@ tracker: trainer: batch_size: 1 epochs: 10 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 4 + n_log_images: 4 callbacks: - name: ExportOnTrainEnd diff --git a/configs/example_tuning.yaml b/configs/example_tuning.yaml index b350ea2f..d8c9027d 100644 --- a/configs/example_tuning.yaml +++ b/configs/example_tuning.yaml @@ -30,7 +30,7 @@ trainer: batch_size: 4 epochs: &epochs 100 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 scheduler: name: CosineAnnealingLR diff --git a/configs/keypoint_bbox_model.yaml b/configs/keypoint_bbox_model.yaml index 5b1ebb2d..51554f73 100644 --- a/configs/keypoint_bbox_model.yaml +++ b/configs/keypoint_bbox_model.yaml @@ -18,9 +18,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git 
a/configs/resnet_model.yaml b/configs/resnet_model.yaml index e8353870..bb9f8f62 100644 --- a/configs/resnet_model.yaml +++ b/configs/resnet_model.yaml @@ -36,9 +36,9 @@ loader: trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 preprocessing: train_image_size: [&height 224, &width 224] diff --git a/configs/segmentation_model.yaml b/configs/segmentation_model.yaml index a822d7c1..b403a75e 100644 --- a/configs/segmentation_model.yaml +++ b/configs/segmentation_model.yaml @@ -21,9 +21,9 @@ trainer: batch_size: 4 epochs: &epochs 200 - num_workers: 4 + n_workers: 4 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 callbacks: - name: ExportOnTrainEnd diff --git a/luxonis_train/assigners/tal_assigner.py b/luxonis_train/assigners/tal_assigner.py index 08b5b461..8660525e 100644 --- a/luxonis_train/assigners/tal_assigner.py +++ b/luxonis_train/assigners/tal_assigner.py @@ -177,7 +177,7 @@ def _select_topk_candidates( @rtype: Tensor @return: Mask of selected anchors of shape [bs, n_max_boxes, n_anchors] """ - num_anchors = metrics.shape[-1] + n_anchors = metrics.shape[-1] topk_metrics, topk_idxs = torch.topk( metrics, self.topk, dim=-1, largest=largest ) @@ -186,7 +186,7 @@ def _select_topk_candidates( [1, 1, self.topk] ) topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs)) - is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(dim=-2) + is_in_topk = F.one_hot(topk_idxs, n_anchors).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk ) diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index 1c6156d1..e59f0c52 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -96,16 +96,16 @@ def __init__( 
super().__init__(**kwargs) self.n_anchors = self.node.n_anchors - self.num_heads = self.node.num_heads + self.n_heads = self.node.n_heads self.box_offset = self.node.box_offset self.anchors = self.node.anchors self.balance = balance or [4.0, 1.0, 0.4] - if len(self.balance) < self.num_heads: + if len(self.balance) < self.n_heads: logger.warning( - f"Balance list must have at least {self.num_heads} elements." + f"Balance list must have at least {self.n_heads} elements." "Filling the rest with 1.0." ) - self.balance += [1.0] * (self.num_heads - len(self.balance)) + self.balance += [1.0] * (self.n_heads - len(self.balance)) self.min_objectness_iou = min_objectness_iou self.bbox_weight = bbox_loss_weight @@ -192,7 +192,7 @@ def prepare( * self.bias ) - for i in range(self.num_heads): + for i in range(self.n_heads): anchor = self.anchors[i] feature_height, feature_width = predictions[i].shape[2:4] scaled_targets, xy_shifts = match_to_anchor( diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 5006a49d..26f95a51 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -294,7 +294,7 @@ def _get_coco_format( else: area_stat = image_box[2] * image_box[3] * self.area_factor - num_keypoints = len( + n_keypoints = len( [i for i in range(2, len(image_kpt), 3) if image_kpt[i] != 0] ) # number of annotated keypoints annotation = { @@ -307,7 +307,7 @@ def _get_coco_format( crowds[image_id][k].cpu().tolist() if crowds is not None else 0 ), "keypoints": image_kpt, - "num_keypoints": num_keypoints, + "num_keypoints": n_keypoints, } if scores is not None: diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index e0b1d475..182cb269 100644 --- 
a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -66,8 +66,8 @@ def prepare( ) -> tuple[list[dict[str, Tensor]], list[dict[str, Tensor]]]: kpts_labels = self.get_label(labels, LabelType.KEYPOINTS) bbox_labels = self.get_label(labels, LabelType.BOUNDINGBOX) - num_keypoints = (kpts_labels.shape[1] - 2) // 3 - label = torch.zeros((len(bbox_labels), num_keypoints * 3 + 6)) + n_keypoints = (kpts_labels.shape[1] - 2) // 3 + label = torch.zeros((len(bbox_labels), n_keypoints * 3 + 6)) label[:, :2] = bbox_labels[:, :2] label[:, 2:6] = box_convert(bbox_labels[:, 2:], "xywh", "xyxy") label[:, 6::3] = kpts_labels[:, 2::3] # insert kp x coordinates diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 83ec1c43..778d6cae 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -158,7 +158,7 @@ def __init__( view: torch_data.DataLoader( self.loaders[view], batch_size=self.cfg.trainer.batch_size, - num_workers=self.cfg.trainer.num_workers, + num_workers=self.cfg.trainer.n_workers, collate_fn=collate_fn, shuffle=view == "train", drop_last=( diff --git a/luxonis_train/core/utils/train_utils.py b/luxonis_train/core/utils/train_utils.py index 2e188fa6..73b615cb 100644 --- a/luxonis_train/core/utils/train_utils.py +++ b/luxonis_train/core/utils/train_utils.py @@ -21,7 +21,7 @@ def create_trainer(cfg: TrainerConfig, **kwargs: Any) -> pl.Trainer: max_epochs=cfg.epochs, accumulate_grad_batches=cfg.accumulate_grad_batches, check_val_every_n_epoch=cfg.validation_interval, - num_sanity_val_steps=cfg.num_sanity_val_steps, + num_sanity_val_steps=cfg.n_sanity_val_steps, profiler=cfg.profiler, deterministic=cfg.deterministic, **kwargs, diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 3c88b357..18136e40 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ 
b/luxonis_train/models/luxonis_lightning.py @@ -625,7 +625,7 @@ def _evaluation_step( ) -> dict[str, Tensor]: inputs, labels = batch images = None - if self._logged_images < self.cfg.trainer.num_log_images: + if self._logged_images < self.cfg.trainer.n_log_images: images = get_unnormalized_images(self.cfg, inputs) outputs = self.forward( inputs, @@ -643,7 +643,7 @@ def _evaluation_step( for viz_name, viz_batch in visualizations.items(): logged_images = self._logged_images for viz in viz_batch: - if logged_images >= self.cfg.trainer.num_log_images: + if logged_images >= self.cfg.trainer.n_log_images: break self.logger.log_image( f"{mode}/visualizations/{node_name}/{viz_name}/{logged_images}", diff --git a/luxonis_train/nodes/README.md b/luxonis_train/nodes/README.md index ffd0217d..60e5971c 100644 --- a/luxonis_train/nodes/README.md +++ b/luxonis_train/nodes/README.md @@ -77,7 +77,7 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). | Key | Type | Default value | Description | | ------------- | ----------- | --------------------------- | --------------------------------------------------- | | channels_list | List\[int\] | \[64, 128, 256, 512, 1024\] | List of number of channels for each block | -| num_repeats | List\[int\] | \[1, 6, 12, 18, 6\] | List of number of repeats of RepVGGBlock | +| n_repeats | List\[int\] | \[1, 6, 12, 18, 6\] | List of number of repeats of RepVGGBlock | | in_channels | int | 3 | Number of input channels, should be 3 in most cases | | depth_mul | int | 0.33 | Depth multiplier | | width_mul | int | 0.25 | Width multiplier | @@ -145,9 +145,9 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). 
| Key | Type | Default value | Description | | ------------- | ---------------- | ------------------------------------------------------- | ----------------------------------------- | -| num_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | +| n_heads | Literal\[2,3,4\] | 3 ***Note:** Should be same also on head in most cases* | Number of output heads | | channels_list | List\[int\] | \[256, 128, 128, 256, 256, 512\] | List of number of channels for each block | -| num_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | +| n_repeats | List\[int\] | \[12, 12, 12, 12\] | List of number of repeats of RepVGGBlock | | depth_mul | int | 0.33 | Depth multiplier | | width_mul | int | 0.25 | Width multiplier | @@ -182,7 +182,7 @@ Adapted from [here](https://arxiv.org/pdf/2209.02976.pdf). | Key | Type | Default value | Description | | ---------- | ----- | ------------- | -------------------------------------------------- | -| num_heads | bool | 3 | Number of output heads | +| n_heads | bool | 3 | Number of output heads | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | | iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | @@ -195,7 +195,7 @@ Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). | Key | Type | Default value | Description | | ---------------- | --------------------------- | ------------- | ---------------------------------------------------------------------------------------------------------- | | n_keypoints | int \| None | None | Number of keypoints. | -| num_heads | int | 3 | Number of output heads | +| n_heads | int | 3 | Number of output heads | | anchors | List\[List\[int\]\] \| None | None | Anchors used for object detection. If set to `None`, the anchors are computed at runtime from the dataset. 
| | init_coco_biases | bool | True | Whether to use COCO bias and weight initialization | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | @@ -210,6 +210,6 @@ Adapted from [here](https://arxiv.org/pdf/2207.02696.pdf). | Key | Type | Default value | Description | | ----------- | ----------- | ------------- | -------------------------------------------------- | | n_keypoints | int \| None | None | Number of keypoints. | -| num_heads | int | 3 | Number of output heads | +| n_heads | int | 3 | Number of output heads | | conf_thres | float | 0.25 | confidence threshold for nms (used for evaluation) | | iou_thres | float | 0.45 | iou threshold for nms (used for evaluation) | diff --git a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py index 717b9bd5..3549bbff 100644 --- a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -23,7 +23,7 @@ def __init__( self, variant: VariantLiteral = "nano", channels_list: list[int] | None = None, - num_repeats: list[int] | None = None, + n_repeats: list[int] | None = None, depth_mul: float | None = None, width_mul: float | None = None, **kwargs: Any, @@ -46,8 +46,8 @@ def __init__( @type channels_list: list[int] | None @param channels_list: List of number of channels for each block. If unspecified, defaults to [64, 128, 256, 512, 1024]. - @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. If unspecified, + @type n_repeats: list[int] | None + @param n_repeats: List of number of repeats of RepVGGBlock. If unspecified, defaults to [1, 6, 12, 18, 6]. @type depth_mul: float @param depth_mul: Depth multiplier. If provided, overrides the variant value. 
@@ -61,11 +61,9 @@ def __init__( width_mul = width_mul or var.width_multiplier channels_list = channels_list or [64, 128, 256, 512, 1024] - num_repeats = num_repeats or [1, 6, 12, 18, 6] + n_repeats = n_repeats or [1, 6, 12, 18, 6] channels_list = [make_divisible(i * width_mul, 8) for i in channels_list] - num_repeats = [ - (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats - ] + n_repeats = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats] self.repvgg_encoder = RepVGGBlock( in_channels=self.in_channels, @@ -87,7 +85,7 @@ def __init__( block=RepVGGBlock, in_channels=channels_list[i + 1], out_channels=channels_list[i + 1], - num_blocks=num_repeats[i + 1], + n_blocks=n_repeats[i + 1], ), ) self.blocks.append(curr_block) diff --git a/luxonis_train/nodes/backbones/mobileone/blocks.py b/luxonis_train/nodes/backbones/mobileone/blocks.py index 17b9d223..276f08c5 100644 --- a/luxonis_train/nodes/backbones/mobileone/blocks.py +++ b/luxonis_train/nodes/backbones/mobileone/blocks.py @@ -28,7 +28,7 @@ def __init__( padding: int = 0, groups: int = 1, use_se: bool = False, - num_conv_branches: int = 1, + n_conv_branches: int = 1, ): """Construct a MobileOneBlock module. @@ -48,8 +48,8 @@ def __init__( @param groups: Group number. Defaults to 1. @type use_se: bool @param use_se: Whether to use SE-ReLU activations. Defaults to False. - @type num_conv_branches: int - @param num_conv_branches: Number of linear conv branches. Defaults to 1. + @type n_conv_branches: int + @param n_conv_branches: Number of linear conv branches. Defaults to 1. 
""" super().__init__() @@ -58,7 +58,7 @@ def __init__( self.kernel_size = kernel_size self.in_channels = in_channels self.out_channels = out_channels - self.num_conv_branches = num_conv_branches + self.n_conv_branches = n_conv_branches self.inference_mode = False # Check if SE-ReLU is requested @@ -81,7 +81,7 @@ def __init__( # Re-parameterizable conv branches rbr_conv: list[nn.Module] = [] - for _ in range(self.num_conv_branches): + for _ in range(self.n_conv_branches): rbr_conv.append( ConvModule( in_channels=self.in_channels, @@ -127,7 +127,7 @@ def forward(self, inputs: Tensor): # Other branches out = scale_out + identity_out - for ix in range(self.num_conv_branches): + for ix in range(self.n_conv_branches): out += self.rbr_conv[ix](inputs) return self.activation(self.se(out)) @@ -190,7 +190,7 @@ def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: # get weights and bias of conv branches kernel_conv = torch.zeros(()) bias_conv = torch.zeros(()) - for ix in range(self.num_conv_branches): + for ix in range(self.n_conv_branches): _kernel, _bias = self._fuse_bn_tensor(self.rbr_conv[ix]) kernel_conv = kernel_conv + _kernel bias_conv = bias_conv + _bias diff --git a/luxonis_train/nodes/backbones/mobileone/mobileone.py b/luxonis_train/nodes/backbones/mobileone/mobileone.py index 55104f92..8180f960 100644 --- a/luxonis_train/nodes/backbones/mobileone/mobileone.py +++ b/luxonis_train/nodes/backbones/mobileone/mobileone.py @@ -24,7 +24,7 @@ def __init__( self, variant: Literal["s0", "s1", "s2", "s3", "s4"] = "s0", width_multipliers: tuple[float, float, float, float] | None = None, - num_conv_branches: int | None = None, + n_conv_branches: int | None = None, use_se: bool | None = None, **kwargs: Any, ): @@ -56,16 +56,16 @@ def __init__( - use of SE blocks - A boolean specifying whether to use SE blocks in the network. 
The variants are as follows: - - s0 (default): width_multipliers=(0.75, 1.0, 1.0, 2.0), num_conv_branches=4, use_se=False - - s1: width_multipliers=(1.5, 1.5, 2.0, 2.5), num_conv_branches=1, use_se=False - - s2: width_multipliers=(1.5, 2.0, 2.5, 4.0), num_conv_branches=1, use_se=False - - s3: width_multipliers=(2.0, 2.5, 3.0, 4.0), num_conv_branches=1, use_se=False - - s4: width_multipliers=(3.0, 3.5, 3.5, 4.0), num_conv_branches=1, use_se=True + - s0 (default): width_multipliers=(0.75, 1.0, 1.0, 2.0), n_conv_branches=4, use_se=False + - s1: width_multipliers=(1.5, 1.5, 2.0, 2.5), n_conv_branches=1, use_se=False + - s2: width_multipliers=(1.5, 2.0, 2.5, 4.0), n_conv_branches=1, use_se=False + - s3: width_multipliers=(2.0, 2.5, 3.0, 4.0), n_conv_branches=1, use_se=False + - s4: width_multipliers=(3.0, 3.5, 3.5, 4.0), n_conv_branches=1, use_se=True @type width_multipliers: tuple[float, float, float, float] | None @param width_multipliers: Width multipliers for each stage. If provided, overrides the variant values. - @type num_conv_branches: int | None - @param num_conv_branches: Number of linear convolution branches in MobileOne block. If provided, overrides the variant values. + @type n_conv_branches: int | None + @param n_conv_branches: Number of linear convolution branches in MobileOne block. If provided, overrides the variant values. @type use_se: bool | None @param use_se: Whether to use SE blocks in the network. If provided, overrides the variant value. 
""" @@ -75,8 +75,8 @@ def __init__( width_multipliers = width_multipliers or var.width_multipliers use_se = use_se or var.use_se - self.num_blocks_per_stage = [2, 8, 10, 1] - self.num_conv_branches = num_conv_branches or var.num_conv_branches + self.n_blocks_per_stage = [2, 8, 10, 1] + self.n_conv_branches = n_conv_branches or var.n_conv_branches self.in_planes = min(64, int(64 * width_multipliers[0])) @@ -90,23 +90,23 @@ def __init__( self.cur_layer_idx = 1 self.stage1 = self._make_stage( int(64 * width_multipliers[0]), - self.num_blocks_per_stage[0], - num_se_blocks=0, + self.n_blocks_per_stage[0], + n_se_blocks=0, ) self.stage2 = self._make_stage( int(128 * width_multipliers[1]), - self.num_blocks_per_stage[1], - num_se_blocks=0, + self.n_blocks_per_stage[1], + n_se_blocks=0, ) self.stage3 = self._make_stage( int(256 * width_multipliers[2]), - self.num_blocks_per_stage[2], - num_se_blocks=self.num_blocks_per_stage[2] // 2 if use_se else 0, + self.n_blocks_per_stage[2], + n_se_blocks=self.n_blocks_per_stage[2] // 2 if use_se else 0, ) self.stage4 = self._make_stage( int(512 * width_multipliers[3]), - self.num_blocks_per_stage[3], - num_se_blocks=self.num_blocks_per_stage[3] if use_se else 0, + self.n_blocks_per_stage[3], + n_se_blocks=self.n_blocks_per_stage[3] if use_se else 0, ) def forward(self, inputs: Tensor) -> list[Tensor]: @@ -142,28 +142,28 @@ def set_export_mode(self, mode: bool = True) -> None: if hasattr(module, "reparameterize"): module.reparameterize() - def _make_stage(self, planes: int, num_blocks: int, num_se_blocks: int): + def _make_stage(self, planes: int, n_blocks: int, n_se_blocks: int): """Build a stage of MobileOne model. @type planes: int @param planes: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks in this stage. - @type num_se_blocks: int - @param num_se_blocks: Number of SE blocks in this stage. + @type n_blocks: int + @param n_blocks: Number of blocks in this stage. 
+ @type n_se_blocks: int + @param n_se_blocks: Number of SE blocks in this stage. @rtype: nn.Sequential @return: A stage of MobileOne model. """ # Get strides for all layers - strides = [2] + [1] * (num_blocks - 1) + strides = [2] + [1] * (n_blocks - 1) blocks: list[nn.Module] = [] for ix, stride in enumerate(strides): use_se = False - if num_se_blocks > num_blocks: + if n_se_blocks > n_blocks: raise ValueError( "Number of SE blocks cannot " "exceed number of layers." ) - if ix >= (num_blocks - num_se_blocks): + if ix >= (n_blocks - n_se_blocks): use_se = True # Depthwise conv @@ -176,7 +176,7 @@ def _make_stage(self, planes: int, num_blocks: int, num_se_blocks: int): padding=1, groups=self.in_planes, use_se=use_se, - num_conv_branches=self.num_conv_branches, + n_conv_branches=self.n_conv_branches, ) ) # Pointwise conv @@ -189,7 +189,7 @@ def _make_stage(self, planes: int, num_blocks: int, num_se_blocks: int): padding=0, groups=1, use_se=use_se, - num_conv_branches=self.num_conv_branches, + n_conv_branches=self.n_conv_branches, ) ) self.in_planes = planes diff --git a/luxonis_train/nodes/backbones/mobileone/variants.py b/luxonis_train/nodes/backbones/mobileone/variants.py index 64e5a2a6..0eeaca93 100644 --- a/luxonis_train/nodes/backbones/mobileone/variants.py +++ b/luxonis_train/nodes/backbones/mobileone/variants.py @@ -5,7 +5,7 @@ class MobileOneVariant(BaseModel): width_multipliers: tuple[float, float, float, float] - num_conv_branches: int = 1 + n_conv_branches: int = 1 use_se: bool = False @@ -13,7 +13,7 @@ def get_variant(variant: Literal["s0", "s1", "s2", "s3", "s4"]) -> MobileOneVari variants = { "s0": MobileOneVariant( width_multipliers=(0.75, 1.0, 1.0, 2.0), - num_conv_branches=4, + n_conv_branches=4, ), "s1": MobileOneVariant( width_multipliers=(1.5, 1.5, 2.0, 2.5), diff --git a/luxonis_train/nodes/backbones/repvgg/repvgg.py b/luxonis_train/nodes/backbones/repvgg/repvgg.py index 5fc0b4af..c0818341 100644 --- 
a/luxonis_train/nodes/backbones/repvgg/repvgg.py +++ b/luxonis_train/nodes/backbones/repvgg/repvgg.py @@ -20,7 +20,7 @@ class RepVGG(BaseNode[Tensor, list[Tensor]]): def __init__( self, variant: Literal["A0", "A1", "A2"] = "A0", - num_blocks: tuple[int, int, int, int] | None = None, + n_blocks: tuple[int, int, int, int] | None = None, width_multiplier: tuple[float, float, float, float] | None = None, override_groups_map: dict[int, int] | None = None, use_se: bool = False, @@ -52,15 +52,15 @@ def __init__( @param use_se: Whether to use Squeeze-and-Excitation blocks. @type use_checkpoint: bool @param use_checkpoint: Whether to use checkpointing. - @type num_blocks: tuple[int, int, int, int] | None - @param num_blocks: Number of blocks in each stage. + @type n_blocks: tuple[int, int, int, int] | None + @param n_blocks: Number of blocks in each stage. @type width_multiplier: tuple[float, float, float, float] | None @param width_multiplier: Width multiplier for each stage. """ super().__init__(**kwargs) var = get_variant(variant) - num_blocks = num_blocks or var.num_blocks + n_blocks = n_blocks or var.n_blocks width_multiplier = width_multiplier or var.width_multiplier override_groups_map = defaultdict(lambda: 1, override_groups_map or {}) self.use_se = use_se @@ -81,7 +81,7 @@ def __init__( for i in range(4) for block in self._make_stage( int(2**i * 64 * width_multiplier[i]), - num_blocks[i], + n_blocks[i], stride=2, groups=override_groups_map[i], ) @@ -101,15 +101,15 @@ def forward(self, inputs: Tensor) -> list[Tensor]: return outputs def _make_stage( - self, planes: int, num_blocks: int, stride: int, groups: int + self, channels: int, n_blocks: int, stride: int, groups: int ) -> nn.ModuleList: - strides = [stride] + [1] * (num_blocks - 1) + strides = [stride] + [1] * (n_blocks - 1) blocks: list[nn.Module] = [] for stride in strides: blocks.append( RepVGGBlock( in_channels=self.in_planes, - out_channels=planes, + out_channels=channels, kernel_size=3, stride=stride, 
padding=1, @@ -117,7 +117,7 @@ def _make_stage( use_se=self.use_se, ) ) - self.in_planes = planes + self.in_planes = channels return nn.ModuleList(blocks) def set_export_mode(self, mode: bool = True) -> None: diff --git a/luxonis_train/nodes/backbones/repvgg/variants.py b/luxonis_train/nodes/backbones/repvgg/variants.py index 27fd7fb1..a5c734b5 100644 --- a/luxonis_train/nodes/backbones/repvgg/variants.py +++ b/luxonis_train/nodes/backbones/repvgg/variants.py @@ -4,22 +4,22 @@ class RepVGGVariant(BaseModel): - num_blocks: tuple[int, int, int, int] + n_blocks: tuple[int, int, int, int] width_multiplier: tuple[float, float, float, float] def get_variant(variant: Literal["A0", "A1", "A2"]) -> RepVGGVariant: variants = { "A0": RepVGGVariant( - num_blocks=(2, 4, 14, 1), + n_blocks=(2, 4, 14, 1), width_multiplier=(0.75, 0.75, 0.75, 2.5), ), "A1": RepVGGVariant( - num_blocks=(2, 4, 14, 1), + n_blocks=(2, 4, 14, 1), width_multiplier=(1, 1, 1, 2.5), ), "A2": RepVGGVariant( - num_blocks=(2, 4, 14, 1), + n_blocks=(2, 4, 14, 1), width_multiplier=(1.5, 1.5, 1.5, 2.75), ), } diff --git a/luxonis_train/nodes/backbones/rexnetv1.py b/luxonis_train/nodes/backbones/rexnetv1.py index e03110b3..0f5abdf4 100644 --- a/luxonis_train/nodes/backbones/rexnetv1.py +++ b/luxonis_train/nodes/backbones/rexnetv1.py @@ -64,7 +64,7 @@ def __init__( layers = [1, 2, 2, 3, 3, 5] strides = [1, 2, 2, 2, 1, 2] - self.num_convblocks = sum(layers) + self.n_convblocks = sum(layers) self.out_indices = out_indices or [1, 4, 10, 17] kernel_sizes = ( @@ -102,7 +102,7 @@ def __init__( ) ) - for i in range(self.num_convblocks): + for i in range(self.n_convblocks): inplanes_divisible = make_divisible( int(round(inplanes * multiplier)), divisible_value ) @@ -111,7 +111,7 @@ def __init__( channels_group.append(inplanes_divisible) else: in_channels_group.append(inplanes_divisible) - inplanes += final_ch / (self.num_convblocks - 1 * 1.0) + inplanes += final_ch / (self.n_convblocks - 1 * 1.0) inplanes_divisible = 
make_divisible( int(round(inplanes * multiplier)), divisible_value ) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index ea7c8290..a32f6f87 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -378,7 +378,7 @@ def __init__( block: type[nn.Module], in_channels: int, out_channels: int, - num_blocks: int = 1, + n_blocks: int = 1, ): """Module which repeats the block n times. First block accepts in_channels and outputs out_channels while subsequent blocks accept out_channels and output @@ -390,14 +390,14 @@ def __init__( @param in_channels: Number of input channels. @type out_channels: int @param out_channels: Number of output channels. - @type num_blocks: int - @param num_blocks: Number of blocks to repeat. Defaults to C{1}. + @type n_blocks: int + @param n_blocks: Number of blocks to repeat. Defaults to C{1}. """ super().__init__() in_channels = in_channels self.blocks = nn.ModuleList() - for _ in range(num_blocks): + for _ in range(n_blocks): self.blocks.append( block(in_channels=in_channels, out_channels=out_channels) ) @@ -597,7 +597,7 @@ def __init__( in_channels: int, in_channels_next: int, out_channels: int, - num_repeats: int, + n_repeats: int, ): """UpBlock used in RepPAN neck. @@ -608,8 +608,8 @@ def __init__( concat. @type out_channels: int @param out_channels: Number of output channels. - @type num_repeats: int - @param num_repeats: Number of RepVGGBlock repeats. + @type n_repeats: int + @param n_repeats: Number of RepVGGBlock repeats. 
""" super().__init__() @@ -631,7 +631,7 @@ def __init__( block=RepVGGBlock, in_channels=in_channels_next + out_channels, out_channels=out_channels, - num_blocks=num_repeats, + n_blocks=n_repeats, ) def forward(self, x0: Tensor, x1: Tensor) -> tuple[Tensor, Tensor]: @@ -649,7 +649,7 @@ def __init__( downsample_out_channels: int, in_channels_next: int, out_channels: int, - num_repeats: int, + n_repeats: int, ): """DownBlock used in RepPAN neck. @@ -662,8 +662,8 @@ def __init__( concat. @type out_channels: int @param out_channels: Number of output channels. - @type num_repeats: int - @param num_repeats: Number of RepVGGBlock repeats. + @type n_repeats: int + @param n_repeats: Number of RepVGGBlock repeats. """ super().__init__() @@ -678,7 +678,7 @@ def __init__( block=RepVGGBlock, in_channels=downsample_out_channels + in_channels_next, out_channels=out_channels, - num_blocks=num_repeats, + n_blocks=n_repeats, ) def forward(self, x0: Tensor, x1: Tensor) -> Tensor: diff --git a/luxonis_train/nodes/heads/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py index 11be28cb..8bef5044 100644 --- a/luxonis_train/nodes/heads/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -25,7 +25,7 @@ class EfficientBBoxHead( def __init__( self, - num_heads: Literal[2, 3, 4] = 3, + n_heads: Literal[2, 3, 4] = 3, conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, @@ -35,8 +35,8 @@ def __init__( Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications }. - @type num_heads: Literal[2,3,4] - @param num_heads: Number of output heads. Defaults to 3. B{Note:} Should be same + @type n_heads: Literal[2,3,4] + @param n_heads: Number of output heads. Defaults to 3. B{Note:} Should be same also on neck in most cases. @type conf_thres: float @param conf_thres: Threshold for confidence. Defaults to C{0.25}. 
@@ -48,25 +48,25 @@ def __init__( """ super().__init__(**kwargs) - self.num_heads = num_heads + self.n_heads = n_heads self.conf_thres = conf_thres self.iou_thres = iou_thres self.max_det = max_det - self.stride = self._fit_stride_to_num_heads() + self.stride = self._fit_stride_to_n_heads() self.grid_cell_offset = 0.5 self.grid_cell_size = 5.0 self.heads = nn.ModuleList() - if len(self.in_channels) < self.num_heads: + if len(self.in_channels) < self.n_heads: logger.warning( - f"Head '{self.name}' was set to use {self.num_heads} heads, " + f"Head '{self.name}' was set to use {self.n_heads} heads, " f"but received only {len(self.in_channels)} inputs. " f"Changing number of heads to {len(self.in_channels)}." ) - self.num_heads = len(self.in_channels) - for i in range(self.num_heads): + self.n_heads = len(self.in_channels) + for i in range(self.n_heads): curr_head = EfficientDecoupledBlock( n_classes=self.n_classes, in_channels=self.in_channels[i], @@ -125,12 +125,12 @@ def wrap( "distributions": [reg_tensor], } - def _fit_stride_to_num_heads(self): + def _fit_stride_to_n_heads(self): """Returns correct stride for number of heads and attach index.""" stride = torch.tensor( [ self.original_in_shape[1] / x[2] # type: ignore - for x in self.in_sizes[: self.num_heads] + for x in self.in_sizes[: self.n_heads] ], dtype=torch.int, ) diff --git a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py index fffd361c..5bb6bcc0 100644 --- a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py @@ -20,7 +20,7 @@ class EfficientKeypointBBoxHead(EfficientBBoxHead): def __init__( self, - num_heads: Literal[2, 3, 4] = 3, + n_heads: Literal[2, 3, 4] = 3, conf_thres: float = 0.25, iou_thres: float = 0.45, max_det: int = 300, @@ -31,9 +31,9 @@ def __init__( Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial Applications}. 
- @param num_heads: Number of output heads. Defaults to C{3}. + @param n_heads: Number of output heads. Defaults to C{3}. B{Note:} Should be same also on neck in most cases. - @type num_heads: int + @type n_heads: int @param conf_thres: Threshold for confidence. Defaults to C{0.25}. @type conf_thres: float @@ -45,7 +45,7 @@ def __init__( @type max_det: int """ super().__init__( - num_heads=num_heads, + n_heads=n_heads, conf_thres=conf_thres, iou_thres=iou_thres, max_det=max_det, @@ -78,7 +78,7 @@ def forward( ) kpt_list: list[Tensor] = [] - for i in range(self.num_heads): + for i in range(self.n_heads): kpt_pred = self.kpt_layers[i](inputs[i]) kpt_list.append(kpt_pred) diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py index 0b8baa2b..e8b4ad5b 100644 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py @@ -24,7 +24,7 @@ class ImplicitKeypointBBoxHead(BaseNode[list[Tensor], tuple[list[Tensor], Tensor def __init__( self, - num_heads: int = 3, + n_heads: int = 3, anchors: list[list[float]] | None = None, init_coco_biases: bool = True, conf_thres: float = 0.25, @@ -39,8 +39,8 @@ def __init__( TODO: more technical documentation - @type num_heads: int - @param num_heads: Number of output heads. Defaults to C{3}. + @type n_heads: int + @param n_heads: Number of output heads. Defaults to C{3}. B{Note:} Should be same also on neck in most cases. @type anchors: list[list[float]] | None @param anchors: Anchors used for object detection. 
@@ -59,18 +59,18 @@ def __init__( self.iou_thres = iou_thres self.max_det = max_det - self.num_heads = num_heads - if len(self.in_channels) < self.num_heads: + self.n_heads = n_heads + if len(self.in_channels) < self.n_heads: logger.warning( - f"Head '{self.name}' was set to use {self.num_heads} heads, " + f"Head '{self.name}' was set to use {self.n_heads} heads, " f"but received only {len(self.in_channels)} inputs. " f"Changing number of heads to {len(self.in_channels)}." ) - self.num_heads = len(self.in_channels) + self.n_heads = len(self.in_channels) if anchors is None: logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors(self.num_heads) + anchors, recall = self.dataset_metadata.autogenerate_anchors(self.n_heads) logger.info(f"Anchors generated. Best possible recall: {recall:.2f}") self.box_offset = 5 @@ -80,10 +80,10 @@ def __init__( self.n_anchors = len(anchors[0]) // 2 self.grid: list[Tensor] = [] - self.anchors = torch.tensor(anchors).float().view(self.num_heads, -1, 2) - self.anchor_grid = self.anchors.clone().view(self.num_heads, 1, -1, 1, 1, 2) + self.anchors = torch.tensor(anchors).float().view(self.n_heads, -1, 2) + self.anchor_grid = self.anchors.clone().view(self.n_heads, 1, -1, 1, 1, 2) - self.channel_list, self.stride = self._fit_to_num_heads(self.in_channels) + self.channel_list, self.stride = self._fit_to_n_heads(self.in_channels) self.learnable_mul_add_conv = nn.ModuleList( LearnableMulAddConv( @@ -115,7 +115,7 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: self.anchor_grid = self.anchor_grid.to(inputs[0].device) - for i in range(self.num_heads): + for i in range(self.n_heads): feat = cast( Tensor, torch.cat( @@ -207,12 +207,12 @@ def _infer_bbox( ) return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - def _fit_to_num_heads(self, channel_list: list[int]) -> tuple[list[int], Tensor]: - out_channel_list = channel_list[: 
self.num_heads] + def _fit_to_n_heads(self, channel_list: list[int]) -> tuple[list[int], Tensor]: + out_channel_list = channel_list[: self.n_heads] stride = torch.tensor( [ self.original_in_shape[1] / h - for h in cast(list[int], self.in_height)[: self.num_heads] + for h in cast(list[int], self.in_height)[: self.n_heads] ], dtype=torch.int, ) diff --git a/luxonis_train/nodes/heads/segmentation_head.py b/luxonis_train/nodes/heads/segmentation_head.py index 19f87f3b..240b956c 100644 --- a/luxonis_train/nodes/heads/segmentation_head.py +++ b/luxonis_train/nodes/heads/segmentation_head.py @@ -23,11 +23,11 @@ def __init__(self, **kwargs: Any): """ super().__init__(**kwargs) h, w = self.original_in_shape[1:] - num_up = infer_upscale_factor((self.in_height, self.in_width), (h, w)) + n_up = infer_upscale_factor((self.in_height, self.in_width), (h, w)) modules: list[nn.Module] = [] in_channels = self.in_channels - for _ in range(int(num_up)): + for _ in range(int(n_up)): modules.append( UpBlock(in_channels=in_channels, out_channels=in_channels // 2) ) diff --git a/luxonis_train/nodes/necks/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck.py index a2bf668e..7873efc0 100644 --- a/luxonis_train/nodes/necks/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck.py @@ -12,9 +12,9 @@ class RepPANNeck(BaseNode[list[Tensor], list[Tensor]]): def __init__( self, - num_heads: Literal[2, 3, 4] = 3, + n_heads: Literal[2, 3, 4] = 3, channels_list: list[int] | None = None, - num_repeats: list[int] | None = None, + n_repeats: list[int] | None = None, depth_mul: float = 0.33, width_mul: float = 0.25, **kwargs: Any, @@ -25,14 +25,14 @@ def __init__( for Industrial Applications}. It has the balance of feature fusion ability and hardware efficiency. - @type num_heads: Literal[2,3,4] - @param num_heads: Number of output heads. Defaults to 3. B{Note: Should be same + @type n_heads: Literal[2,3,4] + @param n_heads: Number of output heads. Defaults to 3. 
B{Note: Should be same also on head in most cases.} @type channels_list: list[int] | None @param channels_list: List of number of channels for each block. Defaults to C{[256, 128, 128, 256, 256, 512]}. - @type num_repeats: list[int] | None - @param num_repeats: List of number of repeats of RepVGGBlock. + @type n_repeats: list[int] | None + @param n_repeats: List of number of repeats of RepVGGBlock. Defaults to C{[12, 12, 12, 12]}. @type depth_mul: float @param depth_mul: Depth multiplier. Defaults to C{0.33}. @@ -42,70 +42,68 @@ def __init__( super().__init__(**kwargs) - num_repeats = num_repeats or [12, 12, 12, 12] - channels_list = channels_list or [256, 128, 128, 256, 256, 512] + self.n_heads = n_heads - self.num_heads = num_heads + n_repeats = n_repeats or [12, 12, 12, 12] + channels_list = channels_list or [256, 128, 128, 256, 256, 512] channels_list = [make_divisible(ch * width_mul, 8) for ch in channels_list] - num_repeats = [ - (max(round(i * depth_mul), 1) if i > 1 else i) for i in num_repeats - ] - channels_list, num_repeats = self._fit_to_num_heads(channels_list, num_repeats) + n_repeats = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats] + channels_list, n_repeats = self._fit_to_n_heads(channels_list, n_repeats) self.up_blocks = nn.ModuleList() in_channels = self.in_channels[-1] out_channels = channels_list[0] in_channels_next = self.in_channels[-2] - curr_num_repeats = num_repeats[0] + curr_n_repeats = n_repeats[0] up_out_channel_list = [in_channels] # used in DownBlocks - for i in range(1, num_heads): + for i in range(1, n_heads): curr_up_block = RepUpBlock( in_channels=in_channels, in_channels_next=in_channels_next, out_channels=out_channels, - num_repeats=curr_num_repeats, + n_repeats=curr_n_repeats, ) up_out_channel_list.append(out_channels) self.up_blocks.append(curr_up_block) - if len(self.up_blocks) == (num_heads - 1): + if len(self.up_blocks) == (n_heads - 1): up_out_channel_list.reverse() break in_channels = out_channels 
out_channels = channels_list[i] in_channels_next = self.in_channels[-1 - (i + 1)] - curr_num_repeats = num_repeats[i] + curr_n_repeats = n_repeats[i] self.down_blocks = nn.ModuleList() - channels_list_down_blocks = channels_list[(num_heads - 1) :] - num_repeats_down_blocks = num_repeats[(num_heads - 1) :] + channels_list_down_blocks = channels_list[(n_heads - 1) :] + n_repeats_down_blocks = n_repeats[(n_heads - 1) :] in_channels = out_channels downsample_out_channels = channels_list_down_blocks[0] in_channels_next = up_out_channel_list[0] out_channels = channels_list_down_blocks[1] - curr_num_repeats = num_repeats_down_blocks[0] + curr_n_repeats = n_repeats_down_blocks[0] - for i in range(1, num_heads): + for i in range(1, n_heads): curr_down_block = RepDownBlock( in_channels=in_channels, downsample_out_channels=downsample_out_channels, in_channels_next=in_channels_next, out_channels=out_channels, - num_repeats=curr_num_repeats, + n_repeats=curr_n_repeats, ) self.down_blocks.append(curr_down_block) - if len(self.down_blocks) == (num_heads - 1): + if len(self.down_blocks) == (n_heads - 1): break in_channels = out_channels downsample_out_channels = channels_list_down_blocks[2 * i] in_channels_next = up_out_channel_list[i] out_channels = channels_list_down_blocks[2 * i + 1] - curr_num_repeats = num_repeats_down_blocks[i] + curr_n_repeats = n_repeats_down_blocks[i] def forward(self, inputs: list[Tensor]) -> list[Tensor]: x = inputs[-1] @@ -120,19 +118,19 @@ def forward(self, inputs: list[Tensor]) -> list[Tensor]: outs.append(x) return outs - def _fit_to_num_heads( - self, channels_list: list[int], num_repeats: list[int] + def _fit_to_n_heads( + self, channels_list: list[int], n_repeats: list[int] ) -> tuple[list[int], list[int]]: - """Fits channels_list and num_repeats to num_heads by removing or adding items. + """Fits channels_list and n_repeats to n_heads by removing or adding items. 
Also scales the numbers based on offset """ - if self.num_heads == 2: + if self.n_heads == 2: channels_list = [channels_list[0], channels_list[4], channels_list[5]] - num_repeats = [num_repeats[0], num_repeats[3]] - elif self.num_heads == 3: - return channels_list, num_repeats - elif self.num_heads == 4: + n_repeats = [n_repeats[0], n_repeats[3]] + elif self.n_heads == 3: + return channels_list, n_repeats + elif self.n_heads == 4: channels_list = [ channels_list[0], channels_list[1], @@ -144,18 +142,18 @@ def _fit_to_num_heads( channels_list[4], channels_list[5], ] - num_repeats = [ - num_repeats[0], - num_repeats[1], - num_repeats[1], - num_repeats[2], - num_repeats[2], - num_repeats[3], + n_repeats = [ + n_repeats[0], + n_repeats[1], + n_repeats[1], + n_repeats[2], + n_repeats[2], + n_repeats[3], ] else: raise ValueError( - f"Specified number of heads ({self.num_heads}) not supported." + f"Specified number of heads ({self.n_heads}) not supported." "The number of heads should be 2, 3 or 4." ) - return channels_list, num_repeats + return channels_list, n_repeats diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py index 6c5ee1d4..fa630643 100644 --- a/luxonis_train/utils/boundingbox.py +++ b/luxonis_train/utils/boundingbox.py @@ -413,8 +413,8 @@ def anchors_from_dataset( @type loader: L{torch.utils.data.DataLoader} @param loader: Data loader. @type n_anchors: int - @param n_anchors: Number of anchors, this is normally num_heads * 3 which generates - 3 anchors per layer. Defaults to 9. + @param n_anchors: Number of anchors, this is normally n_heads * 3 which generates 3 + anchors per layer. Defaults to 9. @type n_generations: int @param n_generations: Number of iterations for anchor improvement with genetic algorithm. Defaults to 1000. 
diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index dc35bc43..670e1008 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -269,7 +269,7 @@ class TrainerConfig(BaseModelExtraForbid): accelerator: Literal["auto", "cpu", "gpu", "tpu"] = "auto" devices: int | list[int] | str = "auto" strategy: Literal["auto", "ddp"] = "auto" - num_sanity_val_steps: int = 2 + n_sanity_val_steps: Annotated[int, Field(alias="num_sanity_val_steps")] = 2 profiler: Literal["simple", "advanced"] | None = None matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True @@ -280,10 +280,10 @@ class TrainerConfig(BaseModelExtraForbid): accumulate_grad_batches: PositiveInt = 1 use_weighted_sampler: bool = False epochs: PositiveInt = 100 - num_workers: NonNegativeInt = 4 + n_workers: Annotated[NonNegativeInt, Field(alias="num_workers")] = 4 train_metrics_interval: Literal[-1] | PositiveInt = -1 validation_interval: Literal[-1] | PositiveInt = 1 - num_log_images: NonNegativeInt = 4 + n_log_images: Annotated[NonNegativeInt, Field(alias="num_log_images")] = 4 skip_last_batch: bool = True pin_memory: bool = True log_sub_losses: bool = True @@ -306,13 +306,13 @@ def validate_deterministic(self) -> Self: return self @model_validator(mode="after") - def check_num_workes_platform(self) -> Self: + def check_n_workes_platform(self) -> Self: if ( sys.platform == "win32" or sys.platform == "darwin" - ) and self.num_workers != 0: - self.num_workers = 0 + ) and self.n_workers != 0: + self.n_workers = 0 logger.warning( - "Setting `num_workers` to 0 because of platform compatibility." + "Setting `n_workers` to 0 because of platform compatibility." 
) return self diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py index aa1fbfec..84c5aa85 100644 --- a/luxonis_train/utils/dataset_metadata.py +++ b/luxonis_train/utils/dataset_metadata.py @@ -98,11 +98,11 @@ def classes(self, task: str | None = None) -> list[str]: ) return class_names - def autogenerate_anchors(self, num_heads: int) -> tuple[list[list[float]], float]: + def autogenerate_anchors(self, n_heads: int) -> tuple[list[list[float]], float]: """Automatically generates anchors for the provided dataset. - @type num_heads: int - @param num_heads: Number of heads to generate anchors for. + @type n_heads: int + @param n_heads: Number of heads to generate anchors for. @rtype: tuple[list[list[float]], float] @return: List of anchors in [-1,6] format and recall of the anchors. @raises RuntimeError: If the dataset loader was not provided during @@ -116,7 +116,7 @@ def autogenerate_anchors(self, num_heads: int) -> tuple[list[list[float]], float ) proposed_anchors, recall = anchors_from_dataset( - self._loader, n_anchors=num_heads * 3 + self._loader, n_anchors=n_heads * 3 ) return proposed_anchors.reshape(-1, 6).tolist(), recall diff --git a/tests/configs/parking_lot_config.yaml b/tests/configs/parking_lot_config.yaml index de8a0c68..bb15ac37 100644 --- a/tests/configs/parking_lot_config.yaml +++ b/tests/configs/parking_lot_config.yaml @@ -138,16 +138,16 @@ trainer: devices: auto strategy: auto - num_sanity_val_steps: 1 + n_sanity_val_steps: 1 profiler: null verbose: True batch_size: 2 accumulate_grad_batches: 1 epochs: 200 - num_workers: 8 + n_workers: 8 train_metrics_interval: -1 validation_interval: 10 - num_log_images: 8 + n_log_images: 8 skip_last_batch: True log_sub_losses: True save_top_k: 3 diff --git a/tests/configs/segmentation_parse_loader.yaml b/tests/configs/segmentation_parse_loader.yaml index 60f7a30d..14814571 100644 --- a/tests/configs/segmentation_parse_loader.yaml +++ 
b/tests/configs/segmentation_parse_loader.yaml @@ -22,6 +22,6 @@ trainer: batch_size: 4 epochs: &epochs 1 - num_workers: 4 + n_workers: 4 validation_interval: 1 - num_log_images: 8 + n_log_images: 8 diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index dc26b9c4..9aa7d4ab 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -258,7 +258,7 @@ def config(train_overfit: bool) -> dict[str, Any]: "trainer": { "batch_size": 4, "epochs": epochs, - "num_workers": mp.cpu_count(), + "n_workers": mp.cpu_count(), "validation_interval": epochs, "save_top_k": 0, "preprocessing": { diff --git a/tests/unittests/test_assigners/test_tal_assigner.py b/tests/unittests/test_assigners/test_tal_assigner.py index 1f63b42b..f028b55a 100644 --- a/tests/unittests/test_assigners/test_tal_assigner.py +++ b/tests/unittests/test_assigners/test_tal_assigner.py @@ -14,36 +14,36 @@ def test_init(): def test_forward(): batch_size = 10 - num_anchors = 100 - num_max_boxes = 5 - num_classes = 80 + n_anchors = 100 + n_max_boxes = 5 + n_classes = 80 - assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) + assigner = TaskAlignedAssigner(n_classes=n_classes, topk=13) # Create mock inputs - pred_scores = torch.rand(batch_size, num_anchors, 1) - pred_bboxes = torch.rand(batch_size, num_anchors, 4) - anchor_points = torch.rand(num_anchors, 2) - gt_labels = torch.rand(batch_size, num_max_boxes, 1) - gt_bboxes = torch.zeros(batch_size, num_max_boxes, 4) # no gt bboxes - mask_gt = torch.rand(batch_size, num_max_boxes, 1) + pred_scores = torch.rand(batch_size, n_anchors, 1) + pred_bboxes = torch.rand(batch_size, n_anchors, 4) + anchor_points = torch.rand(n_anchors, 2) + gt_labels = torch.rand(batch_size, n_max_boxes, 1) + gt_bboxes = torch.zeros(batch_size, n_max_boxes, 4) # no gt bboxes + mask_gt = torch.rand(batch_size, n_max_boxes, 1) labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( pred_scores, pred_bboxes, anchor_points, 
gt_labels, gt_bboxes, mask_gt ) - assert labels.shape == (batch_size, num_anchors) - assert bboxes.shape == (batch_size, num_anchors, 4) + assert labels.shape == (batch_size, n_anchors) + assert bboxes.shape == (batch_size, n_anchors, 4) assert scores.shape == ( batch_size, - num_anchors, - num_classes, + n_anchors, + n_classes, ) - assert mask.shape == (batch_size, num_anchors) - assert assigned_gt_idx.shape == (batch_size, num_anchors) + assert mask.shape == (batch_size, n_anchors) + assert assigned_gt_idx.shape == (batch_size, n_anchors) - # Labels should be `num_classes` as there are no GT boxes - assert labels.unique().tolist() == [num_classes] + # Labels should be `n_classes` as there are no GT boxes + assert labels.unique().tolist() == [n_classes] # All results should be zero as there are no GT boxes assert torch.equal(bboxes, torch.zeros_like(bboxes)) @@ -83,12 +83,12 @@ def test_get_alignment_metric(): def test_select_topk_candidates(): batch_size = 2 - num_max_boxes = 3 - num_anchors = 5 + n_max_boxes = 3 + n_anchors = 5 topk = 2 - metrics = torch.rand(batch_size, num_max_boxes, num_anchors) - mask_gt = torch.rand(batch_size, num_max_boxes, 1) + metrics = torch.rand(batch_size, n_max_boxes, n_anchors) + mask_gt = torch.rand(batch_size, n_max_boxes, 1) assigner = TaskAlignedAssigner(n_classes=80, topk=topk) @@ -98,7 +98,7 @@ def test_select_topk_candidates(): assigner._select_topk_candidates(metrics), assigner._select_topk_candidates(metrics, topk_mask=topk_mask), ) - assert is_in_topk.shape == (batch_size, num_max_boxes, num_anchors) + assert is_in_topk.shape == (batch_size, n_max_boxes, n_anchors) assert is_in_topk.dtype == torch.float32 assert is_in_topk.sum(dim=-1).max() <= topk @@ -106,16 +106,16 @@ def test_select_topk_candidates(): def test_get_final_assignments(): batch_size = 2 - num_max_boxes = 3 - num_anchors = 5 - num_classes = 80 + n_max_boxes = 3 + n_anchors = 5 + n_classes = 80 - gt_labels = torch.randint(0, num_classes, (batch_size, 
num_max_boxes, 1)) - gt_bboxes = torch.rand(batch_size, num_max_boxes, 4) - assigned_gt_idx = torch.randint(0, num_max_boxes, (batch_size, num_anchors)) - mask_pos_sum = torch.randint(0, 2, (batch_size, num_anchors)) + gt_labels = torch.randint(0, n_classes, (batch_size, n_max_boxes, 1)) + gt_bboxes = torch.rand(batch_size, n_max_boxes, 4) + assigned_gt_idx = torch.randint(0, n_max_boxes, (batch_size, n_anchors)) + mask_pos_sum = torch.randint(0, 2, (batch_size, n_anchors)) - assigner = TaskAlignedAssigner(n_classes=num_classes, topk=13) + assigner = TaskAlignedAssigner(n_classes=n_classes, topk=13) assigner.bs = batch_size # Set batch size assigner.n_max_boxes = gt_bboxes.size(1) @@ -123,7 +123,7 @@ def test_get_final_assignments(): gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) - assert assigned_labels.shape == (batch_size, num_anchors) - assert assigned_bboxes.shape == (batch_size, num_anchors, 4) - assert assigned_scores.shape == (batch_size, num_anchors, num_classes) - assert assigned_labels.min() >= 0 and assigned_labels.max() <= num_classes + assert assigned_labels.shape == (batch_size, n_anchors) + assert assigned_bboxes.shape == (batch_size, n_anchors, 4) + assert assigned_scores.shape == (batch_size, n_anchors, n_classes) + assert assigned_labels.min() >= 0 and assigned_labels.max() <= n_classes diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index 35f3dedc..a0d238bc 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -12,25 +12,21 @@ ) -def generate_random_bboxes(num_bboxes, max_width, max_height, format="xyxy"): - # Generate top-left corners (x1, y1) - x1y1 = torch.rand(num_bboxes, 2) * torch.tensor([max_width - 1, max_height - 1]) +def generate_random_bboxes( + n_bboxes: int, max_width: int, max_height: int, format: str = "xyxy" +): + x1y1 = torch.rand(n_bboxes, 2) * torch.tensor([max_width - 1, max_height - 1]) - # Generate widths 
and heights ensuring x2 > x1 and y2 > y1 wh = ( - torch.rand(num_bboxes, 2) * (torch.tensor([max_width, max_height]) - 1 - x1y1) - + 1 + torch.rand(n_bboxes, 2) * (torch.tensor([max_width, max_height]) - 1 - x1y1) + 1 ) if format == "xyxy": - # Calculate bottom-right corners (x2, y2) for xyxy format x2y2 = x1y1 + wh bboxes = torch.cat((x1y1, x2y2), dim=1) elif format == "xywh": - # Use x1y1 as top-left corner and wh as width and height for xywh format bboxes = torch.cat((x1y1, wh), dim=1) elif format == "cxcywh": - # Calculate center coordinates and use wh as width and height for cxcywh format cxcy = x1y1 + wh / 2 bboxes = torch.cat((cxcy, wh), dim=1) else: From 7b4438e215e1173262a74e29aa674c10cbbc3c84 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 08:10:41 +0200 Subject: [PATCH 065/102] list comprehension --- luxonis_train/nodes/necks/reppan_neck.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/luxonis_train/nodes/necks/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck.py index 7873efc0..9e64e97a 100644 --- a/luxonis_train/nodes/necks/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck.py @@ -126,7 +126,7 @@ def _fit_to_n_heads( Also scales the numbers based on offset """ if self.n_heads == 2: - channels_list = [channels_list[0], channels_list[4], channels_list[5]] + channels_list = [channels_list[i] for i in [0, 4, 5]] n_repeats = [n_repeats[0], n_repeats[3]] elif self.n_heads == 3: return channels_list, n_repeats @@ -142,14 +142,7 @@ def _fit_to_n_heads( channels_list[4], channels_list[5], ] - n_repeats = [ - n_repeats[0], - n_repeats[1], - n_repeats[1], - n_repeats[2], - n_repeats[2], - n_repeats[3], - ] + n_repeats = [n_repeats[i] for i in [0, 1, 1, 2, 2, 3]] else: raise ValueError( f"Specified number of heads ({self.n_heads}) not supported." 
From 1149dd568d1fc19fbd20c17cbb02daa0b282a617 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 08:35:54 +0200 Subject: [PATCH 066/102] changed formatter settings to conform to PEP8 --- .pre-commit-config.yaml | 5 +- luxonis_train/__main__.py | 15 +- luxonis_train/assigners/atts_assigner.py | 69 +++++-- luxonis_train/assigners/tal_assigner.py | 55 +++-- luxonis_train/assigners/utils.py | 21 +- .../attached_modules/base_attached_module.py | 32 ++- .../losses/adaptive_detection_loss.py | 34 +++- .../attached_modules/losses/base_loss.py | 22 +- .../losses/bce_with_logits.py | 50 ++--- .../attached_modules/losses/cross_entropy.py | 4 +- .../losses/efficient_keypoint_bbox_loss.py | 38 ++-- .../losses/implicit_keypoint_bbox_loss.py | 65 ++++-- .../attached_modules/losses/keypoint_loss.py | 36 ++-- .../losses/sigmoid_focal_loss.py | 6 +- .../losses/smooth_bce_with_logits.py | 23 ++- .../losses/softmax_focal_loss.py | 7 +- .../attached_modules/metrics/base_metric.py | 17 +- .../metrics/mean_average_precision.py | 11 +- .../mean_average_precision_keypoints.py | 55 +++-- .../metrics/object_keypoint_similarity.py | 59 ++++-- .../visualizers/base_visualizer.py | 16 +- .../visualizers/bbox_visualizer.py | 55 +++-- .../visualizers/classification_visualizer.py | 12 +- .../visualizers/keypoint_visualizer.py | 24 ++- .../visualizers/multi_visualizer.py | 15 +- .../visualizers/segmentation_visualizer.py | 14 +- .../attached_modules/visualizers/utils.py | 32 +-- luxonis_train/callbacks/gpu_stats_monitor.py | 30 ++- .../callbacks/luxonis_progress_bar.py | 27 ++- luxonis_train/callbacks/metadata_logger.py | 17 +- luxonis_train/callbacks/module_freezer.py | 3 +- luxonis_train/callbacks/needs_checkpoint.py | 4 +- luxonis_train/callbacks/test_on_train_end.py | 4 +- luxonis_train/callbacks/upload_checkpoint.py | 7 +- luxonis_train/core/core.py | 137 +++++++++---- luxonis_train/core/utils/archive_utils.py | 23 ++- luxonis_train/core/utils/tune_utils.py | 12 +- 
luxonis_train/loaders/base_loader.py | 25 ++- luxonis_train/loaders/luxonis_loader_torch.py | 4 +- luxonis_train/models/luxonis_lightning.py | 188 ++++++++++++------ .../base_predefined_model.py | 12 +- .../predefined_models/detection_model.py | 4 +- .../nodes/backbones/contextspatial.py | 8 +- .../backbones/efficientrep/efficientrep.py | 11 +- .../nodes/backbones/micronet/blocks.py | 62 ++++-- .../nodes/backbones/micronet/micronet.py | 9 +- .../nodes/backbones/mobileone/blocks.py | 31 ++- .../nodes/backbones/mobileone/variants.py | 4 +- .../nodes/backbones/repvgg/repvgg.py | 3 +- luxonis_train/nodes/backbones/resnet.py | 6 +- luxonis_train/nodes/backbones/rexnetv1.py | 19 +- luxonis_train/nodes/base_node.py | 81 ++++---- luxonis_train/nodes/blocks/blocks.py | 51 +++-- .../nodes/heads/classification_head.py | 8 +- .../nodes/heads/efficient_bbox_head.py | 39 ++-- .../heads/efficient_keypoint_bbox_head.py | 34 +++- .../heads/implicit_keypoint_bbox_head.py | 48 +++-- luxonis_train/nodes/necks/reppan_neck.py | 23 ++- luxonis_train/utils/boundingbox.py | 88 +++++--- luxonis_train/utils/config.py | 27 ++- luxonis_train/utils/dataset_metadata.py | 62 ++++-- luxonis_train/utils/exceptions.py | 7 +- luxonis_train/utils/general.py | 38 ++-- luxonis_train/utils/graph.py | 14 +- luxonis_train/utils/keypoints.py | 12 +- luxonis_train/utils/registry.py | 24 ++- luxonis_train/utils/tracker.py | 3 +- luxonis_train/utils/types.py | 12 +- pyproject.toml | 4 +- tests/integration/conftest.py | 14 +- tests/integration/multi_input_modules.py | 15 +- tests/integration/test_simple.py | 4 +- .../test_assigners/test_atts_assigner.py | 19 +- .../test_assigners/test_tal_assigner.py | 10 +- tests/unittests/test_assigners/test_utils.py | 6 +- tests/unittests/test_base_attached_module.py | 31 +-- tests/unittests/test_base_node.py | 20 +- .../test_loaders/test_base_loader.py | 7 +- .../test_losses/test_bce_with_logits_loss.py | 4 +- tests/unittests/test_utils/test_boxutils.py | 25 ++- 
tests/unittests/test_utils/test_graph.py | 6 +- tests/unittests/test_utils/test_keypoints.py | 5 +- 82 files changed, 1463 insertions(+), 720 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6226370c..3d68c872 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,11 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 + rev: v0.6.4 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] types_or: [python, pyi, jupyter] - id: ruff-format - args: [--line-length, '88'] types_or: [python, pyi, jupyter] - repo: https://github.com/PyCQA/docformatter @@ -14,7 +13,7 @@ repos: hooks: - id: docformatter additional_dependencies: [tomli] - args: [--in-place, --black, --style=epytext] + args: [--in-place, --style=epytext] - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 diff --git a/luxonis_train/__main__.py b/luxonis_train/__main__.py index 7740f1d5..c3164227 100644 --- a/luxonis_train/__main__.py +++ b/luxonis_train/__main__.py @@ -41,7 +41,9 @@ class _ViewType(str, Enum): ), ] -ViewType = Annotated[_ViewType, typer.Option(help="Which dataset view to use.")] +ViewType = Annotated[ + _ViewType, typer.Option(help="Which dataset view to use.") +] SaveDirType = Annotated[ Optional[Path], @@ -53,7 +55,8 @@ class _ViewType(str, Enum): def train( config: ConfigType = None, resume: Annotated[ - Optional[str], typer.Option(help="Resume training from this checkpoint.") + Optional[str], + typer.Option(help="Resume training from this checkpoint."), ] = None, opts: OptsType = None, ): @@ -65,7 +68,9 @@ def train( @app.command() def test( - config: ConfigType = None, view: ViewType = _ViewType.VAL, opts: OptsType = None + config: ConfigType = None, + view: ViewType = _ViewType.VAL, + opts: OptsType = None, ): """Evaluate model.""" from luxonis_train.core import LuxonisModel @@ -189,7 +194,9 @@ def common( _: Annotated[ bool, typer.Option( - "--version", callback=version_callback, help="Show 
version and exit." + "--version", + callback=version_callback, + help="Show version and exit.", ), ] = False, source: Annotated[ diff --git a/luxonis_train/assigners/atts_assigner.py b/luxonis_train/assigners/atts_assigner.py index 4fea425c..269496fa 100644 --- a/luxonis_train/assigners/atts_assigner.py +++ b/luxonis_train/assigners/atts_assigner.py @@ -49,9 +49,10 @@ def forward( @type pred_bboxes: Tensor @param pred_bboxes: Predicted bboxes of shape [bs, n_anchors, 4] @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and - output positive mask of shape [bs, n_anchors]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes] and output positive mask of shape + [bs, n_anchors]. """ self.n_anchors = anchor_bboxes.size(0) @@ -61,9 +62,13 @@ def forward( if self.n_max_boxes == 0: device = gt_bboxes.device return ( - torch.full([self.bs, self.n_anchors], self.n_classes).to(device), + torch.full([self.bs, self.n_anchors], self.n_classes).to( + device + ), torch.zeros([self.bs, self.n_anchors, 4]).to(device), - torch.zeros([self.bs, self.n_anchors, self.n_classes]).to(device), + torch.zeros([self.bs, self.n_anchors, self.n_classes]).to( + device + ), torch.zeros([self.bs, self.n_anchors]).to(device), torch.zeros([self.bs, self.n_anchors]).to(device), ) @@ -78,7 +83,10 @@ def forward( gt_centers = self._get_bbox_center(gt_bboxes_flat) anchor_centers = self._get_bbox_center(anchor_bboxes) distances = ( - (gt_centers[:, None, :] - anchor_centers[None, :, :]).pow(2).sum(-1).sqrt() + (gt_centers[:, None, :] - anchor_centers[None, :, :]) + .pow(2) + .sum(-1) + .sqrt() ) distances = distances.reshape([self.bs, -1, self.n_anchors]) @@ -103,7 +111,11 @@ def forward( ) # Generate final assignments based on masks - assigned_labels, 
assigned_bboxes, assigned_scores = self._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = self._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) @@ -140,7 +152,8 @@ def _select_topk_candidates( @type mask_gt: Tensor @param mask_gt: Mask for valid GT per image. @rtype: tuple[Tensor, Tensor] - @return: Mask of selected anchors and indices of selected anchors. + @return: Mask of selected anchors and indices of selected + anchors. """ mask_gt = mask_gt.repeat(1, 1, self.topk).bool() level_distances = torch.split(distances, n_level_bboxes, dim=-1) @@ -157,9 +170,13 @@ def _select_topk_candidates( ) topk_idxs.append(per_level_topk_idxs + start_idx) per_level_topk_idxs = torch.where( - mask_gt, per_level_topk_idxs, torch.zeros_like(per_level_topk_idxs) + mask_gt, + per_level_topk_idxs, + torch.zeros_like(per_level_topk_idxs), + ) + is_in_topk = F.one_hot(per_level_topk_idxs, per_level_boxes).sum( + dim=-2 ) - is_in_topk = F.one_hot(per_level_topk_idxs, per_level_boxes).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk ) @@ -174,14 +191,18 @@ def _get_positive_samples( topk_idxs: Tensor, overlaps: Tensor, ) -> Tensor: - """Computes threshold and returns mask for samples over threshold. + """Computes threshold and returns mask for samples over + threshold. 
@type is_in_topk: Tensor - @param is_in_topk: Mask of selected anchors [bx, n_max_boxes, n_anchors] + @param is_in_topk: Mask of selected anchors [bx, n_max_boxes, + n_anchors] @type topk_idxs: Tensor - @param topk_idxs: Indices of selected anchors [bx, n_max_boxes, topK * n_levels] + @param topk_idxs: Indices of selected anchors [bx, n_max_boxes, + topK * n_levels] @type overlaps: Tensor - @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, n_anchors] + @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, + n_anchors] @rtype: Tensor @return: Mask of positive samples [bx, n_max_boxes, n_anchors] """ @@ -196,14 +217,17 @@ def _get_positive_samples( assist_idxs = assist_idxs[:, None] flatten_idxs = topk_idxs + assist_idxs candidate_overlaps = _candidate_overlaps.reshape(-1)[flatten_idxs] - candidate_overlaps = candidate_overlaps.reshape([self.bs, self.n_max_boxes, -1]) + candidate_overlaps = candidate_overlaps.reshape( + [self.bs, self.n_max_boxes, -1] + ) overlaps_mean_per_gt = candidate_overlaps.mean(dim=-1, keepdim=True) overlaps_std_per_gt = candidate_overlaps.std(dim=-1, keepdim=True) overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt is_pos = torch.where( - _candidate_overlaps > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), + _candidate_overlaps + > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]), is_in_topk, torch.zeros_like(is_in_topk), ) @@ -227,15 +251,18 @@ def _get_final_assignments( @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes]. 
""" # assigned target labels batch_idx = torch.arange( self.bs, dtype=gt_labels.dtype, device=gt_labels.device ) batch_idx = batch_idx[..., None] - assigned_gt_idx = (assigned_gt_idx + batch_idx * self.n_max_boxes).long() + assigned_gt_idx = ( + assigned_gt_idx + batch_idx * self.n_max_boxes + ).long() assigned_labels = gt_labels.flatten()[assigned_gt_idx.flatten()] assigned_labels = assigned_labels.reshape([self.bs, self.n_anchors]) assigned_labels = torch.where( @@ -249,7 +276,9 @@ def _get_final_assignments( assigned_bboxes = assigned_bboxes.reshape([self.bs, self.n_anchors, 4]) # assigned target scores - assigned_scores = F.one_hot(assigned_labels.long(), self.n_classes + 1).float() + assigned_scores = F.one_hot( + assigned_labels.long(), self.n_classes + 1 + ).float() assigned_scores = assigned_scores[:, :, : self.n_classes] return assigned_labels, assigned_bboxes, assigned_scores diff --git a/luxonis_train/assigners/tal_assigner.py b/luxonis_train/assigners/tal_assigner.py index 8660525e..ea228eba 100644 --- a/luxonis_train/assigners/tal_assigner.py +++ b/luxonis_train/assigners/tal_assigner.py @@ -66,9 +66,10 @@ def forward( @type mask_gt: Tensor @param mask_gt: Mask for valid GTs [bs, n_max_boxes, 1] @rtype: tuple[Tensor, Tensor, Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes] and - output mask of shape [bs, n_anchors] + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes] and output mask of shape [bs, + n_anchors] """ self.bs = pred_scores.size(0) self.n_max_boxes = gt_bboxes.size(1) @@ -76,7 +77,9 @@ def forward( if self.n_max_boxes == 0: device = gt_bboxes.device return ( - torch.full_like(pred_scores[..., 0], self.n_classes).to(device), + torch.full_like(pred_scores[..., 0], self.n_classes).to( + device + ), 
torch.zeros_like(pred_bboxes).to(device), torch.zeros_like(pred_scores).to(device), torch.zeros_like(pred_scores[..., 0]).to(device), @@ -105,7 +108,11 @@ def forward( ) # Generate final targets based on masks - assigned_labels, assigned_bboxes, assigned_scores = self._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = self._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) @@ -137,7 +144,8 @@ def _get_alignment_metric( gt_labels: Tensor, gt_bboxes: Tensor, ): - """Calculates anchor alignment metric and IoU between GTs and predicted bboxes. + """Calculates anchor alignment metric and IoU between GTs and + predicted bboxes. @type pred_scores: Tensor @param pred_scores: Predicted scores [bs, n_anchors, 1] @@ -151,7 +159,9 @@ def _get_alignment_metric( pred_scores = pred_scores.permute(0, 2, 1) gt_labels = gt_labels.to(torch.long) ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long) - ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ind[0] = ( + torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes) + ) ind[1] = gt_labels.squeeze(-1) bbox_scores = pred_scores[ind[0], ind[1]] @@ -169,23 +179,29 @@ def _select_topk_candidates( """Selects k anchors based on provided metrics tensor. @type metrics: Tensor - @param metrics: Metrics tensor of shape [bs, n_max_boxes, n_anchors] + @param metrics: Metrics tensor of shape [bs, n_max_boxes, + n_anchors] @type largest: bool - @param largest: Flag if should keep largest topK. Defaults to True. + @param largest: Flag if should keep largest topK. Defaults to + True. 
@type topk_mask: Tensor - @param topk_mask: Mask for valid GTs of shape [bs, n_max_boxes, topk] + @param topk_mask: Mask for valid GTs of shape [bs, n_max_boxes, + topk] @rtype: Tensor - @return: Mask of selected anchors of shape [bs, n_max_boxes, n_anchors] + @return: Mask of selected anchors of shape [bs, n_max_boxes, + n_anchors] """ n_anchors = metrics.shape[-1] topk_metrics, topk_idxs = torch.topk( metrics, self.topk, dim=-1, largest=largest ) if topk_mask is None: - topk_mask = (topk_metrics.max(dim=-1, keepdim=True)[0] > self.eps).tile( - [1, 1, self.topk] - ) - topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs)) + topk_mask = ( + topk_metrics.max(dim=-1, keepdim=True)[0] > self.eps + ).tile([1, 1, self.topk]) + topk_idxs = torch.where( + topk_mask, topk_idxs, torch.zeros_like(topk_idxs) + ) is_in_topk = F.one_hot(topk_idxs, n_anchors).sum(dim=-2) is_in_topk = torch.where( is_in_topk > 1, torch.zeros_like(is_in_topk), is_in_topk @@ -210,8 +226,9 @@ def _get_final_assignments( @type mask_pos_sum: Tensor @param mask_pos_sum: Mask of matched GTs [bs, n_anchors] @rtype: tuple[Tensor, Tensor, Tensor] - @return: Assigned labels of shape [bs, n_anchors], assigned bboxes of shape [bs, - n_anchors, 4], assigned scores of shape [bs, n_anchors, n_classes]. + @return: Assigned labels of shape [bs, n_anchors], assigned + bboxes of shape [bs, n_anchors, 4], assigned scores of shape + [bs, n_anchors, n_classes]. 
""" # assigned target labels batch_ind = torch.arange( @@ -228,7 +245,9 @@ def _get_final_assignments( assigned_scores = F.one_hot(assigned_labels, self.n_classes) mask_pos_scores = mask_pos_sum[:, :, None].repeat(1, 1, self.n_classes) assigned_scores = torch.where( - mask_pos_scores > 0, assigned_scores, torch.full_like(assigned_scores, 0) + mask_pos_scores > 0, + assigned_scores, + torch.full_like(assigned_scores, 0), ) assigned_labels = torch.where( diff --git a/luxonis_train/assigners/utils.py b/luxonis_train/assigners/utils.py index 8987fc59..fe9fba4b 100644 --- a/luxonis_train/assigners/utils.py +++ b/luxonis_train/assigners/utils.py @@ -20,7 +20,9 @@ def candidates_in_gt( @return: Mask for anchors inside any GT bbox """ n_anchors = anchor_centers.size(0) - anchor_centers = anchor_centers.unsqueeze(0).repeat(gt_bboxes.size(0), 1, 1) + anchor_centers = anchor_centers.unsqueeze(0).repeat( + gt_bboxes.size(0), 1, 1 + ) gt_bboxes_lt = gt_bboxes[:, :2].unsqueeze(1).repeat(1, n_anchors, 1) gt_bboxes_rb = gt_bboxes[:, 2:].unsqueeze(1).repeat(1, n_anchors, 1) bbox_delta_lt = anchor_centers - gt_bboxes_lt @@ -33,12 +35,15 @@ def candidates_in_gt( def fix_collisions( mask_pos: Tensor, overlaps: Tensor, n_max_boxes: int ) -> tuple[Tensor, Tensor, Tensor]: - """If an anchor is assigned to multiple GTs, the one with highest IoU is selected. + """If an anchor is assigned to multiple GTs, the one with highest + IoU is selected. 
@type mask_pos: Tensor - @param mask_pos: Mask of assigned anchors [bs, n_max_boxes, n_anchors] + @param mask_pos: Mask of assigned anchors [bs, n_max_boxes, + n_anchors] @type overlaps: Tensor - @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, n_anchors] + @param overlaps: IoUs between GTs and anchors [bx, n_max_boxes, + n_anchors] @type n_max_boxes: int @param n_max_boxes: Number of maximum boxes per image @rtype: tuple[Tensor, Tensor, Tensor] @@ -46,7 +51,9 @@ def fix_collisions( """ mask_pos_sum = mask_pos.sum(dim=-2) if mask_pos_sum.max() > 1: - mask_multi_gts = (mask_pos_sum.unsqueeze(1) > 1).repeat([1, n_max_boxes, 1]) + mask_multi_gts = (mask_pos_sum.unsqueeze(1) > 1).repeat( + [1, n_max_boxes, 1] + ) max_overlaps_idx = overlaps.argmax(dim=1) is_max_overlaps = F.one_hot(max_overlaps_idx, n_max_boxes) is_max_overlaps = is_max_overlaps.permute(0, 2, 1).to(overlaps.dtype) @@ -57,8 +64,8 @@ def fix_collisions( def batch_iou(batch1: Tensor, batch2: Tensor) -> Tensor: - """Calculates IoU for each pair of bboxes in the batch. Bboxes must be in xyxy - format. + """Calculates IoU for each pair of bboxes in the batch. Bboxes must + be in xyxy format. @type batch1: Tensor @param batch1: Tensor of shape C{[bs, N, 4]} diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index b0cd952d..7d6097e7 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -17,7 +17,11 @@ class BaseAttachedModule( - nn.Module, Generic[Unpack[Ts]], ABC, metaclass=AutoRegisterMeta, register=False + nn.Module, + Generic[Unpack[Ts]], + ABC, + metaclass=AutoRegisterMeta, + register=False, ): """Base class for all modules that are attached to a L{LuxonisNode}. @@ -102,7 +106,8 @@ def node(self) -> BaseNode: """Reference to the node that this module is attached to. 
@type: L{BaseNode} - @raises RuntimeError: If the node was not provided during initialization. + @raises RuntimeError: If the node was not provided during + initialization. """ if self._node is None: raise RuntimeError( @@ -116,7 +121,8 @@ def n_keypoints(self) -> int: """Getter for the number of keypoints. @type: int - @raises ValueError: If the number of keypoints cannot be determined. + @raises ValueError: If the number of keypoints cannot be + determined. """ return self.node.n_keypoints @@ -125,9 +131,11 @@ def n_classes(self) -> int: """Getter for the number of classes. @type: int - @raises ValueError: If the number of classes cannot be determined. - @raises ValueError: If the number of classes is different for different tasks. - In that case, use the C{node.get_n_classes} method. + @raises ValueError: If the number of classes cannot be + determined. + @raises ValueError: If the number of classes is different for + different tasks. In that case, use the C{node.get_n_classes} + method. """ return self.node.n_classes @@ -156,10 +164,14 @@ def node_tasks(self) -> dict[LabelType, str]: @raises RuntimeError: If the node does not have the `tasks` attribute set. """ if self.node._tasks is None: - raise RuntimeError("Node must have the `tasks` attribute specified.") + raise RuntimeError( + "Node must have the `tasks` attribute specified." + ) return self.node._tasks - def get_label(self, labels: Labels, label_type: LabelType | None = None) -> Tensor: + def get_label( + self, labels: Labels, label_type: LabelType | None = None + ) -> Tensor: """Extracts a specific label from the labels dictionary. 
If the label type is not provided, the first label that matches the @@ -260,7 +272,9 @@ def get_input_tensors( ) return inputs[self.node_tasks[self.required_labels[0]]] - def prepare(self, inputs: Packet[Tensor], labels: Labels) -> tuple[Unpack[Ts]]: + def prepare( + self, inputs: Packet[Tensor], labels: Labels + ) -> tuple[Unpack[Ts]]: """Prepares node outputs for the forward pass of the module. This default implementation selects the output and label based on diff --git a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py index 3cf6af28..d25825cb 100644 --- a/luxonis_train/attached_modules/losses/adaptive_detection_loss.py +++ b/luxonis_train/attached_modules/losses/adaptive_detection_loss.py @@ -23,7 +23,9 @@ logger = logging.getLogger(__name__) -class AdaptiveDetectionLoss(BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]): +class AdaptiveDetectionLoss( + BaseLoss[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor] +): node: EfficientBBoxHead supported_labels = [LabelType.BOUNDINGBOX] @@ -131,8 +133,12 @@ def forward( assigned_scores: Tensor, mask_positive: Tensor, ): - one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] - loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) if assigned_scores.sum() > 1: loss_cls /= assigned_scores.sum() @@ -147,7 +153,9 @@ def forward( bbox_format="xyxy", )[0] - loss = self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + loss = ( + self.class_loss_weight * loss_cls + self.iou_loss_weight * loss_iou + ) sub_losses = {"class": loss_cls.detach(), "iou": loss_iou.detach()} @@ -176,7 +184,9 @@ def _init_parameters(self, features: list[Tensor]): self.grid_cell_offset, multiply_with_stride=True, ) - 
self.anchor_points_strided = self.anchor_points / self.stride_tensor + self.anchor_points_strided = ( + self.anchor_points / self.stride_tensor + ) def _run_assigner( self, @@ -206,11 +216,14 @@ def _run_assigner( mask_gt, ) - def _preprocess_bbox_target(self, target: Tensor, batch_size: int) -> Tensor: - """Preprocess target in shape [batch_size, N, 5] where N is maximum number of - instances in one image.""" + def _preprocess_bbox_target( + self, target: Tensor, batch_size: int + ) -> Tensor: + """Preprocess target in shape [batch_size, N, 5] where N is the + maximum number of instances in one image.""" sample_ids, counts = cast( - tuple[Tensor, Tensor], torch.unique(target[:, 0].int(), return_counts=True) + tuple[Tensor, Tensor], + torch.unique(target[:, 0].int(), return_counts=True), ) c_max = int(counts.max()) if counts.numel() > 0 else 0 out_target = torch.zeros(batch_size, c_max, 5, device=target.device) @@ -254,7 +267,8 @@ def forward( self, pred_score: Tensor, target_score: Tensor, label: Tensor ) -> Tensor: weight = ( - self.alpha * pred_score.pow(self.gamma) * (1 - label) + target_score * label + self.alpha * pred_score.pow(self.gamma) * (1 - label) + + target_score * label ) ce_loss = F.binary_cross_entropy( pred_score.float(), target_score.float(), reduction="none" diff --git a/luxonis_train/attached_modules/losses/base_loss.py b/luxonis_train/attached_modules/losses/base_loss.py index 89ce8d8c..7a69d0d8 100644 --- a/luxonis_train/attached_modules/losses/base_loss.py +++ b/luxonis_train/attached_modules/losses/base_loss.py @@ -17,19 +17,23 @@ class BaseLoss( ): """A base class for all loss functions. - This class defines the basic interface for all loss functions. It utilizes automatic - registration of defined subclasses to a L{LOSSES} registry. + This class defines the basic interface for all loss functions. It + utilizes automatic registration of defined subclasses to a L{LOSSES} + registry. 
""" @abstractmethod - def forward(self, *args: Unpack[Ts]) -> Tensor | tuple[Tensor, dict[str, Tensor]]: + def forward( + self, *args: Unpack[Ts] + ) -> Tensor | tuple[Tensor, dict[str, Tensor]]: """Forward pass of the loss function. @type args: Unpack[Ts] @param args: Prepared inputs from the L{prepare} method. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] - @return: The main loss and optional a dictionary of sublosses (for logging). - Only the main loss is used for backpropagation. + @return: The main loss and optional a dictionary of sublosses + (for logging). Only the main loss is used for + backpropagation. """ ... @@ -45,8 +49,10 @@ def run( @type labels: L{Labels} @param labels: Labels from the dataset. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] - @return: The main loss and optional a dictionary of sublosses (for logging). - Only the main loss is used for backpropagation. - @raises IncompatibleException: If the inputs are not compatible with the module. + @return: The main loss and optional a dictionary of sublosses + (for logging). Only the main loss is used for + backpropagation. + @raises IncompatibleException: If the inputs are not compatible + with the module. """ return self(*self.prepare(inputs, labels)) diff --git a/luxonis_train/attached_modules/losses/bce_with_logits.py b/luxonis_train/attached_modules/losses/bce_with_logits.py index 8fbff5d0..b759d06b 100644 --- a/luxonis_train/attached_modules/losses/bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/bce_with_logits.py @@ -17,33 +17,37 @@ def __init__( pos_weight: Tensor | None = None, **kwargs: Any, ): - """This loss combines a L{nn.Sigmoid} layer and the L{nn.BCELoss} in one single - class. This version is more numerically stable than using a plain C{Sigmoid} - followed by a {BCELoss} as, by combining the operations into one layer, we take - advantage of the log-sum-exp trick for numerical stability. 
+ """This loss combines a L{nn.Sigmoid} layer and the + L{nn.BCELoss} in one single class. This version is more + numerically stable than using a plain C{Sigmoid} followed by a + {BCELoss} as, by combining the operations into one layer, we + take advantage of the log-sum-exp trick for numerical stability. @type weight: list[float] | None - @param weight: a manual rescaling weight given to the loss of each batch - element. If given, has to be a list of length C{nbatch}. Defaults to - C{None}. + @param weight: a manual rescaling weight given to the loss of + each batch element. If given, has to be a list of length + C{nbatch}. Defaults to C{None}. @type reduction: Literal["none", "mean", "sum"] - @param reduction: Specifies the reduction to apply to the output: C{"none"} | - C{"mean"} | C{"sum"}. C{"none"}: no reduction will be applied, C{"mean"}: - the sum of the output will be divided by the number of elements in the - output, C{"sum"}: the output will be summed. Note: C{size_average} and - C{reduce} are in the process of being deprecated, and in the meantime, - specifying either of those two args will override C{reduction}. Defaults to - C{"mean"}. + @param reduction: Specifies the reduction to apply to the + output: C{"none"} | C{"mean"} | C{"sum"}. C{"none"}: no + reduction will be applied, C{"mean"}: the sum of the output + will be divided by the number of elements in the output, + C{"sum"}: the output will be summed. Note: C{size_average} + and C{reduce} are in the process of being deprecated, and in + the meantime, specifying either of those two args will + override C{reduction}. Defaults to C{"mean"}. @type pos_weight: Tensor | None - @param pos_weight: a weight of positive examples to be broadcasted with target. - Must be a tensor with equal size along the class dimension to the number of - classes. Pay close attention to PyTorch's broadcasting semantics in order to - achieve the desired operations. 
For a target of size [B, C, H, W] (where B - is batch size) pos_weight of size [B, C, H, W] will apply different - pos_weights to each element of the batch or [C, H, W] the same pos_weights - across the batch. To apply the same positive weight along all spacial - dimensions for a 2D multi-class target [C, H, W] use: [C, 1, 1]. Defaults to - C{None}. + @param pos_weight: a weight of positive examples to be + broadcasted with target. Must be a tensor with equal size + along the class dimension to the number of classes. Pay + close attention to PyTorch's broadcasting semantics in order + to achieve the desired operations. For a target of size [B, + C, H, W] (where B is batch size) pos_weight of size [B, C, + H, W] will apply different pos_weights to each element of + the batch or [C, H, W] the same pos_weights across the + batch. To apply the same positive weight along all spacial + dimensions for a 2D multi-class target [C, H, W] use: [C, 1, + 1]. Defaults to C{None}. """ super().__init__(**kwargs) self.criterion = nn.BCEWithLogitsLoss( diff --git a/luxonis_train/attached_modules/losses/cross_entropy.py b/luxonis_train/attached_modules/losses/cross_entropy.py index e1858686..4be0cfdc 100644 --- a/luxonis_train/attached_modules/losses/cross_entropy.py +++ b/luxonis_train/attached_modules/losses/cross_entropy.py @@ -12,8 +12,8 @@ class CrossEntropyLoss(BaseLoss[Tensor, Tensor]): - """This criterion computes the cross entropy loss between input logits and - target.""" + """This criterion computes the cross entropy loss between input + logits and target.""" supported_labels = [LabelType.SEGMENTATION, LabelType.CLASSIFICATION] diff --git a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py index e9fba8b6..d996dcfd 100644 --- a/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/efficient_keypoint_bbox_loss.py @@ -73,7 
+73,9 @@ def __init__( **kwargs, ) - self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([viz_pw])) + self.b_cross_entropy = BCEWithLogitsLoss( + pos_weight=torch.tensor([viz_pw]) + ) self.sigmas = get_sigmas( sigmas=sigmas, n_keypoints=self.n_keypoints, @@ -87,7 +89,9 @@ def __init__( def prepare( self, inputs: Packet[Tensor], labels: Labels - ) -> tuple[Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor]: + ) -> tuple[ + Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor + ]: feats = self.get_input_tensors(inputs, "features") pred_scores = self.get_input_tensors(inputs, "class_scores")[0] pred_distri = self.get_input_tensors(inputs, "distributions")[0] @@ -148,9 +152,11 @@ def prepare( assigned_bboxes = assigned_bboxes / self.stride_tensor area = ( - assigned_bboxes[mask_positive][:, 0] - assigned_bboxes[mask_positive][:, 2] + assigned_bboxes[mask_positive][:, 0] + - assigned_bboxes[mask_positive][:, 2] ) * ( - assigned_bboxes[mask_positive][:, 1] - assigned_bboxes[mask_positive][:, 3] + assigned_bboxes[mask_positive][:, 1] + - assigned_bboxes[mask_positive][:, 3] ) return ( @@ -189,8 +195,12 @@ def forward( ).mean() visibility_loss = self.b_cross_entropy.forward(pred_kpts[..., 2], mask) - one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[..., :-1] - loss_cls = self.varifocal_loss(pred_scores, assigned_scores, one_hot_label) + one_hot_label = F.one_hot(assigned_labels.long(), self.n_classes + 1)[ + ..., :-1 + ] + loss_cls = self.varifocal_loss( + pred_scores, assigned_scores, one_hot_label + ) if assigned_scores.sum() > 1: loss_cls /= assigned_scores.sum() @@ -224,27 +234,29 @@ def forward( def _preprocess_kpts_target( self, kpts_target: Tensor, batch_size: int, scale_tensor: Tensor ) -> Tensor: - """Preprocesses the target keypoints in shape [batch_size, N, n_keypoints, 3] - where N is the maximum number of keypoints in one image.""" + """Preprocesses the target keypoints in shape 
[batch_size, N, + n_keypoints, 3] where N is the maximum number of keypoints in + one image.""" _, counts = torch.unique(kpts_target[:, 0].int(), return_counts=True) max_kpts = int(counts.max()) if counts.numel() > 0 else 0 batched_keypoints = torch.zeros( - (batch_size, max_kpts, self.n_keypoints, 3), device=kpts_target.device + (batch_size, max_kpts, self.n_keypoints, 3), + device=kpts_target.device, ) for i in range(batch_size): keypoints_i = kpts_target[kpts_target[:, 0] == i] scaled_keypoints_i = keypoints_i[:, 2:].clone() - batched_keypoints[i, : keypoints_i.shape[0]] = scaled_keypoints_i.view( - -1, self.n_keypoints, 3 + batched_keypoints[i, : keypoints_i.shape[0]] = ( + scaled_keypoints_i.view(-1, self.n_keypoints, 3) ) batched_keypoints[i, :, :, :2] *= scale_tensor[:2] return batched_keypoints def dist2kpts_noscale(self, anchor_points: Tensor, kpts: Tensor) -> Tensor: - """Adjusts and scales predicted keypoints relative to anchor points without - considering image stride.""" + """Adjusts and scales predicted keypoints relative to anchor + points without considering image stride.""" adj_kpts = kpts.clone() scale = 2.0 x_adj = anchor_points[:, [0]] - 0.5 diff --git a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py index e59f0c52..8c9230ae 100644 --- a/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py +++ b/luxonis_train/attached_modules/losses/implicit_keypoint_bbox_loss.py @@ -55,8 +55,8 @@ def __init__( balance: list[float] | None = None, **kwargs: Any, ): - """Joint loss for keypoint and box predictions for cases where the keypoints and - boxes are inherently linked. + """Joint loss for keypoint and box predictions for cases where + the keypoints and boxes are inherently linked. Based on U{YOLO-Pose: Enhancing YOLO for Multi Person Pose Estimation Using Object Keypoint Similarity Loss}. 
@@ -115,7 +115,9 @@ def __init__( self.bias = bias - self.b_cross_entropy = BCEWithLogitsLoss(pos_weight=torch.tensor([obj_pw])) + self.b_cross_entropy = BCEWithLogitsLoss( + pos_weight=torch.tensor([obj_pw]) + ) self.class_loss = SmoothBCEWithLogitsLoss( label_smoothing=label_smoothing, bce_pow=cls_pw, @@ -135,22 +137,27 @@ def __init__( def prepare( self, outputs: Packet[Tensor], labels: Labels ) -> tuple[list[Tensor], KeypointTargetType]: - """Prepares the labels to be in the correct format for loss calculation. + """Prepares the labels to be in the correct format for loss + calculation. @type outputs: Packet[Tensor] @param outputs: Output from the forward pass. @type labels: L{Labels} @param labels: Dictionary containing the labels. - @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], list[Tensor], - list[tuple[Tensor, Tensor, Tensor, Tensor]], list[Tensor]]] - @return: Tuple containing the original output and the postprocessed labels. The - processed labels are a tuple containing the class targets, box targets, - keypoint targets, indices and anchors. Indicies are a tuple containing - vectors of indices for batch, anchor, feature y and feature x dimensions, - respectively. They are all of shape (n_targets,). The indices are used to - index the output tensors of shape (batch_size, n_anchors, feature_height, - feature_width, n_classes + box_offset + n_keypoints * 3) to get a tensor of - shape (n_targets, n_classes + box_offset + n_keypoints * 3). + @rtype: tuple[list[Tensor], tuple[list[Tensor], list[Tensor], + list[Tensor], list[tuple[Tensor, Tensor, Tensor, Tensor]], + list[Tensor]]] + @return: Tuple containing the original output and the + postprocessed labels. The processed labels are a tuple + containing the class targets, box targets, keypoint targets, + indices and anchors. Indicies are a tuple containing vectors + of indices for batch, anchor, feature y and feature x + dimensions, respectively. They are all of shape + (n_targets,). 
The indices are used to index the output + tensors of shape (batch_size, n_anchors, feature_height, + feature_width, n_classes + box_offset + n_keypoints * 3) to + get a tensor of shape (n_targets, n_classes + box_offset + + n_keypoints * 3). """ predictions = self.get_input_tensors(outputs, "features") @@ -178,16 +185,21 @@ def prepare( anchors: list[Tensor] = [] anchor_indices = ( - torch.arange(self.n_anchors, device=targets.device, dtype=torch.float32) + torch.arange( + self.n_anchors, device=targets.device, dtype=torch.float32 + ) .reshape(self.n_anchors, 1) .repeat(1, n_targets) .unsqueeze(-1) ) - targets = torch.cat((targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2) + targets = torch.cat( + (targets.repeat(self.n_anchors, 1, 1), anchor_indices), 2 + ) xy_deltas = ( torch.tensor( - [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device + [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], + device=targets.device, ).float() * self.bias ) @@ -253,9 +265,15 @@ def forward( "kpt_regression": torch.tensor(0.0, device=device), } - for pred, class_target, box_target, kpt_target, index, anchor, balance in zip( - predictions, *targets, self.balance - ): + for ( + pred, + class_target, + box_target, + kpt_target, + index, + anchor, + balance, + ) in zip(predictions, *targets, self.balance): obj_targets = torch.zeros_like(pred[..., 0], device=device) n_targets = len(class_target) @@ -294,7 +312,8 @@ def forward( self.class_loss.forward( pred_subset[ :, - self.box_offset : self.box_offset + self.n_classes, + self.box_offset : self.box_offset + + self.n_classes, ], class_target, ) @@ -310,7 +329,9 @@ def forward( loss = cast(Tensor, sum(sub_losses.values())).reshape([]) return loss, {name: loss.detach() for name, loss in sub_losses.items()} - def _create_keypoint_target(self, scaled_targets: Tensor, box_xy_deltas: Tensor): + def _create_keypoint_target( + self, scaled_targets: Tensor, box_xy_deltas: Tensor + ): keypoint_target = scaled_targets[:, 
self.box_offset + 1 : -1] for j in range(self.n_keypoints): idx = 3 * j diff --git a/luxonis_train/attached_modules/losses/keypoint_loss.py b/luxonis_train/attached_modules/losses/keypoint_loss.py index fecf40ce..c17ac7a1 100644 --- a/luxonis_train/attached_modules/losses/keypoint_loss.py +++ b/luxonis_train/attached_modules/losses/keypoint_loss.py @@ -28,22 +28,25 @@ def __init__( visibility_loss_weight: float = 1.0, **kwargs: Any, ): - """Keypoint based loss that is computed from OKS-based regression and visibility - loss. + """Keypoint based loss that is computed from OKS-based + regression and visibility loss. @type n_keypoints: int @param n_keypoints: Number of keypoints. @type bce_power: float - @param bce_power: Power used for BCE visibility loss. Defaults to C{1.0}. - @param sigmas: Sigmas used for OKS. If None then use COCO ones if possible or - default ones. Defaults to C{None}. + @param bce_power: Power used for BCE visibility loss. Defaults + to C{1.0}. + @param sigmas: Sigmas used for OKS. If None then use COCO ones + if possible or default ones. Defaults to C{None}. @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If None then use - default one. Defaults to C{None}. + @param area_factor: Factor by which we multiply bbox area. If + None then use default one. Defaults to C{None}. @type regression_loss_weight: float - @param regression_loss_weight: Weight of regression loss. Defaults to C{1.0}. + @param regression_loss_weight: Weight of regression loss. + Defaults to C{1.0}. @type visibility_loss_weight: float - @param visibility_loss_weight: Weight of visibility loss. Defaults to C{1.0}. + @param visibility_loss_weight: Weight of visibility loss. + Defaults to C{1.0}. 
""" super().__init__(**kwargs) @@ -60,18 +63,21 @@ def __init__( def forward( self, prediction: Tensor, target: Tensor, area: Tensor ) -> tuple[Tensor, dict[str, Tensor]]: - """Computes the keypoint loss and visibility loss for a given prediction and - target. + """Computes the keypoint loss and visibility loss for a given + prediction and target. @type prediction: Tensor - @param prediction: Predicted tensor of shape C{[n_detections, n_keypoints * 3]}. + @param prediction: Predicted tensor of shape C{[n_detections, + n_keypoints * 3]}. @type target: Tensor - @param target: Target tensor of shape C{[n_detections, n_keypoints * 3]}. + @param target: Target tensor of shape C{[n_detections, + n_keypoints * 3]}. @type area: Tensor @param area: Area tensor of shape C{[n_detections]}. @rtype: tuple[Tensor, dict[str, Tensor]] - @return: A tuple containing the total loss tensor of shape C{[1,]} and a - dictionary with the regression loss and visibility loss tensors. + @return: A tuple containing the total loss tensor of shape + C{[1,]} and a dictionary with the regression loss and + visibility loss tensors. 
""" sigmas = self.sigmas.to(prediction.device) diff --git a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py index 7915cdce..884d4863 100644 --- a/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py +++ b/luxonis_train/attached_modules/losses/sigmoid_focal_loss.py @@ -37,7 +37,11 @@ def __init__( def forward(self, preds: Tensor, target: Tensor) -> Tensor: loss = sigmoid_focal_loss( - preds, target, alpha=self.alpha, gamma=self.gamma, reduction=self.reduction + preds, + target, + alpha=self.alpha, + gamma=self.gamma, + reduction=self.reduction, ) return loss diff --git a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py index 77071e06..edc2bf98 100644 --- a/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py +++ b/luxonis_train/attached_modules/losses/smooth_bce_with_logits.py @@ -22,20 +22,23 @@ def __init__( """BCE with logits loss and label smoothing. @type label_smoothing: float - @param label_smoothing: Label smoothing factor. Defaults to C{0.0}. + @param label_smoothing: Label smoothing factor. Defaults to + C{0.0}. @type bce_pow: float @param bce_pow: Weight for positive samples. Defaults to C{1.0}. @type weight: list[float] | None - @param weight: a manual rescaling weight given to the loss of each batch - element. If given, it has to be a list of length C{nbatch}. + @param weight: a manual rescaling weight given to the loss of + each batch element. If given, it has to be a list of length + C{nbatch}. @type reduction: Literal["mean", "sum", "none"] - @param reduction: Specifies the reduction to apply to the output: C{'none'} | - C{'mean'} | C{'sum'}. C{'none'}: no reduction will be applied, C{'mean'}: - the sum of the output will be divided by the number of elements in the - output, C{'sum'}: the output will be summed. 
Note: C{size_average} and - C{reduce} are in the process of being deprecated, and in the meantime, - specifying either of those two args will override C{reduction}. Defaults to - C{'mean'}. + @param reduction: Specifies the reduction to apply to the + output: C{'none'} | C{'mean'} | C{'sum'}. C{'none'}: no + reduction will be applied, C{'mean'}: the sum of the output + will be divided by the number of elements in the output, + C{'sum'}: the output will be summed. Note: C{size_average} + and C{reduce} are in the process of being deprecated, and in + the meantime, specifying either of those two args will + override C{reduction}. Defaults to C{'mean'}. """ super().__init__(**kwargs) self.positive_smooth_const = 1.0 - label_smoothing diff --git a/luxonis_train/attached_modules/losses/softmax_focal_loss.py b/luxonis_train/attached_modules/losses/softmax_focal_loss.py index 5caf5d69..43c844f3 100644 --- a/luxonis_train/attached_modules/losses/softmax_focal_loss.py +++ b/luxonis_train/attached_modules/losses/softmax_focal_loss.py @@ -23,11 +23,12 @@ def __init__( reduction: Literal["none", "mean", "sum"] = "mean", **kwargs: Any, ): - """Focal loss implementation for binary classification and segmentation tasks - using Softmax. + """Focal loss implementation for binary classification and + segmentation tasks using Softmax. @type alpha: float - @param alpha: Weighting factor for the rare class. Defaults to C{0.25}. + @param alpha: Weighting factor for the rare class. Defaults to + C{0.25}. @type gamma: float @param gamma: Focusing parameter. Defaults to C{2.0}. @type reduction: Literal["none", "mean", "sum"] diff --git a/luxonis_train/attached_modules/metrics/base_metric.py b/luxonis_train/attached_modules/metrics/base_metric.py index ed232d26..a4109d2d 100644 --- a/luxonis_train/attached_modules/metrics/base_metric.py +++ b/luxonis_train/attached_modules/metrics/base_metric.py @@ -19,8 +19,9 @@ class BaseMetric( ): """A base class for all metrics. 
- This class defines the basic interface for all metrics. It utilizes automatic - registration of defined subclasses to a L{METRICS} registry. + This class defines the basic interface for all metrics. It utilizes + automatic registration of defined subclasses to a L{METRICS} + registry. """ @abstractmethod @@ -33,7 +34,9 @@ def update(self, *args: Unpack[Ts]) -> None: ... @abstractmethod - def compute(self) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor]: + def compute( + self, + ) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor]: """Computes the metric. @rtype: Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tensor] @@ -48,12 +51,14 @@ def compute(self) -> Tensor | tuple[Tensor, dict[str, Tensor]] | dict[str, Tenso def run_update(self, outputs: Packet[Tensor], labels: Labels) -> None: """Calls the metric's update method. - Validates and prepares the inputs, then calls the metric's update method. + Validates and prepares the inputs, then calls the metric's + update method. @type outputs: Packet[Tensor] @param outputs: The outputs of the model. @type labels: Labels - @param labels: The labels of the model. @raises L{IncompatibleException}: If the - inputs are not compatible with the module. + @param labels: The labels of the model. @raises + L{IncompatibleException}: If the inputs are not compatible + with the module. """ self.update(*self.prepare(outputs, labels)) diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision.py b/luxonis_train/attached_modules/metrics/mean_average_precision.py index cc479d76..6d51f55b 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision.py @@ -13,10 +13,11 @@ class MeanAveragePrecision( BaseMetric[list[dict[str, Tensor]], list[dict[str, Tensor]]] ): - """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) for object - detection predictions. 
+ """Compute the Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) for object detection predictions. - Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall (mAR) + Adapted from U{Mean-Average-Precision (mAP) and Mean-Average-Recall + (mAR) }. """ @@ -56,7 +57,9 @@ def prepare( curr_bboxs = box_convert(curr_label[:, 2:], "xywh", "xyxy") curr_bboxs[:, 0::2] *= image_size[1] curr_bboxs[:, 1::2] *= image_size[0] - label_list.append({"boxes": curr_bboxs, "labels": curr_label[:, 1].int()}) + label_list.append( + {"boxes": curr_bboxs, "labels": curr_label[:, 1].int()} + ) return output_list, label_list diff --git a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py index 26f95a51..3b34c242 100644 --- a/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py +++ b/luxonis_train/attached_modules/metrics/mean_average_precision_keypoints.py @@ -47,7 +47,8 @@ def __init__( box_format: Literal["xyxy", "xywh", "cxcywh"] = "xyxy", **kwargs, ): - """Implementation of the mean average precision metric for keypoint detections. + """Implementation of the mean average precision metric for + keypoint detections. Adapted from: U{https://github.com/Lightning-AI/torchmetrics/blob/v1.0.1/src/ torchmetrics/detection/mean_ap.py}. 
@@ -66,7 +67,9 @@ def __init__( """ super().__init__(**kwargs) - self.sigmas = get_sigmas(sigmas, self.n_keypoints, caller_name=self.name) + self.sigmas = get_sigmas( + sigmas, self.n_keypoints, caller_name=self.name + ) self.area_factor = get_with_default( area_factor, "bbox area scaling", self.name, default=0.53 ) @@ -88,7 +91,9 @@ def __init__( self.add_state("groundtruth_labels", default=[], dist_reduce_fx=None) self.add_state("groundtruth_area", default=[], dist_reduce_fx=None) self.add_state("groundtruth_crowds", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) + self.add_state( + "groundtruth_keypoints", default=[], dist_reduce_fx=None + ) def prepare( self, inputs: Packet[Tensor], labels: Labels @@ -117,7 +122,9 @@ def prepare( "boxes": output_bboxes[i][:, :4], "scores": output_bboxes[i][:, 4], "labels": output_bboxes[i][:, 5].int(), - "keypoints": output_kpts[i].reshape(-1, self.n_keypoints * 3), + "keypoints": output_kpts[i].reshape( + -1, self.n_keypoints * 3 + ), } ) @@ -220,7 +227,9 @@ def compute(self) -> tuple[Tensor, dict[str, Tensor]]: coco_target.createIndex() coco_preds.createIndex() - self.coco_eval = COCOeval(coco_target, coco_preds, iouType="keypoints") + self.coco_eval = COCOeval( + coco_target, coco_preds, iouType="keypoints" + ) self.coco_eval.params.kpt_oks_sigmas = self.sigmas.cpu().numpy() self.coco_eval.params.maxDets = [self.max_dets] @@ -251,13 +260,17 @@ def _get_coco_format( crowds: list[Tensor] | None = None, area: list[Tensor] | None = None, ) -> dict[str, list[dict[str, Any]]]: - """Transforms and returns all cached targets or predictions in COCO format. + """Transforms and returns all cached targets or predictions in + COCO format. - Format is defined at U{https://cocodataset.org/#format-data}. + Format is defined at U{ + https://cocodataset.org/#format-data}. 
""" images: list[dict[str, int]] = [] annotations: list[dict[str, Any]] = [] - annotation_id = 1 # has to start with 1, otherwise COCOEval results are wrong + annotation_id = ( + 1 # has to start with 1, otherwise COCOEval results are wrong + ) for image_id, (image_boxes, image_kpts, image_labels) in enumerate( zip(boxes, keypoints, labels) @@ -295,7 +308,11 @@ def _get_coco_format( area_stat = image_box[2] * image_box[3] * self.area_factor n_keypoints = len( - [i for i in range(2, len(image_kpt), 3) if image_kpt[i] != 0] + [ + i + for i in range(2, len(image_kpt), 3) + if image_kpt[i] != 0 + ] ) # number of annotated keypoints annotation = { "id": annotation_id, @@ -304,7 +321,9 @@ def _get_coco_format( "area": area_stat, "category_id": image_label, "iscrowd": ( - crowds[image_id][k].cpu().tolist() if crowds is not None else 0 + crowds[image_id][k].cpu().tolist() + if crowds is not None + else 0 ), "keypoints": image_kpt, "num_keypoints": n_keypoints, @@ -324,9 +343,15 @@ def _get_coco_format( annotation_id += 1 classes = [{"id": i, "name": str(i)} for i in self._get_classes()] - return {"images": images, "annotations": annotations, "categories": classes} + return { + "images": images, + "annotations": annotations, + "categories": classes, + } - def _get_safe_item_values(self, item: dict[str, Tensor]) -> tuple[Tensor, Tensor]: + def _get_safe_item_values( + self, item: dict[str, Tensor] + ) -> tuple[Tensor, Tensor]: """Convert and return the boxes.""" boxes = self._fix_empty_tensors(item["boxes"]) if boxes.numel() > 0: @@ -335,7 +360,8 @@ def _get_safe_item_values(self, item: dict[str, Tensor]) -> tuple[Tensor, Tensor return boxes, keypoints def _get_classes(self) -> list[int]: - """Return a list of unique classes found in ground truth and detection data.""" + """Return a list of unique classes found in ground truth and + detection data.""" if len(self.pred_labels) > 0 or len(self.groundtruth_labels) > 0: return ( torch.cat(self.pred_labels + 
self.groundtruth_labels) @@ -347,7 +373,8 @@ def _get_classes(self) -> list[int]: @staticmethod def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: - """Empty tensors can cause problems in DDP mode, this methods corrects them.""" + """Empty tensors can cause problems in DDP mode, this methods + corrects them.""" if input_tensor.numel() == 0 and input_tensor.ndim == 1: return input_tensor.unsqueeze(0) return input_tensor diff --git a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py index 182cb269..503a00ad 100644 --- a/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py +++ b/luxonis_train/attached_modules/metrics/object_keypoint_similarity.py @@ -37,28 +37,35 @@ def __init__( use_cocoeval_oks: bool = True, **kwargs: Any, ) -> None: - """Object Keypoint Similarity metric for evaluating keypoint predictions. + """Object Keypoint Similarity metric for evaluating keypoint + predictions. @type sigmas: list[float] | None - @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then - use COCO if possible otherwise defaults. Defaults to C{None}. + @param sigmas: Sigma for each keypoint to weigh its importance, + if C{None}, then use COCO if possible otherwise defaults. + Defaults to C{None}. @type area_factor: float | None - @param area_factor: Factor by which we multiply bbox area. If None then use - default one. Defaults to C{None}. + @param area_factor: Factor by which we multiply bbox area. If + None then use default one. Defaults to C{None}. @type use_cocoeval_oks: bool - @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use - the one from definition. Defaults to C{True}. + @param use_cocoeval_oks: Whether to use same OKS formula as in + COCOeval or use the one from definition. Defaults to + C{True}. 
""" super().__init__(**kwargs) - self.sigmas = get_sigmas(sigmas, self.n_keypoints, caller_name=self.name) + self.sigmas = get_sigmas( + sigmas, self.n_keypoints, caller_name=self.name + ) self.area_factor = get_with_default( area_factor, "bbox area scaling", self.name, default=0.53 ) self.use_cocoeval_oks = use_cocoeval_oks self.add_state("pred_keypoints", default=[], dist_reduce_fx=None) - self.add_state("groundtruth_keypoints", default=[], dist_reduce_fx=None) + self.add_state( + "groundtruth_keypoints", default=[], dist_reduce_fx=None + ) self.add_state("groundtruth_scales", default=[], dist_reduce_fx=None) def prepare( @@ -92,8 +99,12 @@ def prepare( curr_kpts[:, 1::3] *= image_size[0] curr_bboxs_widths = curr_bboxs[:, 2] - curr_bboxs[:, 0] curr_bboxs_heights = curr_bboxs[:, 3] - curr_bboxs[:, 1] - curr_scales = curr_bboxs_widths * curr_bboxs_heights * self.area_factor - label_list_oks.append({"keypoints": curr_kpts, "scales": curr_scales}) + curr_scales = ( + curr_bboxs_widths * curr_bboxs_heights * self.area_factor + ) + label_list_oks.append( + {"keypoints": curr_kpts, "scales": curr_scales} + ) return output_list_oks, label_list_oks @@ -139,10 +150,14 @@ def compute(self) -> Tensor: image_mean_oks = torch.zeros(len(self.groundtruth_keypoints)) for i, (pred_kpts, gt_kpts, gt_scales) in enumerate( zip( - self.pred_keypoints, self.groundtruth_keypoints, self.groundtruth_scales + self.pred_keypoints, + self.groundtruth_keypoints, + self.groundtruth_scales, ) ): - gt_kpts = torch.reshape(gt_kpts, (-1, self.n_keypoints, 3)) # [N, K, 3] + gt_kpts = torch.reshape( + gt_kpts, (-1, self.n_keypoints, 3) + ) # [N, K, 3] image_ious = compute_oks( pred_kpts, @@ -154,7 +169,9 @@ def compute(self) -> Tensor: gt_indices, pred_indices = linear_sum_assignment( image_ious.cpu().numpy(), maximize=True ) - matched_ious = [image_ious[n, m] for n, m in zip(gt_indices, pred_indices)] + matched_ious = [ + image_ious[n, m] for n, m in zip(gt_indices, pred_indices) + ] 
image_mean_oks[i] = torch.tensor(matched_ious).mean() final_oks = image_mean_oks.nanmean() @@ -163,7 +180,8 @@ def compute(self) -> Tensor: @staticmethod def _fix_empty_tensors(input_tensor: Tensor) -> Tensor: - """Empty tensors can cause problems in DDP mode, this methods corrects them.""" + """Empty tensors can cause problems in DDP mode, this methods + corrects them.""" if input_tensor.numel() == 0 and input_tensor.ndim == 1: return input_tensor.unsqueeze(0) return input_tensor @@ -176,7 +194,8 @@ def compute_oks( sigmas: Tensor, use_cocoeval_oks: bool, ) -> Tensor: - """Compute Object Keypoint Similarity between every GT and prediction. + """Compute Object Keypoint Similarity between every GT and + prediction. @type pred: Tensor[N, K, 3] @param pred: Predicted keypoints. @@ -185,11 +204,11 @@ def compute_oks( @type scales: Tensor[M] @param scales: Scales of the bounding boxes. @type sigmas: Tensor - @param sigmas: Sigma for each keypoint to weigh its importance, if C{None}, then use - same weights for all. + @param sigmas: Sigma for each keypoint to weigh its importance, if + C{None}, then use same weights for all. @type use_cocoeval_oks: bool - @param use_cocoeval_oks: Whether to use same OKS formula as in COCOeval or use the - one from definition. + @param use_cocoeval_oks: Whether to use same OKS formula as in + COCOeval or use the one from definition. @rtype: Tensor @return: Object Keypoint Similarity every pred and gt [M, N] """ diff --git a/luxonis_train/attached_modules/visualizers/base_visualizer.py b/luxonis_train/attached_modules/visualizers/base_visualizer.py index 2690f04c..817a09d5 100644 --- a/luxonis_train/attached_modules/visualizers/base_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/base_visualizer.py @@ -17,8 +17,9 @@ class BaseVisualizer( ): """A base class for all visualizers. - This class defines the basic interface for all visualizers. 
It utilizes automatic - registration of defined subclasses to the L{VISUALIZERS} registry. + This class defines the basic interface for all visualizers. It + utilizes automatic registration of defined subclasses to the + L{VISUALIZERS} registry. """ @abstractmethod @@ -27,7 +28,12 @@ def forward( label_canvas: Tensor, prediction_canvas: Tensor, *args: Unpack[Ts], - ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]] | list[Tensor]: + ) -> ( + Tensor + | tuple[Tensor, Tensor] + | tuple[Tensor, list[Tensor]] + | list[Tensor] + ): """Forward pass of the visualizer. Takes an image and the prepared inputs from the `prepare` method and @@ -62,4 +68,6 @@ def run( inputs: Packet[Tensor], labels: Labels, ) -> Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]]: - return self(label_canvas, prediction_canvas, *self.prepare(inputs, labels)) + return self( + label_canvas, prediction_canvas, *self.prepare(inputs, labels) + ) diff --git a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py index b2c8f411..e544bf06 100644 --- a/luxonis_train/attached_modules/visualizers/bbox_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/bbox_visualizer.py @@ -5,7 +5,12 @@ from torch import Tensor from .base_visualizer import BaseVisualizer -from .utils import Color, draw_bounding_box_labels, draw_bounding_boxes, get_color +from .utils import ( + Color, + draw_bounding_box_labels, + draw_bounding_boxes, + get_color, +) class BBoxVisualizer(BaseVisualizer[list[Tensor], Tensor]): @@ -24,26 +29,33 @@ def __init__( ): """Visualizer for bounding box predictions. - Creates a visualization of the bounding box predictions and labels. + Creates a visualization of the bounding box predictions and + labels. @type labels: dict[int, str] | list[str] | None - @param labels: Either a dictionary mapping class indices to names, or a list of - names. 
If list is provided, the label mapping is done by index. By default, - no labels are drawn. + @param labels: Either a dictionary mapping class indices to + names, or a list of names. If list is provided, the label + mapping is done by index. By default, no labels are drawn. @type draw_labels: bool - @param draw_labels: Whether or not to draw labels. Defaults to C{True}. + @param draw_labels: Whether or not to draw labels. Defaults to + C{True}. @type colors: dict[int, Color] | list[Color] | None - @param colors: Either a dictionary mapping class indices to colors, or a list of - colors. If list is provided, the color mapping is done by index. By default, - random colors are used. + @param colors: Either a dictionary mapping class indices to + colors, or a list of colors. If list is provided, the color + mapping is done by index. By default, random colors are + used. @type fill: bool - @param fill: Whether or not to fill the bounding boxes. Defaults to C{False}. + @param fill: Whether or not to fill the bounding boxes. Defaults + to C{False}. @type width: int | None - @param width: The width of the bounding box lines. Defaults to C{1}. + @param width: The width of the bounding box lines. Defaults to + C{1}. @type font: str | None - @param font: A filename containing a TrueType font. Defaults to C{None}. + @param font: A filename containing a TrueType font. Defaults to + C{None}. @type font_size: int | None - @param font_size: The font size to use for the labels. Defaults to C{None}. + @param font_size: The font size to use for the labels. Defaults + to C{None}. 
""" super().__init__(**kwargs) if isinstance(labels, list): @@ -54,9 +66,13 @@ def __init__( } if colors is None: - colors = {label: get_color(i) for i, label in self.bbox_labels.items()} + colors = { + label: get_color(i) for i, label in self.bbox_labels.items() + } if isinstance(colors, list): - colors = {self.bbox_labels[i]: color for i, color in enumerate(colors)} + colors = { + self.bbox_labels[i]: color for i, color in enumerate(colors) + } self.colors = colors self.fill = fill self.width = width @@ -158,16 +174,17 @@ def forward( predictions: list[Tensor], targets: Tensor, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the bounding box predictions and labels. + """Creates a visualization of the bounding box predictions and + labels. @type label_canvas: Tensor @param label_canvas: The canvas containing the labels. @type prediction_canvas: Tensor @param prediction_canvas: The canvas containing the predictions. @type prediction: Tensor - @param prediction: The predicted bounding boxes. The shape should be [N, 6], - where N is the number of bounding boxes and the last dimension is [x1, y1, - x2, y2, class, conf]. + @param prediction: The predicted bounding boxes. The shape + should be [N, 6], where N is the number of bounding boxes + and the last dimension is [x1, y1, x2, y2, class, conf]. @type targets: Tensor @param targets: The target bounding boxes. """ diff --git a/luxonis_train/attached_modules/visualizers/classification_visualizer.py b/luxonis_train/attached_modules/visualizers/classification_visualizer.py index c048872d..9d26172b 100644 --- a/luxonis_train/attached_modules/visualizers/classification_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/classification_visualizer.py @@ -23,8 +23,8 @@ def __init__( """Visualizer for classification tasks. @type include_plot: bool - @param include_plot: Whether to include a plot of the class probabilities in the - visualization. Defaults to C{True}. 
+ @param include_plot: Whether to include a plot of the class + probabilities in the visualization. Defaults to C{True}. """ super().__init__(**kwargs) self.include_plot = include_plot @@ -38,7 +38,9 @@ def _get_class_name(self, pred: Tensor) -> str: return str(idx) return self.class_names[idx] - def _generate_plot(self, prediction: Tensor, width: int, height: int) -> Tensor: + def _generate_plot( + self, prediction: Tensor, width: int, height: int + ) -> Tensor: pred = prediction.softmax(-1).detach().cpu().numpy() fig, ax = plt.subplots(figsize=(width / 100, height / 100)) ax.bar(np.arange(len(pred)), pred) @@ -88,7 +90,9 @@ def forward( overlay[i] = numpy_to_torch_img(arr) if self.include_plot: plots[i] = self._generate_plot( - prediction, prediction_canvas.shape[3], prediction_canvas.shape[2] + prediction, + prediction_canvas.shape[3], + prediction_canvas.shape[2], ) if self.include_plot: diff --git a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py index 287d5e1c..53b9cb88 100644 --- a/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/keypoint_visualizer.py @@ -22,18 +22,20 @@ def __init__( """Visualizer for keypoints. @type visibility_threshold: float - @param visibility_threshold: Threshold for visibility of keypoints. If the - visibility of a keypoint is below this threshold, it is considered as not - visible. Defaults to C{0.5}. + @param visibility_threshold: Threshold for visibility of + keypoints. If the visibility of a keypoint is below this + threshold, it is considered as not visible. Defaults to + C{0.5}. @type connectivity: list[tuple[int, int]] | None - @param connectivity: List of tuples of keypoint indices that define the - connections in the skeleton. Defaults to C{None}. + @param connectivity: List of tuples of keypoint indices that + define the connections in the skeleton. Defaults to C{None}. 
@type visible_color: L{Color} - @param visible_color: Color of visible keypoints. Either a string or a tuple of - RGB values. Defaults to C{"red"}. + @param visible_color: Color of visible keypoints. Either a + string or a tuple of RGB values. Defaults to C{"red"}. @type nonvisible_color: L{Color} | None - @param nonvisible_color: Color of nonvisible keypoints. If C{None}, nonvisible - keypoints are not drawn. Defaults to C{None}. + @param nonvisible_color: Color of nonvisible keypoints. If + C{None}, nonvisible keypoints are not drawn. Defaults to + C{None}. """ super().__init__(**kwargs) self.visibility_threshold = visibility_threshold @@ -62,7 +64,9 @@ def draw_predictions( if nonvisible_color is not None: _kwargs = deepcopy(kwargs) _kwargs["colors"] = nonvisible_color - nonvisible_kpts = prediction[..., :2] * mask.unsqueeze(-1).float() + nonvisible_kpts = ( + prediction[..., :2] * mask.unsqueeze(-1).float() + ) viz[i] = draw_keypoints( viz[i].clone(), nonvisible_kpts[..., :2], diff --git a/luxonis_train/attached_modules/visualizers/multi_visualizer.py b/luxonis_train/attached_modules/visualizers/multi_visualizer.py index 79c9ebdf..b7ecbfbb 100644 --- a/luxonis_train/attached_modules/visualizers/multi_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/multi_visualizer.py @@ -7,7 +7,8 @@ class MultiVisualizer(BaseVisualizer[Packet[Tensor], Labels]): - """Special type of visualizer that combines multiple visualizers together. + """Special type of visualizer that combines multiple visualizers + together. All the visualizers are applied in the order they are provided and they all draw on the same canvas. 
@@ -25,7 +26,9 @@ def __init__(self, visualizers: list[Kwargs], **kwargs): self.visualizers = [] for item in visualizers: visualizer_params = item.get("params", {}) - visualizer = VISUALIZERS.get(item["name"])(**visualizer_params, **kwargs) + visualizer = VISUALIZERS.get(item["name"])( + **visualizer_params, **kwargs + ) self.visualizers.append(visualizer) def prepare( @@ -42,12 +45,16 @@ def forward( labels: Labels, ) -> tuple[Tensor, Tensor]: for visualizer in self.visualizers: - match visualizer.run(label_canvas, prediction_canvas, outputs, labels): + match visualizer.run( + label_canvas, prediction_canvas, outputs, labels + ): case Tensor() as prediction_viz: prediction_canvas = prediction_viz case (Tensor(data=label_viz), Tensor(data=prediction_viz)): label_canvas = label_viz prediction_canvas = prediction_viz case _: - raise NotImplementedError("Unexpected return type from visualizer.") + raise NotImplementedError( + "Unexpected return type from visualizer." + ) return label_canvas, prediction_canvas diff --git a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py index a95511e7..15e2fd09 100644 --- a/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py +++ b/luxonis_train/attached_modules/visualizers/segmentation_visualizer.py @@ -5,7 +5,12 @@ from torch import Tensor from .base_visualizer import BaseVisualizer -from .utils import Color, draw_segmentation_labels, get_color, seg_output_to_bool +from .utils import ( + Color, + draw_segmentation_labels, + get_color, + seg_output_to_bool, +) logger = logging.getLogger(__name__) log_disable = False @@ -97,7 +102,8 @@ def forward( targets: Tensor, **kwargs, ) -> tuple[Tensor, Tensor]: - """Creates a visualization of the segmentation predictions and labels. + """Creates a visualization of the segmentation predictions and + labels. 
@type label_canvas: Tensor @param label_canvas: The canvas to draw the labels on. @@ -145,7 +151,9 @@ def _adjust_colors( if not log_disable: if colors is None: - logger.warning("No colors provided. Using random colors instead.") + logger.warning( + "No colors provided. Using random colors instead." + ) elif data.size(1) != len(colors): logger.warning( f"Number of colors ({len(colors)}) does not match number of " diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index d5603bda..d2bfa931 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ b/luxonis_train/attached_modules/visualizers/utils.py @@ -44,13 +44,14 @@ def figure_to_torch(fig: Figure, width: int, height: int) -> Tensor: def torch_img_to_numpy( img: Tensor, reverse_colors: bool = False ) -> npt.NDArray[np.uint8]: - """Converts a torch image (CHW) to a numpy array (HWC). Optionally also converts - colors. + """Converts a torch image (CHW) to a numpy array (HWC). Optionally + also converts colors. @type img: Tensor @param img: Torch image (CHW) @type reverse_colors: bool - @param reverse_colors: Whether to reverse colors (RGB to BGR). Defaults to False. + @param reverse_colors: Whether to reverse colors (RGB to BGR). + Defaults to False. @rtype: npt.NDArray[np.uint8] @return: Numpy image (HWC) """ @@ -129,8 +130,8 @@ def draw_bounding_box_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: @type img: Tensor @param img: Image to draw on. @type label: Tensor - @param label: Bounding box label. The shape should be (n_instances, 4), where the - last dimension is (x, y, w, h). + @param label: Bounding box label. The shape should be (n_instances, + 4), where the last dimension is (x, y, w, h). @type kwargs: dict @param kwargs: Additional arguments to pass to L{torchvision.utils.draw_bounding_boxes}. 
@@ -150,10 +151,11 @@ def draw_keypoint_labels(img: Tensor, label: Tensor, **kwargs) -> Tensor: @type img: Tensor @param img: Image to draw on. @type label: Tensor - @param label: Keypoint label. The shape should be (n_instances, 3), where the last - dimension is (x, y, visibility). + @param label: Keypoint label. The shape should be (n_instances, 3), + where the last dimension is (x, y, visibility). @type kwargs: dict - @param kwargs: Additional arguments to pass to L{torchvision.utils.draw_keypoints}. + @param kwargs: Additional arguments to pass to + L{torchvision.utils.draw_keypoints}. @rtype: Tensor @return: Image with keypoint labels drawn on. """ @@ -191,7 +193,8 @@ def unnormalize( std: list[float] | float | None = None, to_uint8: bool = False, ) -> Tensor: - """Unnormalizes an image back to original values, optionally converts it to uint8. + """Unnormalizes an image back to original values, optionally + converts it to uint8. @type img: Tensor @param img: Image to unnormalize. @@ -304,9 +307,12 @@ def get_color(seed: int) -> Color: # # TEST: def combine_visualizations( - visualization: Tensor | tuple[Tensor, Tensor] | tuple[Tensor, list[Tensor]], + visualization: Tensor + | tuple[Tensor, Tensor] + | tuple[Tensor, list[Tensor]], ) -> Tensor: - """Default way of combining multiple visualizations into one final image.""" + """Default way of combining multiple visualizations into one final + image.""" def resize_to_match( fst: Tensor, @@ -411,7 +417,9 @@ def resize_to_match( case Tensor() as viz: return viz case (Tensor(data=viz_labels), Tensor(data=viz_predictions)): - viz_labels, viz_predictions = resize_to_match(viz_labels, viz_predictions) + viz_labels, viz_predictions = resize_to_match( + viz_labels, viz_predictions + ) return torch.cat([viz_labels, viz_predictions], dim=-1) case (Tensor(data=_), [*viz]) if isinstance(viz, list) and all( diff --git a/luxonis_train/callbacks/gpu_stats_monitor.py b/luxonis_train/callbacks/gpu_stats_monitor.py index 
244e8a6d..a189ed3f 100644 --- a/luxonis_train/callbacks/gpu_stats_monitor.py +++ b/luxonis_train/callbacks/gpu_stats_monitor.py @@ -49,9 +49,9 @@ def __init__( fan_speed: bool = False, temperature: bool = False, ): - """Automatically monitors and logs GPU stats during training stage. - C{GPUStatsMonitor} is a callback and in order to use it you need to assign a - logger in the C{Trainer}. + """Automatically monitors and logs GPU stats during training + stage. C{GPUStatsMonitor} is a callback and in order to use it + you need to assign a logger in the C{Trainer}. GPU stats are mainly based on C{nvidia-smi --query-gpu} command. The description of the queries is as follows: @@ -158,7 +158,9 @@ def on_train_batch_start( gpu_stat_keys = self._get_gpu_stat_keys() gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys]) - logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys) + logs = self._parse_gpu_stats( + self._device_ids, gpu_stats, gpu_stat_keys + ) if self._log_stats.inter_step_time and self._snap_inter_step_time: # First log at beginning of second step @@ -184,9 +186,13 @@ def on_train_batch_end( if not trainer._logger_connector.should_update_logs: return - gpu_stat_keys = self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys() + gpu_stat_keys = ( + self._get_gpu_stat_keys() + self._get_gpu_device_stat_keys() + ) gpu_stats = self._get_gpu_stats([k for k, _ in gpu_stat_keys]) - logs = self._parse_gpu_stats(self._device_ids, gpu_stats, gpu_stat_keys) + logs = self._parse_gpu_stats( + self._device_ids, gpu_stats, gpu_stat_keys + ) if self._log_stats.intra_step_time and self._snap_intra_step_time: logs["batch_time/intra_step (ms)"] = ( @@ -204,7 +210,9 @@ def _get_gpu_ids(device_ids: List[int]) -> List[str]: cuda_visible_devices: List[str] = os.getenv( "CUDA_VISIBLE_DEVICES", default=default ).split(",") - return [cuda_visible_devices[device_id].strip() for device_id in device_ids] + return [ + cuda_visible_devices[device_id].strip() for 
device_id in device_ids + ] def _get_gpu_stats(self, queries: List[str]) -> List[List[float]]: if not queries: @@ -242,7 +250,9 @@ def _to_float(x: str) -> float: @staticmethod def _parse_gpu_stats( - device_ids: List[int], stats: List[List[float]], keys: List[Tuple[str, str]] + device_ids: List[int], + stats: List[List[float]], + keys: List[Tuple[str, str]], ) -> Dict[str, float]: """Parse the gpu stats into a loggable dict.""" logs = {} @@ -279,6 +289,8 @@ def _get_gpu_device_stat_keys(self) -> List[Tuple[str, str]]: stat_keys.append(("fan.speed", "%")) if self._log_stats.temperature: - stat_keys.extend([("temperature.gpu", "°C"), ("temperature.memory", "°C")]) + stat_keys.extend( + [("temperature.gpu", "°C"), ("temperature.memory", "°C")] + ) return stat_keys diff --git a/luxonis_train/callbacks/luxonis_progress_bar.py b/luxonis_train/callbacks/luxonis_progress_bar.py index 93c6c0a6..b8bf6512 100644 --- a/luxonis_train/callbacks/luxonis_progress_bar.py +++ b/luxonis_train/callbacks/luxonis_progress_bar.py @@ -3,7 +3,11 @@ import lightning.pytorch as pl import tabulate -from lightning.pytorch.callbacks import ProgressBar, RichProgressBar, TQDMProgressBar +from lightning.pytorch.callbacks import ( + ProgressBar, + RichProgressBar, + TQDMProgressBar, +) from rich.console import Console from rich.table import Table @@ -29,7 +33,8 @@ def print_results( ) -> None: """Prints results to the console. - This includes the stage name, loss value, and tables with metrics. + This includes the stage name, loss value, and tables with + metrics. @type stage: str @param stage: Stage name. @@ -38,12 +43,13 @@ def print_results( @type metrics: Mapping[str, Mapping[str, int | str | float]] @param metrics: Metrics in format {table_name: table}. """ - pass + ... 
@CALLBACKS.register_module() class LuxonisTQDMProgressBar(TQDMProgressBar, BaseLuxonisProgressBar): - """Custom text progress bar based on TQDMProgressBar from Pytorch Lightning.""" + """Custom text progress bar based on TQDMProgressBar from Pytorch + Lightning.""" def __init__(self): super().__init__(leave=True) @@ -70,7 +76,8 @@ def _print_table( @type key_name: str @param key_name: Name of the key column. Defaults to C{"Name"}. @type value_name: str - @param value_name: Name of the value column. Defaults to C{"Value"}. + @param value_name: Name of the value column. Defaults to + C{"Value"}. """ self._rule(title) print( @@ -99,7 +106,8 @@ def print_results( @CALLBACKS.register_module() class LuxonisRichProgressBar(RichProgressBar, BaseLuxonisProgressBar): - """Custom rich text progress bar based on RichProgressBar from Pytorch Lightning.""" + """Custom rich text progress bar based on RichProgressBar from + Pytorch Lightning.""" def __init__(self): super().__init__(leave=True) @@ -129,7 +137,8 @@ def print_table( @type key_name: str @param key_name: Name of the key column. Defaults to C{"Name"}. @type value_name: str - @param value_name: Name of the value column. Defaults to C{"Value"}. + @param value_name: Name of the value column. Defaults to + C{"Value"}. 
""" rich_table = Table( title=title, @@ -149,7 +158,9 @@ def print_results( metrics: Mapping[str, Mapping[str, int | str | float]], ) -> None: self.console.rule(f"{stage}", style="bold magenta") - self.console.print(f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]") + self.console.print( + f"[bold magenta]Loss:[/bold magenta] [white]{loss}[/white]" + ) self.console.print("[bold magenta]Metrics:[/bold magenta]") for table_name, table in metrics.items(): self.print_table(table_name, table) diff --git a/luxonis_train/callbacks/metadata_logger.py b/luxonis_train/callbacks/metadata_logger.py index 3186a666..ab29f7d0 100644 --- a/luxonis_train/callbacks/metadata_logger.py +++ b/luxonis_train/callbacks/metadata_logger.py @@ -15,8 +15,9 @@ class MetadataLogger(pl.Callback): def __init__(self, hyperparams: list[str]): """Callback that logs training metadata. - Metadata include all defined hyperparameters together with git hashes of - luxonis-ml and luxonis-train packages. Also stores this information locally. + Metadata include all defined hyperparameters together with git + hashes of luxonis-ml and luxonis-train packages. Also stores + this information locally. @type hyperparams: list[str] @param hyperparams: List of hyperparameters to log. 
@@ -25,7 +26,9 @@ def __init__(self, hyperparams: list[str]): self.hyperparams = hyperparams def on_fit_start( - self, _: pl.Trainer, pl_module: "luxonis_train.models.LuxonisLightningModule" + self, + _: pl.Trainer, + pl_module: "luxonis_train.models.LuxonisLightningModule", ) -> None: cfg: Config = pl_module.cfg @@ -35,7 +38,9 @@ def on_fit_start( if luxonis_ml_hash: # pragma: no cover hparams["luxonis_ml"] = luxonis_ml_hash - luxonis_train_hash = self._get_editable_package_git_hash("luxonis_train") + luxonis_train_hash = self._get_editable_package_git_hash( + "luxonis_train" + ) if luxonis_train_hash: # pragma: no cover hparams["luxonis_train"] = luxonis_train_hash @@ -52,8 +57,8 @@ def _get_editable_package_git_hash( @type package_name: str @param package_name: Name of the package. @rtype: str or None - @return: Git hash of the package or None if the package is not installed in - editable mode. + @return: Git hash of the package or None if the package is not + installed in editable mode. """ try: distribution = pkg_resources.get_distribution(package_name) diff --git a/luxonis_train/callbacks/module_freezer.py b/luxonis_train/callbacks/module_freezer.py index 4f73ff30..de0afa99 100644 --- a/luxonis_train/callbacks/module_freezer.py +++ b/luxonis_train/callbacks/module_freezer.py @@ -13,7 +13,8 @@ def __init__(self, frozen_modules: list[tuple[nn.Module, int]]): """Callback that freezes parts of the model. @type frozen_modules: list[tuple[nn.Module, int]] - @param frozen_modules: List of tuples of modules and epochs to freeze until. + @param frozen_modules: List of tuples of modules and epochs to + freeze until. 
""" super().__init__() self.frozen_modules = frozen_modules diff --git a/luxonis_train/callbacks/needs_checkpoint.py b/luxonis_train/callbacks/needs_checkpoint.py index ad6d80e4..b3de6aed 100644 --- a/luxonis_train/callbacks/needs_checkpoint.py +++ b/luxonis_train/callbacks/needs_checkpoint.py @@ -10,7 +10,9 @@ class NeedsCheckpoint(pl.Callback): def __init__( - self, preferred_checkpoint: Literal["metric", "loss"] = "metric", **kwargs + self, + preferred_checkpoint: Literal["metric", "loss"] = "metric", + **kwargs, ): super().__init__(**kwargs) self.preferred_checkpoint = preferred_checkpoint diff --git a/luxonis_train/callbacks/test_on_train_end.py b/luxonis_train/callbacks/test_on_train_end.py index f2bb09ec..a60a16dd 100644 --- a/luxonis_train/callbacks/test_on_train_end.py +++ b/luxonis_train/callbacks/test_on_train_end.py @@ -27,4 +27,6 @@ def on_train_end( for callback in trainer.callbacks: # type: ignore if isinstance(callback, ModelCheckpoint): if hash(callback.monitor) in best_paths: - callback.best_model_path = best_paths[hash(callback.monitor)] + callback.best_model_path = best_paths[ + hash(callback.monitor) + ] diff --git a/luxonis_train/callbacks/upload_checkpoint.py b/luxonis_train/callbacks/upload_checkpoint.py index 29da59ef..b9753e94 100644 --- a/luxonis_train/callbacks/upload_checkpoint.py +++ b/luxonis_train/callbacks/upload_checkpoint.py @@ -12,7 +12,8 @@ @CALLBACKS.register_module() class UploadCheckpoint(pl.Callback): - """Callback that uploads best checkpoint based on the validation loss.""" + """Callback that uploads best checkpoint based on the validation + loss.""" def __init__(self): """Constructs `UploadCheckpoint`. 
@@ -43,7 +44,9 @@ def on_save_checkpoint( if curr_best_checkpoint not in self.last_best_checkpoints: self.logger.info("Uploading checkpoint...") temp_filename = ( - Path(curr_best_checkpoint).parent.with_suffix(".ckpt").name + Path(curr_best_checkpoint) + .parent.with_suffix(".ckpt") + .name ) torch.save(checkpoint, temp_filename) module.logger.upload_artifact(temp_filename, typ="weights") diff --git a/luxonis_train/core/core.py b/luxonis_train/core/core.py index 778d6cae..cffa3ff1 100644 --- a/luxonis_train/core/core.py +++ b/luxonis_train/core/core.py @@ -19,7 +19,10 @@ from typeguard import typechecked from luxonis_train.attached_modules.visualizers import get_unnormalized_images -from luxonis_train.callbacks import LuxonisRichProgressBar, LuxonisTQDMProgressBar +from luxonis_train.callbacks import ( + LuxonisRichProgressBar, + LuxonisTQDMProgressBar, +) from luxonis_train.loaders import BaseLoaderTorch, collate_fn from luxonis_train.models import LuxonisLightningModule from luxonis_train.utils import Config, DatasetMetadata, LuxonisTrackerPL @@ -40,8 +43,8 @@ class LuxonisModel: """Common logic of the core components. - This class contains common logic of the core components (trainer, evaluator, - exporter, etc.). + This class contains common logic of the core components (trainer, + evaluator, exporter, etc.). """ def __init__( @@ -152,7 +155,9 @@ def __init__( sampler = None # TODO: implement weighted sampler if self.cfg.trainer.use_weighted_sampler: - raise NotImplementedError("Weighted sampler is not implemented yet.") + raise NotImplementedError( + "Weighted sampler is not implemented yet." 
+ ) self.pytorch_loaders = { view: torch_data.DataLoader( @@ -162,7 +167,9 @@ def __init__( collate_fn=collate_fn, shuffle=view == "train", drop_last=( - self.cfg.trainer.skip_last_batch if view == "train" else False + self.cfg.trainer.skip_last_batch + if view == "train" + else False ), pin_memory=self.cfg.trainer.pin_memory, sampler=sampler if view == "train" else None, @@ -170,7 +177,9 @@ def __init__( for view in ["train", "val", "test"] } - self.dataset_metadata = DatasetMetadata.from_loader(self.loaders["train"]) + self.dataset_metadata = DatasetMetadata.from_loader( + self.loaders["train"] + ) self.cfg.save_data(osp.join(self.run_save_dir, "config.yaml")) @@ -206,14 +215,17 @@ def train( @type new_thread: bool @param new_thread: Runs training in new thread if set to True. @type resume_weights: str | None - @param resume_weights: Path to checkpoint to resume training from. + @param resume_weights: Path to the checkpoint from which to to + resume the training. """ if self.cfg.trainer.matmul_precision is not None: logger.info( f"Setting matmul precision to {self.cfg.trainer.matmul_precision}" ) - torch.set_float32_matmul_precision(self.cfg.trainer.matmul_precision) + torch.set_float32_matmul_precision( + self.cfg.trainer.matmul_precision + ) if resume_weights is not None: resume_weights = str( @@ -221,7 +233,9 @@ def train( ) def graceful_exit(signum: int, _): # pragma: no cover - logger.info(f"{signal.Signals(signum).name} received, stopping training...") + logger.info( + f"{signal.Signals(signum).name} received, stopping training..." + ) ckpt_path = osp.join(self.run_save_dir, "resume.ckpt") self.pl_trainer.save_checkpoint(ckpt_path) self.tracker.upload_artifact( @@ -264,7 +278,10 @@ def thread_exception_hook(args): self.thread.start() def export( - self, onnx_save_path: str | None = None, *, weights: str | Path | None = None + self, + onnx_save_path: str | None = None, + *, + weights: str | Path | None = None, ) -> None: """Runs export. 
@@ -285,8 +302,12 @@ def export( export_save_dir = Path(self.run_save_dir, "export") export_save_dir.mkdir(parents=True, exist_ok=True) - export_path = export_save_dir / (self.cfg.exporter.name or self.cfg.model.name) - onnx_save_path = onnx_save_path or str(export_path.with_suffix(".onnx")) + export_path = export_save_dir / ( + self.cfg.exporter.name or self.cfg.model.name + ) + onnx_save_path = onnx_save_path or str( + export_path.with_suffix(".onnx") + ) with replace_weights(self.lightning_module, weights): output_names = self.lightning_module.export_onnx( @@ -296,7 +317,9 @@ def export( try_onnx_simplify(onnx_save_path) self._exported_models["onnx"] = Path(onnx_save_path) - scale_values, mean_values, reverse_channels = get_preprocessing(self.cfg) + scale_values, mean_values, reverse_channels = get_preprocessing( + self.cfg + ) if self.cfg.exporter.blobconverter.active: try: @@ -308,7 +331,9 @@ def export( str(export_save_dir), onnx_save_path, ) - self._exported_models["blob"] = export_path.with_suffix(".blob") + self._exported_models["blob"] = export_path.with_suffix( + ".blob" + ) except ImportError: logger.error("Failed to import `blobconverter`") logger.warning( @@ -350,20 +375,20 @@ def test( self, new_thread: Literal[False] = ..., view: Literal["train", "test", "val"] = "val", - ) -> Mapping[str, float]: - ... + ) -> Mapping[str, float]: ... @overload def test( self, new_thread: Literal[True] = ..., view: Literal["train", "test", "val"] = "val", - ) -> None: - ... + ) -> None: ... @typechecked def test( - self, new_thread: bool = False, view: Literal["train", "val", "test"] = "val" + self, + new_thread: bool = False, + view: Literal["train", "val", "test"] = "val", ) -> Mapping[str, float] | None: """Runs testing. @@ -372,7 +397,8 @@ def test( @type view: Literal["train", "test", "val"] @param view: Which view to run the testing on. Defauls to "val". @rtype: Mapping[str, float] | None - @return: If new_thread is False, returns a dictionary test results. 
+ @return: If new_thread is False, returns a dictionary test + results. """ loader = self.pytorch_loaders[view] @@ -396,11 +422,12 @@ def infer( """Runs inference. @type view: str - @param view: Which split to run the inference on. Valid values are: 'train', - 'val', 'test'. Defaults to "val". + @param view: Which split to run the inference on. Valid values + are: 'train', 'val', 'test'. Defaults to "val". @type save_dir: str | Path | None - @param save_dir: Directory where to save the visualizations. If not specified, - visualizations will be rendered on the screen. + @param save_dir: Directory where to save the visualizations. If + not specified, visualizations will be rendered on the + screen. """ self.lightning_module.eval() @@ -429,10 +456,14 @@ def _objective(trial: optuna.trial.Trial) -> float: **tracker_params, ) - run_save_dir = osp.join(cfg_tracker.save_directory, child_tracker.run_name) + run_save_dir = osp.join( + cfg_tracker.save_directory, child_tracker.run_name + ) assert self.cfg.tuner is not None - curr_params = get_trial_params(all_augs, self.cfg.tuner.params, trial) + curr_params = get_trial_params( + all_augs, self.cfg.tuner.params, trial + ) curr_params["model.predefined_model"] = None cfg_copy = self.cfg.model_copy(deep=True) @@ -462,7 +493,9 @@ def _objective(trial: optuna.trial.Trial) -> float: else LuxonisTQDMProgressBar() ] - pruner_callback = PyTorchLightningPruningCallback(trial, monitor="val/loss") + pruner_callback = PyTorchLightningPruningCallback( + trial, monitor="val/loss" + ) callbacks.append(pruner_callback) if self.cfg.trainer.seed is not None: @@ -484,7 +517,9 @@ def _objective(trial: optuna.trial.Trial) -> float: except optuna.TrialPruned as e: logger.info(e) - if "val/loss" not in pl_trainer.callback_metrics: # pragma: no cover + if ( + "val/loss" not in pl_trainer.callback_metrics + ): # pragma: no cover raise ValueError( "No validation loss found. " "This can happen if `TestOnTrainEnd` callback is used." 
@@ -494,9 +529,13 @@ def _objective(trial: optuna.trial.Trial) -> float: cfg_tuner = self.cfg.tuner if cfg_tuner is None: - raise ValueError("You have to specify the `tuner` section in config.") + raise ValueError( + "You have to specify the `tuner` section in config." + ) - all_augs = [a.name for a in self.cfg.trainer.preprocessing.augmentations] + all_augs = [ + a.name for a in self.cfg.trainer.preprocessing.augmentations + ] rank = rank_zero_only.rank cfg_tracker = self.cfg.tracker tracker_params = cfg_tracker.model_dump() @@ -564,8 +603,8 @@ def archive(self, path: str | Path | None = None) -> Path: """Generates an NN Archive out of a model executable. @type path: str | Path | None - @param path: Path to the model executable. If not specified, the model will be - exported first. + @param path: Path to the model executable. If not specified, the + model will be exported first. @rtype: Path @return: Path to the generated NN Archive. """ @@ -592,8 +631,12 @@ def _mult(lst: list[float | int]) -> list[float]: return [round(x * 255.0, 5) for x in lst] preprocessing = { # TODO: keep preprocessing same for each input? - "mean": _mult(self.cfg.trainer.preprocessing.normalize.params["mean"]), - "scale": _mult(self.cfg.trainer.preprocessing.normalize.params["std"]), + "mean": _mult( + self.cfg.trainer.preprocessing.normalize.params["mean"] + ), + "scale": _mult( + self.cfg.trainer.preprocessing.normalize.params["std"] + ), "reverse_channels": self.cfg.trainer.preprocessing.train_rgb, "interleaved_to_planar": False, # TODO: make it modifiable? 
} @@ -652,7 +695,9 @@ def _mult(lst: list[float | int]) -> list[float]: logger.info(f"NN Archive saved to {archive_path}") if self.cfg.archiver.upload_url is not None: # pragma: no cover - LuxonisFileSystem.upload(archive_path, self.cfg.archiver.upload_url) + LuxonisFileSystem.upload( + archive_path, self.cfg.archiver.upload_url + ) if self.cfg.archiver.upload_to_run: self.tracker.upload_artifact(archive_path, typ="archive") @@ -664,14 +709,15 @@ def get_status(self) -> tuple[int, int]: """Get current status of training. @rtype: tuple[int, int] - @return: First element is current epoch, second element is total number of - epochs. + @return: First element is current epoch, second element is total + number of epochs. """ return self.lightning_module.get_status() @rank_zero_only def get_status_percentage(self) -> float: - """Return percentage of current training, takes into account early stopping. + """Return percentage of current training, takes into account + early stopping. @rtype: float @return: Percentage of current training in range 0-100. @@ -680,7 +726,8 @@ def get_status_percentage(self) -> float: @rank_zero_only def get_error_message(self) -> str | None: - """Return error message if one occurs while running in thread, otherwise None. + """Return error message if one occurs while running in thread, + otherwise None. @rtype: str | None @return: Error message @@ -689,10 +736,12 @@ def get_error_message(self) -> str | None: @rank_zero_only def get_min_loss_checkpoint_path(self) -> str | None: - """Return best checkpoint path with respect to minimal validation loss. + """Return best checkpoint path with respect to minimal + validation loss. 
@rtype: str - @return: Path to best checkpoint with respect to minimal validation loss + @return: Path to best checkpoint with respect to minimal + validation loss """ if not self.pl_trainer.checkpoint_callbacks: return None @@ -700,10 +749,12 @@ def get_min_loss_checkpoint_path(self) -> str | None: @rank_zero_only def get_best_metric_checkpoint_path(self) -> str | None: - """Return best checkpoint path with respect to best validation metric. + """Return best checkpoint path with respect to best validation + metric. @rtype: str - @return: Path to best checkpoint with respect to best validation metric + @return: Path to best checkpoint with respect to best validation + metric """ if len(self.pl_trainer.checkpoint_callbacks) < 2: return None diff --git a/luxonis_train/core/utils/archive_utils.py b/luxonis_train/core/utils/archive_utils.py index a27c8f90..96c2bcde 100644 --- a/luxonis_train/core/utils/archive_utils.py +++ b/luxonis_train/core/utils/archive_utils.py @@ -98,7 +98,9 @@ def _get_onnx_inputs(onnx_path: Path) -> dict[str, MetadataDict]: for inp in model.graph.input: shape = [dim.dim_value for dim in inp.type.tensor_type.shape.dim] inputs[inp.name]["shape"] = shape - inputs[inp.name]["dtype"] = _from_onnx_dtype(inp.type.tensor_type.elem_type) + inputs[inp.name]["dtype"] = _from_onnx_dtype( + inp.type.tensor_type.elem_type + ) return inputs @@ -137,7 +139,9 @@ def _get_head_specific_parameters( parameters = {} if head_name == "ClassificationHead": - parameters["is_softmax"] = getattr(ImplementedHeadsIsSoxtmaxed, head_name).value + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value elif head_name == "EfficientBBoxHead": parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv6.value head_node = nodes[head_alias] @@ -145,7 +149,9 @@ def _get_head_specific_parameters( parameters["conf_threshold"] = head_node.conf_thres parameters["max_det"] = head_node.max_det elif head_name in ["SegmentationHead", "BiSeNetHead"]: - 
parameters["is_softmax"] = getattr(ImplementedHeadsIsSoxtmaxed, head_name).value + parameters["is_softmax"] = getattr( + ImplementedHeadsIsSoxtmaxed, head_name + ).value elif head_name == "ImplicitKeypointBBoxHead": parameters["subtype"] = ObjectDetectionSubtypeYOLO.YOLOv7.value head_node = nodes[head_alias] @@ -166,13 +172,16 @@ def _get_head_specific_parameters( return parameters -def _get_head_outputs(outputs: list[dict], head_name: str, head_type: str) -> list[str]: +def _get_head_outputs( + outputs: list[dict], head_name: str, head_type: str +) -> list[str]: """Get model outputs in a head-specific format. @type outputs: list[dict] @param outputs: List of NN Archive outputs. @type head_name: str - @param head_name: Type of the head (e.g. 'EfficientBBoxHead') or its custom alias. + @param head_name: Type of the head (e.g. 'EfficientBBoxHead') or its + custom alias. @type head_type: str @param head_name: Type of the head (e.g. 'EfficientBBoxHead'). @rtype: list[str] @@ -238,7 +247,9 @@ def get_heads( task = str(next(iter(task.values()))) classes = _get_classes(node_name, task, class_dict) - head_outputs = _get_head_outputs(outputs, node_alias, node_name) + head_outputs = _get_head_outputs( + outputs, node_alias, node_name + ) head_dict = { "parser": parser, "metadata": { diff --git a/luxonis_train/core/utils/tune_utils.py b/luxonis_train/core/utils/tune_utils.py index e2fe692e..d9d6c4c0 100644 --- a/luxonis_train/core/utils/tune_utils.py +++ b/luxonis_train/core/utils/tune_utils.py @@ -61,17 +61,23 @@ def get_trial_params( case "int", [int(low), int(high), *tail]: step = tail[0] if tail else 1 if not isinstance(step, int): - raise ValueError(f"Step for int type must be int, but got {step}") + raise ValueError( + f"Step for int type must be int, but got {step}" + ) new_value = trial.suggest_int(key_name, low, high, step=step) case "loguniform", [float(low), float(high)]: new_value = trial.suggest_loguniform(key_name, low, high) case "uniform", [float(low), 
float(high)]: new_value = trial.suggest_uniform(key_name, low, high) case _, _: - raise KeyError(f"Combination of {key_type} and {value} not supported") + raise KeyError( + f"Combination of {key_type} and {value} not supported" + ) new_params[key_name] = new_value if len(new_params) == 0: - raise ValueError("No paramteres to tune. Specify them under `tuner.params`.") + raise ValueError( + "No paramteres to tune. Specify them under `tuner.params`." + ) return new_params diff --git a/luxonis_train/loaders/base_loader.py b/luxonis_train/loaders/base_loader.py index 78607a29..b6b8a863 100644 --- a/luxonis_train/loaders/base_loader.py +++ b/luxonis_train/loaders/base_loader.py @@ -10,7 +10,8 @@ from luxonis_train.utils.types import Labels LuxonisLoaderTorchOutput = tuple[dict[str, Tensor], Labels] -"""LuxonisLoaderTorchOutput is a tuple of source tensors and corresponding labels.""" +"""LuxonisLoaderTorchOutput is a tuple of source tensors and +corresponding labels.""" class BaseLoaderTorch( @@ -20,8 +21,8 @@ class BaseLoaderTorch( register=False, registry=LOADERS, ): - """Base abstract loader class that enforces LuxonisLoaderTorchOutput output label - structure.""" + """Base abstract loader class that enforces LuxonisLoaderTorchOutput + output label structure.""" def __init__( self, @@ -115,11 +116,12 @@ def get_classes(self) -> dict[str, list[str]]: ... def get_n_keypoints(self) -> dict[str, int] | None: - """Returns the dictionary defining the semantic skeleton for each class using - keypoints. + """Returns the dictionary defining the semantic skeleton for + each class using keypoints. @rtype: Dict[str, Dict] - @return: A dictionary mapping classes to their skeleton definitions. + @return: A dictionary mapping classes to their skeleton + definitions. """ return None @@ -130,16 +132,19 @@ def collate_fn( """Default collate function used for training. 
@type batch: list[LuxonisLoaderTorchOutput] - @param batch: List of loader outputs (dict of Tensors) and labels (dict of Tensors) - in the LuxonisLoaderTorchOutput format. + @param batch: List of loader outputs (dict of Tensors) and labels + (dict of Tensors) in the LuxonisLoaderTorchOutput format. @rtype: tuple[dict[str, Tensor], dict[LabelType, Tensor]] - @return: Tuple of inputs and annotations in the format expected by the model. + @return: Tuple of inputs and annotations in the format expected by + the model. """ inputs: tuple[dict[str, Tensor], ...] labels: tuple[Labels, ...] inputs, labels = zip(*batch) - out_inputs = {k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys()} + out_inputs = { + k: torch.stack([i[k] for i in inputs], 0) for k in inputs[0].keys() + } out_labels: Labels = {} diff --git a/luxonis_train/loaders/luxonis_loader_torch.py b/luxonis_train/loaders/luxonis_loader_torch.py index 328f87be..8286a7a2 100644 --- a/luxonis_train/loaders/luxonis_loader_torch.py +++ b/luxonis_train/loaders/luxonis_loader_torch.py @@ -156,7 +156,9 @@ def _parse_dataset( f"Supported types are: {', '.join(DatasetType.__members__)}." 
) - logger.info(f"Parsing dataset from {dataset_dir} with name '{dataset_name}'") + logger.info( + f"Parsing dataset from {dataset_dir} with name '{dataset_name}'" + ) return LuxonisParser( dataset_dir, diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index 18136e40..fb9b2523 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -18,7 +18,9 @@ BaseMetric, BaseVisualizer, ) -from luxonis_train.attached_modules.metrics.torchmetrics import TorchMetricWrapper +from luxonis_train.attached_modules.metrics.torchmetrics import ( + TorchMetricWrapper, +) from luxonis_train.attached_modules.visualizers import ( combine_visualizations, get_unnormalized_images, @@ -36,7 +38,12 @@ ) from luxonis_train.utils.config import AttachedModuleConfig, Config from luxonis_train.utils.graph import Graph -from luxonis_train.utils.registry import CALLBACKS, OPTIMIZERS, SCHEDULERS, Registry +from luxonis_train.utils.registry import ( + CALLBACKS, + OPTIMIZERS, + SCHEDULERS, + Registry, +) from .luxonis_output import LuxonisOutput @@ -110,13 +117,13 @@ def __init__( @type save_dir: str @param save_dir: Directory to save checkpoints. @type input_shapes: dict[str, Size] - @param input_shapes: Dictionary of input shapes. Keys are input names, values - are shapes. + @param input_shapes: Dictionary of input shapes. Keys are input + names, values are shapes. @type dataset_metadata: L{DatasetMetadata} | None @param dataset_metadata: Dataset metadata. @type kwargs: Any - @param kwargs: Additional arguments to pass to the L{LightningModule} - constructor. + @param kwargs: Additional arguments to pass to the + L{LightningModule} constructor. 
""" super().__init__(**kwargs) @@ -135,11 +142,17 @@ def __init__( self.main_metric: str | None = None self.save_dir = save_dir self.test_step_outputs: list[Mapping[str, Tensor | float | int]] = [] - self.training_step_outputs: list[Mapping[str, Tensor | float | int]] = [] - self.validation_step_outputs: list[Mapping[str, Tensor | float | int]] = [] + self.training_step_outputs: list[ + Mapping[str, Tensor | float | int] + ] = [] + self.validation_step_outputs: list[ + Mapping[str, Tensor | float | int] + ] = [] self.losses: dict[str, dict[str, BaseLoss]] = defaultdict(dict) self.metrics: dict[str, dict[str, BaseMetric]] = defaultdict(dict) - self.visualizers: dict[str, dict[str, BaseVisualizer]] = defaultdict(dict) + self.visualizers: dict[str, dict[str, BaseVisualizer]] = defaultdict( + dict + ) self._logged_images = 0 @@ -157,7 +170,9 @@ def __init__( elif isinstance(node_cfg.freezing.unfreeze_after, int): unfreeze_after = node_cfg.freezing.unfreeze_after else: - unfreeze_after = int(node_cfg.freezing.unfreeze_after * epochs) + unfreeze_after = int( + node_cfg.freezing.unfreeze_after * epochs + ) frozen_nodes.append((node_name, unfreeze_after)) if node_cfg.task is not None: @@ -177,8 +192,14 @@ def __init__( node_cfg.task = {next(iter(Node.tasks)): node_cfg.task} else: - node_cfg.task = {**Node._process_tasks(Node.tasks), **node_cfg.task} - nodes[node_name] = (Node, {**node_cfg.params, "_tasks": node_cfg.task}) + node_cfg.task = { + **Node._process_tasks(Node.tasks), + **node_cfg.task, + } + nodes[node_name] = ( + Node, + {**node_cfg.params, "_tasks": node_cfg.task}, + ) # Handle inputs for this node if node_cfg.input_sources: @@ -256,12 +277,12 @@ def _initiate_nodes( ) -> nn.ModuleDict: """Initializes all the nodes in the model. - Traverses the graph and initiates each node using outputs of the preceding - nodes. + Traverses the graph and initiates each node using outputs of the + preceding nodes. 
@type nodes: dict[str, tuple[type[LuxonisNode], Kwargs]] - @param nodes: Dictionary of nodes to be initiated. Keys are node names, values - are tuples of node class and node kwargs. + @param nodes: Dictionary of nodes to be initiated. Keys are node + names, values are tuples of node class and node kwargs. @rtype: L{nn.ModuleDict}[str, L{LuxonisNode}] @return: Dictionary of initiated nodes. """ @@ -273,9 +294,10 @@ def _initiate_nodes( for source_name, shape in shapes.items() } - for node_name, (Node, node_kwargs), node_input_names, _ in traverse_graph( - self.graph, nodes - ): + for node_name, ( + Node, + node_kwargs, + ), node_input_names, _ in traverse_graph(self.graph, nodes): node_dummy_inputs: list[Packet[Tensor]] = [] """List of dummy input packets for the node. @@ -318,23 +340,27 @@ def forward( ) -> LuxonisOutput: """Forward pass of the model. - Traverses the graph and step-by-step computes the outputs of each node. Each - next node is computed only when all of its predecessors are computed. Once the - outputs are not needed anymore, they are removed from the memory. + Traverses the graph and step-by-step computes the outputs of + each node. Each next node is computed only when all of its + predecessors are computed. Once the outputs are not needed + anymore, they are removed from the memory. @type inputs: L{Tensor} @param inputs: Input tensor. @type task_labels: L{TaskLabels} | None @param task_labels: Labels dictionary. Defaults to C{None}. @type images: L{Tensor} | None - @param images: Canvas tensor for visualizers. Defaults to C{None}. + @param images: Canvas tensor for visualizers. Defaults to + C{None}. @type compute_loss: bool - @param compute_loss: Whether to compute losses. Defaults to C{True}. + @param compute_loss: Whether to compute losses. Defaults to + C{True}. @type compute_metrics: bool - @param compute_metrics: Whether to update metrics. Defaults to C{True}. + @param compute_metrics: Whether to update metrics. Defaults to + C{True}. 
@type compute_visualizations: bool - @param compute_visualizations: Whether to compute visualizations. Defaults to - C{False}. + @param compute_visualizations: Whether to compute + visualizations. Defaults to C{False}. @rtype: L{LuxonisOutput} @return: Output of the model. """ @@ -358,11 +384,19 @@ def forward( outputs = node.run(node_inputs) computed[node_name] = outputs - if compute_loss and node_name in self.losses and labels is not None: + if ( + compute_loss + and node_name in self.losses + and labels is not None + ): for loss_name, loss in self.losses[node_name].items(): losses[node_name][loss_name] = loss.run(outputs, labels) - if compute_metrics and node_name in self.metrics and labels is not None: + if ( + compute_metrics + and node_name in self.metrics + and labels is not None + ): for metric in self.metrics[node_name].values(): metric.run_update(outputs, labels) @@ -372,7 +406,9 @@ def forward( and images is not None and labels is not None ): - for viz_name, visualizer in self.visualizers[node_name].items(): + for viz_name, visualizer in self.visualizers[ + node_name + ].items(): viz = combine_visualizations( visualizer.run( images, @@ -440,7 +476,8 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: @type save_path: str @param save_path: Path where the exported model will be saved. @type kwargs: Any - @param kwargs: Additional arguments for the L{torch.onnx.export} method. + @param kwargs: Additional arguments for the L{torch.onnx.export} + method. @rtype: list[str] @return: List of output names. 
""" @@ -453,7 +490,8 @@ def export_onnx(self, save_path: str, **kwargs) -> list[str]: } inputs_deep_clone = { - k: torch.zeros(elem.shape).to(self.device) for k, elem in inputs.items() + k: torch.zeros(elem.shape).to(self.device) + for k, elem in inputs.items() } inputs_for_onnx = {"inputs": inputs_deep_clone} @@ -524,22 +562,26 @@ def export_forward(inputs) -> tuple[Tensor, ...]: def process_losses( self, - losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]]], + losses_dict: dict[ + str, dict[str, Tensor | tuple[Tensor, dict[str, Tensor]]] + ], ) -> tuple[Tensor, dict[str, Tensor]]: """Processes individual losses from the model run. - Goes over the computed losses and computes the final loss as a weighted sum of - all the losses. + Goes over the computed losses and computes the final loss as a + weighted sum of all the losses. - @type losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, dict[str, - Tensor]]]] - @param losses_dict: Dictionary of computed losses. Each node can have multiple - losses attached. The first key identifies the node, the second key - identifies the specific loss. Values are either single tensors or tuples of - tensors and sublosses. + @type losses_dict: dict[str, dict[str, Tensor | tuple[Tensor, + dict[str, Tensor]]]] + @param losses_dict: Dictionary of computed losses. Each node can + have multiple losses attached. The first key identifies the + node, the second key identifies the specific loss. Values + are either single tensors or tuples of tensors and + sublosses. @rtype: tuple[Tensor, dict[str, Tensor]] - @return: Tuple of final loss and dictionary of processed sublosses. The - dictionary is in a format of {loss_name: loss_value}. + @return: Tuple of final loss and dictionary of processed + sublosses. The dictionary is in a format of {loss_name: + loss_value}. 
""" final_loss = torch.zeros(1, device=self.device) training_step_output: dict[str, Tensor] = {} @@ -553,9 +595,9 @@ def process_losses( loss *= self.loss_weights[loss_name] final_loss += loss - training_step_output[ - f"loss/{node_name}/{loss_name}" - ] = loss.detach().cpu() + training_step_output[f"loss/{node_name}/{loss_name}"] = ( + loss.detach().cpu() + ) if self.cfg.trainer.log_sub_losses and sublosses: for subloss_name, subloss_value in sublosses.items(): training_step_output[ @@ -564,10 +606,14 @@ def process_losses( training_step_output["loss"] = final_loss.detach().cpu() return final_loss, training_step_output - def training_step(self, train_batch: tuple[dict[str, Tensor], Labels]) -> Tensor: + def training_step( + self, train_batch: tuple[dict[str, Tensor], Labels] + ) -> Tensor: """Performs one step of training with provided batch.""" outputs = self.forward(*train_batch) - assert outputs.losses, "Losses are empty, check if you have defined any loss" + assert ( + outputs.losses + ), "Losses are empty, check if you have defined any loss" loss, training_step_output = self.process_losses(outputs.losses) self.training_step_outputs.append(training_step_output) @@ -610,7 +656,8 @@ def get_status(self) -> tuple[int, int]: return self.current_epoch, self.cfg.trainer.epochs def get_status_percentage(self) -> float: - """Returns percentage of current training, takes into account early stopping.""" + """Returns percentage of current training, takes into account + early stopping.""" if self._trainer.early_stopping_callback: # model haven't yet stop from early stopping callback if self._trainer.early_stopping_callback.stopped_epoch == 0: @@ -621,7 +668,9 @@ def get_status_percentage(self) -> float: return (self.current_epoch / self.cfg.trainer.epochs) * 100 def _evaluation_step( - self, mode: Literal["test", "val"], batch: tuple[dict[str, Tensor], Labels] + self, + mode: Literal["test", "val"], + batch: tuple[dict[str, Tensor], Labels], ) -> dict[str, Tensor]: 
inputs, labels = batch images = None @@ -667,7 +716,9 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: logger.info("Metrics computed.") for node_name, metrics in computed_metrics.items(): for metric_name, metric_value in metrics.items(): - metric_results[node_name][metric_name] = metric_value.cpu().item() + metric_results[node_name][metric_name] = ( + metric_value.cpu().item() + ) self.log( f"{mode}/metric/{node_name}/{metric_name}", metric_value, @@ -687,7 +738,9 @@ def _evaluation_epoch_end(self, mode: Literal["test", "val"]) -> None: def configure_callbacks(self) -> list[pl.Callback]: """Configures Pytorch Lightning callbacks.""" self.min_val_loss_checkpoints_path = f"{self.save_dir}/min_val_loss" - self.best_val_metric_checkpoints_path = f"{self.save_dir}/best_val_metric" + self.best_val_metric_checkpoints_path = ( + f"{self.save_dir}/best_val_metric" + ) model_name = self.cfg.model.name callbacks: list[pl.Callback] = [ @@ -721,7 +774,9 @@ def configure_callbacks(self) -> list[pl.Callback]: for callback in self.cfg.trainer.callbacks: if callback.active: - callbacks.append(CALLBACKS.get(callback.name)(**callback.params)) + callbacks.append( + CALLBACKS.get(callback.name)(**callback.params) + ) return callbacks @@ -748,11 +803,12 @@ def configure_optimizers( def load_checkpoint(self, path: str | Path | None) -> None: """Loads checkpoint weights from provided path. - Loads the checkpoints gracefully, ignoring keys that are not found in the model - state dict or in the checkpoint. + Loads the checkpoints gracefully, ignoring keys that are not + found in the model state dict or in the checkpoint. @type path: str | None - @param path: Path to the checkpoint. If C{None}, no checkpoint will be loaded. + @param path: Path to the checkpoint. If C{None}, no checkpoint + will be loaded. 
""" if path is None: return @@ -816,7 +872,9 @@ def _init_attached_module( return module_name, node_name @staticmethod - def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: + def _to_module_dict( + modules: dict[str, dict[str, nn.Module]], + ) -> nn.ModuleDict: return nn.ModuleDict( { node_name: nn.ModuleDict(node_modules) @@ -826,7 +884,9 @@ def _to_module_dict(modules: dict[str, dict[str, nn.Module]]) -> nn.ModuleDict: @property def _progress_bar(self) -> BaseLuxonisProgressBar: - return cast(BaseLuxonisProgressBar, self._trainer.progress_bar_callback) + return cast( + BaseLuxonisProgressBar, self._trainer.progress_bar_callback + ) @rank_zero_only def _print_results( @@ -836,16 +896,20 @@ def _print_results( logger.info(f"{stage} loss: {loss:.4f}") - self._progress_bar.print_results(stage=stage, loss=loss, metrics=metrics) + self._progress_bar.print_results( + stage=stage, loss=loss, metrics=metrics + ) if self.main_metric is not None: main_metric_node, main_metric_name = self.main_metric.split("/") main_metric = metrics[main_metric_node][main_metric_name] - logger.info(f"{stage} main metric ({self.main_metric}): {main_metric:.4f}") + logger.info( + f"{stage} main metric ({self.main_metric}): {main_metric:.4f}" + ) def _is_train_eval_epoch(self) -> bool: - """Checks if train eval should be performed on current epoch based on configured - train_metrics_interval.""" + """Checks if train eval should be performed on current epoch + based on configured train_metrics_interval.""" train_metrics_interval = self.cfg.trainer.train_metrics_interval # add +1 to current_epoch because starting epoch is at 0 return ( diff --git a/luxonis_train/models/predefined_models/base_predefined_model.py b/luxonis_train/models/predefined_models/base_predefined_model.py index 08cca4ee..9388f345 100644 --- a/luxonis_train/models/predefined_models/base_predefined_model.py +++ b/luxonis_train/models/predefined_models/base_predefined_model.py @@ -19,23 +19,19 @@ 
class BasePredefinedModel( ): @property @abstractmethod - def nodes(self) -> list[ModelNodeConfig]: - ... + def nodes(self) -> list[ModelNodeConfig]: ... @property @abstractmethod - def losses(self) -> list[LossModuleConfig]: - ... + def losses(self) -> list[LossModuleConfig]: ... @property @abstractmethod - def metrics(self) -> list[MetricModuleConfig]: - ... + def metrics(self) -> list[MetricModuleConfig]: ... @property @abstractmethod - def visualizers(self) -> list[AttachedModuleConfig]: - ... + def visualizers(self) -> list[AttachedModuleConfig]: ... def generate_model( self, diff --git a/luxonis_train/models/predefined_models/detection_model.py b/luxonis_train/models/predefined_models/detection_model.py index d6cd4520..94c4487f 100644 --- a/luxonis_train/models/predefined_models/detection_model.py +++ b/luxonis_train/models/predefined_models/detection_model.py @@ -47,7 +47,9 @@ def nodes(self) -> list[ModelNodeConfig]: name="EfficientBBoxHead", alias="detection_head", freezing=self.head_params.pop("freezing", {}), - inputs=["detection_neck"] if self.use_neck else ["detection_backbone"], + inputs=["detection_neck"] + if self.use_neck + else ["detection_backbone"], params=self.head_params, task=self.task_name, ) diff --git a/luxonis_train/nodes/backbones/contextspatial.py b/luxonis_train/nodes/backbones/contextspatial.py index 55de77e1..cf98cd4c 100644 --- a/luxonis_train/nodes/backbones/contextspatial.py +++ b/luxonis_train/nodes/backbones/contextspatial.py @@ -99,8 +99,12 @@ def __init__(self, backbone: nn.Module): super().__init__() self.backbone = backbone - self.up16 = nn.Upsample(scale_factor=2.0, mode="bilinear", align_corners=True) - self.up32 = nn.Upsample(scale_factor=2.0, mode="bilinear", align_corners=True) + self.up16 = nn.Upsample( + scale_factor=2.0, mode="bilinear", align_corners=True + ) + self.up32 = nn.Upsample( + scale_factor=2.0, mode="bilinear", align_corners=True + ) self.refine16 = ConvModule(128, 128, 3, 1, 1) self.refine32 = 
ConvModule(128, 128, 3, 1, 1) diff --git a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py index 3549bbff..0143855c 100644 --- a/luxonis_train/nodes/backbones/efficientrep/efficientrep.py +++ b/luxonis_train/nodes/backbones/efficientrep/efficientrep.py @@ -62,8 +62,12 @@ def __init__( channels_list = channels_list or [64, 128, 256, 512, 1024] n_repeats = n_repeats or [1, 6, 12, 18, 6] - channels_list = [make_divisible(i * width_mul, 8) for i in channels_list] - n_repeats = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats] + channels_list = [ + make_divisible(i * width_mul, 8) for i in channels_list + ] + n_repeats = [ + (max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats + ] self.repvgg_encoder = RepVGGBlock( in_channels=self.in_channels, @@ -102,7 +106,8 @@ def set_export_mode(self, mode: bool = True) -> None: """Reparametrizes instances of L{RepVGGBlock} in the network. @type mode: bool - @param mode: Whether to set the export mode. Defaults to C{True}. + @param mode: Whether to set the export mode. Defaults to + C{True}. 
""" super().set_export_mode(mode) if self.export: diff --git a/luxonis_train/nodes/backbones/micronet/blocks.py b/luxonis_train/nodes/backbones/micronet/blocks.py index de77b4b2..3da5e15e 100644 --- a/luxonis_train/nodes/backbones/micronet/blocks.py +++ b/luxonis_train/nodes/backbones/micronet/blocks.py @@ -58,7 +58,9 @@ def __init__( use_dy1, use_dy2, use_dy3 = use_dynamic_shift group1, group2 = groups_2 reduction = 8 * reduction_factor - intermediate_channels = in_channels * expansion_ratios[0] * expansion_ratios[1] + intermediate_channels = ( + in_channels * expansion_ratios[0] * expansion_ratios[1] + ) if groups_1[0] == 0: self.layers = self._create_lite_block( @@ -229,7 +231,9 @@ def _create_full_block( if use_dy1 > 0 else nn.ReLU6(True), ChannelShuffle(groups_1[1]), - DepthSpatialSepConv(intermediate_channels, (1, 1), kernel_size, stride), + DepthSpatialSepConv( + intermediate_channels, (1, 1), kernel_size, stride + ), DYShiftMax( intermediate_channels, intermediate_channels, @@ -268,7 +272,9 @@ def _create_full_block( if use_dy3 > 0 else nn.Sequential(), ChannelShuffle(group2), - ChannelShuffle(out_channels // 2) if use_dy3 != 0 else nn.Sequential(), + ChannelShuffle(out_channels // 2) + if use_dy3 != 0 + else nn.Sequential(), ) def forward(self, inputs: Tensor) -> Tensor: @@ -282,11 +288,12 @@ class ChannelShuffle(nn.Module): def __init__(self, groups: int): """Shuffle the channels of the input tensor. - This operation is used to mix information between groups after grouped - convolutions. + This operation is used to mix information between groups after + grouped convolutions. @type groups: int - @param groups: Number of groups to divide the channels into before shuffling. + @param groups: Number of groups to divide the channels into + before shuffling. """ super().__init__() @@ -315,25 +322,32 @@ def __init__( ): """Dynamic Shift-Max activation function. 
- This module implements the Dynamic Shift-Max operation, which adaptively fuses - and selects channel information based on the input. + This module implements the Dynamic Shift-Max operation, which + adaptively fuses and selects channel information based on the + input. @type in_channels: int @param in_channels: Number of input channels. @type out_channels: int @param out_channels: Number of output channels. @type init_a: tuple[float, float] - @param init_a: Initial values for the 'a' parameters. Defaults to (0.0, 0.0). + @param init_a: Initial values for the 'a' parameters. Defaults + to (0.0, 0.0). @type init_b: tuple[float, float] - @param init_b: Initial values for the 'b' parameters. Defaults to (0.0, 0.0). + @param init_b: Initial values for the 'b' parameters. Defaults + to (0.0, 0.0). @type use_relu: bool - @param use_relu: Whether to use ReLU activation. Defaults to True. + @param use_relu: Whether to use ReLU activation. Defaults to + True. @type groups: int - @param groups: Number of groups for channel shuffling. Defaults to 6. + @param groups: Number of groups for channel shuffling. Defaults + to 6. @type reduction: int - @param reduction: Reduction factor for the squeeze operation. Defaults to 4. + @param reduction: Reduction factor for the squeeze operation. + Defaults to 4. @type expansion: bool - @param expansion: Whether to use expansion in grouping. Defaults to False. + @param expansion: Whether to use expansion in grouping. Defaults + to False. 
""" super().__init__() self.exp: Literal[2, 4] = 4 if use_relu else 2 @@ -360,7 +374,9 @@ def __init__( index = index.view(1, groups, channels_per_group, 1, 1) index_groups = torch.split(index, [1, groups - 1], dim=1) index_groups = torch.cat([index_groups[1], index_groups[0]], dim=1) - index_splits = torch.split(index_groups, [1, channels_per_group - 1], dim=2) + index_splits = torch.split( + index_groups, [1, channels_per_group - 1], dim=2 + ) index_splits = torch.cat([index_splits[1], index_splits[0]], dim=2) self.index = index_splits.view(in_channels).long() @@ -411,7 +427,11 @@ def _make_divisible( class SpatialSepConvSF(nn.Module): def __init__( - self, in_channels: int, outs: tuple[int, int], kernel_size: int, stride: int + self, + in_channels: int, + outs: tuple[int, int], + kernel_size: int, + stride: int, ): super().__init__() out_channels1, out_channels2 = outs @@ -443,7 +463,9 @@ def forward(self, x: Tensor) -> Tensor: class Stem(nn.Module): - def __init__(self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4)): + def __init__( + self, in_channels: int, stride: int, outs: tuple[int, int] = (4, 4) + ): super().__init__() self.stem = nn.Sequential( SpatialSepConvSF(in_channels, outs, 3, stride), nn.ReLU6(True) @@ -455,7 +477,11 @@ def forward(self, x: Tensor) -> Tensor: class DepthSpatialSepConv(nn.Module): def __init__( - self, in_channels: int, expand: tuple[int, int], kernel_size: int, stride: int + self, + in_channels: int, + expand: tuple[int, int], + kernel_size: int, + stride: int, ): super().__init__() exp1, exp2 = expand diff --git a/luxonis_train/nodes/backbones/micronet/micronet.py b/luxonis_train/nodes/backbones/micronet/micronet.py index 32f51d09..82df5cb3 100644 --- a/luxonis_train/nodes/backbones/micronet/micronet.py +++ b/luxonis_train/nodes/backbones/micronet/micronet.py @@ -17,14 +17,15 @@ def __init__( ): """MicroNet backbone. - This class creates the full MicroNet architecture based on the specified - variant. 
It consists of a stem layer followed by multiple MicroBlocks. + This class creates the full MicroNet architecture based on the + specified variant. It consists of a stem layer followed by + multiple MicroBlocks. @type variant: Literal["M1", "M2", "M3"] @param variant: Model variant to use. Defaults to "M1". @type out_indices: list[int] | None - @param out_indices: Indices of the output layers. If provided, overrides the - variant value. + @param out_indices: Indices of the output layers. If provided, + overrides the variant value. """ super().__init__(**kwargs) diff --git a/luxonis_train/nodes/backbones/mobileone/blocks.py b/luxonis_train/nodes/backbones/mobileone/blocks.py index 276f08c5..63e19eae 100644 --- a/luxonis_train/nodes/backbones/mobileone/blocks.py +++ b/luxonis_train/nodes/backbones/mobileone/blocks.py @@ -47,9 +47,11 @@ def __init__( @type groups: int @param groups: Group number. Defaults to 1. @type use_se: bool - @param use_se: Whether to use SE-ReLU activations. Defaults to False. + @param use_se: Whether to use SE-ReLU activations. Defaults to + False. @type n_conv_branches: int - @param n_conv_branches: Number of linear conv branches. Defaults to 1. + @param n_conv_branches: Number of linear conv branches. Defaults + to 1. """ super().__init__() @@ -61,7 +63,6 @@ def __init__( self.n_conv_branches = n_conv_branches self.inference_mode = False - # Check if SE-ReLU is requested self.se: nn.Module if use_se: self.se = SqueezeExciteBlock( @@ -108,9 +109,9 @@ def __init__( activation=nn.Identity(), ) - def forward(self, inputs: Tensor): + def forward(self, inputs: Tensor) -> Tensor: """Apply forward pass.""" - # Inference mode forward pass. + if self.inference_mode: return self.activation(self.se(self.reparam_conv(inputs))) @@ -179,13 +180,17 @@ def _get_kernel_bias(self) -> tuple[Tensor, Tensor]: kernel_scale, bias_scale = self._fuse_bn_tensor(self.rbr_scale) # Pad scale branch kernel to match conv branch kernel size. 
pad = self.kernel_size // 2 - kernel_scale = torch.nn.functional.pad(kernel_scale, [pad, pad, pad, pad]) + kernel_scale = torch.nn.functional.pad( + kernel_scale, [pad, pad, pad, pad] + ) # get weights and bias of skip branch kernel_identity = torch.zeros(()) bias_identity = torch.zeros(()) if self.rbr_skip is not None: - kernel_identity, bias_identity = self._fuse_bn_tensor(self.rbr_skip) + kernel_identity, bias_identity = self._fuse_bn_tensor( + self.rbr_skip + ) # get weights and bias of conv branches kernel_conv = torch.zeros(()) @@ -217,13 +222,21 @@ def _fuse_bn_tensor(self, branch: nn.Module) -> tuple[Tensor, Tensor]: if not hasattr(self, "id_tensor"): input_dim = self.in_channels // self.groups kernel_value = torch.zeros( - (self.in_channels, input_dim, self.kernel_size, self.kernel_size), + ( + self.in_channels, + input_dim, + self.kernel_size, + self.kernel_size, + ), dtype=branch.weight.dtype, device=branch.weight.device, ) for i in range(self.in_channels): kernel_value[ - i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 + i, + i % input_dim, + self.kernel_size // 2, + self.kernel_size // 2, ] = 1 self.id_tensor = kernel_value kernel = self.id_tensor diff --git a/luxonis_train/nodes/backbones/mobileone/variants.py b/luxonis_train/nodes/backbones/mobileone/variants.py index 0eeaca93..fbb0add3 100644 --- a/luxonis_train/nodes/backbones/mobileone/variants.py +++ b/luxonis_train/nodes/backbones/mobileone/variants.py @@ -9,7 +9,9 @@ class MobileOneVariant(BaseModel): use_se: bool = False -def get_variant(variant: Literal["s0", "s1", "s2", "s3", "s4"]) -> MobileOneVariant: +def get_variant( + variant: Literal["s0", "s1", "s2", "s3", "s4"], +) -> MobileOneVariant: variants = { "s0": MobileOneVariant( width_multipliers=(0.75, 1.0, 1.0, 2.0), diff --git a/luxonis_train/nodes/backbones/repvgg/repvgg.py b/luxonis_train/nodes/backbones/repvgg/repvgg.py index c0818341..fd8a5e67 100644 --- a/luxonis_train/nodes/backbones/repvgg/repvgg.py +++ 
b/luxonis_train/nodes/backbones/repvgg/repvgg.py @@ -124,7 +124,8 @@ def set_export_mode(self, mode: bool = True) -> None: """Reparametrizes instances of L{RepVGGBlock} in the network. @type mode: bool - @param mode: Whether to set the export mode. Defaults to C{True}. + @param mode: Whether to set the export mode. Defaults to + C{True}. """ super().set_export_mode(mode) if self.export: diff --git a/luxonis_train/nodes/backbones/resnet.py b/luxonis_train/nodes/backbones/resnet.py index 36656e82..93a13d4a 100644 --- a/luxonis_train/nodes/backbones/resnet.py +++ b/luxonis_train/nodes/backbones/resnet.py @@ -15,7 +15,11 @@ def __init__( zero_init_residual: bool = False, groups: int = 1, width_per_group: int = 64, - replace_stride_with_dilation: tuple[bool, bool, bool] = (False, False, False), + replace_stride_with_dilation: tuple[bool, bool, bool] = ( + False, + False, + False, + ), **kwargs: Any, ): """ResNet backbone. diff --git a/luxonis_train/nodes/backbones/rexnetv1.py b/luxonis_train/nodes/backbones/rexnetv1.py index 0f5abdf4..6567586a 100644 --- a/luxonis_train/nodes/backbones/rexnetv1.py +++ b/luxonis_train/nodes/backbones/rexnetv1.py @@ -68,7 +68,9 @@ def __init__( self.out_indices = out_indices or [1, 4, 10, 17] kernel_sizes = ( - [kernel_sizes] * 6 if isinstance(kernel_sizes, int) else kernel_sizes + [kernel_sizes] * 6 + if isinstance(kernel_sizes, int) + else kernel_sizes ) strides = [ @@ -83,7 +85,9 @@ def __init__( features: list[nn.Module] = [] inplanes = input_ch / multiplier if multiplier < 1.0 else input_ch - first_channel = 32 / multiplier if multiplier < 1.0 or fix_head_stem else 32 + first_channel = ( + 32 / multiplier if multiplier < 1.0 or fix_head_stem else 32 + ) first_channel = make_divisible( int(round(first_channel * multiplier)), divisible_value ) @@ -119,7 +123,12 @@ def __init__( assert channels_group for in_c, c, t, k, s in zip( - in_channels_group, channels_group, ts, kernel_sizes, strides, strict=True + in_channels_group, + 
channels_group, + ts, + kernel_sizes, + strides, + strict=True, ): features.append( LinearBottleneck( @@ -128,7 +137,9 @@ def __init__( ) pen_channels = ( - int(1280 * multiplier) if multiplier > 1 and not fix_head_stem else 1280 + int(1280 * multiplier) + if multiplier > 1 and not fix_head_stem + else 1280 ) features.append( ConvModule( diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index aba30049..62292873 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -230,7 +230,9 @@ def get_task_name(self, task: LabelType) -> str: @raises ValueError: If the task is not supported by the node. """ if not self._tasks: - raise ValueError(f"Node {self.name} does not have any tasks defined.") + raise ValueError( + f"Node {self.name} does not have any tasks defined." + ) if task not in self._tasks: raise ValueError( @@ -248,8 +250,8 @@ def task(self) -> str: @type: str @raises RuntimeError: If the node doesn't define any tasks. - @raises RuntimeError: If the node defines more than one task. In that case, use - the L{get_task_name} method. + @raises RuntimeError: If the node defines more than one task. In + that case, use the L{get_task_name} method. """ if not self._tasks: raise RuntimeError(f"{self.name} does not have any tasks defined.") @@ -286,7 +288,8 @@ def n_keypoints(self) -> int: """Getter for the number of keypoints. @type: int - @raises ValueError: If the number of keypoints cannot be determined. + @raises ValueError: If the number of keypoints cannot be + determined. """ if self._n_keypoints is not None: return self._n_keypoints @@ -311,9 +314,11 @@ def n_classes(self) -> int: """Getter for the number of classes. @type: int - @raises ValueError: If the number of classes cannot be determined. - @raises ValueError: If the number of classes is different for different tasks. - In that case, use the L{get_n_classes} method. + @raises ValueError: If the number of classes cannot be + determined. 
+ @raises ValueError: If the number of classes is different for + different tasks. In that case, use the L{get_n_classes} + method. """ if self._n_classes is not None: return self._n_classes @@ -376,7 +381,8 @@ def input_shapes(self) -> list[Packet[Size]]: """Getter for the input shapes. @type: list[Packet[Size]] - @raises RuntimeError: If the C{input_shapes} were not set during initialization. + @raises RuntimeError: If the C{input_shapes} were not set during + initialization. """ if self._input_shapes is None: @@ -388,8 +394,8 @@ def original_in_shape(self) -> Size: """Getter for the original input shape as [N, H, W]. @type: Size - @raises RuntimeError: If the C{original_in_shape} were not set during - initialization. + @raises RuntimeError: If the C{original_in_shape} were not set + during initialization. """ if self._original_in_shape is None: raise self._non_set_error("original_in_shape") @@ -400,8 +406,8 @@ def dataset_metadata(self) -> DatasetMetadata: """Getter for the dataset metadata. @type: L{DatasetMetadata} - @raises RuntimeError: If the C{dataset_metadata} were not set during - initialization. + @raises RuntimeError: If the C{dataset_metadata} were not set + during initialization. """ if self._dataset_metadata is None: raise RuntimeError( @@ -449,14 +455,15 @@ def in_sizes(self) -> Size | list[Size]: def in_channels(self) -> int | list[int]: """Simplified getter for the number of input channels. - Should work out of the box for most cases where the C{input_shapes} are - sufficiently simple. Otherwise the C{input_shapes} should be used directly. If - C{attach_index} is set to "all" or is a slice, returns a list of input channels, + Should work out of the box for most cases where the + C{input_shapes} are sufficiently simple. Otherwise the + C{input_shapes} should be used directly. If C{attach_index} is + set to "all" or is a slice, returns a list of input channels, otherwise returns a single value. 
@type: int | list[int] - @raises RuntimeError: If the C{input_shapes} are too complicated for the default - implementation of C{in_sizes}. + @raises RuntimeError: If the C{input_shapes} are too complicated + for the default implementation of C{in_sizes}. """ return self._get_nth_size(-3) @@ -502,21 +509,22 @@ def set_export_mode(self, mode: bool = True) -> None: def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: """Prepares inputs for the forward pass. - Unwraps the inputs from the C{list[Packet[Tensor]]} input so they can be passed - to the forward call. The default implementation expects a single input with - C{features} key and returns the tensor or tensors at the C{attach_index} - position. + Unwraps the inputs from the C{list[Packet[Tensor]]} input so + they can be passed to the forward call. The default + implementation expects a single input with C{features} key and + returns the tensor or tensors at the C{attach_index} position. - For most cases the default implementation should be sufficient. Exceptions are - modules with multiple inputs or producing more complex outputs. This is - typically the case for output nodes. + For most cases the default implementation should be sufficient. + Exceptions are modules with multiple inputs or producing more + complex outputs. This is typically the case for output nodes. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the node. @rtype: ForwardInputT - @return: Prepared inputs, ready to be passed to the L{forward} method. - @raises RuntimeError: If the number of inputs is not equal to 1. In such cases - the method has to be overridden. + @return: Prepared inputs, ready to be passed to the L{forward} + method. + @raises RuntimeError: If the number of inputs is not equal to 1. + In such cases the method has to be overridden. 
""" if len(inputs) > 1: raise RuntimeError( @@ -585,7 +593,8 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: return {task: outputs} def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: - """Combines the forward pass with the wrapping and unwrapping of the inputs. + """Combines the forward pass with the wrapping and unwrapping of + the inputs. @type inputs: list[Packet[Tensor]] @param inputs: Inputs to the module. @@ -611,15 +620,15 @@ def run(self, inputs: list[Packet[Tensor]]) -> Packet[Tensor]: def get_attached(self, lst: list[T]) -> list[T] | T: """Gets the attached elements from a list. - This method is used to get the attached elements from a list based on the - C{attach_index} attribute. + This method is used to get the attached elements from a list + based on the C{attach_index} attribute. @type lst: list[T] - @param lst: List to get the attached elements from. Can be either a list of - tensors or a list of sizes. + @param lst: List to get the attached elements from. Can be + either a list of tensors or a list of sizes. @rtype: list[T] | T - @return: Attached elements. If C{attach_index} is set to C{"all"} or is a slice, - returns a list of attached elements. + @return: Attached elements. If C{attach_index} is set to + C{"all"} or is a slice, returns a list of attached elements. @raises ValueError: If the C{attach_index} is invalid. 
""" @@ -654,7 +663,9 @@ def _normalize_slice(i: int, j: int) -> slice: case (int(i), int(j), int(k)): return lst[i:j:k] case _: - raise ValueError(f"Invalid attach index: `{self.attach_index}`") + raise ValueError( + f"Invalid attach index: `{self.attach_index}`" + ) def _get_nth_size(self, idx: int) -> int | list[int]: match self.in_sizes: diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index a32f6f87..9231ea85 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -10,7 +10,8 @@ class EfficientDecoupledBlock(nn.Module): def __init__(self, n_classes: int, in_channels: int): - """Efficient Decoupled block used for class and regression predictions. + """Efficient Decoupled block used for class and regression + predictions. @type n_classes: int @param n_classes: Number of classes. @@ -36,7 +37,9 @@ def __init__(self, n_classes: int, in_channels: int): padding=1, activation=nn.SiLU(), ), - nn.Conv2d(in_channels=in_channels, out_channels=n_classes, kernel_size=1), + nn.Conv2d( + in_channels=in_channels, out_channels=n_classes, kernel_size=1 + ), ) self.regression_branch = nn.Sequential( ConvModule( @@ -149,7 +152,10 @@ def __init__( super().__init__( nn.ConvTranspose2d( - in_channels, out_channels, kernel_size=kernel_size, stride=stride + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, ), ConvModule(out_channels, out_channels, kernel_size=3, padding=1), ) @@ -296,7 +302,9 @@ def forward(self, x: Tensor) -> Tensor: else: id_out = self.rbr_identity(x) - return self.nonlinearity(self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out)) + return self.nonlinearity( + self.se(self.rbr_dense(x) + self.rbr_1x1(x) + id_out) + ) def reparametrize(self) -> None: if hasattr(self, "rbr_reparam"): @@ -323,7 +331,8 @@ def reparametrize(self) -> None: del self.id_tensor def _get_equivalent_kernel_bias(self) -> tuple[Tensor, Tensor]: - """Derives the equivalent kernel and bias in a 
DIFFERENTIABLE way.""" + """Derives the equivalent kernel and bias in a DIFFERENTIABLE + way.""" kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) @@ -340,7 +349,9 @@ def _pad_1x1_to_3x3_tensor(self, kernel1x1: Tensor | None) -> Tensor: else: return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1]) - def _fuse_bn_tensor(self, branch: nn.Module | None) -> tuple[Tensor, Tensor]: + def _fuse_bn_tensor( + self, branch: nn.Module | None + ) -> tuple[Tensor, Tensor]: if branch is None: return torch.tensor(0), torch.tensor(0) if isinstance(branch, nn.Sequential): @@ -380,9 +391,9 @@ def __init__( out_channels: int, n_blocks: int = 1, ): - """Module which repeats the block n times. First block accepts in_channels and - outputs out_channels while subsequent blocks accept out_channels and output - out_channels. + """Module which repeats the block n times. First block accepts + in_channels and outputs out_channels while subsequent blocks + accept out_channels and output out_channels. @type block: L{nn.Module} @param block: Block to repeat. @@ -410,8 +421,11 @@ def forward(self, x: Tensor) -> Tensor: class SpatialPyramidPoolingBlock(nn.Module): - def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 5): - """Spatial Pyramid Pooling block with ReLU activation on three different scales. + def __init__( + self, in_channels: int, out_channels: int, kernel_size: int = 5 + ): + """Spatial Pyramid Pooling block with ReLU activation on three + different scales. @type in_channels: int @param in_channels: Number of input channels. 
@@ -473,7 +487,9 @@ def forward(self, x: Tensor) -> Tensor: class FeatureFusionBlock(nn.Module): - def __init__(self, in_channels: int, out_channels: int, reduction: int = 1): + def __init__( + self, in_channels: int, out_channels: int, reduction: int = 1 + ): """Feature Fusion block adapted from: U{https://github.com/taveraantonio/BiseNetv1}. @type in_channels: int @@ -604,8 +620,8 @@ def __init__( @type in_channels: int @param in_channels: Number of input channels. @type in_channels_next: int - @param in_channels_next: Number of input channels of next input which is used in - concat. + @param in_channels_next: Number of input channels of next input + which is used in concat. @type out_channels: int @param out_channels: Number of output channels. @type n_repeats: int @@ -656,10 +672,11 @@ def __init__( @type in_channels: int @param in_channels: Number of input channels. @type downsample_out_channels: int - @param downsample_out_channels: Number of output channels after downsample. + @param downsample_out_channels: Number of output channels after + downsample. @type in_channels_next: int - @param in_channels_next: Number of input channels of next input which is used in - concat. + @param in_channels_next: Number of input channels of next input + which is used in concat. @type out_channels: int @param out_channels: Number of output channels. @type n_repeats: int diff --git a/luxonis_train/nodes/heads/classification_head.py b/luxonis_train/nodes/heads/classification_head.py index 93b5c684..5961c853 100644 --- a/luxonis_train/nodes/heads/classification_head.py +++ b/luxonis_train/nodes/heads/classification_head.py @@ -13,12 +13,12 @@ class ClassificationHead(BaseNode[Tensor, Tensor]): def __init__(self, dropout_rate: float = 0.2, **kwargs: Any): """Simple classification head. - Consists of a global average pooling layer followed by a dropout layer and a - single linear layer. 
+ Consists of a global average pooling layer followed by a dropout + layer and a single linear layer. @type dropout_rate: float - @param dropout_rate: Dropout rate before last layer, range C{[0, 1]}. Defaults - to C{0.2}. + @param dropout_rate: Dropout rate before last layer, range C{[0, + 1]}. Defaults to C{0.2}. """ super().__init__(**kwargs) diff --git a/luxonis_train/nodes/heads/efficient_bbox_head.py b/luxonis_train/nodes/heads/efficient_bbox_head.py index 8bef5044..6f0e01e7 100644 --- a/luxonis_train/nodes/heads/efficient_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_bbox_head.py @@ -33,18 +33,20 @@ def __init__( ): """Head for object detection. - Adapted from U{YOLOv6: A Single-Stage Object Detection Framework for Industrial - Applications }. + Adapted from U{YOLOv6: A Single-Stage Object Detection Framework + for Industrial Applications + }. @type n_heads: Literal[2,3,4] - @param n_heads: Number of output heads. Defaults to 3. B{Note:} Should be same - also on neck in most cases. + @param n_heads: Number of output heads. Defaults to 3. B{Note:} + Should be same also on neck in most cases. @type conf_thres: float - @param conf_thres: Threshold for confidence. Defaults to C{0.25}. + @param conf_thres: Threshold for confidence. Defaults to + C{0.25}. @type iou_thres: float @param iou_thres: Threshold for IoU. Defaults to C{0.45}. @type max_det: int - @param max_det: Maximum number of detections retained after NMS. Defaults to - C{300}. + @param max_det: Maximum number of detections retained after NMS. + Defaults to C{300}. 
""" super().__init__(**kwargs) @@ -96,17 +98,24 @@ def wrap( if self.export: outputs: list[Tensor] = [] - for out_cls, out_reg in zip(cls_score_list, reg_distri_list, strict=True): + for out_cls, out_reg in zip( + cls_score_list, reg_distri_list, strict=True + ): conf, _ = out_cls.max(1, keepdim=True) out = torch.cat([out_reg, conf, out_cls], dim=1) outputs.append(out) return {self.task: outputs} cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) if self.training: @@ -126,7 +135,8 @@ def wrap( } def _fit_stride_to_n_heads(self): - """Returns correct stride for number of heads and attach index.""" + """Returns correct stride for number of heads and attach + index.""" stride = torch.tensor( [ self.original_in_shape[1] / x[2] # type: ignore @@ -139,7 +149,8 @@ def _fit_stride_to_n_heads(self): def _process_to_bbox( self, output: tuple[list[Tensor], Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_list, reg_dist_list = output _, anchor_points, _, stride_tensor = anchors_for_fpn_features( features, @@ -149,7 +160,9 @@ def _process_to_bbox( multiply_with_stride=False, ) - pred_bboxes = dist2bbox(reg_dist_list, anchor_points, out_format="xyxy") + pred_bboxes = dist2bbox( + reg_dist_list, anchor_points, out_format="xyxy" + ) pred_bboxes *= stride_tensor output_merged = torch.cat( diff --git a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py index 5bb6bcc0..51b8b704 100644 --- 
a/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/efficient_keypoint_bbox_head.py @@ -69,7 +69,12 @@ def forward( ) -> tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]]: features, cls_score_list, reg_distri_list = super().forward(inputs) - _, self.anchor_points, _, self.stride_tensor = anchors_for_fpn_features( + ( + _, + self.anchor_points, + _, + self.stride_tensor, + ) = anchors_for_fpn_features( features, self.stride, self.grid_cell_size, @@ -85,7 +90,8 @@ def forward( return features, cls_score_list, reg_distri_list, kpt_list def wrap( - self, output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]] + self, + output: tuple[list[Tensor], list[Tensor], list[Tensor], list[Tensor]], ) -> Packet[Tensor]: features, cls_score_list, reg_distri_list, kpt_list = output bs = features[0].shape[0] @@ -108,10 +114,15 @@ def wrap( return {"outputs": outputs} cls_tensor = torch.cat( - [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], dim=2 + [cls_score_list[i].flatten(2) for i in range(len(cls_score_list))], + dim=2, ).permute(0, 2, 1) reg_tensor = torch.cat( - [reg_distri_list[i].flatten(2) for i in range(len(reg_distri_list))], dim=2 + [ + reg_distri_list[i].flatten(2) + for i in range(len(reg_distri_list)) + ], + dim=2, ).permute(0, 2, 1) kpt_tensor = torch.cat( [ @@ -156,8 +167,12 @@ def _dist2kpts(self, kpts: Tensor) -> Tensor: anchor_points_x = anchor_points_transposed[0].view(1, -1, 1) anchor_points_y = anchor_points_transposed[1].view(1, -1, 1) - y[:, :, 0::3] = (y[:, :, 0::3] * 2.0 + (anchor_points_x - 0.5)) * stride_tensor - y[:, :, 1::3] = (y[:, :, 1::3] * 2.0 + (anchor_points_y - 0.5)) * stride_tensor + y[:, :, 0::3] = ( + y[:, :, 0::3] * 2.0 + (anchor_points_x - 0.5) + ) * stride_tensor + y[:, :, 1::3] = ( + y[:, :, 1::3] * 2.0 + (anchor_points_y - 0.5) + ) * stride_tensor y[:, :, 2::3] = y[:, :, 2::3].sigmoid() return y @@ -165,10 +180,13 @@ def _dist2kpts(self, kpts: Tensor) -> 
Tensor: def _process_to_bbox_and_kps( self, output: tuple[list[Tensor], Tensor, Tensor, Tensor] ) -> list[Tensor]: - """Performs post-processing of the output and returns bboxs after NMS.""" + """Performs post-processing of the output and returns bboxs + after NMS.""" features, cls_score_list, reg_dist_list, keypoints = output - pred_bboxes = dist2bbox(reg_dist_list, self.anchor_points, out_format="xyxy") + pred_bboxes = dist2bbox( + reg_dist_list, self.anchor_points, out_format="xyxy" + ) pred_bboxes *= self.stride_tensor output_merged = torch.cat( diff --git a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py index e8b4ad5b..5de88650 100644 --- a/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py +++ b/luxonis_train/nodes/heads/implicit_keypoint_bbox_head.py @@ -18,7 +18,9 @@ logger = logging.getLogger(__name__) -class ImplicitKeypointBBoxHead(BaseNode[list[Tensor], tuple[list[Tensor], Tensor]]): +class ImplicitKeypointBBoxHead( + BaseNode[list[Tensor], tuple[list[Tensor], Tensor]] +): tasks = [LabelType.KEYPOINTS, LabelType.BOUNDINGBOX] in_channels: list[int] @@ -70,8 +72,12 @@ def __init__( if anchors is None: logger.info("No anchors provided, generating them automatically.") - anchors, recall = self.dataset_metadata.autogenerate_anchors(self.n_heads) - logger.info(f"Anchors generated. Best possible recall: {recall:.2f}") + anchors, recall = self.dataset_metadata.autogenerate_anchors( + self.n_heads + ) + logger.info( + f"Anchors generated. 
Best possible recall: {recall:.2f}" + ) self.box_offset = 5 self.n_det_out = self.n_classes + self.box_offset @@ -81,7 +87,9 @@ def __init__( self.grid: list[Tensor] = [] self.anchors = torch.tensor(anchors).float().view(self.n_heads, -1, 2) - self.anchor_grid = self.anchors.clone().view(self.n_heads, 1, -1, 1, 1, 2) + self.anchor_grid = self.anchors.clone().view( + self.n_heads, 1, -1, 1, 1, 2 + ) self.channel_list, self.stride = self._fit_to_n_heads(self.in_channels) @@ -130,11 +138,17 @@ def forward(self, inputs: list[Tensor]) -> tuple[list[Tensor], Tensor]: batch_size, _, feature_height, feature_width = feat.shape if i >= len(self.grid): self.grid.append( - self._construct_grid(feature_width, feature_height).to(feat.device) + self._construct_grid(feature_width, feature_height).to( + feat.device + ) ) feat = feat.reshape( - batch_size, self.n_anchors, self.n_out, feature_height, feature_width + batch_size, + self.n_anchors, + self.n_out, + feature_height, + feature_width, ).permute(0, 1, 3, 4, 2) features.append(feat) @@ -167,7 +181,8 @@ def wrap(self, output: tuple[list[Tensor], Tensor]) -> Packet[Tensor]: return { "boundingbox": [detection[:, :6] for detection in nms], "keypoints": [ - detection[:, 6:].reshape(-1, self.n_keypoints, 3) for detection in nms + detection[:, 6:].reshape(-1, self.n_keypoints, 3) + for detection in nms ], "features": features, } @@ -179,7 +194,9 @@ def _build_predictions( bbox = feat[..., : self.box_offset + self.n_classes] keypoints = feat[..., self.box_offset + self.n_classes :] - box_cxcy, box_wh, box_tail = process_bbox_predictions(bbox, anchor_grid) + box_cxcy, box_wh, box_tail = process_bbox_predictions( + bbox, anchor_grid + ) grid = grid.to(box_cxcy.device) stride = stride.to(box_cxcy.device) box_cxcy = (box_cxcy + grid) * stride @@ -207,7 +224,9 @@ def _infer_bbox( ) return torch.cat((out_bbox_xy, out_bbox_wh, out_bbox[..., 4:]), dim=-1) - def _fit_to_n_heads(self, channel_list: list[int]) -> tuple[list[int], Tensor]: + def 
_fit_to_n_heads( + self, channel_list: list[int] + ) -> tuple[list[int], Tensor]: out_channel_list = channel_list[: self.n_heads] stride = torch.tensor( [ @@ -221,11 +240,15 @@ def _fit_to_n_heads(self, channel_list: list[int]) -> tuple[list[int], Tensor]: def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): for m in self.modules(): if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) elif isinstance(m, nn.BatchNorm2d): m.eps = 1e-3 m.momentum = 0.03 - elif isinstance(m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6)): + elif isinstance( + m, (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6) + ): m.inplace = True for mi, s in zip(self.learnable_mul_add_conv, self.stride): @@ -240,7 +263,8 @@ def _initialize_weights_and_biases(self, class_freq: Tensor | None = None): def _construct_grid(self, feature_width: int, feature_height: int): grid_y, grid_x = torch.meshgrid( - [torch.arange(feature_height), torch.arange(feature_width)], indexing="ij" + [torch.arange(feature_height), torch.arange(feature_width)], + indexing="ij", ) return ( torch.stack((grid_x, grid_y), 2) diff --git a/luxonis_train/nodes/necks/reppan_neck.py b/luxonis_train/nodes/necks/reppan_neck.py index 9e64e97a..107151a6 100644 --- a/luxonis_train/nodes/necks/reppan_neck.py +++ b/luxonis_train/nodes/necks/reppan_neck.py @@ -47,9 +47,15 @@ def __init__( n_repeats = n_repeats or [12, 12, 12, 12] channels_list = channels_list or [256, 128, 128, 256, 256, 512] - channels_list = [make_divisible(ch * width_mul, 8) for ch in channels_list] - n_repeats = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats] - channels_list, n_repeats = self._fit_to_n_heads(channels_list, n_repeats) + channels_list = [ + make_divisible(ch * width_mul, 8) for ch in channels_list + ] + n_repeats = [ + (max(round(i * depth_mul), 1) if i > 1 else i) for i in n_repeats + ] 
+ channels_list, n_repeats = self._fit_to_n_heads( + channels_list, n_repeats + ) self.up_blocks = nn.ModuleList() @@ -108,12 +114,16 @@ def __init__( def forward(self, inputs: list[Tensor]) -> list[Tensor]: x = inputs[-1] up_block_outs: list[Tensor] = [] - for up_block, input_ in zip(self.up_blocks, inputs[-2::-1], strict=False): + for up_block, input_ in zip( + self.up_blocks, inputs[-2::-1], strict=False + ): conv_out, x = up_block(x, input_) up_block_outs.append(conv_out) outs = [x] - for down_block, up_out in zip(self.down_blocks, reversed(up_block_outs)): + for down_block, up_out in zip( + self.down_blocks, reversed(up_block_outs) + ): x = down_block(x, up_out) outs.append(x) return outs @@ -121,7 +131,8 @@ def forward(self, inputs: list[Tensor]) -> list[Tensor]: def _fit_to_n_heads( self, channels_list: list[int], n_repeats: list[int] ) -> tuple[list[int], list[int]]: - """Fits channels_list and n_repeats to n_heads by removing or adding items. + """Fits channels_list and n_repeats to n_heads by removing or + adding items. Also scales the numbers based on offset """ diff --git a/luxonis_train/utils/boundingbox.py b/luxonis_train/utils/boundingbox.py index fa630643..9b97bfe6 100644 --- a/luxonis_train/utils/boundingbox.py +++ b/luxonis_train/utils/boundingbox.py @@ -239,7 +239,9 @@ def bbox_iou( sin_alpha_1 = torch.abs(s_cw) / sigma sin_alpha_2 = torch.abs(s_ch) / sigma threshold = pow(2, 0.5) / 2 - sin_alpha = torch.where(sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1) + sin_alpha = torch.where( + sin_alpha_1 > threshold, sin_alpha_2, sin_alpha_1 + ) angle_cost = torch.cos(torch.arcsin(sin_alpha) * 2 - math.pi / 2) # distance cost @@ -279,7 +281,8 @@ def non_max_suppression( max_det: int = 300, predicts_objectness: bool = True, ) -> list[Tensor]: - """Non-maximum suppression on model's predictions to keep only best instances. + """Non-maximum suppression on model's predictions to keep only best + instances. 
@type preds: Tensor @param preds: Model's prediction tensor of shape [bs, N, M]. @@ -332,7 +335,9 @@ def non_max_suppression( torch.max(preds[..., 5 : 5 + n_classes], dim=-1)[0] > conf_thres, ) - output = [torch.zeros((0, preds.size(-1)), device=preds.device)] * preds.size(0) + output = [ + torch.zeros((0, preds.size(-1)), device=preds.device) + ] * preds.size(0) for i, x in enumerate(preds): curr_out = x[candidate_mask[i]] @@ -355,7 +360,9 @@ def non_max_suppression( if multi_label: box_idx, class_idx = ( - (curr_out[:, 5 : 5 + n_classes] > conf_thres).nonzero(as_tuple=False).T + (curr_out[:, 5 : 5 + n_classes] > conf_thres) + .nonzero(as_tuple=False) + .T ) keep_mask[box_idx] = True curr_out = torch.cat( @@ -367,9 +374,13 @@ def non_max_suppression( 1, ) else: - conf, class_idx = curr_out[:, 5 : 5 + n_classes].max(1, keepdim=True) + conf, class_idx = curr_out[:, 5 : 5 + n_classes].max( + 1, keepdim=True + ) keep_mask[conf.view(-1) > conf_thres] = True - curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[keep_mask] + curr_out = torch.cat((bboxes, conf, class_idx.float()), 1)[ + keep_mask + ] if has_additional: curr_out = torch.hstack( @@ -406,20 +417,21 @@ def anchors_from_dataset( n_generations: int = 1000, ratio_threshold: float = 4.0, ) -> tuple[Tensor, float]: - """Generates anchors based on bounding box annotations present in provided data - loader. It uses K-Means for initial proposals which are then refined with genetic - algorithm. + """Generates anchors based on bounding box annotations present in + provided data loader. It uses K-Means for initial proposals which + are then refined with genetic algorithm. @type loader: L{torch.utils.data.DataLoader} @param loader: Data loader. @type n_anchors: int - @param n_anchors: Number of anchors, this is normally n_heads * 3 which generates 3 - anchors per layer. Defaults to 9. + @param n_anchors: Number of anchors, this is normally n_heads * 3 + which generates 3 anchors per layer. Defaults to 9. 
@type n_generations: int - @param n_generations: Number of iterations for anchor improvement with genetic - algorithm. Defaults to 1000. + @param n_generations: Number of iterations for anchor improvement + with genetic algorithm. Defaults to 1000. @type ratio_threshold: float - @param ratio_threshold: Minimum threshold for ratio. Defaults to 4.0. + @param ratio_threshold: Minimum threshold for ratio. Defaults to + 4.0. @rtype: tuple[Tensor, float] @return: Proposed anchors and the best possible recall. """ @@ -450,7 +462,8 @@ def anchors_from_dataset( except Exception: print("Fallback to random anchor init") proposed_anchors = ( - torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) * img_size + torch.sort(torch.rand(n_anchors * 2))[0].reshape(n_anchors, 2) + * img_size ) proposed_anchors = proposed_anchors[ @@ -458,7 +471,8 @@ def anchors_from_dataset( ] # sort small to large def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate how well most suitable anchor box matches each target bbox.""" + """Calculate how well most suitable anchor box matches each + target bbox.""" symmetric_size_ratios = torch.min( wh[:, None] / anchors[None], anchors[None] / wh[:, None] ) @@ -467,17 +481,20 @@ def calc_best_anchor_ratio(anchors: Tensor, wh: Tensor) -> Tensor: return best_anchor_ratio def calc_best_possible_recall(anchors: Tensor, wh: Tensor) -> Tensor: - """Calculate best possible recall if every bbox is matched to an appropriate - anchor.""" + """Calculate best possible recall if every bbox is matched to an + appropriate anchor.""" best_anchor_ratio = calc_best_anchor_ratio(anchors, wh) - best_possible_recall = (best_anchor_ratio > 1 / ratio_threshold).float().mean() + best_possible_recall = ( + (best_anchor_ratio > 1 / ratio_threshold).float().mean() + ) return best_possible_recall def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: """Fitness function used for anchor evolve.""" best_anchor_ratio = 
calc_best_anchor_ratio(anchors, wh) return ( - best_anchor_ratio * (best_anchor_ratio > 1 / ratio_threshold).float() + best_anchor_ratio + * (best_anchor_ratio > 1 / ratio_threshold).float() ).mean() # Genetic algorithm @@ -495,7 +512,9 @@ def anchor_fitness(anchors: Tensor, wh: Tensor) -> Tensor: + mutation_noise_mean ).clip(0.3, 3.0) - mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip(min=2.0) + mutated_anchors = (proposed_anchors.clone() * anchor_mutation).clip( + min=2.0 + ) mutated_fitness = anchor_fitness(mutated_anchors, wh) if mutated_fitness > best_fitness: best_fitness = mutated_fitness @@ -516,20 +535,22 @@ def anchors_for_fpn_features( grid_cell_offset: float = 0.5, multiply_with_stride: bool = False, ) -> tuple[Tensor, Tensor, list[int], Tensor]: - """Generates anchor boxes, points and strides based on FPN feature shapes and - strides. + """Generates anchor boxes, points and strides based on FPN feature + shapes and strides. @type features: list[Tensor] @param features: List of FPN features. @type strides: Tensor @param strides: Strides of FPN features. @type grid_cell_size: float - @param grid_cell_size: Cell size in respect to input image size. Defaults to 5.0. + @param grid_cell_size: Cell size in respect to input image size. + Defaults to 5.0. @type grid_cell_offset: float - @param grid_cell_offset: Percent grid cell center's offset. Defaults to 0.5. + @param grid_cell_offset: Percent grid cell center's offset. Defaults + to 0.5. @type multiply_with_stride: bool - @param multiply_with_stride: Whether to multiply per FPN values with its stride. - Defaults to False. + @param multiply_with_stride: Whether to multiply per FPN values with + its stride. Defaults to False. 
@rtype: tuple[Tensor, Tensor, list[int], Tensor] @return: BBox anchors, center anchors, number of anchors, strides """ @@ -563,7 +584,9 @@ def anchors_for_fpn_features( anchors.append(anchor) anchor_point = ( - torch.stack([shift_x, shift_y], dim=-1).reshape(-1, 2).to(feature.dtype) + torch.stack([shift_x, shift_y], dim=-1) + .reshape(-1, 2) + .to(feature.dtype) ) anchor_points.append(anchor_point) @@ -592,7 +615,8 @@ def process_bbox_predictions( @type anchor: Tensor @param anchor: Anchor boxes @rtype: tuple[Tensor, Tensor, Tensor] - @return: xy and wh predictions and tail. The tail is anything after xywh. + @return: xy and wh predictions and tail. The tail is anything after + xywh. """ out_bbox = bbox.sigmoid() out_bbox_xy = out_bbox[..., 0:2] * 2.0 - 0.5 @@ -648,10 +672,12 @@ def compute_iou_loss( else: bbox_mask = torch.ones_like(pred_bboxes, dtype=torch.bool) - pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape([-1, 4]) - target_bboxes_pos = torch.masked_select(target_bboxes, bbox_mask).reshape( + pred_bboxes_pos = torch.masked_select(pred_bboxes, bbox_mask).reshape( [-1, 4] ) + target_bboxes_pos = torch.masked_select( + target_bboxes, bbox_mask + ).reshape([-1, 4]) iou = bbox_iou( pred_bboxes_pos, diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 670e1008..d73e882e 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -11,7 +11,12 @@ LuxonisFileSystem, ) from pydantic import Field, field_validator, model_validator -from pydantic.types import FilePath, NonNegativeFloat, NonNegativeInt, PositiveInt +from pydantic.types import ( + FilePath, + NonNegativeFloat, + NonNegativeInt, + PositiveInt, +) from typing_extensions import Self logger = logging.getLogger(__name__) @@ -104,7 +109,9 @@ def check_predefined_model(self) -> Self: from luxonis_train.utils.registry import MODELS if self.predefined_model: - logger.info(f"Using predefined model: `{self.predefined_model.name}`") + logger.info( 
+ f"Using predefined model: `{self.predefined_model.name}`" + ) model = MODELS.get(self.predefined_model.name)( **self.predefined_model.params ) @@ -130,7 +137,9 @@ def check_graph(self) -> Self: raise ValueError("Model graph is not acyclic.") if not self.outputs: outputs: list[str] = [] # nodes which are not inputs to any nodes - inputs = set(node_name for node in self.nodes for node_name in node.inputs) + inputs = set( + node_name for node in self.nodes for node_name in node.inputs + ) for node in self.nodes: name = node.alias or node.name if name not in inputs: @@ -233,7 +242,9 @@ class PreprocessingConfig(BaseModelExtraForbid): def check_normalize(self) -> Self: if self.normalize.active: self.augmentations.append( - AugmentationConfig(name="Normalize", params=self.normalize.params) + AugmentationConfig( + name="Normalize", params=self.normalize.params + ) ) return self @@ -334,7 +345,9 @@ class OnnxExportConfig(BaseModelExtraForbid): class BlobconverterExportConfig(BaseModelExtraForbid): active: bool = False shaves: int = 6 - version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = "2022.1" + version: Literal["2021.2", "2021.3", "2021.4", "2022.1", "2022.3_RVC3"] = ( + "2022.1" + ) class ArchiveConfig(BaseModelExtraForbid): @@ -416,7 +429,9 @@ def get_config( return instance fs = LuxonisFileSystem(cfg) if fs.is_mlflow: - logger.info("Setting `project_id` and `run_id` to config's MLFlow run") + logger.info( + "Setting `project_id` and `run_id` to config's MLFlow run" + ) instance.tracker.project_id = fs.experiment_id instance.tracker.run_id = fs.run_id return instance diff --git a/luxonis_train/utils/dataset_metadata.py b/luxonis_train/utils/dataset_metadata.py index 84c5aa85..35ebbef8 100644 --- a/luxonis_train/utils/dataset_metadata.py +++ b/luxonis_train/utils/dataset_metadata.py @@ -12,14 +12,16 @@ def __init__( n_keypoints: dict[str, int] | None = None, loader: BaseLoaderTorch | None = None, ): - """An object containing metadata about the 
dataset. Used to infer the number of - classes, number of keypoints, I{etc.} instead of passing them as arguments to - the model. + """An object containing metadata about the dataset. Used to + infer the number of classes, number of keypoints, I{etc.} + instead of passing them as arguments to the model. @type classes: dict[str, list[str]] | None - @param classes: Dictionary mapping tasks to lists of class names. + @param classes: Dictionary mapping tasks to lists of class + names. @type n_keypoints: dict[str, int] | None - @param n_keypoints: Dictionary mapping tasks to the number of keypoints. + @param n_keypoints: Dictionary mapping tasks to the number of + keypoints. @type loader: DataLoader | None @param loader: Dataset loader. """ @@ -34,13 +36,17 @@ def n_classes(self, task: str | None = None) -> int: @param task: Task to get the number of classes for. @rtype: int @return: Number of classes for the specified label type. - @raises ValueError: If the C{task} is not present in the dataset. - @raises RuntimeError: If the C{task} was not provided and the dataset contains - different number of classes for different label types. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different number of classes for different + label types. """ if task is not None: if task not in self._classes: - raise ValueError(f"Task '{task}' is not present in the dataset.") + raise ValueError( + f"Task '{task}' is not present in the dataset." + ) return len(self._classes[task]) n_classes = len(list(self._classes.values())[0]) for classes in self._classes.values(): @@ -58,13 +64,17 @@ def n_keypoints(self, task: str | None = None) -> int: @param task: Task to get the number of keypoints for. @rtype: int @return: Number of keypoints for the specified label type. - @raises ValueError: If the C{task} is not present in the dataset. 
- @raises RuntimeError: If the C{task} was not provided and the dataset contains - different number of keypoints for different label types. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different number of keypoints for different + label types. """ if task is not None: if task not in self._n_keypoints: - raise ValueError(f"Task '{task}' is not present in the dataset.") + raise ValueError( + f"Task '{task}' is not present in the dataset." + ) return self._n_keypoints[task] n_keypoints = next(iter(self._n_keypoints.values())) for n in self._n_keypoints.values(): @@ -82,13 +92,17 @@ def classes(self, task: str | None = None) -> list[str]: @param task: Task to get the class names for. @rtype: list[str] @return: List of class names for the specified label type. - @raises ValueError: If the C{task} is not present in the dataset. - @raises RuntimeError: If the C{task} was not provided and the dataset contains - different class names for different label types. + @raises ValueError: If the C{task} is not present in the + dataset. + @raises RuntimeError: If the C{task} was not provided and the + dataset contains different class names for different label + types. """ if task is not None: if task not in self._classes: - raise ValueError(f"Task type {task} is not present in the dataset.") + raise ValueError( + f"Task type {task} is not present in the dataset." + ) return self._classes[task] class_names = list(self._classes.values())[0] for classes in self._classes.values(): @@ -98,15 +112,18 @@ def classes(self, task: str | None = None) -> list[str]: ) return class_names - def autogenerate_anchors(self, n_heads: int) -> tuple[list[list[float]], float]: + def autogenerate_anchors( + self, n_heads: int + ) -> tuple[list[list[float]], float]: """Automatically generates anchors for the provided dataset. 
@type n_heads: int @param n_heads: Number of heads to generate anchors for. @rtype: tuple[list[list[float]], float] - @return: List of anchors in [-1,6] format and recall of the anchors. - @raises RuntimeError: If the dataset loader was not provided during - initialization. + @return: List of anchors in [-1,6] format and recall of the + anchors. + @raises RuntimeError: If the dataset loader was not provided + during initialization. """ if self._loader is None: raise RuntimeError( @@ -127,7 +144,8 @@ def from_loader(cls, loader: BaseLoaderTorch) -> "DatasetMetadata": @type dataset: LuxonisDataset @param dataset: Dataset to create the metadata from. @rtype: DatasetMetadata - @return: Instance of L{DatasetMetadata} created from the provided dataset. + @return: Instance of L{DatasetMetadata} created from the + provided dataset. """ classes = loader.get_classes() n_keypoints = loader.get_n_keypoints() diff --git a/luxonis_train/utils/exceptions.py b/luxonis_train/utils/exceptions.py index 6621e4eb..bab8c1aa 100644 --- a/luxonis_train/utils/exceptions.py +++ b/luxonis_train/utils/exceptions.py @@ -1,8 +1,11 @@ class IncompatibleException(Exception): - """Raised when two parts of the model are incompatible with each other.""" + """Raised when two parts of the model are incompatible with each + other.""" @classmethod - def from_missing_task(cls, task: str, present_tasks: list[str], class_name: str): + def from_missing_task( + cls, task: str, present_tasks: list[str], class_name: str + ): return cls( f"{class_name} requires '{task}' label, but it was not found in " f"the label dictionary. Available labels: {present_tasks}." diff --git a/luxonis_train/utils/general.py b/luxonis_train/utils/general.py index a1a2cbce..45013807 100644 --- a/luxonis_train/utils/general.py +++ b/luxonis_train/utils/general.py @@ -10,7 +10,8 @@ def make_divisible(x: int | float, divisor: int) -> int: - """Upward revision the value x to make it evenly divisible by the divisor. 
+ """Upward revision the value x to make it evenly divisible by the + divisor. Equivalent to M{ceil(x / divisor) * divisor}. @@ -27,16 +28,20 @@ def make_divisible(x: int | float, divisor: int) -> int: def infer_upscale_factor( in_size: tuple[int, int] | int, orig_size: tuple[int, int] | int ) -> int: - """Infer the upscale factor from the input shape and the original shape. + """Infer the upscale factor from the input shape and the original + shape. @type in_size: tuple[int, int] | int - @param in_size: Input shape as a tuple of (height, width) or just one of them. + @param in_size: Input shape as a tuple of (height, width) or just + one of them. @type orig_size: tuple[int, int] | int - @param orig_size: Original shape as a tuple of (height, width) or just one of them. + @param orig_size: Original shape as a tuple of (height, width) or + just one of them. @rtype: int @return: Upscale factor. - @raise ValueError: If the C{in_size} cannot be upscaled to the C{orig_size}. This - can happen if the upscale factors are not integers or are different. + @raise ValueError: If the C{in_size} cannot be upscaled to the + C{orig_size}. This can happen if the upscale factors are not + integers or are different. """ def _infer_upscale_factor(in_size: int, orig_size: int) -> int | float: @@ -79,11 +84,14 @@ def _infer_upscale_factor(in_size: int, orig_size: int) -> int | float: f"Width: {wf}, height: {hf}." ) - raise NotImplementedError(f"Unexpected case: {width_factor}, {height_factor}") + raise NotImplementedError( + f"Unexpected case: {width_factor}, {height_factor}" + ) def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: - """Converts a packet of tensors to a packet of shapes. Used for debugging purposes. + """Converts a packet of tensors to a packet of shapes. Used for + debugging purposes. @type packet: Packet[Tensor] @param packet: Packet of tensors. 
@@ -100,16 +108,20 @@ def to_shape_packet(packet: Packet[Tensor]) -> Packet[Size]: def get_with_default( - value: T | None, action_name: str, caller_name: str | None = None, *, default: T + value: T | None, + action_name: str, + caller_name: str | None = None, + *, + default: T, ) -> T: - """Returns value if it is not C{None}, otherwise returns the default value and log - an info. + """Returns value if it is not C{None}, otherwise returns the default + value and log an info. @type value: T | None @param value: Value to return. @type action_name: str - @param action_name: Name of the action for which the default value is being used. - Used for logging. + @param action_name: Name of the action for which the default value + is being used. Used for logging. @type caller_name: str | None @param caller_name: Name of the caller function. Used for logging. @type default: T diff --git a/luxonis_train/utils/graph.py b/luxonis_train/utils/graph.py index 1f2f043a..a2b72832 100644 --- a/luxonis_train/utils/graph.py +++ b/luxonis_train/utils/graph.py @@ -12,8 +12,9 @@ def is_acyclic(graph: Graph) -> bool: """Tests if graph is acyclic. @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). + @param graph: Graph in a format of a dictionary of predecessors. + Keys are node names, values are inputs to the node (list of node + names). @rtype: bool @return: True if graph is acyclic, False otherwise. """ @@ -53,13 +54,14 @@ def traverse_graph( """Traverses the graph in topological order. @type graph: dict[str, list[str]] - @param graph: Graph in a format of a dictionary of predecessors. Keys are node - names, values are inputs to the node (list of node names). + @param graph: Graph in a format of a dictionary of predecessors. + Keys are node names, values are inputs to the node (list of node + names). 
@type nodes: dict[str, T] @param nodes: Dictionary mapping node names to node objects. @rtype: Iterator[tuple[str, T, list[str], list[str]]] - @return: Iterator of tuples containing node name, node object, node dependencies and - unprocessed nodes. + @return: Iterator of tuples containing node name, node object, node + dependencies and unprocessed nodes. @raises RuntimeError: If the graph is malformed. """ # sort the set to allow reproducibility diff --git a/luxonis_train/utils/keypoints.py b/luxonis_train/utils/keypoints.py index 613e3fb8..9fbc741d 100644 --- a/luxonis_train/utils/keypoints.py +++ b/luxonis_train/utils/keypoints.py @@ -6,7 +6,9 @@ logger = logging.getLogger(__name__) -def process_keypoints_predictions(keypoints: Tensor) -> tuple[Tensor, Tensor, Tensor]: +def process_keypoints_predictions( + keypoints: Tensor, +) -> tuple[Tensor, Tensor, Tensor]: """Extracts x, y and visibility from keypoints predictions. @type keypoints: Tensor @@ -23,13 +25,15 @@ def process_keypoints_predictions(keypoints: Tensor) -> tuple[Tensor, Tensor, Te def get_sigmas( - sigmas: list[float] | None, n_keypoints: int, caller_name: str | None = None + sigmas: list[float] | None, + n_keypoints: int, + caller_name: str | None = None, ) -> Tensor: """Validate or create sigma values for each keypoint. @type sigmas: list[float] | None - @param sigmas: List of sigmas for each keypoint. If C{None}, then default sigmas are - used. + @param sigmas: List of sigmas for each keypoint. If C{None}, then + default sigmas are used. @type n_keypoints: int @param n_keypoints: Number of keypoints. 
@type caller_name: str | None diff --git a/luxonis_train/utils/registry.py b/luxonis_train/utils/registry.py index c41a26cd..02532d32 100644 --- a/luxonis_train/utils/registry.py +++ b/luxonis_train/utils/registry.py @@ -1,5 +1,5 @@ -"""This module implements a metaclass for automatic registration of classes.""" - +"""This module implements a metaclass for automatic registration of +classes.""" import lightning.pytorch as pl from luxonis_ml.utils.registry import Registry @@ -11,16 +11,24 @@ CALLBACKS: Registry[type[pl.Callback]] = Registry(name="callbacks") """Registry for all callbacks.""" -LOADERS: Registry[type["lt.loaders.BaseLoaderTorch"]] = Registry(name="loaders") +LOADERS: Registry[type["lt.loaders.BaseLoaderTorch"]] = Registry( + name="loaders" +) """Registry for all loaders.""" -LOSSES: Registry[type["lt.attached_modules.BaseLoss"]] = Registry(name="losses") +LOSSES: Registry[type["lt.attached_modules.BaseLoss"]] = Registry( + name="losses" +) """Registry for all losses.""" -METRICS: Registry[type["lt.attached_modules.BaseMetric"]] = Registry(name="metrics") +METRICS: Registry[type["lt.attached_modules.BaseMetric"]] = Registry( + name="metrics" +) """Registry for all metrics.""" -MODELS: Registry[type["lt.models.BasePredefinedModel"]] = Registry(name="models") +MODELS: Registry[type["lt.models.BasePredefinedModel"]] = Registry( + name="models" +) """Registry for all models.""" NODES: Registry[type["lt.nodes.BaseNode"]] = Registry(name="nodes") @@ -32,5 +40,7 @@ SCHEDULERS: Registry[type[_LRScheduler]] = Registry(name="schedulers") """Registry for all schedulers.""" -VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer"]] = Registry("visualizers") +VISUALIZERS: Registry[type["lt.visualizers.BaseVisualizer"]] = Registry( + "visualizers" +) """Registry for all visualizers.""" diff --git a/luxonis_train/utils/tracker.py b/luxonis_train/utils/tracker.py index 1c4a42e7..35d7af70 100644 --- a/luxonis_train/utils/tracker.py +++ 
b/luxonis_train/utils/tracker.py @@ -6,7 +6,8 @@ class LuxonisTrackerPL(LuxonisTracker, Logger): - """Implementation of LuxonisTracker that is compatible with PytorchLightning.""" + """Implementation of LuxonisTracker that is compatible with + PytorchLightning.""" def __init__(self, *, _auto_finalize: bool = True, **kwargs: Any): """ diff --git a/luxonis_train/utils/types.py b/luxonis_train/utils/types.py index 158cf185..3a7ca7f4 100644 --- a/luxonis_train/utils/types.py +++ b/luxonis_train/utils/types.py @@ -7,15 +7,15 @@ """Kwargs is a dictionary containing keyword arguments.""" Labels = dict[str, tuple[Tensor, LabelType]] -"""Labels is a dictionary containing a tuple of tensors and their corresponding label -type.""" +"""Labels is a dictionary containing a tuple of tensors and their +corresponding label type.""" AttachIndexType = Literal["all"] | int | tuple[int, int] | tuple[int, int, int] -"""AttachIndexType is used to specify to which output of the prevoius node does the -current node attach to. +"""AttachIndexType is used to specify to which output of the prevoius +node does the current node attach to. -It can be either "all" (all outputs), an index of the output or a tuple of indices of -the output (specifying a range of outputs). +It can be either "all" (all outputs), an index of the output or a tuple +of indices of the output (specifying a range of outputs). 
""" T = TypeVar("T", Tensor, Size) diff --git a/pyproject.toml b/pyproject.toml index 97635f36..1bfe40ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ version = {attr = "luxonis_train.__version__"} [tool.ruff] target-version = "py310" -line-length = 88 +line-length = 79 indent-width = 4 [tool.ruff.lint] @@ -47,6 +47,8 @@ select = ["E4", "E7", "E9", "F", "W", "B", "I"] [tool.docformatter] black = true +wrap-summaries = 72 +wrap-descriptions = 72 [tool.pyright] typeCheckingMode = "basic" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 9aa7d4ab..ef5a2142 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -76,7 +76,9 @@ def generator(): for bbox_annotation in annotations.get( "BoundingBox2DAnnotation", defaultdict(list) )["values"]: - class_ = bbox_annotation["labelName"].split("-")[-1].lower() + class_ = ( + bbox_annotation["labelName"].split("-")[-1].lower() + ) if class_ == "motorbiek": class_ = "motorbike" x, y = bbox_annotation["origin"] @@ -137,7 +139,10 @@ def generator(): ] mask = cv2.cvtColor( cv2.imread( - str(sequence_path / vehicle_type_segmentation["filename"]) + str( + sequence_path + / vehicle_type_segmentation["filename"] + ) ), cv2.COLOR_BGR2RGB, ) @@ -189,7 +194,10 @@ def coco_dataset() -> LuxonisDataset: url = "https://drive.google.com/uc?id=1XlvFK7aRmt8op6-hHkWVKIJQeDtOwoRT" output_zip = WORK_DIR / "COCO_people_subset.zip" - if not output_zip.exists() and not (WORK_DIR / "COCO_people_subset").exists(): + if ( + not output_zip.exists() + and not (WORK_DIR / "COCO_people_subset").exists() + ): gdown.download(url, str(output_zip), quiet=False) parser = LuxonisParser( diff --git a/tests/integration/multi_input_modules.py b/tests/integration/multi_input_modules.py index 5cb32225..e6fd0476 100644 --- a/tests/integration/multi_input_modules.py +++ b/tests/integration/multi_input_modules.py @@ -61,24 +61,19 @@ def unwrap(self, inputs: list[dict[str, list[Tensor]]]): return 
[item for inp in inputs for key in inp for item in inp[key]] -class FullBackbone(MultiInputTestBaseNode): - ... +class FullBackbone(MultiInputTestBaseNode): ... -class RGBDBackbone(MultiInputTestBaseNode): - ... +class RGBDBackbone(MultiInputTestBaseNode): ... -class PointcloudBackbone(MultiInputTestBaseNode): - ... +class PointcloudBackbone(MultiInputTestBaseNode): ... -class FusionNeck(MultiInputTestBaseNode): - ... +class FusionNeck(MultiInputTestBaseNode): ... -class FusionNeck2(MultiInputTestBaseNode): - ... +class FusionNeck2(MultiInputTestBaseNode): ... class CustomSegHead1(MultiInputTestBaseNode): diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 7d3587c4..1367d996 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -104,7 +104,9 @@ def test_custom_tasks( with tarfile.open(archive_path) as tar: extracted_cfg = tar.extractfile("config.json") - assert extracted_cfg is not None, "Config JSON not found in the archive." + assert ( + extracted_cfg is not None + ), "Config JSON not found in the archive." 
generated_config = json.loads(extracted_cfg.read().decode()) del generated_config["model"]["heads"][1]["metadata"]["anchors"] diff --git a/tests/unittests/test_assigners/test_atts_assigner.py b/tests/unittests/test_assigners/test_atts_assigner.py index f6af0c01..4ab6f939 100644 --- a/tests/unittests/test_assigners/test_atts_assigner.py +++ b/tests/unittests/test_assigners/test_atts_assigner.py @@ -25,7 +25,12 @@ def test_forward(): pred_bboxes = torch.rand(bs, n_anchors, 4) labels, bboxes, scores, mask, assigned_gt_idx = assigner.forward( - anchor_bboxes, n_level_bboxes, gt_labels, gt_bboxes, mask_gt, pred_bboxes + anchor_bboxes, + n_level_bboxes, + gt_labels, + gt_bboxes, + mask_gt, + pred_bboxes, ) assert labels.shape == (bs, n_anchors) @@ -59,7 +64,11 @@ def test_select_topk_candidates(): ) assert is_in_topk.shape == (batch_size, n_max_boxes, n_anchors) - assert topk_idxs.shape == (batch_size, n_max_boxes, topk * len(n_level_bboxes)) + assert topk_idxs.shape == ( + batch_size, + n_max_boxes, + topk * len(n_level_bboxes), + ) def test_get_positive_samples(): @@ -97,7 +106,11 @@ def test_get_final_assignments(): assigned_gt_idx = torch.randint(0, n_max_boxes, (batch_size, n_anchors)) mask_pos_sum = torch.randint(0, 2, (batch_size, n_anchors)) - assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = assigner._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) diff --git a/tests/unittests/test_assigners/test_tal_assigner.py b/tests/unittests/test_assigners/test_tal_assigner.py index f028b55a..cb94b62d 100644 --- a/tests/unittests/test_assigners/test_tal_assigner.py +++ b/tests/unittests/test_assigners/test_tal_assigner.py @@ -4,7 +4,9 @@ def test_init(): - assigner = TaskAlignedAssigner(n_classes=80, topk=13, alpha=1.0, beta=6.0, eps=1e-9) + assigner = TaskAlignedAssigner( + n_classes=80, topk=13, alpha=1.0, beta=6.0, eps=1e-9 + ) assert 
assigner.n_classes == 80 assert assigner.topk == 13 assert assigner.alpha == 1.0 @@ -119,7 +121,11 @@ def test_get_final_assignments(): assigner.bs = batch_size # Set batch size assigner.n_max_boxes = gt_bboxes.size(1) - assigned_labels, assigned_bboxes, assigned_scores = assigner._get_final_assignments( + ( + assigned_labels, + assigned_bboxes, + assigned_scores, + ) = assigner._get_final_assignments( gt_labels, gt_bboxes, assigned_gt_idx, mask_pos_sum ) diff --git a/tests/unittests/test_assigners/test_utils.py b/tests/unittests/test_assigners/test_utils.py index 1b071327..d10e1d47 100644 --- a/tests/unittests/test_assigners/test_utils.py +++ b/tests/unittests/test_assigners/test_utils.py @@ -1,6 +1,10 @@ import torch -from luxonis_train.assigners.utils import batch_iou, candidates_in_gt, fix_collisions +from luxonis_train.assigners.utils import ( + batch_iou, + candidates_in_gt, + fix_collisions, +) def test_fix_collisions(): diff --git a/tests/unittests/test_base_attached_module.py b/tests/unittests/test_base_attached_module.py index 96956d82..c6ffdd48 100644 --- a/tests/unittests/test_base_attached_module.py +++ b/tests/unittests/test_base_attached_module.py @@ -6,29 +6,25 @@ class DummyBackbone(BaseNode): - def forward(self, _): - ... + def forward(self, _): ... class DummySegmentationHead(BaseNode): tasks = [LabelType.SEGMENTATION] - def forward(self, _): - ... + def forward(self, _): ... class DummyBBoxHead(BaseNode): tasks = [LabelType.BOUNDINGBOX] - def forward(self, _): - ... + def forward(self, _): ... class DummyDetectionHead(BaseNode): tasks = [LabelType.BOUNDINGBOX, LabelType.KEYPOINTS] - def forward(self, _): - ... + def forward(self, _): ... class DummyLoss(BaseLoss): @@ -37,13 +33,11 @@ class DummyLoss(BaseLoss): (LabelType.KEYPOINTS, LabelType.BOUNDINGBOX), ] - def forward(self, _): - ... + def forward(self, _): ... class NoLabelLoss(BaseLoss): - def forward(self, _): - ... + def forward(self, _): ... 
@pytest.fixture @@ -72,7 +66,9 @@ def test_valid_properties(): assert loss.node_tasks == {LabelType.SEGMENTATION: "segmentation"} assert loss.required_labels == [LabelType.SEGMENTATION] assert no_labels_loss.node == head - assert no_labels_loss.node_tasks == {LabelType.SEGMENTATION: "segmentation"} + assert no_labels_loss.node_tasks == { + LabelType.SEGMENTATION: "segmentation" + } assert no_labels_loss.required_labels == [] @@ -116,7 +112,9 @@ def test_input_tensors(inputs): seg_head = DummySegmentationHead() seg_loss = DummyLoss(node=seg_head) assert seg_loss.get_input_tensors(inputs) == ["segmentation"] - assert seg_loss.get_input_tensors(inputs, "segmentation") == ["segmentation"] + assert seg_loss.get_input_tensors(inputs, "segmentation") == [ + "segmentation" + ] assert seg_loss.get_input_tensors(inputs, LabelType.SEGMENTATION) == [ "segmentation" ] @@ -140,7 +138,10 @@ def test_prepare(inputs, labels): assert seg_loss.prepare(inputs, labels) == ("segmentation", "segmentation") inputs["segmentation"].append("segmentation2") - assert seg_loss.prepare(inputs, labels) == ("segmentation2", "segmentation") + assert seg_loss.prepare(inputs, labels) == ( + "segmentation2", + "segmentation", + ) with pytest.raises(RuntimeError): NoLabelLoss(node=backbone).prepare(inputs, labels) diff --git a/tests/unittests/test_base_node.py b/tests/unittests/test_base_node.py index 47955699..15668db6 100644 --- a/tests/unittests/test_base_node.py +++ b/tests/unittests/test_base_node.py @@ -9,8 +9,7 @@ class DummyNode(BaseNode, register=False): - def forward(self, _): - ... + def forward(self, _): ... 
@pytest.fixture @@ -35,7 +34,9 @@ def packet() -> Packet[Tensor]: ((1, -1), [2, 3, 4]), ], ) -def test_attach_index(attach_index: AttachIndexType, expected: list[int] | int): +def test_attach_index( + attach_index: AttachIndexType, expected: list[int] | int +): lst = [1, 2, 3, 4, 5] class DummyBaseNode: @@ -91,11 +92,14 @@ def test_check_type_override(): class DummyNode(BaseNode, register=False): in_channels: int - def forward(self, _): - ... + def forward(self, _): ... with pytest.raises(IncompatibleException): - DummyNode(input_shapes=[{"features": [Size((3, 224, 224)) for _ in range(3)]}]) + DummyNode( + input_shapes=[ + {"features": [Size((3, 224, 224)) for _ in range(3)]} + ] + ) def test_tasks(): @@ -108,7 +112,9 @@ class DummyMultiHead(DummyNode): dummy_head = DummyHead() dummy_node = DummyNode() dummy_multi_head = DummyMultiHead(n_keypoints=4) - assert dummy_head.get_task_name(LabelType.CLASSIFICATION) == "classification" + assert ( + dummy_head.get_task_name(LabelType.CLASSIFICATION) == "classification" + ) assert dummy_head.task == "classification" with pytest.raises(ValueError): dummy_head.get_task_name(LabelType.SEGMENTATION) diff --git a/tests/unittests/test_loaders/test_base_loader.py b/tests/unittests/test_loaders/test_base_loader.py index abf8df72..dee1ecef 100644 --- a/tests/unittests/test_loaders/test_base_loader.py +++ b/tests/unittests/test_loaders/test_base_loader.py @@ -75,7 +75,12 @@ def build_batch_element(): with subtests.test("segmentation"): assert "segmentation" in annotations - assert annotations["segmentation"][0].shape == (batch_size, 1, 224, 224) + assert annotations["segmentation"][0].shape == ( + batch_size, + 1, + 224, + 224, + ) assert annotations["segmentation"][0].dtype == torch.int64 with subtests.test("keypoints"): diff --git a/tests/unittests/test_losses/test_bce_with_logits_loss.py b/tests/unittests/test_losses/test_bce_with_logits_loss.py index 778ce302..f94b5cb1 100644 --- 
a/tests/unittests/test_losses/test_bce_with_logits_loss.py +++ b/tests/unittests/test_losses/test_bce_with_logits_loss.py @@ -16,7 +16,9 @@ def test_forward_pass(): predictions = torch.full([bs, n_cl], 1.5) # logit loss_fn = BCEWithLogitsLoss() - loss = loss_fn.forward(predictions, targets) # -log(sigmoid(1.5)) = 0.2014 + loss = loss_fn.forward( + predictions, targets + ) # -log(sigmoid(1.5)) = 0.2014 assert isinstance(loss, torch.Tensor) assert loss.shape == torch.Size([]) diff --git a/tests/unittests/test_utils/test_boxutils.py b/tests/unittests/test_utils/test_boxutils.py index a0d238bc..2b05a428 100644 --- a/tests/unittests/test_utils/test_boxutils.py +++ b/tests/unittests/test_utils/test_boxutils.py @@ -15,10 +15,14 @@ def generate_random_bboxes( n_bboxes: int, max_width: int, max_height: int, format: str = "xyxy" ): - x1y1 = torch.rand(n_bboxes, 2) * torch.tensor([max_width - 1, max_height - 1]) + x1y1 = torch.rand(n_bboxes, 2) * torch.tensor( + [max_width - 1, max_height - 1] + ) wh = ( - torch.rand(n_bboxes, 2) * (torch.tensor([max_width, max_height]) - 1 - x1y1) + 1 + torch.rand(n_bboxes, 2) + * (torch.tensor([max_width, max_height]) - 1 - x1y1) + + 1 ) if format == "xyxy": @@ -30,7 +34,9 @@ def generate_random_bboxes( cxcy = x1y1 + wh / 2 bboxes = torch.cat((cxcy, wh), dim=1) else: - raise ValueError("Unsupported format. Choose from 'xyxy', 'xywh', 'cxcywh'.") + raise ValueError( + "Unsupported format. Choose from 'xyxy', 'xywh', 'cxcywh'." 
+ ) return bboxes @@ -87,7 +93,9 @@ def test_compute_iou_loss(): pred_bboxes = generate_random_bboxes(8, 640, 640, "xyxy") target_bboxes = generate_random_bboxes(8, 640, 640, "xyxy") - loss_iou, iou = compute_iou_loss(pred_bboxes, target_bboxes, iou_type="giou") + loss_iou, iou = compute_iou_loss( + pred_bboxes, target_bboxes, iou_type="giou" + ) assert isinstance(loss_iou, torch.Tensor) assert isinstance(iou, torch.Tensor) @@ -113,9 +121,12 @@ def test_anchors_for_fpn_features(): features = [torch.rand(1, 256, 14, 14), torch.rand(1, 256, 28, 28)] strides = torch.tensor([8, 16]) - anchors, anchor_points, n_anchors_list, stride_tensor = anchors_for_fpn_features( - features, strides - ) + ( + anchors, + anchor_points, + n_anchors_list, + stride_tensor, + ) = anchors_for_fpn_features(features, strides) assert isinstance(anchors, torch.Tensor) assert isinstance(anchor_points, torch.Tensor) diff --git a/tests/unittests/test_utils/test_graph.py b/tests/unittests/test_utils/test_graph.py index ae308d6a..c63e4b72 100644 --- a/tests/unittests/test_utils/test_graph.py +++ b/tests/unittests/test_utils/test_graph.py @@ -37,7 +37,11 @@ def test_acyclic(graph: Graph, acyclic: bool): ( {"a": ["b"], "b": ["c"], "c": []}, {"a": 1, "b": 2, "c": 3}, - [("c", 3, [], ["a", "b"]), ("b", 2, ["c"], ["a"]), ("a", 1, ["b"], [])], + [ + ("c", 3, [], ["a", "b"]), + ("b", 2, ["c"], ["a"]), + ("a", 1, ["b"], []), + ], ), ( {"a": ["b", "c"], "b": ["d"], "c": ["d"], "d": []}, diff --git a/tests/unittests/test_utils/test_keypoints.py b/tests/unittests/test_utils/test_keypoints.py index f14c4e37..3d20dae6 100644 --- a/tests/unittests/test_utils/test_keypoints.py +++ b/tests/unittests/test_utils/test_keypoints.py @@ -1,7 +1,10 @@ import pytest import torch -from luxonis_train.utils.keypoints import get_sigmas, process_keypoints_predictions +from luxonis_train.utils.keypoints import ( + get_sigmas, + process_keypoints_predictions, +) def test_get_sigmas(): From 
98c33fa1f0b6f0369f37a0ba504f4efb14e7095f Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 09:08:51 +0200 Subject: [PATCH 067/102] torchmetrics tests --- .../test_metrics/test_torchmetrics.py | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/unittests/test_metrics/test_torchmetrics.py diff --git a/tests/unittests/test_metrics/test_torchmetrics.py b/tests/unittests/test_metrics/test_torchmetrics.py new file mode 100644 index 00000000..2bc8b8e0 --- /dev/null +++ b/tests/unittests/test_metrics/test_torchmetrics.py @@ -0,0 +1,31 @@ +import pytest +import torchmetrics +from luxonis_ml.data import LabelType + +from luxonis_train.attached_modules.metrics.torchmetrics import ( + TorchMetricWrapper, +) +from luxonis_train.nodes import BaseNode + + +def test_torchmetrics(): + class DummyNode(BaseNode): + tasks = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + + def forward(self, _): ... + + class DummyMetric(TorchMetricWrapper): + supported_labels = [LabelType.CLASSIFICATION, LabelType.SEGMENTATION] + Metric = torchmetrics.Accuracy + + node_1_class = DummyNode(n_classes=1) + node_2_classes = DummyNode(n_classes=2) + assert DummyMetric(node=node_1_class)._task == "binary" + assert DummyMetric(node=node_2_classes)._task == "multiclass" + assert DummyMetric(task="binary") + + with pytest.raises(ValueError): + DummyMetric() + + with pytest.raises(ValueError): + DummyMetric(task="multiclass") From 01259302a5bb53f9c87def1fea01f06659141344 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 09:09:21 +0200 Subject: [PATCH 068/102] fixed config alias --- luxonis_train/utils/config.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index d73e882e..ba1c6560 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -10,7 +10,7 @@ LuxonisConfig, LuxonisFileSystem, ) -from pydantic 
import Field, field_validator, model_validator +from pydantic import AliasChoices, Field, field_validator, model_validator from pydantic.types import ( FilePath, NonNegativeFloat, @@ -280,7 +280,14 @@ class TrainerConfig(BaseModelExtraForbid): accelerator: Literal["auto", "cpu", "gpu", "tpu"] = "auto" devices: int | list[int] | str = "auto" strategy: Literal["auto", "ddp"] = "auto" - n_sanity_val_steps: Annotated[int, Field(alias="num_sanity_val_steps")] = 2 + n_sanity_val_steps: Annotated[ + int, + Field( + validation_alias=AliasChoices( + "n_sanity_val_steps", "num_sanity_val_steps" + ) + ), + ] = 2 profiler: Literal["simple", "advanced"] | None = None matmul_precision: Literal["medium", "high", "highest"] | None = None verbose: bool = True @@ -291,10 +298,16 @@ class TrainerConfig(BaseModelExtraForbid): accumulate_grad_batches: PositiveInt = 1 use_weighted_sampler: bool = False epochs: PositiveInt = 100 - n_workers: Annotated[NonNegativeInt, Field(alias="num_workers")] = 4 + n_workers: Annotated[ + NonNegativeInt, + Field(validation_alias=AliasChoices("n_workers", "num_workers")), + ] = 4 train_metrics_interval: Literal[-1] | PositiveInt = -1 validation_interval: Literal[-1] | PositiveInt = 1 - n_log_images: Annotated[NonNegativeInt, Field(alias="num_log_images")] = 4 + n_log_images: Annotated[ + NonNegativeInt, + Field(validation_alias=AliasChoices("n_log_images", "num_log_images")), + ] = 4 skip_last_batch: bool = True pin_memory: bool = True log_sub_losses: bool = True From c88c298255b458d91105edf205a7461e3c4fb9e3 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 09:09:42 +0200 Subject: [PATCH 069/102] added type --- luxonis_train/attached_modules/visualizers/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/attached_modules/visualizers/utils.py b/luxonis_train/attached_modules/visualizers/utils.py index d2bfa931..402ab98f 100644 --- a/luxonis_train/attached_modules/visualizers/utils.py +++ 
b/luxonis_train/attached_modules/visualizers/utils.py @@ -321,7 +321,7 @@ def resize_to_match( keep_size: Literal["larger", "smaller", "first", "second"] = "larger", resize_along: Literal["width", "height", "exact"] = "height", keep_aspect_ratio: bool = True, - ): + ) -> tuple[Tensor, Tensor]: """Resizes two images so they have the same size. Resizes two images so they can be concateneted together. It's possible to From 77736839527f5f60687f35aadfbd0b47c38ccf5b Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 09:09:54 +0200 Subject: [PATCH 070/102] added pragmas --- luxonis_train/models/luxonis_lightning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/luxonis_train/models/luxonis_lightning.py b/luxonis_train/models/luxonis_lightning.py index fb9b2523..2bbf8ca9 100644 --- a/luxonis_train/models/luxonis_lightning.py +++ b/luxonis_train/models/luxonis_lightning.py @@ -267,7 +267,7 @@ def __init__( @property def core(self) -> "luxonis_train.core.LuxonisModel": """Returns the core model.""" - if self._core is None: + if self._core is None: # pragma: no cover raise ValueError("Core reference is not set.") return self._core @@ -461,7 +461,7 @@ def compute_metrics(self) -> dict[str, dict[str, Tensor]]: computed_submetrics = {metric_name: metric_value} case dict(submetrics): computed_submetrics = submetrics - case unknown: + case unknown: # pragma: no cover raise ValueError( f"Metric {metric_name} returned unexpected value of " f"type {type(unknown)}." 
From 819d9333767a65c829d1555a3e4d662bacabd8d9 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Fri, 13 Sep 2024 09:17:01 +0200 Subject: [PATCH 071/102] added onnx output rename tests --- tests/configs/archive_config.yaml | 14 ++++++++++++++ tests/integration/test_simple.py | 3 ++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/configs/archive_config.yaml b/tests/configs/archive_config.yaml index f7d8ae6f..71589f4d 100644 --- a/tests/configs/archive_config.yaml +++ b/tests/configs/archive_config.yaml @@ -27,3 +27,17 @@ model: - name: ClassificationHead inputs: - EfficientRep + +exporter: + output_names: + - seg0 + - class0 + - bbox0 + - bbox1 + - bbox2 + - effkpt0 + - effkpt1 + - effkpt2 + - impl + - seg1 + diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 1367d996..244dac42 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -144,8 +144,9 @@ def test_tune(opts: dict[str, Any], coco_dataset: LuxonisDataset): assert STUDY_PATH.exists() -def test_archive(coco_dataset: LuxonisDataset): +def test_archive(test_output_dir: Path, coco_dataset: LuxonisDataset): opts = { + "tracker.save_directory": str(test_output_dir), "loader.params.dataset_name": coco_dataset.identifier, } model = LuxonisModel("tests/configs/archive_config.yaml", opts) From fb2f0db436d1a872f60a3b491bb350e598075924 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Sat, 14 Sep 2024 08:00:58 +0200 Subject: [PATCH 072/102] added test markers and ordering --- pyproject.toml | 6 +++++- requirements-dev.txt | 1 + tests/conftest.py | 18 ++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 tests/conftest.py diff --git a/pyproject.toml b/pyproject.toml index 1bfe40ba..3c610a3c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,11 @@ reportUnnecessaryIsInstance = "none" [tool.pytest.ini_options] testpaths = ["tests"] -addopts = "--cov=luxonis_train --cov-report=term 
--cov-report=html --cov-report=xml --disable-warnings" +addopts = "--cov=luxonis_train --cov-report=html --disable-warnings" +markers = [ + "unit: mark a test as a unit test", + "integration: mark a test as an integration test", +] [tool.coverage.run] omit = [ diff --git a/requirements-dev.txt b/requirements-dev.txt index 0b939aa3..e4dbd194 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,3 +5,4 @@ opencv-stubs>=0.0.8 pytest-cov>=4.1.0 pytest-subtests>=0.12.1 pytest-md>=0.2.0 +pytest-order>=1.3.0 diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 00000000..4a8a492c --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,18 @@ +import pytest + + +def pytest_collection_modifyitems(items): + for item in items: + if "/unittests/" in str(item.fspath): + item.add_marker(pytest.mark.unit) + # ensure unittests run before integration tests + item.add_marker(pytest.mark.order(0)) + elif "/integration/" in str(item.fspath): + item.add_marker(pytest.mark.integration) + + +def pytest_configure(config): + config.addinivalue_line("markers", "unit: mark test as a unit test") + config.addinivalue_line( + "markers", "integration: mark test as an integration test" + ) From 9daa751b47334e73b64b43152e191554c92f03b2 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Sat, 14 Sep 2024 09:05:05 +0200 Subject: [PATCH 073/102] changed errors --- .../attached_modules/base_attached_module.py | 14 +++-- luxonis_train/nodes/base_node.py | 59 ++++++++++--------- tests/unittests/test_base_node.py | 10 ++-- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/luxonis_train/attached_modules/base_attached_module.py b/luxonis_train/attached_modules/base_attached_module.py index 7d6097e7..904120a2 100644 --- a/luxonis_train/attached_modules/base_attached_module.py +++ b/luxonis_train/attached_modules/base_attached_module.py @@ -121,8 +121,8 @@ def n_keypoints(self) -> int: """Getter for the number of keypoints. 
@type: int - @raises ValueError: If the number of keypoints cannot be - determined. + @raises ValueError: If the node does not support keypoints. + @raises RuntimeError: If the node doesn't define any task. """ return self.node.n_keypoints @@ -131,10 +131,9 @@ def n_classes(self) -> int: """Getter for the number of classes. @type: int - @raises ValueError: If the number of classes cannot be - determined. + @raises RuntimeError: If the node doesn't define any task. @raises ValueError: If the number of classes is different for - different tasks. In that case, use the C{node.get_n_classes} + different tasks. In that case, use the L{get_n_classes} method. """ return self.node.n_classes @@ -152,7 +151,10 @@ def class_names(self) -> list[str]: """Getter for the class names. @type: list[str] - @raises ValueError: If the class names cannot be determined. + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the class names are different for + different tasks. In that case, use the L{get_class_names} + method. """ return self.node.class_names diff --git a/luxonis_train/nodes/base_node.py b/luxonis_train/nodes/base_node.py index 62292873..aad0b2f2 100644 --- a/luxonis_train/nodes/base_node.py +++ b/luxonis_train/nodes/base_node.py @@ -208,7 +208,7 @@ def _check_type_overrides(self) -> None: properties.append(name) for name, typ in self.__annotations__.items(): if name in properties: - with suppress(ValueError): + with suppress(RuntimeError): value = getattr(self, name) try: check_type(value, typ) @@ -227,16 +227,15 @@ def get_task_name(self, task: LabelType) -> str: @param task: Task to get the name for. @rtype: str @return: Name of the task. + @raises RuntimeError: If the node does not define any tasks. @raises ValueError: If the task is not supported by the node. """ if not self._tasks: - raise ValueError( - f"Node {self.name} does not have any tasks defined." 
- ) + raise RuntimeError(f"Node '{self.name}' does not define any task.") if task not in self._tasks: raise ValueError( - f"Node {self.name} does not support the {task.value} task." + f"Node '{self.name}' does not support the '{task.value}' task." ) return self._tasks[task] @@ -249,17 +248,17 @@ def task(self) -> str: """Getter for the task. @type: str - @raises RuntimeError: If the node doesn't define any tasks. - @raises RuntimeError: If the node defines more than one task. In - that case, use the L{get_task_name} method. + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the node defines more than one task. In + that case, use the L{get_task_name} method instead. """ if not self._tasks: - raise RuntimeError(f"{self.name} does not have any tasks defined.") + raise RuntimeError(f"{self.name} does not define any task.") if len(self._tasks) > 1: - raise RuntimeError( + raise ValueError( f"Node {self.name} has multiple tasks defined. " - "Use `get_task_name` method instead." + "Use the `get_task_name` method instead." ) return next(iter(self._tasks.values())) @@ -288,20 +287,20 @@ def n_keypoints(self) -> int: """Getter for the number of keypoints. @type: int - @raises ValueError: If the number of keypoints cannot be - determined. + @raises ValueError: If the node does not support keypoints. + @raises RuntimeError: If the node doesn't define any task. """ if self._n_keypoints is not None: return self._n_keypoints if self._tasks: if LabelType.KEYPOINTS not in self._tasks: - raise (ValueError(f"{self.name} does not support keypoints.")) + raise ValueError(f"{self.name} does not support keypoints.") return self.dataset_metadata.n_keypoints( self.get_task_name(LabelType.KEYPOINTS) ) - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.n_keypoints` property cannot be used. 
" "Either override the `tasks` class attribute, " @@ -314,8 +313,7 @@ def n_classes(self) -> int: """Getter for the number of classes. @type: int - @raises ValueError: If the number of classes cannot be - determined. + @raises RuntimeError: If the node doesn't define any task. @raises ValueError: If the number of classes is different for different tasks. In that case, use the L{get_n_classes} method. @@ -324,7 +322,7 @@ def n_classes(self) -> int: return self._n_classes if not self._tasks: - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.n_classes` property cannot be used. " "Either override the `tasks` class attribute, " @@ -351,10 +349,13 @@ def class_names(self) -> list[str]: """Getter for the class names. @type: list[str] - @raises ValueError: If the class names cannot be determined. + @raises RuntimeError: If the node doesn't define any task. + @raises ValueError: If the class names are different for + different tasks. In that case, use the L{get_class_names} + method. """ if not self._tasks: - raise ValueError( + raise RuntimeError( f"{self.name} does not have any tasks defined, " "`BaseNode.class_names` property cannot be used. " "Either override the `tasks` class attribute, " @@ -523,12 +524,12 @@ def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: @rtype: ForwardInputT @return: Prepared inputs, ready to be passed to the L{forward} method. - @raises RuntimeError: If the number of inputs is not equal to 1. + @raises ValueError: If the number of inputs is not equal to 1. In such cases the method has to be overridden. """ if len(inputs) > 1: - raise RuntimeError( - f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead." + raise ValueError( + f"Node {self.name} expects a single input, but got {len(inputs)} inputs instead. " "If the node expects multiple inputs, the `unwrap` method should be overridden." 
) return self.get_attached(inputs[0]["features"]) # type: ignore @@ -537,9 +538,9 @@ def unwrap(self, inputs: list[Packet[Tensor]]) -> ForwardInputT: def forward(self, inputs: ForwardInputT) -> ForwardOutputT: """Forward pass of the module. - @type inputs: ForwardInputT + @type inputs: L{ForwardInputT} @param inputs: Inputs to the module. - @rtype: ForwardOutputT + @rtype: L{ForwardOutputT} @return: Result of the forward pass. """ ... @@ -572,8 +573,8 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: @rtype: L{Packet}[Tensor] @return: Wrapped output. - @raises RuntimeError: If the output is not a tensor or a list of tensors. - In such cases the method has to be overridden. + @raises ValueError: If the C{output} argument is not a tensor or a list of tensors. + In such cases the L{wrap} method should be overridden. """ if isinstance(output, Tensor): @@ -583,7 +584,7 @@ def wrap(self, output: ForwardOutputT) -> Packet[Tensor]: ): outputs = list(output) else: - raise RuntimeError( + raise ValueError( "Default `wrap` expects a single tensor or a list of tensors." ) try: @@ -676,6 +677,6 @@ def _get_nth_size(self, idx: int) -> int | list[int]: def _non_set_error(self, name: str) -> RuntimeError: return RuntimeError( - f"{self.name} is trying to access `{name}`, " + f"'{self.name}' node is trying to access `{name}`, " "but it was not set during initialization. 
" ) diff --git a/tests/unittests/test_base_node.py b/tests/unittests/test_base_node.py index 15668db6..68386f73 100644 --- a/tests/unittests/test_base_node.py +++ b/tests/unittests/test_base_node.py @@ -70,9 +70,9 @@ def test_invalid(packet: Packet[Tensor]): _ = node.original_in_shape with pytest.raises(RuntimeError): _ = node.dataset_metadata - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): node.unwrap([packet, packet]) - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): node.wrap({"inp": torch.rand(3, 224, 224)}) @@ -119,13 +119,13 @@ class DummyMultiHead(DummyNode): with pytest.raises(ValueError): dummy_head.get_task_name(LabelType.SEGMENTATION) - with pytest.raises(ValueError): + with pytest.raises(RuntimeError): dummy_node.get_task_name(LabelType.SEGMENTATION) with pytest.raises(RuntimeError): _ = dummy_node.task - with pytest.raises(RuntimeError): + with pytest.raises(ValueError): _ = dummy_multi_head.task metadata = DatasetMetadata( @@ -151,7 +151,7 @@ class DummyMultiHead(DummyNode): assert dummy_multi_head.n_keypoints == 4 with pytest.raises(ValueError): _ = dummy_head.n_keypoints - with pytest.raises(ValueError): + with pytest.raises(RuntimeError): _ = dummy_node.n_keypoints dummy_head = DummyHead(n_classes=5) From 02ee1a2defb76e979475a7607a1cf1229eb2682b Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Sat, 14 Sep 2024 09:05:19 +0200 Subject: [PATCH 074/102] fix classification pre defined model tests --- luxonis_train/models/predefined_models/classification_model.py | 2 +- tests/integration/test_simple.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/luxonis_train/models/predefined_models/classification_model.py b/luxonis_train/models/predefined_models/classification_model.py index d1253e4d..e390b667 100644 --- a/luxonis_train/models/predefined_models/classification_model.py +++ b/luxonis_train/models/predefined_models/classification_model.py @@ -15,7 +15,7 @@ @dataclass class 
ClassificationModel(BasePredefinedModel): backbone: str = "MicroNet" - task: Literal["multiclass", "multilabel"] = "multilabel" + task: Literal["multiclass", "multilabel"] = "multiclass" backbone_params: Kwargs = field(default_factory=dict) head_params: Kwargs = field(default_factory=dict) loss_params: Kwargs = field(default_factory=dict) diff --git a/tests/integration/test_simple.py b/tests/integration/test_simple.py index 244dac42..784db01a 100644 --- a/tests/integration/test_simple.py +++ b/tests/integration/test_simple.py @@ -58,7 +58,7 @@ def test_predefined_models( config_file = f"configs/{config_file}.yaml" opts |= { "loader.params.dataset_name": cifar10_dataset.dataset_name - if config_file == "classification_model" + if "classification_model" in config_file else coco_dataset.dataset_name, } model = LuxonisModel(config_file, opts) From af0b90dcde1e05ce08f23db4aab0403c2fb833b1 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Sat, 14 Sep 2024 09:05:33 +0200 Subject: [PATCH 075/102] fixed torchmetrics --- .../attached_modules/metrics/torchmetrics.py | 93 +++++++++++-------- .../test_metrics/test_torchmetrics.py | 21 +++++ 2 files changed, 76 insertions(+), 38 deletions(-) diff --git a/luxonis_train/attached_modules/metrics/torchmetrics.py b/luxonis_train/attached_modules/metrics/torchmetrics.py index 5bddc0b9..a8797a13 100644 --- a/luxonis_train/attached_modules/metrics/torchmetrics.py +++ b/luxonis_train/attached_modules/metrics/torchmetrics.py @@ -1,4 +1,5 @@ import logging +from contextlib import suppress from typing import Any import torchmetrics @@ -16,47 +17,63 @@ class TorchMetricWrapper(BaseMetric[Tensor]): def __init__(self, **kwargs: Any): super().__init__(node=kwargs.pop("node", None)) task = kwargs.get("task") - - if self.n_classes > 1: - if task == "binary": - raise ValueError( - f"Task type set to '{task}', but the dataset has more than 1 class. " - f"Set the `task` parameter for {self.name} to either 'multiclass' or 'multilabel'." 
- ) - task = "multiclass" - else: - if task == "multiclass": - raise ValueError( - f"Task type set to '{task}', but the dataset has only 1 class. " - f"Set the `task` parameter for {self.name} to 'binary'." - ) - task = "binary" - if "task" not in kwargs: - logger.warning( - f"Task type not specified for {self.name}, assuming '{task}'. " - "If this is not correct, please set the `task` parameter explicitly." + if task is None: + if "num_classes" in kwargs: + if kwargs["num_classes"] == 1: + task = "binary" + else: + task = "multiclass" + elif "num_labels" in kwargs: + task = "multilabel" + else: + with suppress(RuntimeError, ValueError): + if self.n_classes == 1: + task = "binary" + else: + task = "multiclass" + + if task is None: + raise ValueError( + f"'{self.name}' does not have the 'task' parameter set. " + "and it is not possible to infer it from the other arguments. " + "You can either set the 'task' parameter explicitly, provide either 'num_classes' or 'num_labels' argument, " + "or use this metric with a node. " + "The 'task' can be one of 'binary', 'multiclass', or 'multilabel'. " ) - kwargs["task"] = task self._task = task + kwargs["task"] = task + + n_classes: int | None = kwargs.get( + "num_classes", kwargs.get("num_labels") + ) + + if n_classes is None: + with suppress(RuntimeError, ValueError): + n_classes = self.n_classes + + if n_classes is None and task != "binary": + arg_name = "num_classes" if task == "multiclass" else "num_labels" + raise ValueError( + f"'{self.name}' metric does not have the '{arg_name}' parameter set " + "and it is not possible to infer it from the other arguments. " + "You can either set the '{arg_name}' parameter explicitly, or use this metric with a node." + ) + + if task == "binary" and n_classes is not None and n_classes > 1: + raise ValueError( + f"Task type set to '{task}', but the dataset has more than 1 class. " + f"Set the `task` argument of '{self.name}' to either 'multiclass' or 'multilabel'." 
+ ) + elif task != "binary" and n_classes == 1: + raise ValueError( + f"Task type set to '{task}', but the dataset has only 1 class. " + f"Set the `task` argument of '{self.name}' to 'binary'." + ) - if self._task == "multiclass": - if "num_classes" not in kwargs: - try: - kwargs["num_classes"] = self.n_classes - except RuntimeError as e: - raise ValueError( - "Either `node` or `num_classes` must be provided to " - "multiclass torchmetrics." - ) from e - else: - if "num_labels" not in kwargs: - try: - kwargs["num_labels"] = self.n_classes - except RuntimeError as e: - raise ValueError( - "Either `node` or `num_labels` must be provided to " - "multilabel torchmetrics." - ) from e + if task == "multiclass": + kwargs["num_classes"] = n_classes + elif task == "multilabel": + kwargs["num_labels"] = n_classes self.metric = self.Metric(**kwargs) diff --git a/tests/unittests/test_metrics/test_torchmetrics.py b/tests/unittests/test_metrics/test_torchmetrics.py index 2bc8b8e0..141a3785 100644 --- a/tests/unittests/test_metrics/test_torchmetrics.py +++ b/tests/unittests/test_metrics/test_torchmetrics.py @@ -20,8 +20,14 @@ class DummyMetric(TorchMetricWrapper): node_1_class = DummyNode(n_classes=1) node_2_classes = DummyNode(n_classes=2) + node = DummyNode() assert DummyMetric(node=node_1_class)._task == "binary" assert DummyMetric(node=node_2_classes)._task == "multiclass" + assert DummyMetric(node=node_2_classes, task="multilabel") + assert DummyMetric(num_classes=1)._task == "binary" + assert DummyMetric(num_classes=2)._task == "multiclass" + assert DummyMetric(num_labels=2)._task == "multilabel" + assert DummyMetric(task="binary") with pytest.raises(ValueError): @@ -29,3 +35,18 @@ class DummyMetric(TorchMetricWrapper): with pytest.raises(ValueError): DummyMetric(task="multiclass") + + with pytest.raises(ValueError): + DummyMetric(task="invalid") + + with pytest.raises(ValueError): + DummyMetric(task="binary", node=node_2_classes) + + with pytest.raises(ValueError): + 
DummyMetric(task="multiclass", node=node_1_class) + + with pytest.raises(ValueError): + DummyMetric(task="multiclass", node=node) + + with pytest.raises(ValueError): + DummyMetric(task="multilabel", node=node) From 0301165c87572ca13be4eea0c3613bbf23d7e858 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Sat, 14 Sep 2024 17:33:51 +0200 Subject: [PATCH 076/102] updated test command --- .github/workflows/ci.yaml | 2 +- CONTRIBUTING.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 033ef804..7db33dd4 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -134,7 +134,7 @@ jobs: PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 with: emoji: false - custom-arguments: --junit-xml pytest.xml + custom-arguments: --junit-xml pytest.xml --coverage-report xml - name: Create Test Report uses: EnricoMi/publish-unit-test-result-action@v2 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d3636e13..be11d8a0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -66,10 +66,10 @@ pyright --warnings --level warning --pythonversion 3.10 luxonis_train We use [pytest](https://docs.pytest.org/en/stable/) for testing. The tests are located in the `tests` directory. You can run the tests locally by running `pytest` in the root directory. -This command will run all tests and print a coverage report. +This command will run all tests generate HTML coverage report. > \[!TIP\] -> This will also generate an HTML coverage report in the `htmlcov` directory. +> The coverage report will be saved to `htmlcov` directory. > If you want to inspect the coverage in more detail, open `htmlcov/index.html` in a browser. 
> \[!IMPORTANT\] From cffcfb3c093eaa20b42b73ed4c912877b9df4f33 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Sun, 15 Sep 2024 06:37:53 +0200 Subject: [PATCH 077/102] fixed argument --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 7db33dd4..4c79d608 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -134,7 +134,7 @@ jobs: PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 with: emoji: false - custom-arguments: --junit-xml pytest.xml --coverage-report xml + custom-arguments: --junit-xml pytest.xml --cov-report xml - name: Create Test Report uses: EnricoMi/publish-unit-test-result-action@v2 From 080dd1de908267aa9440465ecbec158e5f11fed9 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Sun, 15 Sep 2024 05:04:44 +0000 Subject: [PATCH 078/102] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index ee07d4c2..34387324 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 96% - 96% + 97% + 97% From a2a89030d859ae60a49261834e8efeb3e2d1275a Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 10:37:26 +0000 Subject: [PATCH 079/102] fix: ddrnet segmentation heads arguments --- configs/ddrnet_segmentation_model.yaml | 1 - .../ddrnet_segmentation_model.py | 11 ++--------- luxonis_train/nodes/backbones/ddrnet/ddrnet.py | 2 +- .../nodes/heads/ddrnet_segmentation_head.py | 18 ++++++++---------- 4 files changed, 11 insertions(+), 21 deletions(-) diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index a8239dbf..aa36b119 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -6,7 +6,6 @@ model: predefined_model: name: DDRNetSegmentationModel params: - n_classes: 80 task: 
multiclass backbone_params: use_aux_heads: True # set to False to disable auxiliary heads (for export) diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py index 9082a541..c7a09f73 100644 --- a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -12,7 +12,6 @@ @dataclass class DDRNetSegmentationModel(SegmentationModel): backbone: str = "DDRNet" - n_classes: int = 1 highres_planes: int = 64 layer5_bottleneck_expansion: int = 2 aux_head_params: Kwargs = field(default_factory=dict) @@ -24,15 +23,9 @@ def nodes(self) -> list[ModelNodeConfig]: {"layer5_bottleneck_expansion": self.layer5_bottleneck_expansion} ) - self.head_params.update( - {"in_planes": self.highres_planes * self.layer5_bottleneck_expansion} - ) - self.head_params.update({"n_classes": self.n_classes}) - self.head_params.update({"attach_index": 0}) + self.head_params.update({"attach_index": -1}) - self.aux_head_params.update({"in_planes": self.highres_planes}) - self.aux_head_params.update({"n_classes": self.n_classes}) - self.aux_head_params.update({"attach_index": 1}) + self.aux_head_params.update({"attach_index": -2}) node_list = [ ModelNodeConfig( diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index 4ff1f72c..a87615b7 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -280,7 +280,7 @@ def forward(self, inputs: Tensor) -> list[Tensor]: x = x + out_layer5_skip if self._use_aux_heads: - return [x, x_extra] + return [x_extra, x] else: return [x] diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index d907fa79..e7136c48 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ 
b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -5,22 +5,20 @@ from luxonis_train.nodes.base_node import BaseNode from luxonis_train.nodes.blocks import ConvModule +from luxonis_train.utils.general import infer_upscale_factor from luxonis_train.utils.types import LabelType logger = logging.getLogger(__name__) class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): - in_height: int - n_classes: int + attach_index: int = -1 + tasks: list[LabelType] = [LabelType.SEGMENTATION] - attach_index: int = 0 def __init__( self, - in_planes: int = 128, inter_planes: int = 64, - scale_factor: int = 8, inter_mode: str = "bilinear", **kwargs, ): @@ -32,19 +30,19 @@ def __init__( @see: U{Paper } @license: U{Apache License, Version 2.0 } - @type in_planes: int - @param in_planes: Width of input. Defaults to 128. @type inter_planes: int @param inter_planes: Width of internal conv. Must be a multiple of scale_factor^2 when inter_mode is pixel_shuffle. Defaults to 64. - @type scale_factor: int - @param scale_factor: Scaling factor. Defaults to 8. @type inter_mode: str @param inter_mode: Upsampling method. One of nearest, linear, bilinear, bicubic, trilinear, area or pixel_shuffle. If pixel_shuffle is set, nn.PixelShuffle is used for scaling. Defaults to "bilinear". 
""" super().__init__(**kwargs) + model_in_h, model_in_w = self.original_in_shape[1:] + scale_factor = 2 ** infer_upscale_factor( + (self.in_height, self.in_width), (model_in_h, model_in_w) + ) self.scale_factor = scale_factor if inter_mode == "pixel_shuffle": @@ -54,7 +52,7 @@ def __init__( ) self.conv1 = ConvModule( - in_planes, + self.in_channels, inter_planes, kernel_size=3, padding=1, From 59c4683ae07a61605dcc4416420d2bc8ebc90813 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 10:43:54 +0000 Subject: [PATCH 080/102] fix: DAPPMBranch forward input type --- luxonis_train/nodes/backbones/ddrnet/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index 5a01c61a..cb685585 100644 --- a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -65,7 +65,7 @@ def __init__( ), ) - def forward(self, x: Tensor) -> Tensor: + def forward(self, x: Tensor | list[Tensor]) -> Tensor: """Process input through the DAPPM branch. @type x: Tensor or list[Tensor] From f9e0f292a6d8f9db330af4b25aa51d4bbe188271 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 10:50:28 +0000 Subject: [PATCH 081/102] refactor: rename planes to channels --- .../nodes/backbones/ddrnet/blocks.py | 92 +++++++++---------- .../nodes/backbones/ddrnet/ddrnet.py | 52 +++++------ .../nodes/heads/ddrnet_segmentation_head.py | 18 ++-- 3 files changed, 81 insertions(+), 81 deletions(-) diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index cb685585..b5f0b673 100644 --- a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -18,8 +18,8 @@ def __init__( self, kernel_size: int, stride: int, - in_planes: int, - branch_planes: int, + in_channels: int, + branch_channels: int, inter_mode: str = "bilinear", ): """A DAPPM branch. 
@@ -32,10 +32,10 @@ def __init__( AdaptiveAvgPool2d over all the input is performed (output is 1x1). When stride=1, no average pooling is performed. When stride>1, average pooling is performed (scaling the input down and up again). - @type in_planes: int - @param in_planes: Number of input channels. - @type branch_planes: int - @param branch_planes: Width after the first convolution. + @type in_channels: int + @param in_channels: Number of input channels. + @type branch_channels: int + @param branch_channels: Width after the first convolution. @type inter_mode: str @param inter_mode: Interpolation mode for upscaling. Defaults to "bilinear". """ @@ -49,19 +49,19 @@ def __init__( nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=stride) ) - down_list.append(nn.BatchNorm2d(in_planes)) + down_list.append(nn.BatchNorm2d(in_channels)) down_list.append(nn.ReLU(inplace=True)) - down_list.append(nn.Conv2d(in_planes, branch_planes, kernel_size=1, bias=False)) + down_list.append(nn.Conv2d(in_channels, branch_channels, kernel_size=1, bias=False)) self.down_scale = nn.Sequential(*down_list) self.up_scale = UpscaleOnline(inter_mode) if stride != 1: self.process = nn.Sequential( - nn.BatchNorm2d(branch_planes), + nn.BatchNorm2d(branch_channels), nn.ReLU(inplace=True), nn.Conv2d( - branch_planes, branch_planes, kernel_size=3, padding=1, bias=False + branch_channels, branch_channels, kernel_size=3, padding=1, bias=False ), ) @@ -94,21 +94,21 @@ def forward(self, x: Tensor | list[Tensor]) -> Tensor: class DAPPM(nn.Module): def __init__( self, - in_planes: int, - branch_planes: int, - out_planes: int, + in_channels: int, + branch_channels: int, + out_channels: int, kernel_sizes: list[int], strides: list[int], inter_mode: str = "bilinear", ): """DAPPM (Dynamic Attention Pyramid Pooling Module). - @type in_planes: int - @param in_planes: Number of input channels. - @type branch_planes: int - @param branch_planes: Width after the first convolution in each branch. 
- @type out_planes: int - @param out_planes: Number of output channels. + @type in_channels: int + @param in_channels: Number of input channels. + @type branch_channels: int + @param branch_channels: Width after the first convolution in each branch. + @type out_channels: int + @param out_channels: Number of output channels. @type kernel_sizes: list[int] @param kernel_sizes: List of kernel sizes for each branch. @type strides: list[int] @@ -127,8 +127,8 @@ def __init__( DAPPMBranch( kernel_size=kernel_size, stride=stride, - in_planes=in_planes, - branch_planes=branch_planes, + in_channels=in_channels, + branch_channels=branch_channels, inter_mode=inter_mode, ) for kernel_size, stride in zip(kernel_sizes, strides) @@ -136,19 +136,19 @@ def __init__( ) self.compression = nn.Sequential( - nn.BatchNorm2d(branch_planes * len(self.branches)), + nn.BatchNorm2d(branch_channels * len(self.branches)), nn.ReLU(inplace=True), nn.Conv2d( - branch_planes * len(self.branches), - out_planes, + branch_channels * len(self.branches), + out_channels, kernel_size=1, bias=False, ), ) self.shortcut = nn.Sequential( - nn.BatchNorm2d(in_planes), + nn.BatchNorm2d(in_channels), nn.ReLU(inplace=True), - nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False), + nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), ) def forward(self, x: Tensor) -> Tensor: @@ -215,15 +215,15 @@ def __init__( self.layer1 = _make_layer( block=block, - in_planes=width, - planes=width, + in_channels=width, + channels=width, num_blocks=layers[0], ) self.layer2 = _make_layer( block=block, - in_planes=width, - planes=width * 2, + in_channels=width, + channels=width * 2, num_blocks=layers[1], stride=2, ) @@ -232,8 +232,8 @@ def __init__( [ _make_layer( block=block, - in_planes=width * 2, - planes=width * 4, + in_channels=width * 2, + channels=width * 4, num_blocks=layers[2], stride=2, ) @@ -241,8 +241,8 @@ def __init__( + [ _make_layer( block=block, - in_planes=width * 4, - planes=width * 4, + 
in_channels=width * 4, + channels=width * 4, num_blocks=layers[2], stride=1, ) @@ -252,8 +252,8 @@ def __init__( self.layer4 = _make_layer( block=block, - in_planes=width * 4, - planes=width * 8, + in_channels=width * 4, + channels=width * 8, num_blocks=layers[3], stride=2, ) @@ -304,8 +304,8 @@ def get_backbone_output_number_of_channels(self) -> dict[str, int]: def _make_layer( block: Type[nn.Module], - in_planes: int, - planes: int, + in_channels: int, + channels: int, num_blocks: int, stride: int = 1, expansion: int = 1, @@ -314,10 +314,10 @@ def _make_layer( @type block: Type[nn.Module] @param block: The block class to be used. - @type in_planes: int - @param in_planes: Number of input channels. - @type planes: int - @param planes: Number of output channels. + @type in_channels: int + @param in_channels: Number of input channels. + @type channels: int + @param channels: Number of output channels. @type num_blocks: int @param num_blocks: Number of blocks in the layer. @type stride: int @@ -329,18 +329,18 @@ def _make_layer( layers: list[nn.Module] = [] layers.append( - block(in_planes, planes, stride, final_relu=num_blocks > 1, expansion=expansion) + block(in_channels, channels, stride, final_relu=num_blocks > 1, expansion=expansion) ) - in_planes = planes * expansion + in_channels = channels * expansion if num_blocks > 1: for i in range(1, num_blocks): final_relu = i != (num_blocks - 1) layers.append( block( - in_planes, - planes, + in_channels, + channels, stride=1, final_relu=final_relu, expansion=expansion, diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index a87615b7..df4155f1 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -22,7 +22,7 @@ def __init__( self, use_aux_heads: bool = True, upscale_module: nn.Module = None, - highres_planes: int = 64, + highres_channels: int = 64, spp_width: int = 128, ssp_inter_mode: str = 
"bilinear", segmentation_inter_mode: str = "bilinear", @@ -33,7 +33,7 @@ def __init__( spp_kernel_sizes: list[int] = None, spp_strides: list[int] = None, layer3_repeats: int = 1, - planes: int = 32, + channels: int = 32, layers: list[int] = None, **kwargs, ): @@ -50,8 +50,8 @@ def __init__( @type upscale_module: nn.Module @param upscale_module: Module for upscaling (e.g., bilinear interpolation). Defaults to UpscaleOnline(). - @type highres_planes: int - @param highres_planes: Number of channels in the high resolution net. Defaults + @type highres_channels: int + @param highres_channels: Number of channels in the high resolution net. Defaults to 64. @type spp_width: int @param spp_width: Width of the branches in the SPP block. Defaults to 128. @@ -81,8 +81,8 @@ def __init__( 0]. @type layer3_repeats: int @param layer3_repeats: Number of times to repeat the 3rd stage. Defaults to 1. - @type planes: int - @param planes: Base number of channels. Defaults to 32. + @type channels: int + @param channels: Base number of channels. Defaults to 32. @type layers: list[int] @param layers: Number of blocks in each layer of the backbone. Defaults to [2, 2, 2, 2, 1, 2, 2, 1]. 
@@ -109,13 +109,13 @@ def __init__( self.skip_block = skip_block self.relu = nn.ReLU(inplace=False) self.layer3_repeats = layer3_repeats - self.planes = planes + self.channels = channels self.layers = layers self.backbone_layers, self.additional_layers = self.layers[:4], self.layers[4:] self._backbone = BasicDDRBackBone( block=self.block, - width=self.planes, + width=self.channels, layers=self.backbone_layers, input_channels=self.in_channels, layer3_repeats=self.layer3_repeats, @@ -131,7 +131,7 @@ def __init__( self.compression3.append( ConvModule( in_channels=out_chan_backbone["layer3"], - out_channels=highres_planes, + out_channels=highres_channels, kernel_size=1, bias=False, activation=nn.Identity(), @@ -139,7 +139,7 @@ def __init__( ) self.down3.append( ConvModule( - in_channels=highres_planes, + in_channels=highres_channels, out_channels=out_chan_backbone["layer3"], kernel_size=3, stride=2, @@ -150,8 +150,8 @@ def __init__( ) self.layer3_skip.append( _make_layer( - in_planes=out_chan_backbone["layer2"] if i == 0 else highres_planes, - planes=highres_planes, + in_channels=out_chan_backbone["layer2"] if i == 0 else highres_channels, + channels=highres_channels, block=skip_block, num_blocks=self.additional_layers[1], ) @@ -159,7 +159,7 @@ def __init__( self.compression4 = ConvModule( in_channels=out_chan_backbone["layer4"], - out_channels=highres_planes, + out_channels=highres_channels, kernel_size=1, bias=False, activation=nn.Identity(), @@ -167,8 +167,8 @@ def __init__( self.down4 = nn.Sequential( ConvModule( - in_channels=highres_planes, - out_channels=highres_planes * 2, + in_channels=highres_channels, + out_channels=highres_channels * 2, kernel_size=3, stride=2, padding=1, @@ -176,7 +176,7 @@ def __init__( activation=nn.ReLU(inplace=True), ), ConvModule( - in_channels=highres_planes * 2, + in_channels=highres_channels * 2, out_channels=out_chan_backbone["layer4"], kernel_size=3, stride=2, @@ -188,37 +188,37 @@ def __init__( self.layer4_skip = _make_layer( 
block=skip_block, - in_planes=highres_planes, - planes=highres_planes, + in_channels=highres_channels, + channels=highres_channels, num_blocks=self.additional_layers[2], ) self.layer5_skip = _make_layer( block=layer5_block, - in_planes=highres_planes, - planes=highres_planes, + in_channels=highres_channels, + channels=highres_channels, num_blocks=self.additional_layers[3], expansion=layer5_bottleneck_expansion, ) self.layer5 = _make_layer( block=layer5_block, - in_planes=out_chan_backbone["layer4"], - planes=out_chan_backbone["layer4"], + in_channels=out_chan_backbone["layer4"], + channels=out_chan_backbone["layer4"], num_blocks=self.additional_layers[0], stride=2, expansion=layer5_bottleneck_expansion, ) self.spp = DAPPM( - in_planes=out_chan_backbone["layer4"] * layer5_bottleneck_expansion, - branch_planes=spp_width, - out_planes=highres_planes * layer5_bottleneck_expansion, + in_channels=out_chan_backbone["layer4"] * layer5_bottleneck_expansion, + branch_channels=spp_width, + out_channels=highres_channels * layer5_bottleneck_expansion, inter_mode=self.ssp_inter_mode, kernel_sizes=spp_kernel_sizes, strides=spp_strides, ) - self.highres_planes = highres_planes + self.highres_channels = highres_channels self.layer5_bottleneck_expansion = layer5_bottleneck_expansion self.init_params() diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index e7136c48..b9cbbcf3 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -18,7 +18,7 @@ class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): def __init__( self, - inter_planes: int = 64, + inter_channels: int = 64, inter_mode: str = "bilinear", **kwargs, ): @@ -30,8 +30,8 @@ def __init__( @see: U{Paper } @license: U{Apache License, Version 2.0 } - @type inter_planes: int - @param inter_planes: Width of internal conv. 
Must be a multiple of + @type inter_channels: int + @param inter_channels: Width of internal conv. Must be a multiple of scale_factor^2 when inter_mode is pixel_shuffle. Defaults to 64. @type inter_mode: str @param inter_mode: Upsampling method. One of nearest, linear, bilinear, bicubic, @@ -46,14 +46,14 @@ def __init__( self.scale_factor = scale_factor if inter_mode == "pixel_shuffle": - if inter_planes % (scale_factor**2) != 0: + if inter_channels % (scale_factor**2) != 0: raise ValueError( - "When using pixel_shuffle, inter_planes must be a multiple of scale_factor^2." + "When using pixel_shuffle, inter_channels must be a multiple of scale_factor^2." ) self.conv1 = ConvModule( self.in_channels, - inter_planes, + inter_channels, kernel_size=3, padding=1, bias=False, @@ -62,8 +62,8 @@ def __init__( if inter_mode == "pixel_shuffle": self.conv2 = ConvModule( - inter_planes, - inter_planes, + inter_channels, + inter_channels, kernel_size=1, padding=0, bias=True, @@ -72,7 +72,7 @@ def __init__( self.upscale = nn.PixelShuffle(scale_factor) else: self.conv2 = ConvModule( - inter_planes, + inter_channels, self.n_classes, kernel_size=1, padding=0, From 18c209bec9fa78d20aa7b5cdb850b3cad9ff98cc Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 10:51:50 +0000 Subject: [PATCH 082/102] fix: remove redundant predefined model params --- .../models/predefined_models/ddrnet_segmentation_model.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py index c7a09f73..beacca5e 100644 --- a/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py +++ b/luxonis_train/models/predefined_models/ddrnet_segmentation_model.py @@ -12,17 +12,10 @@ @dataclass class DDRNetSegmentationModel(SegmentationModel): backbone: str = "DDRNet" - highres_planes: int = 64 - layer5_bottleneck_expansion: int = 2 aux_head_params: Kwargs = 
field(default_factory=dict) @property def nodes(self) -> list[ModelNodeConfig]: - self.backbone_params.update({"highres_planes": self.highres_planes}) - self.backbone_params.update( - {"layer5_bottleneck_expansion": self.layer5_bottleneck_expansion} - ) - self.head_params.update({"attach_index": -1}) self.aux_head_params.update({"attach_index": -2}) From 1a8bb1d96d0f4c7b251ba5ebcafff001d3238630 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 10:52:14 +0000 Subject: [PATCH 083/102] style: formatting --- luxonis_train/nodes/backbones/ddrnet/blocks.py | 18 +++++++++++++++--- luxonis_train/nodes/backbones/ddrnet/ddrnet.py | 4 +++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index b5f0b673..a4e1baf3 100644 --- a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -51,7 +51,9 @@ def __init__( down_list.append(nn.BatchNorm2d(in_channels)) down_list.append(nn.ReLU(inplace=True)) - down_list.append(nn.Conv2d(in_channels, branch_channels, kernel_size=1, bias=False)) + down_list.append( + nn.Conv2d(in_channels, branch_channels, kernel_size=1, bias=False) + ) self.down_scale = nn.Sequential(*down_list) self.up_scale = UpscaleOnline(inter_mode) @@ -61,7 +63,11 @@ def __init__( nn.BatchNorm2d(branch_channels), nn.ReLU(inplace=True), nn.Conv2d( - branch_channels, branch_channels, kernel_size=3, padding=1, bias=False + branch_channels, + branch_channels, + kernel_size=3, + padding=1, + bias=False, ), ) @@ -329,7 +335,13 @@ def _make_layer( layers: list[nn.Module] = [] layers.append( - block(in_channels, channels, stride, final_relu=num_blocks > 1, expansion=expansion) + block( + in_channels, + channels, + stride, + final_relu=num_blocks > 1, + expansion=expansion, + ) ) in_channels = channels * expansion diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py 
b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index df4155f1..27663c3f 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -150,7 +150,9 @@ def __init__( ) self.layer3_skip.append( _make_layer( - in_channels=out_chan_backbone["layer2"] if i == 0 else highres_channels, + in_channels=out_chan_backbone["layer2"] + if i == 0 + else highres_channels, channels=highres_channels, block=skip_block, num_blocks=self.additional_layers[1], From 7367a02978523e51c790ffbaa4a78faa931d96dc Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 10:54:30 +0000 Subject: [PATCH 084/102] fix: remove redundant backbone property --- .../nodes/backbones/ddrnet/ddrnet.py | 20 +------------------ 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index 27663c3f..82fae811 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -1,4 +1,4 @@ -from typing import Dict, Type +from typing import Type from torch import Tensor, nn @@ -224,24 +224,6 @@ def __init__( self.layer5_bottleneck_expansion = layer5_bottleneck_expansion self.init_params() - @property - def backbone(self): - """Create a fake backbone module to load backbone pre-trained weights.""" - return nn.Sequential( - Dict( - [ - ("_backbone", self._backbone), - ("compression3", self.compression3), - ("compression4", self.compression4), - ("down3", self.down3), - ("down4", self.down4), - ("layer3_skip", self.layer3_skip), - ("layer4_skip", self.layer4_skip), - ("layer5_skip", self.layer5_skip), - ] - ) - ) - def forward(self, inputs: Tensor) -> list[Tensor]: width_output = inputs.shape[-1] // 8 height_output = inputs.shape[-2] // 8 From 5505aa9ce60529ed220ff5dab37e1c4cfaf849ca Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 11:15:44 +0000 Subject: [PATCH 085/102] fix: fully disable aux head 
during export --- luxonis_train/nodes/heads/ddrnet_segmentation_head.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index b9cbbcf3..95e45db2 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -1,6 +1,7 @@ import logging import torch.nn as nn +import torch from torch import Tensor from luxonis_train.nodes.base_node import BaseNode @@ -98,7 +99,7 @@ def set_export_mode(self, mode: bool = True) -> None: @param mode: Whether to set the export mode to True or False. Defaults to True. """ super().set_export_mode(mode) - if self.export and self.attach_index != 0: + if self.export and self.attach_index != -1: logger.info("Removing the auxiliary head.") - self.forward = lambda x: x + self.forward = lambda x: torch.tensor([]) From 3c0dcca6f41713a5bd3c047317e1be3ddab1c2b6 Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 12:10:02 +0000 Subject: [PATCH 086/102] feat: add DDRNet variants --- configs/ddrnet_segmentation_model.yaml | 1 + .../nodes/backbones/ddrnet/ddrnet.py | 28 +++++++++++++------ .../nodes/backbones/ddrnet/variants.py | 27 ++++++++++++++++++ .../nodes/heads/ddrnet_segmentation_head.py | 2 +- 4 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 luxonis_train/nodes/backbones/ddrnet/variants.py diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index aa36b119..9f5302b2 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -9,6 +9,7 @@ model: task: multiclass backbone_params: use_aux_heads: True # set to False to disable auxiliary heads (for export) + variant: '23-slim' loader: params: diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index 82fae811..f5741b16 100644 --- 
a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -1,4 +1,4 @@ -from typing import Type +from typing import Literal, Type from torch import Tensor, nn @@ -15,14 +15,17 @@ BasicDDRBackBone, _make_layer, ) +from .variants import get_variant class DDRNet(BaseNode[Tensor, list[Tensor]]): def __init__( self, + variant: Literal["23-slim", "23"] = "23-slim", + channels: int | None = None, + highres_channels: int | None = None, use_aux_heads: bool = True, upscale_module: nn.Module = None, - highres_channels: int = 64, spp_width: int = 128, ssp_inter_mode: str = "bilinear", segmentation_inter_mode: str = "bilinear", @@ -33,7 +36,6 @@ def __init__( spp_kernel_sizes: list[int] = None, spp_strides: list[int] = None, layer3_repeats: int = 1, - channels: int = 32, layers: list[int] = None, **kwargs, ): @@ -45,14 +47,21 @@ def __init__( @see: U{Paper } @license: U{Apache License, Version 2.0 } + @type variant: Literal["23-slim", "23"] + @param variant: DDRNet variant. Defaults to "23-slim". + The variant determines the number of channels and highres_channels. + The following variants are available: + - "23-slim" (default): channels=32, highres_channels=64 + - "23": channels=64, highres_channels=128 + @type channels: int | None + @param channels: Base number of channels. If provided, overrides the variant values. + @type highres_channels: int | None + @param highres_channels: Number of channels in the high resolution net. If provided, overrides the variant values. @type use_aux_heads: bool @param use_aux_heads: Whether to use auxiliary heads. Defaults to True. @type upscale_module: nn.Module @param upscale_module: Module for upscaling (e.g., bilinear interpolation). Defaults to UpscaleOnline(). - @type highres_channels: int - @param highres_channels: Number of channels in the high resolution net. Defaults - to 64. @type spp_width: int @param spp_width: Width of the branches in the SPP block. Defaults to 128. 
@type ssp_inter_mode: str @@ -81,8 +90,6 @@ def __init__( 0]. @type layer3_repeats: int @param layer3_repeats: Number of times to repeat the 3rd stage. Defaults to 1. - @type channels: int - @param channels: Base number of channels. Defaults to 32. @type layers: list[int] @param layers: Number of blocks in each layer of the backbone. Defaults to [2, 2, 2, 2, 1, 2, 2, 1]. @@ -101,6 +108,11 @@ def __init__( super().__init__(**kwargs) + var = get_variant(variant) + + channels = channels or var.channels + highres_channels = highres_channels or var.highres_channels + self._use_aux_heads = use_aux_heads self.upscale = upscale_module self.ssp_inter_mode = ssp_inter_mode diff --git a/luxonis_train/nodes/backbones/ddrnet/variants.py b/luxonis_train/nodes/backbones/ddrnet/variants.py new file mode 100644 index 00000000..330a6d81 --- /dev/null +++ b/luxonis_train/nodes/backbones/ddrnet/variants.py @@ -0,0 +1,27 @@ +from typing import Literal + +from pydantic import BaseModel + + +class DDRNetVariant(BaseModel): + channels: int = 32 + highres_channels: int = 64 + + +def get_variant(variant: Literal["23-slim", "23"]) -> DDRNetVariant: + variants = { + "23-slim": DDRNetVariant( + channels=32, + highres_channels=64, + ), + "23": DDRNetVariant( + channels=64, + highres_channels=128, + ), + } + if variant not in variants: # pragma: no cover + raise ValueError( + "DDRNet model variant should be in " + f"{list(variants.keys())}, got {variant}." 
+ ) + return variants[variant] \ No newline at end of file diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index 95e45db2..bcdb0729 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -1,7 +1,7 @@ import logging -import torch.nn as nn import torch +import torch.nn as nn from torch import Tensor from luxonis_train.nodes.base_node import BaseNode From fd3e1ba9dd440a56f354a19f1db01357c5e3ecae Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 14:18:35 +0000 Subject: [PATCH 087/102] fix: callback order for DDRNet (export after test) --- configs/ddrnet_segmentation_model.yaml | 2 +- luxonis_train/nodes/backbones/ddrnet/variants.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index 9f5302b2..7e6cda2c 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -29,8 +29,8 @@ trainer: num_log_images: 8 callbacks: - - name: ExportOnTrainEnd - name: TestOnTrainEnd + - name: ExportOnTrainEnd optimizer: name: SGD diff --git a/luxonis_train/nodes/backbones/ddrnet/variants.py b/luxonis_train/nodes/backbones/ddrnet/variants.py index 330a6d81..0e2d66c7 100644 --- a/luxonis_train/nodes/backbones/ddrnet/variants.py +++ b/luxonis_train/nodes/backbones/ddrnet/variants.py @@ -24,4 +24,4 @@ def get_variant(variant: Literal["23-slim", "23"]) -> DDRNetVariant: "DDRNet model variant should be in " f"{list(variants.keys())}, got {variant}." 
) - return variants[variant] \ No newline at end of file + return variants[variant] From f1188e6cb68cd9af8ef83eea3be30433249ccf0f Mon Sep 17 00:00:00 2001 From: Nikita Date: Tue, 17 Sep 2024 18:59:12 +0000 Subject: [PATCH 088/102] fix: change default ddrnet scheduler to CosineAnnealingLR --- configs/ddrnet_segmentation_model.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index 7e6cda2c..25779f4a 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -40,4 +40,6 @@ trainer: weight_decay: 0.0005 scheduler: - name: PolynomialLR + name: CosineAnnealingLR + params: + T_max: *epochs From ff39b617347299c7228f9cb3635c1e08b812e391 Mon Sep 17 00:00:00 2001 From: Nikita Date: Wed, 18 Sep 2024 13:12:08 +0000 Subject: [PATCH 089/102] fix: modify head architecture to match the original one --- .../nodes/heads/ddrnet_segmentation_head.py | 64 ++++++++----------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index bcdb0729..bd7d358c 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -5,7 +5,6 @@ from torch import Tensor from luxonis_train.nodes.base_node import BaseNode -from luxonis_train.nodes.blocks import ConvModule from luxonis_train.utils.general import infer_upscale_factor from luxonis_train.utils.types import LabelType @@ -46,48 +45,37 @@ def __init__( ) self.scale_factor = scale_factor - if inter_mode == "pixel_shuffle": - if inter_channels % (scale_factor**2) != 0: - raise ValueError( - "When using pixel_shuffle, inter_channels must be a multiple of scale_factor^2." 
- ) + if inter_mode == "pixel_shuffle" and inter_channels % (scale_factor**2) != 0: + raise ValueError( + "For pixel_shuffle, inter_channels must be a multiple of scale_factor^2." + ) - self.conv1 = ConvModule( - self.in_channels, - inter_channels, - kernel_size=3, - padding=1, - bias=False, - activation=nn.ReLU(inplace=True), + self.bn1 = nn.BatchNorm2d(self.in_channels) + self.conv1 = nn.Conv2d( + self.in_channels, inter_channels, kernel_size=3, padding=1, bias=False ) + self.bn2 = nn.BatchNorm2d(inter_channels) + self.relu = nn.ReLU(inplace=True) - if inter_mode == "pixel_shuffle": - self.conv2 = ConvModule( - inter_channels, - inter_channels, - kernel_size=1, - padding=0, - bias=True, - activation=nn.Identity(), - ) - self.upscale = nn.PixelShuffle(scale_factor) - else: - self.conv2 = ConvModule( - inter_channels, - self.n_classes, - kernel_size=1, - padding=0, - bias=True, - activation=nn.Identity(), - ) - self.upscale = nn.Upsample(scale_factor=scale_factor, mode=inter_mode) + self.conv2 = nn.Conv2d( + inter_channels, + inter_channels if inter_mode == "pixel_shuffle" else self.n_classes, + kernel_size=1, + padding=0, + bias=True, + ) + self.upscale = ( + nn.PixelShuffle(scale_factor) + if inter_mode == "pixel_shuffle" + else nn.Upsample(scale_factor=scale_factor, mode=inter_mode) + ) def forward(self, inputs: Tensor) -> Tensor: - x = self.conv1(inputs) - out = self.conv2(x) - out = self.upscale(out) - - return out + x = self.relu(self.bn1(inputs)) + x = self.conv1(x) + x = self.relu(self.bn2(x)) + x = self.conv2(x) + return self.upscale(x) def set_export_mode(self, mode: bool = True) -> None: """Sets the module to export mode. 
From 8be1aa4a0d851a593d4e1362593d0e743f1d4eb5 Mon Sep 17 00:00:00 2001 From: Nikita Date: Wed, 18 Sep 2024 14:21:49 +0000 Subject: [PATCH 090/102] feat: add argmax during export --- luxonis_train/nodes/heads/ddrnet_segmentation_head.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index bd7d358c..f397650c 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -75,7 +75,10 @@ def forward(self, inputs: Tensor) -> Tensor: x = self.conv1(x) x = self.relu(self.bn2(x)) x = self.conv2(x) - return self.upscale(x) + x = self.upscale(x) + if self.export: + return x.argmax(dim=1) + return x def set_export_mode(self, mode: bool = True) -> None: """Sets the module to export mode. From 66e7226a408e86d2dfa2baabd4a5b147e899a371 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 05:55:20 +0200 Subject: [PATCH 091/102] project settings update --- .github/workflows/ci.yaml | 2 +- CONTRIBUTING.md | 61 +++++++++++++++++++++++++++------------ pyproject.toml | 2 +- 3 files changed, 44 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4c79d608..6dbf1a87 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -134,7 +134,7 @@ jobs: PYTORCH_MPS_HIGH_WATERMARK_RATIO: 0.0 with: emoji: false - custom-arguments: --junit-xml pytest.xml --cov-report xml + custom-arguments: --junit-xml pytest.xml --cov luxonis_train --cov-report xml - name: Create Test Report uses: EnricoMi/publish-unit-test-result-action@v2 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index be11d8a0..20fd3607 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,11 +1,11 @@ -# Contributing to LuxonisTrain +# Contributing to LuxonisTrain **This guide is intended for our internal development team.** It outlines our workflow and 
standards for contributing to this project. ## Table Of Contents -- [Requirements](#requirements) +- [Pre-requisites](#pre-requisites) - [Pre-commit Hooks](#pre-commit-hooks) - [Documentation](#documentation) - [Type Checking](#type-checking) @@ -14,9 +14,16 @@ It outlines our workflow and standards for contributing to this project. - [GitHub Actions](#github-actions) - [Making and Reviewing Changes](#making-and-reviewing-changes) -## Requirements +## Pre-requisites -Install the development dependencies by running `pip install -r requirements-dev.txt` or installing the package with the `dev` extra: +Clone the repository and navigate to the root directory: + +```bash +git clone git@github.com:luxonis/luxonis-train.git +cd luxonis-train +``` + +Install the development dependencies by running `pip install -r requirements-dev.txt` or install the package with the `dev` extra flag: ```bash pip install -e .[dev] @@ -30,11 +37,11 @@ pip install -e .[dev] We use pre-commit hooks to ensure code quality and consistency: -1. Install pre-commit (see [pre-commit.com](https://pre-commit.com/#install)). +1. Install `pre-commit` (see [pre-commit.com](https://pre-commit.com/#install)). 1. Clone the repository and run `pre-commit install` in the root directory. -1. The pre-commit hook will now run automatically on `git commit`. +1. The `pre-commit` hook will now run automatically on `git commit`. - If the hook fails, it will print an error message and abort the commit. - - It will also modify the files in-place to fix any issues it can. + - Some hooks will also modify the files in-place to fix found issues. ## Documentation @@ -42,10 +49,19 @@ We use the [Epytext](https://epydoc.sourceforge.net/epytext.html) markup languag To verify that your documentation is formatted correctly, follow these steps: 1. Download [`get-docs.py`](https://github.com/luxonis/python-api-analyzer-to-json/blob/main/gen-docs.py) script -1. Run `python3 get-docs.py luxonis_ml` in the root directory. +1. 
Run `python3 get-docs.py luxonis_train` in the root directory. - If the script runs successfully and produces `docs.json` file, your documentation is formatted correctly. - - **NOTE:** If the script fails, it might not give the specific error message. In that case, you can run - the script for each file individually until you find the one that is causing the error. + +> \[!NOTE\] +> If the script fails, it might not give a specific error message. +> In that case, you can run the script for each file individually +> until you find the one that is causing the error. + +**Editor Support:** + +- **PyCharm** - built in support for generating `epytext` docstrings +- **Visual Studio Code** - [AI Docify](https://marketplace.visualstudio.com/items?itemName=AIC.docify) extension offers support for `epytext` +- **NeoVim** - [vim-python-docstring](https://github.com/pixelneo/vim-python-docstring) supports `epytext` style ## Type Checking @@ -55,23 +71,30 @@ The codebase is type-checked using [pyright](https://github.com/microsoft/pyrigh pyright --warnings --level warning --pythonversion 3.10 luxonis_train ``` -### Editor Support +**Editor Support:** -- **PyCharm** - built in support for generating `epytext` docstrings -- **Visual Studie Code** - [AI Docify](https://marketplace.visualstudio.com/items?itemName=AIC.docify) extension offers support for `epytext` -- **NeoVim** - [vim-python-docstring](https://github.com/pixelneo/vim-python-docstring) supports `epytext` style +- **PyCharm** - [Pyright](https://plugins.jetbrains.com/plugin/24145-pyright) extension +- **Visual Studio Code** - [Pyright](https://marketplace.visualstudio.com/items?itemName=ms-pyright.pyright) extension +- **NeoVim** - [LSP-Config](https://github.com/neovim/nvim-lspconfig) plugin with the [pyright configuration](https://github.com/neovim/nvim-lspconfig/blob/master/doc/server_configurations.md#pyright) ## Tests We use [pytest](https://docs.pytest.org/en/stable/) for testing. 
-The tests are located in the `tests` directory. You can run the tests locally by running `pytest` in the root directory. +The tests are located in the `tests` directory. To run the tests with coverage, use the following command: + +```bash +pytest --cov=luxonis_train --cov-report=html +``` -This command will run all tests generate HTML coverage report. +This command will run all tests and generate HTML coverage report. > \[!TIP\] > The coverage report will be saved to `htmlcov` directory. > If you want to inspect the coverage in more detail, open `htmlcov/index.html` in a browser. +> \[!TIP\] +> You can choose to run only the unit-tests or only the integration tests by adding `-m unit` or `-m integration` to the `pytest` command. + > \[!IMPORTANT\] > If a new feature is added, a new test should be added to cover it. > The minimum overall test coverage for a PR to be merged is 90%. @@ -89,12 +112,12 @@ Our GitHub Actions workflow is run when a new PR is opened. > Review the GitHub Actions output if your PR fails. > \[!IMPORTANT\] -> Successfull completion of all the workflow checks is required for merging a PR. +> Successful completion of all the workflow checks is required for merging a PR. ## Making and Submitting Changes 1. Make changes in a new branch. 1. Test your changes locally. -1. Commit (pre-commit hook will run). -1. Push to your branch and create a pull request. +1. Commit your changes (pre-commit hooks will run). +1. Push your branch and create a pull request. 1. The team will review and merge your PR. 
diff --git a/pyproject.toml b/pyproject.toml index 3c610a3c..d65978d4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ reportUnnecessaryIsInstance = "none" [tool.pytest.ini_options] testpaths = ["tests"] -addopts = "--cov=luxonis_train --cov-report=html --disable-warnings" +addopts = "--disable-warnings" markers = [ "unit: mark a test as a unit test", "integration: mark a test as an integration test", From 98e1622893332fbc02bd7d4254239e1fff330028 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 06:46:28 +0200 Subject: [PATCH 092/102] formatting --- .../nodes/backbones/ddrnet/blocks.py | 43 ++++++++++------ .../nodes/backbones/ddrnet/ddrnet.py | 24 ++++++--- luxonis_train/nodes/blocks/blocks.py | 51 ++++++++++++------- .../nodes/heads/ddrnet_segmentation_head.py | 21 ++++++-- 4 files changed, 94 insertions(+), 45 deletions(-) diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index a4e1baf3..52267a25 100644 --- a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -5,6 +5,7 @@ Paper: U{https://arxiv.org/pdf/2101.06085.pdf} @license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} """ + from typing import Type import torch @@ -25,19 +26,22 @@ def __init__( """A DAPPM branch. @type kernel_size: int - @param kernel_size: The kernel size for the average pooling. When stride=0, this - parameter is omitted, and AdaptiveAvgPool2d over all the input is performed. + @param kernel_size: The kernel size for the average pooling. + When stride=0, this parameter is omitted, and + AdaptiveAvgPool2d over all the input is performed. @type stride: int @param stride: Stride for the average pooling. When stride=0, an - AdaptiveAvgPool2d over all the input is performed (output is 1x1). When - stride=1, no average pooling is performed. When stride>1, average pooling is - performed (scaling the input down and up again). 
+ AdaptiveAvgPool2d over all the input is performed (output is + 1x1). When stride=1, no average pooling is performed. When + stride>1, average pooling is performed (scaling the input + down and up again). @type in_channels: int @param in_channels: Number of input channels. @type branch_channels: int @param branch_channels: Width after the first convolution. @type inter_mode: str - @param inter_mode: Interpolation mode for upscaling. Defaults to "bilinear". + @param inter_mode: Interpolation mode for upscaling. Defaults to + "bilinear". """ super().__init__() @@ -46,7 +50,9 @@ def __init__( down_list.append(nn.AdaptiveAvgPool2d((1, 1))) elif stride > 1: down_list.append( - nn.AvgPool2d(kernel_size=kernel_size, stride=stride, padding=stride) + nn.AvgPool2d( + kernel_size=kernel_size, stride=stride, padding=stride + ) ) down_list.append(nn.BatchNorm2d(in_channels)) @@ -89,7 +95,9 @@ def forward(self, x: Tensor | list[Tensor]) -> Tensor: in_width = x.shape[-1] in_height = x.shape[-2] out = self.down_scale(x) - out = self.up_scale(out, output_height=in_height, output_width=in_width) + out = self.up_scale( + out, output_height=in_height, output_width=in_width + ) if output_of_prev_branch is not None: out = self.process(out + output_of_prev_branch) @@ -112,7 +120,8 @@ def __init__( @type in_channels: int @param in_channels: Number of input channels. @type branch_channels: int - @param branch_channels: Width after the first convolution in each branch. + @param branch_channels: Width after the first convolution in + each branch. @type out_channels: int @param out_channels: Number of output channels. @type kernel_sizes: list[int] @@ -120,7 +129,8 @@ def __init__( @type strides: list[int] @param strides: List of strides for each branch. @type inter_mode: str - @param inter_mode: Interpolation mode for upscaling. Defaults to "bilinear". + @param inter_mode: Interpolation mode for upscaling. Defaults to + "bilinear". 
""" super().__init__() @@ -162,7 +172,8 @@ def forward(self, x: Tensor) -> Tensor: @type x: Tensor @param x: Input tensor. - @return: Output tensor after processing through all branches and compression. + @return: Output tensor after processing through all branches and + compression. """ x_list = [self.branches[0](x)] @@ -193,7 +204,8 @@ def __init__( @type input_channels: int @param input_channels: Number of input channels. @type layer3_repeats: int - @param layer3_repeats: Number of repeats for layer3. Defaults to 1. + @param layer3_repeats: Number of repeats for layer3. Defaults to + 1. """ super().__init__() self.input_channels = input_channels @@ -284,10 +296,11 @@ def validate_backbone_attributes(self) -> None: ), f"Invalid backbone - attribute '{attribute}' is missing" def get_backbone_output_number_of_channels(self) -> dict[str, int]: - """Determine the number of output channels for each layer of the backbone. + """Determine the number of output channels for each layer of the + backbone. - Returns a dictionary with keys "layer2", "layer3", "layer4" and their respective - number of output channels. + Returns a dictionary with keys "layer2", "layer3", "layer4" and + their respective number of output channels. @return: Dictionary of output channel counts for each layer. 
""" diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index f5741b16..00d42043 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -123,7 +123,10 @@ def __init__( self.layer3_repeats = layer3_repeats self.channels = channels self.layers = layers - self.backbone_layers, self.additional_layers = self.layers[:4], self.layers[4:] + self.backbone_layers, self.additional_layers = ( + self.layers[:4], + self.layers[4:], + ) self._backbone = BasicDDRBackBone( block=self.block, @@ -133,7 +136,9 @@ def __init__( layer3_repeats=self.layer3_repeats, ) self._backbone.validate_backbone_attributes() - out_chan_backbone = self._backbone.get_backbone_output_number_of_channels() + out_chan_backbone = ( + self._backbone.get_backbone_output_number_of_channels() + ) # Define layers for layer 3 self.compression3 = nn.ModuleList() @@ -224,7 +229,8 @@ def __init__( ) self.spp = DAPPM( - in_channels=out_chan_backbone["layer4"] * layer5_bottleneck_expansion, + in_channels=out_chan_backbone["layer4"] + * layer5_bottleneck_expansion, branch_channels=spp_width, out_channels=highres_channels * layer5_bottleneck_expansion, inter_mode=self.ssp_inter_mode, @@ -252,7 +258,9 @@ def forward(self, inputs: Tensor) -> list[Tensor]: x = out_layer3 + self.down3[i](self.relu(out_layer3_skip)) x_skip = out_layer3_skip + self.upscale( - self.compression3[i](self.relu(out_layer3)), height_output, width_output + self.compression3[i](self.relu(out_layer3)), + height_output, + width_output, ) # Save for auxiliary head @@ -264,7 +272,9 @@ def forward(self, inputs: Tensor) -> list[Tensor]: x = out_layer4 + self.down4(self.relu(out_layer4_skip)) x_skip = out_layer4_skip + self.upscale( - self.compression4(self.relu(out_layer4)), height_output, width_output + self.compression4(self.relu(out_layer4)), + height_output, + width_output, ) out_layer5_skip = self.layer5_skip(self.relu(x_skip)) @@ 
-283,7 +293,9 @@ def forward(self, inputs: Tensor) -> list[Tensor]: def init_params(self): for m in self.modules(): if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") + nn.init.kaiming_normal_( + m.weight, mode="fan_out", nonlinearity="relu" + ) if m.bias is not None: nn.init.constant_(m.bias, 0) elif isinstance(m, nn.BatchNorm2d): diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 033c0502..a6c49143 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -758,7 +758,12 @@ def __init__( super().__init__() self.expansion = expansion self.conv1 = nn.Conv2d( - in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False + in_planes, + planes, + kernel_size=3, + stride=stride, + padding=1, + bias=False, ) self.bn1 = nn.BatchNorm2d(planes) self.conv2 = nn.Conv2d( @@ -863,20 +868,24 @@ def forward(self, x: Tensor) -> Tensor: class UpscaleOnline(nn.Module): """Upscale tensor to a specified size during the forward pass. - This class supports cases where the required scale/size is only known when the input - is received. Only the interpolation mode is set in advance. + This class supports cases where the required scale/size is only + known when the input is received. Only the interpolation mode is set + in advance. """ def __init__(self, mode: str = "bilinear"): """Initialize UpscaleOnline with the interpolation mode. @type mode: str - @param mode: Interpolation mode for resizing. Defaults to "bilinear". + @param mode: Interpolation mode for resizing. Defaults to + "bilinear". """ super().__init__() self.mode = mode - def forward(self, x: Tensor, output_height: int, output_width: int) -> Tensor: + def forward( + self, x: Tensor, output_height: int, output_width: int + ) -> Tensor: """Upscale the input tensor to the specified height and width. 
@type x: Tensor @@ -887,12 +896,14 @@ def forward(self, x: Tensor, output_height: int, output_width: int) -> Tensor: @param output_width: Desired width of the output tensor. @return: Upscaled tensor. """ - return F.interpolate(x, size=[output_height, output_width], mode=self.mode) + return F.interpolate( + x, size=[output_height, output_width], mode=self.mode + ) class DropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample, when applied in the main path of - residual blocks. + """Drop paths (Stochastic Depth) per sample, when applied in the + main path of residual blocks. Intended usage of this block is as follows: @@ -911,12 +922,12 @@ def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): """Initializes the DropPath module. @type drop_prob: float - @param drop_prob: Probability of zeroing out individual vectors (channel - dimension) of each feature map. Defaults to 0.0. + @param drop_prob: Probability of zeroing out individual vectors + (channel dimension) of each feature map. Defaults to 0.0. @type scale_by_keep: bool - @param scale_by_keep: Whether to scale the output by the keep probability. - Enabled by default to maintain output mean & std in the same range as - without DropPath. Defaults to True. + @param scale_by_keep: Whether to scale the output by the keep + probability. Enabled by default to maintain output mean & + std in the same range as without DropPath. Defaults to True. """ super().__init__() self.drop_prob = drop_prob @@ -925,17 +936,19 @@ def __init__(self, drop_prob: float = 0.0, scale_by_keep: bool = True): def drop_path( self, x: Tensor, drop_prob: float = 0.0, scale_by_keep: bool = True ) -> Tensor: - """Drop paths (Stochastic Depth) per sample when applied in the main path of - residual blocks. + """Drop paths (Stochastic Depth) per sample when applied in the + main path of residual blocks. @type x: Tensor @param x: Input tensor. @type drop_prob: float - @param drop_prob: Probability of dropping a path. 
Defaults to 0.0. + @param drop_prob: Probability of dropping a path. Defaults to + 0.0. @type scale_by_keep: bool - @param scale_by_keep: Whether to scale the output by the keep probability. - Defaults to True. - @return: Tensor with dropped paths based on the provided drop probability. + @param scale_by_keep: Whether to scale the output by the keep + probability. Defaults to True. + @return: Tensor with dropped paths based on the provided drop + probability. """ keep_prob = 1 - drop_prob shape = (x.shape[0],) + (1,) * (x.ndim - 1) diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index f397650c..4212250e 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -45,21 +45,30 @@ def __init__( ) self.scale_factor = scale_factor - if inter_mode == "pixel_shuffle" and inter_channels % (scale_factor**2) != 0: + if ( + inter_mode == "pixel_shuffle" + and inter_channels % (scale_factor**2) != 0 + ): raise ValueError( "For pixel_shuffle, inter_channels must be a multiple of scale_factor^2." ) self.bn1 = nn.BatchNorm2d(self.in_channels) self.conv1 = nn.Conv2d( - self.in_channels, inter_channels, kernel_size=3, padding=1, bias=False + self.in_channels, + inter_channels, + kernel_size=3, + padding=1, + bias=False, ) self.bn2 = nn.BatchNorm2d(inter_channels) self.relu = nn.ReLU(inplace=True) self.conv2 = nn.Conv2d( inter_channels, - inter_channels if inter_mode == "pixel_shuffle" else self.n_classes, + inter_channels + if inter_mode == "pixel_shuffle" + else self.n_classes, kernel_size=1, padding=0, bias=True, @@ -83,11 +92,13 @@ def forward(self, inputs: Tensor) -> Tensor: def set_export_mode(self, mode: bool = True) -> None: """Sets the module to export mode. - Replaces the forward method with an identity function when in export mode. + Replaces the forward method with an identity function when in + export mode. 
@warning: The replacement is destructive and cannot be undone. @type mode: bool - @param mode: Whether to set the export mode to True or False. Defaults to True. + @param mode: Whether to set the export mode to True or False. + Defaults to True. """ super().set_export_mode(mode) if self.export and self.attach_index != -1: From 3cd7c7ac455bc0b2e8ca605343a2858fefc5b7e0 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 06:59:00 +0200 Subject: [PATCH 093/102] fixed types --- .../nodes/backbones/ddrnet/blocks.py | 81 +++++++------------ .../nodes/backbones/ddrnet/ddrnet.py | 51 +++++------- .../nodes/heads/ddrnet_segmentation_head.py | 5 +- 3 files changed, 55 insertions(+), 82 deletions(-) diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index 52267a25..596907fd 100644 --- a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -6,8 +6,6 @@ @license: U{https://github.com/Deci-AI/super-gradients/blob/master/LICENSE.md} """ -from typing import Type - import torch from torch import Tensor, nn @@ -184,36 +182,36 @@ def forward(self, x: Tensor) -> Tensor: return out -class BasicDDRBackBone(nn.Module): +class BasicDDRBackbone(nn.Module): def __init__( self, - block: Type[nn.Module], - width: int, + block: type[nn.Module], + stem_channels: int, layers: list[int], - input_channels: int, + in_channels: int, layer3_repeats: int = 1, ): """Initialize the BasicDDRBackBone with specified parameters. @type block: Type[nn.Module] @param block: The block class to use for layers. - @type width: int - @param width: Width of the feature maps. + @type stem_channels: int + @param stem_channels: Number of output channels in the stem layer. @type layers: list[int] @param layers: Number of blocks in each layer. - @type input_channels: int - @param input_channels: Number of input channels. + @type in_channels: int + @param in_channels: Number of input channels. 
@type layer3_repeats: int @param layer3_repeats: Number of repeats for layer3. Defaults to 1. """ super().__init__() - self.input_channels = input_channels + self.input_channels = in_channels self.stem = nn.Sequential( ConvModule( - in_channels=input_channels, - out_channels=width, + in_channels=in_channels, + out_channels=stem_channels, kernel_size=3, stride=2, padding=1, @@ -221,8 +219,8 @@ def __init__( activation=nn.ReLU(inplace=True), ), ConvModule( - in_channels=width, - out_channels=width, + in_channels=stem_channels, + out_channels=stem_channels, kernel_size=3, stride=2, padding=1, @@ -231,36 +229,36 @@ def __init__( ), ) - self.layer1 = _make_layer( + self.layer1 = make_layer( block=block, - in_channels=width, - channels=width, + in_channels=stem_channels, + channels=stem_channels, num_blocks=layers[0], ) - self.layer2 = _make_layer( + self.layer2 = make_layer( block=block, - in_channels=width, - channels=width * 2, + in_channels=stem_channels, + channels=stem_channels * 2, num_blocks=layers[1], stride=2, ) self.layer3 = nn.ModuleList( [ - _make_layer( + make_layer( block=block, - in_channels=width * 2, - channels=width * 4, + in_channels=stem_channels * 2, + channels=stem_channels * 4, num_blocks=layers[2], stride=2, ) ] + [ - _make_layer( + make_layer( block=block, - in_channels=width * 4, - channels=width * 4, + in_channels=stem_channels * 4, + channels=stem_channels * 4, num_blocks=layers[2], stride=1, ) @@ -268,33 +266,14 @@ def __init__( ] ) - self.layer4 = _make_layer( + self.layer4 = make_layer( block=block, - in_channels=width * 4, - channels=width * 8, + in_channels=stem_channels * 4, + channels=stem_channels * 8, num_blocks=layers[3], stride=2, ) - def validate_backbone_attributes(self) -> None: - """Validate the existence of required backbone attributes. - - Ensures that the following attributes are present: "stem", "layer1", "layer2", - "layer3", "layer4", "input_channels". 
- """ - expected_attributes = [ - "stem", - "layer1", - "layer2", - "layer3", - "layer4", - "input_channels", - ] - for attribute in expected_attributes: - assert hasattr( - self, attribute - ), f"Invalid backbone - attribute '{attribute}' is missing" - def get_backbone_output_number_of_channels(self) -> dict[str, int]: """Determine the number of output channels for each layer of the backbone. @@ -321,8 +300,8 @@ def get_backbone_output_number_of_channels(self) -> dict[str, int]: return output_shapes -def _make_layer( - block: Type[nn.Module], +def make_layer( + block: type[nn.Module], in_channels: int, channels: int, num_blocks: int, diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index 00d42043..2b719180 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -10,33 +10,32 @@ UpscaleOnline, ) -from .blocks import ( - DAPPM, - BasicDDRBackBone, - _make_layer, -) +from .blocks import DAPPM, BasicDDRBackbone, make_layer from .variants import get_variant class DDRNet(BaseNode[Tensor, list[Tensor]]): + in_channels: int + def __init__( self, variant: Literal["23-slim", "23"] = "23-slim", channels: int | None = None, highres_channels: int | None = None, use_aux_heads: bool = True, - upscale_module: nn.Module = None, + upscale_module: nn.Module | None = None, spp_width: int = 128, ssp_inter_mode: str = "bilinear", segmentation_inter_mode: str = "bilinear", + # TODO: nn.Module registry block: Type[nn.Module] = BasicResNetBlock, skip_block: Type[nn.Module] = BasicResNetBlock, layer5_block: Type[nn.Module] = Bottleneck, layer5_bottleneck_expansion: int = 2, - spp_kernel_sizes: list[int] = None, - spp_strides: list[int] = None, + spp_kernel_sizes: list[int] | None = None, + spp_strides: list[int] | None = None, layer3_repeats: int = 1, - layers: list[int] = None, + layers: list[int] | None = None, **kwargs, ): """DDRNet backbone. 
@@ -96,18 +95,13 @@ def __init__( @type kwargs: Any @param kwargs: Additional arguments to pass to L{BaseNode}. """ - - if upscale_module is None: - upscale_module = UpscaleOnline() - if spp_kernel_sizes is None: - spp_kernel_sizes = [1, 5, 9, 17, 0] - if spp_strides is None: - spp_strides = [1, 2, 4, 8, 0] - if layers is None: - layers = [2, 2, 2, 2, 1, 2, 2, 1] - super().__init__(**kwargs) + upscale_module = upscale_module or UpscaleOnline() + spp_kernel_sizes = spp_kernel_sizes or [1, 5, 9, 17, 0] + spp_strides = spp_strides or [1, 2, 4, 8, 0] + layers = layers or [2, 2, 2, 2, 1, 2, 2, 1] + var = get_variant(variant) channels = channels or var.channels @@ -117,8 +111,6 @@ def __init__( self.upscale = upscale_module self.ssp_inter_mode = ssp_inter_mode self.segmentation_inter_mode = segmentation_inter_mode - self.block = block - self.skip_block = skip_block self.relu = nn.ReLU(inplace=False) self.layer3_repeats = layer3_repeats self.channels = channels @@ -128,14 +120,13 @@ def __init__( self.layers[4:], ) - self._backbone = BasicDDRBackBone( - block=self.block, - width=self.channels, + self._backbone = BasicDDRBackbone( + block=block, + stem_channels=self.channels, layers=self.backbone_layers, - input_channels=self.in_channels, + in_channels=self.in_channels, layer3_repeats=self.layer3_repeats, ) - self._backbone.validate_backbone_attributes() out_chan_backbone = ( self._backbone.get_backbone_output_number_of_channels() ) @@ -166,7 +157,7 @@ def __init__( ) ) self.layer3_skip.append( - _make_layer( + make_layer( in_channels=out_chan_backbone["layer2"] if i == 0 else highres_channels, @@ -205,13 +196,13 @@ def __init__( ), ) - self.layer4_skip = _make_layer( + self.layer4_skip = make_layer( block=skip_block, in_channels=highres_channels, channels=highres_channels, num_blocks=self.additional_layers[2], ) - self.layer5_skip = _make_layer( + self.layer5_skip = make_layer( block=layer5_block, in_channels=highres_channels, channels=highres_channels, @@ -219,7 +210,7 
@@ def __init__( expansion=layer5_bottleneck_expansion, ) - self.layer5 = _make_layer( + self.layer5 = make_layer( block=layer5_block, in_channels=out_chan_backbone["layer4"], channels=out_chan_backbone["layer4"], diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index 4212250e..11d6ba2f 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -13,6 +13,9 @@ class DDRNetSegmentationHead(BaseNode[Tensor, Tensor]): attach_index: int = -1 + in_height: int + in_width: int + in_channels: int tasks: list[LabelType] = [LabelType.SEGMENTATION] @@ -104,4 +107,4 @@ def set_export_mode(self, mode: bool = True) -> None: if self.export and self.attach_index != -1: logger.info("Removing the auxiliary head.") - self.forward = lambda x: torch.tensor([]) + self.forward = lambda inputs: torch.tensor([]) From 401cccec36015b7d8b92036ef30e88309cacc1f4 Mon Sep 17 00:00:00 2001 From: Nikita Date: Thu, 19 Sep 2024 11:11:07 +0000 Subject: [PATCH 094/102] fix: change default ddrnet task to binary --- configs/ddrnet_segmentation_model.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/ddrnet_segmentation_model.yaml b/configs/ddrnet_segmentation_model.yaml index 25779f4a..2bd3b7e8 100644 --- a/configs/ddrnet_segmentation_model.yaml +++ b/configs/ddrnet_segmentation_model.yaml @@ -6,7 +6,7 @@ model: predefined_model: name: DDRNetSegmentationModel params: - task: multiclass + task: binary backbone_params: use_aux_heads: True # set to False to disable auxiliary heads (for export) variant: '23-slim' From 07270fe648a71656641ec9ca95953381ac2f4045 Mon Sep 17 00:00:00 2001 From: Nikita Date: Thu, 19 Sep 2024 14:39:08 +0000 Subject: [PATCH 095/102] fix: move BatchNorm2d before AvgPool to enable training with bs=1 --- luxonis_train/nodes/backbones/ddrnet/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index 596907fd..a2d16db7 100644 --- a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -44,6 +44,7 @@ def __init__( super().__init__() down_list = [] + down_list.append(nn.BatchNorm2d(in_channels)) if stride == 0: down_list.append(nn.AdaptiveAvgPool2d((1, 1))) elif stride > 1: @@ -53,7 +54,6 @@ def __init__( ) ) - down_list.append(nn.BatchNorm2d(in_channels)) down_list.append(nn.ReLU(inplace=True)) down_list.append( nn.Conv2d(in_channels, branch_channels, kernel_size=1, bias=False) From 052aea9152374a3e4006c6c40c0a37bce757e36b Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 19 Sep 2024 15:29:21 +0000 Subject: [PATCH 096/102] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index 34387324..ee07d4c2 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 97% - 97% + 96% + 96% From 576745833f31128b97f0f1025b1713090138f47a Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 21:23:29 +0200 Subject: [PATCH 097/102] typo --- luxonis_train/utils/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/luxonis_train/utils/config.py b/luxonis_train/utils/config.py index 160d76e7..09cb8795 100644 --- a/luxonis_train/utils/config.py +++ b/luxonis_train/utils/config.py @@ -124,7 +124,7 @@ def check_main_metric(self) -> Self: "[Ignore if using predefined model] " "No metrics specified. " "This is likely unintended unless " - "the configuration is not used for training. " + "the configuration is not used for training." 
) return self From 55c9fb817b06a233345840453372fe2b435a2dc4 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 21:23:37 +0200 Subject: [PATCH 098/102] fixed docstrings --- luxonis_train/nodes/blocks/blocks.py | 10 +++++----- luxonis_train/nodes/heads/ddrnet_segmentation_head.py | 3 +-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index a6c49143..6a0dd7a6 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -908,11 +908,11 @@ class DropPath(nn.Module): Intended usage of this block is as follows: >>> class ResNetBlock(nn.Module): - >>> def __init__(self, ..., drop_path_rate: float): - >>> self.drop_path = DropPath(drop_path_rate) - >>> - >>> def forward(self, x): - >>> return x + self.drop_path(self.conv_bn_act(x)) + ... def __init__(self, ..., drop_path_rate: float): + ... self.drop_path = DropPath(drop_path_rate) + + ... def forward(self, x): + ... return x + self.drop_path(self.conv_bn_act(x)) @see U{Original code (TIMM) } @license: U{Apache License 2.0 } diff --git a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py index 11d6ba2f..5e8468b0 100644 --- a/luxonis_train/nodes/heads/ddrnet_segmentation_head.py +++ b/luxonis_train/nodes/heads/ddrnet_segmentation_head.py @@ -95,8 +95,7 @@ def forward(self, inputs: Tensor) -> Tensor: def set_export_mode(self, mode: bool = True) -> None: """Sets the module to export mode. - Replaces the forward method with an identity function when in - export mode. + Replaces the forward method with a constant empty tensor. @warning: The replacement is destructive and cannot be undone. 
@type mode: bool From 19fcfcd6fa87e92d7b12453312a3c70e4441f2c8 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 21:24:02 +0200 Subject: [PATCH 099/102] removed dead code --- luxonis_train/nodes/blocks/blocks.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/luxonis_train/nodes/blocks/blocks.py b/luxonis_train/nodes/blocks/blocks.py index 6a0dd7a6..99fe2a9a 100644 --- a/luxonis_train/nodes/blocks/blocks.py +++ b/luxonis_train/nodes/blocks/blocks.py @@ -961,6 +961,3 @@ def forward(self, x: Tensor) -> Tensor: if self.drop_prob == 0.0 or not self.training: return x return self.drop_path(x, self.drop_prob, self.scale_by_keep) - - def extra_repr(self) -> str: - return f"drop_prob={round(self.drop_prob, 3):0.3f}" From 50e42eb4aa8c5927ec91498f5ecf3d1fc578e5fa Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 21:24:16 +0200 Subject: [PATCH 100/102] replaced typing.Type with builtin type --- .../nodes/backbones/ddrnet/ddrnet.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py index 2b719180..37779a19 100644 --- a/luxonis_train/nodes/backbones/ddrnet/ddrnet.py +++ b/luxonis_train/nodes/backbones/ddrnet/ddrnet.py @@ -1,4 +1,4 @@ -from typing import Literal, Type +from typing import Literal from torch import Tensor, nn @@ -28,9 +28,9 @@ def __init__( ssp_inter_mode: str = "bilinear", segmentation_inter_mode: str = "bilinear", # TODO: nn.Module registry - block: Type[nn.Module] = BasicResNetBlock, - skip_block: Type[nn.Module] = BasicResNetBlock, - layer5_block: Type[nn.Module] = Bottleneck, + block: type[nn.Module] = BasicResNetBlock, + skip_block: type[nn.Module] = BasicResNetBlock, + layer5_block: type[nn.Module] = Bottleneck, layer5_bottleneck_expansion: int = 2, spp_kernel_sizes: list[int] | None = None, spp_strides: list[int] | None = None, @@ -69,14 +69,14 @@ def __init__( @type 
segmentation_inter_mode: str @param segmentation_inter_mode: Interpolation mode for the segmentation head. Defaults to "bilinear". - @type block: Type[nn.Module] - @param block: Type of block to use in the backbone. Defaults to + @type block: type[nn.Module] + @param block: type of block to use in the backbone. Defaults to BasicResNetBlock. - @type skip_block: Type[nn.Module] - @param skip_block: Type of block for skip connections. Defaults to + @type skip_block: type[nn.Module] + @param skip_block: type of block for skip connections. Defaults to BasicResNetBlock. - @type layer5_block: Type[nn.Module] - @param layer5_block: Type of block for layer5 and layer5_skip. Defaults to + @type layer5_block: type[nn.Module] + @param layer5_block: type of block for layer5 and layer5_skip. Defaults to Bottleneck. @type layer5_bottleneck_expansion: int @param layer5_bottleneck_expansion: Expansion factor for Bottleneck block in From 438c7a31cd5bd34eb532678f95b7ac8005b75cc1 Mon Sep 17 00:00:00 2001 From: Martin Kozlovsky Date: Thu, 19 Sep 2024 21:24:32 +0200 Subject: [PATCH 101/102] replace assert with value error --- luxonis_train/nodes/backbones/ddrnet/blocks.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/luxonis_train/nodes/backbones/ddrnet/blocks.py b/luxonis_train/nodes/backbones/ddrnet/blocks.py index a2d16db7..59f76b8b 100644 --- a/luxonis_train/nodes/backbones/ddrnet/blocks.py +++ b/luxonis_train/nodes/backbones/ddrnet/blocks.py @@ -129,12 +129,16 @@ def __init__( @type inter_mode: str @param inter_mode: Interpolation mode for upscaling. Defaults to "bilinear". + + @raises ValueError: If the lengths of `kernel_sizes` and `strides` + are not the same. 
""" super().__init__() - assert len(kernel_sizes) == len( - strides - ), "len of kernel_sizes and strides must be the same" + if len(kernel_sizes) != len(strides): # pragma: no cover + raise ValueError( + "The lenghts of `kernel_sizes` and `strides` must be the same" + ) self.branches = nn.ModuleList( [ From 8b1b4cd53325db61285d0ec342077b85c78cd16b Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Thu, 19 Sep 2024 19:52:46 +0000 Subject: [PATCH 102/102] [Automated] Updated coverage badge --- media/coverage_badge.svg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/media/coverage_badge.svg b/media/coverage_badge.svg index ee07d4c2..34387324 100644 --- a/media/coverage_badge.svg +++ b/media/coverage_badge.svg @@ -15,7 +15,7 @@ coverage coverage - 96% - 96% + 97% + 97%