upgrade to numpy 2.0 and remove imgaug (#13937)

* upgrade to numpy 2.0 and remove imgaug * fix bug * fix bug * fix bug * fix bug * fix bug * add license
PaddlePaddle · Nov 6, 2024 · 15fb82d · 15fb82d
1 parent 9d4a66c
commit 15fb82d
Show file tree

Hide file tree

Showing 4 changed files with 338 additions and 38 deletions.
diff --git a/ppocr/data/imaug/iaa_augment.py b/ppocr/data/imaug/iaa_augment.py
@@ -15,78 +15,198 @@
 This code is refer from:
 https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py
 """
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
+import os
+
+# Prevent automatic updates in Albumentations for stability in augmentation behavior
+os.environ["NO_ALBUMENTATIONS_UPDATE"] = "1"
 
 import numpy as np
-import imgaug
-import imgaug.augmenters as iaa
+import albumentations as A
+from albumentations.core.transforms_interface import DualTransform
+from albumentations.augmentations.geometric import functional as fgeometric
+
+
+# Custom resize transformation mimicking Imgaug's behavior with scaling
+class ImgaugLikeResize(DualTransform):
+    def __init__(
+        self, scale_range=(0.5, 3.0), interpolation=1, always_apply=False, p=1.0
+    ):
+        super(ImgaugLikeResize, self).__init__(always_apply, p)
+        self.scale_range = scale_range
+        self.interpolation = interpolation
+
+    # Resize the image based on a randomly chosen scale within the scale range
+    def apply(self, img, scale=1.0, **params):
+        height, width = img.shape[:2]
+        new_height = int(height * scale)
+        new_width = int(width * scale)
+
+        # For compatibility with Albumentations 1.4.15 and later
+        # return fgeometric.resize(
+        #     img, (new_height, new_width), interpolation=self.interpolation
+        # )
 
+        return fgeometric.resize(
+            img, new_height, new_width, interpolation=self.interpolation
+        )
 
+    # Apply the same scaling transformation to keypoints (e.g., polygon points)
+    def apply_to_keypoints(self, keypoints, scale=1.0, **params):
+        return np.array(
+            [(x * scale, y * scale) + tuple(rest) for x, y, *rest in keypoints]
+        )
+
+    # Get random scale parameter within the specified range
+    def get_params(self):
+        scale = np.random.uniform(self.scale_range[0], self.scale_range[1])
+        return {"scale": scale}
+
+
+# Builder class to translate custom augmenter arguments into Albumentations-compatible format
 class AugmenterBuilder(object):
     def __init__(self):
-        pass
+        # Map common Imgaug transformations to equivalent Albumentations transforms
+        self.imgaug_to_albu = {
+            "Fliplr": "HorizontalFlip",
+            "Flipud": "VerticalFlip",
+            "Affine": "Affine",
+            # Additional mappings can be added here if needed
+        }
 
+    # Recursive method to construct augmentation pipeline based on provided arguments
     def build(self, args, root=True):
         if args is None or len(args) == 0:
             return None
         elif isinstance(args, list):
+            # Build the full augmentation sequence if it's a root-level call
             if root:
                 sequence = [self.build(value, root=False) for value in args]
-                return iaa.Sequential(sequence)
+                return A.Compose(
+                    sequence,
+                    keypoint_params=A.KeypointParams(
+                        format="xy", remove_invisible=False
+                    ),
+                )
             else:
-                return getattr(iaa, args[0])(
-                    *[self.to_tuple_if_list(a) for a in args[1:]]
+                # Build individual augmenters for nested arguments
+                augmenter_type = args[0]
+                augmenter_args = args[1] if len(args) > 1 else {}
+                augmenter_args_mapped = self.map_arguments(
+                    augmenter_type, augmenter_args
                 )
+                augmenter_type_mapped = self.imgaug_to_albu.get(
+                    augmenter_type, augmenter_type
+                )
+                if augmenter_type_mapped == "Resize":
+                    return ImgaugLikeResize(**augmenter_args_mapped)
+                else:
+                    cls = getattr(A, augmenter_type_mapped)
+                    return cls(
+                        **{
+                            k: self.to_tuple_if_list(v)
+                            for k, v in augmenter_args_mapped.items()
+                        }
+                    )
         elif isinstance(args, dict):
-            cls = getattr(iaa, args["type"])
-            return cls(**{k: self.to_tuple_if_list(v) for k, v in args["args"].items()})
+            # Process individual transformation specified as dictionary
+            augmenter_type = args["type"]
+            augmenter_args = args.get("args", {})
+            augmenter_args_mapped = self.map_arguments(augmenter_type, augmenter_args)
+            augmenter_type_mapped = self.imgaug_to_albu.get(
+                augmenter_type, augmenter_type
+            )
+            if augmenter_type_mapped == "Resize":
+                return ImgaugLikeResize(**augmenter_args_mapped)
+            else:
+                cls = getattr(A, augmenter_type_mapped)
+                return cls(
+                    **{
+                        k: self.to_tuple_if_list(v)
+                        for k, v in augmenter_args_mapped.items()
+                    }
+                )
         else:
-            raise RuntimeError("unknown augmenter arg: " + str(args))
+            raise RuntimeError("Unknown augmenter arg: " + str(args))
 
+    # Map arguments to expected format for each augmenter type
+    def map_arguments(self, augmenter_type, augmenter_args):
+        augmenter_args = augmenter_args.copy()  # Avoid modifying the original arguments
+        if augmenter_type == "Resize":
+            # Ensure size is a valid 2-element list or tuple
+            size = augmenter_args.get("size")
+            if size:
+                if not isinstance(size, (list, tuple)) or len(size) != 2:
+                    raise ValueError(
+                        f"'size' must be a list or tuple of two numbers, but got {size}"
+                    )
+                min_scale, max_scale = size
+                return {
+                    "scale_range": (min_scale, max_scale),
+                    "interpolation": 1,  # Linear interpolation
+                    "p": 1.0,
+                }
+            else:
+                return {"scale_range": (1.0, 1.0), "interpolation": 1, "p": 1.0}
+        elif augmenter_type == "Affine":
+            # Map rotation to a tuple and ensure p=1.0 to apply transformation
+            rotate = augmenter_args.get("rotate", 0)
+            if isinstance(rotate, list):
+                rotate = tuple(rotate)
+            elif isinstance(rotate, (int, float)):
+                rotate = (float(rotate), float(rotate))
+            augmenter_args["rotate"] = rotate
+            augmenter_args["p"] = 1.0
+            return augmenter_args
+        else:
+            # For other augmenters, ensure 'p' probability is specified
+            p = augmenter_args.get("p", 1.0)
+            augmenter_args["p"] = p
+            return augmenter_args
+
+    # Convert lists to tuples for Albumentations compatibility
     def to_tuple_if_list(self, obj):
         if isinstance(obj, list):
             return tuple(obj)
         return obj
 
 
+# Wrapper class for image and polygon transformations using Imgaug-style augmentation
 class IaaAugment:
     def __init__(self, augmenter_args=None, **kwargs):
         if augmenter_args is None:
+            # Default augmenters if none are specified
             augmenter_args = [
                 {"type": "Fliplr", "args": {"p": 0.5}},
                 {"type": "Affine", "args": {"rotate": [-10, 10]}},
                 {"type": "Resize", "args": {"size": [0.5, 3]}},
             ]
         self.augmenter = AugmenterBuilder().build(augmenter_args)
 
+    # Apply the augmentations to image and polygon data
     def __call__(self, data):
         image = data["image"]
-        shape = image.shape
 
         if self.augmenter:
-            aug = self.augmenter.to_deterministic()
-            data["image"] = aug.augment_image(image)
-            data = self.may_augment_annotation(aug, data, shape)
-        return data
+            # Flatten polygons to individual keypoints for transformation
+            keypoints = []
+            keypoints_lengths = []
+            for poly in data["polys"]:
+                keypoints.extend([tuple(point) for point in poly])
+                keypoints_lengths.append(len(poly))
 
-    def may_augment_annotation(self, aug, data, shape):
-        if aug is None:
-            return data
+            # Apply the augmentation pipeline to image and keypoints
+            transformed = self.augmenter(image=image, keypoints=keypoints)
+            data["image"] = transformed["image"]
 
-        line_polys = []
-        for poly in data["polys"]:
-            new_poly = self.may_augment_poly(aug, shape, poly)
-            line_polys.append(new_poly)
-        data["polys"] = np.array(line_polys)
-        return data
+            # Extract transformed keypoints and reconstruct polygon structures
+            transformed_keypoints = transformed["keypoints"]
 
-    def may_augment_poly(self, aug, img_shape, poly):
-        keypoints = [imgaug.Keypoint(p[0], p[1]) for p in poly]
-        keypoints = aug.augment_keypoints(
-            [imgaug.KeypointsOnImage(keypoints, shape=img_shape)]
-        )[0].keypoints
-        poly = [(p.x, p.y) for p in keypoints]
-        return poly
+            # Reassemble polygons from transformed keypoints
+            new_polys = []
+            idx = 0
+            for length in keypoints_lengths:
+                new_poly = transformed_keypoints[idx : idx + length]
+                new_polys.append(np.array([kp[:2] for kp in new_poly]))
+                idx += length
+            data["polys"] = new_polys
+        return data
diff --git a/pyproject.toml b/pyproject.toml
@@ -41,11 +41,11 @@ classifiers = [
 dependencies = [
     "shapely",
     "scikit-image",
-    "imgaug",
+    "six",
     "pyclipper",
     "lmdb",
     "tqdm",
-    "numpy<2.0",
+    "numpy",
     "rapidfuzz",
     "opencv-python",
     "opencv-contrib-python",
@@ -58,6 +58,7 @@ dependencies = [
     "fire>=0.3.0",
     "requests",
     "albumentations==1.4.10",
+    # to be compatible with albumentations
     "albucore==0.0.13"
 ]
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,10 +1,10 @@
 shapely
 scikit-image
-imgaug
+six
 pyclipper
 lmdb
 tqdm
-numpy<2.0
+numpy
 rapidfuzz
 opencv-python
 opencv-contrib-python