bigmlcom · jaor · Jun 13, 2024 · Jun 11, 2024 · Jun 11, 2024 · Jun 12, 2024
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -3,6 +3,11 @@
 History
 -------
 
+5.9.0 (2024-06-12)
+~~~~~~~~~~~~~~~~~~
+
+- Adding MS COCO to BigML-COCO translator.
+
 5.8.1 (2024-05-31)
 ~~~~~~~~~~~~~~~~~~
 

diff --git a/README.rst b/README.rst
@@ -14,16 +14,6 @@ the
 BigMLer is open sourced under the `Apache License, Version
 2.0 <http://www.apache.org/licenses/LICENSE-2.0.html>`_.
 
-Support
-=======
-
-Please report problems and bugs to our `BigML.io issue
-tracker <https://github.com/bigmlcom/io/issues>`_.
-
-Discussions about the different bindings take place in the general
-`BigML mailing list <http://groups.google.com/group/bigml>`_. Or join us
-in our `Campfire chatroom <https://bigmlinc.campfirenow.com/f20a0>`_.
-
 Requirements
 ============
 
@@ -54,7 +44,7 @@ using:
 The external libraries used in this case exist for the majority of recent
 Operating System versions. Still, some of them might need especific
 compiler versions or dlls, so their installation may require an additional
-setup effort.
+setup effort and will not be supported by default.
 
 The full set of libraries can be installed using
 
@@ -146,32 +136,26 @@ For a detailed description of authentication instructions on Windows see the
 BigMLer on Windows
 ==================
 
-To install BigMLer on Windows environments, you'll need `Python for Windows
-(v.2.7.x) <http://www.python.org/download/>`_ installed.
-
-In addition to that, you'll need the ``pip`` tool to install BigMLer. To
-install pip, first you need to open your command line window (write ``cmd`` in
-the input field that appears when you click on ``Start`` and hit ``enter``),
-download this `python file <http://python-distribute.org/distribute_setup.py>`_
-and execute it
-
-.. code-block:: bash
-
-    c:\Python27\python.exe distribute_setup.py
-
-After that, you'll be able to install ``pip`` by typing the following command
+To install BigMLer on Windows environments, you'll need Python installed.
+The code has been tested with Python 3.10 and you can create a conda
+environment with that Python version or download it from `Python for Windows
+<http://www.python.org/download/>`_ and install it. In the latter case, you'll
+also need to install the ``pip`` tool to install BigMLer.
 
-.. code-block:: bash
-
-    c:\Python27\Scripts\easy_install.exe pip
+To install ``pip``, first you need to open your command line window
+(write ``cmd`` in
+the input field that appears when you click on ``Start`` and hit ``enter``).
+Then you can follow the steps described, for example, in this `guide
+<https://monovm.com/blog/how-to-install-pip-on-windows-linux/#How-to-install-PIP-on-Windows?-[A-Step-by-Step-Guide]>`_
+to install its latest version.
 
-And finally, to install BigMLer, just type
+And finally, to install BigMLer in its basic capacities, just type
 
 .. code-block:: bash
 
-    c:\Python27\Scripts\pip.exe install bigmler
+    python -m pip install bigmler
 
-and BigMLer should be installed in your computer. Then
+and BigMLer should be installed in your computer or conda environment. Then
 issuing
 
 .. code-block:: bash
@@ -180,6 +164,11 @@ issuing
 
 should show BigMLer version information.
 
+Extensions of BigMLer to use images are not supported in Windows by default.
+The libraries needed for those models are usually not available for that
+operating system. If your Machine Learning project involves images, we
+recommend that you choose a Linux-based operating system.
+
 Finally, to start using BigMLer to handle your BigML resources, you need to
 set your credentials in BigML for authentication. If you want them to be
 permanently stored in your system, use
@@ -189,6 +178,9 @@ permanently stored in your system, use
     setx BIGML_USERNAME myusername
     setx BIGML_API_KEY ae579e7e53fb9abd646a6ff8aa99d4afe83ac291
 
+Note that ``setx`` will not change the environment variables of your current
+console, so you will need to open a new one to start using them.
+
 
 BigML Development Mode
 ======================
@@ -347,3 +339,13 @@ Additional Information
 
 For additional information, see
 the `full documentation for BigMLer on Read the Docs <http://bigmler.readthedocs.org>`_.
+
+
+Support
+=======
+
+Please report problems and bugs to our `BigML.io issue
+tracker <https://github.com/bigmlcom/io/issues>`_.
+
+Discussions about the different bindings take place in the general
+`BigML mailing list <http://groups.google.com/group/bigml>`_.
diff --git a/bigmler/__init__.py b/bigmler/__init__.py
@@ -1,2 +1,2 @@
 # -*- coding: utf-8 -*-
-__version__ = '5.8.1'
+__version__ = '5.9.0'
diff --git a/bigmler/options/source.py b/bigmler/options/source.py
@@ -334,12 +334,16 @@ def get_source_options(defaults=None):
             'action': 'store',
             'dest': 'annotations_language',
             'default': defaults.get('annotations_language', None),
-            'choices': ["VOC", "YOLO"],
+            'choices': ["VOC", "YOLO", "COCO"],
             'help': ("Language used to provide the annotations for images."
                      "Annotations are expected to be provided using "
-                     "on file per image. The --train option should point"
+                     "one file per image. The --train option should point"
                      " to the directory that contains both images and"
-                     " the corresponding annotations.")},
+                     " the corresponding annotations, unless some"
+                     " folder attribute is provided in each"
+                     " annotation. In that case it should point to"
+                     " the folder parent directory and --annotations-dir"
+                     " should be used to point to the annotations files.")},
 
         # Annotations file
         # File that contains annotations for images
@@ -356,7 +360,15 @@ def get_source_options(defaults=None):
             'action': 'store',
             'dest': 'annotations_dir',
             'default': defaults.get('annotations_dir', None),
-            'help': "Directory for individual annotation files."},
+            'help': ("Directory for individual annotation files."
+                     " Used when annotations are provided using "
+                     "one file per image. The --train option should point"
+                     " to the directory that contains both images and"
+                     " the corresponding annotations, unless some"
+                     " folder attribute is provided in each"
+                     " annotation. In that case it should point to"
+                     " the folder parent directory and --annotations-dir"
+                     " should be used to point to the annotations files.")},
 
         # Images file
         # Compressed file with images used as reference for annotations

diff --git a/bigmler/processing/annotations.py b/bigmler/processing/annotations.py
@@ -51,6 +51,11 @@ def relative_path(base_dir, absolute_path):
     return os.path.relpath(absolute_path, base_dir)
 
 
+def get_file_ext(filename):
+    """Returns the extension of `filename`, lowercased and with no dot"""
+    _, extension = os.path.splitext(filename)
+    return extension[1:].lower()
+
+
 def fields_from_annotations(annotations_file):
     """Infers the type of the fields that will contain the annotations
     in an annotations file.
@@ -116,9 +121,8 @@ def bigml_metadata(args, images_list=None, new_fields=None):
             files = glob.glob(os.path.join(args.images_dir, "**"),
                               recursive=True)
             images_list = [filename for
-                           filename in files if
-                           os.path.splitext(filename)[1].lower()[1:] in
-                           IMAGE_EXTENSIONS]
+                           filename in files if get_file_ext(filename)
+                           in IMAGE_EXTENSIONS]
 
         if images_list:
             if not os.path.exists(zip_path):
@@ -157,16 +161,22 @@ def bigml_metadata(args, images_list=None, new_fields=None):
 
 
 def bigml_coco_file(args, session_file):
-    """Translates from alternative annotations format, like VOC and YOLO to
-    the format accepted by BigML
+    """Translates from alternative annotations format, like VOC, YOLO or
+    MSCOCO to the format accepted by BigML
+
+    VOC and YOLO annotations are read from `args.annotations_dir`, while
+    MSCOCO annotations are read from the file that the user set in
+    `args.annotations_file`. That path is kept in
+    `args.original_annotations_file`, because `args.annotations_file` is
+    replaced by the path of the `annotations.json` file in
+    `args.output_dir`. Returns the result of calling `bigml_metadata`
+    with the list of images given by the translator.
+
+    Raises `ValueError` when the annotations language is not one of VOC,
+    YOLO or COCO, or when COCO is used and no annotations file is known.
 
     """
 
+    if args.annotations_file is not None:
+        # keeping the user-given path: annotations_file is overwritten below
+        args.original_annotations_file = args.annotations_file
     args.annotations_file = os.path.join(args.output_dir, "annotations.json")
-    filenames = voc_to_cocojson(args.annotations_dir, args,
-                                session_file) \
-        if args.annotations_language == "VOC" else \
-        yolo_to_cocojson(args.annotations_dir, args, session_file)
+    if args.annotations_language == "VOC":
+        filenames = voc_to_cocojson(args.annotations_dir, args, session_file)
+    elif args.annotations_language == "YOLO":
+        filenames = yolo_to_cocojson(args.annotations_dir, args, session_file)
+    elif args.annotations_language == "COCO":
+        # the attribute only exists when an annotations file was provided
+        mscoco_file = getattr(args, "original_annotations_file", None)
+        if mscoco_file is None:
+            raise ValueError("The COCO annotations language needs the"
+                             " --annotations-file option to be set.")
+        filenames = mscoco_to_cocojson(mscoco_file, args, session_file)
+    else:
+        # failing explicitly instead of leaving `filenames` unbound
+        raise ValueError("Unsupported annotations language: %s" %
+                         args.annotations_language)
+
     return bigml_metadata(args, images_list=filenames,
                           new_fields=[{"name": "boxes", "optype": "regions"}])
 
@@ -261,8 +271,7 @@ def yolo_to_cocojson(yolo_dir, args, session_file):
         filenames = glob.glob(os.path.join(images_dir, "**"),
                               recursive=True)
         filenames = [os.path.abspath(filename) for
-                     filename in filenames if
-                     os.path.splitext(filename)[1].lower() in
+                     filename in filenames if get_file_ext(filename) in
                      IMAGE_EXTENSIONS]
 
         ## Read yolo annotation txt file
@@ -299,8 +308,8 @@ def yolo_to_cocojson(yolo_dir, args, session_file):
                 ## the last one in the matched_file list is used
                 for a_file in matched_files:
                     filenames.append(a_file)
-                    ext = os.path.splitext(a_file)[1]
-                    if ext.lower() in IMAGE_EXTENSIONS:
+                    ext = get_file_ext(a_file)
+                    if ext in IMAGE_EXTENSIONS:
                         image_filename = a_file
                     else:
                         warnings += 1
@@ -514,8 +523,7 @@ def voc_to_cocojson(voc_dir, args, session_file):
             filenames = glob.glob(os.path.join(args.images_dir, "**"),
                                   recursive=True)
             filenames = [os.path.abspath(filename) for
-                         filename in filenames if
-                         os.path.splitext(filename)[1].lower() in
+                         filename in filenames if get_file_ext(filename) in
                          IMAGE_EXTENSIONS]
 
         for a_file in annotation_file_list:
@@ -568,3 +576,89 @@ def voc_to_cocojson(voc_dir, args, session_file):
 
     return [relative_path(args.images_dir, filename) for filename in \
         filenames]
+
+def _mscoco_region(label, bbox):
+    """Builds the region for a label and an MS COCO `bbox`, which is
+    handled as an [x, y, width, height] list.
+
+    """
+    return {"label": label,
+            "xmin": int(bbox[0]),
+            "ymin": int(bbox[1]),
+            "xmax": int(bbox[0] + bbox[2]),
+            "ymax": int(bbox[1] + bbox[3])}
+
+
+def mscoco_to_cocojson(mscoco_file, args, session_file):
+    """Translates annotations from a MS COCO JSON file, mapping its images,
+    categories and annotations to image file names, labels and regions.
+    The result is stored in `args.annotations_file`.
+
+    Only the images whose `file_name` equals the base name of an image
+    file found under `args.images_dir` are kept, so no image is kept when
+    that directory is not set or does not exist. Each bounding box creates
+    a region labeled with the category name and, if the category has a
+    `supercategory`, a second region labeled with it.
+
+    Annotations that refer to an image that was not kept, to an unknown
+    category or that have no usable `bbox` are skipped. Each one is
+    reported in the `<args.annotations_file>.log` file and counted as a
+    warning.
+
+    Returns a list with one path per kept image, as computed by
+    `relative_path` using `args.images_dir` as base.
+
+    """
+
+    images = {}
+    labels = {}
+    warnings = 0
+    logfile_name = args.annotations_file + ".log"
+
+    with open(logfile_name, "w") as logfile:
+
+        message = "Start converting COCO file from " + mscoco_file + "\n"
+        u.log_message(message, session_file, console=args.verbosity)
+        logfile.write("\n\n%s\n" % message)
+
+        # Loading the MS-COCO json into memory
+        with open(mscoco_file, "r", encoding="utf-8") as handle:
+            data = json.load(handle)
+
+        # Base names of the image files available in the images directory.
+        # A set is used, as membership is checked once per image.
+        image_names = set()
+        if args.images_dir is not None and os.path.exists(args.images_dir):
+            image_names = {
+                os.path.basename(filename) for filename in
+                glob.glob(os.path.join(args.images_dir, "**"),
+                          recursive=True)
+                if get_file_ext(filename) in IMAGE_EXTENSIONS}
+
+        # Extracting the file_name and id into a dict
+        images = {image['id']: {"file": image['file_name'], "boxes": []}
+                  for image in data['images']
+                  if image['file_name'] in image_names}
+
+        if data.get("categories") and data['categories'][0].get("name"):
+            # Extract the image category labels into a dict
+            labels = {category['id']: {
+                          "name": category['name'],
+                          "super": category.get('supercategory', "")}
+                      for category in data['categories']}
+
+        # Adding the regions data
+        for annotation in data.get('annotations') or []:
+            image = images.get(annotation.get("image_id"))
+            label = labels.get(annotation.get("category_id"))
+            bbox = annotation.get("bbox")
+
+            reason = None
+            if image is None:
+                reason = "its image is not in the list of images to use"
+            elif label is None:
+                reason = "its category is unknown"
+            elif not isinstance(bbox, (list, tuple)) or len(bbox) < 4:
+                reason = "it has no valid bbox"
+            if reason is not None:
+                # skipping the annotation instead of failing with KeyError
+                warnings += 1
+                logfile.write("WARN: annotation %s skipped: %s\n" %
+                              (annotation.get("id"), reason))
+                continue
+
+            image["boxes"].append(_mscoco_region(label["name"], bbox))
+            if label["super"]:
+                image["boxes"].append(_mscoco_region(label["super"], bbox))
+
+    output_json_array = list(images.values())
+
+    if warnings > 0:
+        message = f"\nThere are {warnings} warnings, " \
+                f"see the log file {logfile_name}\n"
+        u.log_message(message, session_file, console=args.verbosity)
+
+    filenames = [image['file'] for image in output_json_array]
+
+    with open(args.annotations_file, 'w') as handler:
+        json.dump(output_json_array, handler, indent=2)
+
+    # NOTE(review): `filename` is the bare `file_name` found in the MS COCO
+    # file, so it is resolved against the current directory and not against
+    # the folder where the image was found. Confirm that this is what
+    # `bigml_metadata` expects.
+    return [relative_path(args.images_dir, filename) for filename in \
+        filenames]
diff --git a/bigmler/tests/composite_steps.py b/bigmler/tests/composite_steps.py
@@ -95,17 +95,25 @@ def i_create_annotated_source(
 
 
 def i_create_lang_annotated_source(
-    step, annotations_dir=None, images_dir=None,
+    step, annotations_dir=None, annotations_file=None, images_dir=None,
     annotations_language=None, output_dir=None):
     """Step: I create  BigML composite for a "<annotations_language>"
-    <annotations_dir> and an <images_dir> and log results in <output_dir>
+    <annotations_[dir|file]> and an <images_dir> and log results in <output_dir>
     """
-    ok_(annotations_dir is not None and images_dir is not None and
+    ok_((annotations_dir is not None or annotations_file is not None)
+        and images_dir is not None and
         annotations_language is not None and output_dir is not None)
-    command = ("bigmler source --annotations-dir " + annotations_dir +
-               " --data " + images_dir + " --annotations-language " +
-               annotations_language +
-               " --store --output-dir " + output_dir)
+    # Annotations are given either as a directory or as a single file;
+    # the directory is the one used when both are set.
+    if annotations_dir is not None:
+        annotations_option = "--annotations-dir " + annotations_dir
+    else:
+        annotations_option = "--annotations-file " + annotations_file
+    command = ("bigmler source " + annotations_option +
+               " --data " + images_dir + " --annotations-language " +
+               annotations_language +
+               " --store --output-dir " + output_dir)
+
     shell_execute(command, os.path.join(output_dir, "txt.tmp"))
 
 
@@ -155,6 +163,7 @@ def check_annotation_fields(step, annotations_file):
     fields = fields_from_annotations(annotations_file)
     field_labels = labels_from_annotations(annotations_file)
     source_fields = Fields(world.source)
+
     field_names = list(source_fields.fields_by_name.keys())
     for field in fields:
         ok_(field["name"] in field_names)