Merge pull request #452 from OpenDataServices/450-451-custom-warnings-exceptions

Use custom warnings and exceptions
OpenDataServices · Aug 7, 2024 · 4d1c5ed · 4d1c5ed
2 parents 1d1c662 + 1b00770
commit 4d1c5ed
Show file tree

Hide file tree

Showing 8 changed files with 131 additions and 50 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+### Changed
+
+- Use custom warnings and exceptions [#450](https://github.com/OpenDataServices/flatten-tool/issues/450) [#451](https://github.com/OpenDataServices/flatten-tool/issues/451)
+
 ## [0.25.0] - 2024-07-05
 
 ### Fixed

diff --git a/flattentool/__init__.py b/flattentool/__init__.py
@@ -5,6 +5,7 @@
 from collections import OrderedDict
 from decimal import Decimal
 
+from flattentool.exceptions import FlattenToolError
 from flattentool.input import FORMATS as INPUT_FORMATS
 from flattentool.json_input import JSONParser
 from flattentool.lib import parse_sheet_configuration
@@ -37,7 +38,7 @@ def create_template(
     """
 
     if line_terminator not in LINE_TERMINATORS.keys():
-        raise Exception(f"{line_terminator} is not a valid line terminator")
+        raise FlattenToolError(f"{line_terminator} is not a valid line terminator")
 
     convert_flags = {"wkt": convert_wkt}
 
@@ -76,7 +77,7 @@ def spreadsheet_output(spreadsheet_output_class, name):
         spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
 
     else:
-        raise Exception("The requested format is not available")
+        raise FlattenToolError("The requested format is not available")
 
 
 def flatten(
@@ -111,10 +112,10 @@ def flatten(
     if (filter_field is None and filter_value is not None) or (
         filter_field is not None and filter_value is None
     ):
-        raise Exception("You must use filter_field and filter_value together")
+        raise FlattenToolError("You must use filter_field and filter_value together")
 
     if line_terminator not in LINE_TERMINATORS.keys():
-        raise Exception(f"{line_terminator} is not a valid line terminator")
+        raise FlattenToolError(f"{line_terminator} is not a valid line terminator")
 
     convert_flags = {"wkt": convert_wkt}
 
@@ -175,7 +176,7 @@ def spreadsheet_output(spreadsheet_output_class, name):
             spreadsheet_output(OUTPUT_FORMATS[output_format], output_name)
 
         else:
-            raise Exception("The requested format is not available")
+            raise FlattenToolError("The requested format is not available")
 
 
 # From http://bugs.python.org/issue16535
@@ -239,11 +240,13 @@ def unflatten(
     """
 
     if input_format is None:
-        raise Exception("You must specify an input format (may autodetect in future")
+        raise FlattenToolError(
+            "You must specify an input format (may autodetect in future"
+        )
     elif input_format not in INPUT_FORMATS:
-        raise Exception("The requested format is not available")
+        raise FlattenToolError("The requested format is not available")
     if metatab_name and base_json:
-        raise Exception("Not allowed to use base_json with metatab")
+        raise FlattenToolError("Not allowed to use base_json with metatab")
 
     convert_flags = {"wkt": convert_wkt}
 

diff --git a/flattentool/exceptions.py b/flattentool/exceptions.py
@@ -1,4 +1,21 @@
-class DataErrorWarning(UserWarning):
+class FlattenToolError(Exception):
+    pass
+
+
+class FlattenToolValueError(FlattenToolError, ValueError):
+    pass
+
+
+class FlattenToolWarning(UserWarning):
+    """
+    A warning generated directly by flatten-tool.
+
+    """
+
+    pass
+
+
+class DataErrorWarning(FlattenToolWarning):
     """
     A warnings that indicates an error in the data, rather than the schema.
 

diff --git a/flattentool/input.py b/flattentool/input.py
@@ -26,7 +26,12 @@
 
 from openpyxl.utils.cell import get_column_letter
 
-from flattentool.exceptions import DataErrorWarning
+from flattentool.exceptions import (
+    DataErrorWarning,
+    FlattenToolError,
+    FlattenToolValueError,
+    FlattenToolWarning,
+)
 from flattentool.i18n import _
 from flattentool.lib import isint, parse_sheet_configuration
 from flattentool.ODSReader import ODSReader
@@ -142,7 +147,10 @@ def convert_type(type_string, value, timezone=pytz.timezone("UTC"), convert_flag
             feature = geojson.Feature(geometry=geom, properties={})
             return feature.geometry
         else:
-            warn("Install flattentool's optional geo dependencies to use geo features.")
+            warn(
+                "Install flattentool's optional geo dependencies to use geo features.",
+                FlattenToolWarning,
+            )
             return str(value)
     elif type_string == "":
         if type(value) == datetime.datetime:
@@ -151,20 +159,23 @@ def convert_type(type_string, value, timezone=pytz.timezone("UTC"), convert_flag
             return int(value)
         return value if type(value) in [int] else str(value)
     else:
-        raise ValueError('Unrecognised type: "{}"'.format(type_string))
+        raise FlattenToolValueError('Unrecognised type: "{}"'.format(type_string))
 
 
 def warnings_for_ignored_columns(v, extra_message):
     if isinstance(v, Cell):
-        warn("Column {} has been ignored, {}".format(v.cell_location[3], extra_message))
+        warn(
+            "Column {} has been ignored, {}".format(v.cell_location[3], extra_message),
+            DataErrorWarning,
+        )
     elif isinstance(v, dict):
         for x in v.values():
             warnings_for_ignored_columns(x, extra_message)
     elif isinstance(v, TemporaryDict):
         for x in v.to_list():
             warnings_for_ignored_columns(x, extra_message)
     else:
-        raise ValueError()
+        raise FlattenToolValueError()
 
 
 def merge(base, mergee, debug_info=None):
@@ -583,7 +594,7 @@ def extract_dict_to_error_path(path, input):
                 ).format(input[k].cell_value, sub_cell.cell_value)
                 output[p].append(sub_cell.cell_location)
         else:
-            raise Exception(
+            raise FlattenToolError(
                 _("Unexpected result type in the JSON cell tree: {}").format(input[k])
             )
     return output
@@ -606,7 +617,7 @@ def extract_dict_to_value(input):
         elif isinstance(input[k], Cell):
             output[k] = input[k].cell_value
         else:
-            raise Exception(
+            raise FlattenToolError(
                 _("Unexpected result type in the JSON cell tree: {}").format(input[k])
             )
     return output
@@ -693,7 +704,7 @@ def get_sheet_lines(self, sheet_name):
                 yield row
 
 
-class BadXLSXZipFile(BadZipFile):
+class BadXLSXZipFile(BadZipFile, FlattenToolError):
     pass
 
 
@@ -1009,7 +1020,7 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name, convert_fla
             list_index = -1
             if isint(next_path_item):
                 if current_type and current_type != "array":
-                    raise ValueError(
+                    raise FlattenToolValueError(
                         _(
                             "There is an array at '{}' when the schema says there should be a '{}'"
                         ).format(path_till_now, current_type)
@@ -1061,7 +1072,7 @@ def unflatten_main_with_parser(parser, line, timezone, xml, id_name, convert_fla
                 and current_type not in ["object", "array"]
                 and next_path_item
             ):
-                raise ValueError(
+                raise FlattenToolValueError(
                     _(
                         "There is an object or list at '{}' but it should be an {}"
                     ).format(path_till_now, current_type)

diff --git a/flattentool/json_input.py b/flattentool/json_input.py
@@ -28,7 +28,12 @@
 import zc.zlibstorage
 import ZODB.FileStorage
 
-from flattentool.exceptions import DataErrorWarning
+from flattentool.exceptions import (
+    DataErrorWarning,
+    FlattenToolError,
+    FlattenToolValueError,
+    FlattenToolWarning,
+)
 from flattentool.i18n import _
 from flattentool.input import path_search
 from flattentool.schema import make_sub_sheet_name
@@ -37,7 +42,7 @@
 BASIC_TYPES = [str, bool, int, Decimal, type(None)]
 
 
-class BadlyFormedJSONError(ValueError):
+class BadlyFormedJSONError(FlattenToolError, ValueError):
     pass
 
 
@@ -195,7 +200,10 @@ def __init__(
                 if isinstance(rollup, (list,)) and (
                     len(rollup) > 1 or (len(rollup) == 1 and rollup[0] is not True)
                 ):
-                    warn(_("Using rollUp values from schema, ignoring direct input."))
+                    warn(
+                        _("Using rollUp values from schema, ignoring direct input."),
+                        FlattenToolWarning,
+                    )
             elif isinstance(rollup, (list,)):
                 if len(rollup) == 1 and os.path.isfile(rollup[0]):
                     # Parse file, one json path per line.
@@ -209,15 +217,17 @@ def __init__(
                 elif len(rollup) == 1 and rollup[0] is True:
                     warn(
                         _(
                             "No fields to rollup found (pass json path directly, as a list in a file, or via a schema)"
-                        )
+                        ),
+                        FlattenToolWarning,
                     )
                 else:
                     self.rollup = set(rollup)
             else:
                 warn(
                     _(
                         "Invalid value passed for rollup (pass json path directly, as a list in a file, or via a schema)"
-                    )
+                    ),
+                    FlattenToolWarning,
                 )
 
@@ -235,12 +245,12 @@ def __init__(
             json_filename = None
 
         if json_filename is None and root_json_dict is None:
-            raise ValueError(
+            raise FlattenToolValueError(
                 _("Either json_filename or root_json_dict must be supplied")
             )
 
         if json_filename is not None and root_json_dict is not None:
-            raise ValueError(
+            raise FlattenToolValueError(
                 _("Only one of json_file or root_json_dict should be supplied")
             )
 
@@ -276,7 +286,8 @@ def __init__(
                 warn(
                     _(
                         "You wanted to preserve the following fields which are not present in the supplied schema: {}"
-                    ).format(list(input_not_in_schema))
+                    ).format(list(input_not_in_schema)),
+                    FlattenToolWarning,
                 )
             except AttributeError:
                 # no schema
@@ -344,7 +355,8 @@ def parse(self):
                 warn(
                     _(
                         "You wanted to preserve the following fields which are not present in the input data: {}"
-                    ).format(nonexistent_input_paths)
+                    ).format(nonexistent_input_paths),
+                    FlattenToolWarning,
                 )
 
     def parse_json_dict(
@@ -383,13 +395,17 @@ def parse_json_dict(
                 try:
                     geom = shapely.geometry.shape(json_dict)
                 except (shapely.errors.GeometryTypeError, TypeError, ValueError) as e:
-                    warn(_("Invalid GeoJSON: {parser_msg}").format(parser_msg=repr(e)))
+                    warn(
+                        _("Invalid GeoJSON: {parser_msg}").format(parser_msg=repr(e)),
+                        DataErrorWarning,
+                    )
                     return
                 flattened_dict[_sheet_key] = geom.wkt
                 skip_type_and_coordinates = True
             else:
                 warn(
-                    "Install flattentool's optional geo dependencies to use geo features."
+                    "Install flattentool's optional geo dependencies to use geo features.",
+                    FlattenToolWarning,
                 )
 
         parent_id_fields = copy.copy(parent_id_fields) or OrderedDict()
@@ -482,7 +498,8 @@ def parse_json_dict(
                         if self.use_titles and not self.schema_parser:
                             warn(
                                 _(
                                     "Warning: No schema was provided so column headings are JSON keys, not titles."
-                                )
+                                ),
+                                FlattenToolWarning,
                             )
 
@@ -497,7 +514,7 @@ def parse_json_dict(
                                     continue
 
                                 if type(v) not in BASIC_TYPES:
-                                    raise ValueError(
+                                    raise FlattenToolValueError(
                                         _("Rolled up values must be basic types")
                                     )
                                 else:
@@ -583,7 +600,8 @@ def parse_json_dict(
                                     warn(
                                         _(
                                             'More than one value supplied for "{}". Could not provide rollup, so adding a warning to the relevant cell(s) in the spreadsheet.'
-                                        ).format(parent_name + key)
+                                        ).format(parent_name + key),
+                                        FlattenToolWarning,
                                     )
                                     flattened_dict[
                                         sheet_key(sheet, parent_name + key + "/0/" + k)
@@ -594,7 +612,8 @@ def parse_json_dict(
                                     warn(
                                         _(
                                             'More than one value supplied for "{}". Could not provide rollup, so adding a warning to the relevant cell(s) in the spreadsheet.'
-                                        ).format(parent_name + key)
+                                        ).format(parent_name + key),
+                                        FlattenToolWarning,
                                     )
                                     flattened_dict[
                                         sheet_key(sheet, parent_name + key + "/0/" + k)
@@ -638,7 +657,9 @@ def parse_json_dict(
                             top_level_of_sub_sheet=True,
                         )
             else:
-                raise ValueError(_("Unsupported type {}").format(type(value)))
+                raise FlattenToolValueError(
+                    _("Unsupported type {}").format(type(value))
+                )
 
         if top:
             sheet.append_line(flattened_dict)