From 1453e8de66ae9d9b94c0250327839bccc7242aac Mon Sep 17 00:00:00 2001 From: jvdd Date: Thu, 18 Apr 2024 18:19:55 +0200 Subject: [PATCH] :fire: disallow untyped defs --- pyproject.toml | 2 +- tsflex/chunking/chunking.py | 22 +++++++------ tsflex/features/feature.py | 2 +- tsflex/features/feature_collection.py | 22 ++++++------- tsflex/features/function_wrapper.py | 2 +- tsflex/features/integrations.py | 10 +++--- tsflex/features/logger.py | 2 +- tsflex/features/segmenter/strided_rolling.py | 33 ++++++++++++------- .../segmenter/strided_rolling_factory.py | 20 +++++++++-- tsflex/features/utils.py | 10 +++--- tsflex/processing/series_processor.py | 9 ++--- tsflex/processing/utils.py | 4 +-- 12 files changed, 84 insertions(+), 54 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e33ffaa..c43a1d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -120,7 +120,7 @@ warn_redundant_casts = true warn_unused_ignores = true check_untyped_defs = true no_implicit_reexport = true -disallow_untyped_defs = false # TODO: enable +disallow_untyped_defs = true # disallow_any_generics = false ignore_missing_imports = true # allow_redefinition = true diff --git a/tsflex/chunking/chunking.py b/tsflex/chunking/chunking.py index 92ac24e..4b6aa78 100644 --- a/tsflex/chunking/chunking.py +++ b/tsflex/chunking/chunking.py @@ -19,9 +19,9 @@ def _chunk_time_data( min_chunk_dur: Optional[Union[str, pd.Timedelta]] = None, max_chunk_dur: Optional[Union[str, pd.Timedelta]] = None, sub_chunk_overlap: Union[str, pd.Timedelta] = "0s", - copy=True, - verbose=False, -): + copy: bool = True, + verbose: bool = False, +) -> List[List[pd.Series]]: if min_chunk_dur is not None: min_chunk_dur = parse_time_arg(min_chunk_dur) if max_chunk_dur is not None: @@ -61,7 +61,9 @@ def _chunk_time_data( # Each list item can be seen as (t_start_chunk, t_end_chunk, chunk_list) same_range_chunks: List[Tuple[pd.Timestamp, pd.Timestamp, List[pd.Series]]] = [] - def print_verbose_time(sig, t_begin, t_end, msg=""): + def print_verbose_time( + sig: pd.Series, t_begin: pd.Timestamp, t_end: pd.Timestamp, msg: str = "" + ) -> None: fmt = "%Y-%m-%d %H:%M" if not verbose: return @@ -81,7 +83,7 @@ def slice_time( else: return sig[t_begin:t_end] - def insert_chunk(chunk: pd.Series): + def insert_chunk(chunk: pd.Series) -> None: """Insert the chunk into `same_range_chunks`.""" t_chunk_start, t_chunk_end = chunk.index[[0, -1]] @@ -194,9 +196,9 @@ def _chunk_sequence_data( min_chunk_dur: Optional[float] = None, max_chunk_dur: Optional[float] = None, sub_chunk_overlap: float = 0, - copy=True, - verbose=False, -): + copy: bool = True, + verbose: bool = False, +) -> List[List[pd.Series]]: raise NotImplementedError("Not implemented yet") @@ -218,8 +220,8 @@ def chunk_data( min_chunk_dur: Optional[Union[float, str, pd.Timedelta]] = None, max_chunk_dur: Optional[Union[float, str, pd.Timedelta]] = None, sub_chunk_overlap: Union[float, str, pd.Timedelta] = "0s", # TODO: make optional - copy=True, - verbose=False, + copy: bool = True, + verbose: bool = False, ) -> List[List[pd.Series]]: """Divide the time-series `data` in same time/sequence-range chunks. 
diff --git a/tsflex/features/feature.py b/tsflex/features/feature.py index 5e1968c..0192cb4 100644 --- a/tsflex/features/feature.py +++ b/tsflex/features/feature.py @@ -259,7 +259,7 @@ def __init__( ): # Cast functions to FuncWrapper, this avoids creating multiple # FuncWrapper objects for the same function in the FeatureDescriptor - def to_func_wrapper(f: Callable): + def to_func_wrapper(f: Callable) -> FuncWrapper: return f if isinstance(f, FuncWrapper) else FuncWrapper(f) functions = [to_func_wrapper(f) for f in to_list(functions)] diff --git a/tsflex/features/feature_collection.py b/tsflex/features/feature_collection.py index b8c2c11..7798a91 100644 --- a/tsflex/features/feature_collection.py +++ b/tsflex/features/feature_collection.py @@ -162,7 +162,7 @@ def _check_feature_descriptors( self, skip_none: bool, calc_stride: Optional[Union[float, pd.Timedelta, None]] = None, - ): + ) -> None: """Verify whether all added FeatureDescriptors imply the same-input data type. If this condition is not met, a warning will be raised. @@ -195,7 +195,7 @@ def _check_feature_descriptors( category=RuntimeWarning, ) - def _add_feature(self, feature: FeatureDescriptor): + def _add_feature(self, feature: FeatureDescriptor) -> None: """Add a `FeatureDescriptor` instance to the collection. Parameters @@ -238,7 +238,7 @@ def add( Union[FeatureDescriptor, MultipleFeatureDescriptors, FeatureCollection] ], ], - ): + ) -> None: """Add feature(s) to the FeatureCollection. Parameters @@ -324,13 +324,13 @@ def _executor_grouped(idx: int) -> pd.DataFrame: f = function if function.input_type is np.array: - def f(x: pd.DataFrame): + def f(x: pd.DataFrame) -> Any: # pass the inputs as positional arguments of numpy array type return function(*[x[c].values for c in cols_tuple]) else: # function.input_type is pd.Series - def f(x: pd.DataFrame): + def f(x: pd.DataFrame) -> Any: # pass the inputs as positional arguments of pd.Series type return function(*[x[c] for c in cols_tuple]) @@ -373,7 +373,7 @@ def _stroll_feat_generator( [len(self._feature_desc_dict[k]) for k in keys_wins_strides] ) - def get_stroll_function(idx) -> Tuple[StridedRolling, FuncWrapper]: + def get_stroll_function(idx: int) -> Tuple[StridedRolling, FuncWrapper]: key_idx = np.searchsorted(lengths, idx, "right") # right bc idx starts at 0 key, win = keys_wins_strides[key_idx] @@ -416,7 +416,7 @@ def _group_feat_generator( lengths = np.cumsum([len(self._feature_desc_dict[k]) for k in keys_wins]) def get_group_function( - idx, + idx: int, ) -> Tuple[pd.api.typing.DataFrameGroupBy, FuncWrapper,]: key_idx = np.searchsorted(lengths, idx, "right") # right bc idx starts at 0 key, win = keys_wins[key_idx] @@ -429,7 +429,7 @@ def get_group_function( return get_group_function - def _check_no_multiple_windows(self, error_case: str): + def _check_no_multiple_windows(self, error_case: str) -> None: """Check whether there are no multiple windows in the feature collection. Parameters @@ -633,11 +633,11 @@ def _group_by_consecutive( return df_grouped - def _calculate_group_by_consecutive( + def _calculate_group_by_consecutive( # type: ignore[no-untyped-def] self, data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]], group_by: str, - return_df: Optional[bool] = False, + return_df: bool = False, **calculate_kwargs, ) -> Union[List[pd.DataFrame], pd.DataFrame]: """Calculate features on each consecutive group of the data. 
@@ -1261,7 +1261,7 @@ def calculate( f_handler, ) - def serialize(self, file_path: Union[str, Path]): + def serialize(self, file_path: Union[str, Path]) -> None: """Serialize this FeatureCollection instance. Parameters diff --git a/tsflex/features/function_wrapper.py b/tsflex/features/function_wrapper.py index b9b5a99..20ea896 100644 --- a/tsflex/features/function_wrapper.py +++ b/tsflex/features/function_wrapper.py @@ -87,7 +87,7 @@ class FuncWrapper(FrozenClass): """ - def __init__( + def __init__( # type: ignore[no-untyped-def] self, func: Callable, output_names: Optional[Union[List[str], str]] = None, diff --git a/tsflex/features/integrations.py b/tsflex/features/integrations.py index 19432cb..00da35f 100644 --- a/tsflex/features/integrations.py +++ b/tsflex/features/integrations.py @@ -3,7 +3,7 @@ __author__ = "Jeroen Van Der Donckt, Jonas Van Der Donckt" import importlib -from typing import Callable, Dict, List, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as np import pandas as pd @@ -35,7 +35,7 @@ def seglearn_wrapper(func: Callable, func_name: Optional[str] = None) -> FuncWra """ - def wrap_func(x: np.ndarray): + def wrap_func(x: np.ndarray) -> np.ndarray: out = func(x.reshape(1, len(x))) return out.flatten() @@ -144,7 +144,7 @@ def tsfel_feature_dict_wrapper(features_dict: Dict) -> List[FuncWrapper]: """ - def get_output_names(config: dict): + def get_output_names(config: dict) -> Union[str, List[str]]: """Create the output_names based on the configuration.""" nb_outputs = config["n_features"] func_name = config["function"].split(".")[-1] @@ -203,7 +203,7 @@ def tsfresh_combiner_wrapper(func: Callable, param: List[Dict]) -> FuncWrapper: """ - def wrap_func(x: Union[np.ndarray, pd.Series]): + def wrap_func(x: Union[np.ndarray, pd.Series]) -> Tuple[Any, ...]: out = func(x, param) return tuple(t[1] for t in out) @@ -330,7 +330,7 @@ def catch22_wrapper(catch22_all: Callable) -> FuncWrapper: """ catch22_names = catch22_all([0])["names"] - def wrap_catch22_all(x): + def wrap_catch22_all(x: np.ndarray) -> List[float]: return catch22_all(x)["values"] wrap_catch22_all.__name__ = "[wrapped]__" + _get_name(catch22_all) diff --git a/tsflex/features/logger.py b/tsflex/features/logger.py index 1e6b8b7..f95d167 100644 --- a/tsflex/features/logger.py +++ b/tsflex/features/logger.py @@ -148,7 +148,7 @@ def get_function_stats(logging_file_path: str) -> pd.DataFrame: .index.to_list() ) - def key_func(idx_level): + def key_func(idx_level): # type: ignore[no-untyped-def] if all(idx in sorted_funcs for idx in idx_level): return [sorted_funcs.index(idx) for idx in idx_level] return idx_level diff --git a/tsflex/features/segmenter/strided_rolling.py b/tsflex/features/segmenter/strided_rolling.py index 2c41787..bd07631 100644 --- a/tsflex/features/segmenter/strided_rolling.py +++ b/tsflex/features/segmenter/strided_rolling.py @@ -238,7 +238,7 @@ def __init__( RuntimeWarning, ) - def _calc_nb_segments_for_stride(self, stride) -> int: + def _calc_nb_segments_for_stride(self, stride: T) -> int: """Calculate the number of output items (segments) for a given single stride.""" assert self.start is not None and self.end is not None # for mypy nb_feats = max((self.end - self.start - self.window) // stride + 1, 0) @@ -293,7 +293,10 @@ def _get_output_index( ) def _construct_series_containers( - self, series_list, np_start_times, np_end_times + self, + series_list: List[pd.Series], + np_start_times: np.ndarray, + np_end_times: np.ndarray, ) -> 
List[StridedRolling._NumpySeriesContainer]: series_containers: List[StridedRolling._NumpySeriesContainer] = [] for series in series_list: @@ -487,7 +490,7 @@ def apply_func(self, func: FuncWrapper) -> pd.DataFrame: # --------------------------------- STATIC METHODS --------------------------------- @staticmethod - def _get_np_value(val): + def _get_np_value(val: Union[np.number, pd.Timestamp, pd.Timedelta]) -> np.number: # Convert everything to int64 if isinstance(val, pd.Timestamp): return val.to_datetime64() @@ -505,7 +508,9 @@ def construct_output_index( # ----------------------------- OVERRIDE THESE METHODS ----------------------------- @abstractmethod - def _update_start_end_indices_to_stroll_type(self, series_list: List[pd.Series]): + def _update_start_end_indices_to_stroll_type( + self, series_list: List[pd.Series] + ) -> None: # NOTE: This method will only be implemented (with code != pass) in the # TimeIndexSampleStridedRolling raise NotImplementedError @@ -522,7 +527,7 @@ def _create_feat_col_name(self, feat_name: str) -> str: class SequenceStridedRolling(StridedRolling): - def __init__( + def __init__( # type: ignore[no-untyped-def] self, data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]], window: Union[int, float], @@ -535,7 +540,9 @@ def __init__( super().__init__(data, window, strides, *args, **kwargs) # ------------------------------- Overridden methods ------------------------------- - def _update_start_end_indices_to_stroll_type(self, series_list: List[pd.Series]): + def _update_start_end_indices_to_stroll_type( + self, series_list: List[pd.Series] + ) -> None: pass def _parse_segment_idxs(self, segment_idxs: np.ndarray) -> np.ndarray: @@ -554,7 +561,7 @@ def _create_feat_col_name(self, feat_name: str) -> str: class TimeStridedRolling(StridedRolling): - def __init__( + def __init__( # type: ignore[no-untyped-def] self, data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]], window: pd.Timedelta, @@ -589,7 +596,9 @@ def _get_output_index( return super()._get_output_index(start_idxs, end_idxs, name) # ------------------------------- Overridden methods ------------------------------- - def _update_start_end_indices_to_stroll_type(self, series_list: List[pd.Series]): + def _update_start_end_indices_to_stroll_type( + self, series_list: List[pd.Series] + ) -> None: pass def _parse_segment_idxs(self, segment_idxs: np.ndarray) -> np.ndarray: @@ -616,7 +625,7 @@ def _create_feat_col_name(self, feat_name: str) -> str: class TimeIndexSampleStridedRolling(SequenceStridedRolling): - def __init__( + def __init__( # type: ignore[no-untyped-def] self, # TODO -> update arguments data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]], @@ -678,7 +687,9 @@ def apply_func(self, func: FuncWrapper) -> pd.DataFrame: return df # ---------------------------- Overridden methods ------------------------------ - def _update_start_end_indices_to_stroll_type(self, series_list: List[pd.Series]): + def _update_start_end_indices_to_stroll_type( + self, series_list: List[pd.Series] + ) -> None: # update the start and end times to the sequence datatype self.start, self.end = np.searchsorted( series_list[0].index.values, @@ -689,7 +700,7 @@ def _update_start_end_indices_to_stroll_type(self, series_list: List[pd.Series]) def _sliding_strided_window_1d( data: np.ndarray, window: int, step: int, nb_segments: int -): +) -> np.ndarray: """View based sliding strided-window for 1-dimensional data. 
Parameters
diff --git a/tsflex/features/segmenter/strided_rolling_factory.py b/tsflex/features/segmenter/strided_rolling_factory.py
index 2e35bcb..8d66c2f 100644
--- a/tsflex/features/segmenter/strided_rolling_factory.py
+++ b/tsflex/features/segmenter/strided_rolling_factory.py
@@ -8,6 +8,10 @@ __author__ = "Jonas Van Der Donckt"
+from typing import List, Optional, Union
+
+import pandas as pd
+
from ...utils.attribute_parsing import AttributeParser, DataType
from .strided_rolling import (
SequenceStridedRolling,
@@ -26,7 +30,12 @@ class StridedRollingFactory:
}
@staticmethod
- def get_segmenter(data, window, strides, **kwargs) -> StridedRolling:
+ def get_segmenter( # type: ignore[no-untyped-def]
+ data: Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]],
+ window: Union[int, float, pd.Timedelta],
+ strides: Optional[List[Union[int, float, pd.Timedelta]]],
+ **kwargs,
+ ) -> StridedRolling:
"""Get the appropriate StridedRolling instance for the passed data.
The returned instance will be determined by the data its index type
@@ -35,9 +44,9 @@ def get_segmenter(data, window, strides, **kwargs) -> StridedRolling:
----------
data : Union[pd.Series, pd.DataFrame, List[Union[pd.Series, pd.DataFrame]]]
The data to segment.
- window : Union[float, pd.TimeDelta]
+ window : Union[int, float, pd.Timedelta]
The window size to use for the segmentation.
- strides : Union[List[Union[float, pd.TimeDelta]], None]
+ strides : Union[List[Union[int, float, pd.Timedelta]], None]
The stride(s) to use for the segmentation.
**kwargs : dict, optional
Additional keyword arguments, see the `StridedRolling` its documentation
@@ -74,6 +83,11 @@ def get_segmenter(data, window, strides, **kwargs) -> StridedRolling:
)
elif data_dtype == DataType.TIME and args_dtype == DataType.SEQUENCE:
# Note: this is very niche and thus requires advanced knowledge
+ assert isinstance(window, int)
+ if strides is not None:
+ assert isinstance(strides, list) and all(
+ isinstance(s, int) for s in strides
+ )
return TimeIndexSampleStridedRolling(data, window, strides, **kwargs)
elif data_dtype == DataType.SEQUENCE and args_dtype == DataType.TIME:
raise ValueError("Cannot segment a sequence-series with a time window")
diff --git a/tsflex/features/utils.py b/tsflex/features/utils.py
index 1e5e4f4..ef51b5d 100644
--- a/tsflex/features/utils.py
+++ b/tsflex/features/utils.py
@@ -45,7 +45,7 @@ def _log_func_execution(
log_window: Optional[str],
log_strides: Optional[Union[str, Tuple[str, ...]]],
output_names: List[str],
-):
+) -> None:
"""Log the execution time of a feature function."""
elapsed = time.perf_counter() - t_start
@@ -56,7 +56,9 @@ def _log_func_execution(
)
-def _determine_bounds(bound_method, series_list: List[pd.Series]) -> Tuple[Any, Any]:
+def _determine_bounds(
+ bound_method: str, series_list: List[pd.Series]
+) -> Tuple[Any, Any]:
"""Determine the bounds of the passed series.
Parameters
@@ -97,7 +99,7 @@ def _determine_bounds(bound_method, series_list: List[pd.Series]) -> Tuple[Any,
raise ValueError(f"invalid bound method string passed {bound_method}")
-def _check_start_end_array(start_idxs: np.ndarray, end_idxs: np.ndarray):
+def _check_start_end_array(start_idxs: np.ndarray, end_idxs: np.ndarray) -> None:
"""Check if the start and end indices are valid. 
These are valid if they are of the same length and if the start indices are smaller @@ -184,7 +186,7 @@ def _make_single_func_robust( output_names = func_wrapper_kwargs.get("output_names") - def wrap_func(*series: Union[np.ndarray, pd.Series], **kwargs) -> Any: + def wrap_func(*series: Union[np.ndarray, pd.Series], **kwargs) -> Any: # type: ignore[no-untyped-def] if not passthrough_nans: series = [s[~np.isnan(s)] for s in series] # type: ignore[assignment] if any([len(s) < min_nb_samples for s in series]): diff --git a/tsflex/processing/series_processor.py b/tsflex/processing/series_processor.py index 25c23e1..4f5253a 100644 --- a/tsflex/processing/series_processor.py +++ b/tsflex/processing/series_processor.py @@ -41,8 +41,9 @@ def dataframe_func(func: Callable) -> Callable: """ - def wrapper( - *series: pd.Series, **kwargs # type: ignore[no-untyped-def] + def wrapper( # type: ignore[no-untyped-def] + *series: pd.Series, + **kwargs, ) -> Union[np.ndarray, pd.Series, pd.DataFrame, List[pd.Series]]: series_dict = {s.name: s for s in series} df = series_dict_to_df(series_dict) @@ -112,11 +113,11 @@ class SeriesProcessor(FrozenClass): """ - def __init__( + def __init__( # type: ignore[no-untyped-def] self, function: Callable, series_names: Union[str, Tuple[str, ...], List[str], List[Tuple[str, ...]]], - **kwargs, # type: ignore[no-untyped-def] + **kwargs, ): series_names = [to_tuple(names) for names in to_list(series_names)] # Assert that function inputs (series) all have the same length diff --git a/tsflex/processing/utils.py b/tsflex/processing/utils.py index dd37889..92eb874 100644 --- a/tsflex/processing/utils.py +++ b/tsflex/processing/utils.py @@ -14,12 +14,12 @@ from .series_pipeline import SeriesPipeline -def process_chunks_multithreaded( +def process_chunks_multithreaded( # type: ignore[no-untyped-def] same_range_chunks_list: List[List[Union[pd.Series, pd.DataFrame]]], series_pipeline: SeriesPipeline, show_progress: Optional[bool] = True, n_jobs: Optional[int] = None, - **processing_kwargs, # type: ignore[no-untyped-def] + **processing_kwargs, ) -> Optional[List[Any]]: """Process `same_range_chunks_list` in a multithreaded manner, order is preserved.