conditionally disable bottleneck (#5560)

pydata · Aug 12, 2021 · 3956b73 · 3956b73
1 parent 4bb9d9c
commit 3956b73
Show file tree

Hide file tree

Showing 10 changed files with 103 additions and 6 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -22,6 +22,10 @@ v0.19.1 (unreleased)
 
 New Features
 ~~~~~~~~~~~~
+- Add an option to disable the use of ``bottleneck`` (:pull:`5560`)
+  By `Justus Magin <https://github.com/keewis>`_.
+- Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`).
+  By `Pushkar Kopparla <https://github.com/pkopparla>`_.
 
 
 Breaking changes
@@ -104,8 +108,6 @@ New Features
 - Allow removal of the coordinate attribute ``coordinates`` on variables by setting ``.attrs['coordinates']= None``
   (:issue:`5510`).
   By `Elle Smith <https://github.com/ellesmith88>`_.
-- Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`).
-  By `Pushkar Kopparla <https://github.com/pkopparla>`_.
 - Added :py:meth:`DataArray.to_numpy`, :py:meth:`DataArray.as_numpy`, and :py:meth:`Dataset.as_numpy`. (:pull:`5568`).
   By `Tom Nicholas <https://github.com/TomNicholas>`_.
 - Units in plot labels are now automatically inferred from wrapped :py:meth:`pint.Quantity` arrays. (:pull:`5561`).

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
@@ -6228,6 +6228,12 @@ def rank(self, dim, pct=False, keep_attrs=None):
         ranked : Dataset
             Variables that do not depend on `dim` are dropped.
         """
+        if not OPTIONS["use_bottleneck"]:
+            raise RuntimeError(
+                "rank requires bottleneck to be enabled."
+                " Call `xr.set_options(use_bottleneck=True)` to enable it."
+            )
+
         if dim not in self.dims:
             raise ValueError(f"Dataset does not contain the dimension: {dim}")
 

diff --git a/xarray/core/missing.py b/xarray/core/missing.py
@@ -12,7 +12,7 @@
 from .common import _contains_datetime_like_objects, ones_like
 from .computation import apply_ufunc
 from .duck_array_ops import datetime_to_numeric, push, timedelta_to_numeric
-from .options import _get_keep_attrs
+from .options import OPTIONS, _get_keep_attrs
 from .pycompat import dask_version, is_duck_dask_array
 from .utils import OrderedSet, is_scalar
 from .variable import Variable, broadcast_variables
@@ -405,6 +405,12 @@ def _bfill(arr, n=None, axis=-1):
 
 def ffill(arr, dim=None, limit=None):
     """forward fill missing values"""
+    if not OPTIONS["use_bottleneck"]:
+        raise RuntimeError(
+            "ffill requires bottleneck to be enabled."
+            " Call `xr.set_options(use_bottleneck=True)` to enable it."
+        )
+
     axis = arr.get_axis_num(dim)
 
     # work around for bottleneck 178
@@ -422,6 +428,12 @@ def ffill(arr, dim=None, limit=None):
 
 def bfill(arr, dim=None, limit=None):
     """backfill missing values"""
+    if not OPTIONS["use_bottleneck"]:
+        raise RuntimeError(
+            "bfill requires bottleneck to be enabled."
+            " Call `xr.set_options(use_bottleneck=True)` to enable it."
+        )
+
     axis = arr.get_axis_num(dim)
 
     # work around for bottleneck 178

diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py
@@ -4,6 +4,8 @@
 import pandas as pd
 from numpy.core.multiarray import normalize_axis_index  # type: ignore[attr-defined]
 
+from .options import OPTIONS
+
 try:
     import bottleneck as bn
 
@@ -138,6 +140,7 @@ def f(values, axis=None, **kwargs):
 
         if (
             _USE_BOTTLENECK
+            and OPTIONS["use_bottleneck"]
             and isinstance(values, np.ndarray)
             and bn_func is not None
             and not isinstance(axis, tuple)

diff --git a/xarray/core/options.py b/xarray/core/options.py
@@ -14,6 +14,7 @@
 FILE_CACHE_MAXSIZE = "file_cache_maxsize"
 KEEP_ATTRS = "keep_attrs"
 WARN_FOR_UNCLOSED_FILES = "warn_for_unclosed_files"
+USE_BOTTLENECK = "use_bottleneck"
 
 
 OPTIONS = {
@@ -31,6 +32,7 @@
     FILE_CACHE_MAXSIZE: 128,
     KEEP_ATTRS: "default",
     WARN_FOR_UNCLOSED_FILES: False,
+    USE_BOTTLENECK: True,
 }
 
 _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"])
@@ -54,6 +56,7 @@ def _positive_integer(value):
     FILE_CACHE_MAXSIZE: _positive_integer,
     KEEP_ATTRS: lambda choice: choice in [True, False, "default"],
     WARN_FOR_UNCLOSED_FILES: lambda value: isinstance(value, bool),
+    USE_BOTTLENECK: lambda choice: choice in [True, False],
 }
 
 
@@ -122,6 +125,9 @@ class set_options:
       attrs, ``False`` to always discard them, or ``'default'`` to use original
       logic that attrs should only be kept in unambiguous circumstances.
       Default: ``'default'``.
+    - ``use_bottleneck``: allow using bottleneck. Either ``True`` to accelerate
+      operations using bottleneck if it is installed or ``False`` to never use it.
+      Default: ``True``.
     - ``display_style``: display style to use in jupyter for xarray objects.
       Default: ``'html'``. Other options are ``'text'``.
     - ``display_expand_attrs``: whether to expand the attributes section for

diff --git a/xarray/core/rolling.py b/xarray/core/rolling.py
@@ -7,7 +7,7 @@
 
 from . import dtypes, duck_array_ops, utils
 from .arithmetic import CoarsenArithmetic
-from .options import _get_keep_attrs
+from .options import OPTIONS, _get_keep_attrs
 from .pycompat import is_duck_dask_array
 from .utils import either_dict_or_kwargs
 
@@ -517,7 +517,8 @@ def _numpy_or_bottleneck_reduce(
             del kwargs["dim"]
 
         if (
-            bottleneck_move_func is not None
+            OPTIONS["use_bottleneck"]
+            and bottleneck_move_func is not None
             and not is_duck_dask_array(self.obj.data)
             and len(self.dim) == 1
         ):

diff --git a/xarray/core/variable.py b/xarray/core/variable.py
@@ -33,7 +33,7 @@
     VectorizedIndexer,
     as_indexable,
 )
-from .options import _get_keep_attrs
+from .options import OPTIONS, _get_keep_attrs
 from .pycompat import (
     DuckArrayModule,
     cupy_array_type,
@@ -2052,6 +2052,12 @@ def rank(self, dim, pct=False):
         --------
         Dataset.rank, DataArray.rank
         """
+        if not OPTIONS["use_bottleneck"]:
+            raise RuntimeError(
+                "rank requires bottleneck to be enabled."
+                " Call `xr.set_options(use_bottleneck=True)` to enable it."
+            )
+
         import bottleneck as bn
 
         data = self.data

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
@@ -4972,6 +4972,12 @@ def test_rank(self):
         with pytest.raises(ValueError, match=r"does not contain"):
             x.rank("invalid_dim")
 
+    def test_rank_use_bottleneck(self):
+        ds = Dataset({"a": ("x", [0, np.nan, 2]), "b": ("y", [4, 6, 3, 4])})
+        with xr.set_options(use_bottleneck=False):
+            with pytest.raises(RuntimeError):
+                ds.rank("x")
+
     def test_count(self):
         ds = Dataset({"x": ("a", [np.nan, 1]), "y": 0, "z": np.nan})
         expected = Dataset({"x": 1, "y": 1, "z": 0})

diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
@@ -392,6 +392,38 @@ def test_ffill():
     assert_equal(actual, expected)
 
 
+def test_ffill_use_bottleneck():
+    da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
+    with xr.set_options(use_bottleneck=False):
+        with pytest.raises(RuntimeError):
+            da.ffill("x")
+
+
+@requires_dask
+def test_ffill_use_bottleneck_dask():
+    da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
+    da = da.chunk({"x": 1})
+    with xr.set_options(use_bottleneck=False):
+        with pytest.raises(RuntimeError):
+            da.ffill("x")
+
+
+def test_bfill_use_bottleneck():
+    da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
+    with xr.set_options(use_bottleneck=False):
+        with pytest.raises(RuntimeError):
+            da.bfill("x")
+
+
+@requires_dask
+def test_bfill_use_bottleneck_dask():
+    da = xr.DataArray(np.array([4, 5, np.nan], dtype=np.float64), dims="x")
+    da = da.chunk({"x": 1})
+    with xr.set_options(use_bottleneck=False):
+        with pytest.raises(RuntimeError):
+            da.bfill("x")
+
+
 @requires_bottleneck
 @requires_dask
 @pytest.mark.parametrize("method", ["ffill", "bfill"])

diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py
@@ -1673,6 +1673,23 @@ def test_reduce(self):
         with pytest.raises(ValueError, match=r"cannot supply both"):
             v.mean(dim="x", axis=0)
 
+    @requires_bottleneck
+    def test_reduce_use_bottleneck(self, monkeypatch):
+        def raise_if_called(*args, **kwargs):
+            raise RuntimeError("should not have been called")
+
+        import bottleneck as bn
+
+        monkeypatch.setattr(bn, "nanmin", raise_if_called)
+
+        v = Variable("x", [0.0, np.nan, 1.0])
+        with pytest.raises(RuntimeError, match="should not have been called"):
+            with set_options(use_bottleneck=True):
+                v.min()
+
+        with set_options(use_bottleneck=False):
+            v.min()
+
     @pytest.mark.parametrize("skipna", [True, False])
     @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
     @pytest.mark.parametrize(
@@ -1720,6 +1737,12 @@ def test_rank_dask_raises(self):
         with pytest.raises(TypeError, match=r"arrays stored as dask"):
             v.rank("x")
 
+    def test_rank_use_bottleneck(self):
+        v = Variable(["x"], [3.0, 1.0, np.nan, 2.0, 4.0])
+        with set_options(use_bottleneck=False):
+            with pytest.raises(RuntimeError):
+                v.rank("x")
+
     @requires_bottleneck
     def test_rank(self):
         import bottleneck as bn