diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6f8fc0f3c8..4bd8efb05f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -80,21 +80,16 @@ jobs: python -m pip install build python -m build -w awkward-cpp - - name: Install awkward-cpp - run: python -m pip install -v @(get-childitem -path awkward-cpp/dist/*.whl) - - - name: Build & install awkward - run: python -m pip install -v . + - name: Install awkward, awkward-cpp, and dependencies + run: >- + python -m pip install --only-binary "numpy,pandas,pyarrow,numexpr" + -v . @(get-childitem -path awkward-cpp/dist/*.whl) + pytest-github-actions-annotate-failures + -r requirements-test.txt - name: Print versions run: python -m pip list - - name: Check if kernel specification is sorted - run: pipx run nox -s diagnostics -- --check-spec-sorted - - - name: Install test requirements - run: python -m pip install -v -r requirements-test.txt pytest-github-actions-annotate-failures - - name: Test specification if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true' run: python -m pytest -vv -rs awkward-cpp/tests-spec @@ -156,21 +151,17 @@ jobs: python -m pip install build python -m build -w ./awkward-cpp - - name: Install awkward-cpp - run: python -m pip install -v ./awkward-cpp/dist/*.whl - - - name: Build & install awkward - run: python -m pip install -v . + - name: Install awkward, awkward-cpp, dask-awkward, and dependencies + run: >- + python -m pip install --only-binary "numpy,pandas,pyarrow,numexpr" + -v . 
./awkward-cpp/dist/*.whl + pytest-github-actions-annotate-failures + dask-awkward + -r requirements-test.txt - name: Print versions run: python -m pip list - - name: Check if kernel specification is sorted - run: pipx run nox -s diagnostics -- --check-spec-sorted - - - name: Install test requirements - run: python -m pip install -v -r requirements-test.txt pytest-github-actions-annotate-failures - - name: Test specification if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true' run: python -m pytest -vv -rs awkward-cpp/tests-spec @@ -190,24 +181,16 @@ jobs: strategy: matrix: python-version: + - '3.12' - '3.11' - '3.10' - '3.9' - - '3.8' - numpy-package: - - "numpy" - pyarrow-package: - - "pyarrow" + extra-pip-constraints: + - "-r requirements-test.txt" include: + # Lower bounds - python-version: '3.8' - numpy-package: "numpy==1.18.0" - pyarrow-package: "pyarrow" - - python-version: '3.8' - numpy-package: "numpy" - pyarrow-package: "pyarrow==7.0.0" - - python-version: '3.12' - numpy-package: "numpy>=1.26.0b1" - pyarrow-package: "pyarrow;python_version<'3.12'" + extra-pip-constraints: "-r requirements-test-minimal.txt" runs-on: ubuntu-22.04 @@ -246,21 +229,21 @@ jobs: python -m pip install build python -m build -w ./awkward-cpp - - name: Install awkward-cpp - run: python -m pip install -v ./awkward-cpp/dist/*.whl "${{ matrix.numpy-package }}" "${{ matrix.pyarrow-package }}" - - - name: Build & install awkward - run: python -m pip install -v . + - name: Install awkward, awkward-cpp, and dependencies + run: >- + python -m pip install --only-binary "numpy,pandas,pyarrow,numexpr" + -v . 
./awkward-cpp/dist/*.whl + pytest-github-actions-annotate-failures + ${{ matrix.extra-pip-constraints }} - name: Print versions run: python -m pip list - name: Check if kernel specification is sorted + # We don't need to run this all the time + if: matrix.python-version == '3.12' run: pipx run nox -s diagnostics -- --check-spec-sorted - - name: Install test requirements - run: python -m pip install -v -r requirements-test.txt pytest-github-actions-annotate-failures - - name: Test specification if: steps.cache-awkward-cpp-wheel.outputs.cache-hit != 'true' run: python -m pytest -vv -rs awkward-cpp/tests-spec @@ -332,20 +315,16 @@ jobs: python3 -m pip install build python3 -m build -w ./awkward-cpp - - name: Install awkward-cpp - run: python3 -m pip install -v ./awkward-cpp/dist/*.whl - - - name: Build & install awkward - run: python3 -m pip install -v . - - - name: Also install dask-awkward - run: python3 -m pip install dask-awkward + - name: Install awkward, awkward-cpp, dask-awkward, and dependencies + run: >- + python -m pip install --only-binary "numpy,pandas,pyarrow,numexpr" + -v . 
./awkward-cpp/dist/*.whl + pytest-github-actions-annotate-failures + dask-awkward + -r requirements-test.txt - name: Print versions run: python -m pip list - - name: Install test requirements - run: python -m pip install -v -r requirements-test.txt pytest-github-actions-annotate-failures - - name: Test run: python -m pytest -vv -rs tests diff --git a/requirements-test-minimal.txt b/requirements-test-minimal.txt new file mode 100644 index 0000000000..1550de8209 --- /dev/null +++ b/requirements-test-minimal.txt @@ -0,0 +1,6 @@ +fsspec;sys_platform != "win32" +numpy==1.18.0 +pyarrow==7.0.0 +pytest>=6 +pytest-cov +pytest-xdist diff --git a/requirements-test.txt b/requirements-test.txt index a46959b0a0..746187dd7e 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,10 +1,10 @@ fsspec;sys_platform != "win32" jax[cpu]>=0.2.15;sys_platform != "win32" and python_version < "3.12" numba>=0.50.0,!=0.58.0rc1;python_version < "3.12" -numexpr; python_version < "3.12" +numexpr>=2.7; python_version < "3.12" pandas>=0.24.0;sys_platform != "win32" and python_version < "3.12" pyarrow>=7.0.0;sys_platform != "win32" and python_version < "3.12" pytest>=6 pytest-cov pytest-xdist -uproot +uproot>=5 diff --git a/src/awkward/_nplikes/array_module.py b/src/awkward/_nplikes/array_module.py index 5bead34287..23b1128292 100644 --- a/src/awkward/_nplikes/array_module.py +++ b/src/awkward/_nplikes/array_module.py @@ -2,8 +2,10 @@ from __future__ import annotations import math +from functools import lru_cache import numpy +import packaging.version from awkward._nplikes.numpylike import ( ArrayLike, @@ -18,6 +20,20 @@ from awkward._typing import Any, Final, Literal np = NumpyMetadata.instance() +NUMPY_HAS_NEP_50 = packaging.version.Version( + numpy.__version__ +) >= packaging.version.Version("1.24") + + +@lru_cache +def _nplike_concatenate_has_casting(module: Any) -> bool: + x = module.zeros(2) + try: + module.concatenate((x, x), casting="same_kind") + except TypeError: + return 
False + else: + return True class ArrayModuleNumpyLike(NumpyLike): @@ -128,12 +144,15 @@ def meshgrid( def array_equal( self, x1: ArrayLike, x2: ArrayLike, *, equal_nan: bool = False - ) -> ArrayLike: + ) -> bool: assert not isinstance(x1, PlaceholderArray) assert not isinstance(x2, PlaceholderArray) - return self._module.asarray( - self._module.array_equal(x1, x2, equal_nan=equal_nan) - ) + if equal_nan: + both_nan = self._module.isnan(x1) & self._module.isnan(x2) + both_equal = x1 == x2 + return self._module.all(self._module.logical_or(both_equal, both_nan)) + else: + return self._module.array_equal(x1, x2) def searchsorted( self, @@ -150,28 +169,57 @@ def searchsorted( ############################ manipulation - def apply_ufunc( - self, - ufunc: UfuncLike, - method: str, - args: list[Any], - kwargs: dict[str, Any] | None = None, - ) -> ArrayLike | tuple[ArrayLike]: - # Determine input argument dtypes - input_arg_dtypes = [getattr(obj, "dtype", type(obj)) for obj in args] - # Resolve these for the given ufunc - arg_dtypes = tuple(input_arg_dtypes + [None] * ufunc.nout) - resolved_dtypes = ufunc.resolve_dtypes(arg_dtypes) - # Interpret the arguments under these dtypes - resolved_args = [ - self.asarray(arg, dtype=dtype) for arg, dtype in zip(args, resolved_dtypes) - ] - # Broadcast these resolved arguments - broadcasted_args = self.broadcast_arrays(*resolved_args) - # Allow other nplikes to replace implementation - impl = self.prepare_ufunc(ufunc) - # Compute the result - return impl(*broadcasted_args, **kwargs) + # Does NumPy support value-less ufunc resolution? 
+ if NUMPY_HAS_NEP_50: + + def apply_ufunc( + self, + ufunc: UfuncLike, + method: str, + args: list[Any], + kwargs: dict[str, Any] | None = None, + ) -> ArrayLike | tuple[ArrayLike]: + # Determine input argument dtypes + input_arg_dtypes = [getattr(obj, "dtype", type(obj)) for obj in args] + # Resolve these for the given ufunc + arg_dtypes = tuple(input_arg_dtypes + [None] * ufunc.nout) + resolved_dtypes = ufunc.resolve_dtypes(arg_dtypes) + # Interpret the arguments under these dtypes, converting scalars to length-1 arrays + resolved_args = [ + self.asarray(arg, dtype=dtype) + for arg, dtype in zip(args, resolved_dtypes) + ] + # Broadcast to ensure all-scalar or all-nd-array + broadcasted_args = self.broadcast_arrays(*resolved_args) + # Allow other nplikes to replace implementation + impl = self.prepare_ufunc(ufunc) + # Compute the result + return impl(*broadcasted_args, **(kwargs or {})) + + else: + # Otherwise, perform default NumPy coercion (value-dependent) + def apply_ufunc( + self, + ufunc: UfuncLike, + method: str, + args: list[Any], + kwargs: dict[str, Any] | None = None, + ) -> ArrayLike | tuple[ArrayLike]: + # Convert np.generic to scalar arrays + resolved_args = [ + self.asarray(arg, dtype=arg.dtype) if hasattr(arg, "dtype") else arg + for arg in args + ] + broadcasted_args = self.broadcast_arrays(*resolved_args) + # Choose the broadcasted argument if it wasn't a Python scalar + non_generic_value_promoted_args = [ + y if hasattr(x, "ndim") else x + for x, y in zip(resolved_args, broadcasted_args) + ] + # Allow other nplikes to replace implementation + impl = self.prepare_ufunc(ufunc) + # Compute the result + return impl(*non_generic_value_promoted_args, **(kwargs or {})) def broadcast_arrays(self, *arrays: ArrayLike) -> list[ArrayLike]: assert not any(isinstance(x, PlaceholderArray) for x in arrays) @@ -327,7 +375,10 @@ def concat( axis: int | None = 0, ) -> ArrayLike: assert not any(isinstance(x, PlaceholderArray) for x in arrays) - return 
self._module.concatenate(arrays, axis=axis, casting="same_kind") + if _nplike_concatenate_has_casting(self._module): + return self._module.concatenate(arrays, axis=axis, casting="same_kind") + else: + return self._module.concatenate(arrays, axis=axis) def repeat( self, @@ -507,10 +558,11 @@ def max( return self._module.max(x, axis=axis, keepdims=keepdims, out=maybe_out) def count_nonzero( - self, x: ArrayLike, *, axis: int | None = None, keepdims: bool = False + self, x: ArrayLike, *, axis: int | tuple[int, ...] | None = None ) -> ArrayLike: assert not isinstance(x, PlaceholderArray) - return self._module.count_nonzero(x, axis=axis, keepdims=keepdims) + assert isinstance(axis, int) or axis is None + return self._module.count_nonzero(x, axis=axis) def cumsum( self, diff --git a/src/awkward/_nplikes/cupy.py b/src/awkward/_nplikes/cupy.py index 30c01032c1..f242447407 100644 --- a/src/awkward/_nplikes/cupy.py +++ b/src/awkward/_nplikes/cupy.py @@ -47,13 +47,15 @@ def frombuffer( np_array = numpy.frombuffer(buffer, dtype=dtype, count=count) return self._module.asarray(np_array) - def array_equal(self, x1: ArrayLike, x2: ArrayLike, *, equal_nan: bool = False): + def array_equal( + self, x1: ArrayLike, x2: ArrayLike, *, equal_nan: bool = False + ) -> bool: assert not isinstance(x1, PlaceholderArray) assert not isinstance(x2, PlaceholderArray) if x1.shape != x2.shape: return False else: - return self._module.all(x1 - x2 == 0) + return self._module.array_equal(x1, x2, equal_nan=equal_nan).get() def repeat( self, x: ArrayLike, repeats: ArrayLike | int, *, axis: int | None = None @@ -106,13 +108,10 @@ def any( return out def count_nonzero( - self, - x: ArrayLike, - *, - axis: int | tuple[int, ...] | None = None, - keepdims: bool = False, + self, x: ArrayLike, *, axis: int | tuple[int, ...] 
| None = None ) -> ArrayLike: assert not isinstance(x, PlaceholderArray) + assert isinstance(axis, int) or axis is None out = self._module.count_nonzero(x, axis=axis) if axis is None and isinstance(out, self._module.ndarray): return out.item() diff --git a/src/awkward/_nplikes/numpylike.py b/src/awkward/_nplikes/numpylike.py index c77f44011e..26ac7c63a7 100644 --- a/src/awkward/_nplikes/numpylike.py +++ b/src/awkward/_nplikes/numpylike.py @@ -639,7 +639,7 @@ def max( @abstractmethod def count_nonzero( - self, x: ArrayLike, *, axis: int | None = None, keepdims: bool = False + self, x: ArrayLike, *, axis: int | tuple[int, ...] | None = None ) -> ArrayLike: ... diff --git a/src/awkward/_nplikes/typetracer.py b/src/awkward/_nplikes/typetracer.py index e0474fff07..7693cd173c 100644 --- a/src/awkward/_nplikes/typetracer.py +++ b/src/awkward/_nplikes/typetracer.py @@ -5,6 +5,7 @@ from typing import Callable import numpy +import packaging.version import awkward as ak from awkward._nplikes.dispatch import register_nplike @@ -30,6 +31,9 @@ ) np = NumpyMetadata.instance() +NUMPY_HAS_NEP_50 = packaging.version.Version( + numpy.__version__ +) >= packaging.version.Version("1.24") def is_unknown_length(array: Any) -> bool: @@ -504,39 +508,89 @@ class TypeTracer(NumpyLike): is_eager: Final = True supports_structured_dtypes: Final = True - def apply_ufunc( - self, - ufunc: UfuncLike, - method: str, - args: list[Any], - kwargs: dict[str, Any] | None = None, - ) -> TypeTracerArray | tuple[TypeTracerArray]: - for x in args: - try_touch_data(x) + if NUMPY_HAS_NEP_50: + + def apply_ufunc( + self, + ufunc: UfuncLike, + method: str, + args: list[Any], + kwargs: dict[str, Any] | None = None, + ) -> TypeTracerArray | tuple[TypeTracerArray]: + for x in args: + try_touch_data(x) + + # Unwrap options, assume they don't occur + args = [x.content if isinstance(x, MaybeNone) else x for x in args] + # Determine input argument dtypes + input_arg_dtypes = [getattr(obj, "dtype", type(obj)) for obj in 
args] + # Resolve these for the given ufunc + arg_dtypes = tuple(input_arg_dtypes + [None] * ufunc.nout) + resolved_dtypes = ufunc.resolve_dtypes(arg_dtypes) + # Interpret the arguments under these dtypes + resolved_args = [ + self.asarray(arg, dtype=dtype) + for arg, dtype in zip(args, resolved_dtypes) + ] + # Broadcast to ensure all-scalar or all-nd-array + broadcasted_args = self.broadcast_arrays(*resolved_args) + broadcasted_shape = broadcasted_args[0].shape + result_dtypes = resolved_dtypes[ufunc.nin :] + + if len(result_dtypes) == 1: + return TypeTracerArray._new(result_dtypes[0], shape=broadcasted_shape) + else: + return tuple( + TypeTracerArray._new(dtype, shape=broadcasted_shape) + for dtype in result_dtypes + ) - # Unwrap options, assume they don't occur - args = [x.content if isinstance(x, MaybeNone) else x for x in args] - # Determine input argument dtypes - input_arg_dtypes = [getattr(obj, "dtype", type(obj)) for obj in args] - # Resolve these for the given ufunc - arg_dtypes = tuple(input_arg_dtypes + [None] * ufunc.nout) - resolved_dtypes = ufunc.resolve_dtypes(arg_dtypes) - # Interpret the arguments under these dtypes - resolved_args = [ - self.asarray(arg, dtype=dtype) for arg, dtype in zip(args, resolved_dtypes) - ] - # Broadcast these resolved arguments - broadcasted_args = self.broadcast_arrays(*resolved_args) - result_dtypes = resolved_dtypes[ufunc.nin :] - if len(result_dtypes) == 1: - return TypeTracerArray._new( - result_dtypes[0], shape=broadcasted_args[0].shape - ) - else: - return ( - TypeTracerArray._new(dtype, shape=b.shape) - for dtype, b in zip(result_dtypes, broadcasted_args) - ) + else: + + def apply_ufunc( + self, + ufunc: UfuncLike, + method: str, + args: list[Any], + kwargs: dict[str, Any] | None = None, + ) -> TypeTracerArray | tuple[TypeTracerArray]: + for x in args: + try_touch_data(x) + + # Unwrap options, assume they don't occur + args = [x.content if isinstance(x, MaybeNone) else x for x in args] + # Convert np.generic to scalar 
arrays + resolved_args = [ + self.asarray(arg, dtype=arg.dtype) if hasattr(arg, "dtype") else arg + for arg in args + ] + # Broadcast all inputs together + broadcasted_args = self.broadcast_arrays(*resolved_args) + broadcasted_shape = broadcasted_args[0].shape + # Choose the broadcasted argument if it wasn't a Python scalar + non_generic_value_promoted_args = [ + y if hasattr(x, "ndim") else x + for x, y in zip(resolved_args, broadcasted_args) + ] + # Build proxy (empty) arrays + proxy_args = [ + (numpy.empty(0, dtype=x.dtype) if hasattr(x, "dtype") else x) + for x in non_generic_value_promoted_args + ] + # Determine result dtype from proxy call + proxy_result = ufunc(*proxy_args, **(kwargs or {})) + if ufunc.nout == 1: + result_dtypes = [proxy_result.dtype] + else: + result_dtypes = [x.dtype for x in proxy_result] + + if len(result_dtypes) == 1: + return TypeTracerArray._new(result_dtypes[0], shape=broadcasted_shape) + else: + return tuple( + TypeTracerArray._new(dtype, shape=broadcasted_shape) + for dtype in result_dtypes + ) def _axis_is_valid(self, axis: int, ndim: int) -> bool: if axis < 0: @@ -973,7 +1027,7 @@ def broadcast_arrays(self, *arrays: ArrayLike) -> list[TypeTracerArray]: all_arrays = [] for x in arrays: if not hasattr(x, "shape"): - x = self.promote_scalar(x) + x = self.asarray(x) all_arrays.append(x) shapes = [x.shape for x in all_arrays] @@ -1321,7 +1375,7 @@ def any( raise NotImplementedError def count_nonzero( - self, x: ArrayLike, *, axis: int | None = None, keepdims: bool = False + self, x: ArrayLike, *, axis: int | None = None ) -> TypeTracerArray: assert not isinstance(x, PlaceholderArray) try_touch_data(x) diff --git a/tests/test_2649_dlpack_support.py b/tests/test_2649_dlpack_support.py index 196f9f8972..67bc3eee1a 100644 --- a/tests/test_2649_dlpack_support.py +++ b/tests/test_2649_dlpack_support.py @@ -2,9 +2,15 @@ import numpy as np import pytest +from packaging.version import parse as parse_version import awkward as ak +if 
parse_version(np.__version__) < parse_version("1.23.0"): + pytest.skip( + "NumPy 1.23 or greater is required for DLPack testing", allow_module_level=True + ) + def test_from_dlpack_numpy(): np_array = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5) diff --git a/tests/test_2793_nep_70_gradual_support.py b/tests/test_2793_nep_70_gradual_support.py new file mode 100644 index 0000000000..9747d0a093 --- /dev/null +++ b/tests/test_2793_nep_70_gradual_support.py @@ -0,0 +1,48 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE +import contextlib + +import numpy as np +import packaging.version +import pytest + +import awkward as ak + +NUMPY_HAS_NEP_50 = packaging.version.parse(np.__version__) >= packaging.version.Version( + "1.24.0" +) + + +@pytest.mark.skipif(not NUMPY_HAS_NEP_50, reason="NEP-50 requires NumPy >= 1.24.0") +@pytest.mark.parametrize("backend", ["cpu", "typetracer"]) +def test_with_nep_50(backend): + array = ak.to_backend(np.arange(255, dtype=np.uint8), backend) + assert array.layout.dtype == np.dtype(np.uint8) + + typed_scalar = np.uint64(0) + assert (array + typed_scalar).layout.dtype == np.dtype(np.uint64) + + # With NEP-50, we can ask NumPy to use value-less type resolution + warn_context = ( + pytest.warns(DeprecationWarning, match="out-of-bound Python integers") + if backend == "cpu" + else contextlib.nullcontext() + ) + with warn_context: + untyped_scalar = 512 + assert (array + untyped_scalar).layout.dtype == np.dtype(np.uint8) + + +@pytest.mark.skipif(NUMPY_HAS_NEP_50, reason="NumPy >= 1.24.0 has NEP-50 support") +@pytest.mark.parametrize("backend", ["cpu", "typetracer"]) +def test_without_nep_50(backend): + array = ak.to_backend(np.arange(255, dtype=np.uint8), backend) + assert array.layout.dtype == np.dtype(np.uint8) + + # Without NEP-50, we still don't drop type information for typed-scalars, + # unlike NumPy. 
+ typed_scalar = np.uint64(0) + assert (array + typed_scalar).layout.dtype == np.dtype(np.uint64) + + # But, with untyped scalars, we're forced to rely on NumPy's ufunc loop resolution + untyped_scalar = 512 + assert (array + untyped_scalar).layout.dtype == np.dtype(np.uint16)