scikit-hep · jpivarski · Apr 1, 2024 · Mar 20, 2024 · Mar 20, 2024 · Mar 20, 2024
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -61,6 +61,10 @@ jobs:
             python-architecture: x64
             runs-on: ubuntu-latest
             dependencies-kind: pypy
+          - python-version: '3.11'
+            python-architecture: x64
+            runs-on: ubuntu-latest
+            dependencies-kind: numpy2
 
     runs-on: ${{ matrix.runs-on }}
 

diff --git a/awkward-cpp/pyproject.toml b/awkward-cpp/pyproject.toml
@@ -9,7 +9,7 @@ build-backend = "scikit_build_core.build"
 name = "awkward_cpp"
 version = "31"
 dependencies = [
-    "numpy>=1.18.0,<2.0",
+    "numpy>=1.18.0",
     "importlib_resources;python_version < \"3.9\""
 ]
 readme = "README.md"

diff --git a/kernel-test-data.json b/kernel-test-data.json
@@ -1153,19 +1153,6 @@
                         "tobytemask": []
                     }
                 },
-                {
-                    "error": false,
-                    "message": "",
-                    "inputs": {
-                        "bitmasklength": 2,
-                        "frombitmask": [58, 59],
-                        "lsb_order": false,
-                        "validwhen": false
-                    },
-                    "outputs": {
-                        "tobytemask": [0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1]
-                    }
-                },
                 {
                     "error": false,
                     "message": "",
@@ -21966,19 +21953,6 @@
             "name": "awkward_BitMaskedArray_to_IndexedOptionArray",
             "status": true,
             "tests": [
-                {
-                    "error": false,
-                    "message": "",
-                    "inputs": {
-                        "bitmasklength": 2,
-                        "frombitmask": [58, 59],
-                        "lsb_order": false,
-                        "validwhen": false
-                    },
-                    "outputs": {
-                        "toindex": [0, 1, -1, -1, -1, 5, -1, 7, 8, 9, -1, -1, -1, 13, -1, -1]
-                    }
-                },
                 {
                     "error": false,
                     "message": "",

diff --git a/pyproject.toml b/pyproject.toml
@@ -42,7 +42,7 @@ classifiers = [
 dependencies = [
     "awkward_cpp==31",
     "importlib_metadata>=4.13.0;python_version < \"3.12\"",
-    "numpy>=1.18.0,<2.0",
+    "numpy>=1.18.0",
     "packaging",
     "typing_extensions>=4.1.0; python_version < \"3.11\"",
     "fsspec>=2022.11.0"

diff --git a/requirements-test-numpy2.txt b/requirements-test-numpy2.txt
@@ -0,0 +1,5 @@
+fsspec>=2022.11.0;sys_platform != "win32"
+numpy>=2.0.0b1
+pytest>=6
+pytest-cov
+pytest-xdist
diff --git a/src/awkward/_nplikes/numpy_like.py b/src/awkward/_nplikes/numpy_like.py
@@ -19,6 +19,7 @@
     TypeAlias,
     TypeVar,
 )
+from awkward.errors import AxisError
 
 if TYPE_CHECKING:
     from numpy.typing import DTypeLike
@@ -83,7 +84,7 @@ class NumpyMetadata(PublicSingleton):
     datetime_data = staticmethod(numpy.datetime_data)
     issubdtype = staticmethod(numpy.issubdtype)
 
-    AxisError = numpy.AxisError
+    AxisError = AxisError
 
 
 if hasattr(numpy, "float16"):

diff --git a/src/awkward/_operators.py b/src/awkward/_operators.py
@@ -33,7 +33,7 @@
 
 from __future__ import annotations
 
-from numpy.core import umath as um
+import numpy as np
 
 
 def _disables_array_ufunc(obj):
@@ -179,39 +179,39 @@ def __repr__(self):
     # overrides NEP.
 
     # comparisons don't have reflected and in-place versions
-    __lt__ = _binary_method(um.less, "lt")
-    __le__ = _binary_method(um.less_equal, "le")
-    __eq__ = _binary_method(um.equal, "eq")
-    __ne__ = _binary_method(um.not_equal, "ne")
-    __gt__ = _binary_method(um.greater, "gt")
-    __ge__ = _binary_method(um.greater_equal, "ge")
+    __lt__ = _binary_method(np.less, "lt")
+    __le__ = _binary_method(np.less_equal, "le")
+    __eq__ = _binary_method(np.equal, "eq")
+    __ne__ = _binary_method(np.not_equal, "ne")
+    __gt__ = _binary_method(np.greater, "gt")
+    __ge__ = _binary_method(np.greater_equal, "ge")
 
     # numeric methods
-    __add__, __radd__, __iadd__ = _numeric_methods(um.add, "add")
-    __sub__, __rsub__, __isub__ = _numeric_methods(um.subtract, "sub")
-    __mul__, __rmul__, __imul__ = _numeric_methods(um.multiply, "mul")
-    __matmul__, __rmatmul__, __imatmul__ = _numeric_methods(um.matmul, "matmul")
+    __add__, __radd__, __iadd__ = _numeric_methods(np.add, "add")
+    __sub__, __rsub__, __isub__ = _numeric_methods(np.subtract, "sub")
+    __mul__, __rmul__, __imul__ = _numeric_methods(np.multiply, "mul")
+    __matmul__, __rmatmul__, __imatmul__ = _numeric_methods(np.matmul, "matmul")
     # Python 3 does not use __div__, __rdiv__, or __idiv__
     __truediv__, __rtruediv__, __itruediv__ = _numeric_methods(
-        um.true_divide, "truediv"
+        np.true_divide, "truediv"
     )
     __floordiv__, __rfloordiv__, __ifloordiv__ = _numeric_methods(
-        um.floor_divide, "floordiv"
+        np.floor_divide, "floordiv"
     )
-    __mod__, __rmod__, __imod__ = _numeric_methods(um.remainder, "mod")
-    __divmod__ = _binary_method(um.divmod, "divmod")
-    __rdivmod__ = _reflected_binary_method(um.divmod, "divmod")
+    __mod__, __rmod__, __imod__ = _numeric_methods(np.remainder, "mod")
+    __divmod__ = _binary_method(np.divmod, "divmod")
+    __rdivmod__ = _reflected_binary_method(np.divmod, "divmod")
     # __idivmod__ does not exist
     # TODO: handle the optional third argument for __pow__?
-    __pow__, __rpow__, __ipow__ = _numeric_methods(um.power, "pow")
-    __lshift__, __rlshift__, __ilshift__ = _numeric_methods(um.left_shift, "lshift")
-    __rshift__, __rrshift__, __irshift__ = _numeric_methods(um.right_shift, "rshift")
-    __and__, __rand__, __iand__ = _numeric_methods(um.bitwise_and, "and")
-    __xor__, __rxor__, __ixor__ = _numeric_methods(um.bitwise_xor, "xor")
-    __or__, __ror__, __ior__ = _numeric_methods(um.bitwise_or, "or")
+    __pow__, __rpow__, __ipow__ = _numeric_methods(np.power, "pow")
+    __lshift__, __rlshift__, __ilshift__ = _numeric_methods(np.left_shift, "lshift")
+    __rshift__, __rrshift__, __irshift__ = _numeric_methods(np.right_shift, "rshift")
+    __and__, __rand__, __iand__ = _numeric_methods(np.bitwise_and, "and")
+    __xor__, __rxor__, __ixor__ = _numeric_methods(np.bitwise_xor, "xor")
+    __or__, __ror__, __ior__ = _numeric_methods(np.bitwise_or, "or")
 
     # unary methods
-    __neg__ = _unary_method(um.negative, "neg")
-    __pos__ = _unary_method(um.positive, "pos")
-    __abs__ = _unary_method(um.absolute, "abs")
-    __invert__ = _unary_method(um.invert, "invert")
+    __neg__ = _unary_method(np.negative, "neg")
+    __pos__ = _unary_method(np.positive, "pos")
+    __abs__ = _unary_method(np.absolute, "abs")
+    __invert__ = _unary_method(np.invert, "invert")
diff --git a/src/awkward/errors.py b/src/awkward/errors.py
@@ -11,4 +11,4 @@ class FieldNotFoundError(IndexError):
     pass
 
 
-AxisError = numpy.AxisError
+AxisError = getattr(numpy, "exceptions", numpy).AxisError
diff --git a/src/awkward/operations/ak_full_like.py b/src/awkward/operations/ak_full_like.py
@@ -158,7 +158,11 @@ def action(layout, backend, **kwargs):
 
         elif layout.parameter("__array__") in {"bytestring", "string"}:
             stringlike_type = layout.parameter("__array__")
+            charlike_type = "byte" if stringlike_type == "bytestring" else "char"
+
             if fill_value is _ZEROS:
+                # Special case: _ZEROS fills with empty strings (every output list
+                # has length zero), not with the one-character b"0" or "0".
                 asbytes = nplike.frombuffer(b"", dtype=np.uint8)
                 result = ak.contents.ListArray(
                     ak.index.Index64(
@@ -171,22 +175,26 @@ def action(layout, backend, **kwargs):
                     ),
                     ak.contents.NumpyArray(
                         asbytes,
-                        parameters={
-                            "__array__": "byte"
-                            if stringlike_type == "bytestring"
-                            else "char"
-                        },
+                        parameters={"__array__": charlike_type},
                     ),
                     parameters={"__array__": stringlike_type},
                 )
 
-            elif stringlike_type == "bytestring":
-                if isinstance(fill_value, bytes):
+            else:
+                # NumPy 2.x converts "0" and b"0" (ASCII code 48) to True because the character code is nonzero;
+                # NumPy 1.x converts them to False because it parses the text of bytestrings and strings.
+                # Both versions of NumPy parse bytestrings and strings when converting to anything other than booleans.
+                numpy2_behavior = nplike.astype(nplike.asarray(["0"]), dtype=np.bool_)[
+                    0
+                ]
+                if dtype == np.dtype(np.bool_) and numpy2_behavior:
+                    asbytes = b"\1" if fill_value else b"\0"
+                elif isinstance(fill_value, bytes):
                     asbytes = fill_value
                 else:
                     asbytes = str(fill_value).encode("utf-8", "surrogateescape")
-                asbytes = nplike.frombuffer(asbytes, dtype=np.uint8)
 
+                asbytes = nplike.frombuffer(asbytes, dtype=np.uint8)
                 result = ak.contents.ListArray(
                     ak.index.Index64(
                         index_nplike.zeros(layout.length, dtype=np.int64),
@@ -195,25 +203,12 @@ def action(layout, backend, **kwargs):
                     ak.index.Index64(
                         index_nplike.full(layout.length, len(asbytes), dtype=np.int64)
                     ),
-                    ak.contents.NumpyArray(asbytes, parameters={"__array__": "byte"}),
-                    parameters={"__array__": "bytestring"},
-                )
-
-            else:
-                assert stringlike_type == "string"
-                asstr = str(fill_value).encode("utf-8", "surrogateescape")
-                asbytes = nplike.frombuffer(asstr, dtype=np.uint8)
-                result = ak.contents.ListArray(
-                    ak.index.Index64(
-                        index_nplike.zeros(layout.length, dtype=np.int64),
-                        nplike=index_nplike,
-                    ),
-                    ak.index.Index64(
-                        index_nplike.full(layout.length, len(asbytes), dtype=np.int64)
+                    ak.contents.NumpyArray(
+                        asbytes, parameters={"__array__": charlike_type}
                     ),
-                    ak.contents.NumpyArray(asbytes, parameters={"__array__": "char"}),
-                    parameters={"__array__": "string"},
+                    parameters={"__array__": stringlike_type},
                 )
+
             if dtype is not None:
                 # Interpret strings as numeric/bool types
                 result = ak.operations.strings_astype(

diff --git a/tests/test_0813_full_like_dtype_arg.py b/tests/test_0813_full_like_dtype_arg.py
@@ -144,3 +144,59 @@ def assert_array_type(new_array, intended_type):
     assert_array_type(int_type64, np.int64)
     assert_array_type(float_type, float)
     assert_array_type(bool_type, np.bool_)
+
+
+def test_numpy2_changes():  # full_like results that depend on how NumPy casts "0" to bool
+    numpy2_behavior = np.asarray(["0"]).astype(np.bool_)[0]  # probe the installed NumPy
+
+    if numpy2_behavior:  # this NumPy casts "0" to True, so every filled value is True
+        assert ak.full_like([[True, False], [], [True]], b"0").to_list() == [
+            [True, True],
+            [],
+            [True],
+        ]
+        assert ak.full_like([[True, False], [], [True]], "0").to_list() == [
+            [True, True],
+            [],
+            [True],
+        ]
+
+    else:  # this NumPy casts "0" to False, so every filled value is False
+        assert ak.full_like([[True, False], [], [True]], b"0").to_list() == [
+            [False, False],
+            [],
+            [False],
+        ]
+        assert ak.full_like([[True, False], [], [True]], "0").to_list() == [
+            [False, False],
+            [],
+            [False],
+        ]
+
+    assert ak.full_like(  # outside the branch: an integer 0 fill must give False either way
+        [["one", "two"], [], ["three"]], 0, dtype=np.bool_
+    ).to_list() == [[False, False], [], [False]]
+    assert ak.full_like(  # same expectation for a bytestring layout
+        [[b"one", b"two"], [], [b"three"]], 0, dtype=np.bool_
+    ).to_list() == [[False, False], [], [False]]
+
+    assert ak.full_like([["one", "two"], [], ["three"]], "0").to_list() == [
+        ["0", "0"],  # no dtype given: the result stays a string array
+        [],
+        ["0"],
+    ]
+    assert ak.full_like([["one", "two"], [], ["three"]], b"0").to_list() == [
+        ["0", "0"],  # bytes fill value, but the str layout decides the output kind
+        [],
+        ["0"],
+    ]
+    assert ak.full_like([[b"one", b"two"], [], [b"three"]], "0").to_list() == [
+        [b"0", b"0"],  # str fill value, but the bytestring layout decides the output kind
+        [],
+        [b"0"],
+    ]
+    assert ak.full_like([[b"one", b"two"], [], [b"three"]], b"0").to_list() == [
+        [b"0", b"0"],
+        [],
+        [b"0"],
+    ]
diff --git a/tests/test_1137_num.py b/tests/test_1137_num.py
@@ -6,6 +6,7 @@
 import pytest
 
 import awkward as ak
+from awkward.errors import AxisError
 
 to_list = ak.operations.to_list
 
@@ -43,7 +44,7 @@ def test_emptyarray():
     array = ak.contents.EmptyArray()
     assert to_list(ak.num(array, 0)) == 0
     assert to_list(ak.num(array, -1)) == 0
-    with pytest.raises(np.AxisError) as err:
+    with pytest.raises(AxisError) as err:
         ak.num(array, 1)
     assert "axis=1 exceeds the depth" in str(err.value)
 
@@ -58,7 +59,7 @@ def test_numpyarray():
         [[7, 7, 7, 7, 7], [7, 7, 7, 7, 7], [7, 7, 7, 7, 7]],
         [[7, 7, 7, 7, 7], [7, 7, 7, 7, 7], [7, 7, 7, 7, 7]],
     ]
-    with pytest.raises(np.AxisError) as err:
+    with pytest.raises(AxisError) as err:
         ak.num(array, 4)
     assert "axis=4 exceeds the depth" in str(err.value)
 
@@ -75,7 +76,7 @@ def test_regulararray():
         [[7, 7, 7, 7, 7], [7, 7, 7, 7, 7], [7, 7, 7, 7, 7]],
         [[7, 7, 7, 7, 7], [7, 7, 7, 7, 7], [7, 7, 7, 7, 7]],
     ]
-    with pytest.raises(np.AxisError) as err:
+    with pytest.raises(AxisError) as err:
         ak.num(array, 4)
     assert "axis=4 exceeds the depth" in str(err.value)
 
@@ -112,7 +113,7 @@ def test_listarray():
         [],
         [[2, 2, 2], [2, 2, 2]],
     ]
-    with pytest.raises(np.AxisError) as err:
+    with pytest.raises(AxisError) as err:
         ak.num(array, 4)
     assert "axis=4 exceeds the depth" in str(err.value)
 
@@ -140,7 +141,7 @@ def test_listoffsetarray():
         [],
         [[2, 2, 2], [2, 2, 2]],
     ]
-    with pytest.raises(np.AxisError) as err:
+    with pytest.raises(AxisError) as err:
         ak.num(array, 4)
     assert "axis=4 exceeds the depth" in str(err.value)
 
@@ -173,7 +174,7 @@ def test_indexedarray():
         [[2, 2, 2], [2, 2, 2], [2, 2, 2]],
     ]
 
-    with pytest.raises(np.AxisError) as err:
+    with pytest.raises(AxisError) as err:
         ak.num(array, 4)
     assert "axis=4 exceeds the depth" in str(err.value)
 
@@ -210,7 +211,7 @@ def test_indexedoptionarray():
         [[2, 2, 2], [2, 2, 2], [2, 2, 2]],
     ]
 
-    with pytest.raises(np.AxisError) as err:
+    with pytest.raises(AxisError) as err:
         ak.num(array, 4)
     assert "axis=4 exceeds the depth" in str(err.value)
Original file line number	Diff line number	Diff line change
@@ -11,4 +11,4 @@ class FieldNotFoundError(IndexError):
     pass
 
 
-AxisError = numpy.AxisError
+AxisError = getattr(numpy, "exceptions", numpy).AxisError