From b333fe41a89d1eb7b5c5c19fd56c374587666488 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Tue, 24 Oct 2023 18:02:06 +0100 Subject: [PATCH 1/2] fix: support all-`None` index in `awkward_Index_nones_as_index.cpp` (#2769) * fix: support all-None index * refactor: clarify logic * test: test argsort cases * Revert "refactor: clarify logic" This reverts commit 202a7c02096a9807d239d13cac1ece9d158c0923. * fix: drop option where necessary * docs: improve comment * fix: update demonstration kernel --- .../awkward_Index_nones_as_index.cpp | 11 ++-- kernel-specification.yml | 10 ++-- src/awkward/contents/indexedoptionarray.py | 13 +++-- tests/test_2769_argsort_all_none.py | 51 +++++++++++++++++++ 4 files changed, 73 insertions(+), 12 deletions(-) create mode 100644 tests/test_2769_argsort_all_none.py diff --git a/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp b/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp index 28c0f3be5e..ad2febc39e 100644 --- a/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp +++ b/awkward-cpp/src/cpu-kernels/awkward_Index_nones_as_index.cpp @@ -8,12 +8,17 @@ template ERROR awkward_Index_nones_as_index( T* toindex, int64_t length) { - int64_t last_index = 0; + int64_t n_non_null = 0; + // Assuming that `toindex` comprises of unique, contiguous integers (or -1), and is zero-based + // Compute the number of non-null values to determine our starting index for (int64_t i = 0; i < length; i++) { - toindex[i] > last_index ? last_index = toindex[i] : last_index; + if (toindex[i] != -1) { + n_non_null++; + } } + // Now set the null-value indices to by monotonically increasing and unique from the final index for (int64_t i = 0; i < length; i++) { - toindex[i] == -1 ? toindex[i] = ++last_index : toindex[i]; + toindex[i] == -1 ? toindex[i] = n_non_null++ : toindex[i]; } return success(); } diff --git a/kernel-specification.yml b/kernel-specification.yml index 2a120e505b..7d50a4df45 100644 --- a/kernel-specification.yml +++ b/kernel-specification.yml @@ -4500,14 +4500,14 @@ kernels: description: null definition: | def awkward_Index_nones_as_index(toindex, length): - last_index = 0 + num_non_null = 0 for i in range(length): - if toindex[i] > last_index: - last_index = toindex[i] + if toindex[i] != -1: + num_non_null += 1 for i in range(length): if toindex[i] == -1: - last_index = last_index + 1 - toindex[i] = last_index + toindex[i] = num_non_null + num_non_null += 1 automatic-tests: false manual-tests: [] diff --git a/src/awkward/contents/indexedoptionarray.py b/src/awkward/contents/indexedoptionarray.py index 429076affa..f1d9568a55 100644 --- a/src/awkward/contents/indexedoptionarray.py +++ b/src/awkward/contents/indexedoptionarray.py @@ -1265,10 +1265,15 @@ def _argsort_next( nextoutindex.length, ) ) - - out = ak.contents.IndexedOptionArray.simplified( - nextoutindex, out, parameters=self._parameters - ) + # Drop the option type: we have ensured that we don't have any + # -1 values in `nextoutindex` now! + out = out._carry(nextoutindex, False).copy( + parameters=parameters_union(out._parameters, self._parameters) + ) + else: + out = ak.contents.IndexedOptionArray.simplified( + nextoutindex, out, parameters=self._parameters + ) inject_nones = ( True diff --git a/tests/test_2769_argsort_all_none.py b/tests/test_2769_argsort_all_none.py new file mode 100644 index 0000000000..6e0a4acf2d --- /dev/null +++ b/tests/test_2769_argsort_all_none.py @@ -0,0 +1,51 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE + +import pytest # noqa: F401 + +import awkward as ak + + +def test_all_none(): + result = ak.argsort([None, None, None]) + assert ak.is_valid(result) + assert result.to_list() == [0, 1, 2] + assert isinstance(result.layout, ak.contents.NumpyArray) + assert result.type == ak.types.ArrayType(ak.types.NumpyType("int64"), 3) + + +def test_mixed_none_local(): + result = ak.argsort([None, 1, None, 2, 0, None, -1]) + assert ak.is_valid(result) + assert result.to_list() == [6, 4, 1, 3, 0, 2, 5] + assert result.type == ak.types.ArrayType(ak.types.NumpyType("int64"), 7) + + +def test_mixed_none_2d_local(): + result = ak.argsort( + [ + [None, 1, None, 0, None, None, -1], + None, + [None, 2, None, 2, 0, None, -2], + ], + axis=1, + ) + assert ak.is_valid(result) + assert result.to_list() == [[6, 3, 1, 0, 2, 4, 5], None, [6, 4, 1, 3, 0, 2, 5]] + assert result.type == ak.types.ArrayType( + ak.types.OptionType(ak.types.ListType(ak.types.NumpyType("int64"))), 3 + ) + + +def test_mixed_none_2d_nonlocal(): + result = ak.argsort( + [ + [None, 1, None, 0, None, None, -1], + [None, 2, None, 2, 0, None, -2], + ], + axis=0, + ) + assert ak.is_valid(result) + assert result.to_list() == [[0, 0, 0, 0, 1, 0, 1], [1, 1, 1, 1, 0, 1, 0]] + assert result.type == ak.types.ArrayType( + ak.types.ListType(ak.types.NumpyType("int64")), 2 + ) From 17aabdecdbbbc7c9eb6fae6e834ab51e607386f7 Mon Sep 17 00:00:00 2001 From: Angus Hollands Date: Wed, 25 Oct 2023 10:16:50 +0100 Subject: [PATCH 2/2] chore: add trove classifier to awkward-cpp (#2729) --- awkward-cpp/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/awkward-cpp/pyproject.toml b/awkward-cpp/pyproject.toml index ed71c2ad55..989e8f3e28 100644 --- a/awkward-cpp/pyproject.toml +++ b/awkward-cpp/pyproject.toml @@ -37,6 +37,7 @@ classifiers = [ "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Topic :: Scientific/Engineering", "Topic :: Scientific/Engineering :: Information Analysis", "Topic :: Scientific/Engineering :: Mathematics",