From c641a805a14771ad7137e2727cd19a2d653bcd25 Mon Sep 17 00:00:00 2001 From: Johannes Wilde Date: Sat, 14 Dec 2024 15:43:55 +0100 Subject: [PATCH 1/7] Prevent potential data loss in numpy's dtype::get_itemsize(). The numpy 2.0 migration guide states [https://numpy.org/devdocs/numpy_2_0_migration_guide.html]: "PyDataType_ELSIZE and PyDataType_SET_ELSIZE (note that the result is now npy_intp and not int)." Even though it is rather unlikely to have have elements larger than 2GB each, prevent potential numerical overflows, which could occur when casting ssize_t to int on 64-bit systems. --- include/boost/python/numpy/dtype.hpp | 2 +- src/numpy/dtype.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/python/numpy/dtype.hpp b/include/boost/python/numpy/dtype.hpp index 4673745e57..fd9f626bb9 100644 --- a/include/boost/python/numpy/dtype.hpp +++ b/include/boost/python/numpy/dtype.hpp @@ -47,7 +47,7 @@ class BOOST_NUMPY_DECL dtype : public object { template static dtype get_builtin(); /// @brief Return the size of the data type in bytes. - int get_itemsize() const; + Py_ssize_t get_itemsize() const; /** * @brief Compare two dtypes for equivalence. diff --git a/src/numpy/dtype.cpp b/src/numpy/dtype.cpp index 1ce8c6ec32..7e5b2416e0 100644 --- a/src/numpy/dtype.cpp +++ b/src/numpy/dtype.cpp @@ -98,7 +98,7 @@ python::detail::new_reference dtype::convert(object const & arg, bool align) return python::detail::new_reference(reinterpret_cast(obj)); } -int dtype::get_itemsize() const { +Py_ssize_t dtype::get_itemsize() const { #if NPY_ABI_VERSION < 0x02000000 return reinterpret_cast(ptr())->elsize; #else From 4a0c0962bcac64a94f005a07ffee9861983feefa Mon Sep 17 00:00:00 2001 From: Johannes Wilde Date: Sat, 14 Dec 2024 16:11:42 +0100 Subject: [PATCH 2/7] Use boost::python::ssize_t instead of Py_ssize_t - it is available for all PY_VERSION_HEX. --- include/boost/python/numpy/dtype.hpp | 2 +- src/numpy/dtype.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/boost/python/numpy/dtype.hpp b/include/boost/python/numpy/dtype.hpp index fd9f626bb9..d7831f87dc 100644 --- a/include/boost/python/numpy/dtype.hpp +++ b/include/boost/python/numpy/dtype.hpp @@ -47,7 +47,7 @@ class BOOST_NUMPY_DECL dtype : public object { template static dtype get_builtin(); /// @brief Return the size of the data type in bytes. - Py_ssize_t get_itemsize() const; + boost::python::ssize_t get_itemsize() const; /** * @brief Compare two dtypes for equivalence. diff --git a/src/numpy/dtype.cpp b/src/numpy/dtype.cpp index 7e5b2416e0..3985af6046 100644 --- a/src/numpy/dtype.cpp +++ b/src/numpy/dtype.cpp @@ -98,7 +98,7 @@ python::detail::new_reference dtype::convert(object const & arg, bool align) return python::detail::new_reference(reinterpret_cast(obj)); } -Py_ssize_t dtype::get_itemsize() const { +boost::python::ssize_t dtype::get_itemsize() const { #if NPY_ABI_VERSION < 0x02000000 return reinterpret_cast(ptr())->elsize; #else From af8a5f7cc35d0c98565bb4a3d8bbe646462e6437 Mon Sep 17 00:00:00 2001 From: Johannes Wilde Date: Sat, 14 Dec 2024 16:16:11 +0100 Subject: [PATCH 3/7] Exempt negative itemsize from contiguity and alginment. --- src/numpy/ndarray.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/numpy/ndarray.cpp b/src/numpy/ndarray.cpp index af09ecc338..cb816089c3 100644 --- a/src/numpy/ndarray.cpp +++ b/src/numpy/ndarray.cpp @@ -43,6 +43,8 @@ bool is_c_contiguous(std::vector const & shape, std::vector const & strides, int itemsize) { + // An itemsize less than 0 is not useful - default to non-contiguity. + if (0 > itemsize) return false; std::vector::const_reverse_iterator j = strides.rbegin(); int total = itemsize; for (std::vector::const_reverse_iterator i = shape.rbegin(); i != shape.rend(); ++i, ++j) @@ -57,6 +59,8 @@ bool is_f_contiguous(std::vector const & shape, std::vector const & strides, int itemsize) { + // An itemsize less than 0 is not useful - default to non-contiguity. + if (0 > itemsize) return false; std::vector::const_iterator j = strides.begin(); int total = itemsize; for (std::vector::const_iterator i = shape.begin(); i != shape.end(); ++i, ++j) @@ -70,6 +74,8 @@ bool is_f_contiguous(std::vector const & shape, bool is_aligned(std::vector const & strides, int itemsize) { + // An itemsize less than 0 is not useful - default to non-aligned. + if (0 > itemsize) return false; for (std::vector::const_iterator i = strides.begin(); i != strides.end(); ++i) { if (*i % itemsize) return false; From 753b4349cb7534890f4e9872546fcfac880d8ff7 Mon Sep 17 00:00:00 2001 From: Johannes Wilde Date: Sat, 14 Dec 2024 16:34:51 +0100 Subject: [PATCH 4/7] Add comments. --- src/numpy/ndarray.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/numpy/ndarray.cpp b/src/numpy/ndarray.cpp index cb816089c3..dbb6532a10 100644 --- a/src/numpy/ndarray.cpp +++ b/src/numpy/ndarray.cpp @@ -45,6 +45,8 @@ bool is_c_contiguous(std::vector const & shape, { // An itemsize less than 0 is not useful - default to non-contiguity. if (0 > itemsize) return false; + // Check the strides (stride[n]) match the accumulated shapes as per C-style, + // i.e. starting from rightmost C-index (itemsize * prod_{i in [n, N)} shape[i]). std::vector::const_reverse_iterator j = strides.rbegin(); int total = itemsize; for (std::vector::const_reverse_iterator i = shape.rbegin(); i != shape.rend(); ++i, ++j) @@ -61,6 +63,8 @@ bool is_f_contiguous(std::vector const & shape, { // An itemsize less than 0 is not useful - default to non-contiguity. if (0 > itemsize) return false; + // Check the strides (stride[n]) match the accumulated shapes as per Fortran-style, + // i.e. starting from leftmost C-index (itemsize * prod_{i in [0, n]} shape[i]). std::vector::const_iterator j = strides.begin(); int total = itemsize; for (std::vector::const_iterator i = shape.begin(); i != shape.end(); ++i, ++j) @@ -76,6 +80,7 @@ bool is_aligned(std::vector const & strides, { // An itemsize less than 0 is not useful - default to non-aligned. if (0 > itemsize) return false; + // Check all strides to be aligned to itemsize. for (std::vector::const_iterator i = strides.begin(); i != strides.end(); ++i) { if (*i % itemsize) return false; From 25a1994b008f459ba40ecdf8c322f244a4dcacd7 Mon Sep 17 00:00:00 2001 From: Johannes Wilde Date: Sat, 14 Dec 2024 16:35:56 +0100 Subject: [PATCH 5/7] Add constness for unmodified internal parameters. --- src/numpy/ndarray.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/numpy/ndarray.cpp b/src/numpy/ndarray.cpp index dbb6532a10..addb80dcc9 100644 --- a/src/numpy/ndarray.cpp +++ b/src/numpy/ndarray.cpp @@ -41,7 +41,7 @@ int bitflag_to_numpy(ndarray::bitflag f) bool is_c_contiguous(std::vector const & shape, std::vector const & strides, - int itemsize) + int const itemsize) { // An itemsize less than 0 is not useful - default to non-contiguity. if (0 > itemsize) return false; @@ -59,7 +59,7 @@ bool is_c_contiguous(std::vector const & shape, bool is_f_contiguous(std::vector const & shape, std::vector const & strides, - int itemsize) + int const itemsize) { // An itemsize less than 0 is not useful - default to non-contiguity. if (0 > itemsize) return false; @@ -76,7 +76,7 @@ bool is_f_contiguous(std::vector const & shape, } bool is_aligned(std::vector const & strides, - int itemsize) + int const itemsize) { // An itemsize less than 0 is not useful - default to non-aligned. if (0 > itemsize) return false; @@ -128,7 +128,7 @@ ndarray from_data_impl(void * data, PyErr_SetString(PyExc_ValueError, "Length of shape and strides arrays do not match."); python::throw_error_already_set(); } - int itemsize = dt.get_itemsize(); + int const itemsize = dt.get_itemsize(); int flags = 0; if (writeable) flags |= NPY_ARRAY_WRITEABLE; if (is_c_contiguous(shape, strides, itemsize)) flags |= NPY_ARRAY_C_CONTIGUOUS; From 1105d37dcc7fe97272d65eec831a260307777c6e Mon Sep 17 00:00:00 2001 From: Johannes Wilde Date: Sat, 14 Dec 2024 16:39:59 +0100 Subject: [PATCH 6/7] Change size type to boost::python::ssize_t - as did numpy with version 2.0 and in accordance with PEP 353. --- src/numpy/ndarray.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/numpy/ndarray.cpp b/src/numpy/ndarray.cpp index addb80dcc9..1a1b8fc8e3 100644 --- a/src/numpy/ndarray.cpp +++ b/src/numpy/ndarray.cpp @@ -41,14 +41,14 @@ int bitflag_to_numpy(ndarray::bitflag f) bool is_c_contiguous(std::vector const & shape, std::vector const & strides, - int const itemsize) + boost::python::ssize_t const itemsize) { // An itemsize less than 0 is not useful - default to non-contiguity. if (0 > itemsize) return false; // Check the strides (stride[n]) match the accumulated shapes as per C-style, // i.e. starting from rightmost C-index (itemsize * prod_{i in [n, N)} shape[i]). std::vector::const_reverse_iterator j = strides.rbegin(); - int total = itemsize; + boost::python::ssize_t total = itemsize; for (std::vector::const_reverse_iterator i = shape.rbegin(); i != shape.rend(); ++i, ++j) { if (total != *j) return false; @@ -59,14 +59,14 @@ bool is_c_contiguous(std::vector const & shape, bool is_f_contiguous(std::vector const & shape, std::vector const & strides, - int const itemsize) + boost::python::ssize_t const itemsize) { // An itemsize less than 0 is not useful - default to non-contiguity. if (0 > itemsize) return false; // Check the strides (stride[n]) match the accumulated shapes as per Fortran-style, // i.e. starting from leftmost C-index (itemsize * prod_{i in [0, n]} shape[i]). std::vector::const_iterator j = strides.begin(); - int total = itemsize; + boost::python::ssize_t total = itemsize; for (std::vector::const_iterator i = shape.begin(); i != shape.end(); ++i, ++j) { if (total != *j) return false; @@ -76,7 +76,7 @@ bool is_f_contiguous(std::vector const & shape, } bool is_aligned(std::vector const & strides, - int const itemsize) + boost::python::ssize_t const itemsize) { // An itemsize less than 0 is not useful - default to non-aligned. if (0 > itemsize) return false; @@ -128,7 +128,7 @@ ndarray from_data_impl(void * data, PyErr_SetString(PyExc_ValueError, "Length of shape and strides arrays do not match."); python::throw_error_already_set(); } - int const itemsize = dt.get_itemsize(); + boost::python::ssize_t const itemsize = dt.get_itemsize(); int flags = 0; if (writeable) flags |= NPY_ARRAY_WRITEABLE; if (is_c_contiguous(shape, strides, itemsize)) flags |= NPY_ARRAY_C_CONTIGUOUS; From 6614188fcb9bbe3a2c2ee32f1035f4fad0dc3ce2 Mon Sep 17 00:00:00 2001 From: Johannes Wilde Date: Sat, 14 Dec 2024 16:43:30 +0100 Subject: [PATCH 7/7] Use the const-accessor methods when using const_iterators. --- src/numpy/ndarray.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/numpy/ndarray.cpp b/src/numpy/ndarray.cpp index 1a1b8fc8e3..a85e2520ab 100644 --- a/src/numpy/ndarray.cpp +++ b/src/numpy/ndarray.cpp @@ -47,9 +47,9 @@ bool is_c_contiguous(std::vector const & shape, if (0 > itemsize) return false; // Check the strides (stride[n]) match the accumulated shapes as per C-style, // i.e. starting from rightmost C-index (itemsize * prod_{i in [n, N)} shape[i]). - std::vector::const_reverse_iterator j = strides.rbegin(); + std::vector::const_reverse_iterator j = strides.crbegin(); boost::python::ssize_t total = itemsize; - for (std::vector::const_reverse_iterator i = shape.rbegin(); i != shape.rend(); ++i, ++j) + for (std::vector::const_reverse_iterator i = shape.crbegin(); i != shape.crend(); ++i, ++j) { if (total != *j) return false; total *= (*i); @@ -65,9 +65,9 @@ bool is_f_contiguous(std::vector const & shape, if (0 > itemsize) return false; // Check the strides (stride[n]) match the accumulated shapes as per Fortran-style, // i.e. starting from leftmost C-index (itemsize * prod_{i in [0, n]} shape[i]). - std::vector::const_iterator j = strides.begin(); + std::vector::const_iterator j = strides.cbegin(); boost::python::ssize_t total = itemsize; - for (std::vector::const_iterator i = shape.begin(); i != shape.end(); ++i, ++j) + for (std::vector::const_iterator i = shape.cbegin(); i != shape.cend(); ++i, ++j) { if (total != *j) return false; total *= (*i); @@ -81,7 +81,7 @@ bool is_aligned(std::vector const & strides, // An itemsize less than 0 is not useful - default to non-aligned. if (0 > itemsize) return false; // Check all strides to be aligned to itemsize. - for (std::vector::const_iterator i = strides.begin(); i != strides.end(); ++i) + for (std::vector::const_iterator i = strides.cbegin(); i != strides.cend(); ++i) { if (*i % itemsize) return false; }