From d21b2d6f58de91614703f9467cbf8fa89de26567 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Wed, 21 Feb 2024 19:27:16 -0800 Subject: [PATCH] gh-111140: Improve PyLong_AsNativeBytes API doc example & improve the test (#115380) This expands the examples to cover both realistic use cases for the API. I noticed thing in the test that could be done better so I added those as well: We need to guarantee that all bytes of the result are overwritten and that too many are not written. Tests now pre-fills the result with data in order to ensure that. Co-authored-by: Steve Dower --- Doc/c-api/long.rst | 74 +++++++++++++++++++++++++-------- Lib/test/test_capi/test_long.py | 30 ++++++++++--- 2 files changed, 82 insertions(+), 22 deletions(-) diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index f24282e76a33d15..582f5c7bf05471c 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -358,46 +358,86 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. Copy the Python integer value to a native *buffer* of size *n_bytes*:: - int value; - Py_ssize_t bytes = PyLong_AsNativeBytes(v, &value, sizeof(value), -1); + int32_t value; + Py_ssize_t bytes = PyLong_AsNativeBits(pylong, &value, sizeof(value), -1); if (bytes < 0) { - // Error occurred + // A Python exception was set with the reason. return NULL; } else if (bytes <= (Py_ssize_t)sizeof(value)) { // Success! } else { - // Overflow occurred, but 'value' contains truncated value + // Overflow occurred, but 'value' contains the truncated + // lowest bits of pylong. } + The above example may look *similar* to + :c:func:`PyLong_As* ` + but instead fills in a specific caller defined type and never raises an + error about of the :class:`int` *pylong*'s value regardless of *n_bytes* + or the returned byte count. + + To get at the entire potentially big Python value, this can be used to + reserve enough space and copy it:: + + // Ask how much space we need. + Py_ssize_t expected = PyLong_AsNativeBits(pylong, NULL, 0, -1); + if (expected < 0) { + // A Python exception was set with the reason. + return NULL; + } + assert(expected != 0); // Impossible per the API definition. + uint8_t *bignum = malloc(expected); + if (!bignum) { + PyErr_SetString(PyExc_MemoryError, "bignum malloc failed."); + return NULL; + } + // Safely get the entire value. + Py_ssize_t bytes = PyLong_AsNativeBits(pylong, bignum, expected, -1); + if (bytes < 0) { // Exception set. + free(bignum); + return NULL; + } + else if (bytes > expected) { // Be safe, should not be possible. + PyErr_SetString(PyExc_RuntimeError, + "Unexpected bignum truncation after a size check."); + free(bignum); + return NULL; + } + // The expected success given the above pre-check. + // ... use bignum ... + free(bignum); + *endianness* may be passed ``-1`` for the native endian that CPython was compiled with, or ``0`` for big endian and ``1`` for little. - Return ``-1`` with an exception raised if *pylong* cannot be interpreted as + Returns ``-1`` with an exception raised if *pylong* cannot be interpreted as an integer. Otherwise, return the size of the buffer required to store the value. If this is equal to or less than *n_bytes*, the entire value was - copied. + copied. ``0`` will never be returned. - Unless an exception is raised, all *n_bytes* of the buffer will be written - with as much of the value as can fit. This allows the caller to ignore all - non-negative results if the intent is to match the typical behavior of a - C-style downcast. No exception is set for this case. + Unless an exception is raised, all *n_bytes* of the buffer will always be + written. In the case of truncation, as many of the lowest bits of the value + as could fit are written. This allows the caller to ignore all non-negative + results if the intent is to match the typical behavior of a C-style + downcast. No exception is set on truncation. - Values are always copied as two's-complement, and sufficient buffer will be + Values are always copied as two's-complement and sufficient buffer will be requested to include a sign bit. For example, this may cause an value that fits into 8 bytes when treated as unsigned to request 9 bytes, even though all eight bytes were copied into the buffer. What has been omitted is the - zero sign bit, which is redundant when the intention is to treat the value as - unsigned. + zero sign bit -- redundant if the caller's intention is to treat the value + as unsigned. - Passing zero to *n_bytes* will return the requested buffer size. + Passing zero to *n_bytes* will return the size of a buffer that would + be large enough to hold the value. This may be larger than technically + necessary, but not unreasonably so. .. note:: - When the value does not fit in the provided buffer, the requested size - returned from the function may be larger than necessary. Passing 0 to this - function is not an accurate way to determine the bit length of a value. + Passing *n_bytes=0* to this function is not an accurate way to determine + the bit length of a value. .. versionadded:: 3.13 diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index fc82cbfa66ea7aa..9f5ee507a8eb85e 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -438,7 +438,12 @@ def test_long_asnativebytes(self): if support.verbose: print(f"SIZEOF_SIZE={SZ}\n{MAX_SSIZE=:016X}\n{MAX_USIZE=:016X}") - # These tests check that the requested buffer size is correct + # These tests check that the requested buffer size is correct. + # This matches our current implementation: We only specify that the + # return value is a size *sufficient* to hold the result when queried + # using n_bytes=0. If our implementation changes, feel free to update + # the expectations here -- or loosen them to be range checks. + # (i.e. 0 *could* be stored in 1 byte and 512 in 2) for v, expect in [ (0, SZ), (512, SZ), @@ -453,12 +458,25 @@ def test_long_asnativebytes(self): (-(2**256-1), 33), ]: with self.subTest(f"sizeof-{v:X}"): - buffer = bytearray(1) + buffer = bytearray(b"\x5a") self.assertEqual(expect, asnativebytes(v, buffer, 0, -1), - "PyLong_AsNativeBytes(v, NULL, 0, -1)") + "PyLong_AsNativeBytes(v, , 0, -1)") + self.assertEqual(buffer, b"\x5a", + "buffer overwritten when it should not have been") # Also check via the __index__ path self.assertEqual(expect, asnativebytes(Index(v), buffer, 0, -1), - "PyLong_AsNativeBytes(Index(v), NULL, 0, -1)") + "PyLong_AsNativeBytes(Index(v), , 0, -1)") + self.assertEqual(buffer, b"\x5a", + "buffer overwritten when it should not have been") + + # Test that we populate n=2 bytes but do not overwrite more. + buffer = bytearray(b"\x99"*3) + self.assertEqual(2, asnativebytes(4, buffer, 2, 0), # BE + "PyLong_AsNativeBytes(v, <3 byte buffer>, 2, 0) // BE") + self.assertEqual(buffer, b"\x00\x04\x99") + self.assertEqual(2, asnativebytes(4, buffer, 2, 1), # LE + "PyLong_AsNativeBytes(v, <3 byte buffer>, 2, 1) // LE") + self.assertEqual(buffer, b"\x04\x00\x99") # We request as many bytes as `expect_be` contains, and always check # the result (both big and little endian). We check the return value @@ -510,7 +528,9 @@ def test_long_asnativebytes(self): ]: with self.subTest(f"{v:X}-{len(expect_be)}bytes"): n = len(expect_be) - buffer = bytearray(n) + # Fill the buffer with dummy data to ensure all bytes + # are overwritten. + buffer = bytearray(b"\xa5"*n) expect_le = expect_be[::-1] self.assertEqual(expect_n, asnativebytes(v, buffer, n, 0),