Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix buffer protocol implementation #5407

Merged
merged 6 commits into from
Nov 5, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 51 additions & 5 deletions include/pybind11/detail/class.h
Original file line number Diff line number Diff line change
Expand Up @@ -601,24 +601,70 @@ extern "C" inline int pybind11_getbuffer(PyObject *obj, Py_buffer *view, int fla
set_error(PyExc_BufferError, "Writable buffer requested for readonly storage");
return -1;
}

// Fill in all the information, and then downgrade as requested by the caller, or raise an
// error if that's not possible.
view->obj = obj;
view->ndim = 1;
view->internal = info;
view->buf = info->ptr;
view->itemsize = info->itemsize;
view->len = view->itemsize;
for (auto s : info->shape) {
view->len *= s;
}
view->ndim = (int) info->ndim;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please change this to static_cast<int>(info->ndim)?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was about to ask if we should prefer C++ static_cast over C-style casts.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, although we're not set up to catch C-style casts automatically.

A battle for another day. In the meantime I'm simply changing C-style casts to C++ casts in code that's touched in some way.

view->shape = info->shape.data();
view->strides = info->strides.data();
view->readonly = static_cast<int>(info->readonly);
if ((flags & PyBUF_FORMAT) == PyBUF_FORMAT) {
view->format = const_cast<char *>(info->format.c_str());
}
if ((flags & PyBUF_STRIDES) == PyBUF_STRIDES) {
view->ndim = (int) info->ndim;
view->strides = info->strides.data();
view->shape = info->shape.data();

// Note, all contiguity flags imply PyBUF_STRIDES and lower.
if ((flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS) {
if (PyBuffer_IsContiguous(view, 'C') == 0) {
std::memset(view, 0, sizeof(Py_buffer));
delete info;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For a follow-on PR: We should use std::unique_ptr to make sure there aren't leaks (e.g. when there are C++ exceptions).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will that work when put into view->internal?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@QuLogic Is there a chance that you could try this in a follow-on PR, now that this one is merged?

set_error(PyExc_BufferError,
"C-contiguous buffer requested for discontiguous storage");
return -1;
}
} else if ((flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) {
if (PyBuffer_IsContiguous(view, 'F') == 0) {
std::memset(view, 0, sizeof(Py_buffer));
delete info;
set_error(PyExc_BufferError,
"Fortran-contiguous buffer requested for discontiguous storage");
return -1;
}
} else if ((flags & PyBUF_ANY_CONTIGUOUS) == PyBUF_ANY_CONTIGUOUS) {
if (PyBuffer_IsContiguous(view, 'A') == 0) {
std::memset(view, 0, sizeof(Py_buffer));
delete info;
set_error(PyExc_BufferError, "Contiguous buffer requested for discontiguous storage");
return -1;
}

// If no strides are requested, the buffer must be C-contiguous.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe move this comment to the top of the else if block two lines down?

// https://docs.python.org/3/c-api/buffer.html#contiguity-requests
} else if ((flags & PyBUF_STRIDES) != PyBUF_STRIDES) {
if (PyBuffer_IsContiguous(view, 'C') == 0) {
std::memset(view, 0, sizeof(Py_buffer));
delete info;
set_error(PyExc_BufferError,
"C-contiguous buffer requested for discontiguous storage");
return -1;
}

view->strides = nullptr;

// Since this is a contiguous buffer, it can also pretend to be 1D.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unsure, maybe better to follow numpy (IIUC?) and make this 0?

if ((flags & PyBUF_ND) != PyBUF_ND) {
view->shape = nullptr;
view->ndim = 1;
}
}

Py_INCREF(view->obj);
return 0;
}
Expand Down
170 changes: 170 additions & 0 deletions tests/test_buffers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,125 @@ TEST_SUBMODULE(buffers, m) {
sizeof(float)});
});

// A matrix that uses Fortran storage order.
class FortranMatrix : public Matrix {
public:
FortranMatrix(py::ssize_t rows, py::ssize_t cols) : Matrix(cols, rows) {
print_created(this,
std::to_string(rows) + "x" + std::to_string(cols) + " Fortran matrix");
}

float operator()(py::ssize_t i, py::ssize_t j) const { return Matrix::operator()(j, i); }

float &operator()(py::ssize_t i, py::ssize_t j) { return Matrix::operator()(j, i); }

using Matrix::data;

py::ssize_t rows() const { return Matrix::cols(); }
py::ssize_t cols() const { return Matrix::rows(); }
};
py::class_<FortranMatrix, Matrix>(m, "FortranMatrix", py::buffer_protocol())
.def(py::init<py::ssize_t, py::ssize_t>())

.def("rows", &FortranMatrix::rows)
.def("cols", &FortranMatrix::cols)

/// Bare bones interface
.def("__getitem__",
[](const FortranMatrix &m, std::pair<py::ssize_t, py::ssize_t> i) {
if (i.first >= m.rows() || i.second >= m.cols()) {
throw py::index_error();
}
return m(i.first, i.second);
})
.def("__setitem__",
[](FortranMatrix &m, std::pair<py::ssize_t, py::ssize_t> i, float v) {
if (i.first >= m.rows() || i.second >= m.cols()) {
throw py::index_error();
}
m(i.first, i.second) = v;
})
/// Provide buffer access
.def_buffer([](FortranMatrix &m) -> py::buffer_info {
return py::buffer_info(m.data(), /* Pointer to buffer */
{m.rows(), m.cols()}, /* Buffer dimensions */
/* Strides (in bytes) for each index */
{sizeof(float), sizeof(float) * size_t(m.rows())});
});

// A matrix that uses a discontiguous underlying memory block.
class DiscontiguousMatrix : public Matrix {
public:
DiscontiguousMatrix(py::ssize_t rows,
py::ssize_t cols,
py::ssize_t row_factor,
py::ssize_t col_factor)
: Matrix(rows * row_factor, cols * col_factor), m_row_factor(row_factor),
m_col_factor(col_factor) {
print_created(this,
std::to_string(rows) + "(*" + std::to_string(row_factor) + ")x"
+ std::to_string(cols) + "(*" + std::to_string(col_factor)
+ ") matrix");
}

~DiscontiguousMatrix() {
print_destroyed(this,
std::to_string(rows() / m_row_factor) + "(*"
+ std::to_string(m_row_factor) + ")x"
+ std::to_string(cols() / m_col_factor) + "(*"
+ std::to_string(m_col_factor) + ") matrix");
}

float operator()(py::ssize_t i, py::ssize_t j) const {
return Matrix::operator()(i * m_row_factor, j * m_col_factor);
}

float &operator()(py::ssize_t i, py::ssize_t j) {
return Matrix::operator()(i * m_row_factor, j * m_col_factor);
}

using Matrix::data;

py::ssize_t rows() const { return Matrix::rows() / m_row_factor; }
py::ssize_t cols() const { return Matrix::cols() / m_col_factor; }
py::ssize_t row_factor() const { return m_row_factor; }
py::ssize_t col_factor() const { return m_col_factor; }

private:
py::ssize_t m_row_factor;
py::ssize_t m_col_factor;
};
py::class_<DiscontiguousMatrix, Matrix>(m, "DiscontiguousMatrix", py::buffer_protocol())
.def(py::init<py::ssize_t, py::ssize_t, py::ssize_t, py::ssize_t>())

.def("rows", &DiscontiguousMatrix::rows)
.def("cols", &DiscontiguousMatrix::cols)

/// Bare bones interface
.def("__getitem__",
[](const DiscontiguousMatrix &m, std::pair<py::ssize_t, py::ssize_t> i) {
if (i.first >= m.rows() || i.second >= m.cols()) {
throw py::index_error();
}
return m(i.first, i.second);
})
.def("__setitem__",
[](DiscontiguousMatrix &m, std::pair<py::ssize_t, py::ssize_t> i, float v) {
if (i.first >= m.rows() || i.second >= m.cols()) {
throw py::index_error();
}
m(i.first, i.second) = v;
})
/// Provide buffer access
.def_buffer([](DiscontiguousMatrix &m) -> py::buffer_info {
return py::buffer_info(m.data(), /* Pointer to buffer */
{m.rows(), m.cols()}, /* Buffer dimensions */
/* Strides (in bytes) for each index */
{size_t(m.col_factor()) * sizeof(float) * size_t(m.cols())
* size_t(m.row_factor()),
size_t(m.col_factor()) * sizeof(float)});
});

class BrokenMatrix : public Matrix {
public:
BrokenMatrix(py::ssize_t rows, py::ssize_t cols) : Matrix(rows, cols) {}
Expand Down Expand Up @@ -268,4 +387,55 @@ TEST_SUBMODULE(buffers, m) {
});

m.def("get_buffer_info", [](const py::buffer &buffer) { return buffer.request(); });

// Expose Py_buffer for testing.
m.attr("PyBUF_SIMPLE") = PyBUF_SIMPLE;
m.attr("PyBUF_ND") = PyBUF_ND;
m.attr("PyBUF_STRIDES") = PyBUF_STRIDES;
m.attr("PyBUF_INDIRECT") = PyBUF_INDIRECT;
m.attr("PyBUF_C_CONTIGUOUS") = PyBUF_C_CONTIGUOUS;
m.attr("PyBUF_F_CONTIGUOUS") = PyBUF_F_CONTIGUOUS;
m.attr("PyBUF_ANY_CONTIGUOUS") = PyBUF_ANY_CONTIGUOUS;

m.def("get_py_buffer", [](const py::object &object, int flags) {
Py_buffer buffer;
memset(&buffer, 0, sizeof(Py_buffer));
if (PyObject_GetBuffer(object.ptr(), &buffer, flags) == -1) {
throw py::error_already_set();
}

auto SimpleNamespace = py::module_::import("types").attr("SimpleNamespace");
py::object result = SimpleNamespace("len"_a = buffer.len,
"readonly"_a = buffer.readonly,
"itemsize"_a = buffer.itemsize,
"format"_a = buffer.format,
"ndim"_a = buffer.ndim,
"shape"_a = py::none(),
"strides"_a = py::none(),
"suboffsets"_a = py::none());
if (buffer.shape != nullptr) {
py::list l;
for (auto i = 0; i < buffer.ndim; i++) {
l.append(buffer.shape[i]);
}
py::setattr(result, "shape", l);
}
if (buffer.strides != nullptr) {
py::list l;
for (auto i = 0; i < buffer.ndim; i++) {
l.append(buffer.strides[i]);
}
py::setattr(result, "strides", l);
}
if (buffer.suboffsets != nullptr) {
py::list l;
for (auto i = 0; i < buffer.ndim; i++) {
l.append(buffer.suboffsets[i]);
}
py::setattr(result, "suboffsets", l);
}

PyBuffer_Release(&buffer);
return result;
});
}
114 changes: 114 additions & 0 deletions tests/test_buffers.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,3 +239,117 @@ def test_buffer_exception():
memoryview(m.BrokenMatrix(1, 1))
assert isinstance(excinfo.value.__cause__, RuntimeError)
assert "for context" in str(excinfo.value.__cause__)


def test_to_pybuffer():
mat = m.Matrix(5, 4)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To compare with NumPy, you can swap:

  • Matrix(5, 4) with np.empty((5, 4), dtype=np.float32)
  • FortranMatrix(5, 4) with np.empty((5, 4), dtype=np.float32, order='F')
  • DiscontiguousMatrix(5, 4, 2, 3) with np.empty((5*2, 4*3), dtype=np.float32)[::2, ::3]
    Also replace mat.rows() with 5 and mat.cols() with 4 (maybe I should do that so we're always comparing with 'known' values instead of internals?)

Then running the tests show two differences:

  • for PyBUF_SIMPLE, NumPy returns ndim=0, which I guess is truer in spirit, and we could do here too.
  • for contiguity mismatches, it raises ValueError instead of BufferError, which is maybe a bug on their part, but close enough to the same.

Could you please try that? Roughly:

  • Split the added tests into multiple smaller ones.
  • Use @pytest.mark.parametrize to loop over the np.empty() equivalents above.

Motivation behind my suggestion:

  • We want to ensure that we're compatible with numpy, and that it stays that way in the future.

(I think you can use pytest.importorskip("numpy"), although we only want to skip the tests that depend on numpy (not all tests in this file). Not sure exactly how to best make that work.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I think you can use pytest.importorskip("numpy"), although we only want to skip the tests that depend on numpy (not all tests in this file). Not sure exactly how to best make that work.)

No need; it's already at the top of the file.


info = m.get_py_buffer(mat, m.PyBUF_SIMPLE)
assert info.itemsize == ctypes.sizeof(ctypes.c_float)
assert info.len == mat.rows() * mat.cols() * info.itemsize
assert info.ndim == 1 # See discussion on PR #5407.
assert info.shape is None
assert info.strides is None
assert info.suboffsets is None
assert not info.readonly
info = m.get_py_buffer(mat, m.PyBUF_ND)
assert info.itemsize == ctypes.sizeof(ctypes.c_float)
assert info.len == mat.rows() * mat.cols() * info.itemsize
assert info.ndim == 2
assert info.shape == [5, 4]
assert info.strides is None
assert info.suboffsets is None
assert not info.readonly
info = m.get_py_buffer(mat, m.PyBUF_STRIDES)
assert info.itemsize == ctypes.sizeof(ctypes.c_float)
assert info.len == mat.rows() * mat.cols() * info.itemsize
assert info.ndim == 2
assert info.shape == [5, 4]
assert info.strides == [4 * info.itemsize, info.itemsize]
assert info.suboffsets is None
assert not info.readonly
info = m.get_py_buffer(mat, m.PyBUF_INDIRECT)
assert info.itemsize == ctypes.sizeof(ctypes.c_float)
assert info.len == mat.rows() * mat.cols() * info.itemsize
assert info.ndim == 2
assert info.shape == [5, 4]
assert info.strides == [4 * info.itemsize, info.itemsize]
assert info.suboffsets is None # Should be filled in here, but we don't use it.
assert not info.readonly

# A Fortran-shaped buffer can only be accessed at PyBUF_STRIDES level or higher.
mat = m.FortranMatrix(5, 4)
info = m.get_py_buffer(mat, m.PyBUF_STRIDES)
assert info.itemsize == ctypes.sizeof(ctypes.c_float)
assert info.len == mat.rows() * mat.cols() * info.itemsize
assert info.ndim == 2
assert info.shape == [5, 4]
assert info.strides == [info.itemsize, 5 * info.itemsize]
assert info.suboffsets is None
assert not info.readonly
info = m.get_py_buffer(mat, m.PyBUF_INDIRECT)
assert info.itemsize == ctypes.sizeof(ctypes.c_float)
assert info.len == mat.rows() * mat.cols() * info.itemsize
assert info.ndim == 2
assert info.shape == [5, 4]
assert info.strides == [info.itemsize, 5 * info.itemsize]
assert info.suboffsets is None # Should be filled in here, but we don't use it.
assert not info.readonly

mat = m.DiscontiguousMatrix(5, 4, 2, 3)
info = m.get_py_buffer(mat, m.PyBUF_STRIDES)
assert info.itemsize == ctypes.sizeof(ctypes.c_float)
assert info.len == mat.rows() * mat.cols() * info.itemsize
assert info.ndim == 2
assert info.shape == [5, 4]
assert info.strides == [2 * 4 * 3 * info.itemsize, 3 * info.itemsize]
assert info.suboffsets is None
assert not info.readonly


def test_to_pybuffer_contiguity():
def check_strides(mat):
# The full block is memset to 0, so fill it with non-zero in real spots.
expected = np.arange(1, mat.rows() * mat.cols() + 1).reshape(
(mat.rows(), mat.cols())
)
for i in range(mat.rows()):
for j in range(mat.cols()):
mat[i, j] = expected[i, j]
# If all strides are correct, the exposed buffer should match the input.
np.testing.assert_array_equal(np.array(mat), expected)

mat = m.Matrix(5, 4)
check_strides(mat)
# Should work in C-contiguous mode, but not Fortran order.
m.get_py_buffer(mat, m.PyBUF_C_CONTIGUOUS)
m.get_py_buffer(mat, m.PyBUF_ANY_CONTIGUOUS)
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_F_CONTIGUOUS)

mat = m.FortranMatrix(5, 4)
check_strides(mat)
# These flags imply C-contiguity, so won't work.
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_SIMPLE)
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_ND)
# Should work in Fortran-contiguous mode, but not C order.
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_C_CONTIGUOUS)
m.get_py_buffer(mat, m.PyBUF_ANY_CONTIGUOUS)
m.get_py_buffer(mat, m.PyBUF_F_CONTIGUOUS)

mat = m.DiscontiguousMatrix(5, 4, 2, 3)
check_strides(mat)
# Should never work.
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_SIMPLE)
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_ND)
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_C_CONTIGUOUS)
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_ANY_CONTIGUOUS)
with pytest.raises(BufferError):
m.get_py_buffer(mat, m.PyBUF_F_CONTIGUOUS)