Skip to content

Commit

Permalink
add seamless numpy-like functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
casperdcl committed Jan 18, 2021
1 parent e663061 commit bb6aee2
Show file tree
Hide file tree
Showing 7 changed files with 166 additions and 57 deletions.
30 changes: 26 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,23 @@ Unifying Python/C++/CUDA memory: Python buffered array <-> C++11 ``std::vector``
Why
~~~

Other libraries which expose functionality to convert/pass data formats between these different language spaces tend to be bloated, unnecessarily complex, and relatively unmaintainable. By comparison, ``cuvec`` uses the latest functionality of Python, C++, and CUDA to keep its code (and yours) as succinct as possible. "Native" containers are exposed so your code follows the conventions of your language. Want something which works like a ``numpy.ndarray``? Not a problem. Want to convert it to a ``std::vector``? Or perhaps a raw ``float *`` to use in a CUDA kernel? Trivial.
Data should be manipulated using the existing functionality and design paradigms of each programming language. Python code should be Pythonic. CUDA code should be... CUDActic? C code should be... er, Clean.

However, in practice converting between data formats across languages can be a pain.

Other libraries which expose functionality to convert/pass data formats between these different language spaces tend to be bloated, unnecessarily complex, and relatively unmaintainable. By comparison, ``cuvec`` uses the latest functionality of Python, C/C++11, and CUDA to keep its code (and yours) as succinct as possible. "Native" containers are exposed so your code follows the conventions of your language. Want something which works like a ``numpy.ndarray``? Not a problem. Want to convert it to a ``std::vector``? Or perhaps a raw ``float *`` to use in a CUDA kernel? Trivial.

Non-objectives
--------------

Anything to do with mathematical functionality. Even something as simple as setting element values is left to the user and/or pre-existing features - simply use ``numpy.ndarray.fill()`` (Python/Numpy), ``std::fill()`` (C++), or ``memset()`` (C/CUDA).
Anything to do with mathematical functionality. The aim is to expose functionality, not create it.

Even something as simple as setting element values is left to the user and/or pre-existing features - for example:

- Python: ``arr[:] = value``
- Numpy: ``arr.fill(value)``
- C++: ``std::fill(vec.begin(), vec.end(), value)``
- C/CUDA: ``memset(vec.data(), value, sizeof(T) * vec.size())``

Install
~~~~~~~
Expand All @@ -38,7 +49,10 @@ Creating
.. code:: python
import cuvec
vec = cuvec.vector((1337, 42), "float32")
arr = cuvec.zeros((1337, 42), "float32") # like `numpy.ndarray`
# print(sum(arr))
# some_numpy_func(arr)
# some_cpython_api_func(arr.cuvec)
**CPython API**

Expand Down Expand Up @@ -66,6 +80,14 @@ Converting

The following involve no memory copies.

**Python** to **CPython API**

.. code:: python
# import cuvec, my_custom_lib
# arr = cuvec.zeros((1337, 42), "float32")
my_custom_lib.some_cpython_api_func(arr.cuvec)
**CPython API** to **C++**

.. code:: cpp
Expand All @@ -81,7 +103,7 @@ The following involve no memory copies.
/// input: `CuVec<type> vec`
/// output: `type *arr`
float *arr = vec->data(); // pointer to `cudaMallocManaged()` data
float *arr = vec.data(); // pointer to `cudaMallocManaged()` data
External CMake Projects
~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
10 changes: 6 additions & 4 deletions cuvec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
__all__ = [
# config
'cmake_prefix', 'include_path',
# classes
'CuVec',
# functions
'dev_sync', 'from_numpy', 'zeros',
'dev_sync', 'copy', 'zeros', 'cu_copy', 'cu_zeros',
# data
'typecodes', 'vec_types'] # yapf: disable

Expand All @@ -29,12 +31,12 @@

try:
from .cuvec import dev_sync
except ImportError as err:
except ImportError as err: # pragma: no cover
from warnings import warn
warn(str(err), UserWarning)
else:
from .helpers import from_numpy, zeros
from .pycuvec import typecodes, vec_types
from .helpers import CuVec, copy, zeros
from .pycuvec import cu_copy, cu_zeros, typecodes, vec_types

# for use in `cmake -DCMAKE_PREFIX_PATH=...`
cmake_prefix = Path(resource_filename(__name__, "cmake")).resolve()
Expand Down
51 changes: 41 additions & 10 deletions cuvec/helpers.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,55 @@
"""Useful helper functions."""
from collections.abc import Sequence
import logging
from textwrap import dedent

import numpy as np

from .pycuvec import vec_types
from .pycuvec import cu_copy, cu_zeros, vec_types

log = logging.getLogger(__name__)


class CuVec(np.ndarray):
    """
    A `numpy.ndarray` compatible view with a `cuvec` member containing the
    underlying `cuvec.Vector_*` object (for use in CPython API function calls).

    Not intended for explicit construction from a shape:
    use `cuvec.zeros()` or `cuvec.copy()` instead.
    """
    # every raw `Vector_*` class, used to recognise objects which can be wrapped
    _Vector_types = tuple(vec_types.values())

    def __new__(cls, arr, cuvec=None):
        """
        arr: `cuvec.CuVec`, raw `cuvec.Vector_*`, or `numpy.ndarray`
        cuvec: not used by this constructor

        Returns:
          - raw `Vector_*`: a view of `arr`, with `cuvec` set to `arr`
          - `CuVec`: a new view of `arr`, sharing `arr.cuvec`
          - other `numpy.ndarray`: the result of `copy(arr)`
        Raises `NotImplementedError` for any other type of `arr`
        (e.g. a shape tuple).
        """
        if isinstance(arr, CuVec._Vector_types):
            log.debug("wrap raw %s", type(arr))
            raw = arr
        elif isinstance(arr, CuVec):
            # must be tested before the generic `ndarray` case below
            # since `CuVec` is itself an `ndarray` subclass
            log.debug("new view")
            raw = arr.cuvec
        elif isinstance(arr, np.ndarray):
            log.debug("copy")
            return copy(arr)
        else:
            raise NotImplementedError(
                dedent("""\
                    Not intended for explicit construction
                    (do not do `cuvec.CuVec((42, 1337))`;
                    instead use `cuvec.zeros((42, 1337))`)"""))
        obj = np.asarray(arr).view(cls)
        # keep a reference to the underlying `Vector_*` alongside the view
        obj.cuvec = raw
        return obj


def zeros(shape, dtype="float32"):
"""
Returns a new `Vector_*` of the specified shape and data type
(`cuvec` equivalent of `numpy.zeros`).
Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
of the specified shape and data type (`cuvec` equivalent of `numpy.zeros`).
"""
return vec_types[np.dtype(dtype)](shape if isinstance(shape, Sequence) else (shape,))
return CuVec(cu_zeros(shape, dtype))


def from_numpy(arr):
def copy(arr):
"""
Returns a new `Vector_*` of the specified shape and data type
Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
with data copied from the specified `arr`
(`cuvec` equivalent of `numpy.copy`).
"""
res = zeros(arr.shape, arr.dtype)
np.asarray(res)[:] = arr[:]
return res
return CuVec(cu_copy(arr))
17 changes: 17 additions & 0 deletions cuvec/pycuvec.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Thin wrappers around `cuvec` C++/CUDA module"""
import array
from collections.abc import Sequence

import numpy as np

Expand Down Expand Up @@ -30,3 +31,19 @@
np.dtype('uint64'): Vector_Q,
np.dtype('float32'): Vector_f,
np.dtype('float64'): Vector_d}


def cu_zeros(shape, dtype="float32"):
    """
    Create a new `<cuvec.Vector_*>` of the given shape and data type.

    shape: a sequence of dimensions, or a single (non-sequence) value
      which is treated as a 1-tuple.
    dtype: anything accepted by `numpy.dtype` which is a key of `vec_types`.
    """
    vector_cls = vec_types[np.dtype(dtype)]
    if isinstance(shape, Sequence):
        dims = shape
    else:
        dims = (shape,)
    return vector_cls(dims)


def cu_copy(arr):
    """
    Create a new `<cuvec.Vector_*>` holding a copy of the data in `arr`.

    arr: array-like object exposing `shape` and `dtype` and supporting `[:]`.
    """
    vec = cu_zeros(arr.shape, arr.dtype)
    # write through a `numpy` view of the new vector
    target = np.asarray(vec)
    target[:] = arr[:]
    return vec
27 changes: 0 additions & 27 deletions tests/test_cuvec.py

This file was deleted.

59 changes: 59 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import logging

import numpy as np
from pytest import mark, raises

import cuvec

shape = 127, 344, 344


@mark.parametrize("spec,result", [("i", np.int32), ("d", np.float64)])
def test_zeros(spec, result):
    """`cuvec.zeros` yields an all-zero array of the requested shape & dtype."""
    zeroed = np.asarray(cuvec.zeros(shape, spec))
    assert zeroed.dtype == result
    assert zeroed.shape == shape
    assert not zeroed.any()


def test_copy():
    """`cuvec.copy` preserves the shape, dtype and values of its input."""
    src = np.random.random(shape)
    dst = np.asarray(cuvec.copy(src))
    assert src.shape == dst.shape
    assert src.dtype == dst.dtype
    # shapes already match, so this is an element-wise comparison
    assert np.array_equal(src, dst)


def test_CuVec_creation(caplog):
    """Check which code path `cuvec.CuVec` takes for each kind of input."""

    def logged():
        # drop the logger name, keeping `(level, message)` pairs
        return [record[1:] for record in caplog.record_tuples]

    # `arr` is a required argument
    with raises(TypeError):
        cuvec.CuVec()

    # a bare shape is rejected
    with raises(NotImplementedError):
        cuvec.CuVec(shape)

    caplog.set_level(logging.DEBUG)

    # plain `numpy.ndarray`: copied, then the resulting raw vector is wrapped
    caplog.clear()
    ones = cuvec.CuVec(np.ones(shape, dtype='h'))
    assert logged() == [(logging.DEBUG, 'copy'),
                        (logging.DEBUG, "wrap raw <class 'Vector_h'>")]
    assert ones.shape == shape
    assert ones.dtype.char == 'h'
    assert (ones == 1).all()

    # `zeros` wraps a freshly created raw vector
    caplog.clear()
    base = cuvec.zeros(shape, 'd')
    assert logged() == [(logging.DEBUG, "wrap raw <class 'Vector_d'>")]

    # element assignment logs nothing; wrapping a `CuVec` makes a new view
    caplog.clear()
    base[0, 0, 0] = 1
    assert not caplog.record_tuples
    alias = cuvec.CuVec(base)
    assert logged() == [(logging.DEBUG, "new view")]

    # the view shares both the data and the underlying vector object
    caplog.clear()
    assert alias[0, 0, 0] == 1
    base[0, 0, 0] = 9
    assert alias[0, 0, 0] == 9
    assert base.cuvec is alias.cuvec
    assert base.data == alias.data
    assert not caplog.record_tuples
29 changes: 17 additions & 12 deletions tests/test_pycuvec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,24 @@
import cuvec


@mark.parametrize("spec,result", [("i", np.int32), ("d", np.float64)])
def test_zeros(spec, result):
shape = 127, 344, 344
a = np.asarray(cuvec.zeros(shape, spec))
assert a.dtype == result
assert a.shape == shape
@mark.parametrize("tp", list(cuvec.typecodes))
def test_Vector_asarray(tp):
"""tp(char): any of bBhHiIqQfd"""
v = getattr(cuvec.cuvec, f"Vector_{tp}")((1, 2, 3))
assert str(v) == f"Vector_{tp}((1, 2, 3))"
a = np.asarray(v)
assert not a.any()
a[0, 0] = 42
b = np.asarray(v)
assert (b[0, 0] == 42).all()
assert not b[1:, 1:].any()
assert a.dtype.char == tp
del a, b, v


def test_from_numpy():
def test_Vector_strides():
shape = 127, 344, 344
a = np.random.random(shape)
b = np.asarray(cuvec.from_numpy(a))
assert a.shape == b.shape
assert a.dtype == b.dtype
assert (a == b).all()
v = cuvec.cuvec.Vector_f(shape)
a = np.asarray(v)
assert a.shape == shape
assert a.strides == (473344, 1376, 4)

0 comments on commit bb6aee2

Please sign in to comment.