add seamless numpy-like functionality

AMYPAD · Jan 18, 2021 · bb6aee2 · bb6aee2
1 parent e663061
commit bb6aee2
Show file tree

Hide file tree

Showing 7 changed files with 166 additions and 57 deletions.
diff --git a/README.rst b/README.rst
@@ -8,12 +8,23 @@ Unifying Python/C++/CUDA memory: Python buffered array <-> C++11 ``std::vector``
 Why
 ~~~
 
-Other libraries which expose functionality to convert/pass data formats between these different language spaces tend to be bloated, unnecessarily complex, and relatively unmaintainable. By comparison, ``cuvec`` uses the latest functionality of Python, C++, and CUDA to keep its code (and yours) as succinct as possible. "Native" containers are exposed so your code follows the conventions of your language. Want something which works like a ``numpy.ndarray``? Not a problem. Want to convert it to a ``std::vector``? Or perhaps a raw ``float *`` to use in a CUDA kernel? Trivial.
+Data should be manipulated using the existing functionality and design paradigms of each programming language. Python code should be Pythonic. CUDA code should be... CUDActic? C code should be... er, Clean.
+
+However, in practice, converting between data formats across languages can be a pain.
+
+Other libraries which expose functionality to convert/pass data formats between these different language spaces tend to be bloated, unnecessarily complex, and relatively unmaintainable. By comparison, ``cuvec`` uses the latest functionality of Python, C/C++11, and CUDA to keep its code (and yours) as succinct as possible. "Native" containers are exposed so your code follows the conventions of your language. Want something which works like a ``numpy.ndarray``? Not a problem. Want to convert it to a ``std::vector``? Or perhaps a raw ``float *`` to use in a CUDA kernel? Trivial.
 
 Non objectives
 --------------
 
-Anything to do with mathematical functionality. Even something as simple as setting element values is left to the user and/or pre-existing features - simply use ``numpy.ndarray.fill()`` (Python/Numpy), ``std::fill()`` (C++), or ``memset()`` (C/CUDA).
+Anything to do with mathematical functionality. The aim is to expose functionality, not create it.
+
+Even something as simple as setting element values is left to the user and/or pre-existing features - for example:
+
+- Python: ``arr[:] = value``
+- Numpy: ``arr.fill(value)``
+- C++: ``std::fill(vec.begin(), vec.end(), value)``
+- C/CUDA: ``memset(vec.data(), value, sizeof(T) * vec.size())`` (note: sets every *byte* to ``value``)
 
 Install
 ~~~~~~~
@@ -38,7 +49,10 @@ Creating
 .. code:: python
 
     import cuvec
-    vec = cuvec.vector((1337, 42), "float32")
+    arr = cuvec.zeros((1337, 42), "float32") # like `numpy.ndarray`
+    # print(sum(arr))
+    # some_numpy_func(arr)
+    # some_cpython_api_func(arr.cuvec)
 
 **CPython API**
 
@@ -66,6 +80,14 @@ Converting
 
 The following involve no memory copies.
 
+**Python** to **CPython API**
+
+.. code:: python
+
+    # import cuvec, my_custom_lib
+    # arr = cuvec.zeros((1337, 42), "float32")
+    my_custom_lib.some_cpython_api_func(arr.cuvec)
+
 **CPython API** to **C++**
 
 .. code:: cpp
@@ -81,7 +103,7 @@ The following involve no memory copies.
 
     /// input: `CuVec<type> vec`
     /// output: `type *arr`
-    float *arr = vec->data(); // pointer to `cudaMallocManaged()` data
+    float *arr = vec.data(); // pointer to `cudaMallocManaged()` data
 
 External CMake Projects
 ~~~~~~~~~~~~~~~~~~~~~~~

diff --git a/cuvec/__init__.py b/cuvec/__init__.py
@@ -18,8 +18,10 @@
 __all__ = [
     # config
     'cmake_prefix', 'include_path',
+    # classes
+    'CuVec',
     # functions
-    'dev_sync', 'from_numpy', 'zeros',
+    'dev_sync', 'copy', 'zeros', 'cu_copy', 'cu_zeros',
     # data
     'typecodes', 'vec_types'] # yapf: disable
 
@@ -29,12 +31,12 @@
 
 try:
     from .cuvec import dev_sync
-except ImportError as err:
+except ImportError as err: # pragma: no cover
     from warnings import warn
     warn(str(err), UserWarning)
 else:
-    from .helpers import from_numpy, zeros
-    from .pycuvec import typecodes, vec_types
+    from .helpers import CuVec, copy, zeros
+    from .pycuvec import cu_copy, cu_zeros, typecodes, vec_types
 
 # for use in `cmake -DCMAKE_PREFIX_PATH=...`
 cmake_prefix = Path(resource_filename(__name__, "cmake")).resolve()

diff --git a/cuvec/helpers.py b/cuvec/helpers.py
@@ -1,24 +1,55 @@
 """Useful helper functions."""
-from collections.abc import Sequence
+import logging
+from textwrap import dedent
 
 import numpy as np
 
-from .pycuvec import vec_types
+from .pycuvec import cu_copy, cu_zeros, vec_types
+
+log = logging.getLogger(__name__)
+
+
+class CuVec(np.ndarray):
+    """
+    A `numpy.ndarray` compatible view with a `cuvec` member containing the
+    underlying `cuvec.Vector_*` object (for use in CPython API function calls).
+
+    Instances are created by `__new__` from an existing object; to allocate
+    new memory use `cuvec.zeros()` or `cuvec.copy()` instead.
+    """
+    # raw `cuvec.Vector_*` classes which can be wrapped directly
+    _Vector_types = tuple(vec_types.values())
+
+    def __new__(cls, arr, cuvec=None):
+        """
+        arr: `cuvec.CuVec`, raw `cuvec.Vector_*`, or `numpy.ndarray`
+        cuvec: currently unused
+
+        Returns a `CuVec` which wraps `arr` (raw `Vector_*`), shares the
+        `cuvec` member of `arr` (`CuVec`), or holds a copy of `arr`
+        (`numpy.ndarray`).
+        Raises `NotImplementedError` for any other type of `arr`.
+        """
+        if isinstance(arr, CuVec._Vector_types):
+            log.debug("wrap raw %s", type(arr))
+            obj = np.asarray(arr).view(cls)
+            obj.cuvec = arr
+            return obj
+        # NB: must be tested before the generic `numpy.ndarray` case below
+        # since `CuVec` is itself a `numpy.ndarray` subclass
+        if isinstance(arr, CuVec):
+            log.debug("new view")
+            obj = np.asarray(arr).view(cls)
+            obj.cuvec = arr.cuvec
+            return obj
+        if isinstance(arr, np.ndarray):
+            log.debug("copy")
+            return copy(arr)
+        raise NotImplementedError(
+            dedent("""\
+            Not intended for explicit construction
+            (do not do `cuvec.CuVec((42, 1337))`;
+            instead use `cuvec.zeros((42, 1337))`)"""))
 
 
 def zeros(shape, dtype="float32"):
     """
-    Returns a new `Vector_*` of the specified shape and data type
-    (`cuvec` equivalent of `numpy.zeros`).
+    Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
+    of the specified shape and data type (`cuvec` equivalent of `numpy.zeros`).
     """
-    return vec_types[np.dtype(dtype)](shape if isinstance(shape, Sequence) else (shape,))
+    return CuVec(cu_zeros(shape, dtype))
 
 
-def from_numpy(arr):
+def copy(arr):
     """
-    Returns a new `Vector_*` of the specified shape and data type
+    Returns a `cuvec.CuVec` view of a new `numpy.ndarray`
+    with data copied from the specified `arr`
     (`cuvec` equivalent of `numpy.copy`).
     """
-    res = zeros(arr.shape, arr.dtype)
-    np.asarray(res)[:] = arr[:]
-    return res
+    return CuVec(cu_copy(arr))
diff --git a/cuvec/pycuvec.py b/cuvec/pycuvec.py
@@ -1,5 +1,6 @@
 """Thin wrappers around `cuvec` C++/CUDA module"""
 import array
+from collections.abc import Sequence
 
 import numpy as np
 
@@ -30,3 +31,19 @@
     np.dtype('uint64'): Vector_Q,
     np.dtype('float32'): Vector_f,
     np.dtype('float64'): Vector_d}
+
+
+def cu_zeros(shape, dtype="float32"):
+    """
+    Returns a new `<cuvec.Vector_*>` of the specified shape and data type.
+
+    shape: a sequence of dimensions; any non-sequence is wrapped in a 1-tuple
+    dtype: anything accepted by `numpy.dtype` which is a key of `vec_types`
+    """
+    vector_cls = vec_types[np.dtype(dtype)]
+    if not isinstance(shape, Sequence):
+        shape = (shape,)
+    return vector_cls(shape)
+
+
+def cu_copy(arr):
+    """
+    Returns a new `<cuvec.Vector_*>` (same shape and data type as `arr`)
+    holding a copy of the data in `arr`.
+    """
+    vec = cu_zeros(arr.shape, arr.dtype)
+    src = arr[:]
+    # `numpy.asarray(vec)` is assigned through, so the write lands in `vec`
+    np.asarray(vec)[:] = src
+    return vec
diff --git a/tests/test_cuvec.py b/tests/test_cuvec.py
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -0,0 +1,59 @@
+import logging
+
+import numpy as np
+from pytest import mark, raises
+
+import cuvec
+
+shape = 127, 344, 344
+
+
+@mark.parametrize("spec,result", [("i", np.int32), ("d", np.float64)])
+def test_zeros(spec, result):
+    """`cuvec.zeros` gives an all-zero array of the requested type & shape."""
+    arr = np.asarray(cuvec.zeros(shape, spec))
+    assert arr.dtype == result
+    assert arr.shape == shape
+    assert not arr.any()
+
+
+def test_copy():
+    """`cuvec.copy` preserves the shape, data type and values of its input."""
+    src = np.random.random(shape)
+    dst = np.asarray(cuvec.copy(src))
+    assert src.shape == dst.shape
+    assert src.dtype == dst.dtype
+    assert (src == dst).all()
+
+
+def test_CuVec_creation(caplog):
+    """
+    Checks each `cuvec.CuVec` construction path via the debug messages it logs
+    (`10` in the expected record tuples is the numeric `logging.DEBUG` level).
+    """
+    # the `arr` argument is required
+    with raises(TypeError):
+        cuvec.CuVec()
+
+    # a shape tuple is not a supported input type
+    with raises(NotImplementedError):
+        cuvec.CuVec(shape)
+
+    caplog.set_level(logging.DEBUG)
+    caplog.clear()
+    # `numpy.ndarray` input: copied, then the resulting raw vector is wrapped
+    v = cuvec.CuVec(np.ones(shape, dtype='h'))
+    assert [i[1:] for i in caplog.record_tuples] == [(10, 'copy'),
+                                                     (10, "wrap raw <class 'Vector_h'>")]
+    assert v.shape == shape
+    assert v.dtype.char == 'h'
+    assert (v == 1).all()
+
+    caplog.clear()
+    # `zeros()` only wraps a raw vector (no copy is logged)
+    v = cuvec.zeros(shape, 'd')
+    assert [i[1:] for i in caplog.record_tuples] == [(10, "wrap raw <class 'Vector_d'>")]
+
+    caplog.clear()
+    # element assignment must not log anything
+    v[0, 0, 0] = 1
+    assert not caplog.record_tuples
+    # `CuVec` input: a new view is created
+    w = cuvec.CuVec(v)
+    assert [i[1:] for i in caplog.record_tuples] == [(10, "new view")]
+
+    caplog.clear()
+    # `v` and `w` share data: writes via `v` are visible via `w`
+    assert w[0, 0, 0] == 1
+    v[0, 0, 0] = 9
+    assert w[0, 0, 0] == 9
+    assert v.cuvec is w.cuvec
+    assert v.data == w.data
+    assert not caplog.record_tuples
diff --git a/tests/test_pycuvec.py b/tests/test_pycuvec.py
@@ -4,19 +4,24 @@
 import cuvec
 
 
-@mark.parametrize("spec,result", [("i", np.int32), ("d", np.float64)])
-def test_zeros(spec, result):
-    shape = 127, 344, 344
-    a = np.asarray(cuvec.zeros(shape, spec))
-    assert a.dtype == result
-    assert a.shape == shape
+@mark.parametrize("tp", list(cuvec.typecodes))
+def test_Vector_asarray(tp):
+    """tp(char): any of bBhHiIqQfd"""
+    v = getattr(cuvec.cuvec, f"Vector_{tp}")((1, 2, 3))
+    assert str(v) == f"Vector_{tp}((1, 2, 3))"
+    a = np.asarray(v)
     assert not a.any()
+    a[0, 0] = 42
+    b = np.asarray(v)
+    assert (b[0, 0] == 42).all()
+    assert not b[1:, 1:].any()
+    assert a.dtype.char == tp
+    del a, b, v
 
 
-def test_from_numpy():
+def test_Vector_strides():
     shape = 127, 344, 344
-    a = np.random.random(shape)
-    b = np.asarray(cuvec.from_numpy(a))
-    assert a.shape == b.shape
-    assert a.dtype == b.dtype
-    assert (a == b).all()
+    v = cuvec.cuvec.Vector_f(shape)
+    a = np.asarray(v)
+    assert a.shape == shape
+    assert a.strides == (473344, 1376, 4)