Move the cuVS Python CAGRA bindings into a `cuvs.neighbors.cagra` sub-package.

* python/cuvs/CMakeLists.txt: look up `cuvs_c`, include `rapids-cython-core`, and add the
  `cuvs/common` and `cuvs/neighbors` subdirectories. `find_package(cuvs_c)` reports its result
  in `cuvs_c_FOUND`, so it is mirrored into `cuvs_FOUND`, which the unchanged code still tests.
* cuvs/common: new `c_api.pxd` holding the `cuvs/core/c_api.h` declarations; build
  `cydlpack.pyx`; declare `dlpack_c` in `cydlpack.pxd`; fix the `manager_ctx` typo.
* cuvs/neighbors: `cagra.pyx` and `cagra_c.pxd` move under `cagra/`; symbols are referenced
  through the `cagra_c` cimport and `cuvs.common.c_api`.

diff --git a/build.sh b/build.sh
index 6dd250c51..41b25caf7 100755
--- a/build.sh
+++ b/build.sh
@@ -305,6 +305,9 @@ if [[ ${CMAKE_TARGET} == "" ]]; then
 fi
 
 # Append `-DFIND_CUVS_CPP=ON` to EXTRA_CMAKE_ARGS unless a user specified the option.
+
+
+
 SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}"
 if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_CUVS_CPP"* ]]; then
     SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS} -DFIND_CUVS_CPP=ON"
diff --git a/python/cuvs/CMakeLists.txt b/python/cuvs/CMakeLists.txt
index 64c058255..0938cf4a6 100644
--- a/python/cuvs/CMakeLists.txt
+++ b/python/cuvs/CMakeLists.txt
@@ -37,14 +37,18 @@ option(FIND_CUVS_CPP "Search for existing CUVS C++ installations before defaulti
        OFF
 )
 
+message(STATUS "FIND_CUVS_CPP: ${FIND_CUVS_CPP}")
+
 # If the user requested it we attempt to find CUVS.
 if(FIND_CUVS_CPP)
-  find_package(cuvs ${cuvs_version})
+  find_package(cuvs_c ${cuvs_version})
+  # find_package() reports its result in cuvs_c_FOUND; mirror it into the variable tested below.
+  set(cuvs_FOUND "${cuvs_c_FOUND}")
 else()
   set(cuvs_FOUND OFF)
 endif()
 
-include(rapids-cython)
+include(rapids-cython-core)
 
 if(NOT cuvs_FOUND)
   set(BUILD_TESTS OFF)
@@ -60,6 +64,9 @@ endif()
 
 rapids_cython_init()
 
+add_subdirectory(cuvs/common)
+add_subdirectory(cuvs/neighbors)
+
 if(DEFINED cython_lib_dir)
   rapids_cython_add_rpath_entries(TARGET cuvs PATHS "${cython_lib_dir}")
 endif()
diff --git a/python/cuvs/cuvs/common/CMakeLists.txt b/python/cuvs/cuvs/common/CMakeLists.txt
index b6af82882..c5f623f4b 100644
--- a/python/cuvs/cuvs/common/CMakeLists.txt
+++ b/python/cuvs/cuvs/common/CMakeLists.txt
@@ -13,7 +13,7 @@
 # =============================================================================
 
 # Set the list of Cython files to build
-set(cython_sources dlpack.pyx)
+set(cython_sources cydlpack.pyx)
 set(linked_libraries cuvs::cuvs)
 
 # Build all of the Cython targets
diff --git a/python/cuvs/cuvs/common/c_api.pxd b/python/cuvs/cuvs/common/c_api.pxd
new file mode 100644
index 000000000..a8b91773f
--- /dev/null
+++ b/python/cuvs/cuvs/common/c_api.pxd
@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# cython: language_level=3
+
+
+from libc.stdint cimport uintptr_t
+from cuda.ccudart cimport cudaStream_t
+
+
+cdef extern from "cuvs/core/c_api.h":
+    ctypedef uintptr_t cuvsResources_t
+
+    ctypedef enum cuvsError_t:
+        CUVS_ERROR,
+        CUVS_SUCCESS
+
+    cuvsError_t cuvsResourcesCreate(cuvsResources_t* res)
+    cuvsError_t cuvsResourcesDestroy(cuvsResources_t res)
+    cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream)
diff --git a/python/cuvs/cuvs/common/cydlpack.pxd b/python/cuvs/cuvs/common/cydlpack.pxd
index 72a03909c..1da1cf4f3 100644
--- a/python/cuvs/cuvs/common/cydlpack.pxd
+++ b/python/cuvs/cuvs/common/cydlpack.pxd
@@ -66,3 +66,4 @@ cdef extern from 'dlpack.h' nogil:
         void (*deleter)(DLManagedTensor*)  # noqa: E211
 
 
+cdef DLManagedTensor dlpack_c(ary)
diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx
index ea9e01f38..9e2f01e9c 100644
--- a/python/cuvs/cuvs/common/cydlpack.pyx
+++ b/python/cuvs/cuvs/common/cydlpack.pyx
@@ -17,8 +17,10 @@
 
 import numpy as np
 
+from libc cimport stdlib
 
-cdef void deleter(DLManagedTensor* tensor):
+
+cdef void deleter(DLManagedTensor* tensor) noexcept:
     if tensor.manager_ctx is NULL:
         return
     stdlib.free(tensor.dl_tensor.shape)
@@ -69,7 +71,7 @@ cdef DLManagedTensor dlpack_c(ary):
     tensor.dtype = dtype
 
     dlm.dl_tensor = tensor
-    dlm.manager_ct = NULL
+    dlm.manager_ctx = NULL
     dlm.deleter = deleter
 
     return dlm
diff --git a/python/cuvs/cuvs/neighbors/CMakeLists.txt b/python/cuvs/cuvs/neighbors/CMakeLists.txt
index b68f40f86..04c494c46 100644
--- a/python/cuvs/cuvs/neighbors/CMakeLists.txt
+++ b/python/cuvs/cuvs/neighbors/CMakeLists.txt
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -12,13 +12,4 @@
 # the License.
 # =============================================================================
 
-# Set the list of Cython files to build
-set(cython_sources cagra.pyx)
-set(linked_libraries cuvs::cuvs)
-
-# Build all of the Cython targets
-rapids_cython_create_modules(
-  CXX
-  SOURCE_FILES "${cython_sources}"
-  LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_
-)
+add_subdirectory(cagra)
diff --git a/python/cuvs/cuvs/neighbors/__init__.py b/python/cuvs/cuvs/neighbors/__init__.py
index 95c9415fa..c3af93d75 100644
--- a/python/cuvs/cuvs/neighbors/__init__.py
+++ b/python/cuvs/cuvs/neighbors/__init__.py
@@ -13,12 +13,8 @@
 # limitations under the License.
 
 
-from .cagra import Index, IndexParams, SearchParams, build, load, save, search
+from cuvs.neighbors import cagra
 
 __all__ = [
-    "Index",
-    "IndexParams",
-    "SearchParams",
-    "build",
-    "search",
+    "cagra",
 ]
diff --git a/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt
new file mode 100644
index 000000000..b68f40f86
--- /dev/null
+++ b/python/cuvs/cuvs/neighbors/cagra/CMakeLists.txt
@@ -0,0 +1,24 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+# Set the list of Cython files to build
+set(cython_sources cagra.pyx)
+set(linked_libraries cuvs::cuvs)
+
+# Build all of the Cython targets
+rapids_cython_create_modules(
+  CXX
+  SOURCE_FILES "${cython_sources}"
+  LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cuvs MODULE_PREFIX neighbors_cagra_
+)
diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.pxd b/python/cuvs/cuvs/neighbors/cagra/__init__.pxd
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cuvs/cuvs/neighbors/cagra/__init__.py b/python/cuvs/cuvs/neighbors/cagra/__init__.py
new file mode 100644
index 000000000..95c9415fa
--- /dev/null
+++ b/python/cuvs/cuvs/neighbors/cagra/__init__.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+#     http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .cagra import Index, IndexParams, SearchParams, build, load, save, search
+
+__all__ = [
+    "Index",
+    "IndexParams",
+    "SearchParams",
+    "build",
+    "load",
+    "save",
+    "search",
+]
diff --git a/python/cuvs/cuvs/neighbors/cagra/c/__init__.pxd b/python/cuvs/cuvs/neighbors/cagra/c/__init__.pxd
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cuvs/cuvs/neighbors/cagra/c/__init__.py b/python/cuvs/cuvs/neighbors/cagra/c/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/python/cuvs/cuvs/neighbors/c/cagra_c.pxd b/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd
similarity index 82%
rename from python/cuvs/cuvs/neighbors/c/cagra_c.pxd
rename to python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd
index 7bf15222d..8538580ec 100644
--- a/python/cuvs/cuvs/neighbors/c/cagra_c.pxd
+++ b/python/cuvs/cuvs/neighbors/cagra/c/cagra_c.pxd
@@ -15,22 +15,14 @@
 #
 # cython: language_level=3
 
-from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t
+from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t, uintptr_t
 
+from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
 
-cdef extern from "cuvs/core/c_api.h"
-    ctypedef uintptr_t cuvsResources_t
+from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
 
-    ctypedef enum cuvsError_t:
-        CUVS_ERROR,
-        CUVS_SUCCESS
 
-    cuvsError_t cuvsResourcesCreate(cuvsResources_t* res)
-    cuvsError_t cuvsResourcesDestroy(cuvsResources_t res)
-    cuvsError_t cuvsStreamSet(cuvsResources_t res, cudaStream_t stream)
-
-
-cdef extern from "cuvs/neighborscagra_c.h" nogil:
+cdef extern from "cuvs/neighbors/cagra_c.h" nogil:
 
     ctypedef enum cagraGraphBuildAlgo:
         IVF_PQ
@@ -44,7 +36,7 @@ cdef extern from "cuvs/neighborscagra_c.h" nogil:
         size_t nn_descent_niter
 
 
-    ctypedef enum search_algo:
+    ctypedef enum cagraSearchAlgo:
         SINGLE_CTA,
         MULTI_CTA,
         MULTI_KERNEL,
@@ -81,7 +73,7 @@ cdef extern from "cuvs/neighborscagra_c.h" nogil:
     cuvsError_t cagraIndexDestroy(cagraIndex_t index)
 
     cuvsError_t cagraBuild(cuvsResources_t res,
-                           struct cagraIndexParams params,
+                           cagraIndexParams params,
                            DLManagedTensor* dataset,
                            cagraIndex_t index);
 
diff --git a/python/cuvs/cuvs/neighbors/cagra/cagra.pxd b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd
new file mode 100644
index 000000000..83e4a3acf
--- /dev/null
+++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pxd
@@ -0,0 +1 @@
+cimport cuvs.neighbors.cagra.c.cagra_c as cagra_c
diff --git a/python/cuvs/cuvs/neighbors/cagra.pyx b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx
similarity index 91%
rename from python/cuvs/cuvs/neighbors/cagra.pyx
rename to python/cuvs/cuvs/neighbors/cagra/cagra.pyx
index e07abd388..f9e523202 100644
--- a/python/cuvs/cuvs/neighbors/cagra.pyx
+++ b/python/cuvs/cuvs/neighbors/cagra/cagra.pyx
@@ -19,7 +19,7 @@ import numpy as np
 
 cimport cuvs.common.cydlpack
 from cuvs.common.temp_raft import auto_sync_resources
-from cuvs.common.cydlpack import dplack_c
+from cuvs.common cimport cydlpack
 
 from cython.operator cimport dereference as deref
 
@@ -32,9 +32,11 @@ from pylibraft.common import (
 from pylibraft.common.cai_wrapper import wrap_array
 from pylibraft.common.interruptible import cuda_interruptible
 
-cimport cuvs.neighbors.cagra.c.cagra_c as cagra_c
-
 from pylibraft.neighbors.common import _check_input_array
+from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
+from pylibraft.common.handle cimport device_resources
+
+from libc.stdint cimport int8_t, int64_t, uint8_t, uint32_t, uint64_t, uintptr_t
 
 
 cdef class IndexParams:
@@ -59,7 +61,7 @@ cdef class IndexParams:
           building the knn graph. It is expected to be generally
           faster than ivf_pq.
     """
-    cdef cagra_c.index_params params
+    cdef cagra_c.cagraIndexParams params
 
     def __init__(self, *,
                  metric="sqeuclidean",
@@ -101,23 +103,22 @@ cdef class IndexParams:
 
 
 cdef class Index:
-    cdef cagraIndex_t index
+    cdef cagra_c.cagraIndex_t index
 
     def __cinit__(self):
         cdef cuvsError_t index_create_status
-        index_create_status = cuvsCagraIndexCreate(&self.index)
+        index_create_status = cagra_c.cagraIndexCreate(&self.index)
         self.trained = False
 
-        if index_create_status == cagra_c.cuvsError_t.CUVS_ERROR:
+        if index_create_status == cuvsError_t.CUVS_ERROR:
             raise Exception("FAIL")
 
     def __dealloc__(self):
+        cdef cuvsError_t index_destroy_status
         if self.index is not NULL:
-            cdef cuvsError_t index_destroy_status
-            index_destroy_status = cagraIndexDestroy(&self.index)
-            if index_destroy_status == cagra_c.cuvsError_t.CUVS_ERROR:
+            index_destroy_status = cagra_c.cagraIndexDestroy(self.index)
+            if index_destroy_status == cuvsError_t.CUVS_ERROR:
                 raise Exception("FAIL")
-            del self.index
 
     def __repr__(self):
         # todo(dgd): update repr as we expose data through C API
@@ -181,21 +182,21 @@ def build_index(IndexParams index_params, dataset, resources=None):
     if resources is None:
         resources = DeviceResources()
     cdef cuvsResources_t* resources_ = \
-        handle.getHandle()
+        resources.getHandle()
 
     cdef Index idx = Index()
+    cdef cuvsError_t build_status
+    cdef cydlpack.DLManagedTensor dataset_dlpack = cydlpack.dlpack_c(dataset_ai)
 
     with cuda_interruptible():
-        cdef cuvsError_t build_status
-
         build_status = cagra_c.cagraBuild(
             deref(resources_),
             index_params.params,
-            &dplack_c(dataset_ai),
-            deref(idx.index)
+            &dataset_dlpack,
+            idx.index
         )
 
-        if index_destroy_status == cagra_c.cuvsError_t.CUVS_ERROR:
+        if build_status == cuvsError_t.CUVS_ERROR:
             raise RuntimeError("Index failed to build.")
         else:
             idx.trained = True
@@ -252,7 +253,7 @@ cdef class SearchParams:
     rand_xor_mask: int, default = 0x128394
         Bit mask used for initial random seed node selection.
     """
-    cdef cagra_c.search_params params
+    cdef cagra_c.cagraSearchParams params
 
     def __init__(self, *,
                  max_queries=0,
@@ -291,7 +292,7 @@ cdef class SearchParams:
         elif hashmap_mode == "small":
             self.params.hashmap_mode = cagra_c.cagraHashMode.SMALL
         elif hashmap_mode == "auto":
-            self.params.hashmap_mode = cagra_c.cagraHashMode.AUTO
+            self.params.hashmap_mode = cagra_c.cagraHashMode.AUTO_HASH
         else:
             raise ValueError("`hashmap_mode` value not supported.")
 
@@ -372,7 +373,7 @@ def search(SearchParams search_params,
            neighbors=None,
            distances=None,
            resources=None):
-    """ 
+    """
     Find the k nearest neighbors for each query.
 
     Parameters
@@ -424,7 +425,6 @@ def search(SearchParams search_params,
     >>> neighbors = cp.asarray(neighbors)
     >>> distances = cp.asarray(distances)
     """
-
     if not index.trained:
         raise ValueError("Index need to be built before calling search.")
 
@@ -456,16 +456,19 @@ def search(SearchParams search_params,
     _check_input_array(distances_cai, [np.dtype('float32')],
                        exp_rows=n_queries, exp_cols=k)
 
-    cdef cagra_c.search_params params = search_params.params
+    cdef cagra_c.cagraSearchParams params = search_params.params
+    cdef cydlpack.DLManagedTensor queries_dlpack = cydlpack.dlpack_c(queries_cai)
+    cdef cydlpack.DLManagedTensor neighbors_dlpack = cydlpack.dlpack_c(neighbors_cai)
+    cdef cydlpack.DLManagedTensor distances_dlpack = cydlpack.dlpack_c(distances_cai)
 
     with cuda_interruptible():
         cagra_c.cagraSearch(
-            deref(resources_),
+            resources_,
             params,
-            deref(idx_float.index),
-            &dplack_c(queries_cai),
-            &dplack_c(neighbors_cai),
-            &dplack_c(distances_cai)
+            index.index,
+            &queries_dlpack,
+            &neighbors_dlpack,
+            &distances_dlpack
         )
 
     return (distances, neighbors)