From 91ec8417bde711877d338313b83380961e2bf7ae Mon Sep 17 00:00:00 2001 From: Jiashu Yao Date: Wed, 21 Aug 2024 12:10:01 +0000 Subject: [PATCH] [Fix] multi-defination issue across multiple source file. --- .../merlin/core_kernels/group_lock_kernels.cuh | 18 +++++++++--------- include/merlin/types.cuh | 4 ++-- include/merlin/utils.cuh | 3 ++- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/merlin/core_kernels/group_lock_kernels.cuh b/include/merlin/core_kernels/group_lock_kernels.cuh index d42f208cd..ae2f38466 100644 --- a/include/merlin/core_kernels/group_lock_kernels.cuh +++ b/include/merlin/core_kernels/group_lock_kernels.cuh @@ -21,7 +21,7 @@ namespace nv { namespace merlin { namespace group_lock { -__global__ void init_kernel( +static __global__ void init_kernel( cuda::atomic* update_count, cuda::atomic* read_count, cuda::atomic* unique_flag) { @@ -29,7 +29,7 @@ __global__ void init_kernel( new (read_count) cuda::atomic{0}; new (unique_flag) cuda::atomic{false}; } -__global__ void lock_read_kernel( +static __global__ void lock_read_kernel( cuda::atomic* update_count, cuda::atomic* read_count) { for (;;) { @@ -43,12 +43,12 @@ __global__ void lock_read_kernel( } } -__global__ void unlock_read_kernel( +static __global__ void unlock_read_kernel( cuda::atomic* read_count) { read_count->fetch_sub(1, cuda::std::memory_order_relaxed); } -__global__ void lock_update_kernel( +static __global__ void lock_update_kernel( cuda::atomic* update_count, cuda::atomic* read_count) { for (;;) { @@ -62,12 +62,12 @@ __global__ void lock_update_kernel( } } -__global__ void unlock_update_kernel( +static __global__ void unlock_update_kernel( cuda::atomic* update_count) { update_count->fetch_sub(1, cuda::std::memory_order_relaxed); } -__global__ void lock_update_read_kernel( +static __global__ void lock_update_read_kernel( cuda::atomic* update_count, cuda::atomic* read_count, cuda::atomic* unique_flag) { @@ -101,7 +101,7 @@ __global__ void lock_update_read_kernel( } } -__global__ void unlock_update_read_kernel( +static __global__ void unlock_update_read_kernel( cuda::atomic* update_count, cuda::atomic* read_count, cuda::atomic* unique_flag) { @@ -110,12 +110,12 @@ __global__ void unlock_update_read_kernel( unique_flag->store(false, cuda::std::memory_order_relaxed); } -__global__ void update_count_kernel( +static __global__ void update_count_kernel( int* counter, cuda::atomic* update_count) { *counter = update_count->load(cuda::std::memory_order_relaxed); } -__global__ void read_count_kernel( +static __global__ void read_count_kernel( int* counter, cuda::atomic* read_count) { *counter = read_count->load(cuda::std::memory_order_relaxed); } diff --git a/include/merlin/types.cuh b/include/merlin/types.cuh index cc266604f..ebbe1bffd 100644 --- a/include/merlin/types.cuh +++ b/include/merlin/types.cuh @@ -54,7 +54,7 @@ constexpr uint64_t MAX_SCORE = UINT64_C(0xFFFFFFFFFFFFFFFF); constexpr uint64_t EMPTY_SCORE = UINT64_C(0); constexpr uint64_t IGNORED_GLOBAL_EPOCH = UINT64_C(0xFFFFFFFFFFFFFFFF); -uint64_t EMPTY_KEY_CPU = DEFAULT_EMPTY_KEY; +static uint64_t EMPTY_KEY_CPU = DEFAULT_EMPTY_KEY; __constant__ uint64_t EMPTY_KEY = DEFAULT_EMPTY_KEY; __constant__ uint64_t RECLAIM_KEY = DEFAULT_RECLAIM_KEY; __constant__ uint64_t LOCKED_KEY = DEFAULT_LOCKED_KEY; @@ -76,7 +76,7 @@ __forceinline__ __device__ bool IS_VACANT_KEY(K key) { return (VACANT_KEY_MASK_1 & key) == VACANT_KEY_MASK_2; } -cudaError_t init_reserved_keys(int index) { +static cudaError_t init_reserved_keys(int index) { if (index < 1 || index > MAX_RESERVED_KEY_BIT) { // index = 0 is the default, // index = 62 is the maximum index can be set for reserved keys. diff --git a/include/merlin/utils.cuh b/include/merlin/utils.cuh index d33ee50ae..885682bb8 100644 --- a/include/merlin/utils.cuh +++ b/include/merlin/utils.cuh @@ -314,7 +314,8 @@ inline void free_pointers(cudaStream_t stream, int n, ...) { va_end(args); } -__global__ void memset64bitKernel(void* devPtr, uint64_t value, size_t count) { +static __global__ void memset64bitKernel(void* devPtr, uint64_t value, + size_t count) { size_t idx = blockIdx.x * blockDim.x + threadIdx.x; if (idx < count) { static_cast(devPtr)[idx] = value;