-
Notifications
You must be signed in to change notification settings - Fork 2
/
util.h
executable file
·139 lines (124 loc) · 3.07 KB
/
util.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#ifndef __UTILS_H__
#define __UTILS_H__
#include <cassert>
#include <chrono>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <curand_kernel.h>
#include <iomanip>
#include <iostream>
#include <string>
#include <numeric>
#include <thrust/execution_policy.h>
#include <thrust/gather.h>
#include <thrust/iterator/discard_iterator.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/reduce.h>
#include <thrust/scan.h>
#include <thrust/sort.h>
#include <thrust/tuple.h>
// Wraps an expression that yields a CUDA status code: forwards the value, the
// expression text, and the current source location to check() below.
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)

// Reports a failed CUDA call and terminates the process.
//
//   err  - status value to test against cudaSuccess
//   func - text of the expression that produced `err` (printed verbatim)
//   file - source file name to print
//   line - source line number to print
//
// When `err` equals cudaSuccess nothing happens. Otherwise the location and
// the cudaGetErrorString() description are written to std::cerr and the
// process exits with status 1.
template<typename T>
void
check(T err, const char* const func, const char* const file, const int line)
{
  // Success path: nothing to report.
  if (!(err != cudaSuccess)) {
    return;
  }

  std::cerr << "CUDA error at: " << file << ":" << line << std::endl;
  std::cerr << cudaGetErrorString(err) << " " << func << std::endl;
  exit(1);
}
// Component-wise sum of two float2 values; returns a new float2.
inline __host__ __device__ float2
operator+(const float2& a, const float2& b)
{
  float2 sum;
  sum.x = a.x + b.x;
  sum.y = a.y + b.y;
  return sum;
}
// Adds `b` to `a` component-wise, in place. Returns nothing, so the result
// cannot be chained.
inline __host__ __device__ void
operator+=(float2& a, const float2& b)
{
  a = make_float2(a.x + b.x, a.y + b.y);
}
// Component-wise sum of two float3 values; returns a new float3.
inline __host__ __device__ float3
operator+(const float3& a, const float3& b)
{
  float3 sum;
  sum.x = a.x + b.x;
  sum.y = a.y + b.y;
  sum.z = a.z + b.z;
  return sum;
}
// Adds `b` to `a` component-wise, in place. Returns nothing, so the result
// cannot be chained.
inline __host__ __device__ void
operator+=(float3& a, const float3& b)
{
  a = make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
}
// Component-wise sum of two float4 values; returns a new float4.
inline __host__ __device__ float4
operator+(const float4& a, const float4& b)
{
  float4 sum;
  sum.x = a.x + b.x;
  sum.y = a.y + b.y;
  sum.z = a.z + b.z;
  sum.w = a.w + b.w;
  return sum;
}
// Adds `b` to `a` component-wise, in place. Returns nothing, so the result
// cannot be chained.
inline __host__ __device__ void
operator+=(float4& a, const float4& b)
{
  a = make_float4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
// Component-wise (Hadamard) product of two float4 values; this is not a dot
// product. Returns a new float4.
inline __host__ __device__ float4
operator*(const float4& a, const float4& b)
{
  float4 prod;
  prod.x = a.x * b.x;
  prod.y = a.y * b.y;
  prod.z = a.z * b.z;
  prod.w = a.w * b.w;
  return prod;
}
// Scales every component of `a` by the scalar `b`; returns a new float4.
// Only the (float4 * float) order is provided by this overload.
inline __host__ __device__ float4
operator*(const float4& a, const float& b)
{
  float4 scaled;
  scaled.x = a.x * b;
  scaled.y = a.y * b;
  scaled.z = a.z * b;
  scaled.w = a.w * b;
  return scaled;
}
// Multiplies `a` by `b` component-wise, in place. Returns nothing, so the
// result cannot be chained.
inline __host__ __device__ void
operator*=(float4& a, const float4& b)
{
  a = make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}
// Lexicographic "less than" for int2: compares x first and falls back to y
// only when the x components are equal.
inline __host__ __device__ bool
operator<(const int2& a, const int2& b)
{
  if (a.x != b.x) {
    return a.x < b.x;
  }
  return a.y < b.y;
}
// Two int2 values are equal when both their x and their y components match.
inline __host__ __device__ bool
operator==(const int2& a, const int2& b)
{
  if (a.x != b.x) {
    return false;
  }
  return a.y == b.y;
}
// Two int2 values differ when at least one of their components differs.
inline __host__ __device__ bool
operator!=(const int2& a, const int2& b)
{
  const bool same = (a.x == b.x) && (a.y == b.y);
  return !same;
}
// Atomically replaces *addr with min(*addr, value) and returns the value that
// was stored at *addr before the operation.
//
// CUDA has no native float atomicMin, so the float is reinterpreted as an
// integer and an integer atomic is used instead:
//   * sign bit of `value` clear: signed-int ordering agrees with float
//     ordering (every negative float is also a negative int), so a signed
//     atomicMin gives the float minimum;
//   * sign bit of `value` set: among negative floats the unsigned bit pattern
//     grows as the float gets smaller, and every non-negative float has a
//     smaller unsigned pattern than any negative one, so an unsigned atomicMax
//     gives the float minimum.
//
// The branch is chosen from the sign BIT rather than from `value >= 0`:
// -0.0f compares >= 0 but its bit pattern is 0x80000000 (INT_MIN), and a
// signed atomicMin with INT_MIN would overwrite any stored negative value
// with -0.0f.
//
// NOTE(review): NaN inputs get no special handling; they are ordered purely
// by their bit pattern.
__device__ __forceinline__ float
atomicMinFloat(float* addr, float value)
{
  const int value_bits = __float_as_int(value);
  const bool sign_clear = (value_bits >= 0);  // true for +0.0f, false for -0.0f

  if (sign_clear) {
    return __int_as_float(atomicMin((int*)addr, value_bits));
  }
  return __uint_as_float(
    atomicMax((unsigned int*)addr, __float_as_uint(value)));
}
// Atomically replaces *addr with max(*addr, value) and returns the value that
// was stored at *addr before the operation.
//
// CUDA has no native float atomicMax, so the float is reinterpreted as an
// integer and an integer atomic is used instead:
//   * sign bit of `value` clear: signed-int ordering agrees with float
//     ordering (every negative float is also a negative int), so a signed
//     atomicMax gives the float maximum;
//   * sign bit of `value` set: among negative floats the unsigned bit pattern
//     shrinks as the float gets larger, and every non-negative float has a
//     smaller unsigned pattern than any negative one, so an unsigned atomicMin
//     gives the float maximum.
//
// The branch is chosen from the sign BIT rather than from `value >= 0`:
// -0.0f compares >= 0 but its bit pattern is 0x80000000 (INT_MIN), and a
// signed atomicMax with INT_MIN never stores, so max(-5.0f, -0.0f) would
// wrongly leave -5.0f in place.
//
// NOTE(review): NaN inputs get no special handling; they are ordered purely
// by their bit pattern.
__device__ __forceinline__ float
atomicMaxFloat(float* addr, float value)
{
  const int value_bits = __float_as_int(value);
  const bool sign_clear = (value_bits >= 0);  // true for +0.0f, false for -0.0f

  if (sign_clear) {
    return __int_as_float(atomicMax((int*)addr, value_bits));
  }
  return __uint_as_float(
    atomicMin((unsigned int*)addr, __float_as_uint(value)));
}
#endif