/* batch.h: declarations for the batched-call interface (batchFn, fnargs_t, processFn, process). */
#include <cuda.h>
/*
 * cuDeviceGetAttributes - declaration only; the definition is not in this header.
 *
 * NOTE(review): going by the name and signature, this presumably queries the
 * `n` attributes listed in `attrs` for device `dev` and stores one int per
 * attribute in `retVal` -- confirm against the definition.
 *
 * Returns a CUresult status code.
 */
CUresult cuDeviceGetAttributes(int* retVal,
                               CUdevice_attribute* attrs,
                               int n,
                               CUdevice dev);
/*
 * cuLaunchAndSync - declaration only; the definition is not in this header.
 *
 * The parameter list (function handle, grid dimensions, block dimensions,
 * shared-memory byte count, stream, kernelParams, extra) has the same types
 * and order as the driver API's cuLaunchKernel.
 *
 * NOTE(review): the name suggests a kernel launch followed by a
 * synchronization; what exactly is synchronized (stream or context) cannot be
 * told from this header -- confirm against the definition.
 *
 * Returns a CUresult status code.
 */
CUresult cuLaunchAndSync(CUfunction f,
                         unsigned int gridDimX,
                         unsigned int gridDimY,
                         unsigned int gridDimZ,
                         unsigned int blockDimX,
                         unsigned int blockDimY,
                         unsigned int blockDimZ,
                         unsigned int sharedMemBytes,
                         CUstream hStream,
                         void** kernelParams,
                         void** extra);
/*
 * cuAllocAndCopy - declaration only; the definition is not in this header.
 *
 * NOTE(review): presumably allocates `bytesize` bytes of device memory, stores
 * the resulting device pointer in `*mem`, and copies `bytesize` bytes from the
 * buffer at `src` into it -- confirm against the definition (including what
 * happens to the allocation if the copy fails).
 *
 * Returns a CUresult status code.
 */
CUresult cuAllocAndCopy(CUdeviceptr* mem,
                        const void* src,
                        size_t bytesize);
/*
 * batchFn - tag identifying the kind of call stored in an fnargs record.
 *
 * The numeric values are written out explicitly; they are the same values the
 * compiler assigns implicitly (consecutive from 0 in declaration order).
 * Keep them stable if anything outside C depends on the raw integers.
 *
 * NOTE(review): the exact driver call behind each tag is decided by the
 * implementation of processFn, which is not visible in this header.
 */
typedef enum {
	fn_setCurrent      = 0,
	fn_mallocD         = 1,
	fn_mallocH         = 2,
	fn_mallocManaged   = 3,
	fn_memfreeD        = 4,
	fn_memfreeH        = 5,
	fn_memcpy          = 6,
	fn_memcpyHtoD      = 7,
	fn_memcpyDtoH      = 8,
	fn_memcpyDtoD      = 9,
	fn_memcpyHtoDAsync = 10,
	fn_memcpyDtoHAsync = 11,
	fn_memcpyDtoDAsync = 12,
	fn_launchKernel    = 13,
	fn_sync            = 14,
	fn_launchAndSync   = 15,
	fn_allocAndCopy    = 16,
} batchFn;
/*
 * fnargs_t - argument record for a single call, tagged by `fn`.
 *
 * Member order and types are unchanged from the original declaration, so the
 * in-memory layout is identical; adjacent members of the same type are simply
 * declared together.
 *
 * NOTE(review): which members are meaningful for a given `fn` value is decided
 * by the code that consumes this record (processFn), not by this header.
 */
typedef struct fnargs {
	batchFn fn;                    /* tag: which operation this record describes */

	CUcontext ctx;

	CUdeviceptr devPtr0, devPtr1;  /* device pointers */
	uintptr_t ptr0, ptr1;          /* pointer-sized integers; presumably host addresses -- TODO confirm */

	/* Same names/types as the launch parameters of cuLaunchAndSync. */
	CUfunction f;
	unsigned int gridDimX, gridDimY, gridDimZ;
	unsigned int blockDimX, blockDimY, blockDimZ;
	unsigned int sharedMemBytes;
	uintptr_t kernelParams, extra; /* held as integers here rather than void** */

	size_t size;                   /* presumably a byte count for alloc/copy calls -- TODO confirm */
	CUstream stream;
} fnargs_t;
/*
 * processFn - declaration only; the definition is not in this header.
 *
 * Takes a pointer to one fnargs_t record and returns a CUresult status code.
 *
 * NOTE(review): presumably performs the single call selected by `args->fn` --
 * confirm against the definition.
 */
CUresult processFn(fnargs_t* args);
/*
 * process - declaration only; the definition is not in this header.
 *
 * Returns nothing; status is reported through the caller-supplied `retVal`
 * buffer.
 *
 * NOTE(review): presumably `args` holds `count` entries, each the address of
 * an fnargs_t stored as a uintptr_t, `retVal` has room for `count` results,
 * and `ctx` is the context the calls run under -- confirm all three against
 * the definition.
 */
void process(CUcontext ctx,
             uintptr_t* args,
             CUresult* retVal,
             int count);
// Disabled declarations (commented out; not part of the active interface):
// extern void process(uintptr_t* args, CUresult* retVal, int count);
// extern CUresult batchMalloc(uintptr_t* args, CUdeviceptr* ptrs, int count);