Skip to content

Commit

Permalink
utests: fix warnings, clean up code
Browse files Browse the repository at this point in the history
  • Loading branch information
hcmh committed Jun 6, 2024
1 parent d7ea930 commit f96b4d6
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 52 deletions.
95 changes: 44 additions & 51 deletions utests/test_cuda_gpukrnls.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,11 @@ static bool test_im2col_loop_in(void)
{


unsigned long size = CFL_SIZE;
unsigned int N = 5;
enum { N = 5 };

long idims[5] = { 1, 16, 4, 4, 1 };
long kdims[5] = { 16, 16, 3, 3, 1 };
long odims[5] = { 16, 1, 2, 2, 1 };
long idims[N] = { 1, 16, 4, 4, 1 };
long kdims[N] = { 16, 16, 3, 3, 1 };
long odims[N] = { 16, 1, 2, 2, 1 };

complex float* in_cpu = md_alloc(N, idims, CFL_SIZE);

Expand All @@ -51,31 +50,31 @@ static bool test_im2col_loop_in(void)

long idims_mat[N + 3]; // (1 | nr_in_channel, kx, ky, kz | outx, outy, outz | ... )

md_select_dims(8, ~1ul , idims_mat, dims_mat);
md_select_dims(N + 3, ~1ul , idims_mat, dims_mat);


long istrs_mat[8];
long istrs_mat[N + 3];

md_copy_strides(5, istrs_mat, MD_STRIDES(5, idims, size));
md_copy_strides(3, istrs_mat + 5, MD_STRIDES(5, idims, size) + 2);
md_copy_strides(N, istrs_mat, MD_STRIDES(N, idims, CFL_SIZE));
md_copy_strides(3, istrs_mat + N, MD_STRIDES(N, idims, CFL_SIZE) + 2);


complex float* imat_cpu = md_alloc(8, idims_mat, CFL_SIZE);
complex float* imat_cpu = md_alloc(N + 3, idims_mat, CFL_SIZE);

md_copy2(8, idims_mat, MD_STRIDES(8, idims_mat, size), imat_cpu, istrs_mat, in_cpu, size);
md_copy2(N + 3, idims_mat, MD_STRIDES(N + 3, idims_mat, CFL_SIZE), imat_cpu, istrs_mat, in_cpu, CFL_SIZE);


complex float* imat_gpu = md_alloc_gpu(8, idims_mat, CFL_SIZE);
complex float* imat_gpu = md_alloc_gpu(N + 3, idims_mat, CFL_SIZE);

cuda_im2col(imat_gpu, in_gpu, odims, idims, kdims, NULL, NULL);

complex float* imat_gpu_cpu = md_alloc(8, idims_mat, CFL_SIZE);
complex float* imat_gpu_cpu = md_alloc(N + 3, idims_mat, CFL_SIZE);

md_copy(8, idims_mat, imat_gpu_cpu, imat_gpu, size);
md_copy(N + 3, idims_mat, imat_gpu_cpu, imat_gpu, CFL_SIZE);


float err = md_zrmse(8, idims_mat, imat_gpu_cpu, imat_cpu);
debug_printf(DP_DEBUG1, "%f, %f, %f\n", err, md_zrms(8, idims_mat, imat_cpu), md_zrms(8, idims_mat, imat_gpu_cpu));
float err = md_zrmse(N + 3, idims_mat, imat_gpu_cpu, imat_cpu);
debug_printf(DP_DEBUG1, "%f, %f, %f\n", err, md_zrms(N + 3, idims_mat, imat_cpu), md_zrms(N + 3, idims_mat, imat_gpu_cpu));

md_free(in_cpu);
md_free(in_gpu);
Expand All @@ -91,14 +90,11 @@ UT_GPU_REGISTER_TEST(test_im2col_loop_in);

static bool test_im2col_loop_out(void)
{
enum { N = 5 };


unsigned long size = CFL_SIZE;
unsigned int N = 5;

long idims[5] = { 1, 4, 4, 4, 1 };
long kdims[5] = { 4, 4, 3, 3, 1 };
long odims[5] = { 4, 1, 2, 2, 1 };
long idims[N] = { 1, 4, 4, 4, 1 };
long kdims[N] = { 4, 4, 3, 3, 1 };
long odims[N] = { 4, 1, 2, 2, 1 };

complex float* in_cpu = md_alloc(N, idims, CFL_SIZE);

Expand All @@ -117,32 +113,32 @@ static bool test_im2col_loop_out(void)

long idims_mat[N + 3]; // (1 | nr_in_channel, kx, ky, kz | outx, outy, outz | ... )

md_select_dims(8, ~1ul , idims_mat, dims_mat);
md_select_dims(N + 3, ~1ul , idims_mat, dims_mat);


long istrs_mat[8];
long istrs_mat[N + 3];

md_copy_strides(5, istrs_mat, MD_STRIDES(5, idims, size));
md_copy_strides(3, istrs_mat + 5, MD_STRIDES(5, idims, size) + 2);
md_copy_strides(N + 3, istrs_mat, MD_STRIDES(N, idims, CFL_SIZE));
md_copy_strides(3, istrs_mat + N, MD_STRIDES(N, idims, CFL_SIZE) + 2);


complex float* imat_cpu = md_alloc(8, idims_mat, CFL_SIZE);
complex float* imat_cpu = md_alloc(N + 3, idims_mat, CFL_SIZE);

md_copy2(8, idims_mat, MD_STRIDES(8, idims_mat, size), imat_cpu, istrs_mat, in_cpu, size);
md_copy2(N + 3, idims_mat, MD_STRIDES(N + 3, idims_mat, CFL_SIZE), imat_cpu, istrs_mat, in_cpu, CFL_SIZE);


complex float* imat_gpu = md_alloc_gpu(8, idims_mat, CFL_SIZE);
complex float* imat_gpu = md_alloc_gpu(N + 3, idims_mat, CFL_SIZE);

cuda_im2col(imat_gpu, in_gpu, odims, idims, kdims, NULL, NULL);


complex float* imat_gpu_cpu = md_alloc(8, idims_mat, CFL_SIZE);
complex float* imat_gpu_cpu = md_alloc(N + 3, idims_mat, CFL_SIZE);

md_copy(8, idims_mat, imat_gpu_cpu, imat_gpu, size);
md_copy(N + 3, idims_mat, imat_gpu_cpu, imat_gpu, CFL_SIZE);


float err = md_zrmse(8, idims_mat, imat_gpu_cpu, imat_cpu);
debug_printf(DP_DEBUG1, "%f, %f, %f\n", err, md_zrms(8, idims_mat, imat_cpu), md_zrms(8, idims_mat, imat_gpu_cpu));
float err = md_zrmse(N + 3, idims_mat, imat_gpu_cpu, imat_cpu);
debug_printf(DP_DEBUG1, "%f, %f, %f\n", err, md_zrms(N + 3, idims_mat, imat_cpu), md_zrms(N + 3, idims_mat, imat_gpu_cpu));

md_free(in_cpu);
md_free(in_gpu);
Expand All @@ -158,14 +154,11 @@ UT_GPU_REGISTER_TEST(test_im2col_loop_out);

static bool test_im2col_adj(void)
{
enum { N = 5 };


unsigned long size = CFL_SIZE;
unsigned int N = 5;

long idims[5] = { 1, 4, 4, 4, 1 };
long kdims[5] = { 4, 4, 3, 3, 1 };
long odims[5] = { 4, 1, 2, 2, 1 };
long idims[N] = { 1, 4, 4, 4, 1 };
long kdims[N] = { 4, 4, 3, 3, 1 };
long odims[N] = { 4, 1, 2, 2, 1 };


complex float* in_cpu = md_alloc(N, idims, CFL_SIZE);
Expand All @@ -185,32 +178,32 @@ static bool test_im2col_adj(void)

long idims_mat[N + 3]; // (1 | nr_in_channel, kx, ky, kz | outx, outy, outz | ... )

md_select_dims(8, ~1ul , idims_mat, dims_mat);
md_select_dims(N + 3, ~1ul , idims_mat, dims_mat);


long istrs_mat[8];
long istrs_mat[N + 3];

md_copy_strides(5, istrs_mat, MD_STRIDES(5, idims, size));
md_copy_strides(3, istrs_mat + 5, MD_STRIDES(5, idims, size) + 2);
md_copy_strides(5, istrs_mat, MD_STRIDES(5, idims, CFL_SIZE));
md_copy_strides(3, istrs_mat + 5, MD_STRIDES(5, idims, CFL_SIZE) + 2);


complex float* imat_cpu = md_alloc(8, idims_mat, CFL_SIZE);
complex float* imat_cpu = md_alloc(N + 3, idims_mat, CFL_SIZE);

md_gaussian_rand(8, idims_mat, imat_cpu);
md_gaussian_rand(N + 3, idims_mat, imat_cpu);


complex float* imat_gpu = md_alloc_gpu(8, idims_mat, CFL_SIZE);
complex float* imat_gpu = md_alloc_gpu(N + 3, idims_mat, CFL_SIZE);

md_copy(8, idims_mat, imat_gpu, imat_cpu, CFL_SIZE);
md_copy(N + 3, idims_mat, imat_gpu, imat_cpu, CFL_SIZE);


md_zadd2(8, idims_mat, istrs_mat, in_cpu, istrs_mat, in_cpu, MD_STRIDES(8, idims_mat, size), imat_cpu);
md_zadd2(N + 3, idims_mat, istrs_mat, in_cpu, istrs_mat, in_cpu, MD_STRIDES(N + 3, idims_mat, CFL_SIZE), imat_cpu);
cuda_im2col_transp(in_gpu, imat_gpu, odims, idims, kdims, NULL, NULL);


complex float* in_gpu_cpu = md_alloc(5, idims, CFL_SIZE);

md_copy(5, idims, in_gpu_cpu, in_gpu, size);
md_copy(5, idims, in_gpu_cpu, in_gpu, CFL_SIZE);


float err = md_znrmse(5, idims, in_gpu_cpu, in_cpu);
Expand Down
2 changes: 1 addition & 1 deletion utests/test_cudafft.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@



static bool run_cuda_fft_test(const unsigned int D, const long* dims, const unsigned long flags,
static bool run_cuda_fft_test(const int D, const long* dims, const unsigned long flags,
const complex float* in, complex float* cpu_inout,
complex float* gpu_inout, complex float* gpu_result)
{
Expand Down

0 comments on commit f96b4d6

Please sign in to comment.