From d8ad74f7b41580ff86262f46eaaadee6c00baec5 Mon Sep 17 00:00:00 2001 From: khaled Date: Thu, 2 Nov 2023 12:57:12 -0500 Subject: [PATCH] Added NUMBA_DPEX_SYCL_KERNEL_populate_arystruct_affine_sequence --- .../kernels/tensor/include/sequences.hpp | 16 ++-- .../runtime/kernels/tensor/src/sequences.cpp | 87 +++++++++---------- 2 files changed, 48 insertions(+), 55 deletions(-) diff --git a/numba_dpex/core/runtime/kernels/tensor/include/sequences.hpp b/numba_dpex/core/runtime/kernels/tensor/include/sequences.hpp index 6c603f498c..ea293bcc60 100644 --- a/numba_dpex/core/runtime/kernels/tensor/include/sequences.hpp +++ b/numba_dpex/core/runtime/kernels/tensor/include/sequences.hpp @@ -30,8 +30,8 @@ namespace kernel namespace tensor { -template class ndpx_sequence_step_kernel; -template class ndpx_affine_sequence_kernel; +template class dpex_sequence_step_kernel; +template class dpex_affine_sequence_kernel; template class SequenceStepFunctor { @@ -54,7 +54,7 @@ template class SequenceStepFunctor start_v.imag() + i * step_v.imag()}; } else { - p[i] = start_v + i * step_v; + p[i] = start_v + (i * step_v); } } }; @@ -125,13 +125,9 @@ sycl::event sequence_step_kernel(sycl::queue exec_q, dpexrt_tensor::typeutils::validate_type_for_device(exec_q); - std::cout << "sequqnce_step_kernel<" - << dpexrt_tensor::typeutils::demangle() - << ">(): validate_type_for_device(exec_q) = done" << std::endl; - sycl::event seq_step_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); - cgh.parallel_for>( + cgh.parallel_for>( sycl::range<1>{nelems}, SequenceStepFunctor(array_data, start_v, step_v)); }); @@ -153,14 +149,14 @@ sycl::event affine_sequence_kernel(sycl::queue &exec_q, sycl::event affine_seq_step_event = exec_q.submit([&](sycl::handler &cgh) { cgh.depends_on(depends); if (device_supports_doubles) { - cgh.parallel_for>( + cgh.parallel_for>( sycl::range<1>{nelems}, AffineSequenceFunctor(array_data, start_v, end_v, (include_endpoint) ? nelems - 1 : nelems)); } else { - cgh.parallel_for>( + cgh.parallel_for>( sycl::range<1>{nelems}, AffineSequenceFunctor(array_data, start_v, end_v, (include_endpoint) ? nelems - 1 diff --git a/numba_dpex/core/runtime/kernels/tensor/src/sequences.cpp b/numba_dpex/core/runtime/kernels/tensor/src/sequences.cpp index 739eb718ee..b9226d7d48 100644 --- a/numba_dpex/core/runtime/kernels/tensor/src/sequences.cpp +++ b/numba_dpex/core/runtime/kernels/tensor/src/sequences.cpp @@ -75,9 +75,7 @@ extern "C" uint NUMBA_DPEX_SYCL_KERNEL_populate_arystruct_sequence( char *dst_data = reinterpret_cast(dst->data); - // int dst_typeid = 7; // 7 = int64_t, 10 = float, 11 = double auto fn = sequence_step_dispatch_vector[dst_typeid]; - sycl::queue *queue = reinterpret_cast(exec_q); std::vector depends = std::vector(); sycl::event linspace_step_event = @@ -93,46 +91,45 @@ extern "C" uint NUMBA_DPEX_SYCL_KERNEL_populate_arystruct_sequence( return 1; } -// uint dpexrt_tensor::tensor::populate_arystruct_affine_sequence( -// void *start, -// void *end, -// arystruct_t *dst, -// int include_endpoint, -// int ndim, -// int is_c_contiguous, -// const DPCTLSyclQueueRef exec_q, -// const DPCTLEventVectorRef depends) -// { -// if (ndim != 1) { -// throw std::logic_error( -// "populate_arystruct_linseq(): array must be 1D."); -// } -// if (!is_c_contiguous) { -// throw std::logic_error( -// "populate_arystruct_linseq(): array must be c-contiguous."); -// } -// /** -// auto array_types = td_ns::usm_ndarray_types(); -// int dst_typenum = dst.get_typenum(); -// int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum); - -// py::ssize_t len = dst.get_shape(0); -// if (len == 0) { -// // nothing to do -// return std::make_pair(sycl::event{}, sycl::event{}); -// } - -// char *dst_data = dst.get_data(); -// sycl::event linspace_affine_event; - -// auto fn = lin_space_affine_dispatch_vector[dst_typeid]; - -// linspace_affine_event = fn(exec_q, static_cast(len), start, end, -// include_endpoint, dst_data, depends); - -// return std::make_pair( -// keep_args_alive(exec_q, {dst}, {linspace_affine_event}), -// linspace_affine_event); -// */ -// return 0; -// } +extern "C" uint NUMBA_DPEX_SYCL_KERNEL_populate_arystruct_affine_sequence( + void *start, + void *end, + arystruct_t *dst, + u_int8_t include_endpoint, + int ndim, + u_int8_t is_c_contiguous, + int dst_typeid, + const DPCTLSyclQueueRef exec_q) +{ + if (ndim != 1) { + throw std::logic_error( + "populate_arystruct_linseq(): array must be 1D."); + } + if (!is_c_contiguous) { + throw std::logic_error( + "populate_arystruct_linseq(): array must be c-contiguous."); + } + + size_t len = static_cast(dst->nitems); + if (len == 0) + return 0; + + char *dst_data = reinterpret_cast(dst->data); + sycl::queue *queue = reinterpret_cast(exec_q); + std::vector depends = std::vector(); + + auto fn = affine_sequence_dispatch_vector[dst_typeid]; + + sycl::event linspace_affine_event = + fn(exec_q, static_cast(len), start, end, include_endpoint, + dst_data, depends); + + linspace_affine_event.wait_and_throw(); + + if (linspace_affine_event + .get_info() == + sycl::info::event_command_status::complete) + return 0; + else + return 1; +}