Skip to content

Commit

Permalink
Added NUMBA_DPEX_SYCL_KERNEL_populate_arystruct_affine_sequence
Browse files Browse the repository at this point in the history
  • Loading branch information
chudur-budur committed Nov 2, 2023
1 parent 6bed396 commit d8ad74f
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 55 deletions.
16 changes: 6 additions & 10 deletions numba_dpex/core/runtime/kernels/tensor/include/sequences.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ namespace kernel
namespace tensor
{

template <typename T> class ndpx_sequence_step_kernel;
template <typename T, typename wT> class ndpx_affine_sequence_kernel;
template <typename T> class dpex_sequence_step_kernel;
template <typename T, typename wT> class dpex_affine_sequence_kernel;

template <typename T> class SequenceStepFunctor
{
Expand All @@ -54,7 +54,7 @@ template <typename T> class SequenceStepFunctor
start_v.imag() + i * step_v.imag()};
}
else {
p[i] = start_v + i * step_v;
p[i] = start_v + (i * step_v);
}
}
};
Expand Down Expand Up @@ -125,13 +125,9 @@ sycl::event sequence_step_kernel(sycl::queue exec_q,

dpexrt_tensor::typeutils::validate_type_for_device<T>(exec_q);

std::cout << "sequqnce_step_kernel<"
<< dpexrt_tensor::typeutils::demangle<T>()
<< ">(): validate_type_for_device<T>(exec_q) = done" << std::endl;

sycl::event seq_step_event = exec_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(depends);
cgh.parallel_for<ndpx_sequence_step_kernel<T>>(
cgh.parallel_for<dpex_sequence_step_kernel<T>>(
sycl::range<1>{nelems},
SequenceStepFunctor<T>(array_data, start_v, step_v));
});
Expand All @@ -153,14 +149,14 @@ sycl::event affine_sequence_kernel(sycl::queue &exec_q,
sycl::event affine_seq_step_event = exec_q.submit([&](sycl::handler &cgh) {
cgh.depends_on(depends);
if (device_supports_doubles) {
cgh.parallel_for<ndpx_affine_sequence_kernel<T, double>>(
cgh.parallel_for<dpex_affine_sequence_kernel<T, double>>(
sycl::range<1>{nelems},
AffineSequenceFunctor<T, double>(array_data, start_v, end_v,
(include_endpoint) ? nelems - 1
: nelems));
}
else {
cgh.parallel_for<ndpx_affine_sequence_kernel<T, float>>(
cgh.parallel_for<dpex_affine_sequence_kernel<T, float>>(
sycl::range<1>{nelems},
AffineSequenceFunctor<T, float>(array_data, start_v, end_v,
(include_endpoint) ? nelems - 1
Expand Down
87 changes: 42 additions & 45 deletions numba_dpex/core/runtime/kernels/tensor/src/sequences.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@ extern "C" uint NUMBA_DPEX_SYCL_KERNEL_populate_arystruct_sequence(

char *dst_data = reinterpret_cast<char *>(dst->data);

// int dst_typeid = 7; // 7 = int64_t, 10 = float, 11 = double
auto fn = sequence_step_dispatch_vector[dst_typeid];

sycl::queue *queue = reinterpret_cast<sycl::queue *>(exec_q);
std::vector<sycl::event> depends = std::vector<sycl::event>();
sycl::event linspace_step_event =
Expand All @@ -93,46 +91,45 @@ extern "C" uint NUMBA_DPEX_SYCL_KERNEL_populate_arystruct_sequence(
return 1;
}

// uint dpexrt_tensor::tensor::populate_arystruct_affine_sequence(
// void *start,
// void *end,
// arystruct_t *dst,
// int include_endpoint,
// int ndim,
// int is_c_contiguous,
// const DPCTLSyclQueueRef exec_q,
// const DPCTLEventVectorRef depends)
// {
// if (ndim != 1) {
// throw std::logic_error(
// "populate_arystruct_linseq(): array must be 1D.");
// }
// if (!is_c_contiguous) {
// throw std::logic_error(
// "populate_arystruct_linseq(): array must be c-contiguous.");
// }
// /**
// auto array_types = td_ns::usm_ndarray_types();
// int dst_typenum = dst.get_typenum();
// int dst_typeid = array_types.typenum_to_lookup_id(dst_typenum);

// py::ssize_t len = dst.get_shape(0);
// if (len == 0) {
// // nothing to do
// return std::make_pair(sycl::event{}, sycl::event{});
// }

// char *dst_data = dst.get_data();
// sycl::event linspace_affine_event;

// auto fn = lin_space_affine_dispatch_vector[dst_typeid];

// linspace_affine_event = fn(exec_q, static_cast<size_t>(len), start, end,
// include_endpoint, dst_data, depends);

// return std::make_pair(
// keep_args_alive(exec_q, {dst}, {linspace_affine_event}),
// linspace_affine_event);
// */
// return 0;
// }
extern "C" uint NUMBA_DPEX_SYCL_KERNEL_populate_arystruct_affine_sequence(
void *start,
void *end,
arystruct_t *dst,
u_int8_t include_endpoint,
int ndim,
u_int8_t is_c_contiguous,
int dst_typeid,
const DPCTLSyclQueueRef exec_q)
{
if (ndim != 1) {
throw std::logic_error(
"populate_arystruct_linseq(): array must be 1D.");
}
if (!is_c_contiguous) {
throw std::logic_error(
"populate_arystruct_linseq(): array must be c-contiguous.");
}

size_t len = static_cast<size_t>(dst->nitems);
if (len == 0)
return 0;

char *dst_data = reinterpret_cast<char *>(dst->data);
sycl::queue *queue = reinterpret_cast<sycl::queue *>(exec_q);
std::vector<sycl::event> depends = std::vector<sycl::event>();

auto fn = affine_sequence_dispatch_vector[dst_typeid];

sycl::event linspace_affine_event =
fn(exec_q, static_cast<size_t>(len), start, end, include_endpoint,
dst_data, depends);

linspace_affine_event.wait_and_throw();

if (linspace_affine_event
.get_info<sycl::info::event::command_execution_status>() ==
sycl::info::event_command_status::complete)
return 0;
else
return 1;
}

0 comments on commit d8ad74f

Please sign in to comment.