diff --git a/CMakeLists.txt b/CMakeLists.txt index 76b20a03..fd0458ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,7 +123,7 @@ option(LINALG_ENABLE_BLAS "Assume that we are linking with a BLAS library." ${BLAS_FOUND}) -find_package(KokkosKernels) +find_package(KokkosKernels QUIET) option(LINALG_ENABLE_KOKKOS "Enable Kokkos-based implementation. Default: autodetect Kokkos installation." ${KokkosKernels_FOUND}) diff --git a/include/experimental/__p1673_bits/transposed.hpp b/include/experimental/__p1673_bits/transposed.hpp index 5c75ee42..b811aadd 100644 --- a/include/experimental/__p1673_bits/transposed.hpp +++ b/include/experimental/__p1673_bits/transposed.hpp @@ -253,7 +253,8 @@ namespace impl { template static auto mapping(const typename layout_stride::template mapping& orig_map) { using original_mapping_type = typename layout_stride::template mapping; - using extents_type = transpose_extents_t; + // MSVC fails for transpose_extents_t + using extents_type = transpose_extents_t::extents_type>; using return_mapping_type = typename layout_type::template mapping; return return_mapping_type{ transpose_extents(orig_map.extents()), diff --git a/tests/hpx-based/add_rank1_hpx.cpp b/tests/hpx-based/add_rank1_hpx.cpp index 0295d338..72a64816 100644 --- a/tests/hpx-based/add_rank1_hpx.cpp +++ b/tests/hpx-based/add_rank1_hpx.cpp @@ -1,6 +1,8 @@ // Copyright (c) 2022 Hartmut Kaiser #include +#include + #include #include @@ -35,7 +37,7 @@ void hpx_blas1_add_test_impl(ExPolicy policy, x_t x, y_t y, z_t z) // compute gold std::vector gold(extent); using mdspan_t = std::experimental::mdspan>; + std::experimental::extents<::std::size_t, dynamic_extent>>; mdspan_t z_gold(gold.data(), extent); add_gold_solution(x, y, z_gold); diff --git a/tests/hpx-based/gtest_fixtures.hpp b/tests/hpx-based/gtest_fixtures.hpp index 10509d79..79ebf16e 100644 --- a/tests/hpx-based/gtest_fixtures.hpp +++ b/tests/hpx-based/gtest_fixtures.hpp @@ -53,9 +53,9 @@ // it is fine to put these here even if this // is a header since this is limited to tests -using std::experimental::mdspan; -using std::experimental::extents; using std::experimental::dynamic_extent; +using std::experimental::extents; +using std::experimental::mdspan; // // helper class for generating random numbers @@ -64,263 +64,292 @@ template struct UnifDist; template <> -struct UnifDist { - using dist_type = std::uniform_int_distribution; - std::random_device rd; - std::mt19937 m_gen{rd()}; - dist_type m_dist; - - UnifDist(const int a, const int b) : m_dist(a, b) {} - int - operator()() - { - return m_dist(m_gen); - } +struct UnifDist +{ + using dist_type = std::uniform_int_distribution; + std::random_device rd; + std::mt19937 m_gen{rd()}; + dist_type m_dist; + + UnifDist(const int a, const int b) : m_dist(a, b) {} + int operator()() + { + return m_dist(m_gen); + } }; template <> -struct UnifDist { - using dist_type = std::uniform_real_distribution; - std::random_device rd; - std::mt19937 m_gen{rd()}; - dist_type m_dist; - - UnifDist(const double a, const double b) : m_dist(a, b) {} - double - operator()() - { - return m_dist(m_gen); - } +struct UnifDist +{ + using dist_type = std::uniform_real_distribution; + std::random_device rd; + std::mt19937 m_gen{rd()}; + dist_type m_dist; + + UnifDist(const double a, const double b) : m_dist(a, b) {} + double operator()() + { + return m_dist(m_gen); + } }; template <> -struct UnifDist { - using dist_type = std::uniform_real_distribution; - std::random_device rd; - std::mt19937 m_gen{rd()}; - dist_type m_dist; - - UnifDist(const float a, const float b) : m_dist(a, b) {} - float - operator()() - { - return m_dist(m_gen); - } +struct UnifDist +{ + using dist_type = std::uniform_real_distribution; + std::random_device rd; + std::mt19937 m_gen{rd()}; + dist_type m_dist; + + UnifDist(const float a, const float b) : m_dist(a, b) {} + float operator()() + { + return m_dist(m_gen); + } }; - template -void -fill_random_mdspan(UnifDist& randObj_r, UnifDist& randObj_i, mdspan_t mdspanObj) +void fill_random_mdspan( + UnifDist& randObj_r, UnifDist& randObj_i, mdspan_t mdspanObj) { - static_assert(mdspan_t::rank() <= 2); + static_assert(mdspan_t::rank() <= 2); - if constexpr (mdspan_t::rank() == 1) { - for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) { - mdspanObj(i) = {randObj_r(), randObj_i()}; + if constexpr (mdspan_t::rank() == 1) + { + for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) + { + mdspanObj(i) = {randObj_r(), randObj_i()}; + } } - } else { - for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) { - for (std::size_t j = 0; j < mdspanObj.extent(1); ++j) { - mdspanObj(i, j) = {randObj_r(), randObj_i()}; - } + else + { + for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) + { + for (std::size_t j = 0; j < mdspanObj.extent(1); ++j) + { + mdspanObj(i, j) = {randObj_r(), randObj_i()}; + } + } } - } } template -void -fill_random_mdspan(UnifDist& randObj, mdspan_t mdspanObj) +void fill_random_mdspan(UnifDist& randObj, mdspan_t mdspanObj) { - static_assert(mdspan_t::rank() <= 2); + static_assert(mdspan_t::rank() <= 2); - if constexpr (mdspan_t::rank() == 1) { - for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) { - mdspanObj(i) = randObj(); + if constexpr (mdspan_t::rank() == 1) + { + for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) + { + mdspanObj(i) = randObj(); + } } - } else { - for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) { - for (std::size_t j = 0; j < mdspanObj.extent(1); ++j) { - mdspanObj(i, j) = randObj(); - } + else + { + for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) + { + for (std::size_t j = 0; j < mdspanObj.extent(1); ++j) + { + mdspanObj(i, j) = randObj(); + } + } } - } } - template -constexpr void -static_check_value_type(T /*unused*/) +constexpr void static_check_value_type(T /*unused*/) { - // clang-format off + // clang-format off static_assert( std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v>, "gtest_fixtures: unsupported value_type"); - // clang-format on + // clang-format on } - template -class _blas1_signed_fixture : public ::testing::Test { - // extent is arbitrarily chosen but not trivially small - const std::size_t myExtent = 137; +class _blas1_signed_fixture : public ::testing::Test +{ + // extent is arbitrarily chosen but not trivially small + const std::size_t myExtent = 137; public: - using value_type = T; + using value_type = T; - _blas1_signed_fixture() - : x_data(myExtent), y_data(myExtent), z_data(myExtent), x(x_data.data(), myExtent), y(y_data.data(), myExtent), + _blas1_signed_fixture() + : x_data(myExtent), y_data(myExtent), z_data(myExtent), + x(x_data.data(), myExtent), y(y_data.data(), myExtent), z(z_data.data(), myExtent) - { - static_check_value_type(value_type{}); - - if constexpr (std::is_same_v>) { - const auto a_r = static_cast(-101); - const auto b_r = static_cast(103); - UnifDist randObj_r(a_r, b_r); - - const auto a_i = static_cast(-21); - const auto b_i = static_cast(43); - UnifDist randObj_i(a_i, b_i); - - fill_random_mdspan(randObj_r, randObj_i, x); - fill_random_mdspan(randObj_r, randObj_i, y); - fill_random_mdspan(randObj_r, randObj_i, z); - } else { - const auto a = static_cast(-11); - const auto b = static_cast(23); - UnifDist randObj(a, b); - - fill_random_mdspan(randObj, x); - fill_random_mdspan(randObj, y); - fill_random_mdspan(randObj, z); + { + static_check_value_type(value_type{}); + + if constexpr (std::is_same_v>) + { + const auto a_r = static_cast(-101); + const auto b_r = static_cast(103); + UnifDist randObj_r(a_r, b_r); + + const auto a_i = static_cast(-21); + const auto b_i = static_cast(43); + UnifDist randObj_i(a_i, b_i); + + fill_random_mdspan(randObj_r, randObj_i, x); + fill_random_mdspan(randObj_r, randObj_i, y); + fill_random_mdspan(randObj_r, randObj_i, z); + } + else + { + const auto a = static_cast(-11); + const auto b = static_cast(23); + UnifDist randObj(a, b); + + fill_random_mdspan(randObj, x); + fill_random_mdspan(randObj, y); + fill_random_mdspan(randObj, z); + } } - } - std::vector x_data; - std::vector y_data; - std::vector z_data; + std::vector x_data; + std::vector y_data; + std::vector z_data; - using mdspan_t = mdspan>; - mdspan_t x; - mdspan_t y; - mdspan_t z; + using mdspan_t = mdspan>; + mdspan_t x; + mdspan_t y; + mdspan_t z; }; template -class _blas2_signed_fixture : public ::testing::Test { +class _blas2_signed_fixture : public ::testing::Test +{ protected: - // extents are arbitrarily chosen but not trivially small - const std::size_t myExtent0 = 77; - const std::size_t myExtent1 = 41; + // extents are arbitrarily chosen but not trivially small + const std::size_t myExtent0 = 77; + const std::size_t myExtent1 = 41; public: - using value_type = T; - - _blas2_signed_fixture() - : A_e0e1_data(myExtent0 * myExtent1), A_e0e1(A_e0e1_data.data(), myExtent0, myExtent1), - B_e0e1_data(myExtent0 * myExtent1), B_e0e1(B_e0e1_data.data(), myExtent0, myExtent1), - A_sym_e0_data(myExtent0 * myExtent0), A_sym_e0(A_sym_e0_data.data(), myExtent0, myExtent0), - A_hem_e0_data(myExtent0 * myExtent0), A_hem_e0(A_hem_e0_data.data(), myExtent0, myExtent0), - x_e0_data(myExtent0), x_e0(x_e0_data.data(), myExtent0), x_e1_data(myExtent1), - x_e1(x_e1_data.data(), myExtent1), y_e0_data(myExtent0), y_e0(y_e0_data.data(), myExtent0), + using value_type = T; + + _blas2_signed_fixture() + : A_e0e1_data(myExtent0 * myExtent1), + A_e0e1(A_e0e1_data.data(), myExtent0, myExtent1), + B_e0e1_data(myExtent0 * myExtent1), + B_e0e1(B_e0e1_data.data(), myExtent0, myExtent1), + A_sym_e0_data(myExtent0 * myExtent0), + A_sym_e0(A_sym_e0_data.data(), myExtent0, myExtent0), + A_hem_e0_data(myExtent0 * myExtent0), + A_hem_e0(A_hem_e0_data.data(), myExtent0, myExtent0), + x_e0_data(myExtent0), x_e0(x_e0_data.data(), myExtent0), + x_e1_data(myExtent1), x_e1(x_e1_data.data(), myExtent1), + y_e0_data(myExtent0), y_e0(y_e0_data.data(), myExtent0), z_e0_data(myExtent0), z_e0(z_e0_data.data(), myExtent0) - { - - static_check_value_type(value_type{}); - - if constexpr (std::is_same_v>) { - const auto a_r = static_cast(-5); - const auto b_r = static_cast(3); - UnifDist randObj_r(a_r, b_r); - - const auto a_i = static_cast(-9); - const auto b_i = static_cast(7); - UnifDist randObj_i(a_i, b_i); - - // fill symmetric matrices - for (std::size_t i = 0; i < myExtent0; ++i) { - for (std::size_t j = i; j < myExtent0; ++j) { - A_sym_e0(i, j) = {randObj_r(), randObj_i()}; - A_sym_e0(j, i) = A_sym_e0(i, j); - } - } - - // fill herm matrices - for (std::size_t i = 0; i < myExtent0; ++i) { - // diagonal has real elements - A_hem_e0(i, i) = randObj_r(); - - for (std::size_t j = i + 1; j < myExtent0; ++j) { - A_hem_e0(i, j) = {randObj_r(), randObj_i()}; - A_hem_e0(j, i) = std::conj(A_hem_e0(i, j)); + { + static_check_value_type(value_type{}); + + if constexpr (std::is_same_v>) + { + const auto a_r = static_cast(-5); + const auto b_r = static_cast(3); + UnifDist randObj_r(a_r, b_r); + + const auto a_i = static_cast(-9); + const auto b_i = static_cast(7); + UnifDist randObj_i(a_i, b_i); + + // fill symmetric matrices + for (std::size_t i = 0; i < myExtent0; ++i) + { + for (std::size_t j = i; j < myExtent0; ++j) + { + A_sym_e0(i, j) = {randObj_r(), randObj_i()}; + A_sym_e0(j, i) = A_sym_e0(i, j); + } + } + + // fill herm matrices + for (std::size_t i = 0; i < myExtent0; ++i) + { + // diagonal has real elements + A_hem_e0(i, i) = randObj_r(); + + for (std::size_t j = i + 1; j < myExtent0; ++j) + { + A_hem_e0(i, j) = {randObj_r(), randObj_i()}; + A_hem_e0(j, i) = std::conj(A_hem_e0(i, j)); + } + } + + // fill nonsym matrices and vectors + fill_random_mdspan(randObj_r, randObj_i, A_e0e1); + fill_random_mdspan(randObj_r, randObj_i, B_e0e1); + fill_random_mdspan(randObj_r, randObj_i, x_e0); + fill_random_mdspan(randObj_r, randObj_i, x_e1); + fill_random_mdspan(randObj_r, randObj_i, y_e0); + fill_random_mdspan(randObj_r, randObj_i, z_e0); } - } - - // fill nonsym matrices and vectors - fill_random_mdspan(randObj_r, randObj_i, A_e0e1); - fill_random_mdspan(randObj_r, randObj_i, B_e0e1); - fill_random_mdspan(randObj_r, randObj_i, x_e0); - fill_random_mdspan(randObj_r, randObj_i, x_e1); - fill_random_mdspan(randObj_r, randObj_i, y_e0); - fill_random_mdspan(randObj_r, randObj_i, z_e0); - } else { - const auto a = static_cast(-5); - const auto b = static_cast(4); - UnifDist randObj(a, b); - - // fill herm matrices, which for float or double is - // just a symmetric matrix - for (std::size_t i = 0; i < myExtent0; ++i) { - for (std::size_t j = i; j < myExtent0; ++j) { - A_hem_e0(i, j) = randObj(); - A_hem_e0(j, i) = A_hem_e0(i, j); + else + { + const auto a = static_cast(-5); + const auto b = static_cast(4); + UnifDist randObj(a, b); + + // fill herm matrices, which for float or double is + // just a symmetric matrix + for (std::size_t i = 0; i < myExtent0; ++i) + { + for (std::size_t j = i; j < myExtent0; ++j) + { + A_hem_e0(i, j) = randObj(); + A_hem_e0(j, i) = A_hem_e0(i, j); + } + } + + // fill symmetric matrices + for (std::size_t i = 0; i < myExtent0; ++i) + { + for (std::size_t j = i; j < myExtent0; ++j) + { + A_sym_e0(i, j) = randObj(); + A_sym_e0(j, i) = A_sym_e0(i, j); + } + } + + // fill nonsym matrices and vectors + fill_random_mdspan(randObj, A_e0e1); + fill_random_mdspan(randObj, B_e0e1); + fill_random_mdspan(randObj, x_e0); + fill_random_mdspan(randObj, x_e1); + fill_random_mdspan(randObj, y_e0); + fill_random_mdspan(randObj, z_e0); } - } - - // fill symmetric matrices - for (std::size_t i = 0; i < myExtent0; ++i) { - for (std::size_t j = i; j < myExtent0; ++j) { - A_sym_e0(i, j) = randObj(); - A_sym_e0(j, i) = A_sym_e0(i, j); - } - } - - // fill nonsym matrices and vectors - fill_random_mdspan(randObj, A_e0e1); - fill_random_mdspan(randObj, B_e0e1); - fill_random_mdspan(randObj, x_e0); - fill_random_mdspan(randObj, x_e1); - fill_random_mdspan(randObj, y_e0); - fill_random_mdspan(randObj, z_e0); } - } - - std::vector A_e0e1_data; - std::vector B_e0e1_data; - std::vector A_sym_e0_data; - std::vector A_hem_e0_data; - std::vector x_e0_data; - std::vector x_e1_data; - std::vector y_e0_data; - std::vector z_e0_data; - - using mdspan_r1_t = mdspan>; - using mdspan_r2_t = mdspan>; - mdspan_r2_t A_e0e1; //e0 x e1 - mdspan_r2_t B_e0e1; //e0 x e1 - mdspan_r2_t A_sym_e0; //e0 x e0, symmetric - mdspan_r2_t A_hem_e0; //e0 x e0, hermitian - - mdspan_r1_t x_e0; // x vector with extent == e0 - mdspan_r1_t x_e1; // x vector with extent == e1 - mdspan_r1_t y_e0; // y vector with extent == e0 - mdspan_r1_t z_e0; // z vector with extent == e0 -}; + std::vector A_e0e1_data; + std::vector B_e0e1_data; + std::vector A_sym_e0_data; + std::vector A_hem_e0_data; + std::vector x_e0_data; + std::vector x_e1_data; + std::vector y_e0_data; + std::vector z_e0_data; + + using mdspan_r1_t = + mdspan>; + using mdspan_r2_t = mdspan>; + mdspan_r2_t A_e0e1; //e0 x e1 + mdspan_r2_t B_e0e1; //e0 x e1 + mdspan_r2_t A_sym_e0; //e0 x e0, symmetric + mdspan_r2_t A_hem_e0; //e0 x e0, hermitian + + mdspan_r1_t x_e0; // x vector with extent == e0 + mdspan_r1_t x_e1; // x vector with extent == e1 + mdspan_r1_t y_e0; // y vector with extent == e0 + mdspan_r1_t z_e0; // z vector with extent == e0 +}; //template //class _blas3_signed_fixture : public _blas2_signed_fixture { @@ -400,11 +429,13 @@ class _blas2_signed_fixture : public ::testing::Test { using blas1_signed_float_fixture = _blas1_signed_fixture; using blas1_signed_double_fixture = _blas1_signed_fixture; -using blas1_signed_complex_double_fixture = _blas1_signed_fixture>; +using blas1_signed_complex_double_fixture = + _blas1_signed_fixture>; using blas2_signed_float_fixture = _blas2_signed_fixture; using blas2_signed_double_fixture = _blas2_signed_fixture; -using blas2_signed_complex_double_fixture = _blas2_signed_fixture>; +using blas2_signed_complex_double_fixture = + _blas2_signed_fixture>; //using blas3_signed_float_fixture = _blas3_signed_fixture; //using blas3_signed_double_fixture = _blas3_signed_fixture; diff --git a/tests/hpx-based/scale_rank1_hpx.cpp b/tests/hpx-based/scale_rank1_hpx.cpp index ddaa0543..fbe05b10 100644 --- a/tests/hpx-based/scale_rank1_hpx.cpp +++ b/tests/hpx-based/scale_rank1_hpx.cpp @@ -1,9 +1,11 @@ // Copyright (c) 2022 Hartmut Kaiser -#include #include #include +#include +#include + #include "gtest/gtest.h" #include "gtest_fixtures.hpp" @@ -29,7 +31,7 @@ void hpx_blas1_scale_test_impl(ExPolicy policy, x_t x, FactorT factor) // compute gold std::vector gold(extent); - using mdspan_t = mdspan>; + using mdspan_t = mdspan>; mdspan_t x_gold(gold.data(), extent); for (std::size_t i = 0; i < x.extent(0); ++i) { diff --git a/tests/hpx-based/scale_rank2_hpx.cpp b/tests/hpx-based/scale_rank2_hpx.cpp index 24c43aff..24af4bca 100644 --- a/tests/hpx-based/scale_rank2_hpx.cpp +++ b/tests/hpx-based/scale_rank2_hpx.cpp @@ -33,8 +33,8 @@ void hpx_blas_scale_test_impl(ExPolicy policy, A_t A, FactorT factor) // compute gold std::vector gold(extent0 * extent1); - using mdspan_t = - mdspan>; + using mdspan_t = mdspan>; mdspan_t A_gold(gold.data(), extent0, extent1); for (std::size_t i = 0; i < extent0; ++i) { @@ -91,6 +91,7 @@ TEST_F(blas2_signed_float_fixture, hpx_scale) hpx::execution::par, A_e0e1, static_cast(2)); hpx_blas_scale_test_impl( hpx::execution::par_unseq, A_e0e1, static_cast(2)); + // FIXME: not yet implemented //#if defined(HPX_HAVE_DATAPAR) // hpx_blas_scale_test_impl(hpx::execution::simd, A_e0e1, static_cast(2)); // hpx_blas_scale_test_impl(hpx::execution::par_simd, A_e0e1, static_cast(2)); @@ -105,6 +106,7 @@ TEST_F(blas2_signed_double_fixture, hpx_scale) hpx::execution::par, A_e0e1, static_cast(2)); hpx_blas_scale_test_impl( hpx::execution::par_unseq, A_e0e1, static_cast(2)); + // FIXME: not yet implemented //#if defined(HPX_HAVE_DATAPAR) // hpx_blas_scale_test_impl(hpx::execution::simd, A_e0e1, static_cast(2)); // hpx_blas_scale_test_impl(hpx::execution::par_simd, A_e0e1, static_cast(2)); @@ -121,6 +123,7 @@ TEST_F(blas2_signed_complex_double_fixture, hpx_scale_complex_factor) hpx_blas_scale_test_impl(HPXKernelsSTD::hpx_exec<>(), A_e0e1, factor); hpx_blas_scale_test_impl(hpx::execution::par, A_e0e1, factor); hpx_blas_scale_test_impl(hpx::execution::par_unseq, A_e0e1, factor); + // FIXME: not yet implemented //#if defined(HPX_HAVE_DATAPAR) // hpx_blas_scale_test_impl(hpx::execution::simd, A_e0e1, factor); // hpx_blas_scale_test_impl(hpx::execution::par_simd, A_e0e1, factor); @@ -137,6 +140,7 @@ TEST_F(blas2_signed_complex_double_fixture, hpx_scale_double_factor) hpx_blas_scale_test_impl(HPXKernelsSTD::hpx_exec<>(), A_e0e1, 2.); hpx_blas_scale_test_impl(hpx::execution::par, A_e0e1, 2.); hpx_blas_scale_test_impl(hpx::execution::par_unseq, A_e0e1, 2.); + // FIXME: not yet implemented //#if defined(HPX_HAVE_DATAPAR) // hpx_blas_scale_test_impl(hpx::execution::simd, A_e0e1, 2.); // hpx_blas_scale_test_impl(hpx::execution::par_simd, A_e0e1, 2.); diff --git a/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_add_hpx.hpp b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_add_hpx.hpp index 917a6ffa..59dda90a 100644 --- a/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_add_hpx.hpp +++ b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_add_hpx.hpp @@ -49,28 +49,29 @@ #include #include +#include +#include + #include "signal_hpx_impl_called.hpp" namespace HPXKernelsSTD { namespace { -template ::size_type ext_x, class Layout_x, - class Accessor_x, class ElementType_y, - std::experimental::extents<>::size_type ext_y, class Layout_y, - class Accessor_y, class ElementType_z, - std::experimental::extents<>::size_type ext_z, class Layout_z, +template void add_rank_1(ExPolicy&& policy, - std::experimental::mdspan, - Layout_x, Accessor_x> + std::experimental::mdspan, Layout_x, Accessor_x> x, - std::experimental::mdspan, - Layout_y, Accessor_y> + std::experimental::mdspan, Layout_y, Accessor_y> y, - std::experimental::mdspan, - Layout_z, Accessor_z> + std::experimental::mdspan, Layout_z, Accessor_z> z) { static_assert(x.static_extent(0) == std::experimental::dynamic_extent || @@ -85,13 +86,16 @@ void add_rank_1(ExPolicy&& policy, #if defined(HPX_HAVE_DATAPAR) using mdspan_x_t = std::experimental::mdspan, Layout_x, Accessor_x>; + std::experimental::extents, Layout_x, Accessor_x>; using mdspan_y_t = std::experimental::mdspan, Layout_y, Accessor_y>; + std::experimental::extents, Layout_y, Accessor_y>; using mdspan_z_t = std::experimental::mdspan, Layout_z, Accessor_z>; + std::experimental::extents, Layout_z, Accessor_z>; constexpr bool allow_explicit_vectorization = + std::is_arithmetic_v && + std::is_arithmetic_v && + std::is_arithmetic_v && mdspan_x_t::is_always_contiguous() && mdspan_y_t::is_always_contiguous() && mdspan_z_t::is_always_contiguous() && @@ -111,39 +115,43 @@ void add_rank_1(ExPolicy&& policy, else { // fall back to the underlying base policy - hpx::experimental::for_loop(policy.base_policy(), - std::experimental::extents<>::size_type(0), x.extent(0), - [&](auto i) { z(i) = x(i) + y(i); }); + hpx::experimental::for_loop(policy.base_policy(), SizeType_z(0), + x.extent(0), [&](auto i) { z(i) = x(i) + y(i); }); } } + else + if constexpr (hpx::is_vectorpack_execution_policy_v) + { + hpx::experimental::for_loop(policy.base_policy(), SizeType_z(0), + z.extent(0), [&](auto i) { z(i) = x(i) + y(i); }); + } else #endif { - hpx::experimental::for_loop(policy, - std::experimental::extents<>::size_type(0), z.extent(0), + hpx::experimental::for_loop(policy, SizeType_z(0), z.extent(0), [&](auto i) { z(i) = x(i) + y(i); }); } } -template ::size_type numRows_x, - std::experimental::extents<>::size_type numCols_x, class Layout_x, - class Accessor_x, class ElementType_y, - std::experimental::extents<>::size_type numRows_y, - std::experimental::extents<>::size_type numCols_y, class Layout_y, - class Accessor_y, class ElementType_z, - std::experimental::extents<>::size_type numRows_z, - std::experimental::extents<>::size_type numCols_z, class Layout_z, +template void add_rank_2(ExPolicy&& policy, std::experimental::mdspan, Layout_x, Accessor_x> + std::experimental::extents, Layout_x, + Accessor_x> x, std::experimental::mdspan, Layout_y, Accessor_y> + std::experimental::extents, Layout_y, + Accessor_y> y, std::experimental::mdspan, Layout_z, Accessor_z> + std::experimental::extents, Layout_z, + Accessor_z> z) { static_assert(x.static_extent(0) == std::experimental::dynamic_extent || @@ -166,37 +174,34 @@ void add_rank_2(ExPolicy&& policy, y.static_extent(1) == std::experimental::dynamic_extent || x.static_extent(1) == y.static_extent(1)); - using size_type = typename std::experimental::extents<>::size_type; - - hpx::experimental::for_loop(policy, size_type(0), x.extent(0), [&](auto j) { - for (size_type i = 0; i < x.extent(0); ++i) - { - z(i, j) = x(i, j) + y(i, j); - } - }); + hpx::experimental::for_loop( + policy, SizeType_z(0), z.extent(1), [&](auto j) { + for (SizeType_z i = 0; i < z.extent(0); ++i) + { + z(i, j) = x(i, j) + y(i, j); + } + }); } } // end anonymous namespace -MDSPAN_TEMPLATE_REQUIRES(class ExPolicy, class ElementType_x, - std::experimental::extents<>::size_type... ext_x, class Layout_x, - class Accessor_x, class ElementType_y, - std::experimental::extents<>::size_type... ext_y, class Layout_y, - class Accessor_y, class ElementType_z, - std::experimental::extents<>::size_type... ext_z, class Layout_z, - class Accessor_z, +MDSPAN_TEMPLATE_REQUIRES(class ExPolicy, class ElementType_x, class SizeType_x, + ::std::size_t... ext_x, class Layout_x, class Accessor_x, + class ElementType_y, class SizeType_y, ::std::size_t... ext_y, + class Layout_y, class Accessor_y, class ElementType_z, class SizeType_z, + ::std::size_t... ext_z, class Layout_z, class Accessor_z, /* requires */ (sizeof...(ext_x) == sizeof...(ext_y) && sizeof...(ext_x) == sizeof...(ext_z) && sizeof...(ext_z) <= 2)) void add(hpx_exec&& policy, std::experimental::mdspan, Layout_x, Accessor_x> + std::experimental::extents, Layout_x, Accessor_x> x, std::experimental::mdspan, Layout_y, Accessor_y> + std::experimental::extents, Layout_y, Accessor_y> y, std::experimental::mdspan, Layout_z, Accessor_z> + std::experimental::extents, Layout_z, Accessor_z> z) { if constexpr (z.rank() == 1) @@ -208,6 +213,7 @@ void add(hpx_exec&& policy, add_rank_2(policy.policy_, x, y, z); } } + } // namespace HPXKernelsSTD #endif //LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_HPXKERNELS_ADD_HPP_ diff --git a/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp index 59a273eb..17b174b0 100644 --- a/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp +++ b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp @@ -8,25 +8,28 @@ #include #include +#include +#include + #include "signal_hpx_impl_called.hpp" namespace HPXKernelsSTD { namespace { -template ::size_type ext0, class Layout, class Accessor> +template void linalg_scale_rank_1(ExPolicy&& policy, const Scalar alpha, - std::experimental::mdspan, - Layout, Accessor> + std::experimental::mdspan, Layout, Accessor> x) { #if defined(HPX_HAVE_DATAPAR) using mdspan_t = std::experimental::mdspan, Layout, Accessor>; + std::experimental::extents, Layout, Accessor>; constexpr bool allow_explicit_vectorization = - mdspan_t::is_always_contiguous() && + std::is_arithmetic_v && mdspan_t::is_always_contiguous() && (hpx::is_vectorpack_execution_policy_v || hpx::is_unsequenced_execution_policy_v); @@ -41,48 +44,47 @@ void linalg_scale_rank_1(ExPolicy&& policy, const Scalar alpha, else { // fall back to the underlying base policy - hpx::experimental::for_loop(policy.base_policy(), - std::experimental::extents<>::size_type(0), x.extent(0), - [&](auto i) { x(i) *= alpha; }); + hpx::experimental::for_loop(policy.base_policy(), SizeType(0), + x.extent(0), [&](auto i) { x(i) *= alpha; }); } } + else if constexpr (hpx::is_vectorpack_execution_policy_v) + { + hpx::experimental::for_loop(policy.base_policy(), SizeType(0), + x.extent(0), [&](auto i) { x(i) *= alpha; }); + } else #endif { - hpx::experimental::for_loop(policy, - std::experimental::extents<>::size_type(0), x.extent(0), - [&](auto i) { x(i) *= alpha; }); + hpx::experimental::for_loop( + policy, SizeType(0), x.extent(0), [&](auto i) { x(i) *= alpha; }); } } -template ::size_type numRows, - std::experimental::extents<>::size_type numCols, class Layout, - class Accessor> +template void linalg_scale_rank_2(ExPolicy&& policy, const Scalar alpha, std::experimental::mdspan, Layout, Accessor> + std::experimental::extents, Layout, + Accessor> A) { - using size_type = typename std::experimental::extents<>::size_type; - hpx::experimental::for_loop(policy, - std::experimental::extents<>::size_type(0), A.extent(1), [&](auto j) { - for (size_type i = 0; i < A.extent(0); ++i) - { - A(i, j) *= alpha; - } - }); + hpx::experimental::for_loop(policy, SizeType(0), A.extent(1), [&](auto j) { + for (SizeType i = 0; i < A.extent(0); ++i) + { + A(i, j) *= alpha; + } + }); } } // namespace MDSPAN_TEMPLATE_REQUIRES(class ExPolicy, class Scalar, class ElementType, - std::experimental::extents<>::size_type... ext, class Layout, - class Accessor, + class SizeType, ::std::size_t... ext, class Layout, class Accessor, /* requires */ (sizeof...(ext) <= 2)) void scale(hpx_exec&& policy, const Scalar alpha, - std::experimental::mdspan, - Layout, Accessor> + std::experimental::mdspan, Layout, Accessor> x) { Impl::signal_hpx_impl_called("scale"); @@ -97,4 +99,5 @@ void scale(hpx_exec&& policy, const Scalar alpha, } } // namespace HPXKernelsSTD + #endif