From 76d38cffe4b052161b5b9c28d0a0c696004c815d Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Sat, 11 Jun 2022 21:22:42 -0500 Subject: [PATCH] Started to add implementation based on HPX - added build system support and test infrastructure - added .clang-format configuration file - added BLAS1 add (1d and 2d) - added BLAS1 scale (1d and 2d) - added GitHub action --- .clang-format | 104 +++++ .github/workflows/linux_hpx.yml | 34 ++ CMakeLists.txt | 18 + examples/CMakeLists.txt | 6 +- examples/hpx-based/CMakeLists.txt | 19 + examples/hpx-based/add_hpx.cpp | 118 +++++ examples/hpx-based/scale_hpx.cpp | 47 ++ examples/kokkos-based/CMakeLists.txt | 23 +- .../__p1673_bits/linalg_config.h.in | 2 + .../__p1673_bits/linalg_execpolicy_mapper.hpp | 3 + include/experimental/linalg | 3 + tests/CMakeLists.txt | 3 + tests/hpx-based/CMakeLists.txt | 233 ++++++++++ tests/hpx-based/add_rank1_hpx.cpp | 113 +++++ tests/hpx-based/gtest_fixtures.hpp | 413 ++++++++++++++++++ tests/hpx-based/gtest_main_hpx.cpp | 30 ++ tests/hpx-based/helpers.hpp | 22 + tests/hpx-based/scale_rank1_hpx.cpp | 132 ++++++ tests/hpx-based/scale_rank2_hpx.cpp | 145 ++++++ tests/hpx-based/test.cmake | 28 ++ tests/kokkos-based/CMakeLists.txt | 2 + tests/native/CMakeLists.txt | 1 + .../hpx-kernels/blas1_add_hpx.hpp | 213 +++++++++ .../hpx-kernels/blas1_scale_hpx.hpp | 100 +++++ .../hpx-kernels/exec_policy_wrapper_hpx.hpp | 71 +++ .../hpx-kernels/signal_hpx_impl_called.hpp | 61 +++ .../include/experimental/linalg_hpxkernels | 35 ++ 27 files changed, 1969 insertions(+), 10 deletions(-) create mode 100644 .clang-format create mode 100644 .github/workflows/linux_hpx.yml create mode 100644 examples/hpx-based/CMakeLists.txt create mode 100644 examples/hpx-based/add_hpx.cpp create mode 100644 examples/hpx-based/scale_hpx.cpp create mode 100644 tests/hpx-based/CMakeLists.txt create mode 100644 tests/hpx-based/add_rank1_hpx.cpp create mode 100644 tests/hpx-based/gtest_fixtures.hpp create mode 100644 
tests/hpx-based/gtest_main_hpx.cpp create mode 100644 tests/hpx-based/helpers.hpp create mode 100644 tests/hpx-based/scale_rank1_hpx.cpp create mode 100644 tests/hpx-based/scale_rank2_hpx.cpp create mode 100644 tests/hpx-based/test.cmake create mode 100644 tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_add_hpx.hpp create mode 100644 tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp create mode 100644 tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/exec_policy_wrapper_hpx.hpp create mode 100644 tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/signal_hpx_impl_called.hpp create mode 100644 tpl-implementations/include/experimental/linalg_hpxkernels diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..b601cad9 --- /dev/null +++ b/.clang-format @@ -0,0 +1,104 @@ +# Copyright (c) 2016 Thomas Heller +# Copyright (c) 2016-2022 Hartmut Kaiser + +--- +AccessModifierOffset: -4 +AlignAfterOpenBracket: DontAlign +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: false +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterCaseLabel: true + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: true + AfterUnion: true + AfterExternBlock: false + BeforeCatch: true + BeforeElse: true + IndentBraces: false + SplitEmptyFunction: false + 
SplitEmptyRecord: false + SplitEmptyNamespace: false + BeforeLambdaBody: false + BeforeWhile: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: true +BreakBeforeTernaryOperators: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: /// +CompactNamespaces: true +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth : 2 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: false +FixNamespaceComments: true +IncludeCategories: + - Regex: '^' + Priority: 1 + - Regex: '^' + Priority: 2 + - Regex: '^' + Priority: 3 + - Regex: '^' + Priority: 4 + - Regex: '^' + Priority: 5 + - Regex: '^' + Priority: 6 + - Regex: '^<.*' + Priority: 7 + - Regex: '.*' + Priority: 8 +IndentCaseLabels: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +Language: Cpp +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +ReflowComments: false +SortIncludes: true +SpaceAfterCStyleCast: true +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 4 +SpacesInAngles: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp11 +TabWidth: 4 +UseTab: Never +... 
diff --git a/.github/workflows/linux_hpx.yml b/.github/workflows/linux_hpx.yml new file mode 100644 index 00000000..a7f05ea3 --- /dev/null +++ b/.github/workflows/linux_hpx.yml @@ -0,0 +1,34 @@ +# Copyright (c) 2022 Hartmut Kaiser + +name: HPX Backend CI (Debug) + +on: [pull_request] + +jobs: + build: + runs-on: ubuntu-latest + container: stellargroup/hpx:dev + + steps: + - uses: actions/checkout@v2 + - name: Configure + shell: bash + run: | + cmake \ + . \ + -Bbuild \ + -GNinja \ + -DCMAKE_BUILD_TYPE=Debug \ + -DLINALG_ENABLE_TESTS=On \ + -DLINALG_ENABLE_HPX=On \ + -DLINALG_ENABLE_HPX_DEFAULT=On \ + -DHPX_DIR=/hpx/build/lib/cmake/HPX + - name: Build + shell: bash + run: | + cmake --build build + - name: Test + shell: bash + run: | + cd build + ctest --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index 8c528fa2..76b20a03 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,6 +5,8 @@ project(LinAlg LANGUAGES CXX ) +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + ################################################################################ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") @@ -152,6 +154,22 @@ if(LINALG_ENABLE_KOKKOS) ) endif() +if(LINALG_ENABLE_HPX) + find_package(HPX 1.8 REQUIRED) + target_link_libraries(linalg INTERFACE HPX::hpx) + target_include_directories(linalg INTERFACE + $ + ) + if(MSVC) + target_compile_definitions(linalg INTERFACE NOMINMAX _CRT_SECURE_NO_WARNINGS) + endif() + message(STATUS "HPX version: " ${HPX_VERSION_STRING}) +endif() + +if(LINALG_ENABLE_KOKKOS_DEFAULT AND LINALG_ENABLE_HPX_DEFAULT) + message(FATAL_ERROR "Only one of the Kokkos and HPX backends can be marked as default. 
Please set either LINALG_ENABLE_KOKKOS_DEFAULT or LINALG_ENABLE_HPX_DEFAULT to OFF") +endif() + target_include_directories(linalg INTERFACE $ $ diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index efc981e5..d86d46f7 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,7 +1,8 @@ function(linalg_add_example EXENAME) add_executable(${EXENAME} ${EXENAME}.cpp) - target_link_libraries(${EXENAME} linalg) + target_link_libraries(${EXENAME} PRIVATE linalg) + set_target_properties(${EXENAME} PROPERTIES FOLDER "Examples") endfunction(linalg_add_example) linalg_add_example(01_scale) @@ -11,3 +12,6 @@ linalg_add_example(03_matrix_vector_product_mixedprec) if(LINALG_ENABLE_KOKKOS) add_subdirectory(kokkos-based) endif() +if(LINALG_ENABLE_HPX) + add_subdirectory(hpx-based) +endif() diff --git a/examples/hpx-based/CMakeLists.txt b/examples/hpx-based/CMakeLists.txt new file mode 100644 index 00000000..7cdc0d12 --- /dev/null +++ b/examples/hpx-based/CMakeLists.txt @@ -0,0 +1,19 @@ +# Copyright (c) 2022 Hartmut Kaiser + +function(linalg_add_example_hpx EXENAME) + linalg_add_example(${EXENAME}) + if(LINALG_ENABLE_HPX) + target_link_libraries(${EXENAME} PRIVATE HPX::wrap_main) + endif() + set_target_properties(${EXENAME} PROPERTIES FOLDER "Examples/HPX") +endfunction(linalg_add_example_hpx) + +linalg_add_example_hpx(add_hpx) +#linalg_add_example_hpx(dot_hpx) +#linalg_add_example_hpx(dotc_hpx) +#linalg_add_example_hpx(idx_abs_max_hpx) +#linalg_add_example_hpx(vector_norm2_hpx) +#linalg_add_example_hpx(vector_abs_sum_hpx) +#linalg_add_example_hpx(vector_sum_of_squares_hpx) +linalg_add_example_hpx(scale_hpx) +#linalg_add_example_hpx(matrix_vector_product_hpx) diff --git a/examples/hpx-based/add_hpx.cpp b/examples/hpx-based/add_hpx.cpp new file mode 100644 index 00000000..d3a77e6a --- /dev/null +++ b/examples/hpx-based/add_hpx.cpp @@ -0,0 +1,118 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#include +#include + +#include +#include + +#include +#include 
+#include +#include + +template +void print_elements( + const T1& v, const std::vector& gold, char const* policy_str) +{ + std::cout << "Using policy: " << policy_str << "\n"; + for (std::size_t i = 0; i < v.size(); i++) + { + std::cout << "computed = " << v(i) << " , gold = " << gold[i] << "\n"; + } +} + +void reset(auto z) +{ + for (std::size_t i = 0; i < z.extent(0); i++) + { + z(i) = 0; + } +} + +int main(int argc, char* argv[]) +{ + std::cout << "add example: calling HPX-kernels" << std::endl; + + std::size_t N = 50; + + using value_type = double; + + std::vector x_data(N); + std::vector y_data(N); + std::vector z_data(N); + + value_type* x_ptr = x_data.data(); + value_type* y_ptr = y_data.data(); + value_type* z_ptr = z_data.data(); + + using dyn_1d_ext_type = + std::experimental::extents; + using mdspan_type = std::experimental::mdspan; + mdspan_type x(x_ptr, N); + mdspan_type y(y_ptr, N); + mdspan_type z(z_ptr, N); + + std::vector gold(N); + for (std::size_t i = 0; i < x.extent(0); i++) + { + x(i) = static_cast(i); + y(i) = i + static_cast(10); + z(i) = 0; + gold[i] = x(i) + y(i); + } + + namespace stdla = std::experimental::linalg; + const value_type init_value = 2.0; + + // This goes to the base implementation + { + stdla::add(std::execution::seq, x, y, z); + print_elements(z, gold, "std::execution::seq"); + } + + // This also goes to the base implementation + { + reset(z); // reset z since it is modified above + stdla::add(hpx::execution::seq, x, y, z); + print_elements(z, gold, "hpx::execution::seq"); + } + + // This forwards to HPXKernels + { + reset(z); // reset z since it is modified above + stdla::add(HPXKernelsSTD::hpx_exec<>(), x, y, z); + print_elements(z, gold, "HPXKernelsSTD::hpx_exec<>()"); + } + + // This forwards to HPXKernels if LINALG_ENABLE_HPX_DEFAULT is ON + { + reset(z); // reset z since it is modified above + stdla::add(std::execution::par, x, y, z); + print_elements(z, gold, "std::execution::par"); + } + + // This forwards to 
HPXKernels + { + reset(z); // reset z since it is modified above + stdla::add(hpx::execution::par, x, y, z); + print_elements(z, gold, "hpx::execution::par"); + } + +#if defined(HPX_HAVE_DATAPAR) + // this invokes an explicitly vectorized HPX version + { + reset(z); // reset z since it is modified above + stdla::add(hpx::execution::simd, x, y, z); + print_elements(z, gold, "hpx::execution::simd"); + } + + { + reset(z); // reset z since it is modified above + stdla::add(hpx::execution::par_simd, x, y, z); + print_elements(z, gold, "hpx::execution::par_simd"); + } +#endif + + return 0; +} diff --git a/examples/hpx-based/scale_hpx.cpp b/examples/hpx-based/scale_hpx.cpp new file mode 100644 index 00000000..15b946e0 --- /dev/null +++ b/examples/hpx-based/scale_hpx.cpp @@ -0,0 +1,47 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#include +#include + +#include +#include + +#include + +int main(int argc, char* argv[]) +{ + std::cout << "scale example: calling hpx-kernels" << std::endl; + + std::size_t N = 40; + { + std::vector data(N); + double* a_ptr = data.data(); + + // Requires CTAD working, GCC 11.1 works but some others are buggy + // std::experimental::mdspan a(a_ptr,N); + std::experimental::mdspan> + a(a_ptr, N); + for (std::size_t i = 0; i < a.extent(0); i++) + a(i) = double(i); + + // This forwards to HPXKernels + std::experimental::linalg::scale(HPXKernelsSTD::hpx_exec<>(), 2.0, a); + // This forwards to HPXKernels if LINALG_ENABLE_HPX_DEFAULT is ON + std::experimental::linalg::scale(std::execution::par, 2.0, a); + // This always forwards to HPXKernels + std::experimental::linalg::scale(hpx::execution::par, 2.0, a); + // This goes to the base implementation + std::experimental::linalg::scale(std::execution::seq, 2.0, a); + // This also goes to the base implementation + std::experimental::linalg::scale(hpx::execution::seq, 2.0, a); +#if defined(HPX_HAVE_DATAPAR) + // this invokes an explicitly vectorized version + std::experimental::linalg::scale(hpx::execution::simd, 
2.0, a); + std::experimental::linalg::scale(hpx::execution::par_simd, 2.0, a); +#endif + for (std::size_t i = 0; i < a.extent(0); i++) + printf("%zu %lf\n", i, a(i)); + } + return 0; +} diff --git a/examples/kokkos-based/CMakeLists.txt b/examples/kokkos-based/CMakeLists.txt index d88f634b..c8568ea6 100644 --- a/examples/kokkos-based/CMakeLists.txt +++ b/examples/kokkos-based/CMakeLists.txt @@ -1,10 +1,15 @@ -linalg_add_example(add_kokkos) -linalg_add_example(dot_kokkos) -linalg_add_example(dotc_kokkos) -linalg_add_example(idx_abs_max_kokkos) -linalg_add_example(vector_norm2_kokkos) -linalg_add_example(vector_abs_sum_kokkos) -linalg_add_example(vector_sum_of_squares_kokkos) -linalg_add_example(scale_kokkos) -linalg_add_example(matrix_vector_product_kokkos) +function(linalg_add_example_kokkos EXENAME) + linalg_add_example(${EXENAME}) + set_target_properties(${EXENAME} PROPERTIES FOLDER "Examples/Kokkos") +endfunction(linalg_add_example_kokkos) + +linalg_add_example_kokkos(add_kokkos) +linalg_add_example_kokkos(dot_kokkos) +linalg_add_example_kokkos(dotc_kokkos) +linalg_add_example_kokkos(idx_abs_max_kokkos) +linalg_add_example_kokkos(vector_norm2_kokkos) +linalg_add_example_kokkos(vector_abs_sum_kokkos) +linalg_add_example_kokkos(vector_sum_of_squares_kokkos) +linalg_add_example_kokkos(scale_kokkos) +linalg_add_example_kokkos(matrix_vector_product_kokkos) diff --git a/include/experimental/__p1673_bits/linalg_config.h.in b/include/experimental/__p1673_bits/linalg_config.h.in index 5703497d..9ebc7b5e 100644 --- a/include/experimental/__p1673_bits/linalg_config.h.in +++ b/include/experimental/__p1673_bits/linalg_config.h.in @@ -5,3 +5,5 @@ #cmakedefine LINALG_ENABLE_CONCEPTS #cmakedefine LINALG_ENABLE_KOKKOS #cmakedefine LINALG_ENABLE_KOKKOS_DEFAULT +#cmakedefine LINALG_ENABLE_HPX +#cmakedefine LINALG_ENABLE_HPX_DEFAULT diff --git a/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp b/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp index 
0f96b5bb..f02c9d79 100644 --- a/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp +++ b/include/experimental/__p1673_bits/linalg_execpolicy_mapper.hpp @@ -33,6 +33,9 @@ template inline constexpr bool is_inline_exec_v = is_inline_exec::va #include #endif +#if defined(LINALG_ENABLE_HPX) +#include +#endif namespace std { namespace experimental { diff --git a/include/experimental/linalg b/include/experimental/linalg index da144642..d4ce2609 100644 --- a/include/experimental/linalg +++ b/include/experimental/linalg @@ -80,3 +80,6 @@ #ifdef LINALG_ENABLE_KOKKOS #include #endif +#ifdef LINALG_ENABLE_HPX +#include +#endif diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 8fe2f816..f7e9a1a6 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -26,3 +26,6 @@ add_subdirectory(native) if(LINALG_ENABLE_KOKKOS) add_subdirectory(kokkos-based) endif() +if(LINALG_ENABLE_HPX) + add_subdirectory(hpx-based) +endif() diff --git a/tests/hpx-based/CMakeLists.txt b/tests/hpx-based/CMakeLists.txt new file mode 100644 index 00000000..3b6b4476 --- /dev/null +++ b/tests/hpx-based/CMakeLists.txt @@ -0,0 +1,233 @@ +# Copyright (c) 2022 Hartmut Kaiser + +macro(linalg_add_test_hpx TESTNAME FINDSTR) + + # the macro might have extra args + set(appendToTestName "") + set(compileDefs "") + set(extra_args "${ARGN}") + list(LENGTH extra_args extra_count) + if (${extra_count} EQUAL 2) + list(GET extra_args 0 compileDefs) + list(GET extra_args 1 appendToTestName) + endif() + + if (appendToTestName STREQUAL "") + set(testNameFinal utest_${TESTNAME}) + else() + set(testNameFinal utest_${TESTNAME}_${appendToTestName}) + endif() + set(testExe ${testNameFinal}_exe) + add_executable(${testExe} ${TESTNAME}.cpp gtest_main_hpx.cpp) + target_link_libraries(${testExe} linalg GTest::GTest HPX::hpx) + + if (NOT compileDefs STREQUAL "") + target_compile_definitions(${testExe} PRIVATE -D${compileDefs} -DHPX_STDBLAS_ENABLE_TESTS) + else() + target_compile_definitions(${testExe} 
PRIVATE -DHPX_STDBLAS_ENABLE_TESTS) + endif() + + set_target_properties(${testExe} PROPERTIES FOLDER "Tests/HPX") + + add_test( + NAME ${testNameFinal} + COMMAND ${CMAKE_COMMAND} + -DEXE_NAME=$ + -DALGO_NAME=${TESTNAME} + -DLOG_FILE=${CMAKE_CURRENT_BINARY_DIR}/${TESTNAME}_${appendToTestName}.log + -DTEST_STRING_FIND=${FINDSTR} + -P ${CMAKE_CURRENT_SOURCE_DIR}/test.cmake) + + # we have to set fail regex so that if it fails, + # test.cmake will detect it + set_tests_properties( + ${testNameFinal} PROPERTIES + FAIL_REGULAR_EXPRESSION "[^a-z]Error;ERROR;Failed;FAILED") +endmacro() + +# add tests below + +# +# blas1 (according to P1673) +# + +#linalg_add_test_hpx( +# dot_hpx +# "dot: hpx impl") +#linalg_add_test_hpx( +# dotc_hpx +# "dot: hpx impl") # this is not a typo, dotc calls dot underneath + +linalg_add_test_hpx( + add_rank1_hpx + "add: hpx impl") + +linalg_add_test_hpx( + scale_rank1_hpx + "scale: hpx impl") +linalg_add_test_hpx( + scale_rank2_hpx + "scale: hpx impl") + +#linalg_add_test_hpx( +# idx_abs_max_hpx +# "idx_abs_max: hpx impl") + +#linalg_add_test_hpx( +# vector_norm2_hpx +# "vector_norm2: hpx impl") +#linalg_add_test_hpx( +# vector_sum_of_squares_hpx +# "vector_sum_of_squares: hpx impl") +# +#linalg_add_test_hpx( +# vector_abs_sum_hpx +# "vector_abs_sum: hpx impl") + +#linalg_add_test_hpx( +# matrix_frob_norm_hpx +# "matrix_frob_norm: hpx impl") +#linalg_add_test_hpx( +# matrix_one_norm_hpx +# "matrix_one_norm: hpx impl") +#linalg_add_test_hpx( +# matrix_inf_norm_hpx +# "matrix_inf_norm: hpx impl") + +#linalg_add_test_hpx( +# swap_elements_rank1_hpx +# "swap_elements: hpx impl") +#linalg_add_test_hpx( +# swap_elements_rank2_hpx +# "swap_elements: hpx impl") +#linalg_add_test_hpx( +# copy_hpx +# "copy: hpx impl") + +# +# blas2 (according to P1673) +# + +#linalg_add_test_hpx( +# overwriting_matrix_vector_product +# "overwriting_matrix_vector_product: hpx impl") +#linalg_add_test_hpx( +# updating_matrix_vector_product +# "updating_matrix_vector_product: 
hpx impl") + +# symmetric_matrix_vector_product +#linalg_add_test_hpx( +# overwriting_symmetric_matrix_vector_product +# "overwriting_symmetric_matrix_vector_product_lower: hpx impl" USE_LOWER lower) +#linalg_add_test_hpx( +# overwriting_symmetric_matrix_vector_product +# "overwriting_symmetric_matrix_vector_product_upper: hpx impl" USE_UPPER upper) +#linalg_add_test_hpx( +# updating_symmetric_matrix_vector_product +# "updating_symmetric_matrix_vector_product_lower: hpx impl" USE_LOWER lower) +#linalg_add_test_hpx( +# updating_symmetric_matrix_vector_product +# "updating_symmetric_matrix_vector_product_upper: hpx impl" USE_UPPER upper) + +# hermitian_matrix_vector_product +#linalg_add_test_hpx( +# overwriting_hermitian_matrix_vector_product +# "overwriting_hermitian_matrix_vector_product_lower: hpx impl" USE_LOWER lower) +#linalg_add_test_hpx( +# overwriting_hermitian_matrix_vector_product +# "overwriting_hermitian_matrix_vector_product_upper: hpx impl" USE_UPPER upper) +#linalg_add_test_hpx( +# updating_hermitian_matrix_vector_product +# "updating_hermitian_matrix_vector_product_lower: hpx impl" USE_LOWER lower) +#linalg_add_test_hpx( +# updating_hermitian_matrix_vector_product +# "updating_hermitian_matrix_vector_product_upper: hpx impl" USE_UPPER upper) + +# triangular_matrix_vector_product +#linalg_add_test_hpx( +# overwriting_triangular_matrix_vector_product +# "overwriting_triangular_matrix_vector_product_lower: hpx impl" USE_LOWER lower) +#linalg_add_test_hpx( +# overwriting_triangular_matrix_vector_product +# "overwriting_triangular_matrix_vector_product_upper: hpx impl" USE_UPPER upper) +#linalg_add_test_hpx( +# updating_triangular_matrix_vector_product +# "updating_triangular_matrix_vector_product_lower: hpx impl" USE_LOWER lower) +#linalg_add_test_hpx( +# updating_triangular_matrix_vector_product +# "updating_triangular_matrix_vector_product_upper: hpx impl" USE_UPPER upper) +# +#linalg_add_test_hpx( +# matrix_rank1_update_hpx +# "matrix_rank1_update: 
hpx impl") +# +#linalg_add_test_hpx( +# symmetric_matrix_rank1_update_hpx +# "symmetric_matrix_rank1_update: hpx impl") +#linalg_add_test_hpx( +# hermitian_matrix_rank1_update_hpx +# "hermitian_matrix_rank1_update: hpx impl") +# +#linalg_add_test_hpx( +# symmetric_matrix_rank2_update_hpx +# "symmetric_matrix_rank2_update: hpx impl") +#linalg_add_test_hpx( +# hermitian_matrix_rank2_update_hpx +# "hermitian_matrix_rank2_update: hpx impl") + +# +# blas3 (according to P1673) +# + +# matrix_product +#linalg_add_test_hpx( +# gemm_C_AB +# "gemm_C_AB_product: hpx impl") +#linalg_add_test_hpx( +# gemm_C_ABT +# "gemm_C_ABT_product: hpx impl") +#linalg_add_test_hpx( +# gemm_C_ATB +# "gemm_C_ATB_product: hpx impl") +# +#linalg_add_test_hpx( +# triangular_matrix_left_product_hpx +# "triangular_matrix_left_product_hpx: hpx impl") +#linalg_add_test_hpx( +# triangular_matrix_right_product_hpx +# "triangular_matrix_right_product_hpx: hpx impl") + +# {symmetric,hermitian}_matrix_{left} +#linalg_add_test_hpx( +# symmetric_matrix_left_product_hpx +# "symmetric_matrix_left_product: hpx impl") +#linalg_add_test_hpx( +# symmetric_matrix_right_product_hpx +# "symmetric_matrix_right_product: hpx impl") +#linalg_add_test_hpx( +# hermitian_matrix_left_product_hpx +# "hermitian_matrix_left_product: hpx impl") +#linalg_add_test_hpx( +# hermitian_matrix_right_product_hpx +# "hermitian_matrix_right_product: hpx impl") +# +#linalg_add_test_hpx( +# triangular_matrix_matrix_left_solve +# "triangular_matrix_matrix_left_solve: hpx impl") +#linalg_add_test_hpx( +# triangular_matrix_matrix_right_solve +# "triangular_matrix_matrix_right_solve: hpx impl") +# +#linalg_add_test_hpx( +# symmetric_matrix_rank_2k_update_hpx +# "symmetric_matrix_rank_2k_update: hpx impl") +#linalg_add_test_hpx( +# hermitian_matrix_rank_2k_update_hpx +# "hermitian_matrix_rank_2k_update: hpx impl") +# +#linalg_add_test_hpx( +# symmetric_matrix_rank_k_update_hpx +# "symmetric_matrix_rank_k_update: hpx impl") 
+#linalg_add_test_hpx( +# hermitian_matrix_rank_k_update_hpx +# "hermitian_matrix_rank_k_update: hpx impl") \ No newline at end of file diff --git a/tests/hpx-based/add_rank1_hpx.cpp b/tests/hpx-based/add_rank1_hpx.cpp new file mode 100644 index 00000000..0295d338 --- /dev/null +++ b/tests/hpx-based/add_rank1_hpx.cpp @@ -0,0 +1,113 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#include +#include +#include + +#include "gtest/gtest.h" +#include "gtest_fixtures.hpp" + +#include "helpers.hpp" + +namespace { + +template +void add_gold_solution(x_t x, y_t y, z_t z) +{ + for (std::size_t i = 0; i < x.extent(0); ++i) + { + z(i) = x(i) + y(i); + } +} + +template +void hpx_blas1_add_test_impl(ExPolicy policy, x_t x, y_t y, z_t z) +{ + namespace stdla = std::experimental::linalg; + + using value_type = typename x_t::value_type; + const std::size_t extent = x.extent(0); + + // copy x and y to verify they are not changed after kernel + auto x_preKernel = hpxtesting::create_stdvector_and_copy(x); + auto y_preKernel = hpxtesting::create_stdvector_and_copy(y); + + // compute gold + std::vector gold(extent); + using mdspan_t = std::experimental::mdspan>; + mdspan_t z_gold(gold.data(), extent); + add_gold_solution(x, y, z_gold); + + stdla::add(policy, x, y, z); + + if constexpr (std::is_same_v) + { + for (std::size_t i = 0; i < extent; ++i) + { + EXPECT_FLOAT_EQ(x(i), x_preKernel[i]); + EXPECT_FLOAT_EQ(y(i), y_preKernel[i]); + EXPECT_FLOAT_EQ(z(i), z_gold(i)); + } + } + + if constexpr (std::is_same_v) + { + for (std::size_t i = 0; i < extent; ++i) + { + EXPECT_DOUBLE_EQ(x(i), x_preKernel[i]); + EXPECT_DOUBLE_EQ(y(i), y_preKernel[i]); + EXPECT_DOUBLE_EQ(z(i), z_gold(i)); + } + } + + if constexpr (std::is_same_v>) + { + for (std::size_t i = 0; i < extent; ++i) + { + EXPECT_TRUE(x(i) == x_preKernel[i]); + EXPECT_TRUE(y(i) == y_preKernel[i]); + EXPECT_DOUBLE_EQ(z(i).real(), z_gold[i].real()); + EXPECT_DOUBLE_EQ(z(i).imag(), z_gold[i].imag()); + } + } +} +} // namespace + 
+TEST_F(blas1_signed_float_fixture, hpx_add) +{ + hpx_blas1_add_test_impl(HPXKernelsSTD::hpx_exec<>(), x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par, x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par_unseq, x, y, z); +#if defined(HPX_HAVE_DATAPAR) + hpx_blas1_add_test_impl(hpx::execution::simd, x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par_simd, x, y, z); +#endif +} + +TEST_F(blas1_signed_double_fixture, hpx_add) +{ + hpx_blas1_add_test_impl(HPXKernelsSTD::hpx_exec<>(), x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par, x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par_unseq, x, y, z); + #if defined(HPX_HAVE_DATAPAR) + hpx_blas1_add_test_impl(hpx::execution::simd, x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par_simd, x, y, z); + #endif +} + +TEST_F(blas1_signed_complex_double_fixture, hpx_add) +{ + using kc_t = std::complex; + using stdc_t = value_type; + if (alignof(value_type) == alignof(kc_t)) + { + hpx_blas1_add_test_impl(HPXKernelsSTD::hpx_exec<>(), x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par, x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par_unseq, x, y, z); +#if defined(HPX_HAVE_DATAPAR) + hpx_blas1_add_test_impl(hpx::execution::simd, x, y, z); + hpx_blas1_add_test_impl(hpx::execution::par_simd, x, y, z); +#endif + } +} diff --git a/tests/hpx-based/gtest_fixtures.hpp b/tests/hpx-based/gtest_fixtures.hpp new file mode 100644 index 00000000..10509d79 --- /dev/null +++ b/tests/hpx-based/gtest_fixtures.hpp @@ -0,0 +1,413 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. // +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef LINALG_TESTS_HPX_BLAS1_FIXTURES_HPP_ +#define LINALG_TESTS_HPX_BLAS1_FIXTURES_HPP_ + +#include +#include + +#include +#include + +#include "gtest/gtest.h" + +// it is fine to put these here even if this +// is a header since this is limited to tests +using std::experimental::mdspan; +using std::experimental::extents; +using std::experimental::dynamic_extent; + +// +// helper class for generating random numbers +// +template +struct UnifDist; + +template <> +struct UnifDist { + using dist_type = std::uniform_int_distribution; + std::random_device rd; + std::mt19937 m_gen{rd()}; + dist_type m_dist; + + UnifDist(const int a, const int b) : m_dist(a, b) {} + int + operator()() + { + return m_dist(m_gen); + } +}; + +template <> +struct UnifDist { + using dist_type = std::uniform_real_distribution; + std::random_device rd; + std::mt19937 m_gen{rd()}; + dist_type m_dist; + + UnifDist(const double a, const double b) : m_dist(a, b) {} + double + operator()() + { + return m_dist(m_gen); + } +}; + +template <> +struct UnifDist { + using dist_type = std::uniform_real_distribution; + std::random_device rd; + std::mt19937 m_gen{rd()}; + dist_type m_dist; + + UnifDist(const float a, const float b) : m_dist(a, b) {} + float + operator()() + { + return m_dist(m_gen); + } +}; + + +template +void +fill_random_mdspan(UnifDist& randObj_r, UnifDist& randObj_i, mdspan_t mdspanObj) +{ + static_assert(mdspan_t::rank() <= 2); + + if constexpr (mdspan_t::rank() == 1) { + for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) { + mdspanObj(i) = {randObj_r(), randObj_i()}; + } + } else { + for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) { + for (std::size_t j = 0; j < mdspanObj.extent(1); ++j) { + mdspanObj(i, j) = {randObj_r(), randObj_i()}; + } + } + } +} + +template +void +fill_random_mdspan(UnifDist& randObj, mdspan_t mdspanObj) +{ + 
static_assert(mdspan_t::rank() <= 2); + + if constexpr (mdspan_t::rank() == 1) { + for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) { + mdspanObj(i) = randObj(); + } + } else { + for (std::size_t i = 0; i < mdspanObj.extent(0); ++i) { + for (std::size_t j = 0; j < mdspanObj.extent(1); ++j) { + mdspanObj(i, j) = randObj(); + } + } + } +} + + +template +constexpr void +static_check_value_type(T /*unused*/) +{ + // clang-format off + static_assert( + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v>, + "gtest_fixtures: unsupported value_type"); + // clang-format on +} + + +template +class _blas1_signed_fixture : public ::testing::Test { + // extent is arbitrarily chosen but not trivially small + const std::size_t myExtent = 137; + +public: + using value_type = T; + + _blas1_signed_fixture() + : x_data(myExtent), y_data(myExtent), z_data(myExtent), x(x_data.data(), myExtent), y(y_data.data(), myExtent), + z(z_data.data(), myExtent) + { + static_check_value_type(value_type{}); + + if constexpr (std::is_same_v>) { + const auto a_r = static_cast(-101); + const auto b_r = static_cast(103); + UnifDist randObj_r(a_r, b_r); + + const auto a_i = static_cast(-21); + const auto b_i = static_cast(43); + UnifDist randObj_i(a_i, b_i); + + fill_random_mdspan(randObj_r, randObj_i, x); + fill_random_mdspan(randObj_r, randObj_i, y); + fill_random_mdspan(randObj_r, randObj_i, z); + } else { + const auto a = static_cast(-11); + const auto b = static_cast(23); + UnifDist randObj(a, b); + + fill_random_mdspan(randObj, x); + fill_random_mdspan(randObj, y); + fill_random_mdspan(randObj, z); + } + } + + std::vector x_data; + std::vector y_data; + std::vector z_data; + + using mdspan_t = mdspan>; + mdspan_t x; + mdspan_t y; + mdspan_t z; +}; + +template +class _blas2_signed_fixture : public ::testing::Test { +protected: + // extents are arbitrarily chosen but not trivially small + const std::size_t myExtent0 = 77; + const std::size_t myExtent1 = 41; + +public: + 
using value_type = T; + + _blas2_signed_fixture() + : A_e0e1_data(myExtent0 * myExtent1), A_e0e1(A_e0e1_data.data(), myExtent0, myExtent1), + B_e0e1_data(myExtent0 * myExtent1), B_e0e1(B_e0e1_data.data(), myExtent0, myExtent1), + A_sym_e0_data(myExtent0 * myExtent0), A_sym_e0(A_sym_e0_data.data(), myExtent0, myExtent0), + A_hem_e0_data(myExtent0 * myExtent0), A_hem_e0(A_hem_e0_data.data(), myExtent0, myExtent0), + x_e0_data(myExtent0), x_e0(x_e0_data.data(), myExtent0), x_e1_data(myExtent1), + x_e1(x_e1_data.data(), myExtent1), y_e0_data(myExtent0), y_e0(y_e0_data.data(), myExtent0), + z_e0_data(myExtent0), z_e0(z_e0_data.data(), myExtent0) + { + + static_check_value_type(value_type{}); + + if constexpr (std::is_same_v>) { + const auto a_r = static_cast(-5); + const auto b_r = static_cast(3); + UnifDist randObj_r(a_r, b_r); + + const auto a_i = static_cast(-9); + const auto b_i = static_cast(7); + UnifDist randObj_i(a_i, b_i); + + // fill symmetric matrices + for (std::size_t i = 0; i < myExtent0; ++i) { + for (std::size_t j = i; j < myExtent0; ++j) { + A_sym_e0(i, j) = {randObj_r(), randObj_i()}; + A_sym_e0(j, i) = A_sym_e0(i, j); + } + } + + // fill herm matrices + for (std::size_t i = 0; i < myExtent0; ++i) { + // diagonal has real elements + A_hem_e0(i, i) = randObj_r(); + + for (std::size_t j = i + 1; j < myExtent0; ++j) { + A_hem_e0(i, j) = {randObj_r(), randObj_i()}; + A_hem_e0(j, i) = std::conj(A_hem_e0(i, j)); + } + } + + // fill nonsym matrices and vectors + fill_random_mdspan(randObj_r, randObj_i, A_e0e1); + fill_random_mdspan(randObj_r, randObj_i, B_e0e1); + fill_random_mdspan(randObj_r, randObj_i, x_e0); + fill_random_mdspan(randObj_r, randObj_i, x_e1); + fill_random_mdspan(randObj_r, randObj_i, y_e0); + fill_random_mdspan(randObj_r, randObj_i, z_e0); + } else { + const auto a = static_cast(-5); + const auto b = static_cast(4); + UnifDist randObj(a, b); + + // fill herm matrices, which for float or double is + // just a symmetric matrix + for 
(std::size_t i = 0; i < myExtent0; ++i) { + for (std::size_t j = i; j < myExtent0; ++j) { + A_hem_e0(i, j) = randObj(); + A_hem_e0(j, i) = A_hem_e0(i, j); + } + } + + // fill symmetric matrices + for (std::size_t i = 0; i < myExtent0; ++i) { + for (std::size_t j = i; j < myExtent0; ++j) { + A_sym_e0(i, j) = randObj(); + A_sym_e0(j, i) = A_sym_e0(i, j); + } + } + + // fill nonsym matrices and vectors + fill_random_mdspan(randObj, A_e0e1); + fill_random_mdspan(randObj, B_e0e1); + fill_random_mdspan(randObj, x_e0); + fill_random_mdspan(randObj, x_e1); + fill_random_mdspan(randObj, y_e0); + fill_random_mdspan(randObj, z_e0); + } + } + + std::vector A_e0e1_data; + std::vector B_e0e1_data; + std::vector A_sym_e0_data; + std::vector A_hem_e0_data; + std::vector x_e0_data; + std::vector x_e1_data; + std::vector y_e0_data; + std::vector z_e0_data; + + using mdspan_r1_t = mdspan>; + using mdspan_r2_t = mdspan>; + mdspan_r2_t A_e0e1; //e0 x e1 + mdspan_r2_t B_e0e1; //e0 x e1 + mdspan_r2_t A_sym_e0; //e0 x e0, symmetric + mdspan_r2_t A_hem_e0; //e0 x e0, hermitian + + mdspan_r1_t x_e0; // x vector with extent == e0 + mdspan_r1_t x_e1; // x vector with extent == e1 + mdspan_r1_t y_e0; // y vector with extent == e0 + mdspan_r1_t z_e0; // z vector with extent == e0 +}; + + +//template +//class _blas3_signed_fixture : public _blas2_signed_fixture { +// using base_t = _blas2_signed_fixture; +// +// // extents are arbitrarily chosen but not trivially small +// using base_t::myExtent0; +// using base_t::myExtent1; +// const std::size_t myExtent2 = 53; +// +//public: +// using value_type = T; +// using typename base_t::mdspan_r1_t; +// using typename base_t::mdspan_r2_t; +// +// _blas3_signed_fixture() +// : base_t(), B_e0e2_data("B_e0e2_data", myExtent0, myExtent2), B_e0e2(B_e0e2_data.data(), myExtent0, myExtent2), +// B_e1e2_data("B_e1e2_data", myExtent1, myExtent2), B_e1e2(B_e1e2_data.data(), myExtent1, myExtent2), +// B_e2e1_data("B_e2e1_data", myExtent2, myExtent1), 
B_e2e1(B_e2e1_data.data(), myExtent2, myExtent1), +// C_e0e2_data("C_e0e2_data", myExtent0, myExtent2), C_e0e2(C_e0e2_data.data(), myExtent0, myExtent2), +// C_e1e2_data("C_e1e2_data", myExtent1, myExtent2), C_e1e2(C_e1e2_data.data(), myExtent1, myExtent2), +// C_e2e0_data("C_e2e0_data", myExtent2, myExtent0), C_e2e0(C_e2e0_data.data(), myExtent2, myExtent0), +// E_e0e2_data("E_e0e2_data", myExtent0, myExtent2), E_e0e2(E_e0e2_data.data(), myExtent0, myExtent2) +// { +// +// static_check_value_type(value_type{}); +// +// if constexpr (std::is_same_v>) { +// const auto a_r = static_cast(-5); +// const auto b_r = static_cast(3); +// UnifDist randObj_r(a_r, b_r); +// +// const auto a_i = static_cast(-9); +// const auto b_i = static_cast(7); +// UnifDist randObj_i(a_i, b_i); +// +// fill_random_mdspan(randObj_r, randObj_i, B_e0e2); +// fill_random_mdspan(randObj_r, randObj_i, B_e1e2); +// fill_random_mdspan(randObj_r, randObj_i, B_e2e1); +// fill_random_mdspan(randObj_r, randObj_i, C_e0e2); +// fill_random_mdspan(randObj_r, randObj_i, C_e1e2); +// fill_random_mdspan(randObj_r, randObj_i, C_e2e0); +// fill_random_mdspan(randObj_r, randObj_i, E_e0e2); +// } else { +// const auto a = static_cast(-5); +// const auto b = static_cast(4); +// UnifDist randObj(a, b); +// +// fill_random_mdspan(randObj, B_e0e2); +// fill_random_mdspan(randObj, B_e1e2); +// fill_random_mdspan(randObj, B_e2e1); +// fill_random_mdspan(randObj, C_e0e2); +// fill_random_mdspan(randObj, C_e1e2); +// fill_random_mdspan(randObj, C_e2e0); +// fill_random_mdspan(randObj, E_e0e2); +// } +// } +// +// std::vector B_e0e2_data; +// std::vector B_e1e2_data; +// std::vector B_e2e1_data; +// std::vector C_e0e2_data; +// std::vector C_e1e2_data; +// std::vector C_e2e0_data; +// std::vector E_e0e2_data; +// +// mdspan_r2_t B_e0e2; //e0 x e2 +// mdspan_r2_t B_e1e2; //e1 x e2 +// mdspan_r2_t B_e2e1; //e2 x e1 +// +// mdspan_r2_t C_e0e2; //e0 x e2 +// mdspan_r2_t C_e1e2; //e1 x e2 +// mdspan_r2_t C_e2e0; //e2 x e0 
+// +// mdspan_r2_t E_e0e2; //e0 x e2 +//}; + +using blas1_signed_float_fixture = _blas1_signed_fixture; +using blas1_signed_double_fixture = _blas1_signed_fixture; +using blas1_signed_complex_double_fixture = _blas1_signed_fixture>; + +using blas2_signed_float_fixture = _blas2_signed_fixture; +using blas2_signed_double_fixture = _blas2_signed_fixture; +using blas2_signed_complex_double_fixture = _blas2_signed_fixture>; + +//using blas3_signed_float_fixture = _blas3_signed_fixture; +//using blas3_signed_double_fixture = _blas3_signed_fixture; +//using blas3_signed_complex_double_fixture = _blas3_signed_fixture>; + +#endif diff --git a/tests/hpx-based/gtest_main_hpx.cpp b/tests/hpx-based/gtest_main_hpx.cpp new file mode 100644 index 00000000..c06f0279 --- /dev/null +++ b/tests/hpx-based/gtest_main_hpx.cpp @@ -0,0 +1,30 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#include + +#include +#include + +#if defined(HPX_STDBLAS_ENABLE_TESTS) +namespace HPXKernelsSTD { namespace Impl { + +void signal_hpx_impl_called(std::string_view functionName) +{ + std::cout << functionName << ": hpx impl" << std::endl; +} + +}} // namespace HPXKernelsSTD::Impl +#endif + +int hpx_main(int argc, char* argv[]) +{ + ::testing::InitGoogleTest(&argc, argv); + int err = RUN_ALL_TESTS(); + hpx::finalize(); + return err; +} + +int main(int argc, char* argv[]) +{ + return hpx::init(argc, argv); +} diff --git a/tests/hpx-based/helpers.hpp b/tests/hpx-based/helpers.hpp new file mode 100644 index 00000000..22fd897a --- /dev/null +++ b/tests/hpx-based/helpers.hpp @@ -0,0 +1,22 @@ +// Copyright (c) 2022 Hartmut Kaiser + +namespace hpxtesting { + +template +auto create_stdvector_and_copy(T sourceView) +{ + static_assert(sourceView.rank() == 1); + + using value_type = typename T::value_type; + using res_t = std::vector; + + res_t result(sourceView.extent(0)); + for (std::size_t i = 0; i < sourceView.extent(0); ++i) + { + result[i] = sourceView(i); + } + + return result; +} + +} // namespace hpxtesting 
diff --git a/tests/hpx-based/scale_rank1_hpx.cpp b/tests/hpx-based/scale_rank1_hpx.cpp new file mode 100644 index 00000000..ddaa0543 --- /dev/null +++ b/tests/hpx-based/scale_rank1_hpx.cpp @@ -0,0 +1,132 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#include +#include +#include + +#include "gtest/gtest.h" +#include "gtest_fixtures.hpp" + +namespace { + +template +void scale_gold_solution(x_t x, FactorT factor) +{ + FactorT result = {}; + for (std::size_t i = 0; i < x.extent(0); ++i) + { + x(i) *= factor; + } +} + +template +void hpx_blas1_scale_test_impl(ExPolicy policy, x_t x, FactorT factor) +{ + namespace stdla = std::experimental::linalg; + + using value_type = typename x_t::value_type; + const std::size_t extent = x.extent(0); + + // compute gold + std::vector gold(extent); + using mdspan_t = mdspan>; + mdspan_t x_gold(gold.data(), extent); + for (std::size_t i = 0; i < x.extent(0); ++i) + { + x_gold(i) = x(i); + } + scale_gold_solution(x_gold, factor); + + stdla::scale(policy, factor, x); + + if constexpr (std::is_same_v) + { + for (std::size_t i = 0; i < extent; ++i) + { + EXPECT_FLOAT_EQ(x(i), x_gold(i)); + } + } + + if constexpr (std::is_same_v) + { + for (std::size_t i = 0; i < extent; ++i) + { + EXPECT_DOUBLE_EQ(x(i), x_gold(i)); + } + } + + if constexpr (std::is_same_v>) + { + for (std::size_t i = 0; i < extent; ++i) + { + EXPECT_DOUBLE_EQ(x(i).real(), x_gold(i).real()); + EXPECT_DOUBLE_EQ(x(i).imag(), x_gold(i).imag()); + } + } +} +} // namespace + +TEST_F(blas1_signed_float_fixture, hpx_scale) +{ + hpx_blas1_scale_test_impl( + HPXKernelsSTD::hpx_exec<>(), x, static_cast(2)); + hpx_blas1_scale_test_impl( + hpx::execution::par, x, static_cast(2)); + hpx_blas1_scale_test_impl( + hpx::execution::par_unseq, x, static_cast(2)); +#if defined(HPX_HAVE_DATAPAR) + hpx_blas1_scale_test_impl( + hpx::execution::simd, x, static_cast(2)); + hpx_blas1_scale_test_impl( + hpx::execution::par_simd, x, static_cast(2)); +#endif +} + +TEST_F(blas1_signed_double_fixture, 
hpx_scale) +{ + hpx_blas1_scale_test_impl( + HPXKernelsSTD::hpx_exec<>(), x, static_cast(2)); + hpx_blas1_scale_test_impl( + hpx::execution::par, x, static_cast(2)); + hpx_blas1_scale_test_impl( + hpx::execution::par_unseq, x, static_cast(2)); +#if defined(HPX_HAVE_DATAPAR) + hpx_blas1_scale_test_impl( + hpx::execution::simd, x, static_cast(2)); + hpx_blas1_scale_test_impl( + hpx::execution::par_simd, x, static_cast(2)); +#endif +} + +TEST_F(blas1_signed_complex_double_fixture, hpx_scale_complex_factor) +{ + using kc_t = std::complex; + using stdc_t = value_type; + if constexpr (alignof(value_type) == alignof(kc_t)) + { + const value_type factor{2., 0.}; + hpx_blas1_scale_test_impl(HPXKernelsSTD::hpx_exec<>(), x, factor); + hpx_blas1_scale_test_impl(hpx::execution::par, x, factor); + hpx_blas1_scale_test_impl(hpx::execution::par_unseq, x, factor); +#if defined(HPX_HAVE_DATAPAR) + hpx_blas1_scale_test_impl(hpx::execution::simd, x, factor); + hpx_blas1_scale_test_impl(hpx::execution::par_simd, x, factor); +#endif + } +} + +TEST_F(blas1_signed_complex_double_fixture, hpx_scale_double_factor) +{ + using kc_t = std::complex; + using stdc_t = value_type; + if constexpr (alignof(value_type) == alignof(kc_t)) + { + hpx_blas1_scale_test_impl(HPXKernelsSTD::hpx_exec<>(), x, 2.); + hpx_blas1_scale_test_impl(hpx::execution::par, x, 2.); + hpx_blas1_scale_test_impl(hpx::execution::par_unseq, x, 2.); +#if defined(HPX_HAVE_DATAPAR) + hpx_blas1_scale_test_impl(hpx::execution::simd, x, 2.); + hpx_blas1_scale_test_impl(hpx::execution::par_simd, x, 2.); +#endif + } +} diff --git a/tests/hpx-based/scale_rank2_hpx.cpp b/tests/hpx-based/scale_rank2_hpx.cpp new file mode 100644 index 00000000..24c43aff --- /dev/null +++ b/tests/hpx-based/scale_rank2_hpx.cpp @@ -0,0 +1,145 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#include +#include +#include + +#include "gtest/gtest.h" +#include "gtest_fixtures.hpp" + +namespace { + +template +void scale_gold_solution(A_t A, FactorT factor) +{ + 
FactorT result = {}; + for (std::size_t i = 0; i < A.extent(0); ++i) + { + for (std::size_t j = 0; j < A.extent(1); ++j) + { + A(i, j) *= factor; + } + } +} + +template +void hpx_blas_scale_test_impl(ExPolicy policy, A_t A, FactorT factor) +{ + namespace stdla = std::experimental::linalg; + + using value_type = typename A_t::value_type; + const std::size_t extent0 = A.extent(0); + const std::size_t extent1 = A.extent(1); + + // compute gold + std::vector gold(extent0 * extent1); + using mdspan_t = + mdspan>; + mdspan_t A_gold(gold.data(), extent0, extent1); + for (std::size_t i = 0; i < extent0; ++i) + { + for (std::size_t j = 0; j < extent1; ++j) + { + A_gold(i, j) = A(i, j); + } + } + scale_gold_solution(A_gold, factor); + + stdla::scale(policy, factor, A); + + if constexpr (std::is_same_v) + { + for (std::size_t i = 0; i < extent0; ++i) + { + for (std::size_t j = 0; j < extent1; ++j) + { + EXPECT_FLOAT_EQ(A(i, j), A_gold(i, j)); + } + } + } + + if constexpr (std::is_same_v) + { + for (std::size_t i = 0; i < extent0; ++i) + { + for (std::size_t j = 0; j < extent1; ++j) + { + EXPECT_DOUBLE_EQ(A(i, j), A_gold(i, j)); + } + } + } + + if constexpr (std::is_same_v>) + { + for (std::size_t i = 0; i < extent0; ++i) + { + for (std::size_t j = 0; j < extent1; ++j) + { + EXPECT_DOUBLE_EQ(A(i, j).real(), A_gold(i, j).real()); + EXPECT_DOUBLE_EQ(A(i, j).imag(), A_gold(i, j).imag()); + } + } + } +} +} // namespace + +TEST_F(blas2_signed_float_fixture, hpx_scale) +{ + hpx_blas_scale_test_impl( + HPXKernelsSTD::hpx_exec<>(), A_e0e1, static_cast(2)); + hpx_blas_scale_test_impl( + hpx::execution::par, A_e0e1, static_cast(2)); + hpx_blas_scale_test_impl( + hpx::execution::par_unseq, A_e0e1, static_cast(2)); + //#if defined(HPX_HAVE_DATAPAR) + // hpx_blas_scale_test_impl(hpx::execution::simd, A_e0e1, static_cast(2)); + // hpx_blas_scale_test_impl(hpx::execution::par_simd, A_e0e1, static_cast(2)); + //#endif +} + +TEST_F(blas2_signed_double_fixture, hpx_scale) +{ + 
hpx_blas_scale_test_impl( + HPXKernelsSTD::hpx_exec<>(), A_e0e1, static_cast(2)); + hpx_blas_scale_test_impl( + hpx::execution::par, A_e0e1, static_cast(2)); + hpx_blas_scale_test_impl( + hpx::execution::par_unseq, A_e0e1, static_cast(2)); + //#if defined(HPX_HAVE_DATAPAR) + // hpx_blas_scale_test_impl(hpx::execution::simd, A_e0e1, static_cast(2)); + // hpx_blas_scale_test_impl(hpx::execution::par_simd, A_e0e1, static_cast(2)); + //#endif +} + +TEST_F(blas2_signed_complex_double_fixture, hpx_scale_complex_factor) +{ + using kc_t = std::complex; + using stdc_t = value_type; + if constexpr (alignof(value_type) == alignof(kc_t)) + { + const value_type factor{2., 0.}; + hpx_blas_scale_test_impl(HPXKernelsSTD::hpx_exec<>(), A_e0e1, factor); + hpx_blas_scale_test_impl(hpx::execution::par, A_e0e1, factor); + hpx_blas_scale_test_impl(hpx::execution::par_unseq, A_e0e1, factor); + //#if defined(HPX_HAVE_DATAPAR) + // hpx_blas_scale_test_impl(hpx::execution::simd, A_e0e1, factor); + // hpx_blas_scale_test_impl(hpx::execution::par_simd, A_e0e1, factor); + //#endif + } +} + +TEST_F(blas2_signed_complex_double_fixture, hpx_scale_double_factor) +{ + using kc_t = std::complex; + using stdc_t = value_type; + if constexpr (alignof(value_type) == alignof(kc_t)) + { + hpx_blas_scale_test_impl(HPXKernelsSTD::hpx_exec<>(), A_e0e1, 2.); + hpx_blas_scale_test_impl(hpx::execution::par, A_e0e1, 2.); + hpx_blas_scale_test_impl(hpx::execution::par_unseq, A_e0e1, 2.); + //#if defined(HPX_HAVE_DATAPAR) + // hpx_blas_scale_test_impl(hpx::execution::simd, A_e0e1, 2.); + // hpx_blas_scale_test_impl(hpx::execution::par_simd, A_e0e1, 2.); + //#endif + } +} diff --git a/tests/hpx-based/test.cmake b/tests/hpx-based/test.cmake new file mode 100644 index 00000000..c4507db1 --- /dev/null +++ b/tests/hpx-based/test.cmake @@ -0,0 +1,28 @@ +include(FindUnixCommands) + +# run test executable +execute_process( + COMMAND ${EXE_NAME} + RESULT_VARIABLE RES_A + OUTPUT_FILE ${LOG_FILE}) + +# first check that 
numerically the test passes +if(RES_A) + message(FATAL_ERROR "numerical test failed") +else() + message("numerical test succeeded") +endif() + +# just checking the numerics is not enough +# because we need to ensure that this result +# comes from the HPX impl +# so we check that the proper string is found +# which signals that the correct HPX impl was found/called +set(CMD "grep -R '${TEST_STRING_FIND}' ${LOG_FILE} > /dev/null") +execute_process(COMMAND ${BASH} -c ${CMD} RESULT_VARIABLE RES_B) +if(RES_B) + message(FATAL_ERROR + "test failed: ${ALGO_NAME} did not call the correct HPX impl") +else() + message("${ALGO_NAME} called the correct HPX impl") +endif() diff --git a/tests/kokkos-based/CMakeLists.txt b/tests/kokkos-based/CMakeLists.txt index aa247c04..176f51f3 100644 --- a/tests/kokkos-based/CMakeLists.txt +++ b/tests/kokkos-based/CMakeLists.txt @@ -26,6 +26,8 @@ macro(linalg_add_test_kokkos TESTNAME FINDSTR) target_compile_definitions(${testExe} PRIVATE -DKOKKOS_STDBLAS_ENABLE_TESTS) endif() + set_target_properties(${testExe} PROPERTIES FOLDER "Tests/Kokkos") + add_test( NAME ${testNameFinal} COMMAND ${CMAKE_COMMAND} diff --git a/tests/native/CMakeLists.txt b/tests/native/CMakeLists.txt index 8cb09a04..92ba0dbe 100644 --- a/tests/native/CMakeLists.txt +++ b/tests/native/CMakeLists.txt @@ -10,6 +10,7 @@ macro(linalg_add_test name) target_link_libraries(${name} linalg GTest::GTest GTest::Main) endif() add_test(${name} ${name}) + set_target_properties(${name} PROPERTIES FOLDER "Tests") endmacro() linalg_add_test(abs_sum) diff --git a/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_add_hpx.hpp b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_add_hpx.hpp new file mode 100644 index 00000000..917a6ffa --- /dev/null +++ b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_add_hpx.hpp @@ -0,0 +1,213 @@ +/* +//@HEADER +// 
************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. // +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. 
Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_HPXKERNELS_ADD_HPP_ +#define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_HPXKERNELS_ADD_HPP_ + +#include +#include + +#include +#include + +#include "signal_hpx_impl_called.hpp" + +namespace HPXKernelsSTD { + +namespace { + +template ::size_type ext_x, class Layout_x, + class Accessor_x, class ElementType_y, + std::experimental::extents<>::size_type ext_y, class Layout_y, + class Accessor_y, class ElementType_z, + std::experimental::extents<>::size_type ext_z, class Layout_z, + class Accessor_z> +void add_rank_1(ExPolicy&& policy, + std::experimental::mdspan, + Layout_x, Accessor_x> + x, + std::experimental::mdspan, + Layout_y, Accessor_y> + y, + std::experimental::mdspan, + Layout_z, Accessor_z> + z) +{ + static_assert(x.static_extent(0) == std::experimental::dynamic_extent || + z.static_extent(0) == std::experimental::dynamic_extent || + x.static_extent(0) == z.static_extent(0)); + static_assert(y.static_extent(0) == std::experimental::dynamic_extent || + z.static_extent(0) == std::experimental::dynamic_extent || + y.static_extent(0) == z.static_extent(0)); + static_assert(x.static_extent(0) == std::experimental::dynamic_extent || + y.static_extent(0) == std::experimental::dynamic_extent || + x.static_extent(0) == y.static_extent(0)); + +#if defined(HPX_HAVE_DATAPAR) + using mdspan_x_t = std::experimental::mdspan, Layout_x, Accessor_x>; + using mdspan_y_t = std::experimental::mdspan, Layout_y, Accessor_y>; + using mdspan_z_t = std::experimental::mdspan, Layout_z, Accessor_z>; + + constexpr bool allow_explicit_vectorization = + mdspan_x_t::is_always_contiguous() && + mdspan_y_t::is_always_contiguous() && + mdspan_z_t::is_always_contiguous() && + (hpx::is_vectorpack_execution_policy_v || + hpx::is_unsequenced_execution_policy_v); + + if constexpr 
(allow_explicit_vectorization) + { + // vectorize only if the arrays are contiguous and not strided + if (x.is_contiguous() && x.stride(0) == 1 && y.is_contiguous() && + y.stride(0) == 1 && z.is_contiguous() && z.stride(0) == 1) + { + auto zip = hpx::util::make_zip_iterator(x.data(), y.data()); + hpx::transform(policy, zip, zip + x.extent(0), z.data(), + [&](auto v) { return hpx::get<0>(v) + hpx::get<1>(v); }); + } + else + { + // fall back to the underlying base policy + hpx::experimental::for_loop(policy.base_policy(), + std::experimental::extents<>::size_type(0), x.extent(0), + [&](auto i) { z(i) = x(i) + y(i); }); + } + } + else +#endif + { + hpx::experimental::for_loop(policy, + std::experimental::extents<>::size_type(0), z.extent(0), + [&](auto i) { z(i) = x(i) + y(i); }); + } +} + +template ::size_type numRows_x, + std::experimental::extents<>::size_type numCols_x, class Layout_x, + class Accessor_x, class ElementType_y, + std::experimental::extents<>::size_type numRows_y, + std::experimental::extents<>::size_type numCols_y, class Layout_y, + class Accessor_y, class ElementType_z, + std::experimental::extents<>::size_type numRows_z, + std::experimental::extents<>::size_type numCols_z, class Layout_z, + class Accessor_z> +void add_rank_2(ExPolicy&& policy, + std::experimental::mdspan, Layout_x, Accessor_x> + x, + std::experimental::mdspan, Layout_y, Accessor_y> + y, + std::experimental::mdspan, Layout_z, Accessor_z> + z) +{ + static_assert(x.static_extent(0) == std::experimental::dynamic_extent || + z.static_extent(0) == std::experimental::dynamic_extent || + x.static_extent(0) == z.static_extent(0)); + static_assert(y.static_extent(0) == std::experimental::dynamic_extent || + z.static_extent(0) == std::experimental::dynamic_extent || + y.static_extent(0) == z.static_extent(0)); + static_assert(x.static_extent(0) == std::experimental::dynamic_extent || + y.static_extent(0) == std::experimental::dynamic_extent || + x.static_extent(0) == y.static_extent(0)); + 
+ static_assert(x.static_extent(1) == std::experimental::dynamic_extent || + z.static_extent(1) == std::experimental::dynamic_extent || + x.static_extent(1) == z.static_extent(1)); + static_assert(y.static_extent(1) == std::experimental::dynamic_extent || + z.static_extent(1) == std::experimental::dynamic_extent || + y.static_extent(1) == z.static_extent(1)); + static_assert(x.static_extent(1) == std::experimental::dynamic_extent || + y.static_extent(1) == std::experimental::dynamic_extent || + x.static_extent(1) == y.static_extent(1)); + + using size_type = typename std::experimental::extents<>::size_type; + + hpx::experimental::for_loop(policy, size_type(0), x.extent(1), [&](auto j) { + for (size_type i = 0; i < x.extent(0); ++i) + { + z(i, j) = x(i, j) + y(i, j); + } + }); +} + +} // end anonymous namespace + +MDSPAN_TEMPLATE_REQUIRES(class ExPolicy, class ElementType_x, + std::experimental::extents<>::size_type... ext_x, class Layout_x, + class Accessor_x, class ElementType_y, + std::experimental::extents<>::size_type... ext_y, class Layout_y, + class Accessor_y, class ElementType_z, + std::experimental::extents<>::size_type... 
ext_z, class Layout_z, + class Accessor_z, + /* requires */ + (sizeof...(ext_x) == sizeof...(ext_y) && + sizeof...(ext_x) == sizeof...(ext_z) && sizeof...(ext_z) <= 2)) +void add(hpx_exec&& policy, + std::experimental::mdspan, Layout_x, Accessor_x> + x, + std::experimental::mdspan, Layout_y, Accessor_y> + y, + std::experimental::mdspan, Layout_z, Accessor_z> + z) +{ + if constexpr (z.rank() == 1) + { + add_rank_1(policy.policy_, x, y, z); + } + else if constexpr (z.rank() == 2) + { + add_rank_2(policy.policy_, x, y, z); + } +} +} // namespace HPXKernelsSTD + +#endif //LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_HPXKERNELS_ADD_HPP_ diff --git a/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp new file mode 100644 index 00000000..59a273eb --- /dev/null +++ b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp @@ -0,0 +1,100 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_HPXKERNELS_SCALE_HPP_ +#define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_HPXKERNELS_SCALE_HPP_ + +#include + +#include +#include + +#include "signal_hpx_impl_called.hpp" + +namespace HPXKernelsSTD { + +namespace { + +template ::size_type ext0, class Layout, class Accessor> +void linalg_scale_rank_1(ExPolicy&& policy, const Scalar alpha, + std::experimental::mdspan, + Layout, Accessor> + x) +{ +#if defined(HPX_HAVE_DATAPAR) + using mdspan_t = std::experimental::mdspan, Layout, Accessor>; + + constexpr bool allow_explicit_vectorization = + mdspan_t::is_always_contiguous() && + (hpx::is_vectorpack_execution_policy_v || + hpx::is_unsequenced_execution_policy_v); + + if constexpr (allow_explicit_vectorization) + { + // vectorize only if the array is contiguous and not strided + if (x.is_contiguous() && x.stride(0) == 1) + { + hpx::for_each(policy, 
x.data(), x.data() + x.extent(0), + [&](auto& v) { v *= alpha; }); + } + else + { + // fall back to the underlying base policy + hpx::experimental::for_loop(policy.base_policy(), + std::experimental::extents<>::size_type(0), x.extent(0), + [&](auto i) { x(i) *= alpha; }); + } + } + else +#endif + { + hpx::experimental::for_loop(policy, + std::experimental::extents<>::size_type(0), x.extent(0), + [&](auto i) { x(i) *= alpha; }); + } +} + +template ::size_type numRows, + std::experimental::extents<>::size_type numCols, class Layout, + class Accessor> +void linalg_scale_rank_2(ExPolicy&& policy, const Scalar alpha, + std::experimental::mdspan, Layout, Accessor> + A) +{ + using size_type = typename std::experimental::extents<>::size_type; + hpx::experimental::for_loop(policy, + std::experimental::extents<>::size_type(0), A.extent(1), [&](auto j) { + for (size_type i = 0; i < A.extent(0); ++i) + { + A(i, j) *= alpha; + } + }); +} + +} // namespace + +MDSPAN_TEMPLATE_REQUIRES(class ExPolicy, class Scalar, class ElementType, + std::experimental::extents<>::size_type... 
ext, class Layout, + class Accessor, + /* requires */ (sizeof...(ext) <= 2)) +void scale(hpx_exec&& policy, const Scalar alpha, + std::experimental::mdspan, + Layout, Accessor> + x) +{ + Impl::signal_hpx_impl_called("scale"); + if constexpr (x.rank() == 1) + { + linalg_scale_rank_1(policy.policy_, alpha, x); + } + else if constexpr (x.rank() == 2) + { + linalg_scale_rank_2(policy.policy_, alpha, x); + } +} + +} // namespace HPXKernelsSTD +#endif diff --git a/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/exec_policy_wrapper_hpx.hpp b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/exec_policy_wrapper_hpx.hpp new file mode 100644 index 00000000..f3e6553d --- /dev/null +++ b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/exec_policy_wrapper_hpx.hpp @@ -0,0 +1,71 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#ifndef __LINALG_HPXKERNELS_EXEC_POLICY_WRAPPER_HPX_HPP_ +#define __LINALG_HPXKERNELS_EXEC_POLICY_WRAPPER_HPX_HPP_ + +#include + +#ifdef LINALG_ENABLE_HPX_DEFAULT +#include +#endif +#include + +namespace HPXKernelsSTD { + +template +struct hpx_exec +{ + using type = ExPolicy; + ExPolicy policy_; +}; + +template +auto execpolicy_mapper(hpx_exec policy) +{ + return policy; +} +} // namespace HPXKernelsSTD + +// Remap standard execution policies to HPX +#ifdef LINALG_ENABLE_HPX_DEFAULT +namespace std { namespace experimental { inline namespace __p1673_version_0 { +namespace linalg { +inline auto execpolicy_mapper(std::execution::parallel_policy) +{ + return HPXKernelsSTD::hpx_exec{ + hpx::execution::par}; +} +inline auto execpolicy_mapper(std::execution::parallel_unsequenced_policy) +{ + return HPXKernelsSTD::hpx_exec{ + hpx::execution::par_unseq}; +} +}}}} // namespace std::experimental::__p1673_version_0::linalg +#endif + +namespace std { namespace experimental { inline namespace __p1673_version_0 { +namespace linalg { +inline auto execpolicy_mapper(hpx::execution::parallel_policy policy) +{ + return HPXKernelsSTD::hpx_exec{ + 
std::move(policy)}; +} +inline auto execpolicy_mapper(hpx::execution::parallel_unsequenced_policy policy) +{ + return HPXKernelsSTD::hpx_exec{ + std::move(policy)}; +} +#if defined(HPX_HAVE_DATAPAR) +inline auto execpolicy_mapper(hpx::execution::simd_policy policy) +{ + return HPXKernelsSTD::hpx_exec{ + std::move(policy)}; +} +inline auto execpolicy_mapper(hpx::execution::par_simd_policy policy) +{ + return HPXKernelsSTD::hpx_exec{ + std::move(policy)}; +} +#endif +}}}} // namespace std::experimental::__p1673_version_0::linalg +#endif diff --git a/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/signal_hpx_impl_called.hpp b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/signal_hpx_impl_called.hpp new file mode 100644 index 00000000..d9683c54 --- /dev/null +++ b/tpl-implementations/include/experimental/__p1673_bits/hpx-kernels/signal_hpx_impl_called.hpp @@ -0,0 +1,61 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2019) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Christian R. Trott (crtrott@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_HPXKERNELS_UTILS_HPP_ +#define LINALG_TPLIMPLEMENTATIONS_INCLUDE_EXPERIMENTAL___P1673_BITS_HPXKERNELS_UTILS_HPP_ + +#include + +namespace HPXKernelsSTD { namespace Impl { + +#if defined(HPX_STDBLAS_ENABLE_TESTS) +void signal_hpx_impl_called(std::string_view functionName); +#else +constexpr void signal_hpx_impl_called( + std::string_view /* functionName */) noexcept +{} +#endif + +}} // namespace HPXKernelsSTD::Impl + +#endif diff --git a/tpl-implementations/include/experimental/linalg_hpxkernels b/tpl-implementations/include/experimental/linalg_hpxkernels new file mode 100644 index 00000000..6e6d253c --- /dev/null +++ b/tpl-implementations/include/experimental/linalg_hpxkernels @@ -0,0 +1,35 @@ +// Copyright (c) 2022 Hartmut Kaiser + +#pragma once + +#include +#include + +// blas1 (according to P1673) +//#include "__p1673_bits/hpx-kernels/blas1_dot_hpx.hpp" +#include "__p1673_bits/hpx-kernels/blas1_add_hpx.hpp" +#include 
"__p1673_bits/hpx-kernels/blas1_scale_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_idx_abs_max_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_vector_norm2_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_vector_abs_sum_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_vector_sum_of_squares_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_matrix_frob_norm_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_matrix_inf_norm_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_matrix_one_norm_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_swap_elements_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas1_copy_hpx.hpp" + +// blas2 (according to P1673) +//#include "__p1673_bits/hpx-kernels/blas2_matrix_rank_1_update.hpp" +//#include "__p1673_bits/hpx-kernels/blas2_matrix_rank_2_update.hpp" +//#include "__p1673_bits/hpx-kernels/blas2_gemv_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas2_symv_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas2_hemv_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas2_triangular_mat_vec_product.hpp" + +// blas3 (according to P1673) +//#include "__p1673_bits/hpx-kernels/blas3_overwriting_gemm_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas3_matrix_rank_k_update.hpp" +//#include "__p1673_bits/hpx-kernels/blas3_matrix_rank_2k_update.hpp" +//#include "__p1673_bits/hpx-kernels/blas3_matrix_product_hpx.hpp" +//#include "__p1673_bits/hpx-kernels/blas3_triangular_matrix_matrix_solve.hpp"