From bb4b1ef890f633153159a36847de39dd03982ac0 Mon Sep 17 00:00:00 2001 From: Francesco Rizzi Date: Wed, 27 Oct 2021 11:47:33 +0200 Subject: [PATCH] customization: tpl hooks for sevaral BLAS1,2 fncs #96 --- .../__p1673_bits/blas1_givens.hpp | 99 ++++++++++++++++++- .../__p1673_bits/blas1_linalg_add.hpp | 4 - .../__p1673_bits/blas1_matrix_frob_norm.hpp | 64 +++++++++++- .../__p1673_bits/blas1_matrix_inf_norm.hpp | 71 +++++++++++-- .../__p1673_bits/blas1_matrix_one_norm.hpp | 67 +++++++++++-- .../experimental/__p1673_bits/blas1_scale.hpp | 15 +-- .../__p1673_bits/blas1_vector_idx_abs_max.hpp | 10 +- .../blas2_matrix_vector_product.hpp | 80 +++++++++++++-- .../blas2_matrix_vector_solve.hpp | 67 ++++++++++++- 9 files changed, 426 insertions(+), 51 deletions(-) diff --git a/include/experimental/__p1673_bits/blas1_givens.hpp b/include/experimental/__p1673_bits/blas1_givens.hpp index f12d521b..0f7932e6 100644 --- a/include/experimental/__p1673_bits/blas1_givens.hpp +++ b/include/experimental/__p1673_bits/blas1_givens.hpp @@ -84,6 +84,33 @@ namespace linalg { // DSQRT -> sqrt (Real input and return value) // slapy2(real(fs), aimag(fs)) -> hypot(real(fs), imag(fs)) + +// begin anonymous namespace +namespace { +template +struct is_custom_givens_rotation_apply_avail : std::false_type {}; + +template +struct is_custom_givens_rotation_apply_avail< + Exec, x_t, y_t, c_t, s_t, + std::void_t< + decltype(givens_rotation_apply + (std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval() + ) + ) + > + > +{ + static constexpr bool value = !std::is_same::value; +}; +} // end anonymous namespace + + + template void givens_rotation_setup(const Real f, const Real g, @@ -344,6 +371,7 @@ void givens_rotation_setup(const complex& f, } } +// c and s are std::floating_point template::size_type ext1, class Layout1, @@ -354,6 +382,7 @@ template void givens_rotation_apply( + std::experimental::linalg::impl::inline_exec_t&& /* exec */, std::experimental::mdspan, Layout1, Accessor1> x, std::experimental::mdspan, Layout2, Accessor2> y, const Real c, @@ -381,15 +410,47 @@ template void givens_rotation_apply( - ExecutionPolicy&& /* exec */, + ExecutionPolicy&& exec, + std::experimental::mdspan, Layout1, Accessor1> x, + std::experimental::mdspan, Layout2, Accessor2> y, + const Real c, + const Real s) +{ + + constexpr bool use_custom = is_custom_givens_rotation_apply_avail< + decltype(execpolicy_mapper(exec)), decltype(x), decltype(y), Real, Real + >::value; + + if constexpr(use_custom){ + givens_rotation_apply(execpolicy_mapper(exec), x, y, c, s); + } + else + { + givens_rotation_apply(std::experimental::linalg::impl::inline_exec_t(), x, y, c, s); + } +} + +template::size_type ext1, + class Layout1, + class Accessor1, + class ElementType2, + extents<>::size_type ext2, + class Layout2, + class Accessor2, + std::floating_point Real> +void givens_rotation_apply( std::experimental::mdspan, Layout1, Accessor1> x, std::experimental::mdspan, Layout2, Accessor2> y, const Real c, const Real s) { - givens_rotation_apply(x, y, c, s); + givens_rotation_apply(std::experimental::linalg::impl::default_exec_t(), x, y, c, s); } + +// c is std::floating_point +// s is complex template::size_type ext1, class Layout1, @@ -400,6 +461,7 @@ template void givens_rotation_apply( + std::experimental::linalg::impl::inline_exec_t&& /* exec */, std::experimental::mdspan, Layout1, Accessor1> x, std::experimental::mdspan, Layout2, Accessor2> y, const Real c, @@ -428,13 +490,42 @@ template void givens_rotation_apply( - ExecutionPolicy&& /* exec */, + ExecutionPolicy&& exec, + std::experimental::mdspan, Layout1, Accessor1> x, + std::experimental::mdspan, Layout2, Accessor2> y, + const Real c, + const complex s) +{ + + constexpr bool use_custom = is_custom_givens_rotation_apply_avail< + decltype(execpolicy_mapper(exec)), decltype(x), decltype(y), Real, complex + >::value; + + if constexpr(use_custom){ + givens_rotation_apply(execpolicy_mapper(exec), x, y, c, s); + } + else + { + givens_rotation_apply(std::experimental::linalg::impl::inline_exec_t(), x, y, c, s); + } +} + +template::size_type ext1, + class Layout1, + class Accessor1, + class ElementType2, + extents<>::size_type ext2, + class Layout2, + class Accessor2, + std::floating_point Real> +void givens_rotation_apply( std::experimental::mdspan, Layout1, Accessor1> x, std::experimental::mdspan, Layout2, Accessor2> y, const Real c, const complex s) { - givens_rotation_apply(x, y, c, s); + givens_rotation_apply(std::experimental::linalg::impl::default_exec_t(), x, y, c, s); } } // end namespace linalg diff --git a/include/experimental/__p1673_bits/blas1_linalg_add.hpp b/include/experimental/__p1673_bits/blas1_linalg_add.hpp index 0c5a24a7..14c71758 100644 --- a/include/experimental/__p1673_bits/blas1_linalg_add.hpp +++ b/include/experimental/__p1673_bits/blas1_linalg_add.hpp @@ -154,10 +154,6 @@ struct is_custom_add_avail< } // end anonymous namespace -// ------------ -// PUBLIC API: -// ------------ - template::size_type ... ext_x, class Layout_x, diff --git a/include/experimental/__p1673_bits/blas1_matrix_frob_norm.hpp b/include/experimental/__p1673_bits/blas1_matrix_frob_norm.hpp index af55cbf6..160a5ad6 100644 --- a/include/experimental/__p1673_bits/blas1_matrix_frob_norm.hpp +++ b/include/experimental/__p1673_bits/blas1_matrix_frob_norm.hpp @@ -51,14 +51,43 @@ namespace experimental { inline namespace __p1673_version_0 { namespace linalg { +// begin anonymous namespace +namespace { + +template +struct is_custom_matrix_frob_norm_avail : std::false_type {}; + +template +struct is_custom_matrix_frob_norm_avail< + Exec, A_t, Scalar, + std::enable_if_t< + std::is_same< + decltype(matrix_frob_norm + (std::declval(), + std::declval(), + std::declval() + ) + ), + Scalar + >::value + > + > +{ + static constexpr bool value = + !std::is_same::value; +}; + +} // end anonymous namespace + template< class ElementType, - extents<>::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor, class Scalar> Scalar matrix_frob_norm( + std::experimental::linalg::impl::inline_exec_t&& /* exec */, std::experimental::mdspan, Layout, Accessor> A, Scalar init) { @@ -100,25 +129,50 @@ Scalar matrix_frob_norm( template::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor, class Scalar> Scalar matrix_frob_norm( - ExecutionPolicy&& /* exec */, + ExecutionPolicy&& exec, std::experimental::mdspan, Layout, Accessor> A, Scalar init) { - return matrix_frob_norm(A, init); + + constexpr bool use_custom = is_custom_matrix_frob_norm_avail< + decltype(execpolicy_mapper(exec)), decltype(A), Scalar + >::value; + + if constexpr(use_custom){ + return matrix_frob_norm(execpolicy_mapper(exec), A, init); + } + else{ + return matrix_frob_norm(std::experimental::linalg::impl::inline_exec_t(), A, init); + } } +template< + class ElementType, + extents<>::size_type numRows, + extents<>::size_type numCols, + class Layout, + class Accessor, + class Scalar> +Scalar matrix_frob_norm( + std::experimental::mdspan, Layout, Accessor> A, + Scalar init) +{ + return matrix_frob_norm(std::experimental::linalg::impl::default_exec_t(), A, init); +} + + // TODO: Implement auto functions #if 0 template auto matrix_frob_norm(in_matrix_t A) { - + } template +struct is_custom_matrix_inf_norm_avail : std::false_type {}; + +template +struct is_custom_matrix_inf_norm_avail< + Exec, A_t, Scalar, + std::enable_if_t< + std::is_same< + decltype(matrix_inf_norm + (std::declval(), + std::declval(), + std::declval() + ) + ), + Scalar + >::value + > + > +{ + static constexpr bool value = + !std::is_same::value; +}; + +} // end anonymous namespace + template< class ElementType, - extents<>::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor, class Scalar> Scalar matrix_inf_norm( + std::experimental::linalg::impl::inline_exec_t&& /* exec */, std::experimental::mdspan, Layout, Accessor> A, Scalar init) { @@ -89,28 +118,52 @@ Scalar matrix_inf_norm( template< class ExecutionPolicy, class ElementType, - extents<>::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor, class Scalar> Scalar matrix_inf_norm( - ExecutionPolicy&& /* exec */, + ExecutionPolicy&& exec, + std::experimental::mdspan, Layout, Accessor> A, + Scalar init) +{ + + constexpr bool use_custom = is_custom_matrix_inf_norm_avail< + decltype(execpolicy_mapper(exec)), decltype(A), Scalar + >::value; + + if constexpr(use_custom){ + return matrix_inf_norm(execpolicy_mapper(exec), A, init); + } + else{ + return matrix_inf_norm(std::experimental::linalg::impl::inline_exec_t(), A, init); + } +} + +template< + class ElementType, + extents<>::size_type numRows, + extents<>::size_type numCols, + class Layout, + class Accessor, + class Scalar> +Scalar matrix_inf_norm( std::experimental::mdspan, Layout, Accessor> A, Scalar init) { - return matrix_inf_norm(A, init); + return matrix_inf_norm(std::experimental::linalg::impl::default_exec_t(), A, init); } namespace matrix_inf_norm_detail { using std::abs; - + // The point of this is to do correct ADL for abs, // without exposing "using std::abs" in the outer namespace. template< class ElementType, - extents<>::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor> @@ -121,21 +174,21 @@ namespace matrix_inf_norm_detail { template< class ElementType, - extents<>::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor> auto matrix_inf_norm( std::experimental::mdspan, Layout, Accessor> A) -> decltype(matrix_inf_norm_detail::matrix_inf_norm_return_type_deducer(A)) -{ +{ using return_t = decltype(matrix_inf_norm_detail::matrix_inf_norm_return_type_deducer(A)); return matrix_inf_norm(A, return_t{}); } template::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor> diff --git a/include/experimental/__p1673_bits/blas1_matrix_one_norm.hpp b/include/experimental/__p1673_bits/blas1_matrix_one_norm.hpp index 23c74598..103a9ab4 100644 --- a/include/experimental/__p1673_bits/blas1_matrix_one_norm.hpp +++ b/include/experimental/__p1673_bits/blas1_matrix_one_norm.hpp @@ -51,14 +51,44 @@ namespace experimental { inline namespace __p1673_version_0 { namespace linalg { +// begin anonymous namespace +namespace { + +template +struct is_custom_matrix_one_norm_avail : std::false_type {}; + +template +struct is_custom_matrix_one_norm_avail< + Exec, A_t, Scalar, + std::enable_if_t< + std::is_same< + decltype(matrix_one_norm + (std::declval(), + std::declval(), + std::declval() + ) + ), + Scalar + >::value + > + > +{ + static constexpr bool value = + !std::is_same::value; +}; + +} // end anonymous namespace + + template< class ElementType, - extents<>::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor, class Scalar> Scalar matrix_one_norm( + std::experimental::linalg::impl::inline_exec_t&& /* exec */, std::experimental::mdspan, Layout, Accessor> A, Scalar init) { @@ -91,19 +121,44 @@ Scalar matrix_one_norm( template< class ExecutionPolicy, class ElementType, - extents<>::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor, class Scalar> Scalar matrix_one_norm( - ExecutionPolicy&& /* exec */, + ExecutionPolicy&& exec, std::experimental::mdspan, Layout, Accessor> A, Scalar init) { - return matrix_one_norm(A, init); + + constexpr bool use_custom = is_custom_matrix_one_norm_avail< + decltype(execpolicy_mapper(exec)), decltype(A), Scalar + >::value; + + if constexpr(use_custom){ + return matrix_one_norm(execpolicy_mapper(exec), A, init); + } + else{ + return matrix_one_norm(std::experimental::linalg::impl::inline_exec_t(), A, init); + } } +template< + class ElementType, + extents<>::size_type numRows, + extents<>::size_type numCols, + class Layout, + class Accessor, + class Scalar> +Scalar matrix_one_norm( + std::experimental::mdspan, Layout, Accessor> A, + Scalar init) +{ + return matrix_one_norm(std::experimental::linalg::impl::default_exec_t(), A, init); +} + + namespace matrix_one_norm_detail { // The point of this is to do correct ADL for abs, @@ -128,14 +183,14 @@ template< auto matrix_one_norm( std::experimental::mdspan, Layout, Accessor> A) -> decltype(matrix_one_norm_detail::matrix_one_norm_return_type_deducer(A)) -{ +{ using return_t = decltype(matrix_one_norm_detail::matrix_one_norm_return_type_deducer(A)); return matrix_one_norm(A, return_t{}); } template::size_type numRows, + extents<>::size_type numRows, extents<>::size_type numCols, class Layout, class Accessor> diff --git a/include/experimental/__p1673_bits/blas1_scale.hpp b/include/experimental/__p1673_bits/blas1_scale.hpp index 2d42d423..55598451 100644 --- a/include/experimental/__p1673_bits/blas1_scale.hpp +++ b/include/experimental/__p1673_bits/blas1_scale.hpp @@ -88,12 +88,15 @@ template struct is_custom_scale_avail : std::false_type {}; template -struct is_custom_scale_avail(), - std::declval(), - std::declval())) >> { - static constexpr bool value = !std::is_same::value; +struct is_custom_scale_avail< + Exec,Scalar,x_t, + std::void_t(), + std::declval(), + std::declval())) >> +{ + static constexpr bool value = !std::is_same + ::value; }; } // end anonymous namespace diff --git a/include/experimental/__p1673_bits/blas1_vector_idx_abs_max.hpp b/include/experimental/__p1673_bits/blas1_vector_idx_abs_max.hpp index 9badf9cb..6b787950 100644 --- a/include/experimental/__p1673_bits/blas1_vector_idx_abs_max.hpp +++ b/include/experimental/__p1673_bits/blas1_vector_idx_abs_max.hpp @@ -58,11 +58,9 @@ template struct is_custom_idx_abs_max_avail< Exec, v_t, std::enable_if_t< + //FRizzi: maybe should use is_convertible? std::is_same< - decltype(idx_abs_max(std::declval(), - std::declval() - ) - ), + decltype(idx_abs_max(std::declval(), std::declval())), extents<>::size_type >::value > @@ -95,10 +93,6 @@ extents<>::size_type idx_abs_max_default_impl( } // end anonymous namespace -// ------------ -// PUBLIC API: -// ------------ - template::size_type ext0, class Layout, diff --git a/include/experimental/__p1673_bits/blas2_matrix_vector_product.hpp b/include/experimental/__p1673_bits/blas2_matrix_vector_product.hpp index 72abf8f7..d5ba4f10 100644 --- a/include/experimental/__p1673_bits/blas2_matrix_vector_product.hpp +++ b/include/experimental/__p1673_bits/blas2_matrix_vector_product.hpp @@ -51,8 +51,10 @@ namespace experimental { inline namespace __p1673_version_0 { namespace linalg { -// Overwriting general matrix-vector product: y := A * x + namespace { + +// Overwriting general matrix-vector product: y := A * x template struct is_custom_matrix_vector_product_avail : std::false_type {}; @@ -67,6 +69,32 @@ struct is_custom_matrix_vector_product_avail +struct is_custom_tri_mat_vec_product_avail : std::false_type {}; + +template +struct is_custom_tri_mat_vec_product_avail< + Exec, A_t, Tri_t, D_t, X_t, Y_t, + std::void_t< + decltype(triangular_matrix_vector_product + (std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval() + ) + ) + > + > +{ + static constexpr bool value = + !std::is_same::value; +}; + +} // end anonymous namespace + template::size_type numRows_A, extents<>::size_type numCols_A, @@ -81,7 +109,7 @@ template requires (Layout_A::template mapping >::is_always_unique()) -void linalg_matrix_vector_product( +void matrix_vector_product( std::experimental::linalg::impl::inline_exec_t&& /* exec */, std::experimental::mdspan, Layout_A, Accessor_A> A, std::experimental::mdspan, Layout_x, Accessor_x> x, @@ -97,8 +125,6 @@ void linalg_matrix_vector_product( } } -} // end anonymous namespace - template::size_type numRows_A, @@ -126,7 +152,7 @@ void matrix_vector_product( decltype(y)>::value) { matrix_vector_product(execpolicy_mapper(exec), A, x, y); } else { - linalg_matrix_vector_product(std::experimental::linalg::impl::inline_exec_t(), A, x, y); + matrix_vector_product(std::experimental::linalg::impl::inline_exec_t(), A, x, y); } } @@ -553,6 +579,7 @@ template requires (Layout_A::template mapping>::is_always_unique()) void triangular_matrix_vector_product( + std::experimental::linalg::impl::inline_exec_t&& /* exec */, std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, DiagonalStorage d, @@ -608,16 +635,55 @@ template void triangular_matrix_vector_product( - ExecutionPolicy&& /* exec */, + ExecutionPolicy&& exec, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_x, Accessor_x> x, + std::experimental::mdspan, Layout_y, Accessor_y> y) +{ + + constexpr bool use_custom = is_custom_tri_mat_vec_product_avail< + decltype(execpolicy_mapper(exec)), decltype(A), decltype(t), decltype(d), decltype(x), decltype(y) + >::value; + + if constexpr(use_custom){ + triangular_matrix_vector_product(execpolicy_mapper(exec), A, t, d, x, y); + } + else + { + triangular_matrix_vector_product(std::experimental::linalg::impl::inline_exec_t(), + A, t, d, x, y); + } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_x, + extents<>::size_type ext_x, + class Layout_x, + class Accessor_x, + class ElementType_y, + extents<>::size_type ext_y, + class Layout_y, + class Accessor_y> + requires (Layout_A::template mapping>::is_always_unique()) +void triangular_matrix_vector_product( std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, DiagonalStorage d, std::experimental::mdspan, Layout_x, Accessor_x> x, std::experimental::mdspan, Layout_y, Accessor_y> y) { - triangular_matrix_vector_product(A, t, d, x, y); + triangular_matrix_vector_product(std::experimental::linalg::impl::default_exec_t(), A, t, d, x, y); } + // Updating triangular matrix-vector product: z := y + A * x template +struct is_custom_tri_mat_vec_solve_avail : std::false_type {}; + +template +struct is_custom_tri_mat_vec_solve_avail< + Exec, A_t, Tri_t, D_t, B_t, X_t, + std::void_t< + decltype(triangular_matrix_vector_solve + (std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval(), + std::declval() + ) + ) + > + > +{ + static constexpr bool value = + !std::is_same::value; +}; + } // end anonymous namespace + template::size_type numRows_A, extents<>::size_type numCols_A, @@ -165,6 +190,7 @@ template void triangular_matrix_vector_solve( + std::experimental::linalg::impl::inline_exec_t&& /* exec */, std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, DiagonalStorage d, @@ -196,14 +222,51 @@ template void triangular_matrix_vector_solve( - ExecutionPolicy&& /* exec */, + ExecutionPolicy&& exec, + std::experimental::mdspan, Layout_A, Accessor_A> A, + Triangle t, + DiagonalStorage d, + std::experimental::mdspan, Layout_B, Accessor_B> b, + std::experimental::mdspan, Layout_X, Accessor_X> x) +{ + constexpr bool use_custom = is_custom_tri_mat_vec_solve_avail< + decltype(execpolicy_mapper(exec)), decltype(A), decltype(t), decltype(d), decltype(b), decltype(x) + >::value; + + if constexpr(use_custom){ + triangular_matrix_vector_solve(execpolicy_mapper(exec), A, t, d, b, x); + } + else + { + triangular_matrix_vector_solve(std::experimental::linalg::impl::inline_exec_t(), + A, t, d, b, x); + } +} + +template::size_type numRows_A, + extents<>::size_type numCols_A, + class Layout_A, + class Accessor_A, + class Triangle, + class DiagonalStorage, + class ElementType_B, + extents<>::size_type ext_B, + class Layout_B, + class Accessor_B, + class ElementType_X, + extents<>::size_type ext_X, + class Layout_X, + class Accessor_X> +void triangular_matrix_vector_solve( std::experimental::mdspan, Layout_A, Accessor_A> A, Triangle t, DiagonalStorage d, std::experimental::mdspan, Layout_B, Accessor_B> b, std::experimental::mdspan, Layout_X, Accessor_X> x) { - triangular_matrix_vector_solve(A, t, d, b, x); + triangular_matrix_vector_solve(std::experimental::linalg::impl::default_exec_t(), + A, t, d, b, x); } } // end namespace linalg