-
Notifications
You must be signed in to change notification settings - Fork 22
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added language standard parallelism to dot product #251
base: main
Are you sure you want to change the base?
Changes from all commits
d2ae43f
dfaf869
6574231
b6cb626
c3f4957
2b8f143
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ | |
// Make mdspan less verbose | ||
using std::experimental::mdspan; | ||
using std::experimental::extents; | ||
using std::experimental::dynamic_extent; | ||
using std::dynamic_extent; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This change suggests that CI doesn't actually build the examples. Should we consider fixing that? This change might actually need to depend on the C++ version, as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CI builds the examples; I just wasn't building them on my PC. I was unaware of the macros change. Can you give me an example? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Using these macros might require updating the version of the reference mdspan implementation that github's CI pulls in. |
||
|
||
int main(int argc, char* argv[]) { | ||
std::cout << "Scale" << std::endl; | ||
|
@@ -26,7 +26,7 @@ int main(int argc, char* argv[]) { | |
// With CTAD working we could do, GCC 11.1 works but some others are buggy | ||
// mdspan x(x_vec.data(), N); | ||
mdspan<double, extents<std::size_t, dynamic_extent>> x(x_vec.data(),N); | ||
for(int i=0; i<x.extent(0); i++) x(i) = i; | ||
for(int i=0; i<x.extent(0); i++) x[i] = i; | ||
|
||
// Call linalg::scale x = 2.0*x; | ||
std::experimental::linalg::scale(2.0, x); | ||
|
@@ -36,6 +36,6 @@ int main(int argc, char* argv[]) { | |
std::experimental::linalg::scale(2.0, x); | ||
#endif | ||
|
||
for(int i=0; i<x.extent(0); i+=5) std::cout << i << " " << x(i) << std::endl; | ||
for(int i=0; i<x.extent(0); i+=5) std::cout << i << " " << x[i] << std::endl; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,7 @@ | |
// Make mdspan less verbose | ||
using std::experimental::mdspan; | ||
using std::experimental::extents; | ||
using std::experimental::dynamic_extent; | ||
using std::dynamic_extent; | ||
|
||
int main(int argc, char* argv[]) { | ||
std::cout << "Matrix Vector Product Basic" << std::endl; | ||
|
@@ -31,11 +31,11 @@ int main(int argc, char* argv[]) { | |
mdspan<double, extents<std::size_t, dynamic_extent>> y(y_vec.data(),N); | ||
for(int i=0; i<A.extent(0); i++) | ||
for(int j=0; j<A.extent(1); j++) | ||
A(i,j) = 100.0*i+j; | ||
A[i,j] = 100.0*i+j; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will only compile if the C++23 feature "multidimensional subscript operator" (P2128R6) is available. As a result, would you consider one of the following changes?
|
||
for(int i=0; i<x.extent(0); i++) | ||
x(i) = 1. * i; | ||
x[i] = 1. * i; | ||
for(int i=0; i<y.extent(0); i++) | ||
y(i) = -1. * i; | ||
y[i] = -1. * i; | ||
|
||
// y = A * x | ||
std::experimental::linalg::matrix_vector_product(A, x, y); | ||
|
@@ -50,6 +50,6 @@ int main(int argc, char* argv[]) { | |
std::experimental::linalg::scaled(2.0, A), x, | ||
std::experimental::linalg::scaled(0.5, y), y); | ||
#endif | ||
for(int i=0; i<y.extent(0); i+=5) std::cout << i << " " << y(i) << std::endl; | ||
for(int i=0; i<y.extent(0); i+=5) std::cout << i << " " << y[i] << std::endl; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,9 +5,9 @@ | |
// Make mdspan less verbose | ||
using std::experimental::mdspan; | ||
using std::experimental::extents; | ||
using std::experimental::dynamic_extent; | ||
using std::dynamic_extent; | ||
using std::experimental::submdspan; | ||
using std::experimental::full_extent; | ||
using std::full_extent; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please see above comment on |
||
|
||
int main(int argc, char* argv[]) { | ||
std::cout << "Matrix Vector Product MixedPrec" << std::endl; | ||
|
@@ -25,13 +25,13 @@ int main(int argc, char* argv[]) { | |
for(int m=0; m<A.extent(0); m++) | ||
for(int i=0; i<A.extent(1); i++) | ||
for(int j=0; j<A.extent(2); j++) | ||
A(m,i,j) = 1000.0 * m + 100.0 * i + j; | ||
A[m,i,j] = 1000.0 * m + 100.0 * i + j; | ||
for(int i=0; i<x.extent(0); i++) | ||
for(int m=0; m<x.extent(1); m++) | ||
x(i,m) = 33. * i + 0.33 * m; | ||
x[i,m] = 33. * i + 0.33 * m; | ||
for(int m=0; m<y.extent(0); m++) | ||
for(int i=0; i<y.extent(1); i++) | ||
y(m,i) = 33. * m + 0.33 * i; | ||
y[m,i] = 33. * m + 0.33 * i; | ||
|
||
for(int m = 0; m < M; m++) { | ||
auto A_m = submdspan(A, m, full_extent, full_extent); | ||
|
@@ -41,7 +41,7 @@ int main(int argc, char* argv[]) { | |
std::experimental::linalg::matrix_vector_product(A_m, x_m, y_m); | ||
} | ||
|
||
for(int i=0; i<y.extent(0); i+=5) std::cout << i << " " << y(i,1) << std::endl; | ||
for(int i=0; i<y.extent(0); i+=5) std::cout << i << " " << y[i,1] << std::endl; | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -43,6 +43,7 @@ | |||||
#ifndef LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_DOT_HPP_ | ||||||
#define LINALG_INCLUDE_EXPERIMENTAL___P1673_BITS_BLAS1_DOT_HPP_ | ||||||
|
||||||
#include <ranges> | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure! Can you point me to an example of using a feature test macro? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure! This web page gives a good summary. // Ensure that the feature test macro __cpp_lib_ranges is available;
// <version> also defines this macro, but that is a C++20 header.
#include <algorithm>
#if defined(__cpp_lib_ranges)
# include <ranges>
#endif
void some_function() {
#if defined(__cpp_lib_ranges_iota)
// ... code using views::iota ...
#else
// ... fall-back code ...
#endif
} The point of using two different macros -- one for the header, and one for the specific feature |
||||||
#include <type_traits> | ||||||
|
||||||
namespace std { | ||||||
|
@@ -90,7 +91,7 @@ template<class ElementType1, | |||||
class Accessor2, | ||||||
class Scalar> | ||||||
Scalar dot( | ||||||
std::experimental::linalg::impl::inline_exec_t&& /* exec */, | ||||||
std::experimental::linalg::impl::inline_exec_t&& exec, | ||||||
std::experimental::mdspan<ElementType1, std::experimental::extents<SizeType1, ext1>, Layout1, Accessor1> v1, | ||||||
std::experimental::mdspan<ElementType2, std::experimental::extents<SizeType2, ext2>, Layout2, Accessor2> v2, | ||||||
Scalar init) | ||||||
|
@@ -100,10 +101,18 @@ Scalar dot( | |||||
v1.static_extent(0) == v2.static_extent(0)); | ||||||
|
||||||
using size_type = std::common_type_t<SizeType1, SizeType2>; | ||||||
for (size_type k = 0; k < v1.extent(0); ++k) { | ||||||
init += v1(k) * v2(k); | ||||||
} | ||||||
return init; | ||||||
using scalar_type = std::common_type_t<ElementType1, ElementType2, Scalar>; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not necessary the right type. For example, if |
||||||
using std::ranges::iota_view; | ||||||
using std::ranges::begin; | ||||||
using std::ranges::end; | ||||||
|
||||||
iota_view range{size_type{}, v1.extent(0)}; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The preferred way to use range factories is |
||||||
|
||||||
Scalar sum = std::transform_reduce(exec, begin(range), end(range), init, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The "inline" executor should be executing inline. This means that, while it can use Standard Library algorithms, it shouldn't be passing along any execution policy (even if that execution policy is "sequential" -- please see my comments below). Would you consider changing this to remove the execution policy argument?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure. What do we need to add to handle other execution policies? |
||||||
std::plus<void>{}, | ||||||
[=](size_type i) { return v1[i] * v2[i]; }); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks correct. Another approach would be to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the input. Can you show me what that would look like? |
||||||
|
||||||
return sum; | ||||||
} | ||||||
|
||||||
template<class ExecutionPolicy, | ||||||
|
@@ -155,7 +164,7 @@ Scalar dot(std::experimental::mdspan<ElementType1, std::experimental::extents<Si | |||||
std::experimental::mdspan<ElementType2, std::experimental::extents<SizeType2, ext2>, Layout2, Accessor2> v2, | ||||||
Scalar init) | ||||||
{ | ||||||
return dot(std::experimental::linalg::impl::default_exec_t(), v1, v2, init); | ||||||
return dot(std::experimental::linalg::impl::default_exec(), v1, v2, init); | ||||||
} | ||||||
|
||||||
template<class ElementType1, | ||||||
|
@@ -217,7 +226,7 @@ namespace dot_detail { | |||||
auto dot_return_type_deducer( | ||||||
std::experimental::mdspan<ElementType1, std::experimental::extents<SizeType1, ext1>, Layout1, Accessor1> x, | ||||||
std::experimental::mdspan<ElementType2, std::experimental::extents<SizeType2, ext2>, Layout2, Accessor2> y) | ||||||
-> decltype(x(0) * y(0)); | ||||||
-> decltype(x[0] * y[0]); | ||||||
} // namespace dot_detail | ||||||
|
||||||
|
||||||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -82,7 +82,7 @@ void add_rank_1( | |||||||||||||||||||||||||
|
||||||||||||||||||||||||||
using size_type = std::common_type_t<SizeType_x, SizeType_y, SizeType_z>; | ||||||||||||||||||||||||||
for (size_type i = 0; i < z.extent(0); ++i) { | ||||||||||||||||||||||||||
z(i) = x(i) + y(i); | ||||||||||||||||||||||||||
z[i] = x[i] + y[i]; | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
||||||||||||||||||||||||||
|
@@ -132,7 +132,7 @@ void add_rank_2( | |||||||||||||||||||||||||
using size_type = std::common_type_t<SizeType_x, SizeType_y, SizeType_z>; | ||||||||||||||||||||||||||
for (size_type j = 0; j < x.extent(1); ++j) { | ||||||||||||||||||||||||||
for (size_type i = 0; i < x.extent(0); ++i) { | ||||||||||||||||||||||||||
z(i,j) = x(i,j) + y(i,j); | ||||||||||||||||||||||||||
z[i,j] = x[i,j] + y[i,j]; | ||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The actual implementation is a C++17 back-port, so we unfortunately have to roll with whatever operator (parentheses or brackets) the user has available. If you really do want to change the implementation, then we might have to go with something like the following.
Suggested change
or at least
Suggested change
It might be best just to leave the implementation alone; we might want to come up with a better way to do this. |
||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
More recent versions of GCC (for instance) shouldn't require TBB for
std::execution::par
to work. Furthermore, nvc++ comes with its own, non-TBB implementation ofstd::execution::par
. Would you consider gating this on compiler version, instead of requiring a third-party library? That could even be done in the code -- the code would just need to test the appropriate compiler version to ensure that the C++ algorithms are available (see https://en.cppreference.com/w/cpp/compiler_support/17 and search for "Parallel algorithms and execution policies").