Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added language standard parallelism to dot product #251

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/01_scale.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ int main(int argc, char* argv[]) {
// With CTAD working we could do, GCC 11.1 works but some others are buggy
// mdspan x(x_vec.data(), N);
mdspan<double, extents<std::size_t, dynamic_extent>> x(x_vec.data(),N);
for(int i=0; i<x.extent(0); i++) x(i) = i;
for(int i=0; i<x.extent(0); i++) x[i] = i;

// Call linalg::scale x = 2.0*x;
std::experimental::linalg::scale(2.0, x);
Expand All @@ -36,6 +36,6 @@ int main(int argc, char* argv[]) {
std::experimental::linalg::scale(2.0, x);
#endif

for(int i=0; i<x.extent(0); i+=5) std::cout << i << " " << x(i) << std::endl;
for(int i=0; i<x.extent(0); i+=5) std::cout << i << " " << x[i] << std::endl;
}
}
8 changes: 4 additions & 4 deletions examples/02_matrix_vector_product_basic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ int main(int argc, char* argv[]) {
mdspan<double, extents<std::size_t, dynamic_extent>> y(y_vec.data(),N);
for(int i=0; i<A.extent(0); i++)
for(int j=0; j<A.extent(1); j++)
A(i,j) = 100.0*i+j;
A[i,j] = 100.0*i+j;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will only compile if the C++23 feature "multidimensional subscript operator" (P2128R6) is available. As a result, would you consider one of the following changes?

  1. Protect the example so it only compiles if the feature test macro __cpp_multidimensional_subscript is defined, OR
  2. Add #define MDSPAN_USE_PAREN_OPERATOR 1 to the example before including any mdspan headers (see top of https://godbolt.org/z/Yrr8oe9sE for an example of the relevant macros), and use parentheses instead of brackets (e.g., A(i,j))

for(int i=0; i<x.extent(0); i++)
x(i) = 1. * i;
x[i] = 1. * i;
for(int i=0; i<y.extent(0); i++)
y(i) = -1. * i;
y[i] = -1. * i;

// y = A * x
std::experimental::linalg::matrix_vector_product(A, x, y);
Expand All @@ -50,6 +50,6 @@ int main(int argc, char* argv[]) {
std::experimental::linalg::scaled(2.0, A), x,
std::experimental::linalg::scaled(0.5, y), y);
#endif
for(int i=0; i<y.extent(0); i+=5) std::cout << i << " " << y(i) << std::endl;
for(int i=0; i<y.extent(0); i+=5) std::cout << i << " " << y[i] << std::endl;
}
}
8 changes: 4 additions & 4 deletions examples/03_matrix_vector_product_mixedprec.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ int main(int argc, char* argv[]) {
for(int m=0; m<A.extent(0); m++)
for(int i=0; i<A.extent(1); i++)
for(int j=0; j<A.extent(2); j++)
A(m,i,j) = 1000.0 * m + 100.0 * i + j;
A[m,i,j] = 1000.0 * m + 100.0 * i + j;
for(int i=0; i<x.extent(0); i++)
for(int m=0; m<x.extent(1); m++)
x(i,m) = 33. * i + 0.33 * m;
x[i,m] = 33. * i + 0.33 * m;
for(int m=0; m<y.extent(0); m++)
for(int i=0; i<y.extent(1); i++)
y(m,i) = 33. * m + 0.33 * i;
y[m,i] = 33. * m + 0.33 * i;

for(int m = 0; m < M; m++) {
auto A_m = submdspan(A, m, full_extent, full_extent);
Expand All @@ -41,7 +41,7 @@ int main(int argc, char* argv[]) {
std::experimental::linalg::matrix_vector_product(A_m, x_m, y_m);
}

for(int i=0; i<y.extent(0); i+=5) std::cout << i << " " << y(i,1) << std::endl;
for(int i=0; i<y.extent(0); i+=5) std::cout << i << " " << y[i,1] << std::endl;
}
}

2 changes: 1 addition & 1 deletion include/experimental/__p1673_bits/blas1_dot.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ namespace dot_detail {
auto dot_return_type_deducer(
std::experimental::mdspan<ElementType1, std::experimental::extents<SizeType1, ext1>, Layout1, Accessor1> x,
std::experimental::mdspan<ElementType2, std::experimental::extents<SizeType2, ext2>, Layout2, Accessor2> y)
-> decltype(x(0) * y(0));
-> decltype(x[0] * y[0]);
} // namespace dot_detail


Expand Down
12 changes: 6 additions & 6 deletions include/experimental/__p1673_bits/blas1_givens.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,9 +399,9 @@ void givens_rotation_apply(
using index_type = ::std::common_type_t<SizeType1, SizeType2>;
const auto x_extent_0 = static_cast<index_type>(x.extent(0));
for (index_type i = 0; i < x_extent_0; ++i) {
const auto dtemp = c * x(i) + s * y(i);
y(i) = c * y(i) - s * x(i);
x(i) = dtemp;
const auto dtemp = c * x[i] + s * y[i];
y[i] = c * y[i] - s * x[i];
x[i] = dtemp;
}
}

Expand Down Expand Up @@ -496,9 +496,9 @@ void givens_rotation_apply(
using index_type = ::std::common_type_t<SizeType1, SizeType2>;
const auto x_extent_0 = static_cast<index_type>(x.extent(0));
for (index_type i = 0; i < x_extent_0; ++i) {
const auto dtemp = c * x(i) + s * y(i);
y(i) = c * y(i) - conj(s) * x(i);
x(i) = dtemp;
const auto dtemp = c * x[i] + s * y[i];
y[i] = c * y[i] - conj(s) * x[i];
x[i] = dtemp;
}
}

Expand Down
4 changes: 2 additions & 2 deletions include/experimental/__p1673_bits/blas1_linalg_add.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ void add_rank_1(

using size_type = std::common_type_t<SizeType_x, SizeType_y, SizeType_z>;
for (size_type i = 0; i < z.extent(0); ++i) {
z(i) = x(i) + y(i);
z[i] = x[i] + y[i];
}
}

Expand Down Expand Up @@ -132,7 +132,7 @@ void add_rank_2(
using size_type = std::common_type_t<SizeType_x, SizeType_y, SizeType_z>;
for (size_type j = 0; j < x.extent(1); ++j) {
for (size_type i = 0; i < x.extent(0); ++i) {
z(i,j) = x(i,j) + y(i,j);
z[i,j] = x[i,j] + y[i,j];
Copy link
Contributor

@mhoemmen mhoemmen Jun 7, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The actual implementation is a C++17 back-port, so we unfortunately have to roll with whatever operator (parentheses or brackets) the user has available. If you really do want to change the implementation, then we might have to go with something like the following.

Suggested change
z[i,j] = x[i,j] + y[i,j];
#if (MDSPAN_USE_PAREN_OPERATOR > 0)
z(i,j) = x(i,j) + y(i,j);
#else
z[i,j] = x[i,j] + y[i,j];
#endif

or at least

Suggested change
z[i,j] = x[i,j] + y[i,j];
#if defined(__cpp_multidimensional_subscript)
z[i,j] = x[i,j] + y[i,j];
#else
z(i,j) = x(i,j) + y(i,j);
#endif

It might be best just to leave the implementation alone; we might want to come up with a better way to do this.

}
}
}
Expand Down
4 changes: 2 additions & 2 deletions include/experimental/__p1673_bits/blas1_linalg_copy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ void copy_rank_1(
x.static_extent(0) == y.static_extent(0));
using size_type = std::common_type_t<SizeType_x, SizeType_y>;
for (size_type i = 0; i < y.extent(0); ++i) {
y(i) = x(i);
y[i] = x[i];
}
}

Expand Down Expand Up @@ -98,7 +98,7 @@ void copy_rank_2(
using size_type = std::common_type_t<SizeType_x, SizeType_y>;
for (size_type j = 0; j < y.extent(1); ++j) {
for (size_type i = 0; i < y.extent(0); ++i) {
y(i,j) = x(i,j);
y[i,j] = x[i,j];
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions include/experimental/__p1673_bits/blas1_linalg_swap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ void swap_rank_1(
using size_type = std::common_type_t<SizeType_x, SizeType_y>;

for (size_type i = 0; i < y.extent(0); ++i) {
swap(x(i), y(i));
swap(x[i], y[i]);
}
}

Expand Down Expand Up @@ -106,7 +106,7 @@ void swap_rank_2(

for (size_type j = 0; j < y.extent(1); ++j) {
for (size_type i = 0; i < y.extent(0); ++i) {
swap(x(i,j), y(i,j));
swap(x[i,j], y[i,j]);
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions include/experimental/__p1673_bits/blas1_matrix_inf_norm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,14 +100,14 @@ Scalar matrix_inf_norm(
return result;
}
else if(A.extent(0) == size_type(1) && A.extent(1) == size_type(1)) {
result += abs(A(0, 0));
result += abs(A[0, 0]);
return result;
}

for (size_type i = 0; i < A.extent(0); ++i) {
auto row_sum = init;
for (size_type j = 0; j < A.extent(1); ++j) {
row_sum += abs(A(i,j));
row_sum += abs(A[i,j]);
}
result = max(row_sum, result);
}
Expand Down Expand Up @@ -170,7 +170,7 @@ namespace matrix_inf_norm_detail {
class Layout,
class Accessor>
auto matrix_inf_norm_return_type_deducer(
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, numRows, numCols>, Layout, Accessor> A) -> decltype(abs(A(0,0)));
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, numRows, numCols>, Layout, Accessor> A) -> decltype(abs(A[0,0]));

} // namespace matrix_inf_norm_detail

Expand Down
6 changes: 3 additions & 3 deletions include/experimental/__p1673_bits/blas1_matrix_one_norm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ Scalar matrix_one_norm(
return result;
}
else if(A.extent(0) == size_type(1) && A.extent(1) == size_type(1)) {
result += abs(A(0, 0));
result += abs(A[0, 0]);
return result;
}

Expand All @@ -110,7 +110,7 @@ Scalar matrix_one_norm(
for (size_type j = 0; j < A.extent(1); ++j) {
auto col_sum = init;
for (size_type i = 0; i < A.extent(0); ++i) {
col_sum += abs(A(i,j));
col_sum += abs(A[i,j]);
}
result = max(col_sum, result);
}
Expand Down Expand Up @@ -171,7 +171,7 @@ namespace matrix_one_norm_detail {
class Layout,
class Accessor>
auto matrix_one_norm_return_type_deducer(
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, numRows, numCols>, Layout, Accessor> A) -> decltype(abs(A(0,0)));
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, numRows, numCols>, Layout, Accessor> A) -> decltype(abs(A[0,0]));

} // namespace matrix_one_norm_detail

Expand Down
4 changes: 2 additions & 2 deletions include/experimental/__p1673_bits/blas1_scale.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ void linalg_scale_rank_1(
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x)
{
for (SizeType i = 0; i < x.extent(0); ++i) {
x(i) *= alpha;
x[i] *= alpha;
}
}

Expand All @@ -78,7 +78,7 @@ void linalg_scale_rank_2(
{
for (SizeType j = 0; j < A.extent(1); ++j) {
for (SizeType i = 0; i < A.extent(0); ++i) {
A(i,j) *= alpha;
A[i,j] *= alpha;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions include/experimental/__p1673_bits/blas1_vector_abs_sum.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ Scalar vector_abs_sum(
const SizeType numElt = v.extent(0);
for (SizeType i = 0; i < numElt; ++i) {
using std::abs;
init += abs(v(i));
init += abs(v[i]);
}
return init;
}
Expand Down Expand Up @@ -140,7 +140,7 @@ namespace vector_abs_detail {
class Accessor>
auto vector_abs_return_type_deducer(
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x)
-> decltype(abs(x(0)));
-> decltype(abs(x[0]));
} // namespace vector_abs_detail


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,17 @@ SizeType idx_abs_max_default_impl(
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> v)
{
using std::abs;
using magnitude_type = decltype(abs(v(0)));
using magnitude_type = decltype(abs(v[0]));

if (v.extent(0) == 0) {
return std::numeric_limits<SizeType>::max();
}

SizeType maxInd = 0;
magnitude_type maxVal = abs(v(0));
magnitude_type maxVal = abs(v[0]);
for (SizeType i = 1; i < v.extent(0); ++i) {
if (maxVal < abs(v(i))) {
maxVal = abs(v(i));
if (maxVal < abs(v[i])) {
maxVal = abs(v[i]);
maxInd = i;
}
}
Expand Down
14 changes: 7 additions & 7 deletions include/experimental/__p1673_bits/blas1_vector_norm2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ template<class ElementType,
class Scalar>
Scalar vector_norm2(
std::experimental::linalg::impl::inline_exec_t&& exec,
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x,
mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x,
Scalar init)
{
// Initialize the sum of squares result
Expand All @@ -107,7 +107,7 @@ template<class ExecutionPolicy,
class Scalar>
Scalar vector_norm2(
ExecutionPolicy&& exec,
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x,
mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x,
Scalar init)
{
constexpr bool use_custom = is_custom_vector_norm2_avail<
Expand All @@ -129,7 +129,7 @@ template<class ElementType,
class Accessor,
class Scalar>
Scalar vector_norm2(
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x,
mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x,
Scalar init)
{
return vector_norm2(std::experimental::linalg::impl::default_exec_t(), x, init);
Expand All @@ -147,16 +147,16 @@ namespace vector_norm2_detail {
class Layout,
class Accessor>
auto vector_norm2_return_type_deducer(
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x)
-> decltype(abs(x(0)) * abs(x(0)));
mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x)
-> decltype(abs(x[0]) * abs(x[0]));
} // namespace vector_norm2_detail

template<class ElementType,
class SizeType, ::std::size_t ext0,
class Layout,
class Accessor>
auto vector_norm2(
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x)
mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x)
-> decltype(vector_norm2_detail::vector_norm2_return_type_deducer(x))
{
using return_t = decltype(vector_norm2_detail::vector_norm2_return_type_deducer(x));
Expand All @@ -170,7 +170,7 @@ template<class ExecutionPolicy,
class Accessor>
auto vector_norm2(
ExecutionPolicy&& exec,
std::experimental::mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x)
mdspan<ElementType, std::experimental::extents<SizeType, ext0>, Layout, Accessor> x)
-> decltype(vector_norm2_detail::vector_norm2_return_type_deducer(x))
{
using return_t = decltype(vector_norm2_detail::vector_norm2_return_type_deducer(x));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ sum_of_squares_result<Scalar> vector_sum_of_squares(
Scalar scale = init.scaling_factor;
Scalar ssq = init.scaled_sum_of_squares;
for (SizeType i = 0; i < x.extent(0); ++i) {
if (abs(x(i)) != 0.0) {
const auto absxi = abs(x(i));
if (abs(x[i]) != 0.0) {
const auto absxi = abs(x[i]);
const auto quotient = scale / absxi;
if (scale < absxi) {
ssq = Scalar(1.0) + ssq * quotient * quotient;
Expand Down
Loading