From 08c47b9e631ff98175a90dccdce6c39a2782b67b Mon Sep 17 00:00:00 2001 From: Alex <114611289+sanya2905@users.noreply.github.com> Date: Mon, 30 Dec 2024 01:55:55 +0300 Subject: [PATCH] =?UTF-8?q?[FIX=20REVERT]=20=D0=92=D0=B5=D1=80=D1=88=D0=B8?= =?UTF-8?q?=D0=BD=D0=B8=D0=BD=D0=B0=20=D0=90=D0=BB=D0=B5=D0=BA=D1=81=D0=B0?= =?UTF-8?q?=D0=BD=D0=B4=D1=80=D0=B0.=20=D0=97=D0=B0=D0=B4=D0=B0=D1=87?= =?UTF-8?q?=D0=B0=203.=20=D0=92=D0=B0=D1=80=D0=B8=D0=B0=D0=BD=D1=82=201.?= =?UTF-8?q?=20=D0=A3=D0=BC=D0=BD=D0=BE=D0=B6=D0=B5=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D0=BF=D0=BB=D0=BE=D1=82=D0=BD=D1=8B=D1=85=20=D0=BC=D0=B0=D1=82?= =?UTF-8?q?=D1=80=D0=B8=D1=86.=20=D0=AD=D0=BB=D0=B5=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D1=82=D1=8B=20=D1=82=D0=B8=D0=BF=D0=B0=20double.=20=D0=91?= =?UTF-8?q?=D0=BB=D0=BE=D1=87=D0=BD=D0=B0=D1=8F=20=D1=81=D1=85=D0=B5=D0=BC?= =?UTF-8?q?=D0=B0,=20=D0=B0=D0=BB=D0=B3=D0=BE=D1=80=D0=B8=D1=82=D0=BC=20?= =?UTF-8?q?=D0=9A=D1=8D=D0=BD=D0=BD=D0=BE=D0=BD=D0=B0=20(#858)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../func_tests/main.cpp | 254 ++++++++++++++++++ .../include/ops_mpi.hpp | 111 ++++++++ .../perf_tests/main.cpp | 100 +++++++ .../src/ops_mpi.cpp | 229 ++++++++++++++++ .../func_tests/main.cpp | 131 +++++++++ .../include/ops_seq.hpp | 88 ++++++ .../perf_tests/main.cpp | 80 ++++++ .../src/ops_seq.cpp | 56 ++++ 8 files changed, 1049 insertions(+) create mode 100644 tasks/mpi/vershinina_a_cannons_algorithm/func_tests/main.cpp create mode 100644 tasks/mpi/vershinina_a_cannons_algorithm/include/ops_mpi.hpp create mode 100644 tasks/mpi/vershinina_a_cannons_algorithm/perf_tests/main.cpp create mode 100644 tasks/mpi/vershinina_a_cannons_algorithm/src/ops_mpi.cpp create mode 100644 tasks/seq/vershinina_a_cannons_algorithm/func_tests/main.cpp create mode 100644 tasks/seq/vershinina_a_cannons_algorithm/include/ops_seq.hpp create mode 100644 tasks/seq/vershinina_a_cannons_algorithm/perf_tests/main.cpp create mode 100644 tasks/seq/vershinina_a_cannons_algorithm/src/ops_seq.cpp diff --git a/tasks/mpi/vershinina_a_cannons_algorithm/func_tests/main.cpp b/tasks/mpi/vershinina_a_cannons_algorithm/func_tests/main.cpp new file mode 100644 index 00000000000..996ed896627 --- /dev/null +++ b/tasks/mpi/vershinina_a_cannons_algorithm/func_tests/main.cpp @@ -0,0 +1,254 @@ +#include + +#include +#include +#include +#include + +#include "mpi/vershinina_a_cannons_algorithm/include/ops_mpi.hpp" + +std::vector getRandomMatrix(double r) { + std::random_device dev; + std::mt19937 gen(dev()); + std::uniform_int_distribution<> distr(0, 100); + std::vector matrix(r * r, 0.0); + for (int i = 0; i < r * r; i++) { + matrix[i] = distr(gen); + } + return matrix; +} + +TEST(vershinina_a_cannons_algorithm, Test_1) { + boost::mpi::communicator world; + if (world.size() < 4) { + GTEST_SKIP(); + } + + int n = 3; + auto lhs = getRandomMatrix(3); + auto rhs = getRandomMatrix(3); + + std::vector res(n * n, 0.0); + std::shared_ptr taskDataPar = std::make_shared(); + if (world.rank() == 0) { + taskDataPar->inputs_count.emplace_back(n); + taskDataPar->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataPar->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataPar->outputs.emplace_back(reinterpret_cast(res.data())); + } + vershinina_a_cannons_algorithm::TestMPITaskParallel testTaskPar(taskDataPar); + if (!testTaskPar.validation()) { + GTEST_SKIP(); + } + testTaskPar.pre_processing(); + testTaskPar.run(); + testTaskPar.post_processing(); + if (world.rank() == 0) { + std::vector ref_res(n * n, 0.0); + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs_count.emplace_back(n); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataSeq->outputs.emplace_back(reinterpret_cast(ref_res.data())); + + vershinina_a_cannons_algorithm::TestMPITaskSequential testTaskSeq(taskDataSeq); + ASSERT_TRUE(testTaskSeq.validation()); + testTaskSeq.pre_processing(); + testTaskSeq.run(); + testTaskSeq.post_processing(); + for (int i = 0; i < (int)res.size(); i++) { + ASSERT_NEAR(res[i], ref_res[i], 0.1); + } + } +} + +TEST(vershinina_a_cannons_algorithm, Test_2) { + boost::mpi::communicator world; + if (world.size() < 4) { + GTEST_SKIP(); + } + + int n = 5; + auto lhs = getRandomMatrix(5); + auto rhs = getRandomMatrix(5); + + std::vector res(n * n, 0.0); + + std::shared_ptr taskDataPar = std::make_shared(); + if (world.rank() == 0) { + taskDataPar->inputs_count.emplace_back(n); + taskDataPar->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataPar->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataPar->outputs.emplace_back(reinterpret_cast(res.data())); + } + + vershinina_a_cannons_algorithm::TestMPITaskParallel testTaskPar(taskDataPar); + if (!testTaskPar.validation()) { + GTEST_SKIP(); + } + testTaskPar.pre_processing(); + testTaskPar.run(); + testTaskPar.post_processing(); + + if (world.rank() == 0) { + std::vector ref_res(n * n, 0.0); + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs_count.emplace_back(n); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataSeq->outputs.emplace_back(reinterpret_cast(ref_res.data())); + + vershinina_a_cannons_algorithm::TestMPITaskSequential testTaskSeq(taskDataSeq); + ASSERT_TRUE(testTaskSeq.validation()); + testTaskSeq.pre_processing(); + testTaskSeq.run(); + testTaskSeq.post_processing(); + + for (int i = 0; i < (int)res.size(); i++) { + ASSERT_NEAR(res[i], ref_res[i], 0.1); + } + } +} + +TEST(vershinina_a_cannons_algorithm, Test_3) { + boost::mpi::communicator world; + if (world.size() < 4) { + GTEST_SKIP(); + } + + int n = 10; + auto lhs = getRandomMatrix(10); + auto rhs = getRandomMatrix(10); + + std::vector res(n * n, 0.0); + + std::shared_ptr taskDataPar = std::make_shared(); + if (world.rank() == 0) { + taskDataPar->inputs_count.emplace_back(n); + taskDataPar->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataPar->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataPar->outputs.emplace_back(reinterpret_cast(res.data())); + } + + vershinina_a_cannons_algorithm::TestMPITaskParallel testTaskPar(taskDataPar); + if (!testTaskPar.validation()) { + GTEST_SKIP(); + } + testTaskPar.pre_processing(); + testTaskPar.run(); + testTaskPar.post_processing(); + + if (world.rank() == 0) { + std::vector ref_res(n * n, 0.0); + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs_count.emplace_back(n); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataSeq->outputs.emplace_back(reinterpret_cast(ref_res.data())); + + vershinina_a_cannons_algorithm::TestMPITaskSequential testTaskSeq(taskDataSeq); + ASSERT_TRUE(testTaskSeq.validation()); + testTaskSeq.pre_processing(); + testTaskSeq.run(); + testTaskSeq.post_processing(); + + for (int i = 0; i < (int)res.size(); i++) { + ASSERT_NEAR(res[i], ref_res[i], 0.1); + } + } +} + +TEST(vershinina_a_cannons_algorithm, Test_4) { + boost::mpi::communicator world; + if (world.size() < 4) { + GTEST_SKIP(); + } + + int n = 15; + auto lhs = getRandomMatrix(15); + auto rhs = getRandomMatrix(15); + + std::vector res(n * n, 0.0); + + std::shared_ptr taskDataPar = std::make_shared(); + if (world.rank() == 0) { + taskDataPar->inputs_count.emplace_back(n); + taskDataPar->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataPar->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataPar->outputs.emplace_back(reinterpret_cast(res.data())); + } + + vershinina_a_cannons_algorithm::TestMPITaskParallel testTaskPar(taskDataPar); + if (!testTaskPar.validation()) { + GTEST_SKIP(); + } + testTaskPar.pre_processing(); + testTaskPar.run(); + testTaskPar.post_processing(); + + if (world.rank() == 0) { + std::vector ref_res(n * n, 0.0); + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs_count.emplace_back(n); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataSeq->outputs.emplace_back(reinterpret_cast(ref_res.data())); + + vershinina_a_cannons_algorithm::TestMPITaskSequential testTaskSeq(taskDataSeq); + ASSERT_TRUE(testTaskSeq.validation()); + testTaskSeq.pre_processing(); + testTaskSeq.run(); + testTaskSeq.post_processing(); + + for (int i = 0; i < (int)res.size(); i++) { + ASSERT_NEAR(res[i], ref_res[i], 0.1); + } + } +} +TEST(vershinina_a_cannons_algorithm, Test_5) { + boost::mpi::communicator world; + if (world.size() < 4) { + GTEST_SKIP(); + } + + int n = 30; + auto lhs = getRandomMatrix(30); + auto rhs = getRandomMatrix(30); + + std::vector res(n * n, 0.0); + + std::shared_ptr taskDataPar = std::make_shared(); + if (world.rank() == 0) { + taskDataPar->inputs_count.emplace_back(n); + taskDataPar->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataPar->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataPar->outputs.emplace_back(reinterpret_cast(res.data())); + } + + vershinina_a_cannons_algorithm::TestMPITaskParallel testTaskPar(taskDataPar); + if (!testTaskPar.validation()) { + GTEST_SKIP(); + } + testTaskPar.pre_processing(); + testTaskPar.run(); + testTaskPar.post_processing(); + + if (world.rank() == 0) { + std::vector ref_res(n * n, 0.0); + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs_count.emplace_back(n); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataSeq->outputs.emplace_back(reinterpret_cast(ref_res.data())); + + vershinina_a_cannons_algorithm::TestMPITaskSequential testTaskSeq(taskDataSeq); + ASSERT_TRUE(testTaskSeq.validation()); + testTaskSeq.pre_processing(); + testTaskSeq.run(); + testTaskSeq.post_processing(); + + for (int i = 0; i < (int)res.size(); i++) { + ASSERT_NEAR(res[i], ref_res[i], 0.1); + } + } +} diff --git a/tasks/mpi/vershinina_a_cannons_algorithm/include/ops_mpi.hpp b/tasks/mpi/vershinina_a_cannons_algorithm/include/ops_mpi.hpp new file mode 100644 index 00000000000..b03e0ece3e7 --- /dev/null +++ b/tasks/mpi/vershinina_a_cannons_algorithm/include/ops_mpi.hpp @@ -0,0 +1,111 @@ +#pragma once + +#include +#include +#include +#include + +#include "core/task/include/task.hpp" + +namespace vershinina_a_cannons_algorithm { + +template +struct TMatrix { + size_t n; + + std::vector data{}; + size_t hshift{}; + size_t vshift{}; + + void set_horizontal_shift(size_t shift) { hshift = shift; } + void set_vertical_shift(size_t shift) { vshift = shift; } + + const T& at(size_t row, size_t col) const noexcept { return data[row * n + col]; } + T& at(size_t row, size_t col) noexcept { return const_cast(std::as_const(*this).at(row, col)); } + + const T& at_h(size_t row, size_t col) const noexcept { + size_t actual_hshift = (hshift + row) % n; + if (col < n - actual_hshift) { + col += actual_hshift; + } else { + col = col - (n - actual_hshift); + } + return data[row * n + col]; + } + T& at_h(size_t row, size_t col) noexcept { return const_cast(std::as_const(*this).at_h(row, col)); } + + const T& at_v(size_t row, size_t col) const noexcept { + size_t actual_vshift = (vshift + col) % n; + if (row < n - actual_vshift) { + row += actual_vshift; + } else { + row = row - (n - actual_vshift); + } + return data[row * n + col]; + } + T& at_v(size_t row, size_t col) noexcept { return const_cast(std::as_const(*this).at_v(row, col)); } + + bool operator==(const TMatrix& other) const noexcept { return n == other.n && data == other.data; } + + void read(const T* src) { data.assign(src, src + n * n); } + + friend std::ostream& operator<<(std::ostream& os, const TMatrix& m) { + os << "M(" << m.n << "," << m.n << "): ["; + for (const auto& e : m.data) { + os << e << ' '; + } + os << ']'; + return os; + } + + static TMatrix create(size_t n, std::initializer_list intl = {}) { + TMatrix mat = {n, std::vector(intl)}; + mat.data.resize(n * n); + return mat; + } + TMatrix operator*(const TMatrix& rhs) const { + auto res = create(n); + for (size_t i = 0; i < n; i++) { + for (size_t j = 0; j < rhs.n; j++) { + res.at(i, j) = 0; + for (size_t k = 0; k < rhs.n; k++) { + res.at(i, j) += at(i, k) * rhs.at(k, j); + } + } + } + return res; + } +}; + +class TestMPITaskSequential : public ppc::core::Task { + public: + explicit TestMPITaskSequential(std::shared_ptr taskData_) : Task(std::move(taskData_)) {} + bool pre_processing() override; + bool validation() override; + bool run() override; + bool post_processing() override; + int n{}; + + private: + TMatrix lhs_{}; + TMatrix rhs_{}; + TMatrix res_{}; + TMatrix res_c{}; +}; + +class TestMPITaskParallel : public ppc::core::Task { + public: + explicit TestMPITaskParallel(std::shared_ptr taskData_) : Task(std::move(taskData_)) {} + bool pre_processing() override; + bool validation() override; + bool run() override; + bool post_processing() override; + + private: + int n; + std::pair, std::vector> in_; + std::vector res_; + boost::mpi::communicator world; +}; + +} // namespace vershinina_a_cannons_algorithm \ No newline at end of file diff --git a/tasks/mpi/vershinina_a_cannons_algorithm/perf_tests/main.cpp b/tasks/mpi/vershinina_a_cannons_algorithm/perf_tests/main.cpp new file mode 100644 index 00000000000..231b568cacd --- /dev/null +++ b/tasks/mpi/vershinina_a_cannons_algorithm/perf_tests/main.cpp @@ -0,0 +1,100 @@ + +#include + +#include +#include +#include + +#include "core/perf/include/perf.hpp" +#include "mpi/vershinina_a_cannons_algorithm/include/ops_mpi.hpp" + +std::vector getRandomMatrix(double r) { + std::random_device dev; + std::mt19937 gen(dev()); + std::uniform_int_distribution<> distr(0, 100); + std::vector matrix(r * r, 0.0); + for (int i = 0; i < r * r; i++) { + matrix[i] = distr(gen); + } + return matrix; +} + +TEST(vershinina_a_cannons_algorithm, test_pipeline_run) { + boost::mpi::communicator world; + if (world.size() < 4) { + GTEST_SKIP(); + } + int n = 24; + auto lhs = getRandomMatrix(24); + auto rhs = getRandomMatrix(24); + + std::vector res(n * n, 0.0); + + std::shared_ptr taskDataPar = std::make_shared(); + + if (world.rank() == 0) { + taskDataPar->inputs_count.emplace_back(n); + taskDataPar->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataPar->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataPar->outputs.emplace_back(reinterpret_cast(res.data())); + } + + auto testMpiTaskParallel = std::make_shared(taskDataPar); + ASSERT_EQ(testMpiTaskParallel->validation(), true); + testMpiTaskParallel->pre_processing(); + testMpiTaskParallel->run(); + testMpiTaskParallel->post_processing(); + + auto perfAttr = std::make_shared(); + perfAttr->num_running = 10; + const boost::mpi::timer current_timer; + perfAttr->current_timer = [&] { return current_timer.elapsed(); }; + + auto perfResults = std::make_shared(); + + auto perfAnalyzer = std::make_shared(testMpiTaskParallel); + perfAnalyzer->pipeline_run(perfAttr, perfResults); + if (world.rank() == 0) { + ppc::core::Perf::print_perf_statistic(perfResults); + } +} + +TEST(vershinina_a_cannons_algorithm, test_task_run) { + boost::mpi::communicator world; + if (world.size() < 4) { + GTEST_SKIP(); + } + int n = 24; + auto lhs = getRandomMatrix(24); + auto rhs = getRandomMatrix(24); + + std::vector res(n * n, 0.0); + + std::shared_ptr taskDataPar = std::make_shared(); + + if (world.rank() == 0) { + taskDataPar->inputs_count.emplace_back(n); + taskDataPar->inputs.emplace_back(reinterpret_cast(lhs.data())); + taskDataPar->inputs.emplace_back(reinterpret_cast(rhs.data())); + taskDataPar->outputs.emplace_back(reinterpret_cast(res.data())); + } + + auto testMpiTaskParallel = std::make_shared(taskDataPar); + ASSERT_EQ(testMpiTaskParallel->validation(), true); + testMpiTaskParallel->pre_processing(); + testMpiTaskParallel->run(); + testMpiTaskParallel->post_processing(); + + auto perfAttr = std::make_shared(); + perfAttr->num_running = 10; + const boost::mpi::timer current_timer; + perfAttr->current_timer = [&] { return current_timer.elapsed(); }; + + auto perfResults = std::make_shared(); + + auto perfAnalyzer = std::make_shared(testMpiTaskParallel); + perfAnalyzer->task_run(perfAttr, perfResults); + if (world.rank() == 0) { + ppc::core::Perf::print_perf_statistic(perfResults); + } +} \ No newline at end of file diff --git a/tasks/mpi/vershinina_a_cannons_algorithm/src/ops_mpi.cpp b/tasks/mpi/vershinina_a_cannons_algorithm/src/ops_mpi.cpp new file mode 100644 index 00000000000..d9490b601ca --- /dev/null +++ b/tasks/mpi/vershinina_a_cannons_algorithm/src/ops_mpi.cpp @@ -0,0 +1,229 @@ +#include "mpi/vershinina_a_cannons_algorithm/include/ops_mpi.hpp" + +#include +#include +#include +#include +#include +#include + +bool vershinina_a_cannons_algorithm::TestMPITaskSequential::pre_processing() { + internal_order_test(); + n = taskData->inputs_count[0]; + + lhs_.n = n; + rhs_.n = n; + + res_c.n = n; + res_.n = n; + + lhs_.read(reinterpret_cast(taskData->inputs[0])); + rhs_.read(reinterpret_cast(taskData->inputs[1])); + res_c = TMatrix::create(n); + res_ = TMatrix::create(n); + + lhs_.hshift = 0; + rhs_.vshift = 0; + + return true; +} + +bool vershinina_a_cannons_algorithm::TestMPITaskSequential::validation() { + internal_order_test(); + return taskData->inputs.size() == 2 && taskData->inputs_count[0] > 0; +} + +bool vershinina_a_cannons_algorithm::TestMPITaskSequential::run() { + internal_order_test(); + + for (int k = 0; k < n; k++) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + res_c.at(i, j) = lhs_.at_h(i, j) * rhs_.at_v(i, j); + } + } + for (int t = 0; t < n; t++) { + for (int s = 0; s < n; s++) { + res_.at(t, s) += res_c.at(t, s); + } + } + lhs_.hshift++; + rhs_.vshift++; + } + return true; +} + +bool vershinina_a_cannons_algorithm::TestMPITaskSequential::post_processing() { + internal_order_test(); + std::copy(res_.data.begin(), res_.data.end(), reinterpret_cast(taskData->outputs[0])); + return true; +} + +void copy_mat(double* src, std::vector& dst, int n) { dst.assign(src, src + (n * n)); } + +bool vershinina_a_cannons_algorithm::TestMPITaskParallel::pre_processing() { + internal_order_test(); + + if (world.rank() == 0) { + n = taskData->inputs_count[0]; + copy_mat(reinterpret_cast(taskData->inputs[0]), in_.first, n); + copy_mat(reinterpret_cast(taskData->inputs[1]), in_.second, n); + } + return true; +} + +bool vershinina_a_cannons_algorithm::TestMPITaskParallel::validation() { + internal_order_test(); + return world.rank() != 0 || (taskData->inputs.size() == 2 && taskData->inputs_count[0] > 0); +} + +int find_most_close_power_of_2(int x) { return std::floor(std::sqrt(x)); } + +std::vector mkpad(const std::vector& in, int n, int padding) { + std::vector res(padding * padding, 0.); + for (int i = 0; i < n; i++) { + std::copy(in.begin() + i * n, in.begin() + (i + 1) * n, res.begin() + i * padding); + } + return res; +} +std::vector mkblock(const std::vector& in, int padding, int row, int col, int block) { + std::vector res(block * block, 0.); + for (int i = 0; i < block; i++) { + const int idx = (row * block + i) * padding + (col * block); + std::copy(in.begin() + idx, in.begin() + idx + block, res.begin() + i * block); + } + return res; +} + +std::pair coords(const boost::mpi::cartesian_communicator& cart, int rank) { + auto coords = cart.coordinates(rank); + return {coords[0], coords[1]}; +} + +bool vershinina_a_cannons_algorithm::TestMPITaskParallel::run() { + internal_order_test(); + + boost::mpi::broadcast(world, n, 0); + + auto [lhs_, rhs_] = in_; + + const int power = find_most_close_power_of_2(world.size()); + const int involved = std::pow(power, 2); + + if (world.rank() >= involved) { + world.split(1); + return true; + } + + auto active_comm = world.split(0); + + const int padding = power * ((n + power - 1) / power); + const int block = padding / power; + + const auto padding2 = padding * padding; + const auto block2 = block * block; + + if (world.rank() == 0) { + lhs_ = mkpad(lhs_, n, padding); + rhs_ = mkpad(rhs_, n, padding); + } + + boost::mpi::cartesian_communicator cart{active_comm, boost::mpi::cartesian_topology{{power, true}, {power, true}}, + false}; + + const auto [row, col] = coords(cart, cart.rank()); + + auto [left_rank, right_rank] = cart.shifted_ranks(1, 1); + auto [up_rank, down_rank] = cart.shifted_ranks(0, 1); + std::vector local_lhs(block2, 0.0); + std::vector local_rhs(block2, 0.0); + std::vector local_res(block2, 0.0); + + if (cart.rank() == 0) { + for (int proc = 0; proc < involved; ++proc) { + const auto [p_row, p_col] = coords(cart, proc); + + auto lblock = mkblock(lhs_, padding, p_row, p_col, block); + auto rblock = mkblock(rhs_, padding, p_row, p_col, block); + + if (proc == 0) { + local_lhs = std::move(lblock); + local_rhs = std::move(rblock); + } else { + cart.send(proc, 0, lblock.data(), block2); + cart.send(proc, 1, rblock.data(), block2); + } + } + } else { + cart.recv(0, 0, local_lhs.data(), block2); + cart.recv(0, 1, local_rhs.data(), block2); + } + + for (int i = 0; i < row; i++) { + cart.send(right_rank, 2, local_lhs.data(), block2); + cart.recv(right_rank, 2, local_lhs.data(), block2); + } + for (int i = 0; i < col; i++) { + cart.send(down_rank, 3, local_rhs.data(), block2); + cart.recv(down_rank, 3, local_rhs.data(), block2); + } + + for (int s = 0; s < power; s++) { + for (int i = 0; i < block; i++) { + for (int l = 0; l < block; l++) { + double a_il = local_lhs[i * block + l]; + for (int j = 0; j < block; j++) { + local_res[i * block + j] += a_il * local_rhs[l * block + j]; + } + } + } + cart.send(right_rank, 4, local_lhs.data(), block2); + cart.recv(right_rank, 4, local_lhs.data(), block2); + cart.send(down_rank, 5, local_rhs.data(), block2); + cart.recv(down_rank, 5, local_rhs.data(), block2); + } + + if (cart.rank() != 0) { + cart.send(0, 6, local_res.data(), block2); + } else { + res_.resize(padding2, 0.0); + + for (int proc = 0; proc < involved; proc++) { + if (proc == 0) { + for (int i = 0; i < block; i++) { + int dest_index = i * padding; + std::copy(local_res.begin() + i * block, local_res.begin() + (i + 1) * block, res_.begin() + dest_index); + } + } else { + std::vector buf(block2); + cart.recv(proc, 6, buf.data(), block2); + + const auto [p_row, p_col] = coords(cart, proc); + const int begin_row = p_row * block; + const int begin_col = p_col * block; + + for (int i = 0; i < block; i++) { + std::copy(buf.begin() + i * block, buf.begin() + (i + 1) * block, + res_.begin() + (begin_row + i) * padding + begin_col); + } + } + } + + std::vector overall_res(n * n, 0.0); + for (int i = 0; i < n; i++) { + std::copy(res_.begin() + i * padding, res_.begin() + i * padding + n, overall_res.begin() + i * n); + } + res_ = std::move(overall_res); + } + return true; +} + +bool vershinina_a_cannons_algorithm::TestMPITaskParallel::post_processing() { + internal_order_test(); + + if (world.rank() == 0) { + auto* data_ptr = reinterpret_cast(taskData->outputs[0]); + std::copy(res_.begin(), res_.end(), data_ptr); + } + return true; +} \ No newline at end of file diff --git a/tasks/seq/vershinina_a_cannons_algorithm/func_tests/main.cpp b/tasks/seq/vershinina_a_cannons_algorithm/func_tests/main.cpp new file mode 100644 index 00000000000..877047adc2c --- /dev/null +++ b/tasks/seq/vershinina_a_cannons_algorithm/func_tests/main.cpp @@ -0,0 +1,131 @@ +#include + +#include +#include + +#include "seq/vershinina_a_cannons_algorithm/include/ops_seq.hpp" + +vershinina_a_cannons_algorithm::TMatrix getRandomMatrix(double r) { + std::random_device dev; + std::mt19937 gen(dev()); + std::uniform_int_distribution<> distr(0, 100); + auto matrix = vershinina_a_cannons_algorithm::TMatrix::create(r); + for (size_t i = 0; i < matrix.n * matrix.n; i++) { + matrix.data[i] = distr(gen); + } + return matrix; +} + +TEST(vershinina_a_cannons_algorithm, Test_1) { + auto lhs = vershinina_a_cannons_algorithm::TMatrix::create( + 4, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); + auto rhs = vershinina_a_cannons_algorithm::TMatrix::create( + 4, {1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, 4, 8, 12, 16}); + + auto act_res = vershinina_a_cannons_algorithm::TMatrix::create(4); + + auto ref_res = vershinina_a_cannons_algorithm::TMatrix::create( + 4, {30, 70, 110, 150, 70, 174, 278, 382, 110, 278, 446, 614, 150, 382, 614, 846}); + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data.data())); + taskDataSeq->inputs_count.emplace_back(4); + taskDataSeq->outputs.emplace_back(reinterpret_cast(act_res.data.data())); + taskDataSeq->outputs_count.emplace_back(act_res.n); + + vershinina_a_cannons_algorithm::TestTaskSequential testTaskSequential(taskDataSeq); + ASSERT_EQ(testTaskSequential.validation(), true); + testTaskSequential.pre_processing(); + testTaskSequential.run(); + testTaskSequential.post_processing(); + ASSERT_EQ(act_res, ref_res); +} + +TEST(vershinina_a_cannons_algorithm, Test_2) { + auto lhs = + vershinina_a_cannons_algorithm::TMatrix::create(4, {2, 3, 4, 5, 9, 8, 7, 6, 5, 4, 2, 3, 8, 7, 3, 4}); + auto rhs = + vershinina_a_cannons_algorithm::TMatrix::create(4, {3, 5, 7, 6, 2, 7, 6, 3, 7, 5, 3, 2, 4, 3, 2, 5}); + + auto act_res = vershinina_a_cannons_algorithm::TMatrix::create(4); + + auto ref_res = vershinina_a_cannons_algorithm::TMatrix::create( + 4, {60, 66, 54, 54, 116, 154, 144, 122, 49, 72, 71, 61, 75, 116, 115, 95}); + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data.data())); + taskDataSeq->inputs_count.emplace_back(4); + taskDataSeq->outputs.emplace_back(reinterpret_cast(act_res.data.data())); + taskDataSeq->outputs_count.emplace_back(act_res.n); + + vershinina_a_cannons_algorithm::TestTaskSequential testTaskSequential(taskDataSeq); + ASSERT_EQ(testTaskSequential.validation(), true); + testTaskSequential.pre_processing(); + testTaskSequential.run(); + testTaskSequential.post_processing(); + ASSERT_EQ(act_res, ref_res); +} + +TEST(vershinina_a_cannons_algorithm, Test_3) { + auto lhs = vershinina_a_cannons_algorithm::TMatrix::create(3, {1, 2, 3, 4, 5, 6, 7, 8, 9}); + auto rhs = vershinina_a_cannons_algorithm::TMatrix::create(3, {1, 4, 7, 2, 5, 8, 3, 6, 9}); + + auto act_res = vershinina_a_cannons_algorithm::TMatrix::create(3); + + auto ref_res = vershinina_a_cannons_algorithm::TMatrix::create(3, {14, 32, 50, 32, 77, 122, 50, 122, 194}); + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data.data())); + taskDataSeq->inputs_count.emplace_back(3); + taskDataSeq->outputs.emplace_back(reinterpret_cast(act_res.data.data())); + taskDataSeq->outputs_count.emplace_back(act_res.n); + + vershinina_a_cannons_algorithm::TestTaskSequential testTaskSequential(taskDataSeq); + ASSERT_EQ(testTaskSequential.validation(), true); + testTaskSequential.pre_processing(); + testTaskSequential.run(); + testTaskSequential.post_processing(); + ASSERT_EQ(act_res, ref_res); +} + +TEST(vershinina_a_cannons_algorithm, Test_4) { + auto lhs = getRandomMatrix(3); + auto rhs = getRandomMatrix(3); + + auto res = vershinina_a_cannons_algorithm::TMatrix::create(3); + + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data.data())); + taskDataSeq->inputs_count.emplace_back(3); + taskDataSeq->outputs.emplace_back(reinterpret_cast(res.data.data())); + taskDataSeq->outputs_count.emplace_back(res.n); + + vershinina_a_cannons_algorithm::TestTaskSequential testTaskSequential(taskDataSeq); + ASSERT_EQ(testTaskSequential.validation(), true); + testTaskSequential.pre_processing(); + testTaskSequential.run(); + testTaskSequential.post_processing(); + ASSERT_EQ(res, lhs * rhs); +} + +TEST(vershinina_a_cannons_algorithm, Test_5) { + auto lhs = getRandomMatrix(10); + auto rhs = getRandomMatrix(10); + + auto res = vershinina_a_cannons_algorithm::TMatrix::create(10); + + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data.data())); + taskDataSeq->inputs_count.emplace_back(10); + taskDataSeq->outputs.emplace_back(reinterpret_cast(res.data.data())); + taskDataSeq->outputs_count.emplace_back(res.n); + + vershinina_a_cannons_algorithm::TestTaskSequential testTaskSequential(taskDataSeq); + ASSERT_EQ(testTaskSequential.validation(), true); + testTaskSequential.pre_processing(); + testTaskSequential.run(); + testTaskSequential.post_processing(); + ASSERT_EQ(res, lhs * rhs); +} diff --git a/tasks/seq/vershinina_a_cannons_algorithm/include/ops_seq.hpp b/tasks/seq/vershinina_a_cannons_algorithm/include/ops_seq.hpp new file mode 100644 index 00000000000..f8ad3afc783 --- /dev/null +++ b/tasks/seq/vershinina_a_cannons_algorithm/include/ops_seq.hpp @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include "core/task/include/task.hpp" + +namespace vershinina_a_cannons_algorithm { + +template +struct TMatrix { + size_t n; + + std::vector data{}; + size_t hshift{}; + size_t vshift{}; + + void set_horizontal_shift(size_t shift) { hshift = shift; } + void set_vertical_shift(size_t shift) { vshift = shift; } + + const T& at(size_t row, size_t col) const noexcept { return data[row * n + col]; } + T& at(size_t row, size_t col) noexcept { return const_cast(std::as_const(*this).at(row, col)); } + + const T& at_h(size_t row, size_t col) const noexcept { + size_t actual_hshift = (hshift + row) % n; + if (col < n - actual_hshift) { + col += actual_hshift; + } else { + col = col - (n - actual_hshift); + } + return data[row * n + col]; + } + T& at_h(size_t row, size_t col) noexcept { return const_cast(std::as_const(*this).at_h(row, col)); } + + const T& at_v(size_t row, size_t col) const noexcept { + size_t actual_vshift = (vshift + col) % n; + if (row < n - actual_vshift) { + row += actual_vshift; + } else { + row = row - (n - actual_vshift); + } + return data[row * n + col]; + } + T& at_v(size_t row, size_t col) noexcept { return const_cast(std::as_const(*this).at_v(row, col)); } + + bool operator==(const TMatrix& other) const noexcept { return n == other.n && data == other.data; } + + void read(const T* src) { data.assign(src, src + n * n); } + + static TMatrix create(size_t n, std::initializer_list intl = {}) { + TMatrix mat = {n, std::vector(intl)}; + mat.data.resize(n * n); + return mat; + } + TMatrix operator*(const TMatrix& rhs) const { + auto res = create(n); + for (size_t i = 0; i < n; i++) { + for (size_t j = 0; j < rhs.n; j++) { + res.at(i, j) = 0; + for (size_t k = 0; k < rhs.n; k++) { + res.at(i, j) += at(i, k) * rhs.at(k, j); + } + } + } + return res; + } +}; + +class TestTaskSequential : public ppc::core::Task { + public: + explicit TestTaskSequential(std::shared_ptr taskData_) : Task(std::move(taskData_)) {} + bool pre_processing() override; + bool validation() override; + bool run() override; + bool post_processing() override; + int n{}; + + private: + TMatrix lhs_{}; + TMatrix rhs_{}; + TMatrix res_{}; + TMatrix res_c{}; +}; +} // namespace vershinina_a_cannons_algorithm \ No newline at end of file diff --git a/tasks/seq/vershinina_a_cannons_algorithm/perf_tests/main.cpp b/tasks/seq/vershinina_a_cannons_algorithm/perf_tests/main.cpp new file mode 100644 index 00000000000..0076c7792a8 --- /dev/null +++ b/tasks/seq/vershinina_a_cannons_algorithm/perf_tests/main.cpp @@ -0,0 +1,80 @@ +#include + +#include +#include + +#include "core/perf/include/perf.hpp" +#include "seq/vershinina_a_cannons_algorithm/include/ops_seq.hpp" + +vershinina_a_cannons_algorithm::TMatrix getRandomMatrix(double r) { + std::random_device dev; + std::mt19937 gen(dev()); + std::uniform_int_distribution<> distr(0, 100); + auto matrix = vershinina_a_cannons_algorithm::TMatrix::create(r); + for (size_t i = 0; i < matrix.n * matrix.n; i++) { + matrix.data[i] = distr(gen); + } + return matrix; +} + +TEST(vershinina_a_cannons_algorithm, test_pipeline_run) { + auto lhs = getRandomMatrix(24); + auto rhs = getRandomMatrix(24); + + auto act_res = vershinina_a_cannons_algorithm::TMatrix::create(24); + + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data.data())); + taskDataSeq->inputs_count.emplace_back(24); + taskDataSeq->outputs.emplace_back(reinterpret_cast(act_res.data.data())); + taskDataSeq->outputs_count.emplace_back(act_res.n); + + auto testTaskSequential = std::make_shared(taskDataSeq); + + auto perfAttr = std::make_shared(); + perfAttr->num_running = 10; + const auto t0 = std::chrono::high_resolution_clock::now(); + perfAttr->current_timer = [&] { + auto current_time_point = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(current_time_point - t0).count(); + return static_cast(duration) * 1e-9; + }; + + auto perfResults = std::make_shared(); + + auto perfAnalyzer = std::make_shared(testTaskSequential); + perfAnalyzer->pipeline_run(perfAttr, perfResults); + ppc::core::Perf::print_perf_statistic(perfResults); +} + +TEST(vershinina_a_cannons_algorithm, test_task_run) { + auto lhs = getRandomMatrix(24); + auto rhs = getRandomMatrix(24); + + auto act_res = vershinina_a_cannons_algorithm::TMatrix::create(24); + + std::shared_ptr taskDataSeq = std::make_shared(); + taskDataSeq->inputs.emplace_back(reinterpret_cast(lhs.data.data())); + taskDataSeq->inputs.emplace_back(reinterpret_cast(rhs.data.data())); + taskDataSeq->inputs_count.emplace_back(24); + taskDataSeq->outputs.emplace_back(reinterpret_cast(act_res.data.data())); + taskDataSeq->outputs_count.emplace_back(act_res.n); + + auto testTaskSequential = std::make_shared(taskDataSeq); + + auto perfAttr = std::make_shared(); + perfAttr->num_running = 10; + const auto t0 = std::chrono::high_resolution_clock::now(); + perfAttr->current_timer = [&] { + auto current_time_point = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(current_time_point - t0).count(); + return static_cast(duration) * 1e-9; + }; + + auto perfResults = std::make_shared(); + + auto perfAnalyzer = std::make_shared(testTaskSequential); + perfAnalyzer->task_run(perfAttr, perfResults); + ppc::core::Perf::print_perf_statistic(perfResults); +} diff --git a/tasks/seq/vershinina_a_cannons_algorithm/src/ops_seq.cpp b/tasks/seq/vershinina_a_cannons_algorithm/src/ops_seq.cpp new file mode 100644 index 00000000000..d8974050270 --- /dev/null +++ b/tasks/seq/vershinina_a_cannons_algorithm/src/ops_seq.cpp @@ -0,0 +1,56 @@ +#include "seq/vershinina_a_cannons_algorithm/include/ops_seq.hpp" + +#include + +using namespace std::chrono_literals; + +bool vershinina_a_cannons_algorithm::TestTaskSequential::pre_processing() { + internal_order_test(); + n = taskData->inputs_count[0]; + + lhs_.n = n; + rhs_.n = n; + + res_c.n = n; + res_.n = n; + + lhs_.read(reinterpret_cast(taskData->inputs[0])); + rhs_.read(reinterpret_cast(taskData->inputs[1])); + res_c = TMatrix::create(n); + res_ = TMatrix::create(n); + + lhs_.hshift = 0; + rhs_.vshift = 0; + + return true; +} + +bool vershinina_a_cannons_algorithm::TestTaskSequential::validation() { + internal_order_test(); + return taskData->inputs.size() == 2 && taskData->inputs_count[0] > 0; +} + +bool vershinina_a_cannons_algorithm::TestTaskSequential::run() { + internal_order_test(); + for (int k = 0; k < n; k++) { + for (int i = 0; i < n; i++) { + for (int j = 0; j < n; j++) { + res_c.at(i, j) = lhs_.at_h(i, j) * rhs_.at_v(i, j); + } + } + for (int t = 0; t < n; t++) { + for (int s = 0; s < n; s++) { + res_.at(t, s) += res_c.at(t, s); + } + } + lhs_.hshift++; + rhs_.vshift++; + } + return true; +} + +bool vershinina_a_cannons_algorithm::TestTaskSequential::post_processing() { + internal_order_test(); + std::copy(res_.data.begin(), res_.data.end(), reinterpret_cast(taskData->outputs[0])); + return true; +}