Седова Ольга (Sedova Olga). Task 2. Variant 16. Vertical ribbon scheme for matrix-vector multiplication (#386)

Vertical ribbon scheme: the matrix is partitioned vertically into column bands (ribbons). Each process multiplies one or more bands by the vector, and the partial results are then summed (via reduce in the parallel version).
1. The taskData structure: a taskData structure holds the input data (the matrix and the vector) and the output data (the result vector). It exposes pointers to the data (taskData->inputs[0], taskData->inputs[1], taskData->outputs[0]) and their sizes (taskData->inputs_count[0], taskData->inputs_count[1]).
2. validation(): checks that the input data are well formed: taskData is not a null pointer, the input pointers are not null, and the number of matrix elements is a multiple of the number of vector elements (a necessary condition for the matrix-vector product).
3. The ParallelMPI class: implements the parallel version of the algorithm using Boost.MPI.
- pre_processing(): prepares the data. It extracts the inputs from taskData, computes the matrix dimensions (rows_ and cols_), and distributes the work across processes by building the proc and off vectors, which hold the amount of work and the offset for each process (see the sketch after this description). The distribution follows the vertical partitioning of the matrix.
- run(): performs the parallel computation. It broadcasts the matrix dimensions, the work distribution, and the input data to all processes; each process computes its share of the product, using proc and off to determine its range, and the partial results are combined with reduce.
- post_processing(): finalizes the computation. On rank 0 (the root process) the partial results are gathered into the result vector and written to taskData->outputs[0].
4. The SequentialMPI class: implements the sequential version of the algorithm.
- pre_processing(): extracts the data from taskData and computes the matrix dimensions.
- run(): computes the matrix-vector product sequentially with the vertical ribbon scheme, using plain loops.
- post_processing(): writes the result vector to taskData->outputs[0].
Materials used while writing the code:
1. Gergel V.P., Introduction to Parallel Programming Methods, Section 7, "Parallel Methods for Matrix-Vector Multiplication".
2. https://intuit.ru/studies/courses/1156/190/lecture/4952
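The proc/off bookkeeping that pre_processing() builds is the same logic the four distribution tests below check inline. Pulled out into a standalone helper it might look like the following sketch; the function name calculate_distribution is illustrative only, since in the commit this code lives inside ParallelMPI::pre_processing().

```cpp
// Illustrative helper (hypothetical name); the commit keeps this logic in
// ParallelMPI::pre_processing() and repeats it verbatim in the tests below.
// rows    : number of bands (rows_ in the tests), cols : elements per band,
// proc[p] : number of matrix elements assigned to process p,
// off[p]  : offset of its first element (-1 for processes left idle).
#include <vector>

void calculate_distribution(int rows, int cols, int count_proc,
                            std::vector<int>& proc, std::vector<int>& off) {
  proc.assign(count_proc, 0);
  off.assign(count_proc, 0);
  if (count_proc > rows) {
    // More processes than bands: one band each, the rest get no work.
    for (int i = 0; i < rows; ++i) {
      off[i] = i * cols;
      proc[i] = cols;
    }
    for (int i = rows; i < count_proc; ++i) {
      off[i] = -1;
      proc[i] = 0;
    }
  } else {
    // Block distribution: the first (rows % count_proc) processes get one extra band.
    int bands_per_proc = rows / count_proc;
    int surplus = rows % count_proc;
    int offset = 0;
    for (int i = 0; i < count_proc; ++i) {
      proc[i] = (surplus > 0 ? bands_per_proc + 1 : bands_per_proc) * cols;
      if (surplus > 0) {
        --surplus;
      }
      off[i] = offset;
      offset += proc[i];
    }
  }
}
```

For example, 5 bands of 3 elements split across 3 processes gives proc = {6, 6, 3} and off = {0, 6, 12}, matching the distribution2 test below.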
1 parent ce4c970, commit c82fba8
Showing 8 changed files with 922 additions and 0 deletions.
209 changes: 209 additions & 0 deletions
tasks/mpi/sedova_o_vertical_ribbon_scheme/func_tests/main.cpp
@@ -0,0 +1,209 @@
#include <gtest/gtest.h>

#include <boost/mpi/communicator.hpp>
#include <boost/mpi/environment.hpp>
#include <random>
#include <vector>

#include "mpi/sedova_o_vertical_ribbon_scheme/include/ops_mpi.hpp"

TEST(sedova_o_vertical_ribbon_scheme_mpi, distribution1) {
  int rows_ = 5;
  int cols_ = 3;
  int count_proc = 5;
  std::vector<int> proc_(count_proc, 0);
  std::vector<int> off(count_proc, 0);
  if (count_proc > rows_) {
    for (int i = 0; i < rows_; ++i) {
      off[i] = i * cols_;
      proc_[i] = cols_;
    }
    for (int i = rows_; i < count_proc; ++i) {
      off[i] = -1;
      proc_[i] = 0;
    }
  } else {
    int count_proc_ = rows_ / count_proc;
    int surplus = rows_ % count_proc;
    int offset = 0;
    for (int i = 0; i < count_proc; ++i) {
      if (surplus > 0) {
        proc_[i] = (count_proc_ + 1) * cols_;
        --surplus;
      } else {
        proc_[i] = count_proc_ * cols_;
      }
      off[i] = offset;
      offset += proc_[i];
    }
  }
  std::vector<int> expected_proc = {3, 3, 3, 3, 3};
  std::vector<int> expected_off = {0, 3, 6, 9, 12};
  EXPECT_EQ(proc_, expected_proc);
  EXPECT_EQ(off, expected_off);
}

TEST(sedova_o_vertical_ribbon_scheme_mpi, distribution2) {
  int rows_ = 5;
  int cols_ = 3;
  int count_proc = 3;
  std::vector<int> proc_(count_proc, 0);
  std::vector<int> off(count_proc, 0);
  if (count_proc > rows_) {
    for (int i = 0; i < rows_; ++i) {
      off[i] = i * cols_;
      proc_[i] = cols_;
    }
    for (int i = rows_; i < count_proc; ++i) {
      off[i] = -1;
      proc_[i] = 0;
    }
  } else {
    int count_proc_ = rows_ / count_proc;
    int surplus = rows_ % count_proc;
    int offset = 0;
    for (int i = 0; i < count_proc; ++i) {
      if (surplus > 0) {
        proc_[i] = (count_proc_ + 1) * cols_;
        --surplus;
      } else {
        proc_[i] = count_proc_ * cols_;
      }
      off[i] = offset;
      offset += proc_[i];
    }
  }
  std::vector<int> expected_proc = {6, 6, 3};
  std::vector<int> expected_off = {0, 6, 12};
  EXPECT_EQ(proc_, expected_proc);
  EXPECT_EQ(off, expected_off);
}

TEST(sedova_o_vertical_ribbon_scheme_mpi, distribution3) {
  int rows_ = 5;
  int cols_ = 4;
  int count_proc = 6;
  std::vector<int> proc_(count_proc, 0);
  std::vector<int> off(count_proc, 0);
  if (count_proc > rows_) {
    for (int i = 0; i < rows_; ++i) {
      off[i] = i * cols_;
      proc_[i] = cols_;
    }
    for (int i = rows_; i < count_proc; ++i) {
      off[i] = -1;
      proc_[i] = 0;
    }
  } else {
    int count_proc_ = rows_ / count_proc;
    int surplus = rows_ % count_proc;
    int offset = 0;
    for (int i = 0; i < count_proc; ++i) {
      if (surplus > 0) {
        proc_[i] = (count_proc_ + 1) * cols_;
        --surplus;
      } else {
        proc_[i] = count_proc_ * cols_;
      }
      off[i] = offset;
      offset += proc_[i];
    }
  }
  std::vector<int> expected_proc = {4, 4, 4, 4, 4, 0};
  std::vector<int> expected_off = {0, 4, 8, 12, 16, -1};
  EXPECT_EQ(proc_, expected_proc);
  EXPECT_EQ(off, expected_off);
}

TEST(sedova_o_vertical_ribbon_scheme_mpi, distribution4) {
  int rows_ = 10;
  int cols_ = 4;
  int count_proc = 8;
  std::vector<int> proc_(count_proc, 0);
  std::vector<int> off(count_proc, 0);
  if (count_proc > rows_) {
    for (int i = 0; i < rows_; ++i) {
      off[i] = i * cols_;
      proc_[i] = cols_;
    }
    for (int i = rows_; i < count_proc; ++i) {
      off[i] = -1;
      proc_[i] = 0;
    }
  } else {
    int count_proc_ = rows_ / count_proc;
    int surplus = rows_ % count_proc;
    int offset = 0;
    for (int i = 0; i < count_proc; ++i) {
      if (surplus > 0) {
        proc_[i] = (count_proc_ + 1) * cols_;
        --surplus;
      } else {
        proc_[i] = count_proc_ * cols_;
      }
      off[i] = offset;
      offset += proc_[i];
    }
  }
  std::vector<int> expected_proc = {8, 8, 4, 4, 4, 4, 4, 4};
  std::vector<int> expected_off = {0, 8, 16, 20, 24, 28, 32, 36};
  EXPECT_EQ(proc_, expected_proc);
  EXPECT_EQ(off, expected_off);
}

TEST(sedova_o_vertical_ribbon_scheme_mpi, false_validation) {
  std::vector<int> matrix = {1, 2, 3};
  std::vector<int> vector = {7, 8};
  std::vector<int> result(3, 0);

  std::shared_ptr<ppc::core::TaskData> taskDataSeq = std::make_shared<ppc::core::TaskData>();
  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix.data()));
  taskDataSeq->inputs_count.emplace_back(matrix.size());
  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(vector.data()));
  taskDataSeq->inputs_count.emplace_back(vector.size());
  taskDataSeq->outputs.emplace_back(reinterpret_cast<uint8_t *>(result.data()));
  taskDataSeq->outputs_count.emplace_back(result.size());

  sedova_o_vertical_ribbon_scheme_mpi::SequentialMPI TestSequential(taskDataSeq);
  EXPECT_FALSE(TestSequential.validation());
}

TEST(sedova_o_vertical_ribbon_scheme_mpi, true_validation) {
  std::vector<int> matrix = {1, 2, 3, 4};
  std::vector<int> vector = {7, 8};
  std::vector<int> result(2, 0);

  std::shared_ptr<ppc::core::TaskData> taskDataPar = std::make_shared<ppc::core::TaskData>();
  taskDataPar->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix.data()));
  taskDataPar->inputs_count.emplace_back(matrix.size());
  taskDataPar->inputs.emplace_back(reinterpret_cast<uint8_t *>(vector.data()));
  taskDataPar->inputs_count.emplace_back(vector.size());
  taskDataPar->outputs.emplace_back(reinterpret_cast<uint8_t *>(result.data()));
  taskDataPar->outputs_count.emplace_back(result.size());

  sedova_o_vertical_ribbon_scheme_mpi::ParallelMPI taskParallel(taskDataPar);
  EXPECT_TRUE(taskParallel.validation());
}

TEST(sedova_o_vertical_ribbon_scheme_mpi, correct_matrix_and_vector_seq) {
  std::vector<int> matrix = {1, 2, 3, 4, 5, 6};
  std::vector<int> vector = {7, 8};
  std::vector<int> result(3, 0);

  std::shared_ptr<ppc::core::TaskData> taskDataSeq = std::make_shared<ppc::core::TaskData>();
  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(matrix.data()));
  taskDataSeq->inputs_count.emplace_back(matrix.size());
  taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t *>(vector.data()));
  taskDataSeq->inputs_count.emplace_back(vector.size());
  taskDataSeq->outputs.emplace_back(reinterpret_cast<uint8_t *>(result.data()));
  taskDataSeq->outputs_count.emplace_back(result.size());

  sedova_o_vertical_ribbon_scheme_mpi::SequentialMPI TestSequential(taskDataSeq);
  ASSERT_TRUE(TestSequential.validation());
  TestSequential.pre_processing();
  TestSequential.run();
  TestSequential.post_processing();

  std::vector<int> expected_result = {39, 54, 69};
  ASSERT_EQ(result, expected_result);
}
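The sequential implementation itself (ops_mpi.cpp) is not among the files shown in this excerpt. A minimal standalone sketch that reproduces the behaviour checked by correct_matrix_and_vector_seq above, assuming the flat matrix stores the vertical bands (columns) contiguously, could be:

```cpp
// Hypothetical standalone version of the sequential vertical ribbon product;
// it is not the commit's SequentialMPI::run(), only an illustration of the
// arithmetic. With bands stored contiguously, {1, 2, 3, 4, 5, 6} and vector
// {7, 8} mean columns {1, 2, 3} and {4, 5, 6}, so the result is
// 7 * {1, 2, 3} + 8 * {4, 5, 6} = {39, 54, 69}.
#include <vector>

std::vector<int> ribbon_multiply_seq(const std::vector<int>& matrix,
                                     const std::vector<int>& vec) {
  const int cols = static_cast<int>(vec.size());            // number of bands
  const int rows = static_cast<int>(matrix.size()) / cols;  // elements per band
  std::vector<int> result(rows, 0);
  for (int j = 0; j < cols; ++j) {    // walk the vertical bands
    for (int i = 0; i < rows; ++i) {  // accumulate the band scaled by vec[j]
      result[i] += matrix[j * rows + i] * vec[j];
    }
  }
  return result;
}
```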
52 changes: 52 additions & 0 deletions
tasks/mpi/sedova_o_vertical_ribbon_scheme/include/ops_mpi.hpp
@@ -0,0 +1,52 @@
// Copyright 2024 Sedova Olga
#pragma once

#include <gtest/gtest.h>

#include <boost/mpi/collectives.hpp>
#include <boost/mpi/communicator.hpp>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
#include <vector>

#include "core/task/include/task.hpp"

namespace sedova_o_vertical_ribbon_scheme_mpi {

class ParallelMPI : public ppc::core::Task {
 public:
  explicit ParallelMPI(std::shared_ptr<ppc::core::TaskData> taskData_) : Task(std::move(taskData_)) {}
  bool pre_processing() override;
  bool validation() override;
  bool run() override;
  bool post_processing() override;

 private:
  int rows_{};
  int cols_{};
  std::vector<int> input_matrix_1;
  std::vector<int> input_vector_1;
  std::vector<int> result_vector_;
  std::vector<int> proc;
  std::vector<int> off;
  boost::mpi::communicator world;
};

class SequentialMPI : public ppc::core::Task {
 public:
  explicit SequentialMPI(std::shared_ptr<ppc::core::TaskData> taskData_) : Task(std::move(taskData_)) {}
  bool pre_processing() override;
  bool validation() override;
  bool run() override;
  bool post_processing() override;

 private:
  int* matrix_;
  int* vector_;
  std::vector<int> result_vector_;
  int rows_;
  int cols_;
};
}  // namespace sedova_o_vertical_ribbon_scheme_mpi
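The ops_mpi.cpp that implements the methods declared above is also not shown here. A self-contained sketch of the broadcast / local multiply / reduce flow the commit message describes for ParallelMPI::run() might look as follows; the function name, the simple block split of bands, and the band-contiguous layout are assumptions made for illustration, not the commit's code (the commit instead carries the split in its proc/off arrays):

```cpp
// Sketch only, not the commit's ParallelMPI: rank 0 owns the inputs, every
// rank receives them via broadcast, multiplies its own column bands, and the
// partial vectors are summed on rank 0 with reduce.
#include <algorithm>
#include <boost/mpi/collectives.hpp>
#include <boost/mpi/communicator.hpp>
#include <functional>
#include <vector>

std::vector<int> ribbon_multiply_mpi(const boost::mpi::communicator& world,
                                     std::vector<int> matrix,  // filled on rank 0
                                     std::vector<int> vec,     // filled on rank 0
                                     int rows, int cols) {     // set on rank 0
  // Broadcast the sizes and both inputs to every rank.
  boost::mpi::broadcast(world, rows, 0);
  boost::mpi::broadcast(world, cols, 0);
  matrix.resize(static_cast<std::size_t>(rows) * cols);
  vec.resize(cols);
  boost::mpi::broadcast(world, matrix.data(), rows * cols, 0);
  boost::mpi::broadcast(world, vec.data(), cols, 0);

  // Simple block split of the `cols` bands across the ranks.
  const int nproc = world.size();
  const int rank = world.rank();
  const int base = cols / nproc;
  const int extra = cols % nproc;
  const int first = rank * base + std::min(rank, extra);
  const int count = base + (rank < extra ? 1 : 0);

  // Each rank folds its bands into a full-length partial result.
  std::vector<int> partial(rows, 0);
  for (int j = first; j < first + count; ++j) {
    for (int i = 0; i < rows; ++i) {
      partial[i] += matrix[j * rows + i] * vec[j];
    }
  }

  // Element-wise sum of the partial vectors, collected on rank 0.
  std::vector<int> result(rows, 0);
  boost::mpi::reduce(world, partial.data(), rows, result.data(), std::plus<int>(), 0);
  return result;  // meaningful only on rank 0
}
```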
130 changes: 130 additions & 0 deletions
tasks/mpi/sedova_o_vertical_ribbon_scheme/perf_tests/main.cpp
@@ -0,0 +1,130 @@
#include <gtest/gtest.h>

#include <boost/mpi/timer.hpp>
#include <random>

#include "core/perf/include/perf.hpp"
#include "mpi/sedova_o_vertical_ribbon_scheme/include/ops_mpi.hpp"

TEST(sedova_o_vertical_ribbon_scheme_mpi, test_pipeline_run) {
  boost::mpi::environment env;
  boost::mpi::communicator world;
  std::vector<int> global_matrix;
  std::vector<int> global_vector;
  std::vector<int> global_result;
  std::shared_ptr<ppc::core::TaskData> taskDataPar = std::make_shared<ppc::core::TaskData>();
  int rows_;
  int cols_;
  if (world.rank() == 0) {
    rows_ = 2024;
    cols_ = 2024;
    global_vector.resize(cols_);
    global_matrix.resize(rows_ * cols_);
    for (int j = 0; j < rows_; ++j) {
      for (int i = 0; i < cols_; ++i) {
        global_matrix[j * cols_ + i] = (rand() % 101) - 50;
      }
    }
    for (int i = 0; i < rows_; ++i) {
      global_vector[i] = (rand() % 100) - 50;
    }
    global_result.resize(cols_, 0);
    taskDataPar->inputs.emplace_back(reinterpret_cast<uint8_t*>(global_matrix.data()));
    taskDataPar->inputs_count.emplace_back(global_matrix.size());
    taskDataPar->inputs.emplace_back(reinterpret_cast<uint8_t*>(global_vector.data()));
    taskDataPar->inputs_count.emplace_back(global_vector.size());
    taskDataPar->outputs.emplace_back(reinterpret_cast<uint8_t*>(global_result.data()));
    taskDataPar->outputs_count.emplace_back(global_result.size());
  }
  auto taskParallel = std::make_shared<sedova_o_vertical_ribbon_scheme_mpi::ParallelMPI>(taskDataPar);
  ASSERT_TRUE(taskParallel->validation());
  taskParallel->pre_processing();
  taskParallel->run();
  taskParallel->post_processing();
  auto perfAttr = std::make_shared<ppc::core::PerfAttr>();
  perfAttr->num_running = 10;
  const boost::mpi::timer current_timer;
  perfAttr->current_timer = [&] { return current_timer.elapsed(); };
  auto perfResults = std::make_shared<ppc::core::PerfResults>();
  auto perfAnalyzer = std::make_shared<ppc::core::Perf>(taskParallel);
  perfAnalyzer->pipeline_run(perfAttr, perfResults);
  if (world.rank() == 0) {
    ppc::core::Perf::print_perf_statistic(perfResults);
    std::vector<int> seq_result(global_result.size(), 0);
    auto taskDataSeq = std::make_shared<ppc::core::TaskData>();
    taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t*>(global_matrix.data()));
    taskDataSeq->inputs_count.emplace_back(global_matrix.size());
    taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t*>(global_vector.data()));
    taskDataSeq->inputs_count.emplace_back(global_vector.size());
    taskDataSeq->outputs.emplace_back(reinterpret_cast<uint8_t*>(seq_result.data()));
    taskDataSeq->outputs_count.emplace_back(seq_result.size());
    auto taskSequential = std::make_shared<sedova_o_vertical_ribbon_scheme_mpi::SequentialMPI>(taskDataSeq);
    ASSERT_TRUE(taskSequential->validation());
    taskSequential->pre_processing();
    taskSequential->run();
    taskSequential->post_processing();
    ASSERT_EQ(global_result.size(), seq_result.size());
    EXPECT_EQ(global_result, seq_result);
  }
}

TEST(sedova_o_vertical_ribbon_scheme_mpi, test_task_run) {
  boost::mpi::environment env;
  boost::mpi::communicator world;
  std::vector<int> global_matrix;
  std::vector<int> global_vector;
  std::vector<int> global_result;
  std::shared_ptr<ppc::core::TaskData> taskDataPar = std::make_shared<ppc::core::TaskData>();
  int rows_;
  int cols_;
  if (world.rank() == 0) {
    rows_ = 2000;
    cols_ = 2000;
    global_matrix.resize(rows_ * cols_);
    global_vector.resize(cols_);
    for (int j = 0; j < rows_; ++j) {
      for (int i = 0; i < cols_; ++i) {
        global_matrix[j * cols_ + i] = (rand() % 101) - 50;
      }
    }
    for (int i = 0; i < rows_; ++i) {
      global_vector[i] = (rand() % 100) - 50;
    }
    global_result.resize(cols_, 0);
    taskDataPar->inputs.emplace_back(reinterpret_cast<uint8_t*>(global_matrix.data()));
    taskDataPar->inputs_count.emplace_back(global_matrix.size());
    taskDataPar->inputs.emplace_back(reinterpret_cast<uint8_t*>(global_vector.data()));
    taskDataPar->inputs_count.emplace_back(global_vector.size());
    taskDataPar->outputs.emplace_back(reinterpret_cast<uint8_t*>(global_result.data()));
    taskDataPar->outputs_count.emplace_back(global_result.size());
  }
  auto taskParallel = std::make_shared<sedova_o_vertical_ribbon_scheme_mpi::ParallelMPI>(taskDataPar);
  ASSERT_TRUE(taskParallel->validation());
  taskParallel->pre_processing();
  taskParallel->run();
  taskParallel->post_processing();
  auto perfAttr = std::make_shared<ppc::core::PerfAttr>();
  perfAttr->num_running = 10;
  const boost::mpi::timer current_timer;
  perfAttr->current_timer = [&] { return current_timer.elapsed(); };
  auto perfResults = std::make_shared<ppc::core::PerfResults>();
  auto perfAnalyzer = std::make_shared<ppc::core::Perf>(taskParallel);
  perfAnalyzer->task_run(perfAttr, perfResults);
  if (world.rank() == 0) {
    ppc::core::Perf::print_perf_statistic(perfResults);
    std::vector<int> seq_result(global_result.size(), 0);
    auto taskDataSeq = std::make_shared<ppc::core::TaskData>();
    taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t*>(global_matrix.data()));
    taskDataSeq->inputs_count.emplace_back(global_matrix.size());
    taskDataSeq->inputs.emplace_back(reinterpret_cast<uint8_t*>(global_vector.data()));
    taskDataSeq->inputs_count.emplace_back(global_vector.size());
    taskDataSeq->outputs.emplace_back(reinterpret_cast<uint8_t*>(seq_result.data()));
    taskDataSeq->outputs_count.emplace_back(seq_result.size());
    auto taskSequential = std::make_shared<sedova_o_vertical_ribbon_scheme_mpi::SequentialMPI>(taskDataSeq);
    ASSERT_TRUE(taskSequential->validation());
    taskSequential->pre_processing();
    taskSequential->run();
    taskSequential->post_processing();
    ASSERT_EQ(global_result.size(), seq_result.size());
    EXPECT_EQ(global_result, seq_result);
  }
}