From f4788452696d04d10f2c2f3496e357a5500dca0f Mon Sep 17 00:00:00 2001 From: Alexey Kukanov Date: Mon, 25 Nov 2024 17:56:41 +0100 Subject: [PATCH 1/6] Extend the documentation with more information about multidimensional ranges --- ..._Topic_Other_Kinds_of_Iteration_Spaces.rst | 37 ++++++++++++++++- doc/main/tbb_userguide/parallel_for_os.rst | 5 ++- .../snippets/blocked_nd_range_example.cpp | 41 +++++++++++++++++++ .../snippets/blocked_nd_range_example.h | 37 +++++++++++++++++ 4 files changed, 117 insertions(+), 3 deletions(-) create mode 100644 doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp create mode 100644 doc/main/tbb_userguide/snippets/blocked_nd_range_example.h diff --git a/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst b/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst index 3352dd8d32..a6cb736dab 100644 --- a/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst +++ b/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst @@ -72,4 +72,39 @@ along its longest axis. When used with ``parallel_for``, it causes the loop to be "recursively blocked" in a way that improves cache usage. This nice cache behavior means that using ``parallel_for`` over a ``blocked_range2d`` can make a loop run faster than the sequential -equivalent, even on a single processor. +equivalent, even on a single processor. + +Also, ``blocked_range2d`` allows to use different value types across +its first dimenstion (called "rows") and the second one ("columns"). +That allows combining indexes, pointers, and iterators into a joint +iteration space. The method functions ``rows()`` and ``cols()`` return +corresponding dimensions in the form of a ``blocked_range``. + +The ``blocked_range3d`` class template extends this approach to 3D by adding +``pages()`` as the first dimension, followed by ``rows()`` and ``cols()``. 
+ +The ``blocked_nd_range`` class template represents a blocked iteration +space of any dimensionality, but in a slightly different way. All dimensions +of ``blocked_nd_range`` must be specified over the same value type, and the +constructor takes N instances of ``blocked_range``, not individual boundary +values. To indicate the distinctions, the different naming pattern was chosen. + + +An Example of a Multidimensional Iteration Space +------------------------------------------------ + +The example demonstrates calculation of a 3-dimensional filter over the pack +of feature maps, applying a kernel to a subrange of features. + +The ``convolution3d`` function iterates over the output cells and sets cell +values to the result of the ``kernel3d`` function, which summarizes values +from feature maps. + +For the computation to be performed in parallel, ``tbb::parallel_for`` is called +with ``tbb::blocked_nd_range`` as an argument. The body function then +iterates over the received 3-dimensional subrange in a loop nest, using +the ``dim`` method function to obtain loop boundaries for each dimension. + + +.. literalinclude:: ./snippets/blocked_nd_range_example.h + :language: c++ diff --git a/doc/main/tbb_userguide/parallel_for_os.rst b/doc/main/tbb_userguide/parallel_for_os.rst index fed07af68b..cbc7578f4c 100644 --- a/doc/main/tbb_userguide/parallel_for_os.rst +++ b/doc/main/tbb_userguide/parallel_for_os.rst @@ -55,8 +55,9 @@ before each identifier. The rest of the examples assume that such a Note the argument to ``operator()``. A ``blocked_range`` is a template class provided by the library. It describes a one-dimensional iteration space over type ``T``. Class ``parallel_for`` works with other -kinds of iteration spaces too. The library provides ``blocked_range2d`` -for two-dimensional spaces. You can define your own spaces as explained +kinds of iteration spaces too. 
The library provides ``blocked_range2d``, +``blocked_range3d``, and ``blocked_nd_range`` for multidimensional spaces. +You can define your own spaces as explained in :ref:`Advanced_Topic_Other_Kinds_of_Iteration_Spaces`. diff --git a/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp new file mode 100644 index 0000000000..b68587b22b --- /dev/null +++ b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp @@ -0,0 +1,41 @@ +#include "oneapi/tbb/tbb_config.h" + +#include "blocked_nd_range_example.h" +#endif + +#include "oneapi/tbb/tbb_stddef.h" +#include + +int main() { + const int kernel_length = 9; + const int kernel_width = 5; + const int kernel_height = 5; + + const int feature_maps_length = 128; + const int feature_maps_width = 16; + const int feature_maps_heigth = 16; + + const int out_length = feature_maps_length - kernel_length + 1; + const int out_width = feature_maps_width - kernel_width + 1; + const int out_heigth = feature_maps_heigth - kernel_height + 1; + + // Initializes feature maps with 1 in each cell and out with zeros. 
+ std::vector>> feature_maps(feature_maps_length, std::vector>(feature_maps_width, std::vector(feature_maps_heigth, 1.0f))); + std::vector>> out(out_length, std::vector>(out_width, std::vector(out_heigth, 0.f))); + + // 3D convolution calculates sum of all elements in kernel + convolution3d(feature_maps, out, + out_length, out_width, out_heigth, + kernel_length, kernel_width, kernel_height); + + // Checks correctness of convolution by equality to expected sum of elements + float expected = float(kernel_length * kernel_height * kernel_width); + for (auto i : out) { + for (auto j : i) { + for (auto k : j) { + __TBB_ASSERT_RELEASE(k == expected, "convolution fails to calculate correctly"); + } + } + } + return 0; +} diff --git a/doc/main/tbb_userguide/snippets/blocked_nd_range_example.h b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.h new file mode 100644 index 0000000000..ded2a09c57 --- /dev/null +++ b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.h @@ -0,0 +1,37 @@ +#include "oneapi/tbb/blocked_nd_range.h" +#include "oneapi/tbb/parallel_for.h" + +template <typename Features> +float kernel3d(const Features& feature_maps, int i, int j, int k, + int kernel_length, int kernel_width, int kernel_height) { + float result = 0.f; + + for (int feature_i = i; feature_i < i + kernel_length; ++feature_i) + for (int feature_j = j; feature_j < j + kernel_width; ++feature_j) + for (int feature_k = k; feature_k < k + kernel_height; ++feature_k) + result += feature_maps[feature_i][feature_j][feature_k]; + + return result; +} + +template <typename Features, typename Output> +void convolution3d(const Features& feature_maps, Output& out, + int out_length, int out_width, int out_heigth, + int kernel_length, int kernel_width, int kernel_height) { + using range_t = oneapi::tbb::blocked_nd_range<int, 3>; + + oneapi::tbb::parallel_for( + range_t({0, out_length}, {0, out_width}, {0, out_heigth}), + [&](const range_t& out_range) { + auto out_x = out_range.dim(0); + auto out_y = out_range.dim(1); + auto out_z = out_range.dim(2); + + for 
(int i = out_x.begin(); i < out_x.end(); ++i) + for (int j = out_y.begin(); j < out_y.end(); ++j) + for (int k = out_z.begin(); k < out_z.end(); ++k) + out[i][j][k] = kernel3d(feature_maps, i, j, k, + kernel_length, kernel_width, kernel_height); + } + ); +} From 9f5100665957e00061ad63872a07e6652ff9ec2b Mon Sep 17 00:00:00 2001 From: Alexey Kukanov Date: Mon, 25 Nov 2024 18:15:16 +0100 Subject: [PATCH 2/6] Fix the sample test. --- doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp index b68587b22b..882466a8f4 100644 --- a/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp +++ b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp @@ -1,7 +1,6 @@ #include "oneapi/tbb/tbb_config.h" #include "blocked_nd_range_example.h" -#endif #include "oneapi/tbb/tbb_stddef.h" #include From 9a0fc3556fa4b5b0eb3339a6427a5f00129a113c Mon Sep 17 00:00:00 2001 From: Alexey Kukanov Date: Mon, 25 Nov 2024 22:15:38 +0100 Subject: [PATCH 3/6] Fix a typo --- .../Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst b/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst index a6cb736dab..f847dd1113 100644 --- a/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst +++ b/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst @@ -75,7 +75,7 @@ This nice cache behavior means that using ``parallel_for`` over a equivalent, even on a single processor. Also, ``blocked_range2d`` allows to use different value types across -its first dimenstion (called "rows") and the second one ("columns"). +its first dimension (called "rows") and the second one ("columns"). 
That allows combining indexes, pointers, and iterators into a joint iteration space. The method functions ``rows()`` and ``cols()`` return corresponding dimensions in the form of a ``blocked_range``. From 368859a7e5cc570e09da40705ff67a10ecf3693f Mon Sep 17 00:00:00 2001 From: Alexey Kukanov Date: Wed, 27 Nov 2024 16:07:02 +0100 Subject: [PATCH 4/6] Address review feedback --- .../tbb_userguide/snippets/blocked_nd_range_example.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp index 882466a8f4..554c08d13c 100644 --- a/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp +++ b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp @@ -1,9 +1,6 @@ -#include "oneapi/tbb/tbb_config.h" - #include "blocked_nd_range_example.h" - -#include "oneapi/tbb/tbb_stddef.h" #include +#include int main() { const int kernel_length = 9; @@ -32,7 +29,7 @@ int main() { for (auto i : out) { for (auto j : i) { for (auto k : j) { - __TBB_ASSERT_RELEASE(k == expected, "convolution fails to calculate correctly"); + assert(k == expected && "convolution failed to calculate correctly"); } } } From 05343631f2df3fad3526764c65395432ce06833b Mon Sep 17 00:00:00 2001 From: Alexey Kukanov Date: Thu, 28 Nov 2024 12:40:48 +0100 Subject: [PATCH 5/6] Apply suggestions from code review Co-authored-by: Alexandra --- .../Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst | 4 ++-- doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst b/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst index f847dd1113..f4208450a0 100644 --- a/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst +++ 
b/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst @@ -76,7 +76,7 @@ equivalent, even on a single processor. Also, ``blocked_range2d`` allows to use different value types across its first dimension (called "rows") and the second one ("columns"). -That allows combining indexes, pointers, and iterators into a joint +That means you can combine indexes, pointers, and iterators into a joint iteration space. The method functions ``rows()`` and ``cols()`` return corresponding dimensions in the form of a ``blocked_range``. @@ -90,7 +90,7 @@ constructor takes N instances of ``blocked_range``, not individual boundary values. To indicate the distinctions, the different naming pattern was chosen. -An Example of a Multidimensional Iteration Space +Example of a Multidimensional Iteration Space ------------------------------------------------ The example demonstrates calculation of a 3-dimensional filter over the pack diff --git a/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp index 554c08d13c..7417123999 100644 --- a/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp +++ b/doc/main/tbb_userguide/snippets/blocked_nd_range_example.cpp @@ -19,12 +19,12 @@ int main() { std::vector>> feature_maps(feature_maps_length, std::vector>(feature_maps_width, std::vector(feature_maps_heigth, 1.0f))); std::vector>> out(out_length, std::vector>(out_width, std::vector(out_heigth, 0.f))); - // 3D convolution calculates sum of all elements in kernel + // 3D convolution calculates the sum of all elements in the kernel convolution3d(feature_maps, out, out_length, out_width, out_heigth, kernel_length, kernel_width, kernel_height); - // Checks correctness of convolution by equality to expected sum of elements + // Checks correctness of convolution by equality to the expected sum of elements float expected = float(kernel_length * kernel_height * kernel_width); for (auto i : out) { for (auto j 
: i) { From 7017e590b1f09ae02cd3a9d5613040c8d07f7ae4 Mon Sep 17 00:00:00 2001 From: Alexey Kukanov Date: Thu, 28 Nov 2024 14:59:23 +0100 Subject: [PATCH 6/6] Improve with the help of review feedback --- ..._Topic_Other_Kinds_of_Iteration_Spaces.rst | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst b/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst index f4208450a0..99446ab659 100644 --- a/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst +++ b/doc/main/tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces.rst @@ -74,36 +74,37 @@ This nice cache behavior means that using ``parallel_for`` over a ``blocked_range2d`` can make a loop run faster than the sequential equivalent, even on a single processor. -Also, ``blocked_range2d`` allows to use different value types across -its first dimension (called "rows") and the second one ("columns"). +The ``blocked_range2d`` allows you to use different value types for +its first dimension, *rows*, and the second one, *columns*. That means you can combine indexes, pointers, and iterators into a joint -iteration space. The method functions ``rows()`` and ``cols()`` return -corresponding dimensions in the form of a ``blocked_range``. +iteration space. Use the methods ``rows()`` and ``cols()`` to obtain +``blocked_range`` objects that represent the respective dimensions. The ``blocked_range3d`` class template extends this approach to 3D by adding ``pages()`` as the first dimension, followed by ``rows()`` and ``cols()``. The ``blocked_nd_range`` class template represents a blocked iteration -space of any dimensionality, but in a slightly different way. All dimensions -of ``blocked_nd_range`` must be specified over the same value type, and the -constructor takes N instances of ``blocked_range``, not individual boundary -values. 
To indicate the distinctions, the different naming pattern was chosen. +space of any dimensionality. Unlike the previously described 2D and 3D ranges, +``blocked_nd_range`` uses the same value type for all its axes, and its +constructor requires you to pass N instances of ``blocked_range`` instead of +individual boundary values. The change in the naming pattern reflects these +differences. Example of a Multidimensional Iteration Space ------------------------------------------------ The example demonstrates calculation of a 3-dimensional filter over the pack -of feature maps, applying a kernel to a subrange of features. +of feature maps. -The ``convolution3d`` function iterates over the output cells and sets cell -values to the result of the ``kernel3d`` function, which summarizes values -from feature maps. +The ``convolution3d`` function iterates over the output cells, assigning to +each cell the result of the ``kernel3d`` function that combines the values +from a range in the feature maps. -For the computation to be performed in parallel, ``tbb::parallel_for`` is called -with ``tbb::blocked_nd_range`` as an argument. The body function then -iterates over the received 3-dimensional subrange in a loop nest, using -the ``dim`` method function to obtain loop boundaries for each dimension. +To run the computation in parallel, ``tbb::parallel_for`` is called with +``tbb::blocked_nd_range`` as an argument. The body function processes +the received 3D subrange in nested loops, using the method ``dim`` to get +the loop boundaries for each dimension. .. literalinclude:: ./snippets/blocked_nd_range_example.h