diff --git a/doc/GSG/before_beginning_and_example.rst b/doc/GSG/before_beginning_and_example.rst deleted file mode 100644 index f7119ffdd4..0000000000 --- a/doc/GSG/before_beginning_and_example.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. _Before_You_Begin: - -Before You Begin -**************** - -After installing |short_name|, you need to set the environment variables: - -#. Go to the oneTBB installation directory (````). By default, ```` is the following: - - * On Linux* OS: - - * For superusers (root): ``/opt/intel/oneapi`` - * For ordinary users (non-root): ``$HOME/intel/oneapi`` - - * On Windows* OS: - - * ``\Intel\oneAPI`` - -#. Set the environment variables, using the script in , by running - - * On Linux* OS: - - ``vars.{sh|csh} in /tbb/latest/env`` - - * On Windows* OS: - - ``vars.bat in /tbb/latest/env`` - - -Example -******* - -Below you can find a typical example for a |short_name| algorithm. -The sample calculates a sum of all integer numbers from 1 to 100. - -.. code:: cpp - - int sum = oneapi::tbb::parallel_reduce(oneapi::tbb::blocked_range(1,101), 0, - [](oneapi::tbb::blocked_range const& r, int init) -> int { - for (int v = r.begin(); v != r.end(); v++ ) { - init += v; - } - return init; - }, - [](int lhs, int rhs) -> int { - return lhs + rhs; - } - ); \ No newline at end of file diff --git a/doc/GSG/conf.py b/doc/GSG/conf.py deleted file mode 100644 index e45812d1a0..0000000000 --- a/doc/GSG/conf.py +++ /dev/null @@ -1,299 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Configuration file for the Sphinx documentation builder. -# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - -SOURCE_DIR = os.path.dirname(__file__) -LATEX_DIR = os.path.join(SOURCE_DIR, '_latex') -PREAMBLE_FILE = os.path.join(LATEX_DIR, 'preamble.tex') -TITLE_PAGE_FILE = os.path.join(LATEX_DIR, 'title_page.tex') - -BUILD_TYPE = os.getenv("BUILD_TYPE") - -# -- Project information ----------------------------------------------------- - - -if BUILD_TYPE == 'oneapi' or BUILD_TYPE == 'dita': - project = u'Intel® oneAPI Threading Building Blocks (oneTBB)' -else: - project = u'oneAPI Threading Building Blocks (oneTBB)' -copyright = u'2021, Intel Corporation' -author = u'Intel' - -# The short X.Y version -version = u'' -# The full version, including alpha/beta/rc tags -release = u'' - - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.imgmath', - 'sphinx.ext.ifconfig', - 'sphinx.ext.viewcode', - 'sphinx.ext.githubpages', -] - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. 
-# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -#master_doc = 'main/title_main' -master_doc = 'index' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = None - -# Syntax highlighting for the :: directive -highlight_language = 'cpp' - - -if BUILD_TYPE == 'oneapi' or BUILD_TYPE == 'dita': - rst_prolog = """ -.. |full_name| replace:: Intel\ |reg|\ oneAPI Threading Building Blocks (oneTBB) -.. |short_name| replace:: oneTBB -.. |product| replace:: oneTBB -.. |reg| unicode:: U+000AE -.. |copy| unicode:: U+000A9 -.. |base_tk| replace:: Intel\ |reg|\ oneAPI Base Toolkit -.. |dpcpp| replace:: Intel\ |reg|\ oneAPI DPC++/C++ Compiler - """ -else: - rst_prolog = """ -.. |full_name| replace:: oneAPI Threading Building Blocks (oneTBB) -.. |short_name| replace:: oneTBB -.. |product| replace:: oneTBB -.. |reg| unicode:: U+000AE -.. |copy| unicode:: U+000A9 -.. |base_tk| replace:: Intel\ |reg|\ oneAPI Base Toolkit -.. |dpcpp| replace:: Intel\ |reg|\ oneAPI DPC++/C++ Compiler - """ - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -if BUILD_TYPE == 'oneapi' or BUILD_TYPE == 'dita': - html_theme = 'sphinx_rtd_theme' -else: - html_theme = 'sphinx_book_theme' - html_theme_options = { - 'repository_url': 'https://github.com/oneapi-src/oneTBB', - 'path_to_docs': 'doc/main', - 'use_issues_button': True, - 'use_edit_page_button': True, - 'repository_branch': 'master', - 'extra_footer': '
Cookies
' - } - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -if BUILD_TYPE == 'oneapi' or BUILD_TYPE == 'dita': - html_context = { - 'css_files': [ - '_static/theme_overrides.css', # override wide tables in RTD theme - ], - } -else: - html_js_files = ['custom.js'] - html_logo = '_static/oneAPI-rgb-rev-100.png' - -html_favicon = '_static/favicons.png' - - - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} - - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = 'sphinx-infodevdoc' - - -# -- Options for LaTeX output ------------------------------------------------ - -#latex_engine = 'xelatex' -#PDF_TITLE = 'Information Development Template' -# -#with open(PREAMBLE_FILE, 'r', encoding='utf-8') as f: -# PREAMBLE = f.read() -# -#with open(TITLE_PAGE_FILE, 'r', encoding='utf-8') as f: -# TITLE_PAGE = f.read().replace('', PDF_TITLE) -# -# -#latex_elements = { -# # The paper size ('letterpaper' or 'a4paper'). -# # -# 'extraclassoptions': 'openany,oneside', -# 'babel' : '\\usepackage[english]{babel}', -# 'papersize': 'a4paper', -# 'releasename':" ", -# # Sonny, Lenny, Glenn, Conny, Rejne, Bjarne and Bjornstrup -# # 'fncychap': '\\usepackage[Lenny]{fncychap}', -# 'fncychap': '', -# #'fontpkg': '\\usepackage{amsmath,amsfonts,amssymb,amsthm}', -# -# 'figure_align':'htbp', -# # The font size ('10pt', '11pt' or '12pt'). -# # -# 'pointsize': '12pt', -# -# # Additional stuff for the LaTeX preamble. -# # -# 'preamble': PREAMBLE, -# -# 'maketitle': TITLE_PAGE, -# # Latex figure (float) alignment -# # -# # 'figure_align': 'htbp', -# 'sphinxsetup': \ -# 'hmargin={0.7in,0.7in}, vmargin={1in,1in}, \ -# verbatimwithframe=true, \ -# TitleColor={rgb}{0,0.686,0.941}, \ -# HeaderFamily=\\rmfamily\\bfseries, \ -# InnerLinkColor={rgb}{0,0.686,0.941}, \ -# OuterLinkColor={rgb}{0,0.686,0.941}', -# -# 'tableofcontents':' ' -#} -# -#latex_logo = '_latex/intel_logo.png' -## Grouping the document tree into LaTeX files. List of tuples -## (source start file, target name, title, -## author, documentclass [howto, manual, or own class]). -#latex_documents = [ -# (master_doc, 'sphinx-infodev.tex', u'sphinx-infodev Documentation', -# u'Intel', 'manual'), -#] - -#breathe_projects = { #todd-mod -# project: "../doxygen/xml" -#} -#breathe_default_project = project - -# Setup the exhale extension -#exhale_args = { #todd-mod -# # These arguments are required -# "containmentFolder": "./api", -# "rootFileName": "library_root.rst", -# "rootFileTitle": "Library API", -# "doxygenStripFromPath": "..", -# "fullApiSubSectionTitle": 'Full API' -#} - - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). 
-man_pages = [ - (master_doc, 'sphinx-infodev', u'sphinx-infodev Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'sphinx-infodev', u'sphinx-infodev Documentation', - author, 'sphinx-infodev', 'One line description of project.', - 'Miscellaneous'), -] - - -# -- Options for Epub output ------------------------------------------------- - -# Bibliographic Dublin Core info. -epub_title = project - -# The unique identifier of the text. This can be a ISBN number -# or the project homepage. -# -# epub_identifier = '' - -# A unique identification for the text. -# -# epub_uid = '' - -# A list of files that should not be packed into the epub file. -epub_exclude_files = ['search.html'] - - -# -- Extension configuration ------------------------------------------------- - -# -- Options for intersphinx extension --------------------------------------- - -# Example configuration for intersphinx: refer to the Python standard library. -intersphinx_mapping = {'https://docs.python.org/': None} - -# -- Options for todo extension ---------------------------------------------- - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = True diff --git a/doc/GSG/examples.rst b/doc/GSG/examples.rst deleted file mode 100644 index 6fe719c68d..0000000000 --- a/doc/GSG/examples.rst +++ /dev/null @@ -1,45 +0,0 @@ -.. _examples: - -oneTBB Samples -============== - -Refer to the following examples to see how |short_name| works. - -* **Containers** - - * `concurrent_hash_map `_ - * `concurrent_priority_queue `_ - -* `Flow Graph `_ - * `A solution to the binpacking problem using a queue_node, a buffer_node, and function_node. `_ - * `Cholesky Factorization algorithm `_ - * `An implementation of dining philosophers in graph using the reserving join_node `_ - * `A parallel implementation of bzip2 block-sorting file compressor `_ - * `An example of a collection of digital logic gates that can be easily composed into larger circuits `_ - * `An example of a Kohonen Self-Organizing Map using cancellation `_ - * `Split computational kernel for execution between CPU and GPU `_ - -* **Algorithms** - - * `parallel_for `_ - * `Game of life overlay `_ - * `Polygon overlay `_ - * `Parallel seismic wave simulation `_ - * `Parallel 2-D raytracer/renderer `_ - * `Find largest matching substrings `_ - * `Resumable task: Split computational kernel for execution between CPU and GPU `_ - * `parallel_for_each `_ - * `parallel_pipeline `_ - * `parallel_reduce `_ - -* **Task Scheduler** - - * `task_arena `_ - * `task_group `_ - * `Execute similar computational kernels, with one task executing the SYCL* code and the other task executing the oneTBB code `_ - -* **Other** - - * `Compute Fibonacci numbers in different ways `_ - - diff --git a/doc/GSG/hybrid_cpu_support.rst b/doc/GSG/hybrid_cpu_support.rst deleted file mode 100644 index a2b32f12b4..0000000000 --- a/doc/GSG/hybrid_cpu_support.rst +++ /dev/null @@ -1,40 +0,0 @@ -.. _hybrid_cpu_support: - -Hybrid CPU and NUMA Support -*************************** - -If you need NUMA/Hybrid CPU support in oneTBB, you need to make sure that HWLOC* is installed on your system. 
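As an illustration of what that support enables, the sketch below queries the NUMA topology through ``oneapi::tbb::info::numa_nodes()`` and pins work to each node with ``task_arena`` constraints. It follows the oneTBB 2021 ``task_arena``/``constraints`` API; the function name and the work placeholder are illustrative only, and the exact behavior depends on the oneTBB version and on HWLOC* being found at run time.

.. code:: cpp

   #include <cstddef>
   #include <vector>
   #include "oneapi/tbb/info.h"
   #include "oneapi/tbb/task_arena.h"
   #include "oneapi/tbb/task_group.h"

   void run_on_each_numa_node() {
       // NUMA nodes oneTBB detected through HWLOC*.
       std::vector<oneapi::tbb::numa_node_id> nodes = oneapi::tbb::info::numa_nodes();

       std::vector<oneapi::tbb::task_arena> arenas(nodes.size());
       std::vector<oneapi::tbb::task_group> groups(nodes.size());

       for (std::size_t i = 0; i < nodes.size(); ++i) {
           // Constrain each arena's worker threads to one NUMA node.
           arenas[i].initialize(oneapi::tbb::task_arena::constraints(nodes[i]));
           arenas[i].execute([&groups, i] {
               groups[i].run([] { /* NUMA-local work goes here (placeholder) */ });
           });
       }
       for (std::size_t i = 0; i < nodes.size(); ++i) {
           arenas[i].execute([&groups, i] { groups[i].wait(); });
       }
   }

On a machine without a NUMA topology, ``numa_nodes()`` typically reports a single entry, so the sketch degenerates to one ordinary arena.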
- -HWLOC* (Hardware Locality) is a library that provides a portable abstraction of the hierarchical topology of modern architectures (NUMA, hybrid CPU systems, etc). -oneTBB relies on HWLOC* to identify the underlying topology of the system to optimize thread scheduling and memory allocation. - -Without HWLOC*, oneTBB may not take advantage of NUMA/Hybrid CPU support. Therefore, it's important to make sure that HWLOC* is installed before using oneTBB on such systems. - -Check HWLOC* on the System -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -To check if HWLOC* is already installed on your system, run `hwloc-ls`: - - * For Linux* OS, in the command line. - * For Windows* OS, in the command prompt. - -If HWLOC* is installed, the command displays information about the hardware topology of your system. -If it is not installed, you receive an error message saying that the command ``hwloc-ls`` could not be found. - -.. note:: For Hybrid CPU support, make sure that HWLOC* is version 2.5 or higher. - For NUMA support, install HWLOC* version 1.11 or higher. - -Install HWLOC* -^^^^^^^^^^^^^^ - -To install HWLOC*, visit the official Portable Hardware Locality website (https://www-lb.open-mpi.org/projects/hwloc/). - -* For Windows* OS, binaries are available for download. -* For Linux* OS, only the source code is provided and binaries should be built. - -On Linux* OS, HWLOC* can be also installed with package managers, such as APT*, YUM*, etc. -To do so, run: ``sudo apt install hwloc``. - - -.. note:: For Hybrid CPU support, make sure that HWLOC* is version 2.5 or higher. - For NUMA support, install HWLOC* version 1.11 or higher. diff --git a/doc/GSG/index.rst b/doc/GSG/index.rst deleted file mode 100644 index 0488ff1f31..0000000000 --- a/doc/GSG/index.rst +++ /dev/null @@ -1,121 +0,0 @@ -.. _Get_Started_Guide - -Get Started with |full_name| -============================ - - -|full_name| enables you to simplify parallel programming by breaking -computation into parallel running tasks. oneTBB is available as a stand-alone -product and as part of the |base_tk|. - -|short_name| is a runtime-based parallel programming model for C++ code that uses threads. -It consists of a template-based runtime library to help you harness the latent performance -of multi-core processors. Use |short_name| to write scalable applications that: - -- Specify logical parallel structure instead of threads -- Emphasize data parallel programming -- Take advantage of concurrent collections and parallel algorithms - -System Requirements -******************* - -Refer to the `oneTBB System Requirements `_. - - -Before You Begin -**************** - -Download |short_name| as a `stand-alone product `_ -or as a part of the `Intel(R) oneAPI Base Toolkit `_. - -After installing |short_name|, you need to set the environment variables: - -#. Go to the oneTBB installation directory (````). By default, ```` is the following: - - * On Linux* OS: - - * For superusers (root): ``/opt/intel/oneapi`` - * For ordinary users (non-root): ``$HOME/intel/oneapi`` - - * On Windows* OS: - - * ``\Intel\oneAPI`` - -#. Set the environment variables, using the script in , by running - - * On Linux* OS: - - ``vars.{sh|csh} in /tbb/latest/env`` - - * On Windows* OS: - - ``vars.bat in /tbb/latest/env`` - - -Example -******* - -Below you can find a typical example for a |short_name| algorithm. -The sample calculates a sum of all integer numbers from 1 to 100. - -.. 
code:: cpp - - int sum = oneapi::tbb::parallel_reduce(oneapi::tbb::blocked_range<int>(1,101), 0, - [](oneapi::tbb::blocked_range<int> const& r, int init) -> int { - for (int v = r.begin(); v != r.end(); v++ ) { - init += v; - } - return init; - }, - [](int lhs, int rhs) -> int { - return lhs + rhs; - } - );
 - -Find more -********* - -.. list-table:: - :widths: 40 60 - :header-rows: 0 - - - * - - - `oneTBB Community Forum `_ - - `Product FAQs `_ - - `Support requests `_ - - Use these resources if you need support with oneTBB. - - * - `Release Notes `_ - - Find up-to-date information about the product, including detailed notes, known issues, and changes. - - * - `Documentation `_: `Developer Guide `_ and `API Reference `_ - - Learn to use oneTBB. - * - `GitHub* `_ - - Find oneTBB implementation in open source.
 - - -Notices and Disclaimers -*********************** - -Intel technologies may require enabled hardware, software or service activation. - -No product or component can be absolutely secure. - -Your costs and results may vary. - -© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks -of Intel Corporation or its subsidiaries. Other names and brands may be claimed -as the property of others. - -No license (express or implied, by estoppel or otherwise) to any intellectual -property rights is granted by this document. - -The products described may contain design defects or errors known as errata which -may cause the product to deviate from published specifications. Current -characterized errata are available on request. - -Intel disclaims all express and implied warranties, including without limitation, -the implied warranties of merchantability, fitness for a particular purpose, -and non-infringement, as well as any warranty arising from course of performance, -course of dealing, or usage in trade. diff --git a/doc/GSG/intro.rst b/doc/GSG/intro.rst deleted file mode 100644 index da8c558d21..0000000000 --- a/doc/GSG/intro.rst +++ /dev/null @@ -1,29 +0,0 @@ -.. _intro: - -What oneTBB Is -============== - -|full_name| is a runtime-based parallel programming model for C++ code that uses threads. -The template-based runtime library can help you harness the latent performance of multi-core processors. - -oneTBB enables you to simplify parallel programming by breaking computation into parallel running tasks. Within a single process, -parallelism is carried out through threads, an operating system mechanism that allows the same or different sets of instructions -to be executed simultaneously. Using threads can make your program work faster and more efficiently. - -Here you can see one of the possible executions of tasks by threads. - -.. figure:: Images/how-oneTBB-works.png - :scale: 70% - :align: center - -Use oneTBB to write scalable applications that: - -* Specify logical parallel structure instead of threads. -* Emphasize data-parallel programming. -* Take advantage of concurrent collections and parallel algorithms. - -oneTBB supports nested parallelism and load balancing. It means that you can use the library without worrying about oversubscribing a system, which happens when more tasks are assigned to a system than it can handle efficiently. - -oneTBB is used in different areas, such as scientific simulations, gaming, data analysis, etc. - -It is available as a stand-alone product and as part of the |base_tk|.
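For readers who want to run the ``parallel_reduce`` snippet shown in the Example sections above as a standalone program, a minimal complete version is sketched below. It reuses the umbrella ``oneapi/tbb.h`` header that the guide already includes; the file name and the ``-ltbb`` link flag in the build comment are conventional placeholders and may differ per installation.

.. code:: cpp

   // Build sketch (Linux*): g++ -std=c++11 sum.cpp -ltbb
   #include <iostream>
   #include "oneapi/tbb.h"

   int main() {
       // Sum the integers 1..100 in parallel.
       int sum = oneapi::tbb::parallel_reduce(
           oneapi::tbb::blocked_range<int>(1, 101), 0,
           [](oneapi::tbb::blocked_range<int> const& r, int init) -> int {
               for (int v = r.begin(); v != r.end(); ++v) {
                   init += v;
               }
               return init;
           },
           [](int lhs, int rhs) -> int { return lhs + rhs; });
       std::cout << "sum = " << sum << std::endl;
       return 0;
   }

Running it should print ``sum = 5050``.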
diff --git a/doc/GSG/intro_gsg.rst b/doc/GSG/intro_gsg.rst deleted file mode 100644 index 49fda780f6..0000000000 --- a/doc/GSG/intro_gsg.rst +++ /dev/null @@ -1,23 +0,0 @@ -.. _Intro_gsg: - - -|full_name| is a runtime-based parallel programming model for C++ code that uses threads. -It consists of a template-based runtime library to help you harness the latent performance of multi-core processors. - -oneTBB enables you to simplify parallel programming by breaking computation into parallel running tasks. Within a single process, -parallelism is carried out through threads, an operating system mechanism that allows the same or different sets of instructions -to be executed simultaneously. - -Here you can see one of the possible executions of tasks by threads. - -.. figure:: /GSG/Images/how-oneTBB-works.png - :scale: 70% - :align: center - -Use oneTBB to write scalable applications that: - -* Specify logical parallel structure instead of threads -* Emphasize data-parallel programming -* Take advantage of concurrent collections and parallel algorithms - -oneTBB supports nested parallelism and load balancing. It means that you can use the library without being worried about oversubscribing a system. diff --git a/doc/main/_static/custom.js b/doc/main/_static/custom.js deleted file mode 100644 index a7d312de32..0000000000 --- a/doc/main/_static/custom.js +++ /dev/null @@ -1,37 +0,0 @@ -window.MathJax = { - TeX: { - Macros: { - src: '\\operatorname{src}', - srclayer: '\\operatorname{src\\_layer}', - srciter: '\\operatorname{src\\_iter}', - srciterc: '\\operatorname{src\\_iter\\_c}', - weights: '\\operatorname{weights}', - weightslayer: '\\operatorname{weights\\_layer}', - weightsiter: '\\operatorname{weights\\_iter}', - weightspeephole: '\\operatorname{weights\\_peephole}', - weightsprojection: '\\operatorname{weights\\_projection}', - bias: '\\operatorname{bias}', - dst: '\\operatorname{dst}', - dstlayer: '\\operatorname{dst\\_layer}', - dstiter: '\\operatorname{dst\\_iter}', - dstiterc: '\\operatorname{dst\\_iter\\_c}', - diffsrc: '\\operatorname{diff\\_src}', - diffsrclayer: '\\operatorname{diff\\_src\\_layer}', - diffsrciter: '\\operatorname{diff\\_src\\_iter}', - diffsrciterc: '\\operatorname{diff\\_src\\_iter\\_c}', - diffweights: '\\operatorname{diff\\_weights}', - diffweightslayer: '\\operatorname{diff\\_weights\\_layer}', - diffweightsiter: '\\operatorname{diff\\_weights\\_iter}', - diffweightspeephole: '\\operatorname{diff\\_weights\\_peephole}', - diffweightsprojection: '\\operatorname{diff\\_weights\\_projection}', - diffbias: '\\operatorname{diff\\_bias}', - diffdst: '\\operatorname{diff\\_dst}', - diffdstlayer: '\\operatorname{diff\\_dst\\_layer}', - diffdstiter: '\\operatorname{diff\\_dst\\_iter}', - diffdstiterc: '\\operatorname{diff\\_dst\\_iter\\_c}', - diffgamma: '\\operatorname{diff\\_\\gamma}', - diffbeta: '\\operatorname{diff\\_\\beta}', - workspace: '\\operatorname{workspace}' - } - } -} \ No newline at end of file diff --git a/doc/main/_static/favicons.png b/doc/main/_static/favicons.png deleted file mode 100644 index f450376b19..0000000000 Binary files a/doc/main/_static/favicons.png and /dev/null differ diff --git a/doc/main/_static/oneAPI-rgb-rev-100.png b/doc/main/_static/oneAPI-rgb-rev-100.png deleted file mode 100644 index 58d2d5c54e..0000000000 Binary files a/doc/main/_static/oneAPI-rgb-rev-100.png and /dev/null differ diff --git a/doc/main/_static/theme_overrides.css b/doc/main/_static/theme_overrides.css deleted file mode 100644 index 63ee6cc74c..0000000000 --- 
a/doc/main/_static/theme_overrides.css +++ /dev/null @@ -1,13 +0,0 @@ -/* override table width restrictions */ -@media screen and (min-width: 767px) { - - .wy-table-responsive table td { - /* !important prevents the common CSS stylesheets from overriding - this as on RTD they are loaded after this stylesheet */ - white-space: normal !important; - } - - .wy-table-responsive { - overflow: visible !important; - } -} diff --git a/doc/main/intro/intro.rst b/doc/main/intro/intro.rst deleted file mode 100644 index 652113c2f4..0000000000 --- a/doc/main/intro/intro.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _intro: - -Introduction -============ - - -|full_name| is a library that supports scalable parallel programming using -standard ISO C++ code. It does not require special languages or -compilers. It is designed to promote scalable data parallel programming. -Additionally, it fully supports nested parallelism, so you can build -larger parallel components from smaller parallel components. To use the -library, you specify tasks, not threads, and let the library map tasks -onto threads in an efficient manner. - - -Many of the library interfaces employ generic programming, in which -interfaces are defined by requirements on types and not specific types. -The C++ Standard Template Library (STL) is an example of generic -programming. Generic programming enables oneTBB to be flexible yet -efficient. The generic interfaces enable you to customize components to -your specific needs. - - -.. note:: - |full_name| requires C++11 standard compiler support. - - -The net result is that oneTBB enables you to specify parallelism far -more conveniently than using raw threads, and at the same time can -improve performance. - - -.. admonition:: Product and Performance Information - - Performance varies by use, configuration and other factors. Learn more at `www.intel.com/PerformanceIndex `_. - Notice revision #20201201 - - - diff --git a/doc/main/intro/introducing_main.rst b/doc/main/intro/introducing_main.rst deleted file mode 100644 index 311709df43..0000000000 --- a/doc/main/intro/introducing_main.rst +++ /dev/null @@ -1,39 +0,0 @@ -.. _introducing_main: - -Introduction -============ - - -|full_name| is a library that supports scalable parallel programming using -standard ISO C++ code. It does not require special languages or -compilers. It is designed to promote scalable data parallel programming. -Additionally, it fully supports nested parallelism, so you can build -larger parallel components from smaller parallel components. To use the -library, you specify tasks, not threads, and let the library map tasks -onto threads in an efficient manner. - - -Many of the library interfaces employ generic programming, in which -interfaces are defined by requirements on types and not specific types. -The C++ Standard Template Library (STL) is an example of generic -programming. Generic programming enables oneTBB to be flexible yet -efficient. The generic interfaces enable you to customize components to -your specific needs. - - -.. note:: - |full_name| requires C++11 standard compiler support. - - -The net result is that oneTBB enables you to specify parallelism far -more conveniently than using raw threads, and at the same time can -improve performance. - - -.. admonition:: Product and Performance Information - - Performance varies by use, configuration and other factors. Learn more at `www.intel.com/PerformanceIndex `_. 
- Notice revision #20201201 - - - diff --git a/doc/main/intro/notices_and_disclaimers.rst b/doc/main/intro/notices_and_disclaimers.rst deleted file mode 100644 index 0fe0cc6baf..0000000000 --- a/doc/main/intro/notices_and_disclaimers.rst +++ /dev/null @@ -1,49 +0,0 @@ -.. _notices_and_disclaimers: - -Notices and Disclaimers -======================= - - -Intel technologies may require enabled hardware, software or service -activation. - - -No product or component can be absolutely secure. - - -Your costs and results may vary. - - -© Intel Corporation. Intel, the Intel logo, and other Intel marks are -trademarks of Intel Corporation or its subsidiaries. Other names and -brands may be claimed as the property of others. - - -Intel's compilers may or may not optimize to the same degree for -non-Intel microprocessors for optimizations that are not unique to Intel -microprocessors. These optimizations include SSE2, SSE3, and SSSE3 -instruction sets and other optimizations. Intel does not guarantee the -availability, functionality, or effectiveness of any optimization on -microprocessors not manufactured by Intel. Microprocessor-dependent -optimizations in this product are intended for use with Intel -microprocessors. Certain optimizations not specific to Intel -microarchitecture are reserved for Intel microprocessors. Please refer -to the applicable product User and Reference Guides for more information -regarding the specific instruction sets covered by this notice. - - -No license (express or implied, by estoppel or otherwise) to any -intellectual property rights is granted by this document. - - -The products described may contain design defects or errors known as -errata which may cause the product to deviate from published -specifications. Current characterized errata are available on request. - - -Intel disclaims all express and implied warranties, including without -limitation, the implied warranties of merchantability, fitness for a -particular purpose, and non-infringement, as well as any warranty -arising from course of performance, course of dealing, or usage in -trade. - diff --git a/doc/main/tbb_userguide/Bandwidth_and_Cache_Affinity.rst b/doc/main/tbb_userguide/Bandwidth_and_Cache_Affinity.rst deleted file mode 100644 index 60e6d69045..0000000000 --- a/doc/main/tbb_userguide/Bandwidth_and_Cache_Affinity.rst +++ /dev/null @@ -1,113 +0,0 @@ -.. _Bandwidth_and_Cache_Affinity: - -Bandwidth and Cache Affinity -============================ - - -For a sufficiently simple function ``Foo``, the examples might not show -good speedup when written as parallel loops. The cause could be -insufficient system bandwidth between the processors and memory. In that -case, you may have to rethink your algorithm to take better advantage of -cache. Restructuring to better utilize the cache usually benefits the -parallel program as well as the serial program. - - -An alternative to restructuring that works in some cases is -``affinity_partitioner.`` It not only automatically chooses the -grainsize, but also optimizes for cache affinity and tries to distribute -the data uniformly among threads. Using ``affinity_partitioner`` can -significantly improve performance when: - - -- The computation does a few operations per data access. - - -- The data acted upon by the loop fits in cache. - - -- The loop, or a similar loop, is re-executed over the same data. - - -- There are more than two hardware threads available (and especially if - the number of threads is not a power of two). 
If only two threads are - available, the default scheduling in |full_name| - usually provides sufficient cache affinity. - - -The following code shows how to use ``affinity_partitioner``. - - -:: - - - #include "oneapi/tbb.h" -   - - void ParallelApplyFoo( float a[], size_t n ) { - static affinity_partitioner ap; - parallel_for(blocked_range<size_t>(0,n), ApplyFoo(a), ap); - } -   - - void TimeStepFoo( float a[], size_t n, int steps ) { - for( int t=0; t<steps; ++t ) - ParallelApplyFoo( a, n ); - } - - -.. admonition:: Product and Performance Information - - Performance varies by use, configuration and other factors. Learn more at `www.intel.com/PerformanceIndex `_. - Notice revision #20201201 - - - - -.. |image0| image:: Images/image007.jpg - :width: 453px - :height: 178px -.. |image1| image:: Images/image008.jpg - :width: 551px - :height: 192px -
 diff --git a/doc/main/tbb_userguide/Controlling_Chunking.rst b/doc/main/tbb_userguide/Controlling_Chunking.rst deleted file mode 100644 index 487292b45f..0000000000 --- a/doc/main/tbb_userguide/Controlling_Chunking.rst +++ /dev/null @@ -1,175 +0,0 @@ -.. _Controlling_Chunking: - -Controlling Chunking -==================== - - -Chunking is controlled by a *partitioner* and a *grainsize*. To gain -the most control over chunking, you specify both. - - -- Specify ``simple_partitioner()`` as the third argument to - ``parallel_for``. Doing so turns off automatic chunking. - - -- Specify the grainsize when constructing the range. The three-argument - form of the constructor is - ``blocked_range<T>(begin,end,grainsize)``. The default value of - ``grainsize`` is 1. It is in units of loop iterations per chunk. - - -If the chunks are too small, the overhead may exceed the performance -advantage. - - -The following code is the last example from parallel_for, modified to -use an explicit grainsize ``G``. - - -:: - - - #include "oneapi/tbb.h" -   - - void ParallelApplyFoo( float a[], size_t n ) { - parallel_for(blocked_range<size_t>(0,n,G), ApplyFoo(a), - simple_partitioner()); - } - - -The grainsize sets a minimum threshold for parallelization. The -``parallel_for`` in the example invokes ``ApplyFoo::operator()`` on -chunks, possibly of different sizes. Let *chunksize* be the number of -iterations in a chunk. Using ``simple_partitioner`` guarantees that -⌈G/2⌉ <= *chunksize* <= G. - - -There is also an intermediate level of control where you specify the -grainsize for the range, but use an ``auto_partitioner`` or an -``affinity_partitioner``. An ``auto_partitioner`` is the default -partitioner. Both partitioners implement the automatic grainsize -heuristic described in :ref:`Automatic_Chunking`. An -``affinity_partitioner`` implies an additional hint, as explained later -in Section :ref:`Bandwidth_and_Cache_Affinity`. Though these partitioners -may cause chunks to have more than G iterations, they never generate -chunks with fewer than ⌈G/2⌉ iterations. Specifying a range with an -explicit grainsize may occasionally be useful to prevent these -partitioners from generating wastefully small chunks if their heuristics -fail. - - -Because of the impact of grainsize on parallel loops, it is worth -reading the following material even if you rely on ``auto_partitioner`` -and ``affinity_partitioner`` to choose the grainsize automatically. - - -.. container:: tablenoborder - - - .. list-table:: - :header-rows: 1 - - * - |image0| - - |image1| - * - Case A - - Case B - - - - -The above figure illustrates the impact of grainsize by showing the -useful work as the gray area inside a brown border that represents -overhead. Both Case A and Case B have the same total gray area. Case A -shows how too small a grainsize leads to a relatively high proportion of -overhead.
Case B shows how a large grainsize reduces this proportion, at -the cost of reducing potential parallelism. The overhead as a fraction -of useful work depends upon the grainsize, not on the number of grains. -Consider this relationship and not the total number of iterations or -number of processors when setting a grainsize. - - -A rule of thumb is that ``grainsize`` iterations of ``operator()`` -should take at least 100,000 clock cycles to execute. For example, if a -single iteration takes 100 clocks, then the ``grainsize`` needs to be at -least 1000 iterations. When in doubt, do the following experiment: - - -#. Set the ``grainsize`` parameter higher than necessary. The grainsize - is specified in units of loop iterations. If you have no idea of how - many clock cycles an iteration might take, start with - ``grainsize``\ =100,000. The rationale is that each iteration - normally requires at least one clock per iteration. In most cases, - step 3 will guide you to a much smaller value. - - -#. Run your algorithm. - - -#. Iteratively halve the ``grainsize`` parameter and see how much the - algorithm slows down or speeds up as the value decreases. - - -A drawback of setting a grainsize too high is that it can reduce -parallelism. For example, if the grainsize is 1000 and the loop has 2000 -iterations, the ``parallel_for`` distributes the loop across only two -processors, even if more are available. However, if you are unsure, err -on the side of being a little too high instead of a little too low, -because too low a value hurts serial performance, which in turns hurts -parallel performance if there is other parallelism available higher up -in the call tree. - - -.. tip:: - You do not have to set the grainsize too precisely. - - -The next figure shows the typical "bathtub curve" for execution time -versus grainsize, based on the floating point ``a[i]=b[i]*c`` -computation over a million indices. There is little work per iteration. -The times were collected on a four-socket machine with eight hardware -threads. - - -.. container:: fignone - :name: fig2 - - - Wall Clock Time Versus Grainsize - |image2| - - -The scale is logarithmic. The downward slope on the left side indicates -that with a grainsize of one, most of the overhead is parallel -scheduling overhead, not useful work. An increase in grainsize brings a -proportional decrease in parallel overhead. Then the curve flattens out -because the parallel overhead becomes insignificant for a sufficiently -large grainsize. At the end on the right, the curve turns up because the -chunks are so large that there are fewer chunks than available hardware -threads. Notice that a grainsize over the wide range 100-100,000 works -quite well. - - -.. tip:: - A general rule of thumb for parallelizing loop nests is to - parallelize the outermost one possible. The reason is that each - iteration of an outer loop is likely to provide a bigger grain of - work than an iteration of an inner loop. - - -.. admonition:: Product and Performance Information - - Performance varies by use, configuration and other factors. Learn more at `www.intel.com/PerformanceIndex `_. - Notice revision #20201201 - - -.. |image0| image:: Images/image002.jpg - :width: 161px - :height: 163px -.. |image1| image:: Images/image004.jpg - :width: 157px - :height: 144px -.. 
|image2| image:: Images/image006.jpg - :width: 462px - :height: 193px - diff --git a/doc/main/tbb_userguide/Parallelizing_Simple_Loops.rst b/doc/main/tbb_userguide/Parallelizing_Simple_Loops.rst deleted file mode 100644 index c4b8fbfa1d..0000000000 --- a/doc/main/tbb_userguide/Parallelizing_Simple_Loops.rst +++ /dev/null @@ -1,48 +0,0 @@ -.. _Parallelizing_Simple_Loops: - -Parallelizing Simple Loops -========================== - - -The simplest form of scalable parallelism is a loop of iterations that -can each run simultaneously without interfering with each other. The -following sections demonstrate how to parallelize simple loops. - - -.. note:: - |full_name| components are - defined in namespace ``tbb``. For brevity’s sake, the namespace is - explicit in the first mention of a component, but implicit - afterwards. - - -When compiling oneTBB programs, be sure to link in the oneTBB shared -library, otherwise undefined references will occur. The following table -shows compilation commands that use the debug version of the library. -Remove the "``_debug``" portion to link against the production version -of the library. - - -.. container:: tablenoborder - - - .. list-table:: - :header-rows: 1 - - * - Operating System - - Command line - * - Windows\* OS - - ``icl /MD example.cpp tbb_debug.lib`` - * - Linux\* OS - - ``icc example.cpp -ltbb_debug`` - - - -.. toctree:: - :maxdepth: 4 - - ../tbb_userguide/Initializing_and_Terminating_the_Library - ../tbb_userguide/parallel_for - ../tbb_userguide/parallel_reduce - ../tbb_userguide/Advanced_Example - ../tbb_userguide/Advanced_Topic_Other_Kinds_of_Iteration_Spaces diff --git a/doc/main/tbb_userguide/parallel_for.rst b/doc/main/tbb_userguide/parallel_for.rst deleted file mode 100644 index 766a42592f..0000000000 --- a/doc/main/tbb_userguide/parallel_for.rst +++ /dev/null @@ -1,130 +0,0 @@ -.. _parallel_for: - -parallel_for -============ - - -Suppose you want to apply a function ``Foo`` to each element of an -array, and it is safe to process each element concurrently. Here is the -sequential code to do this: - - -:: - - - void SerialApplyFoo( float a[], size_t n ) { - for( size_t i=0; i!=n; ++i ) - Foo(a[i]); - } - - -The iteration space here is of type ``size_t``, and goes from ``0`` to -``n-1``. The template function ``oneapi::tbb::parallel_for`` breaks this iteration -space into chunks, and runs each chunk on a separate thread. The first -step in parallelizing this loop is to convert the loop body into a form -that operates on a chunk. The form is an STL-style function object, -called the *body* object, in which ``operator()`` processes a chunk. The -following code declares the body object. - -:: - - #include "oneapi/tbb.h" - - using namespace oneapi::tbb; - - class ApplyFoo { - float *const my_a; - public: - void operator()( const blocked_range& r ) const { - float *a = my_a; - for( size_t i=r.begin(); i!=r.end(); ++i ) - Foo(a[i]); - } - ApplyFoo( float a[] ) : - my_a(a) - {} - }; - - -The ``using`` directive in the example enables you to use the library -identifiers without having to write out the namespace prefix ``oneapi::tbb`` -before each identifier. The rest of the examples assume that such a -``using`` directive is present. - - -Note the argument to ``operator()``. A ``blocked_range`` is a -template class provided by the library. It describes a one-dimensional -iteration space over type ``T``. Class ``parallel_for`` works with other -kinds of iteration spaces too. The library provides ``blocked_range2d`` -for two-dimensional spaces. 
You can define your own spaces as explained -in :ref:`Advanced_Topic_Other_Kinds_of_Iteration_Spaces`. - - -An instance of ``ApplyFoo`` needs member fields that remember all the -local variables that were defined outside the original loop but used -inside it. Usually, the constructor for the body object will initialize -these fields, though ``parallel_for`` does not care how the body object -is created. Template function ``parallel_for`` requires that the body -object have a copy constructor, which is invoked to create a separate -copy (or copies) for each worker thread. It also invokes the destructor -to destroy these copies. In most cases, the implicitly generated copy -constructor and destructor work correctly. If they do not, it is almost -always the case (as usual in C++) that you must define *both* to be -consistent. - - -Because the body object might be copied, its ``operator()`` should not -modify the body. Otherwise the modification might or might not become -visible to the thread that invoked ``parallel_for``, depending upon -whether ``operator()`` is acting on the original or a copy. As a -reminder of this nuance, ``parallel_for`` requires that the body -object's ``operator()`` be declared ``const``. - - -The example ``operator()`` loads ``my_a`` into a local variable ``a``. -Though not necessary, there are two reasons for doing this in the -example: - - -- **Style**. It makes the loop body look more like the original. - - -- **Performance**. Sometimes putting frequently accessed values into - local variables helps the compiler optimize the loop better, because - local variables are often easier for the compiler to track. - - -Once you have the loop body written as a body object, invoke the -template function ``parallel_for``, as follows: - - -:: - - - #include "oneapi/tbb.h" -   - - void ParallelApplyFoo( float a[], size_t n ) { - parallel_for(blocked_range(0,n), ApplyFoo(a)); - } - - -The ``blocked_range`` constructed here represents the entire iteration -space from 0 to n-1, which ``parallel_for`` divides into subspaces for -each processor. The general form of the constructor is -``blocked_range(begin,end,grainsize)``. The ``T`` specifies the value -type. The arguments ``begin`` and ``end`` specify the iteration space -STL-style as a half-open interval [``begin``,\ ``end``). The argument -*grainsize* is explained in the :ref:`Controlling_Chunking` section. The -example uses the default grainsize of 1 because by default -``parallel_for`` applies a heuristic that works well with the default -grainsize. - -.. toctree:: - :maxdepth: 4 - - ../tbb_userguide/Lambda_Expressions - ../tbb_userguide/Automatic_Chunking - ../tbb_userguide/Controlling_Chunking - ../tbb_userguide/Bandwidth_and_Cache_Affinity - ../tbb_userguide/Partitioner_Summary
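As a companion to the body-object form described above, the same loop can be written more compactly with a lambda expression (the Lambda_Expressions page listed in the toctree covers this form). The sketch below assumes a ``Foo`` declaration matching the serial example, and the wrapper name is illustrative only::

   #include <cstddef>
   #include "oneapi/tbb.h"

   void Foo(float& item);   // assumed signature; the guide leaves Foo unspecified

   void ParallelApplyFooLambda(float a[], std::size_t n) {
       // The lambda plays the role of ApplyFoo::operator(): it receives one
       // chunk of the iteration space and processes it sequentially.
       oneapi::tbb::parallel_for(
           oneapi::tbb::blocked_range<std::size_t>(0, n),
           [=](const oneapi::tbb::blocked_range<std::size_t>& r) {
               for (std::size_t i = r.begin(); i != r.end(); ++i) {
                   Foo(a[i]);
               }
           });
   }

Because the lambda captures by value and does not modify its own state, it satisfies the same const-correctness requirement as the ``ApplyFoo`` body object.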