From ed05fb63a87b471c92ba238574938412efaf5996 Mon Sep 17 00:00:00 2001 From: Tobias Hienzsch Date: Tue, 1 Oct 2024 07:06:25 +0200 Subject: [PATCH] [cpp] Improve DoubleHalf precision --- src/cpp/CMakeLists.txt | 2 + src/cpp/pffdtd/double.hpp | 37 +++++++++++++++-- src/cpp/pffdtd/engine_cpu_2d.cpp | 2 +- src/cpp/pffdtd/engine_sycl_2d.cpp | 27 +++---------- src/cpp/pffdtd/engine_sycl_3d.cpp | 2 +- src/cpp/pffdtd/float.hpp | 40 +++++++++++++++++++ src/cpp/pffdtd/precision.cpp | 53 +++++++++++++++++++++++++ src/cpp/pffdtd/precision.hpp | 5 +++ src/cpp/pffdtd/simulation_3d.cpp | 66 ++----------------------------- src/cpp/pffdtd/utility.hpp | 36 +++++++++++++++++ 10 files changed, 180 insertions(+), 90 deletions(-) create mode 100644 src/cpp/pffdtd/float.hpp create mode 100644 src/cpp/pffdtd/precision.cpp diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 1791ec6..58088fc 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -25,8 +25,10 @@ target_sources(pffdtd pffdtd/engine_cpu_3d.cpp pffdtd/engine_cpu_3d.hpp pffdtd/exception.hpp + pffdtd/float.hpp pffdtd/hdf.hpp pffdtd/mdspan.hpp + pffdtd/precision.cpp pffdtd/precision.hpp pffdtd/print.hpp pffdtd/progress.cpp diff --git a/src/cpp/pffdtd/double.hpp b/src/cpp/pffdtd/double.hpp index 69c70ae..e024618 100644 --- a/src/cpp/pffdtd/double.hpp +++ b/src/cpp/pffdtd/double.hpp @@ -2,6 +2,9 @@ // SPDX-FileCopyrightText: 2024 Tobias Hienzsch #pragma once +#include "pffdtd/float.hpp" +#include "pffdtd/utility.hpp" + #if defined(SYCL_LANGUAGE_VERSION) #include "pffdtd/sycl.hpp" #endif @@ -12,12 +15,22 @@ namespace pffdtd { -/// https://github.com/sukop/doubledouble/blob/master/doubledouble.py +/// https://hal.science/hal-01351529v3/document +/// https://inria.hal.science/inria-00070314/document +/// https://github.com/sukop/doubledouble +/// https://github.com/JuliaMath/DoubleFloats.jl +/// https://github.com/FlorisSteenkamp/double-double template struct Double { constexpr Double() = default; - constexpr Double(Real x, Real y = 0.0) noexcept : _high{x}, _low{y} {} + constexpr Double(Real x) noexcept { + auto const [h, l] = split(x); + _high = h; + _low = l; + } + + constexpr Double(Real x, Real y) noexcept : _high{x}, _low{y} {} [[nodiscard]] constexpr auto high() const noexcept -> Real { return _high; } @@ -86,6 +99,21 @@ struct Double { } private: + [[nodiscard]] static constexpr auto split(Real a) noexcept -> Double { + constexpr auto const digits = FloatTraits::digits; + constexpr auto const f = ipow<2>(idiv(digits, 2)) + 1; + + if constexpr (std::same_as and sizeof(double) == 8) { + static_assert(f == 134217729); // 2**27 + 1 + } + + auto const c = static_cast(f) * a; + auto const a_h = c - (c - a); + auto const a_l = a - a_h; + + return {a_h, a_l}; + } + [[nodiscard]] static constexpr auto twoSum(Real x, Real y) noexcept -> Double { auto r = x + y; auto t = r - x; @@ -109,7 +137,7 @@ struct Double { [[nodiscard]] static constexpr auto twoProduct(Real x, Real y) noexcept -> Double { auto r = x * y; -#if defined(__APPLE__) or defined(__clang__) +#if defined(PFFDTD_HAS_FLOAT16) if constexpr (std::same_as) { auto e = static_cast<_Float16>(std::fma(float{x}, float{y}, float{-r})); return Double{r, e}; @@ -125,4 +153,7 @@ struct Double { Real _low{static_cast(0)}; }; +template +struct FloatTraits> : FloatTraits {}; + } // namespace pffdtd diff --git a/src/cpp/pffdtd/engine_cpu_2d.cpp b/src/cpp/pffdtd/engine_cpu_2d.cpp index ef507cb..0abe656 100644 --- a/src/cpp/pffdtd/engine_cpu_2d.cpp +++ b/src/cpp/pffdtd/engine_cpu_2d.cpp @@ -156,7 +156,7 @@ auto EngineCPU2D::operator()(Simulation2D const& sim, Precision precision) const case Precision::Double: return run(sim); case Precision::DoubleFloat: return run>(sim); case Precision::DoubleDouble: return run>(sim); -#if defined(__APPLE__) or defined(__clang__) +#if defined(PFFDTD_HAS_FLOAT16) case Precision::Half: return run<_Float16>(sim); case Precision::DoubleHalf: return run>(sim); #endif diff --git a/src/cpp/pffdtd/engine_sycl_2d.cpp b/src/cpp/pffdtd/engine_sycl_2d.cpp index 1d989db..bb9a767 100644 --- a/src/cpp/pffdtd/engine_sycl_2d.cpp +++ b/src/cpp/pffdtd/engine_sycl_2d.cpp @@ -10,6 +10,7 @@ #include "pffdtd/progress.hpp" #include "pffdtd/sycl.hpp" #include "pffdtd/time.hpp" +#include "pffdtd/utility.hpp" #include @@ -30,35 +31,17 @@ struct CopyInput; template struct CopyOutput; -template -static constexpr auto min_exponent = std::numeric_limits::min_exponent; - -template -static constexpr auto min_exponent> = min_exponent; - -template<> -[[maybe_unused]] static constexpr auto min_exponent<_Float16> = -13; - -template -static constexpr auto max_exponent = std::numeric_limits::max_exponent; - -template -static constexpr auto max_exponent> = max_exponent; - -template<> -[[maybe_unused]] static constexpr auto max_exponent<_Float16> = 16; - [[nodiscard]] constexpr auto to_ixy(auto x, auto y, auto /*Nx*/, auto Ny) { return x * Ny + y; } template auto scaleInput(std::vector const& buf) { - static constexpr auto min_exp = static_cast(min_exponent); - static constexpr auto max_exp = static_cast(max_exponent); + static constexpr auto min_exp = static_cast(FloatTraits::minExponent); + static constexpr auto max_exp = static_cast(FloatTraits::maxExponent); println("min_exp = {}, min_exp = {}", min_exp, max_exp); auto const aexp = 0.5; auto const pow2 = static_cast(std::round(aexp * max_exp + (1 - aexp) * min_exp)); - auto const norm1 = std::pow(2.0, pow2); + auto const norm1 = static_cast(ipow(2, pow2)); auto const max_in = *std::ranges::max_element(buf, [](auto l, auto r) { return std::fabs(l) < std::fabs(r); }); auto const inv_infac = norm1 / max_in; @@ -255,7 +238,7 @@ auto run(Simulation2D const& sim) { auto EngineSYCL2D::operator()(Simulation2D const& sim, Precision precision) const -> stdex::mdarray> { switch (precision) { -#if not defined(__INTEL_LLVM_COMPILER) +#if defined(PFFDTD_HAS_FLOAT16) case Precision::Half: return run<_Float16>(sim); case Precision::DoubleHalf: return run>(sim); #endif diff --git a/src/cpp/pffdtd/engine_sycl_3d.cpp b/src/cpp/pffdtd/engine_sycl_3d.cpp index 75b6ac0..994cd2c 100644 --- a/src/cpp/pffdtd/engine_sycl_3d.cpp +++ b/src/cpp/pffdtd/engine_sycl_3d.cpp @@ -358,7 +358,7 @@ auto run(Simulation3D const& sim) -> void { auto EngineSYCL3D::operator()(Simulation3D const& sim) const -> void { switch (sim.precision) { -#if not defined(__INTEL_LLVM_COMPILER) +#if defined(PFFDTD_HAS_FLOAT16) case Precision::Half: return run<_Float16>(sim); case Precision::DoubleHalf: return run>(sim); #endif diff --git a/src/cpp/pffdtd/float.hpp b/src/cpp/pffdtd/float.hpp new file mode 100644 index 0000000..9f89f4f --- /dev/null +++ b/src/cpp/pffdtd/float.hpp @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: 2024 Tobias Hienzsch + +#pragma once + +#include + +#if (defined(__APPLE__) or defined(__clang__)) and not defined(__INTEL_LLVM_COMPILER) + #define PFFDTD_HAS_FLOAT16 +#endif + +namespace pffdtd { + +template +struct FloatTraits; + +#if defined(PFFDTD_HAS_FLOAT16) +template<> +struct FloatTraits<_Float16> { + static constexpr auto digits = 11; + static constexpr auto minExponent = -13; + static constexpr auto maxExponent = 16; +}; +#endif + +template<> +struct FloatTraits { + static constexpr auto digits = std::numeric_limits::digits; + static constexpr auto minExponent = std::numeric_limits::min_exponent; + static constexpr auto maxExponent = std::numeric_limits::max_exponent; +}; + +template<> +struct FloatTraits { + static constexpr auto digits = std::numeric_limits::digits; + static constexpr auto minExponent = std::numeric_limits::min_exponent; + static constexpr auto maxExponent = std::numeric_limits::max_exponent; +}; + +} // namespace pffdtd diff --git a/src/cpp/pffdtd/precision.cpp b/src/cpp/pffdtd/precision.cpp new file mode 100644 index 0000000..10fe1f8 --- /dev/null +++ b/src/cpp/pffdtd/precision.cpp @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: MIT +// SPDX-FileCopyrightText: 2024 Tobias Hienzsch + +#include "precision.hpp" + +#include "pffdtd/exception.hpp" +#include "pffdtd/float.hpp" + +namespace pffdtd { + +template +constexpr auto EPS = 0.0; + +template<> +constexpr auto EPS = 1.19209289e-07; + +auto getEpsilon(Precision precision) -> double { + switch (precision) { + case Precision::Half: return EPS; + case Precision::Float: return EPS; + case Precision::Double: return EPS; + + case Precision::DoubleHalf: return EPS; + case Precision::DoubleFloat: return EPS; + case Precision::DoubleDouble: return EPS; + + default: break; + } + + raisef("invalid precision = {}", int(precision)); +} + +auto getMinMaxExponent(Precision precision) -> std::pair { + using std::pair; + switch (precision) { + case Precision::Float: return pair{FloatTraits::minExponent, FloatTraits::maxExponent}; + case Precision::DoubleFloat: return pair{FloatTraits::minExponent, FloatTraits::maxExponent}; + + case Precision::Double: return pair{FloatTraits::minExponent, FloatTraits::maxExponent}; + case Precision::DoubleDouble: return pair{FloatTraits::minExponent, FloatTraits::maxExponent}; + +#if defined(PFFDTD_HAS_FLOAT16) + case Precision::Half: return pair{FloatTraits<_Float16>::minExponent, FloatTraits<_Float16>::maxExponent}; + case Precision::DoubleHalf: return pair{FloatTraits<_Float16>::minExponent, FloatTraits<_Float16>::maxExponent}; +#endif + + default: break; + } + + raisef("invalid precision = {}", int(precision)); +} + +} // namespace pffdtd diff --git a/src/cpp/pffdtd/precision.hpp b/src/cpp/pffdtd/precision.hpp index 9cd10e0..a22422b 100644 --- a/src/cpp/pffdtd/precision.hpp +++ b/src/cpp/pffdtd/precision.hpp @@ -3,6 +3,8 @@ #pragma once +#include + namespace pffdtd { enum struct Precision { @@ -15,4 +17,7 @@ enum struct Precision { DoubleDouble, }; +[[nodiscard]] auto getEpsilon(Precision precision) -> double; +[[nodiscard]] auto getMinMaxExponent(Precision precision) -> std::pair; + } // namespace pffdtd diff --git a/src/cpp/pffdtd/simulation_3d.cpp b/src/cpp/pffdtd/simulation_3d.cpp index dd75776..620c018 100644 --- a/src/cpp/pffdtd/simulation_3d.cpp +++ b/src/cpp/pffdtd/simulation_3d.cpp @@ -6,6 +6,7 @@ #include "pffdtd/double.hpp" #include "pffdtd/exception.hpp" #include "pffdtd/hdf.hpp" +#include "pffdtd/utility.hpp" #include @@ -19,67 +20,6 @@ namespace pffdtd { namespace { -template -constexpr auto EPS = 0.0; - -template<> -constexpr auto EPS = 1.19209289e-07; - -auto getEpsilon(Precision precision) { - switch (precision) { - case Precision::Half: return EPS; - case Precision::Float: return EPS; - case Precision::Double: return EPS; - - case Precision::DoubleHalf: return EPS; - case Precision::DoubleFloat: return EPS; - case Precision::DoubleDouble: return EPS; - - default: break; - } - - raisef("invalid precision = {}", int(precision)); -} - -template -static constexpr auto min_exponent = std::numeric_limits::min_exponent; - -template -static constexpr auto max_exponent = std::numeric_limits::max_exponent; - -template -static constexpr auto min_exponent> = min_exponent; - -template -static constexpr auto max_exponent> = max_exponent; - -#if defined(__APPLE__) or defined(__clang__) -template<> -static constexpr auto min_exponent<_Float16> = -13; - -template<> -static constexpr auto max_exponent<_Float16> = 16; -#endif - -auto getMinMaxExponent(Precision precision) { - switch (precision) { - case Precision::Float: return std::pair{min_exponent, max_exponent}; - case Precision::DoubleFloat: return std::pair{min_exponent, max_exponent}; - - case Precision::Double: return std::pair{min_exponent, max_exponent}; - case Precision::DoubleDouble: return std::pair{min_exponent, max_exponent}; - -#if defined(__APPLE__) or defined(__clang__) - case Precision::Half: return std::pair{min_exponent<_Float16>, max_exponent<_Float16>}; - case Precision::DoubleHalf: return std::pair{min_exponent<_Float16>, max_exponent<_Float16>}; -#endif - - default: break; - } - - raisef("invalid precision = {}", int(precision)); -} - // sort and return indices void sort_keys(int64_t* val_arr, int64_t* key_arr, int64_t N) { // for sorting int64 arrays and returning keys @@ -586,8 +526,8 @@ void scaleInput(Simulation3D& sim) { auto const [min_exp, max_exp] = std::pair{getMinMaxExponent(sim.precision)}; auto const aexp = 0.5; // normalise to middle power of two - auto const pow2 = static_cast(std::round(aexp * max_exp + (1 - aexp) * min_exp)); - auto const norm1 = std::pow(2.0, pow2); + auto const pow2 = static_cast(std::round(aexp * max_exp + (1 - aexp) * min_exp)); + auto const norm1 = static_cast(ipow(2, pow2)); auto const inv_infac = norm1 / max_in; auto const infac = 1.0 / inv_infac; diff --git a/src/cpp/pffdtd/utility.hpp b/src/cpp/pffdtd/utility.hpp index 64f8936..8ec8e36 100644 --- a/src/cpp/pffdtd/utility.hpp +++ b/src/cpp/pffdtd/utility.hpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #ifndef DIV_CEIL @@ -56,4 +57,39 @@ auto convertTo(std::vector> const& in) { return out; } +template +[[nodiscard]] constexpr auto idiv(T x, T y) noexcept -> T { + return (x + y - 1) / y; +} + +template +[[nodiscard]] constexpr auto ipow(T base, T exponent) -> T { + T result = 1; + for (T i = 0; i < exponent; i++) { + result *= base; + } + return result; +} + +template + requires std::integral +[[nodiscard]] constexpr auto ipow(decltype(Base) exponent) -> decltype(Base) { + using Int = decltype(Base); + using UInt = std::make_unsigned_t; + + if constexpr (Base == Int(2)) { + return static_cast(UInt(1) << UInt(exponent)); + } else if constexpr (Base == Int(4)) { + return static_cast(UInt(1) << UInt(exponent * Int(2))); + } else if constexpr (Base == Int(8)) { + return static_cast(UInt(1) << UInt(exponent * Int(3))); + } else if constexpr (Base == Int(16)) { + return static_cast(UInt(1) << UInt(exponent * Int(4))); + } else if constexpr (Base == Int(32)) { + return static_cast(UInt(1) << UInt(exponent * Int(5))); + } else { + return ipow(Base, exponent); + } +} + } // namespace pffdtd