[cpp] Improve DoubleHalf precision

tobanteAudio · Oct 1, 2024 · ed05fb6 · ed05fb6
1 parent a6199c6
commit ed05fb6
Show file tree

Hide file tree

Showing 10 changed files with 180 additions and 90 deletions.
diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt
@@ -25,8 +25,10 @@ target_sources(pffdtd
         pffdtd/engine_cpu_3d.cpp
         pffdtd/engine_cpu_3d.hpp
         pffdtd/exception.hpp
+        pffdtd/float.hpp
         pffdtd/hdf.hpp
         pffdtd/mdspan.hpp
+        pffdtd/precision.cpp
         pffdtd/precision.hpp
         pffdtd/print.hpp
         pffdtd/progress.cpp

diff --git a/src/cpp/pffdtd/double.hpp b/src/cpp/pffdtd/double.hpp
@@ -2,6 +2,9 @@
 // SPDX-FileCopyrightText: 2024 Tobias Hienzsch
 #pragma once
 
+#include "pffdtd/float.hpp"
+#include "pffdtd/utility.hpp"
+
 #if defined(SYCL_LANGUAGE_VERSION)
   #include "pffdtd/sycl.hpp"
 #endif
@@ -12,12 +15,22 @@
 
 namespace pffdtd {
 
-/// https://github.com/sukop/doubledouble/blob/master/doubledouble.py
+/// https://hal.science/hal-01351529v3/document
+/// https://inria.hal.science/inria-00070314/document
+/// https://github.com/sukop/doubledouble
+/// https://github.com/JuliaMath/DoubleFloats.jl
+/// https://github.com/FlorisSteenkamp/double-double
 template<typename Real>
 struct Double {
   constexpr Double() = default;
 
-  constexpr Double(Real x, Real y = 0.0) noexcept : _high{x}, _low{y} {}
+  constexpr Double(Real x) noexcept {
+    auto const [h, l] = split(x);
+    _high             = h;
+    _low              = l;
+  }
+
+  constexpr Double(Real x, Real y) noexcept : _high{x}, _low{y} {}
 
   [[nodiscard]] constexpr auto high() const noexcept -> Real { return _high; }
 
@@ -86,6 +99,21 @@ struct Double {
   }
 
   private:
+  [[nodiscard]] static constexpr auto split(Real a) noexcept -> Double {
+    constexpr auto const digits = FloatTraits<Real>::digits;
+    constexpr auto const f      = ipow<2>(idiv(digits, 2)) + 1;
+
+    if constexpr (std::same_as<Real, double> and sizeof(double) == 8) {
+      static_assert(f == 134217729); // 2**27 + 1
+    }
+
+    auto const c   = static_cast<Real>(f) * a;
+    auto const a_h = c - (c - a);
+    auto const a_l = a - a_h;
+
+    return {a_h, a_l};
+  }
+
   [[nodiscard]] static constexpr auto twoSum(Real x, Real y) noexcept -> Double {
     auto r = x + y;
     auto t = r - x;
@@ -109,7 +137,7 @@ struct Double {
   [[nodiscard]] static constexpr auto twoProduct(Real x, Real y) noexcept -> Double {
     auto r = x * y;
 
-#if defined(__APPLE__) or defined(__clang__)
+#if defined(PFFDTD_HAS_FLOAT16)
     if constexpr (std::same_as<Real, _Float16>) {
       auto e = static_cast<_Float16>(std::fma(float{x}, float{y}, float{-r}));
       return Double{r, e};
@@ -125,4 +153,7 @@ struct Double {
   Real _low{static_cast<Real>(0)};
 };
 
+template<typename Real>
+struct FloatTraits<Double<Real>> : FloatTraits<Real> {};
+
 } // namespace pffdtd
diff --git a/src/cpp/pffdtd/engine_cpu_2d.cpp b/src/cpp/pffdtd/engine_cpu_2d.cpp
@@ -156,7 +156,7 @@ auto EngineCPU2D::operator()(Simulation2D const& sim, Precision precision) const
     case Precision::Double: return run<double>(sim);
     case Precision::DoubleFloat: return run<Double<float>>(sim);
     case Precision::DoubleDouble: return run<Double<double>>(sim);
-#if defined(__APPLE__) or defined(__clang__)
+#if defined(PFFDTD_HAS_FLOAT16)
     case Precision::Half: return run<_Float16>(sim);
     case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
 #endif

diff --git a/src/cpp/pffdtd/engine_sycl_2d.cpp b/src/cpp/pffdtd/engine_sycl_2d.cpp
@@ -10,6 +10,7 @@
 #include "pffdtd/progress.hpp"
 #include "pffdtd/sycl.hpp"
 #include "pffdtd/time.hpp"
+#include "pffdtd/utility.hpp"
 
 #include <fmt/format.h>
 
@@ -30,35 +31,17 @@ struct CopyInput;
 template<typename Real>
 struct CopyOutput;
 
-template<typename Real>
-static constexpr auto min_exponent = std::numeric_limits<Real>::min_exponent;
-
-template<typename Real>
-static constexpr auto min_exponent<Double<Real>> = min_exponent<Real>;
-
-template<>
-[[maybe_unused]] static constexpr auto min_exponent<_Float16> = -13;
-
-template<typename Real>
-static constexpr auto max_exponent = std::numeric_limits<Real>::max_exponent;
-
-template<typename Real>
-static constexpr auto max_exponent<Double<Real>> = max_exponent<Real>;
-
-template<>
-[[maybe_unused]] static constexpr auto max_exponent<_Float16> = 16;
-
 [[nodiscard]] constexpr auto to_ixy(auto x, auto y, auto /*Nx*/, auto Ny) { return x * Ny + y; }
 
 template<typename Real>
 auto scaleInput(std::vector<double> const& buf) {
-  static constexpr auto min_exp = static_cast<double>(min_exponent<Real>);
-  static constexpr auto max_exp = static_cast<double>(max_exponent<Real>);
+  static constexpr auto min_exp = static_cast<double>(FloatTraits<Real>::minExponent);
+  static constexpr auto max_exp = static_cast<double>(FloatTraits<Real>::maxExponent);
   println("min_exp = {}, min_exp = {}", min_exp, max_exp);
 
   auto const aexp  = 0.5;
   auto const pow2  = static_cast<int>(std::round(aexp * max_exp + (1 - aexp) * min_exp));
-  auto const norm1 = std::pow(2.0, pow2);
+  auto const norm1 = static_cast<double>(ipow(2, pow2));
 
   auto const max_in    = *std::ranges::max_element(buf, [](auto l, auto r) { return std::fabs(l) < std::fabs(r); });
   auto const inv_infac = norm1 / max_in;
@@ -255,7 +238,7 @@ auto run(Simulation2D const& sim) {
 auto EngineSYCL2D::operator()(Simulation2D const& sim, Precision precision) const
     -> stdex::mdarray<double, stdex::dextents<size_t, 2>> {
   switch (precision) {
-#if not defined(__INTEL_LLVM_COMPILER)
+#if defined(PFFDTD_HAS_FLOAT16)
     case Precision::Half: return run<_Float16>(sim);
     case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
 #endif

diff --git a/src/cpp/pffdtd/engine_sycl_3d.cpp b/src/cpp/pffdtd/engine_sycl_3d.cpp
@@ -358,7 +358,7 @@ auto run(Simulation3D const& sim) -> void {
 
 auto EngineSYCL3D::operator()(Simulation3D const& sim) const -> void {
   switch (sim.precision) {
-#if not defined(__INTEL_LLVM_COMPILER)
+#if defined(PFFDTD_HAS_FLOAT16)
     case Precision::Half: return run<_Float16>(sim);
     case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
 #endif

diff --git a/src/cpp/pffdtd/float.hpp b/src/cpp/pffdtd/float.hpp
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2024 Tobias Hienzsch
+
+#pragma once
+
+#include <limits>
+
+#if (defined(__APPLE__) or defined(__clang__)) and not defined(__INTEL_LLVM_COMPILER)
+  #define PFFDTD_HAS_FLOAT16
+#endif
+
+namespace pffdtd {
+
+template<typename T>
+struct FloatTraits;
+
+#if defined(PFFDTD_HAS_FLOAT16)
+template<>
+struct FloatTraits<_Float16> {
+  static constexpr auto digits      = 11;
+  static constexpr auto minExponent = -13;
+  static constexpr auto maxExponent = 16;
+};
+#endif
+
+template<>
+struct FloatTraits<float> {
+  static constexpr auto digits      = std::numeric_limits<float>::digits;
+  static constexpr auto minExponent = std::numeric_limits<float>::min_exponent;
+  static constexpr auto maxExponent = std::numeric_limits<float>::max_exponent;
+};
+
+template<>
+struct FloatTraits<double> {
+  static constexpr auto digits      = std::numeric_limits<double>::digits;
+  static constexpr auto minExponent = std::numeric_limits<double>::min_exponent;
+  static constexpr auto maxExponent = std::numeric_limits<double>::max_exponent;
+};
+
+} // namespace pffdtd
diff --git a/src/cpp/pffdtd/precision.cpp b/src/cpp/pffdtd/precision.cpp
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: MIT
+// SPDX-FileCopyrightText: 2024 Tobias Hienzsch
+
+#include "precision.hpp"
+
+#include "pffdtd/exception.hpp"
+#include "pffdtd/float.hpp"
+
+namespace pffdtd {
+
+template<typename T>
+constexpr auto EPS = 0.0;
+
+template<>
+constexpr auto EPS<float> = 1.19209289e-07;
+
+auto getEpsilon(Precision precision) -> double {
+  switch (precision) {
+    case Precision::Half: return EPS<float>;
+    case Precision::Float: return EPS<float>;
+    case Precision::Double: return EPS<double>;
+
+    case Precision::DoubleHalf: return EPS<float>;
+    case Precision::DoubleFloat: return EPS<float>;
+    case Precision::DoubleDouble: return EPS<double>;
+
+    default: break;
+  }
+
+  raisef<std::invalid_argument>("invalid precision = {}", int(precision));
+}
+
+auto getMinMaxExponent(Precision precision) -> std::pair<int, int> {
+  using std::pair;
+  switch (precision) {
+    case Precision::Float: return pair{FloatTraits<float>::minExponent, FloatTraits<float>::maxExponent};
+    case Precision::DoubleFloat: return pair{FloatTraits<float>::minExponent, FloatTraits<float>::maxExponent};
+
+    case Precision::Double: return pair{FloatTraits<double>::minExponent, FloatTraits<double>::maxExponent};
+    case Precision::DoubleDouble: return pair{FloatTraits<double>::minExponent, FloatTraits<double>::maxExponent};
+
+#if defined(PFFDTD_HAS_FLOAT16)
+    case Precision::Half: return pair{FloatTraits<_Float16>::minExponent, FloatTraits<_Float16>::maxExponent};
+    case Precision::DoubleHalf: return pair{FloatTraits<_Float16>::minExponent, FloatTraits<_Float16>::maxExponent};
+#endif
+
+    default: break;
+  }
+
+  raisef<std::invalid_argument>("invalid precision = {}", int(precision));
+}
+
+} // namespace pffdtd
diff --git a/src/cpp/pffdtd/precision.hpp b/src/cpp/pffdtd/precision.hpp
@@ -3,6 +3,8 @@
 
 #pragma once
 
+#include <utility>
+
 namespace pffdtd {
 
 enum struct Precision {
@@ -15,4 +17,7 @@ enum struct Precision {
   DoubleDouble,
 };
 
+[[nodiscard]] auto getEpsilon(Precision precision) -> double;
+[[nodiscard]] auto getMinMaxExponent(Precision precision) -> std::pair<int, int>;
+
 } // namespace pffdtd
diff --git a/src/cpp/pffdtd/simulation_3d.cpp b/src/cpp/pffdtd/simulation_3d.cpp
@@ -6,6 +6,7 @@
 #include "pffdtd/double.hpp"
 #include "pffdtd/exception.hpp"
 #include "pffdtd/hdf.hpp"
+#include "pffdtd/utility.hpp"
 
 #include <fmt/format.h>
 
@@ -19,67 +20,6 @@ namespace pffdtd {
 
 namespace {
 
-template<typename T>
-constexpr auto EPS = 0.0;
-
-template<>
-constexpr auto EPS<float> = 1.19209289e-07;
-
-auto getEpsilon(Precision precision) {
-  switch (precision) {
-    case Precision::Half: return EPS<float>;
-    case Precision::Float: return EPS<float>;
-    case Precision::Double: return EPS<double>;
-
-    case Precision::DoubleHalf: return EPS<float>;
-    case Precision::DoubleFloat: return EPS<float>;
-    case Precision::DoubleDouble: return EPS<double>;
-
-    default: break;
-  }
-
-  raisef<std::invalid_argument>("invalid precision = {}", int(precision));
-}
-
-template<typename Real>
-static constexpr auto min_exponent = std::numeric_limits<Real>::min_exponent;
-
-template<typename Real>
-static constexpr auto max_exponent = std::numeric_limits<Real>::max_exponent;
-
-template<typename Real>
-static constexpr auto min_exponent<Double<Real>> = min_exponent<Real>;
-
-template<typename Real>
-static constexpr auto max_exponent<Double<Real>> = max_exponent<Real>;
-
-#if defined(__APPLE__) or defined(__clang__)
-template<>
-static constexpr auto min_exponent<_Float16> = -13;
-
-template<>
-static constexpr auto max_exponent<_Float16> = 16;
-#endif
-
-auto getMinMaxExponent(Precision precision) {
-  switch (precision) {
-    case Precision::Float: return std::pair{min_exponent<float>, max_exponent<float>};
-    case Precision::DoubleFloat: return std::pair{min_exponent<float>, max_exponent<float>};
-
-    case Precision::Double: return std::pair{min_exponent<double>, max_exponent<double>};
-    case Precision::DoubleDouble: return std::pair{min_exponent<double>, max_exponent<double>};
-
-#if defined(__APPLE__) or defined(__clang__)
-    case Precision::Half: return std::pair{min_exponent<_Float16>, max_exponent<_Float16>};
-    case Precision::DoubleHalf: return std::pair{min_exponent<_Float16>, max_exponent<_Float16>};
-#endif
-
-    default: break;
-  }
-
-  raisef<std::invalid_argument>("invalid precision = {}", int(precision));
-}
-
 // sort and return indices
 void sort_keys(int64_t* val_arr, int64_t* key_arr, int64_t N) {
   // for sorting int64 arrays and returning keys
@@ -586,8 +526,8 @@ void scaleInput(Simulation3D& sim) {
   auto const [min_exp, max_exp] = std::pair<double, double>{getMinMaxExponent(sim.precision)};
 
   auto const aexp      = 0.5; // normalise to middle power of two
-  auto const pow2      = static_cast<int32_t>(std::round(aexp * max_exp + (1 - aexp) * min_exp));
-  auto const norm1     = std::pow(2.0, pow2);
+  auto const pow2      = static_cast<int>(std::round(aexp * max_exp + (1 - aexp) * min_exp));
+  auto const norm1     = static_cast<double>(ipow(2, pow2));
   auto const inv_infac = norm1 / max_in;
   auto const infac     = 1.0 / inv_infac;