Skip to content

Commit

Permalink
[cpp] Improve DoubleHalf precision
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiashienzsch committed Oct 1, 2024
1 parent a6199c6 commit ed05fb6
Show file tree
Hide file tree
Showing 10 changed files with 180 additions and 90 deletions.
2 changes: 2 additions & 0 deletions src/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,10 @@ target_sources(pffdtd
pffdtd/engine_cpu_3d.cpp
pffdtd/engine_cpu_3d.hpp
pffdtd/exception.hpp
pffdtd/float.hpp
pffdtd/hdf.hpp
pffdtd/mdspan.hpp
pffdtd/precision.cpp
pffdtd/precision.hpp
pffdtd/print.hpp
pffdtd/progress.cpp
Expand Down
37 changes: 34 additions & 3 deletions src/cpp/pffdtd/double.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
// SPDX-FileCopyrightText: 2024 Tobias Hienzsch
#pragma once

#include "pffdtd/float.hpp"
#include "pffdtd/utility.hpp"

#if defined(SYCL_LANGUAGE_VERSION)
#include "pffdtd/sycl.hpp"
#endif
Expand All @@ -12,12 +15,22 @@

namespace pffdtd {

/// https://github.com/sukop/doubledouble/blob/master/doubledouble.py
/// https://hal.science/hal-01351529v3/document
/// https://inria.hal.science/inria-00070314/document
/// https://github.com/sukop/doubledouble
/// https://github.com/JuliaMath/DoubleFloats.jl
/// https://github.com/FlorisSteenkamp/double-double
template<typename Real>
struct Double {
constexpr Double() = default;

constexpr Double(Real x, Real y = 0.0) noexcept : _high{x}, _low{y} {}
constexpr Double(Real x) noexcept {
auto const [h, l] = split(x);
_high = h;
_low = l;
}

constexpr Double(Real x, Real y) noexcept : _high{x}, _low{y} {}

[[nodiscard]] constexpr auto high() const noexcept -> Real { return _high; }

Expand Down Expand Up @@ -86,6 +99,21 @@ struct Double {
}

private:
[[nodiscard]] static constexpr auto split(Real a) noexcept -> Double {
constexpr auto const digits = FloatTraits<Real>::digits;
constexpr auto const f = ipow<2>(idiv(digits, 2)) + 1;

if constexpr (std::same_as<Real, double> and sizeof(double) == 8) {
static_assert(f == 134217729); // 2**27 + 1
}

auto const c = static_cast<Real>(f) * a;
auto const a_h = c - (c - a);
auto const a_l = a - a_h;

return {a_h, a_l};
}

[[nodiscard]] static constexpr auto twoSum(Real x, Real y) noexcept -> Double {
auto r = x + y;
auto t = r - x;
Expand All @@ -109,7 +137,7 @@ struct Double {
[[nodiscard]] static constexpr auto twoProduct(Real x, Real y) noexcept -> Double {
auto r = x * y;

#if defined(__APPLE__) or defined(__clang__)
#if defined(PFFDTD_HAS_FLOAT16)
if constexpr (std::same_as<Real, _Float16>) {
auto e = static_cast<_Float16>(std::fma(float{x}, float{y}, float{-r}));
return Double{r, e};
Expand All @@ -125,4 +153,7 @@ struct Double {
Real _low{static_cast<Real>(0)};
};

template<typename Real>
struct FloatTraits<Double<Real>> : FloatTraits<Real> {};

} // namespace pffdtd
2 changes: 1 addition & 1 deletion src/cpp/pffdtd/engine_cpu_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ auto EngineCPU2D::operator()(Simulation2D const& sim, Precision precision) const
case Precision::Double: return run<double>(sim);
case Precision::DoubleFloat: return run<Double<float>>(sim);
case Precision::DoubleDouble: return run<Double<double>>(sim);
#if defined(__APPLE__) or defined(__clang__)
#if defined(PFFDTD_HAS_FLOAT16)
case Precision::Half: return run<_Float16>(sim);
case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
#endif
Expand Down
27 changes: 5 additions & 22 deletions src/cpp/pffdtd/engine_sycl_2d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "pffdtd/progress.hpp"
#include "pffdtd/sycl.hpp"
#include "pffdtd/time.hpp"
#include "pffdtd/utility.hpp"

#include <fmt/format.h>

Expand All @@ -30,35 +31,17 @@ struct CopyInput;
template<typename Real>
struct CopyOutput;

template<typename Real>
static constexpr auto min_exponent = std::numeric_limits<Real>::min_exponent;

template<typename Real>
static constexpr auto min_exponent<Double<Real>> = min_exponent<Real>;

template<>
[[maybe_unused]] static constexpr auto min_exponent<_Float16> = -13;

template<typename Real>
static constexpr auto max_exponent = std::numeric_limits<Real>::max_exponent;

template<typename Real>
static constexpr auto max_exponent<Double<Real>> = max_exponent<Real>;

template<>
[[maybe_unused]] static constexpr auto max_exponent<_Float16> = 16;

[[nodiscard]] constexpr auto to_ixy(auto x, auto y, auto /*Nx*/, auto Ny) { return x * Ny + y; }

template<typename Real>
auto scaleInput(std::vector<double> const& buf) {
static constexpr auto min_exp = static_cast<double>(min_exponent<Real>);
static constexpr auto max_exp = static_cast<double>(max_exponent<Real>);
static constexpr auto min_exp = static_cast<double>(FloatTraits<Real>::minExponent);
static constexpr auto max_exp = static_cast<double>(FloatTraits<Real>::maxExponent);
println("min_exp = {}, min_exp = {}", min_exp, max_exp);

auto const aexp = 0.5;
auto const pow2 = static_cast<int>(std::round(aexp * max_exp + (1 - aexp) * min_exp));
auto const norm1 = std::pow(2.0, pow2);
auto const norm1 = static_cast<double>(ipow(2, pow2));

auto const max_in = *std::ranges::max_element(buf, [](auto l, auto r) { return std::fabs(l) < std::fabs(r); });
auto const inv_infac = norm1 / max_in;
Expand Down Expand Up @@ -255,7 +238,7 @@ auto run(Simulation2D const& sim) {
auto EngineSYCL2D::operator()(Simulation2D const& sim, Precision precision) const
-> stdex::mdarray<double, stdex::dextents<size_t, 2>> {
switch (precision) {
#if not defined(__INTEL_LLVM_COMPILER)
#if defined(PFFDTD_HAS_FLOAT16)
case Precision::Half: return run<_Float16>(sim);
case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
#endif
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/pffdtd/engine_sycl_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ auto run(Simulation3D const& sim) -> void {

auto EngineSYCL3D::operator()(Simulation3D const& sim) const -> void {
switch (sim.precision) {
#if not defined(__INTEL_LLVM_COMPILER)
#if defined(PFFDTD_HAS_FLOAT16)
case Precision::Half: return run<_Float16>(sim);
case Precision::DoubleHalf: return run<Double<_Float16>>(sim);
#endif
Expand Down
40 changes: 40 additions & 0 deletions src/cpp/pffdtd/float.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Tobias Hienzsch

#pragma once

#include <limits>

#if (defined(__APPLE__) or defined(__clang__)) and not defined(__INTEL_LLVM_COMPILER)
#define PFFDTD_HAS_FLOAT16
#endif

namespace pffdtd {

template<typename T>
struct FloatTraits;

#if defined(PFFDTD_HAS_FLOAT16)
template<>
struct FloatTraits<_Float16> {
static constexpr auto digits = 11;
static constexpr auto minExponent = -13;
static constexpr auto maxExponent = 16;
};
#endif

template<>
struct FloatTraits<float> {
static constexpr auto digits = std::numeric_limits<float>::digits;
static constexpr auto minExponent = std::numeric_limits<float>::min_exponent;
static constexpr auto maxExponent = std::numeric_limits<float>::max_exponent;
};

template<>
struct FloatTraits<double> {
static constexpr auto digits = std::numeric_limits<double>::digits;
static constexpr auto minExponent = std::numeric_limits<double>::min_exponent;
static constexpr auto maxExponent = std::numeric_limits<double>::max_exponent;
};

} // namespace pffdtd
53 changes: 53 additions & 0 deletions src/cpp/pffdtd/precision.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: 2024 Tobias Hienzsch

#include "precision.hpp"

#include "pffdtd/exception.hpp"
#include "pffdtd/float.hpp"

namespace pffdtd {

template<typename T>
constexpr auto EPS = 0.0;

template<>
constexpr auto EPS<float> = 1.19209289e-07;

auto getEpsilon(Precision precision) -> double {
switch (precision) {
case Precision::Half: return EPS<float>;
case Precision::Float: return EPS<float>;
case Precision::Double: return EPS<double>;

case Precision::DoubleHalf: return EPS<float>;
case Precision::DoubleFloat: return EPS<float>;
case Precision::DoubleDouble: return EPS<double>;

default: break;
}

raisef<std::invalid_argument>("invalid precision = {}", int(precision));
}

auto getMinMaxExponent(Precision precision) -> std::pair<int, int> {
using std::pair;
switch (precision) {
case Precision::Float: return pair{FloatTraits<float>::minExponent, FloatTraits<float>::maxExponent};
case Precision::DoubleFloat: return pair{FloatTraits<float>::minExponent, FloatTraits<float>::maxExponent};

case Precision::Double: return pair{FloatTraits<double>::minExponent, FloatTraits<double>::maxExponent};
case Precision::DoubleDouble: return pair{FloatTraits<double>::minExponent, FloatTraits<double>::maxExponent};

#if defined(PFFDTD_HAS_FLOAT16)
case Precision::Half: return pair{FloatTraits<_Float16>::minExponent, FloatTraits<_Float16>::maxExponent};
case Precision::DoubleHalf: return pair{FloatTraits<_Float16>::minExponent, FloatTraits<_Float16>::maxExponent};
#endif

default: break;
}

raisef<std::invalid_argument>("invalid precision = {}", int(precision));
}

} // namespace pffdtd
5 changes: 5 additions & 0 deletions src/cpp/pffdtd/precision.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#pragma once

#include <utility>

namespace pffdtd {

enum struct Precision {
Expand All @@ -15,4 +17,7 @@ enum struct Precision {
DoubleDouble,
};

[[nodiscard]] auto getEpsilon(Precision precision) -> double;
[[nodiscard]] auto getMinMaxExponent(Precision precision) -> std::pair<int, int>;

} // namespace pffdtd
66 changes: 3 additions & 63 deletions src/cpp/pffdtd/simulation_3d.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "pffdtd/double.hpp"
#include "pffdtd/exception.hpp"
#include "pffdtd/hdf.hpp"
#include "pffdtd/utility.hpp"

#include <fmt/format.h>

Expand All @@ -19,67 +20,6 @@ namespace pffdtd {

namespace {

template<typename T>
constexpr auto EPS = 0.0;

template<>
constexpr auto EPS<float> = 1.19209289e-07;

auto getEpsilon(Precision precision) {
switch (precision) {
case Precision::Half: return EPS<float>;
case Precision::Float: return EPS<float>;
case Precision::Double: return EPS<double>;

case Precision::DoubleHalf: return EPS<float>;
case Precision::DoubleFloat: return EPS<float>;
case Precision::DoubleDouble: return EPS<double>;

default: break;
}

raisef<std::invalid_argument>("invalid precision = {}", int(precision));
}

template<typename Real>
static constexpr auto min_exponent = std::numeric_limits<Real>::min_exponent;

template<typename Real>
static constexpr auto max_exponent = std::numeric_limits<Real>::max_exponent;

template<typename Real>
static constexpr auto min_exponent<Double<Real>> = min_exponent<Real>;

template<typename Real>
static constexpr auto max_exponent<Double<Real>> = max_exponent<Real>;

#if defined(__APPLE__) or defined(__clang__)
template<>
static constexpr auto min_exponent<_Float16> = -13;

template<>
static constexpr auto max_exponent<_Float16> = 16;
#endif

auto getMinMaxExponent(Precision precision) {
switch (precision) {
case Precision::Float: return std::pair{min_exponent<float>, max_exponent<float>};
case Precision::DoubleFloat: return std::pair{min_exponent<float>, max_exponent<float>};

case Precision::Double: return std::pair{min_exponent<double>, max_exponent<double>};
case Precision::DoubleDouble: return std::pair{min_exponent<double>, max_exponent<double>};

#if defined(__APPLE__) or defined(__clang__)
case Precision::Half: return std::pair{min_exponent<_Float16>, max_exponent<_Float16>};
case Precision::DoubleHalf: return std::pair{min_exponent<_Float16>, max_exponent<_Float16>};
#endif

default: break;
}

raisef<std::invalid_argument>("invalid precision = {}", int(precision));
}

// sort and return indices
void sort_keys(int64_t* val_arr, int64_t* key_arr, int64_t N) {
// for sorting int64 arrays and returning keys
Expand Down Expand Up @@ -586,8 +526,8 @@ void scaleInput(Simulation3D& sim) {
auto const [min_exp, max_exp] = std::pair<double, double>{getMinMaxExponent(sim.precision)};

auto const aexp = 0.5; // normalise to middle power of two
auto const pow2 = static_cast<int32_t>(std::round(aexp * max_exp + (1 - aexp) * min_exp));
auto const norm1 = std::pow(2.0, pow2);
auto const pow2 = static_cast<int>(std::round(aexp * max_exp + (1 - aexp) * min_exp));
auto const norm1 = static_cast<double>(ipow(2, pow2));
auto const inv_infac = norm1 / max_in;
auto const infac = 1.0 / inv_infac;

Expand Down
Loading

0 comments on commit ed05fb6

Please sign in to comment.