-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Power and Logarithm Approximations (#1)
* Working pow functions, needs more tests and benchmarks * Improved tests and started on benchmarks * Test improvements * First pass at log approximations * More complete log implementations * More benchmarks work * Fixing test issues * Fix warning ignores * GCCCCCCC * Ignoring uninitialized warnings * Contending with memory issues on Linux * More test fixing for Linux
- Loading branch information
1 parent
a1590f8
commit 109bb58
Showing
14 changed files
with
821 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
#pragma once | ||
|
||
#include "basic_math.hpp" | ||
#include "pow_approx.hpp" | ||
|
||
namespace math_approx | ||
{ | ||
namespace log_detail | ||
{ | ||
/** approximation for log2(x), optimized on the range [1, 2] */ | ||
template <typename T, int order> | ||
constexpr T log2_approx (T x) | ||
{ | ||
static_assert (order >= 3 && order <= 6); | ||
using S = scalar_of_t<T>; | ||
|
||
const auto x_sq = x * x; | ||
if constexpr (order == 3) | ||
{ | ||
const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x; | ||
const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x; | ||
return x_0_1 + x_2_3 * x_sq; | ||
} | ||
else if constexpr (order == 4) | ||
{ | ||
const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x; | ||
const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x; | ||
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; | ||
return (S) -2.51982743265 + x_1_2_3_4 * x; | ||
} | ||
else if constexpr (order == 5) | ||
{ | ||
const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x; | ||
const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x; | ||
const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x; | ||
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; | ||
return x_0_1 + x_2_3_4_5 * x_sq; | ||
} | ||
else if constexpr (order == 6) | ||
{ | ||
const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x; | ||
const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x; | ||
const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x; | ||
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; | ||
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; | ||
return (S) -3.04376925958 + x_1_2_3_4_5_6 * x; | ||
} | ||
else | ||
{ | ||
return {}; | ||
} | ||
} | ||
} | ||
|
||
#if defined(__GNUC__) | ||
#pragma GCC diagnostic push | ||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning | ||
#pragma GCC diagnostic ignored "-Wuninitialized" | ||
#endif | ||
|
||
/** approximation for log(Base, x) (32-bit) */ | ||
template <typename Base, int order> | ||
float log (float x) | ||
{ | ||
const auto vi = reinterpret_cast<int32_t&> (x); | ||
const auto ex = vi & 0x7f800000; | ||
const auto e = (ex >> 23) - 127; | ||
const auto vfi = (vi - ex) | 0x3f800000; | ||
const auto vf = reinterpret_cast<const float&> (vfi); | ||
|
||
static constexpr auto log2_base_r = 1.0f / Base::log2_base; | ||
return log2_base_r * ((float) e + log_detail::log2_approx<float, order> (vf)); | ||
} | ||
|
||
/** approximation for log(x) (64-bit) */ | ||
template <typename Base, int order> | ||
double log (double x) | ||
{ | ||
const auto vi = reinterpret_cast<int64_t&> (x); | ||
const auto ex = vi & 0x7ff0000000000000; | ||
const auto e = (ex >> 52) - 1023; | ||
const auto vfi = (vi - ex) | 0x3ff0000000000000; | ||
const auto vf = reinterpret_cast<const double&> (vfi); | ||
|
||
static constexpr auto log2_base_r = 1.0 / Base::log2_base; | ||
return log2_base_r * ((double) e + log_detail::log2_approx<double, order> (vf)); | ||
} | ||
|
||
#if defined(XSIMD_HPP) | ||
/** approximation for pow(Base, x) (32-bit SIMD) */ | ||
template <typename Base, int order> | ||
xsimd::batch<float> log (xsimd::batch<float> x) | ||
{ | ||
const auto vi = reinterpret_cast<xsimd::batch<int32_t>&> (x); // NOSONAR | ||
const auto ex = vi & 0x7f800000; | ||
const auto e = (ex >> 23) - 127; | ||
const auto vfi = (vi - ex) | 0x3f800000; | ||
const auto vf = reinterpret_cast<const xsimd::batch<float>&> (vfi); // NOSONAR | ||
|
||
static constexpr auto log2_base_r = 1.0f / Base::log2_base; | ||
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<float>, order> (vf)); | ||
} | ||
|
||
/** approximation for pow(Base, x) (64-bit SIMD) */ | ||
template <typename Base, int order> | ||
xsimd::batch<double> log (xsimd::batch<double> x) | ||
{ | ||
const auto vi = reinterpret_cast<xsimd::batch<int64_t>&> (x); // NOSONAR | ||
const auto ex = vi & 0x7ff0000000000000; | ||
const auto e = (ex >> 52) - 1023; | ||
const auto vfi = (vi - ex) | 0x3ff0000000000000; | ||
const auto vf = reinterpret_cast<const xsimd::batch<double>&> (vfi); // NOSONAR | ||
|
||
static constexpr auto log2_base_r = 1.0 / Base::log2_base; | ||
return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx<xsimd::batch<double>, order> (vf)); | ||
} | ||
#endif | ||
|
||
#if defined(__GNUC__) | ||
#pragma GCC diagnostic pop // end ignore strict-aliasing warnings | ||
#endif | ||
|
||
template <int order, typename T> | ||
T log (T x) | ||
{ | ||
return log<pow_detail::BaseE<scalar_of_t<T>>, order> (x); | ||
} | ||
|
||
template <int order, typename T> | ||
T log2 (T x) | ||
{ | ||
return log<pow_detail::Base2<scalar_of_t<T>>, order> (x); | ||
} | ||
|
||
template <int order, typename T> | ||
T log10 (T x) | ||
{ | ||
return log<pow_detail::Base10<scalar_of_t<T>>, order> (x); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
#pragma once | ||
|
||
#include "basic_math.hpp" | ||
|
||
namespace math_approx | ||
{ | ||
namespace pow_detail | ||
{ | ||
/** approximation for 2^x, optimized on the range [0, 1] */ | ||
template <typename T, int order> | ||
constexpr T pow2_approx (T x) | ||
{ | ||
static_assert (order >= 3 && order <= 6); | ||
using S = scalar_of_t<T>; | ||
|
||
const auto x_sq = x * x; | ||
if constexpr (order == 3) | ||
{ | ||
const auto x_2_3 = (S) 0.226307586882 + (S) 0.0782680256330 * x; | ||
const auto x_0_1 = (S) 1 + (S) 0.695424387485 * x; | ||
return x_0_1 + x_2_3 * x_sq; | ||
} | ||
else if constexpr (order == 4) | ||
{ | ||
const auto x_3_4 = (S) 0.0520324008177 + (S) 0.0135557244044 * x; | ||
const auto x_1_2 = (S) 0.693032120001 + (S) 0.241379754777 * x; | ||
const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; | ||
return (S) 1 + x_1_2_3_4 * x; | ||
} | ||
else if constexpr (order == 5) | ||
{ | ||
const auto x_4_5 = (S) 0.00899009909264 + (S) 0.00187839071291 * x; | ||
const auto x_2_3 = (S) 0.240156326598 + (S) 0.0558229130202 * x; | ||
const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; | ||
const auto x_0_1 = (S) 1 + (S) 0.693152270576 * x; | ||
return x_0_1 + x_2_3_4_5 * x_sq; | ||
} | ||
else if constexpr (order == 6) | ||
{ | ||
const auto x_5_6 = (S) 0.00124359387839 + (S) 0.000217187820427 * x; | ||
const auto x_3_4 = (S) 0.0554833098983 + (S) 0.00967911763840 * x; | ||
const auto x_1_2 = (S) 0.693147003658 + (S) 0.240229787107 * x; | ||
const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; | ||
const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; | ||
return (S) 1 + x_1_2_3_4_5_6 * x; | ||
} | ||
else | ||
{ | ||
return {}; | ||
} | ||
} | ||
|
||
template <typename T> | ||
struct BaseE | ||
{ | ||
static constexpr auto log2_base = (T) 1.4426950408889634074; | ||
}; | ||
|
||
template <typename T> | ||
struct Base2 | ||
{ | ||
static constexpr auto log2_base = (T) 1; | ||
}; | ||
|
||
template <typename T> | ||
struct Base10 | ||
{ | ||
static constexpr auto log2_base = (T) 3.3219280948873623479; | ||
}; | ||
} | ||
|
||
#if defined(__GNUC__) | ||
#pragma GCC diagnostic push | ||
#pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning | ||
#pragma GCC diagnostic ignored "-Wuninitialized" | ||
#endif | ||
|
||
/** approximation for pow(Base, x) (32-bit) */ | ||
template <typename Base, int order> | ||
float pow (float x) | ||
{ | ||
x = std::max (-126.0f, Base::log2_base * x); | ||
|
||
const auto xi = (int32_t) x; | ||
const auto l = x < 0.0f ? xi - 1 : xi; | ||
const auto f = x - (float) l; | ||
const auto vi = (l + 127) << 23; | ||
|
||
return reinterpret_cast<const float&> (vi) * pow_detail::pow2_approx<float, order> (f); | ||
} | ||
|
||
/** approximation for pow(Base, x) (64-bit) */ | ||
template <typename Base, int order> | ||
double pow (double x) | ||
{ | ||
x = std::max (-1022.0, Base::log2_base * x); | ||
|
||
const auto xi = (int64_t) x; | ||
const auto l = x < 0.0 ? xi - 1 : xi; | ||
const auto d = x - (double) l; | ||
const auto vi = (l + 1023) << 52; | ||
|
||
return reinterpret_cast<const double&> (vi) * pow_detail::pow2_approx<double, order> (d); | ||
} | ||
|
||
#if defined(XSIMD_HPP) | ||
/** approximation for pow(Base, x) (32-bit SIMD) */ | ||
template <typename Base, int order> | ||
xsimd::batch<float> pow (xsimd::batch<float> x) | ||
{ | ||
x = xsimd::max (xsimd::broadcast (-126.0f), Base::log2_base * x); | ||
|
||
const auto xi = xsimd::to_int (x); | ||
const auto l = xsimd::select (xsimd::batch_bool_cast<int32_t> (x < 0.0f), xi - 1, xi); | ||
const auto f = x - xsimd::to_float (l); | ||
const auto vi = (l + 127) << 23; | ||
|
||
return reinterpret_cast<const xsimd::batch<float>&> (vi) * pow_detail::pow2_approx<xsimd::batch<float>, order> (f); | ||
} | ||
|
||
/** approximation for pow(Base, x) (64-bit SIMD) */ | ||
template <typename Base, int order> | ||
xsimd::batch<double> pow (xsimd::batch<double> x) | ||
{ | ||
x = xsimd::max (-1022.0, Base::log2_base * x); | ||
|
||
const auto xi = xsimd::to_int (x); | ||
const auto l = xsimd::select (xsimd::batch_bool_cast<int64_t> (x < 0.0), xi - 1, xi); | ||
const auto d = x - xsimd::to_float (l); | ||
const auto vi = (l + 1023) << 52; | ||
|
||
return reinterpret_cast<const xsimd::batch<double>&> (vi) * pow_detail::pow2_approx<xsimd::batch<double>, order> (d); | ||
} | ||
#endif | ||
|
||
#if defined(__GNUC__) | ||
#pragma GCC diagnostic pop // end ignore strict-aliasing warnings | ||
#endif | ||
|
||
template <int order, typename T> | ||
T exp (T x) | ||
{ | ||
return pow<pow_detail::BaseE<scalar_of_t<T>>, order> (x); | ||
} | ||
|
||
template <int order, typename T> | ||
T exp2 (T x) | ||
{ | ||
return pow<pow_detail::Base2<scalar_of_t<T>>, order> (x); | ||
} | ||
|
||
template <int order, typename T> | ||
T exp10 (T x) | ||
{ | ||
return pow<pow_detail::Base10<scalar_of_t<T>>, order> (x); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.