diff --git a/include/math_approx/math_approx.hpp b/include/math_approx/math_approx.hpp index 406aceb..9a3f06f 100644 --- a/include/math_approx/math_approx.hpp +++ b/include/math_approx/math_approx.hpp @@ -9,3 +9,5 @@ namespace math_approx #include "src/tanh_approx.hpp" #include "src/sigmoid_approx.hpp" #include "src/trig_approx.hpp" +#include "src/pow_approx.hpp" +#include "src/log_approx.hpp" diff --git a/include/math_approx/src/log_approx.hpp b/include/math_approx/src/log_approx.hpp new file mode 100644 index 0000000..6c65c33 --- /dev/null +++ b/include/math_approx/src/log_approx.hpp @@ -0,0 +1,140 @@ +#pragma once + +#include "basic_math.hpp" +#include "pow_approx.hpp" + +namespace math_approx +{ +namespace log_detail +{ + /** approximation for log2(x), optimized on the range [1, 2] */ + template + constexpr T log2_approx (T x) + { + static_assert (order >= 3 && order <= 6); + using S = scalar_of_t; + + const auto x_sq = x * x; + if constexpr (order == 3) + { + const auto x_2_3 = (S) -1.05974531422 + (S) 0.159220010975 * x; + const auto x_0_1 = (S) -2.16417056258 + (S) 3.06469586582 * x; + return x_0_1 + x_2_3 * x_sq; + } + else if constexpr (order == 4) + { + const auto x_3_4 = (S) 0.649709537672 + (S) -0.0821303550902 * x; + const auto x_1_2 = (S) 4.08637809379 + (S) -2.13412984371 * x; + const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; + return (S) -2.51982743265 + x_1_2_3_4 * x; + } + else if constexpr (order == 5) + { + const auto x_4_5 = (S) -0.419319345483 + (S) 0.0451488402558 * x; + const auto x_2_3 = (S) -3.56885211615 + (S) 1.64139451414 * x; + const auto x_0_1 = (S) -2.80534277658 + (S) 5.10697088382 * x; + const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; + return x_0_1 + x_2_3_4_5 * x_sq; + } + else if constexpr (order == 6) + { + const auto x_5_6 = (S) 0.276834061071 + (S) -0.0258400886535 * x; + const auto x_3_4 = (S) 3.30388341157 + (S) -1.27446900713 * x; + const auto x_1_2 = (S) 6.12708086513 + (S) -5.36371998242 * x; + const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; + const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; + return (S) -3.04376925958 + x_1_2_3_4_5_6 * x; + } + else + { + return {}; + } + } +} + +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning +#pragma GCC diagnostic ignored "-Wuninitialized" +#endif + +/** approximation for log(Base, x) (32-bit) */ +template +float log (float x) +{ + const auto vi = reinterpret_cast (x); + const auto ex = vi & 0x7f800000; + const auto e = (ex >> 23) - 127; + const auto vfi = (vi - ex) | 0x3f800000; + const auto vf = reinterpret_cast (vfi); + + static constexpr auto log2_base_r = 1.0f / Base::log2_base; + return log2_base_r * ((float) e + log_detail::log2_approx (vf)); +} + +/** approximation for log(x) (64-bit) */ +template +double log (double x) +{ + const auto vi = reinterpret_cast (x); + const auto ex = vi & 0x7ff0000000000000; + const auto e = (ex >> 52) - 1023; + const auto vfi = (vi - ex) | 0x3ff0000000000000; + const auto vf = reinterpret_cast (vfi); + + static constexpr auto log2_base_r = 1.0 / Base::log2_base; + return log2_base_r * ((double) e + log_detail::log2_approx (vf)); +} + +#if defined(XSIMD_HPP) +/** approximation for pow(Base, x) (32-bit SIMD) */ +template +xsimd::batch log (xsimd::batch x) +{ + const auto vi = reinterpret_cast&> (x); // NOSONAR + const auto ex = vi & 0x7f800000; + const auto e = (ex >> 23) - 127; + const auto vfi = (vi - ex) | 0x3f800000; + const auto vf = reinterpret_cast&> (vfi); // NOSONAR + + static constexpr auto log2_base_r = 1.0f / Base::log2_base; + return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx, order> (vf)); +} + +/** approximation for pow(Base, x) (64-bit SIMD) */ +template +xsimd::batch log (xsimd::batch x) +{ + const auto vi = reinterpret_cast&> (x); // NOSONAR + const auto ex = vi & 0x7ff0000000000000; + const auto e = (ex >> 52) - 1023; + const auto vfi = (vi - ex) | 0x3ff0000000000000; + const auto vf = reinterpret_cast&> (vfi); // NOSONAR + + static constexpr auto log2_base_r = 1.0 / Base::log2_base; + return log2_base_r * (xsimd::to_float (e) + log_detail::log2_approx, order> (vf)); +} +#endif + +#if defined(__GNUC__) +#pragma GCC diagnostic pop // end ignore strict-aliasing warnings +#endif + +template +T log (T x) +{ + return log>, order> (x); +} + +template +T log2 (T x) +{ + return log>, order> (x); +} + +template +T log10 (T x) +{ + return log>, order> (x); +} +} diff --git a/include/math_approx/src/pow_approx.hpp b/include/math_approx/src/pow_approx.hpp new file mode 100644 index 0000000..5cf7f11 --- /dev/null +++ b/include/math_approx/src/pow_approx.hpp @@ -0,0 +1,157 @@ +#pragma once + +#include "basic_math.hpp" + +namespace math_approx +{ +namespace pow_detail +{ + /** approximation for 2^x, optimized on the range [0, 1] */ + template + constexpr T pow2_approx (T x) + { + static_assert (order >= 3 && order <= 6); + using S = scalar_of_t; + + const auto x_sq = x * x; + if constexpr (order == 3) + { + const auto x_2_3 = (S) 0.226307586882 + (S) 0.0782680256330 * x; + const auto x_0_1 = (S) 1 + (S) 0.695424387485 * x; + return x_0_1 + x_2_3 * x_sq; + } + else if constexpr (order == 4) + { + const auto x_3_4 = (S) 0.0520324008177 + (S) 0.0135557244044 * x; + const auto x_1_2 = (S) 0.693032120001 + (S) 0.241379754777 * x; + const auto x_1_2_3_4 = x_1_2 + x_3_4 * x_sq; + return (S) 1 + x_1_2_3_4 * x; + } + else if constexpr (order == 5) + { + const auto x_4_5 = (S) 0.00899009909264 + (S) 0.00187839071291 * x; + const auto x_2_3 = (S) 0.240156326598 + (S) 0.0558229130202 * x; + const auto x_2_3_4_5 = x_2_3 + x_4_5 * x_sq; + const auto x_0_1 = (S) 1 + (S) 0.693152270576 * x; + return x_0_1 + x_2_3_4_5 * x_sq; + } + else if constexpr (order == 6) + { + const auto x_5_6 = (S) 0.00124359387839 + (S) 0.000217187820427 * x; + const auto x_3_4 = (S) 0.0554833098983 + (S) 0.00967911763840 * x; + const auto x_1_2 = (S) 0.693147003658 + (S) 0.240229787107 * x; + const auto x_3_4_5_6 = x_3_4 + x_5_6 * x_sq; + const auto x_1_2_3_4_5_6 = x_1_2 + x_3_4_5_6 * x_sq; + return (S) 1 + x_1_2_3_4_5_6 * x; + } + else + { + return {}; + } + } + + template + struct BaseE + { + static constexpr auto log2_base = (T) 1.4426950408889634074; + }; + + template + struct Base2 + { + static constexpr auto log2_base = (T) 1; + }; + + template + struct Base10 + { + static constexpr auto log2_base = (T) 3.3219280948873623479; + }; +} + +#if defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wstrict-aliasing" // these methods require some type-punning +#pragma GCC diagnostic ignored "-Wuninitialized" +#endif + +/** approximation for pow(Base, x) (32-bit) */ +template +float pow (float x) +{ + x = std::max (-126.0f, Base::log2_base * x); + + const auto xi = (int32_t) x; + const auto l = x < 0.0f ? xi - 1 : xi; + const auto f = x - (float) l; + const auto vi = (l + 127) << 23; + + return reinterpret_cast (vi) * pow_detail::pow2_approx (f); +} + +/** approximation for pow(Base, x) (64-bit) */ +template +double pow (double x) +{ + x = std::max (-1022.0, Base::log2_base * x); + + const auto xi = (int64_t) x; + const auto l = x < 0.0 ? xi - 1 : xi; + const auto d = x - (double) l; + const auto vi = (l + 1023) << 52; + + return reinterpret_cast (vi) * pow_detail::pow2_approx (d); +} + +#if defined(XSIMD_HPP) +/** approximation for pow(Base, x) (32-bit SIMD) */ +template +xsimd::batch pow (xsimd::batch x) +{ + x = xsimd::max (xsimd::broadcast (-126.0f), Base::log2_base * x); + + const auto xi = xsimd::to_int (x); + const auto l = xsimd::select (xsimd::batch_bool_cast (x < 0.0f), xi - 1, xi); + const auto f = x - xsimd::to_float (l); + const auto vi = (l + 127) << 23; + + return reinterpret_cast&> (vi) * pow_detail::pow2_approx, order> (f); +} + +/** approximation for pow(Base, x) (64-bit SIMD) */ +template +xsimd::batch pow (xsimd::batch x) +{ + x = xsimd::max (-1022.0, Base::log2_base * x); + + const auto xi = xsimd::to_int (x); + const auto l = xsimd::select (xsimd::batch_bool_cast (x < 0.0), xi - 1, xi); + const auto d = x - xsimd::to_float (l); + const auto vi = (l + 1023) << 52; + + return reinterpret_cast&> (vi) * pow_detail::pow2_approx, order> (d); +} +#endif + +#if defined(__GNUC__) +#pragma GCC diagnostic pop // end ignore strict-aliasing warnings +#endif + +template +T exp (T x) +{ + return pow>, order> (x); +} + +template +T exp2 (T x) +{ + return pow>, order> (x); +} + +template +T exp10 (T x) +{ + return pow>, order> (x); +} +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8ec6f36..622920c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -31,3 +31,5 @@ endfunction(setup_catch_test) setup_catch_test(tanh_approx_test) setup_catch_test(sigmoid_approx_test) setup_catch_test(trig_approx_test) +setup_catch_test(pow_approx_test) +setup_catch_test(log_approx_test) diff --git a/test/src/log_approx_test.cpp b/test/src/log_approx_test.cpp new file mode 100644 index 0000000..96a97b3 --- /dev/null +++ b/test/src/log_approx_test.cpp @@ -0,0 +1,115 @@ +#include "test_helpers.hpp" +#include "catch2/catch_template_test_macros.hpp" + +#include +#include + +#include + +template +void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float err_bound) +{ + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto max_error = test_helpers::abs_max (error); + + std::cout << max_error << std::endl; + REQUIRE (std::abs (max_error) < err_bound); +} + + +TEMPLATE_TEST_CASE ("Log Approx Test", "", float, double) +{ + const auto all_floats = test_helpers::all_32_bit_floats (0.01f, 10.0f, 1.0e-3f); + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + { return std::log (x); }); + + SECTION ("6th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log<6> (x); }, + 4.5e-6f); + } + SECTION ("5th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log<5> (x); }, + 1.5e-5f); + } + SECTION ("4th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log<4> (x); }, + 8.5e-5f); + } + SECTION ("3th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log<3> (x); }, + 6.5e-4f); + } +} + +TEMPLATE_TEST_CASE ("Log2 Approx Test", "", float, double) +{ + const auto all_floats = test_helpers::all_32_bit_floats (0.01f, 10.0f, 1.0e-3f); + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + { return std::log2 (x); }); + + SECTION ("6th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log2<6> (x); }, + 6.0e-6f); + } + SECTION ("5th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log2<5> (x); }, + 2.0e-5f); + } + SECTION ("4th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log2<4> (x); }, + 1.5e-4f); + } + SECTION ("3th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log2<3> (x); }, + 9.0e-4f); + } +} + +TEMPLATE_TEST_CASE ("Log10 Approx Test", "", float, double) +{ + const auto all_floats = test_helpers::all_32_bit_floats (0.01f, 10.0f, 1.0e-3f); + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + { return std::log10 (x); }); + + SECTION ("6th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log10<6> (x); }, + 2.0e-6f); + } + SECTION ("5th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log10<5> (x); }, + 6.0e-6f); + } + SECTION ("4th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log10<4> (x); }, + 4.0e-5f); + } + SECTION ("3th-Order") + { + test_approx (all_floats, y_exact, [] (auto x) + { return math_approx::log10<3> (x); }, + 3.0e-4f); + } +} diff --git a/test/src/pow_approx_test.cpp b/test/src/pow_approx_test.cpp new file mode 100644 index 0000000..f5b1efd --- /dev/null +++ b/test/src/pow_approx_test.cpp @@ -0,0 +1,195 @@ +#include "test_helpers.hpp" +#include "catch2/catch_template_test_macros.hpp" + +#include +#include + +#include + +template +void test_approx (const auto& all_floats, const auto& y_exact, auto&& f_approx, float rel_err_bound, uint32_t ulp_bound) +{ + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); + const auto ulp_error = [&] + { + if constexpr (std::is_same_v) + return test_helpers::compute_ulp_error (y_exact, y_approx); + return std::vector {}; + }(); + + const auto max_rel_error = test_helpers::abs_max (rel_error); + const auto max_ulp_error = std::is_same_v ? *std::max_element (ulp_error.begin(), ulp_error.end()) : 0; + + std::cout << max_rel_error << ", " << max_ulp_error << std::endl; + REQUIRE (std::abs (max_rel_error) < rel_err_bound); + if (ulp_bound > 0) + REQUIRE (max_ulp_error < ulp_bound); +} + +TEMPLATE_TEST_CASE ("Exp Approx Test", "", float, double) +{ + const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); + const auto y_exact = test_helpers::compute_all (all_floats, + [](auto x) + { + return std::exp (x); + }); + + SECTION ("6th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp<6> (x); + }, + 6.0e-7f, + 10); + } + SECTION ("5th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp<5> (x); + }, + 7.5e-7f, + 15); + } + SECTION ("4th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp<4> (x); + }, + 4.0e-6f, + 80); + } + SECTION ("3th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp<3> (x); + }, + 1.5e-4f, + 0); + } +} + +TEMPLATE_TEST_CASE ("Exp2 Approx Test", "", float, double) +{ + const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); + const auto y_exact = test_helpers::compute_all (all_floats, + [](auto x) + { + return std::exp2 (x); + }); + + SECTION ("6th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp2<6> (x); + }, + 3.0e-7f, + 4); + } + SECTION ("5th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp2<5> (x); + }, + 4.0e-7f, + 5); + } + SECTION ("4th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp2<4> (x); + }, + 4.0e-6f, + 70); + } + SECTION ("3th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp2<3> (x); + }, + 1.5e-4f, + 0); + } +} + +TEMPLATE_TEST_CASE ("Exp10 Approx Test", "", float, double) +{ + const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); + const auto y_exact = test_helpers::compute_all (all_floats, + [](auto x) + { + return std::pow (10.0f, x); + }); + + SECTION ("6th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp10<6> (x); + }, + 2.0e-6f, + 32); + } + SECTION ("5th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp10<5> (x); + }, + 2.5e-6f, + 35); + } + SECTION ("4th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp10<4> (x); + }, + 5.5e-6f, + 90); + } + SECTION ("3th-Order") + { + test_approx (all_floats, + y_exact, + [](auto x) + { + return math_approx::exp10<3> (x); + }, + 1.5e-4f, + 0); + } +} diff --git a/test/src/sigmoid_approx_test.cpp b/test/src/sigmoid_approx_test.cpp index 9d9033e..b444325 100644 --- a/test/src/sigmoid_approx_test.cpp +++ b/test/src/sigmoid_approx_test.cpp @@ -11,17 +11,17 @@ TEST_CASE ("Sigmoid Approx Test") #else const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); #endif - const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) { return 1.0f / (1.0f + std::exp (-x)); }); const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound) { - const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); - const auto error = test_helpers::compute_error (y_exact, y_approx); - const auto max_error = test_helpers::abs_max (error); + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto max_error = test_helpers::abs_max (error); - // std::cout << max_error << std::endl; + std::cout << max_error << std::endl; REQUIRE (std::abs (max_error) < err_bound); }; diff --git a/test/src/tanh_approx_test.cpp b/test/src/tanh_approx_test.cpp index d220fec..fe24b73 100644 --- a/test/src/tanh_approx_test.cpp +++ b/test/src/tanh_approx_test.cpp @@ -11,19 +11,19 @@ TEST_CASE ("Tanh Approx Test") #else const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); #endif - const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) { return std::tanh (x); }); const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound, float rel_err_bound, uint32_t ulp_bound) { - const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); - const auto error = test_helpers::compute_error (y_exact, y_approx); - const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto rel_error = test_helpers::compute_rel_error (y_exact, y_approx); const auto ulp_error = test_helpers::compute_ulp_error (y_exact, y_approx); - const auto max_error = test_helpers::abs_max (error); - const auto max_rel_error = test_helpers::abs_max (rel_error); + const auto max_error = test_helpers::abs_max (error); + const auto max_rel_error = test_helpers::abs_max (rel_error); const auto max_ulp_error = *std::max_element (ulp_error.begin(), ulp_error.end()); std::cout << max_error << ", " << max_rel_error << ", " << max_ulp_error << std::endl; diff --git a/test/src/test_helpers.hpp b/test/src/test_helpers.hpp index fca2392..a2815de 100644 --- a/test/src/test_helpers.hpp +++ b/test/src/test_helpers.hpp @@ -10,29 +10,30 @@ namespace test_helpers { +template inline auto all_32_bit_floats (float begin, float end, float tol = 1.0e-10f) { - std::vector vec; + std::vector vec; vec.reserve (1 << 20); - begin = vec.emplace_back (begin); + begin = (float) vec.emplace_back (static_cast (begin)); while (begin < end) { if (std::abs (begin) < tol) { - begin = vec.emplace_back (0.0f); - begin = vec.emplace_back (tol); + begin = (float) vec.emplace_back (static_cast (0)); + begin = (float) vec.emplace_back (static_cast (tol)); } - begin = vec.emplace_back (std::nextafter (begin, end)); + begin = (float) vec.emplace_back (static_cast (std::nextafter (begin, end))); } return vec; } -template -auto compute_all (std::span all_floats, +template +auto compute_all (std::span all_floats, F&& f) { - std::vector y; + std::vector y; y.resize (all_floats.size()); for (size_t i = 0; i < all_floats.size(); ++i) y[i] = f (all_floats[i]); @@ -40,18 +41,20 @@ auto compute_all (std::span all_floats, return y; } -inline std::vector compute_error (std::span actual, std::span approx) +template +inline std::vector compute_error (std::span actual, std::span approx) { - std::vector err; + std::vector err; err.resize (actual.size()); for (size_t i = 0; i < actual.size(); ++i) err[i] = (actual[i] - approx[i]); return err; } -inline std::vector compute_rel_error (std::span actual, std::span approx) +template +inline std::vector compute_rel_error (std::span actual, std::span approx) { - std::vector err; + std::vector err; err.resize (actual.size()); for (size_t i = 0; i < actual.size(); ++i) err[i] = (actual[i] - approx[i]) / actual[i]; @@ -116,7 +119,8 @@ inline auto compute_ulp_error (std::span actual, std::span x) +template +inline T abs_max (std::span x) { const auto [min, max] = std::minmax_element (x.begin(), x.end()); diff --git a/test/src/trig_approx_test.cpp b/test/src/trig_approx_test.cpp index f4a4799..71735c4 100644 --- a/test/src/trig_approx_test.cpp +++ b/test/src/trig_approx_test.cpp @@ -11,17 +11,17 @@ TEST_CASE ("Sine Approx Test") #else const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); #endif - const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) { return std::sin (x); }); const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound) { - const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); - const auto error = test_helpers::compute_error (y_exact, y_approx); - const auto max_error = test_helpers::abs_max (error); + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto max_error = test_helpers::abs_max (error); - // std::cout << max_error << std::endl; + std::cout << max_error << std::endl; REQUIRE (std::abs (max_error) < err_bound); }; @@ -52,17 +52,17 @@ TEST_CASE ("Cosine Approx Test") #else const auto all_floats = test_helpers::all_32_bit_floats (-10.0f, 10.0f, 1.0e-1f); #endif - const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) + const auto y_exact = test_helpers::compute_all (all_floats, [] (auto x) { return std::cos (x); }); const auto test_approx = [&all_floats, &y_exact] (auto&& f_approx, float err_bound) { - const auto y_approx = test_helpers::compute_all (all_floats, f_approx); + const auto y_approx = test_helpers::compute_all (all_floats, f_approx); - const auto error = test_helpers::compute_error (y_exact, y_approx); - const auto max_error = test_helpers::abs_max (error); + const auto error = test_helpers::compute_error (y_exact, y_approx); + const auto max_error = test_helpers::abs_max (error); - // std::cout << max_error << std::endl; + std::cout << max_error << std::endl; REQUIRE (std::abs (max_error) < err_bound); }; diff --git a/tools/bench/CMakeLists.txt b/tools/bench/CMakeLists.txt index 42c8769..773d33c 100644 --- a/tools/bench/CMakeLists.txt +++ b/tools/bench/CMakeLists.txt @@ -13,3 +13,9 @@ target_link_libraries(sigmoid_approx_bench PRIVATE benchmark::benchmark math_app add_executable(trig_approx_bench trig_bench.cpp) target_link_libraries(trig_approx_bench PRIVATE benchmark::benchmark math_approx) + +add_executable(pow_approx_bench pow_bench.cpp) +target_link_libraries(pow_approx_bench PRIVATE benchmark::benchmark math_approx) + +add_executable(log_approx_bench log_bench.cpp) +target_link_libraries(log_approx_bench PRIVATE benchmark::benchmark math_approx) diff --git a/tools/bench/log_bench.cpp b/tools/bench/log_bench.cpp new file mode 100644 index 0000000..3e12c8a --- /dev/null +++ b/tools/bench/log_bench.cpp @@ -0,0 +1,77 @@ +#include +#include + +static constexpr size_t N = 2000; +const auto data = [] +{ + std::vector x; + x.resize (N, 0.0f); + for (size_t i = 0; i < N; ++i) + x[i] = -10.0f + 20.0f * (float) i / (float) N; + return x; +}(); + +#define LOG_BENCH(name, func) \ +void name (benchmark::State& state) \ +{ \ +for (auto _ : state) \ +{ \ +for (auto& x : data) \ +{ \ +auto y = func (x); \ +benchmark::DoNotOptimize (y); \ +} \ +} \ +} \ +BENCHMARK (name); +LOG_BENCH (log_std, std::log) +LOG_BENCH (log_approx6, math_approx::log<6>) +LOG_BENCH (log_approx5, math_approx::log<5>) +LOG_BENCH (log_approx4, math_approx::log<4>) +LOG_BENCH (log_approx3, math_approx::log<3>) + +LOG_BENCH (log2_std, std::log2) +LOG_BENCH (log2_approx6, math_approx::log2<6>) +LOG_BENCH (log2_approx5, math_approx::log2<5>) +LOG_BENCH (log2_approx4, math_approx::log2<4>) +LOG_BENCH (log2_approx3, math_approx::log2<3>) + +LOG_BENCH (log10_std, std::log10) +LOG_BENCH (log10_approx6, math_approx::log10<6>) +LOG_BENCH (log10_approx5, math_approx::log10<5>) +LOG_BENCH (log10_approx4, math_approx::log10<4>) +LOG_BENCH (log10_approx3, math_approx::log10<3>) + +#define LOG_SIMD_BENCH(name, func) \ +void name (benchmark::State& state) \ +{ \ +for (auto _ : state) \ +{ \ +for (auto& x : data) \ +{ \ +auto y = func (xsimd::broadcast (x)); \ +static_assert (std::is_same_v, decltype(y)>); \ +benchmark::DoNotOptimize (y); \ +} \ +} \ +} \ +BENCHMARK (name); +LOG_SIMD_BENCH (log_xsimd, xsimd::log) +LOG_SIMD_BENCH (log_simd_approx6, math_approx::log<6>) +LOG_SIMD_BENCH (log_simd_approx5, math_approx::log<5>) +LOG_SIMD_BENCH (log_simd_approx4, math_approx::log<4>) +LOG_SIMD_BENCH (log_simd_approx3, math_approx::log<3>) + +LOG_SIMD_BENCH (log2_xsimd, xsimd::log2) +LOG_SIMD_BENCH (log2_simd_approx6, math_approx::log2<6>) +LOG_SIMD_BENCH (log2_simd_approx5, math_approx::log2<5>) +LOG_SIMD_BENCH (log2_simd_approx4, math_approx::log2<4>) +LOG_SIMD_BENCH (log2_simd_approx3, math_approx::log2<3>) + +LOG_SIMD_BENCH (log10_xsimd, xsimd::log10) +LOG_SIMD_BENCH (log10_simd_approx6, math_approx::log10<6>) +LOG_SIMD_BENCH (log10_simd_approx5, math_approx::log10<5>) +LOG_SIMD_BENCH (log10_simd_approx4, math_approx::log10<4>) +LOG_SIMD_BENCH (log10_simd_approx3, math_approx::log10<3>) + +BENCHMARK_MAIN(); diff --git a/tools/bench/pow_bench.cpp b/tools/bench/pow_bench.cpp new file mode 100644 index 0000000..0f2f59d --- /dev/null +++ b/tools/bench/pow_bench.cpp @@ -0,0 +1,81 @@ +#include +#include + +static constexpr size_t N = 2000; +const auto data = [] +{ + std::vector x; + x.resize (N, 0.0f); + for (size_t i = 0; i < N; ++i) + x[i] = -10.0f + 20.0f * (float) i / (float) N; + return x; +}(); + +#define POW_BENCH(name, func) \ +void name (benchmark::State& state) \ +{ \ +for (auto _ : state) \ +{ \ +for (auto& x : data) \ +{ \ +auto y = func (x); \ +benchmark::DoNotOptimize (y); \ +} \ +} \ +} \ +BENCHMARK (name); +POW_BENCH (exp_std, std::exp) +POW_BENCH (exp_approx6, math_approx::exp<6>) +POW_BENCH (exp_approx5, math_approx::exp<5>) +POW_BENCH (exp_approx4, math_approx::exp<4>) +POW_BENCH (exp_approx3, math_approx::exp<3>) + +POW_BENCH (exp2_std, std::exp2) +POW_BENCH (exp2_approx6, math_approx::exp2<6>) +POW_BENCH (exp2_approx5, math_approx::exp2<5>) +POW_BENCH (exp2_approx4, math_approx::exp2<4>) +POW_BENCH (exp2_approx3, math_approx::exp2<3>) + +float stdpow_exp10 (float x) +{ + return std::pow (10.0f, x); +} +POW_BENCH (exp10_std, stdpow_exp10) +POW_BENCH (exp10_approx6, math_approx::exp10<6>) +POW_BENCH (exp10_approx5, math_approx::exp10<5>) +POW_BENCH (exp10_approx4, math_approx::exp10<4>) +POW_BENCH (exp10_approx3, math_approx::exp10<3>) + +#define POW_SIMD_BENCH(name, func) \ +void name (benchmark::State& state) \ +{ \ +for (auto _ : state) \ +{ \ +for (auto& x : data) \ +{ \ +auto y = func (xsimd::broadcast (x)); \ +static_assert (std::is_same_v, decltype(y)>); \ +benchmark::DoNotOptimize (y); \ +} \ +} \ +} \ +BENCHMARK (name); +POW_SIMD_BENCH (exp_xsimd, xsimd::exp) +POW_SIMD_BENCH (exp_simd_approx6, math_approx::exp<6>) +POW_SIMD_BENCH (exp_simd_approx5, math_approx::exp<5>) +POW_SIMD_BENCH (exp_simd_approx4, math_approx::exp<4>) +POW_SIMD_BENCH (exp_simd_approx3, math_approx::exp<3>) + +POW_SIMD_BENCH (exp2_xsimd, xsimd::exp2) +POW_SIMD_BENCH (exp2_simd_approx6, math_approx::exp2<6>) +POW_SIMD_BENCH (exp2_simd_approx5, math_approx::exp2<5>) +POW_SIMD_BENCH (exp2_simd_approx4, math_approx::exp2<4>) +POW_SIMD_BENCH (exp2_simd_approx3, math_approx::exp2<3>) + +POW_SIMD_BENCH (exp10_xsimd, xsimd::exp10) +POW_SIMD_BENCH (exp10_simd_approx6, math_approx::exp10<6>) +POW_SIMD_BENCH (exp10_simd_approx5, math_approx::exp10<5>) +POW_SIMD_BENCH (exp10_simd_approx4, math_approx::exp10<4>) +POW_SIMD_BENCH (exp10_simd_approx3, math_approx::exp10<3>) + +BENCHMARK_MAIN(); diff --git a/tools/plotter/plotter.cpp b/tools/plotter/plotter.cpp index c41ca97..879533b 100644 --- a/tools/plotter/plotter.cpp +++ b/tools/plotter/plotter.cpp @@ -54,20 +54,21 @@ void plot_function (std::span all_floats, plt::named_plot (name, all_floats, y_approx); } +#define FLOAT_FUNC(func) [] (float x) { return func (x); } + int main() { plt::figure(); - const auto range = std::make_pair (-3.141f, 3.141f); + const auto range = std::make_pair (0.01f, 10.0f); static constexpr auto tol = 1.0e-2f; const auto all_floats = test_helpers::all_32_bit_floats (range.first, range.second, tol); - const auto y_exact = test_helpers::compute_all (all_floats, [] (float x) - { return std::cos (x); }); + const auto y_exact = test_helpers::compute_all (all_floats, FLOAT_FUNC(std::log)); - // // plot_error (all_floats, y_exact, [] (float x) { return math_approx::sin<5> (x); }, "Sin-5"); - // // plot_error (all_floats, y_exact, [] (float x) { return math_approx::sin<7> (x); }, "Sin-7"); - // plot_ulp_error (all_floats, y_exact, [] (float x) { return math_approx::cos_mpi_pi<9> (x); }, "Cos-9"); - plot_function (all_floats, [] (float x) { return math_approx::cos_mpi_pi<9> (x); }, "Cos-9"); + // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC(math_approx::exp2<4>), "Exp2-4"); + // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC(math_approx::exp2<5>), "Exp2-5"); + // plot_ulp_error (all_floats, y_exact, FLOAT_FUNC(math_approx::exp2<6>), "Exp2-6"); + plot_error (all_floats, y_exact, FLOAT_FUNC (math_approx::log<6>), "Log-6"); plt::legend ({ { "loc", "upper right" } }); plt::xlim (range.first, range.second);