diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index a1aee8b32..cf665174f 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -30,3 +30,4 @@ setup_benchmark(MatrixOpsBench MatrixOpsBench.cpp chowdsp_math juce_dsp) setup_benchmark(BufferBench BufferBench.cpp chowdsp_filters juce_dsp) setup_benchmark(DecibelsBench DecibelsBench.cpp chowdsp_math juce_audio_basics) setup_benchmark(AbstractTreeBench AbstractTreeBench.cpp chowdsp_data_structures) +setup_benchmark(TrigBench TrigBench.cpp chowdsp_math juce_dsp) diff --git a/bench/TrigBench.cpp b/bench/TrigBench.cpp new file mode 100644 index 000000000..f93c1b843 --- /dev/null +++ b/bench/TrigBench.cpp @@ -0,0 +1,89 @@ +#include + +#include +#include + +static constexpr size_t N = 2048; +static constexpr auto data_mpi_pi = [] +{ + std::array angle_values {}; + for (size_t i = 0; i < N; ++i) + angle_values[i] = -juce::MathConstants::pi + juce::MathConstants::twoPi * (float) i / float (N - 1); + return angle_values; +}(); + +static constexpr auto data_m10pi_10pi = [] +{ + std::array angle_values {}; + for (size_t i = 0; i < N; ++i) + angle_values[i] = -10.0f * (juce::MathConstants::pi + juce::MathConstants::twoPi * (float) i / float (N - 1)); + return angle_values; +}(); + +#define TRIG_BENCH_RANGE(name, range, op) \ + static void name (benchmark::State& state) \ + { \ + for (auto _ : state) \ + { \ + for (const auto& x : range) \ + op \ + } \ + } \ + BENCHMARK (name)->MinTime (5) + +#define TRIG_BENCH(name, op_mpi_pi, op_full_range) \ + TRIG_BENCH_RANGE (name##_mpi_pi, data_mpi_pi, op_mpi_pi); \ + TRIG_BENCH_RANGE (name##_m10pi_10pi, data_m10pi_10pi, op_full_range) + +#define SIN_BENCH(name, op_mpi_pi, op_full_range) \ + TRIG_BENCH ( \ + name##_sin, { \ + const auto s = op_mpi_pi (x); \ + benchmark::DoNotOptimize (s); }, { \ + const auto s = op_full_range (x); \ + benchmark::DoNotOptimize (s); }) + +SIN_BENCH (std, std::sin, std::sin); +SIN_BENCH (juce, juce::dsp::FastMathApproximations::sin, std::sin); // JUCE approximation is range-limited +SIN_BENCH (bhaskara, chowdsp::TrigApprox::sin_bhaskara_mpi_pi, chowdsp::TrigApprox::sin_bhaskara); +SIN_BENCH (order1, chowdsp::TrigApprox::sin_1st_order_mpi_pi, chowdsp::TrigApprox::sin_1st_order); +SIN_BENCH (tri_angle_9, chowdsp::TrigApprox::sin_3angle_mpi_pi<9>, chowdsp::TrigApprox::sin_3angle<9>); +SIN_BENCH (tri_angle_7, chowdsp::TrigApprox::sin_3angle_mpi_pi<7>, chowdsp::TrigApprox::sin_3angle<7>); +SIN_BENCH (tri_angle_5, chowdsp::TrigApprox::sin_3angle_mpi_pi<5>, chowdsp::TrigApprox::sin_3angle<5>); +SIN_BENCH (tri_angle_3, chowdsp::TrigApprox::sin_3angle_mpi_pi<3>, chowdsp::TrigApprox::sin_3angle<3>); + +#define COS_BENCH(name, op_mpi_pi, op_full_range) \ + TRIG_BENCH ( \ + name##_cos, { \ + const auto c = op_mpi_pi (x); \ + benchmark::DoNotOptimize (c); }, { \ + const auto c = op_full_range (x); \ + benchmark::DoNotOptimize (c); }) + +COS_BENCH (std, std::cos, std::cos); +COS_BENCH (juce, juce::dsp::FastMathApproximations::cos, std::cos); // JUCE approximation is range-limited +COS_BENCH (bhaskara, chowdsp::TrigApprox::cos_bhaskara_mpi_pi, chowdsp::TrigApprox::cos_bhaskara); +COS_BENCH (order1, chowdsp::TrigApprox::cos_1st_order_mpi_pi, chowdsp::TrigApprox::cos_1st_order); +COS_BENCH (tri_angle_8, chowdsp::TrigApprox::cos_3angle_mpi_pi<8>, chowdsp::TrigApprox::cos_3angle<8>); +COS_BENCH (tri_angle_6, chowdsp::TrigApprox::cos_3angle_mpi_pi<6>, chowdsp::TrigApprox::cos_3angle<6>); +COS_BENCH (tri_angle_4, chowdsp::TrigApprox::cos_3angle_mpi_pi<4>, chowdsp::TrigApprox::cos_3angle<4>); + +#define SIN_COS_BENCH(name, op_mpi_pi, op_full_range) \ + TRIG_BENCH ( \ + name##_sin_cos, { \ + const auto sc = op_mpi_pi (x); \ + benchmark::DoNotOptimize (sc); }, { \ + const auto sc = op_full_range (x); \ + benchmark::DoNotOptimize (sc); }) + +const auto std_sin_cos = [] (float x) +{ + return std::make_tuple (std::sin (x), std::cos (x)); +}; + +SIN_COS_BENCH (std, std_sin_cos, std_sin_cos); +SIN_COS_BENCH (tri_angle_7_8, (chowdsp::TrigApprox::sin_cos_3angle_mpi_pi<7, 8>), (chowdsp::TrigApprox::sin_cos_3angle<7, 8>) ); +SIN_COS_BENCH (tri_angle_5_6, (chowdsp::TrigApprox::sin_cos_3angle_mpi_pi<5, 6>), (chowdsp::TrigApprox::sin_cos_3angle<5, 6>) ); +SIN_COS_BENCH (tri_angle_3_4, (chowdsp::TrigApprox::sin_cos_3angle_mpi_pi<3, 4>), (chowdsp::TrigApprox::sin_cos_3angle<3, 4>) ); + +BENCHMARK_MAIN(); diff --git a/modules/dsp/chowdsp_math/Math/chowdsp_TrigApprox.h b/modules/dsp/chowdsp_math/Math/chowdsp_TrigApprox.h new file mode 100644 index 000000000..5b2497e9f --- /dev/null +++ b/modules/dsp/chowdsp_math/Math/chowdsp_TrigApprox.h @@ -0,0 +1,348 @@ +#pragma once + +namespace chowdsp +{ +/** + * Approximation functions for trigonometric functions. + * + * References: + * - Sine plots: https://www.desmos.com/calculator/rnsmcx6wb5 + * - Cosine plots: https://www.desmos.com/calculator/tgt3dejrgr + */ +namespace TrigApprox +{ +#ifndef DOXYGEN + namespace detail + { + template + static constexpr auto recip_2pi = (T) 1 / juce::MathConstants::twoPi; + + template + T truncate (T x) + { + return T ((int) x); + } + +#if ! CHOWDSP_NO_XSIMD + template + xsimd::batch truncate (xsimd::batch x) + { + return xsimd::to_float (xsimd::to_int (x)); + } +#endif + + /** Fast method to wrap a value into the range [-pi, pi] */ + template + T fast_mod_mpi_pi (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + x += juce::MathConstants::pi; + const auto mod = x - juce::MathConstants::twoPi * truncate (x * recip_2pi); + return SIMDUtils::select (x >= (T) 0, mod, mod + juce::MathConstants::twoPi) - juce::MathConstants::pi; + } + + /** Shifts range [-pi, pi] to [-pi/2, 3pi/2] for cosine calculations */ + template + T shift_cosine_range (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + return SIMDUtils::select (x >= -juce::MathConstants::halfPi, + x, + x + juce::MathConstants::twoPi); + } + + /** + * Polynomial approximations of sine for [-pi/3, pi] + * + * Starts as a Taylor approximation, but then tries to fix the end-points, + * minimize max error. + * + * Reference: https://www.wolframcloud.com/env/chowdsp/sin_approx.nb + */ + template + T sin_o3 (T theta, T theta_sq, [[maybe_unused]] T theta_quad) + { + using NumericType = SampleTypeHelpers::NumericType; + static_assert (order % 2 == 1 && order > 1 && order <= 9, "Order must be an odd integer within [3,9]"); + + if constexpr (order == 9) + { + const auto s1 = NumericType (1) + NumericType (-1.66666662492e-1) * theta_sq; + const auto s2 = NumericType (8.33330303755e-3) + NumericType (-1.98339763113e-4) * theta_sq; + const auto s3 = NumericType (2.68396896919e-6) * theta_quad; + return theta * (s1 + (s2 + s3) * theta_quad); + } + else if constexpr (order == 7) + { + const auto s1 = NumericType (1) + NumericType (-1.66664550673e-1) * theta_sq; + const auto s2 = NumericType (8.32622561667e-3) + NumericType (-1.90698699142e-4) * theta_sq; + return theta * (s1 + s2 * theta_quad); + } + else if constexpr (order == 5) + { + const auto s1 = NumericType (1) + NumericType (-1.66531874421e-1) * theta_sq; + const auto s2 = NumericType (7.99611485830e-3) * theta_quad; + return theta * (s1 + s2); + } + else if constexpr (order == 3) + { + const auto s1 = NumericType (1) + NumericType (-1.57763153266e-1) * theta_sq; + return theta * s1; + } + } + + /** + * Polynomial approximations of cosine for [-pi/3, pi] + * + * Starts as a Taylor approximation, but then tries to fix the end-points, + * minimize max error. + * + * Reference: https://www.wolframcloud.com/env/chowdsp/cos_approx.nb + */ + template + T cos_o3 ([[maybe_unused]] T theta, T theta_sq, [[maybe_unused]] T theta_quad) + { + using NumericType = SampleTypeHelpers::NumericType; + static_assert (order % 2 == 0 && order > 2 && order <= 8, "Order must be an odd integer within [2,8]"); + + if constexpr (order == 8) + { + const auto c1 = (NumericType) -0.5 + (NumericType) 4.16666094252e-2 * theta_sq; + const auto c2 = (NumericType) -1.38854590421e-3 + (NumericType) 2.42367173361e-5 * theta_sq; + return (NumericType) 1 + theta_sq * (c1 + c2 * theta_quad); + } + else if constexpr (order == 6) + { + const auto c1 = (NumericType) -0.5 + (NumericType) 4.16527333677e-2 * theta_sq; + const auto c2 = (NumericType) -1.34931392239e-3 * theta_quad; + return (NumericType) 1 + theta_sq * (c1 + c2); + } + else if constexpr (order == 4) + { + const auto c1 = (NumericType) -0.5 + (NumericType) 4.01730450758e-2 * theta_sq; + return (NumericType) 1 + theta_sq * c1; + } + } + } // namespace detail +#endif // DOXYGEN + + /** + * Bhaskara I's sine approximation valid for x in [-pi, pi] + * + * Max error: 1.64e-3 + * + * Reference: https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula + */ + template + T sin_bhaskara_mpi_pi (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + static constexpr auto pi = juce::MathConstants::pi; + jassert (SIMDUtils::all (x <= pi) + && SIMDUtils::all (x >= -pi)); + + CHOWDSP_USING_XSIMD_STD (abs); + const auto abs_x = abs (x); + const auto x_pi_minus_x = abs_x * (pi - abs_x); + const auto numerator = (NumericType) 16 * x_pi_minus_x; + + static constexpr auto five_pi_sq = (NumericType) 5 * pi * pi; + const auto denominator = five_pi_sq - (NumericType) 4 * x_pi_minus_x; + + const auto result = numerator / denominator; + return SIMDUtils::select (x >= 0, result, -result); + } + + /** Bhaskara I's sine approximation valid across the whole range. */ + template + T sin_bhaskara (T x) + { + return sin_bhaskara_mpi_pi (detail::fast_mod_mpi_pi (x)); + } + + /** + * First-order sine approximation. + * + * Max error: 5.6e-2 + */ + template + T sin_1st_order_mpi_pi (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + static constexpr auto pi = juce::MathConstants::pi; + jassert (SIMDUtils::all (x <= pi) + && SIMDUtils::all (x >= -pi)); + + CHOWDSP_USING_XSIMD_STD (abs); + static constexpr auto four_over_pi_sq = (NumericType) 4 / (pi * pi); + const auto abs_x = abs (x); + + return four_over_pi_sq * x * (pi - abs_x); + } + + /** Full-range first-order sine approximation. */ + template + T sin_1st_order (T x) + { + return sin_1st_order_mpi_pi (detail::fast_mod_mpi_pi (x)); + } + + /** + * Sine approximation using a Taylor approximation on the + * range [-pi/3, pi/3], expanded to [-pi, pi] using the + * triple angle formula. + * + * The approximation is parameterized on the order of + * the Taylor approximation, which _must_ be an odd integer. + * + * Max error (9th-order): 3.5e-10 + * Max error (7th-order): 1.142e-7 + * Max error (5th-order): 4.3e-5 + * Max error (3th-order): 7.3e-3 + */ + template + T sin_3angle_mpi_pi (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + [[maybe_unused]] static constexpr auto pi = juce::MathConstants::pi; + jassert (SIMDUtils::all (x <= pi) + && SIMDUtils::all (x >= -pi)); + + const auto theta_o3 = x * NumericType (1.0 / 3.0); + const auto theta_o3_sq = theta_o3 * theta_o3; + const auto theta_o3_quad = theta_o3_sq * theta_o3_sq; + const auto sin_o3 = detail::sin_o3 (theta_o3, theta_o3_sq, theta_o3_quad); + return sin_o3 * ((NumericType) 3 + (NumericType) -4 * sin_o3 * sin_o3); + } + + /** Full-range triple-angle sine approximation. */ + template + T sin_3angle (T x) + { + return sin_3angle_mpi_pi (detail::fast_mod_mpi_pi (x)); + } + + /** + * Bhaskara I's cosine approximation valid for x in [-pi, pi] + * + * Max error: 1.64e-3 + * + * Reference: https://en.wikipedia.org/wiki/Bh%C4%81skara_I%27s_sine_approximation_formula + */ + template + T cos_bhaskara_mpi_pi (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + static constexpr auto pi = juce::MathConstants::pi; + jassert (SIMDUtils::all (x <= pi) + && SIMDUtils::all (x >= -pi)); + + x = detail::shift_cosine_range (x); + const auto needs_flip = x > juce::MathConstants::halfPi; + + x = SIMDUtils::select (needs_flip, x - pi, x); + + static constexpr auto pi_sq = pi * pi; + const auto x_sq = x * x; + const auto result = (pi_sq - (NumericType) 4 * x_sq) / (pi_sq + x_sq); + return SIMDUtils::select (needs_flip, -result, result); + } + + /** Bhaskara I's cosine approximation valid across the whole range. */ + template + T cos_bhaskara (T x) + { + return cos_bhaskara_mpi_pi (detail::fast_mod_mpi_pi (x)); + } + + /** + * First-order cosine approximation. + * + * Max error: 5.6e-2 + */ + template + T cos_1st_order_mpi_pi (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + [[maybe_unused]] static constexpr auto pi = juce::MathConstants::pi; + jassert (SIMDUtils::all (x <= pi) + && SIMDUtils::all (x >= -pi)); + + x = detail::shift_cosine_range (x); + return -sin_1st_order_mpi_pi (x - juce::MathConstants::halfPi); + } + + /** Full-range first-order cosine approximation. */ + template + T cos_1st_order (T x) + { + return cos_1st_order_mpi_pi (detail::fast_mod_mpi_pi (x)); + } + + /** + * Cosine approximation using a Taylor approximation on the + * range [-pi/3, pi/3], expanded to [-pi, pi] using the + * triple angle formula. + * + * The approximation is parameterized on the order of + * the Taylor approximation, which _must_ be an odd integer. + * + * Max error (8th-order): 7.25e-9 + * Max error (6th-order): 2.24e-6 + * Max error (4th-order): 7.93e-4 + */ + template + T cos_3angle_mpi_pi (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + [[maybe_unused]] static constexpr auto pi = juce::MathConstants::pi; + jassert (SIMDUtils::all (x <= pi) + && SIMDUtils::all (x >= -pi)); + + const auto theta_o3 = x * NumericType (1.0 / 3.0); + const auto theta_o3_sq = theta_o3 * theta_o3; + const auto theta_o3_quad = theta_o3_sq * theta_o3_sq; + const auto cos_o3 = detail::cos_o3 (theta_o3, theta_o3_sq, theta_o3_quad); + return cos_o3 * ((NumericType) -3 + (NumericType) 4 * cos_o3 * cos_o3); + } + + /** Full-range triple-angle cosine approximation. */ + template + T cos_3angle (T x) + { + return cos_3angle_mpi_pi (detail::fast_mod_mpi_pi (x)); + } + + /** + * Combined sine/cosine approximation, using the triple-angle + * approximations described above. + */ + template + auto sin_cos_3angle_mpi_pi (T x) + { + using NumericType = SampleTypeHelpers::NumericType; + [[maybe_unused]] static constexpr auto pi = juce::MathConstants::pi; + jassert (SIMDUtils::all (x <= pi) + && SIMDUtils::all (x >= -pi)); + + const auto theta_o3 = x * NumericType (1.0 / 3.0); + const auto theta_o3_sq = theta_o3 * theta_o3; + const auto theta_o3_quad = theta_o3_sq * theta_o3_sq; + + const auto sin_o3 = detail::sin_o3 (theta_o3, theta_o3_sq, theta_o3_quad); + const auto s = sin_o3 * ((NumericType) 3 + (NumericType) -4 * sin_o3 * sin_o3); + + const auto cos_o3 = detail::cos_o3 (theta_o3, theta_o3_sq, theta_o3_quad); + const auto c = cos_o3 * ((NumericType) -3 + (NumericType) 4 * cos_o3 * cos_o3); + + return std::make_tuple (s, c); + } + + /** Full-range triple-angle sine/cosine approximation. */ + template + auto sin_cos_3angle (T x) + { + return sin_cos_3angle_mpi_pi (detail::fast_mod_mpi_pi (x)); + } +} // namespace TrigApprox +} // namespace chowdsp diff --git a/modules/dsp/chowdsp_math/chowdsp_math.h b/modules/dsp/chowdsp_math/chowdsp_math.h index 924434cd3..3bd6baa34 100644 --- a/modules/dsp/chowdsp_math/chowdsp_math.h +++ b/modules/dsp/chowdsp_math/chowdsp_math.h @@ -43,6 +43,7 @@ JUCE_END_IGNORE_WARNINGS_GCC_LIKE #include "Math/chowdsp_LogApprox.h" #include "Math/chowdsp_PowApprox.h" #include "Math/chowdsp_DecibelsApprox.h" +#include "Math/chowdsp_TrigApprox.h" #if JUCE_MODULE_AVAILABLE_chowdsp_buffers #include diff --git a/tests/dsp_tests/chowdsp_math_test/CMakeLists.txt b/tests/dsp_tests/chowdsp_math_test/CMakeLists.txt index 84be7dd34..67c42f81d 100644 --- a/tests/dsp_tests/chowdsp_math_test/CMakeLists.txt +++ b/tests/dsp_tests/chowdsp_math_test/CMakeLists.txt @@ -14,4 +14,5 @@ target_sources(chowdsp_math_test LogApproxTest.cpp PowApproxTest.cpp DecibelsApproxTest.cpp + TrigApproxTest.cpp ) diff --git a/tests/dsp_tests/chowdsp_math_test/TrigApproxTest.cpp b/tests/dsp_tests/chowdsp_math_test/TrigApproxTest.cpp new file mode 100644 index 000000000..108208372 --- /dev/null +++ b/tests/dsp_tests/chowdsp_math_test/TrigApproxTest.cpp @@ -0,0 +1,84 @@ +#include +#include + +namespace ta = chowdsp::TrigApprox; + +TEMPLATE_TEST_CASE ("Trig Approx Test", "[dsp][math][simd]", float, double, xsimd::batch, xsimd::batch) +{ + using FloatType = TestType; + using NumericType = chowdsp::SampleTypeHelpers::NumericType; + + SECTION ("Sine") + { + const auto tester = [] (FloatType (*testFunc) (FloatType), NumericType margin) + { + if constexpr (std::is_same_v) + margin = std::sqrt (margin); + + static constexpr auto pi10 = (NumericType) 10 * juce::MathConstants::pi; + static constexpr auto inc = (NumericType) 0.001 * juce::MathConstants::pi; + for (auto x = -pi10; x < pi10; x += inc) + { + const auto ref = std::sin (x); + const auto input = (FloatType) x; + REQUIRE (testFunc (input) == SIMDApprox { ref }.margin (margin)); + } + }; + + tester (&ta::sin_bhaskara, (NumericType) 1.65e-3); + tester (&ta::sin_1st_order, (NumericType) 5.65e-2); + tester (&ta::sin_3angle<9, FloatType>, (NumericType) 3.5e-10); + tester (&ta::sin_3angle<7, FloatType>, (NumericType) 1.15e-7); + tester (&ta::sin_3angle<5, FloatType>, (NumericType) 4.35e-5); + tester (&ta::sin_3angle<3, FloatType>, (NumericType) 7.35e-3); + } + + SECTION ("Cosine") + { + const auto tester = [] (FloatType (*testFunc) (FloatType), NumericType margin) + { + if constexpr (std::is_same_v) + margin = std::sqrt (margin); + + static constexpr auto pi10 = (NumericType) 10 * juce::MathConstants::pi; + static constexpr auto inc = (NumericType) 0.001 * juce::MathConstants::pi; + for (auto x = -pi10; x < pi10; x += inc) + { + const auto ref = std::cos (x); + const auto input = (FloatType) x; + REQUIRE (testFunc (input) == SIMDApprox { ref }.margin (margin)); + } + }; + + tester (&ta::cos_bhaskara, (NumericType) 1.65e-3); + tester (&ta::cos_1st_order, (NumericType) 5.65e-2); + tester (&ta::cos_3angle<8, FloatType>, (NumericType) 7.25e-9); + tester (&ta::cos_3angle<6, FloatType>, (NumericType) 2.24e-6); + tester (&ta::cos_3angle<4, FloatType>, (NumericType) 7.93e-4); + } + + SECTION ("Sine/Cosine") + { + const auto tester = [] (std::tuple (*testFunc) (FloatType), NumericType margin) + { + if constexpr (std::is_same_v) + margin = std::sqrt (margin); + + static constexpr auto pi10 = (NumericType) 10 * juce::MathConstants::pi; + static constexpr auto inc = (NumericType) 0.001 * juce::MathConstants::pi; + for (auto x = -pi10; x < pi10; x += inc) + { + const auto ref_s = std::sin (x); + const auto ref_c = std::cos (x); + const auto input = (FloatType) x; + const auto [test_s, test_c] = testFunc (input); + REQUIRE (test_s == SIMDApprox { ref_s }.margin (margin)); + REQUIRE (test_c == SIMDApprox { ref_c }.margin (margin)); + } + }; + + tester (&ta::sin_cos_3angle<7, 8, FloatType>, (NumericType) 2.0e-7); + tester (&ta::sin_cos_3angle<5, 6, FloatType>, (NumericType) 5.0e-5); + tester (&ta::sin_cos_3angle<3, 4, FloatType>, (NumericType) 8.0e-3); + } +}