diff --git a/chowdsp_fft.cpp b/chowdsp_fft.cpp
index c62e368..0fac38d 100644
--- a/chowdsp_fft.cpp
+++ b/chowdsp_fft.cpp
@@ -50,8 +50,8 @@ SOFTWARE.
 
 #include <cassert>
 #include <cmath>
-#include <cstdlib>
 #include <cstdint>
+#include <cstdlib>
 
 #if defined(_MSC_VER)
 // Contains the definition of __cpuidex
@@ -89,7 +89,8 @@ struct FFT_Setup;
 FFT_Setup* fft_new_setup (int N, fft_transform_t transform);
 void fft_destroy_setup (FFT_Setup* s);
 void pffft_transform_internal (FFT_Setup* setup, const float* finput, float* foutput, void* scratch, fft_direction_t direction, int ordered);
-}
+void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b, float* ab, float scaling);
+} // namespace chowdsp::fft::avx
 static constexpr uintptr_t address_mask = ~static_cast<uintptr_t> (3);
 static constexpr uintptr_t typeid_mask = static_cast<uintptr_t> (3);
 #endif
@@ -187,7 +188,7 @@ static bool cpu_supports_avx()
     [[maybe_unused]] const auto fma3_avx = avx && fma3_sse42;
 
     int regs8[4];
-    get_cpuid(regs8, 0x80000001);
+    get_cpuid (regs8, 0x80000001);
     [[maybe_unused]] const auto fma4 = regs8[2] >> 16 & avx_state_os_enabled;
 
     // sse4a = regs[2] >> 6 & 1;
@@ -195,11 +196,11 @@ static bool cpu_supports_avx()
     // xop = regs[2] >> 11 & 1;
 
     int regs7[4];
-    get_cpuid(regs7, 0x7);
+    get_cpuid (regs7, 0x7);
     const auto avx2 = regs7[1] >> 5 & avx_state_os_enabled;
 
     int regs7a[4];
-    get_cpuid(regs7a, 0x7, 0x1);
+    get_cpuid (regs7a, 0x7, 0x1);
     [[maybe_unused]] const auto avxvnni = regs7a[0] >> 4 & avx_state_os_enabled;
 
     const auto fma3_avx2 = avx2 && fma3_sse42;
@@ -305,4 +306,80 @@ void fft_transform (void* setup, const float* input, float* output, float* work,
                                     1);
 #endif
 }
+
+void fft_transform_unordered (void* setup, const float* input, float* output, float* work, fft_direction_t direction)
+{
+#if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)
+#if CHOWDSP_FFT_COMPILER_SUPPORTS_AVX
+    if (check_is_pointer_sse_setup (setup))
+    {
+        sse::pffft_transform_internal (reinterpret_cast<sse::FFT_Setup*> (get_setup_pointer (setup)),
+                                       input,
+                                       output,
+                                       (__m128*) work,
+                                       direction,
+                                       0);
+    }
+    else
+    {
+        avx::pffft_transform_internal (reinterpret_cast<avx::FFT_Setup*> (get_setup_pointer (setup)),
+                                       input,
+                                       output,
+                                       work,
+                                       direction,
+                                       0);
+    }
+#else
+    sse::pffft_transform_internal (reinterpret_cast<sse::FFT_Setup*> (setup),
+                                   input,
+                                   output,
+                                   (__m128*) work,
+                                   direction,
+                                   0);
+#endif
+#elif defined(__ARM_NEON__)
+    neon::pffft_transform_internal (reinterpret_cast<neon::FFT_Setup*> (setup),
+                                    input,
+                                    output,
+                                    (float32x4_t*) work,
+                                    direction,
+                                    0);
+#endif
+}
+
+void fft_convolve_unordered (void* setup, const float* a, const float* b, float* ab, float scaling)
+{
+#if defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64)
+#if CHOWDSP_FFT_COMPILER_SUPPORTS_AVX
+    if (check_is_pointer_sse_setup (setup))
+    {
+        sse::pffft_convolve_internal (reinterpret_cast<sse::FFT_Setup*> (get_setup_pointer (setup)),
+                                      a,
+                                      b,
+                                      ab,
+                                      scaling);
+    }
+    else
+    {
+        avx::pffft_convolve_internal (reinterpret_cast<avx::FFT_Setup*> (get_setup_pointer (setup)),
+                                      a,
+                                      b,
+                                      ab,
+                                      scaling);
+    }
+#else
+    sse::pffft_convolve_internal (reinterpret_cast<sse::FFT_Setup*> (setup),
+                                  a,
+                                  b,
+                                  ab,
+                                  scaling);
+#endif
+#elif defined(__ARM_NEON__)
+    neon::pffft_convolve_internal (reinterpret_cast<neon::FFT_Setup*> (setup),
+                                   a,
+                                   b,
+                                   ab,
+                                   scaling);
+#endif
+}
 } // namespace chowdsp::fft
diff --git a/chowdsp_fft.h b/chowdsp_fft.h
index 92597db..f9ce206 100644
--- a/chowdsp_fft.h
+++ b/chowdsp_fft.h
@@ -102,6 +102,18 @@ void fft_destroy_setup (void*);
 */
 void fft_transform (void* setup, const float* input, float* output, float* work, fft_direction_t direction);
 
+/**
+ * Same as the above method, but without necessarily maintaining the correct data order
+ * in the frequency domain. This may be useful if your frequency-domain operations
+ * are order-independent, for example, convolution.
+ */
+void fft_transform_unordered (void* setup, const float* input, float* output, float* work, fft_direction_t direction);
+
+/**
+ * Convolve two (unordered) frequency-domain signals, with some scale factor.
+ */
+void fft_convolve_unordered (void* setup, const float* a, const float* b, float* ab, float scaling);
+
 void* aligned_malloc (size_t nb_bytes);
 void aligned_free (void*);
 
diff --git a/simd/chowdsp_fft_impl_avx.cpp b/simd/chowdsp_fft_impl_avx.cpp
index 4b8b7c3..4cd2d6b 100644
--- a/simd/chowdsp_fft_impl_avx.cpp
+++ b/simd/chowdsp_fft_impl_avx.cpp
@@ -1984,5 +1984,49 @@ void pffft_transform_internal (FFT_Setup* setup, const float* finput, float* fou
     }
     assert (buff[ib] == voutput);
 }
+
+void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b, float* ab, float scaling)
+{
+    int Ncvec = setup->Ncvec;
+    auto* va = reinterpret_cast<const __m256*> (a);
+    auto* vb = reinterpret_cast<const __m256*> (b);
+    auto* vab = reinterpret_cast<__m256*> (ab);
+
+    float ar0, ai0, br0, bi0, abr0, abi0;
+    const auto vscal = _mm256_set1_ps (scaling);
+    int i;
+
+    ar0 = reinterpret_cast<const float*> (&va[0])[0];
+    ai0 = reinterpret_cast<const float*> (&va[1])[0];
+    br0 = reinterpret_cast<const float*> (&vb[0])[0];
+    bi0 = reinterpret_cast<const float*> (&vb[1])[0];
+    abr0 = reinterpret_cast<const float*> (&vab[0])[0];
+    abi0 = reinterpret_cast<const float*> (&vab[1])[0];
+
+    for (i = 0; i < Ncvec; i += 2)
+    {
+        __m256 ar, ai, br, bi;
+        ar = va[2 * i + 0];
+        ai = va[2 * i + 1];
+        br = vb[2 * i + 0];
+        bi = vb[2 * i + 1];
+        cplx_mul_v (ar, ai, br, bi);
+        vab[2 * i + 0] = _mm256_fmadd_ps (ar, vscal, vab[2 * i + 0]);
+        vab[2 * i + 1] = _mm256_fmadd_ps (ai, vscal, vab[2 * i + 1]);
+        ar = va[2 * i + 2];
+        ai = va[2 * i + 3];
+        br = vb[2 * i + 2];
+        bi = vb[2 * i + 3];
+        cplx_mul_v (ar, ai, br, bi);
+        vab[2 * i + 2] = _mm256_fmadd_ps (ar, vscal, vab[2 * i + 2]);
+        vab[2 * i + 3] = _mm256_fmadd_ps (ai, vscal, vab[2 * i + 3]);
+    }
+
+    if (setup->transform == FFT_REAL)
+    {
+        reinterpret_cast<float*> (&vab[0])[0] = abr0 + ar0 * br0 * scaling;
+        reinterpret_cast<float*> (&vab[1])[0] = abi0 + ai0 * bi0 * scaling;
+    }
+}
 } // namespace chowdsp::fft::avx
 #endif
diff --git a/simd/chowdsp_fft_impl_neon.cpp b/simd/chowdsp_fft_impl_neon.cpp
index 296d864..a12c73f 100644
--- a/simd/chowdsp_fft_impl_neon.cpp
+++ b/simd/chowdsp_fft_impl_neon.cpp
@@ -1558,9 +1558,6 @@ void pffft_transform_internal (FFT_Setup* setup, const float* finput, float* fou
     float32x4_t* buff[2] = { voutput, scratch ? scratch : scratch_on_stack };
     int ib = (nf_odd ^ ordered ? 1 : 0);
 
-    // assert (VALIGNED (finput) && VALIGNED (foutput));
-
-    //assert(finput != foutput);
     if (direction == FFT_FORWARD)
     {
         ib = ! ib;
@@ -1628,4 +1625,48 @@ void pffft_transform_internal (FFT_Setup* setup, const float* finput, float* fou
     }
     assert (buff[ib] == voutput);
 }
+
+void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b, float* ab, float scaling)
+{
+    int Ncvec = setup->Ncvec;
+    auto* va = (const float32x4_t*) a;
+    auto* vb = (const float32x4_t*) b;
+    auto* vab = (float32x4_t*) ab;
+
+    float ar0, ai0, br0, bi0, abr0, abi0;
+    const auto vscal = vld1q_dup_f32 (&scaling);
+    int i;
+
+    ar0 = reinterpret_cast<const float*> (&va[0])[0];
+    ai0 = reinterpret_cast<const float*> (&va[1])[0];
+    br0 = reinterpret_cast<const float*> (&vb[0])[0];
+    bi0 = reinterpret_cast<const float*> (&vb[1])[0];
+    abr0 = reinterpret_cast<const float*> (&vab[0])[0];
+    abi0 = reinterpret_cast<const float*> (&vab[1])[0];
+
+    for (i = 0; i < Ncvec; i += 2)
+    {
+        float32x4_t ar, ai, br, bi;
+        ar = va[2 * i + 0];
+        ai = va[2 * i + 1];
+        br = vb[2 * i + 0];
+        bi = vb[2 * i + 1];
+        std::tie (ar, ai) = cplx_mul_v (ar, ai, br, bi);
+        vab[2 * i + 0] = vmlaq_f32 (vab[2 * i + 0], ar, vscal);
+        vab[2 * i + 1] = vmlaq_f32 (vab[2 * i + 1], ai, vscal);
+        ar = va[2 * i + 2];
+        ai = va[2 * i + 3];
+        br = vb[2 * i + 2];
+        bi = vb[2 * i + 3];
+        std::tie (ar, ai) = cplx_mul_v (ar, ai, br, bi);
+        vab[2 * i + 2] = vmlaq_f32 (vab[2 * i + 2], ar, vscal);
+        vab[2 * i + 3] = vmlaq_f32 (vab[2 * i + 3], ai, vscal);
+    }
+
+    if (setup->transform == FFT_REAL)
+    {
+        reinterpret_cast<float*> (&vab[0])[0] = abr0 + ar0 * br0 * scaling;
+        reinterpret_cast<float*> (&vab[1])[0] = abi0 + ai0 * bi0 * scaling;
+    }
+}
 } // namespace chowdsp::fft::neon
diff --git a/simd/chowdsp_fft_impl_sse.cpp b/simd/chowdsp_fft_impl_sse.cpp
index d6b1493..4691594 100644
--- a/simd/chowdsp_fft_impl_sse.cpp
+++ b/simd/chowdsp_fft_impl_sse.cpp
@@ -1645,4 +1645,48 @@ void pffft_transform_internal (FFT_Setup* setup, const float* finput, float* fou
     }
     assert (buff[ib] == voutput);
 }
+
+void pffft_convolve_internal (FFT_Setup* setup, const float* a, const float* b, float* ab, float scaling)
+{
+    int Ncvec = setup->Ncvec;
+    auto* va = reinterpret_cast<const __m128*> (a);
+    auto* vb = reinterpret_cast<const __m128*> (b);
+    auto* vab = reinterpret_cast<__m128*> (ab);
+
+    float ar0, ai0, br0, bi0, abr0, abi0;
+    const auto vscal = _mm_set1_ps (scaling);
+    int i;
+
+    ar0 = reinterpret_cast<const float*> (&va[0])[0];
+    ai0 = reinterpret_cast<const float*> (&va[1])[0];
+    br0 = reinterpret_cast<const float*> (&vb[0])[0];
+    bi0 = reinterpret_cast<const float*> (&vb[1])[0];
+    abr0 = reinterpret_cast<const float*> (&vab[0])[0];
+    abi0 = reinterpret_cast<const float*> (&vab[1])[0];
+
+    for (i = 0; i < Ncvec; i += 2)
+    {
+        __m128 ar, ai, br, bi;
+        ar = va[2 * i + 0];
+        ai = va[2 * i + 1];
+        br = vb[2 * i + 0];
+        bi = vb[2 * i + 1];
+        std::tie (ar, ai) = cplx_mul_v (ar, ai, br, bi);
+        vab[2 * i + 0] = _mm_add_ps  (vab[2 * i + 0], _mm_mul_ps (ar, vscal));
+        vab[2 * i + 1] = _mm_add_ps  (vab[2 * i + 1], _mm_mul_ps (ai, vscal));
+        ar = va[2 * i + 2];
+        ai = va[2 * i + 3];
+        br = vb[2 * i + 2];
+        bi = vb[2 * i + 3];
+        std::tie (ar, ai) = cplx_mul_v (ar, ai, br, bi);
+        vab[2 * i + 2] = _mm_add_ps (vab[2 * i + 2], _mm_mul_ps (ar, vscal));
+        vab[2 * i + 3] = _mm_add_ps (vab[2 * i + 3], _mm_mul_ps (ai, vscal));
+    }
+
+    if (setup->transform == FFT_REAL)
+    {
+        reinterpret_cast<float*> (&vab[0])[0] = abr0 + ar0 * br0 * scaling;
+        reinterpret_cast<float*> (&vab[1])[0] = abi0 + ai0 * bi0 * scaling;
+    }
+}
 } // namespace chowdsp::fft::sse
diff --git a/test/test.cpp b/test/test.cpp
index 3c963e9..8d171c1 100644
--- a/test/test.cpp
+++ b/test/test.cpp
@@ -8,12 +8,12 @@
 
 void compare (const float* ref, const float* test, int N)
 {
-    const auto tol = 1.0e-6f * (float) N / 8.0f;
+    const auto tol = 2.0e-7f * (float) N;
     for (int n = 0; n < N; ++n)
         REQUIRE (test[n] == Catch::Approx { ref[n] }.margin(tol));
 }
 
-void test_complex (int N, bool use_avx = false)
+void test_fft_complex (int N, bool use_avx = false)
 {
     auto* data = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N * 2);
     auto* data_ref = (float*) pffft_aligned_malloc (sizeof (float) * N * 2);
@@ -28,7 +28,7 @@ void test_complex (int N, bool use_avx = false)
     std::copy (data, data + N * 2, data_ref);
 
     auto* fft_setup = chowdsp::fft::fft_new_setup (N, chowdsp::fft::FFT_COMPLEX, use_avx);
-    assert (fft_setup != nullptr);
+    REQUIRE (fft_setup != nullptr);
     auto* pffft_setup = pffft_new_setup (N, PFFFT_COMPLEX);
 
     chowdsp::fft::fft_transform (fft_setup, data, data, work_data, chowdsp::fft::FFT_FORWARD);
@@ -56,7 +56,7 @@ void test_complex (int N, bool use_avx = false)
     pffft_aligned_free (work_data_ref);
 }
 
-void test_real (int N, bool use_avx = false)
+void test_fft_real (int N, bool use_avx = false)
 {
     auto* data = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N);
     auto* data_ref = (float*) pffft_aligned_malloc (sizeof (float) * N);
@@ -70,7 +70,7 @@ void test_real (int N, bool use_avx = false)
     std::copy (data, data + N, data_ref);
 
     auto* fft_setup = chowdsp::fft::fft_new_setup (N, chowdsp::fft::FFT_REAL, use_avx);
-    assert (fft_setup != nullptr);
+    REQUIRE (fft_setup != nullptr);
     auto* pffft_setup = pffft_new_setup (N, PFFFT_REAL);
 
     chowdsp::fft::fft_transform (fft_setup, data, data, work_data, chowdsp::fft::FFT_FORWARD);
@@ -98,6 +98,106 @@ void test_real (int N, bool use_avx = false)
     pffft_aligned_free (work_data_ref);
 }
 
+void test_convolution_complex (int N, bool use_avx = false)
+{
+    auto* sine1 = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N * 2);
+    auto* sine1_ref = (float*) pffft_aligned_malloc (sizeof (float) * N * 2);
+    auto* sine2 = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N * 2);
+    auto* sine2_ref = (float*) pffft_aligned_malloc (sizeof (float) * N * 2);
+    auto* out = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N * 2);
+    auto* out_ref = (float*) pffft_aligned_malloc (sizeof (float) * N * 2);
+    auto* work_data = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N * 2);
+    auto* work_data_ref = (float*) pffft_aligned_malloc (sizeof (float) * N * 2);
+
+    for (int i = 0; i < N; ++i)
+    {
+        sine1[i * 2] = std::sin (3.14f * (100.0f / 48000.0f) * (float) i);
+        sine1[i * 2 + 1] = std::cos (3.14f * (100.0f / 48000.0f) * (float) i);
+        sine2[i * 2] = std::sin (3.14f * (200.0f / 48000.0f) * (float) i);
+        sine2[i * 2 + 1] = std::cos (3.14f * (200.0f / 48000.0f) * (float) i);
+    }
+    std::copy (sine1, sine1 + N * 2, sine1_ref);
+    std::copy (sine2, sine2 + N * 2, sine2_ref);
+    std::fill_n (out, N * 2, 0.0f);
+    std::fill_n (out_ref, N * 2, 0.0f);
+    const auto norm_gain = 1.0f / static_cast<float> (N);
+
+    auto* pffft_setup = pffft_new_setup (N, PFFFT_COMPLEX);
+    pffft_transform (pffft_setup, sine1_ref, sine1_ref, work_data_ref, PFFFT_FORWARD);
+    pffft_transform (pffft_setup, sine2_ref, sine2_ref, work_data_ref, PFFFT_FORWARD);
+    pffft_zconvolve_accumulate (pffft_setup, sine1_ref, sine2_ref, out_ref, norm_gain);
+    pffft_transform (pffft_setup, out_ref, out_ref, work_data_ref, PFFFT_BACKWARD);
+
+    auto* fft_setup = chowdsp::fft::fft_new_setup (N, chowdsp::fft::FFT_COMPLEX, use_avx);
+    REQUIRE (fft_setup != nullptr);
+    chowdsp::fft::fft_transform_unordered (fft_setup, sine1, sine1, work_data, chowdsp::fft::FFT_FORWARD);
+    chowdsp::fft::fft_transform_unordered (fft_setup, sine2, sine2, work_data, chowdsp::fft::FFT_FORWARD);
+    chowdsp::fft::fft_convolve_unordered (fft_setup, sine1, sine2, out, norm_gain);
+    chowdsp::fft::fft_transform_unordered (fft_setup, out, out, work_data, chowdsp::fft::FFT_BACKWARD);
+
+    compare (out_ref, out, N);
+
+    chowdsp::fft::fft_destroy_setup (fft_setup);
+    pffft_destroy_setup (pffft_setup);
+    chowdsp::fft::aligned_free (sine1);
+    pffft_aligned_free (sine1_ref);
+    chowdsp::fft::aligned_free (sine2);
+    pffft_aligned_free (sine2_ref);
+    chowdsp::fft::aligned_free (out);
+    pffft_aligned_free (out_ref);
+    chowdsp::fft::aligned_free (work_data);
+    pffft_aligned_free (work_data_ref);
+}
+
+void test_convolution_real (int N, bool use_avx = false)
+{
+    auto* sine1 = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N);
+    auto* sine1_ref = (float*) pffft_aligned_malloc (sizeof (float) * N);
+    auto* sine2 = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N);
+    auto* sine2_ref = (float*) pffft_aligned_malloc (sizeof (float) * N);
+    auto* out = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N);
+    auto* out_ref = (float*) pffft_aligned_malloc (sizeof (float) * N);
+    auto* work_data = (float*) chowdsp::fft::aligned_malloc (sizeof (float) * N);
+    auto* work_data_ref = (float*) pffft_aligned_malloc (sizeof (float) * N);
+
+    for (int i = 0; i < N; ++i)
+    {
+        sine1[i] = std::sin (3.14f * (100.0f / 48000.0f) * (float) i);
+        sine2[i] = std::sin (3.14f * (200.0f / 48000.0f) * (float) i);
+    }
+    std::copy (sine1, sine1 + N, sine1_ref);
+    std::copy (sine2, sine2 + N, sine2_ref);
+    std::fill_n (out, N, 0.0f);
+    std::fill_n (out_ref, N, 0.0f);
+    const auto norm_gain = 1.0f / static_cast<float> (N);
+
+    auto* pffft_setup = pffft_new_setup (N, PFFFT_REAL);
+    pffft_transform (pffft_setup, sine1_ref, sine1_ref, work_data_ref, PFFFT_FORWARD);
+    pffft_transform (pffft_setup, sine2_ref, sine2_ref, work_data_ref, PFFFT_FORWARD);
+    pffft_zconvolve_accumulate (pffft_setup, sine1_ref, sine2_ref, out_ref, norm_gain);
+    pffft_transform (pffft_setup, out_ref, out_ref, work_data_ref, PFFFT_BACKWARD);
+
+    auto* fft_setup = chowdsp::fft::fft_new_setup (N, chowdsp::fft::FFT_REAL, use_avx);
+    REQUIRE (fft_setup != nullptr);
+    chowdsp::fft::fft_transform_unordered (fft_setup, sine1, sine1, work_data, chowdsp::fft::FFT_FORWARD);
+    chowdsp::fft::fft_transform_unordered (fft_setup, sine2, sine2, work_data, chowdsp::fft::FFT_FORWARD);
+    chowdsp::fft::fft_convolve_unordered (fft_setup, sine1, sine2, out, norm_gain);
+    chowdsp::fft::fft_transform_unordered (fft_setup, out, out, work_data, chowdsp::fft::FFT_BACKWARD);
+
+    compare (out_ref, out, N);
+
+    chowdsp::fft::fft_destroy_setup (fft_setup);
+    pffft_destroy_setup (pffft_setup);
+    chowdsp::fft::aligned_free (sine1);
+    pffft_aligned_free (sine1_ref);
+    chowdsp::fft::aligned_free (sine2);
+    pffft_aligned_free (sine2_ref);
+    chowdsp::fft::aligned_free (out);
+    pffft_aligned_free (out_ref);
+    chowdsp::fft::aligned_free (work_data);
+    pffft_aligned_free (work_data_ref);
+}
+
 TEST_CASE("FFT SSE/NEON")
 {
     for (int i = 5; i < 20; ++i)
@@ -105,12 +205,22 @@ TEST_CASE("FFT SSE/NEON")
         const auto fft_size = 1 << i;
         SECTION ("Testing Complex FFT with size: " + std::to_string (fft_size))
         {
-            test_complex (fft_size);
+            test_fft_complex (fft_size);
         }
 
         SECTION ("Testing Real FFT with size: " + std::to_string (fft_size))
         {
-            test_real (fft_size);
+            test_fft_real (fft_size);
+        }
+
+        SECTION ("Testing Complex Convolution with size: " + std::to_string (fft_size))
+        {
+            test_convolution_complex (fft_size);
+        }
+
+        SECTION ("Testing Real Convolution with size: " + std::to_string (fft_size))
+        {
+            test_convolution_real (fft_size);
         }
     }
 }
@@ -123,12 +233,22 @@ TEST_CASE("FFT AVX")
         const auto fft_size = 1 << i;
         SECTION ("Testing Complex FFT with size: " + std::to_string (fft_size))
         {
-            test_complex (fft_size, true);
+            test_fft_complex (fft_size, true);
         }
 
         SECTION ("Testing Real FFT with size: " + std::to_string (fft_size))
         {
-            test_real (fft_size, true);
+            test_fft_real (fft_size, true);
+        }
+
+        SECTION ("Testing Complex Convolution with size: " + std::to_string (fft_size))
+        {
+            test_convolution_complex (fft_size, true);
+        }
+
+        SECTION ("Testing Real Convolution with size: " + std::to_string (fft_size))
+        {
+            test_convolution_real (fft_size, true);
         }
     }
 }