From dd6e32457e93e70179b0a9e347542bab62cad8b5 Mon Sep 17 00:00:00 2001 From: Adam Lugowski Date: Mon, 22 Jan 2024 00:06:46 -0800 Subject: [PATCH] Add `pluggable_quicksort` and make it default parallel sort impl (#29) Add `pluggable_quicksort` and make it default parallel sort impl Retain `pluggable_mergesort` for now. --- README.md | 8 +- benchmark/algorithm_bench.cpp | 14 ++- include/poolstl/algorithm | 135 +++++++++++++++++++++++--- include/poolstl/internal/ttp_impl.hpp | 101 ++++++++++++++++++- include/poolstl/internal/utils.hpp | 22 +++++ tests/poolstl_test.cpp | 105 ++++++++++++-------- 6 files changed, 320 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 3c8bb25..85912f7 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ All in `std::` namespace. ### Other * [`poolstl::iota_iter`](include/poolstl/iota_iter.hpp) - Iterate over integers. Same as iterating over output of [`std::iota`](https://en.cppreference.com/w/cpp/algorithm/iota) but without materializing anything. Iterator version of [`std::ranges::iota_view`](https://en.cppreference.com/w/cpp/ranges/iota_view). * `poolstl::for_each_chunk` - Like `std::for_each`, but explicitly splits the input range into chunks then exposes the chunked parallelism. A user-specified chunk constructor is called for each parallel chunk then its output is passed to each loop iteration. Useful for workloads that need an expensive workspace that can be reused between iterations, but not simultaneously by all iterations in parallel. -* `poolstl::pluggable_sort` - Like `std::sort`, but allows specification of sequential sort and merge methods. To parallelize [pdqsort](https://github.com/orlp/pdqsort): `pluggable_sort(par, v.begin(), v.end(), pdqsort)`. +* `poolstl::pluggable_sort` - Like `std::sort`, but allows specification of sequential sort method. To parallelize [pdqsort](https://github.com/orlp/pdqsort): `pluggable_sort(par, v.begin(), v.end(), pdqsort)`. ## Usage @@ -197,9 +197,9 @@ for_each()/real_time 94.6 ms for_each(poolstl::par)/real_time 18.7 ms 0.044 ms 36 for_each(std::execution::par)/real_time 15.3 ms 12.9 ms 46 sort()/real_time 603 ms 602 ms 1 -sort(poolstl::par)/real_time 146 ms 0.667 ms 5 -sort(std::execution::par)/real_time 121 ms 95.1 ms 6 -pluggable_sort(poolstl::par, ..., pdqsort)/real_time 97.7 ms 0.519 ms 7 +sort(poolstl::par)/real_time 137 ms 11.8 ms 5 +sort(std::execution::par)/real_time 113 ms 102 ms 6 +pluggable_sort(poolstl::par, ..., pdqsort)/real_time 91.8 ms 11.9 ms 7 transform()/real_time 95.0 ms 94.9 ms 7 transform(poolstl::par)/real_time 17.4 ms 0.037 ms 38 transform(std::execution::par)/real_time 15.3 ms 13.2 ms 45 diff --git a/benchmark/algorithm_bench.cpp b/benchmark/algorithm_bench.cpp index ff3764e..2f8561c 100644 --- a/benchmark/algorithm_bench.cpp +++ b/benchmark/algorithm_bench.cpp @@ -122,7 +122,7 @@ BENCHMARK(sort)->Name("sort(std::execution::par)")->UseRealTime(); //////////////////////////////// template -void pluggable_sort_pdq(benchmark::State& state) { +void pluggable_sort(benchmark::State& state) { auto source = random_vector(arr_length / 10); // auto source = random_vector(arr_length); @@ -134,7 +134,12 @@ void pluggable_sort_pdq(benchmark::State& state) { if constexpr (which_impl == 1) { poolstl::pluggable_sort(policy::get(), values.begin(), values.end(), pdqsort); } else if constexpr (which_impl == 2) { - poolstl::pluggable_sort(policy::get(), values.begin(), values.end(), pdqsort, adapted_pipm_inplace_merge); + poolstl::pluggable_mergesort(policy::get(), values.begin(), values.end(), pdqsort); + } else if constexpr (which_impl == 3) { + poolstl::pluggable_mergesort(policy::get(), values.begin(), values.end(), pdqsort, adapted_pipm_inplace_merge); + } else if constexpr (which_impl == 4) { + // pluggable_sort delegates to this, so essentially same as which_impl==1 + poolstl::pluggable_quicksort(policy::get(), values.begin(), values.end(), pdqsort); } benchmark::DoNotOptimize(values); @@ -142,8 +147,9 @@ void pluggable_sort_pdq(benchmark::State& state) { } } -BENCHMARK(pluggable_sort_pdq)->Name("pluggable_sort(poolstl::par, ..., pdqsort)")->UseRealTime(); // uses pdqsort and std::inplace_merge (O(n) extra memory) -BENCHMARK(pluggable_sort_pdq)->Name("pluggable_sort(poolstl::par, ..., pdqsort, pipm_merge)")->UseRealTime(); // uses pdqsort and adapted_pipm_inplace_merge (O(1) extra memory) +BENCHMARK(pluggable_sort)->Name("pluggable_sort(poolstl::par, ..., pdqsort)")->UseRealTime(); // uses pdqsort +//BENCHMARK(pluggable_sort)->Name("pluggable_mergesort(poolstl::par, ..., pdqsort)")->UseRealTime(); // uses pdqsort and std::inplace_merge (O(n) extra memory) +//BENCHMARK(pluggable_sort)->Name("pluggable_mergesort(poolstl::par, ..., pdqsort, pipm_merge)")->UseRealTime(); // uses pdqsort and adapted_pipm_inplace_merge (slower, but O(1) extra memory) //////////////////////////////// diff --git a/include/poolstl/algorithm b/include/poolstl/algorithm index 9479a89..41ebe11 100644 --- a/include/poolstl/algorithm +++ b/include/poolstl/algorithm @@ -221,8 +221,11 @@ namespace std { return; } - poolstl::internal::parallel_sort(std::forward(policy), first, last, comp, - std::sort, std::inplace_merge); + poolstl::internal::parallel_quicksort(std::forward(policy), first, last, comp, + std::sort, + std::partition::value_type>>, + poolstl::internal::quicksort_pivot); } /** @@ -248,8 +251,11 @@ namespace std { return; } - poolstl::internal::parallel_sort(std::forward(policy), first, last, comp, - std::stable_sort, std::inplace_merge); + poolstl::internal::parallel_quicksort(std::forward(policy), first, last, comp, + std::stable_sort, + std::stable_partition::value_type>>, + poolstl::internal::quicksort_pivot); } /** @@ -374,37 +380,142 @@ namespace poolstl { /** * NOTE: Iterators are expected to be random access. * - * Like `std::sort`, but allows specifying the sequential sort and merge methods. These methods must have the - * same signature as the comparator versions of `std::sort` and `std::inplace_merge`, respectively. + * Like `std::sort`, but allows specifying the sequential sort method, which must have the + * same signature as the comparator version of `std::sort`. + * + * Implemented as a high-level quicksort that delegates to `sort_func`, in parallel, once the range has been + * sufficiently partitioned. */ template poolstl::internal::enable_if_poolstl_policy pluggable_sort(ExecPolicy &&policy, RandIt first, RandIt last, Compare comp, - void (sort_func)(RandIt, RandIt, Compare) = std::sort, - void (merge_func)(RandIt, RandIt, RandIt, Compare) = std::inplace_merge) { + void (sort_func)(RandIt, RandIt, Compare) = std::sort) { if (poolstl::internal::is_seq(policy)) { sort_func(first, last, comp); return; } - poolstl::internal::parallel_sort(std::forward(policy), first, last, comp, sort_func, merge_func); + poolstl::internal::parallel_quicksort(std::forward(policy), first, last, comp, sort_func, + std::partition::value_type>>, + poolstl::internal::quicksort_pivot); } /** * NOTE: Iterators are expected to be random access. * - * Like `std::sort`, but allows specifying the sequential sort and merge methods. These methods must have the - * same signature as the comparator versions of `std::sort` and `std::inplace_merge`, respectively. + * Like `std::sort`, but allows specifying the sequential sort method, which must have the + * same signature as the comparator version of `std::sort`. + * + * Implemented as a parallel high-level quicksort that delegates to `sort_func` once the range has been + * sufficiently partitioned. */ template poolstl::internal::enable_if_poolstl_policy pluggable_sort(ExecPolicy &&policy, RandIt first, RandIt last, + void (sort_func)(RandIt, RandIt, + std::less::value_type>) = std::sort){ + using T = typename std::iterator_traits::value_type; + pluggable_sort(std::forward(policy), first, last, std::less(), sort_func); + } + + /** + * NOTE: Iterators are expected to be random access. + * + * Parallel merge sort. + * + * @param comp Comparator. + * @param sort_func Sequential sort method. Must have the same signature as the comparator version of `std::sort`. + * @param merge_func Sequential merge method. Must have the same signature as `std::inplace_merge`. + */ + template + poolstl::internal::enable_if_poolstl_policy + pluggable_mergesort(ExecPolicy &&policy, RandIt first, RandIt last, Compare comp, + void (sort_func)(RandIt, RandIt, Compare) = std::sort, + void (merge_func)(RandIt, RandIt, RandIt, Compare) = std::inplace_merge) { + if (poolstl::internal::is_seq(policy)) { + sort_func(first, last, comp); + return; + } + + poolstl::internal::parallel_mergesort(std::forward(policy), + first, last, comp, sort_func, merge_func); + } + + /** + * NOTE: Iterators are expected to be random access. + * + * Parallel merge sort. + * + * Uses `std::less` comparator. + * + * @param sort_func Sequential sort method. Must have the same signature as the comparator version of `std::sort`. + * @param merge_func Sequential merge method. Must have the same signature as `std::inplace_merge`. + */ + template + poolstl::internal::enable_if_poolstl_policy + pluggable_mergesort(ExecPolicy &&policy, RandIt first, RandIt last, void (sort_func)(RandIt, RandIt, std::less::value_type>) = std::sort, void (merge_func)(RandIt, RandIt, RandIt, std::less::value_type>) = std::inplace_merge){ using T = typename std::iterator_traits::value_type; - pluggable_sort(std::forward(policy), first, last, std::less(), sort_func, merge_func); + pluggable_mergesort(std::forward(policy), first, last, std::less(), sort_func, merge_func); + } + + /** + * NOTE: Iterators are expected to be random access. + * + * Parallel quicksort that allows specifying the sequential sort and partition methods. + * + * @param comp Comparator. + * @param sort_func Sequential sort method to use once range is sufficiently partitioned. Must have the same + * signature as the comparator version of `std::sort`. + * @param part_func Sequential partition method. Must have the same signature as `std::partition`. + * @param pivot_func Method that identifies the pivot element + */ + template + poolstl::internal::enable_if_poolstl_policy + pluggable_quicksort(ExecPolicy &&policy, RandIt first, RandIt last, Compare comp, + void (sort_func)(RandIt, RandIt, Compare) = std::sort, + RandIt (part_func)(RandIt, RandIt, poolstl::internal::pivot_predicate::value_type>) = std::partition, + typename std::iterator_traits::value_type (pivot_func)(RandIt, RandIt) = + poolstl::internal::quicksort_pivot) { + if (poolstl::internal::is_seq(policy)) { + sort_func(first, last, comp); + return; + } + + poolstl::internal::parallel_quicksort(std::forward(policy), + first, last, comp, sort_func, part_func, pivot_func); + } + + /** + * NOTE: Iterators are expected to be random access. + * + * Parallel quicksort that allows specifying the sequential sort and partition methods. + * + * Uses `std::less` comparator. + * + * @param sort_func Sequential sort method to use once range is sufficiently partitioned. Must have the same + * signature as the comparator version of `std::sort`. + * @param part_func Sequential partition method. Must have the same signature as `std::partition`. + * @param pivot_func Method that identifies the pivot element + */ + template + poolstl::internal::enable_if_poolstl_policy + pluggable_quicksort(ExecPolicy &&policy, RandIt first, RandIt last, + void (sort_func)(RandIt, RandIt, + std::less::value_type>) = std::sort, + RandIt (part_func)(RandIt, RandIt, poolstl::internal::pivot_predicate< + std::less::value_type>, + typename std::iterator_traits::value_type>) = std::partition, + typename std::iterator_traits::value_type (pivot_func)(RandIt, RandIt) = + poolstl::internal::quicksort_pivot) { + using T = typename std::iterator_traits::value_type; + pluggable_quicksort(std::forward(policy), first, last, std::less(), + sort_func, part_func, pivot_func); } } diff --git a/include/poolstl/internal/ttp_impl.hpp b/include/poolstl/internal/ttp_impl.hpp index 600459b..5f2b7f1 100644 --- a/include/poolstl/internal/ttp_impl.hpp +++ b/include/poolstl/internal/ttp_impl.hpp @@ -160,8 +160,8 @@ namespace poolstl { * @param merge_func Sequential merge method, like std::inplace_merge */ template - void parallel_sort(ExecPolicy &&policy, RandIt first, RandIt last, - Compare comp, SortFunc sort_func, MergeFunc merge_func) { + void parallel_mergesort(ExecPolicy &&policy, RandIt first, RandIt last, + Compare comp, SortFunc sort_func, MergeFunc merge_func) { if (first == last) { return; } @@ -207,6 +207,103 @@ namespace poolstl { } while (futures.size() > 1); futures.front().get(); } + + /** + * Quicksort worker function. + */ + template + void quicksort_impl(task_thread_pool::task_thread_pool* task_pool, const RandIt first, const RandIt last, + Compare comp, SortFunc sort_func, PartFunc part_func, PivotFunc pivot_func, + std::ptrdiff_t target_leaf_size, + std::vector>* futures, std::mutex* mutex, + std::condition_variable* cv, int* inflight_spawns) { + using T = typename std::iterator_traits::value_type; + + auto partition_size = std::distance(first, last); + + if (partition_size > target_leaf_size) { + // partition the range + auto mid = part_func(first, last, pivot_predicate(comp, pivot_func(first, last))); + + if (mid != first && mid != last) { + // was able to partition the range, so recurse + std::lock_guard guard(*mutex); + ++(*inflight_spawns); + + futures->emplace_back(task_pool->submit( + quicksort_impl, + task_pool, first, mid, comp, sort_func, part_func, pivot_func, target_leaf_size, + futures, mutex, cv, inflight_spawns)); + + futures->emplace_back(task_pool->submit( + quicksort_impl, + task_pool, mid, last, comp, sort_func, part_func, pivot_func, target_leaf_size, + futures, mutex, cv, inflight_spawns)); + return; + } + } + + // Range does not need to be subdivided (or was unable to subdivide). Run the sequential sort. + { + // notify main thread that partitioning may be finished + std::lock_guard guard(*mutex); + --(*inflight_spawns); + } + cv->notify_one(); + + sort_func(first, last, comp); + } + + /** + * Sort a range in parallel using quicksort. + * + * @param sort_func Sequential sort method, like std::sort or std::stable_sort + * @param part_func Method that partitions a range, like std::partition or std::stable_partition + * @param pivot_func Method that identifies the pivot + */ + template + void parallel_quicksort(ExecPolicy &&policy, RandIt first, RandIt last, + Compare comp, SortFunc sort_func, PartFunc part_func, PivotFunc pivot_func) { + if (first == last) { + return; + } + + auto& task_pool = *policy.pool(); + + // Target partition size. Range will be recursively partitioned into partitions no bigger than this + // size. Target approximately twice as many partitions as threads to reduce impact of uneven pivot + // selection. + std::ptrdiff_t target_leaf_size = std::max(std::distance(first, last) / (task_pool.get_num_threads() * 2), + (std::ptrdiff_t)5); + + // task_thread_pool does not support creating task DAGs, so organize the code such that + // all parallel tasks are independent. The parallel tasks can spawn additional parallel tasks, and they + // record their "child" task's std::future into a common vector to be waited on by the main thread. + std::mutex mutex; + + // Futures of parallel tasks. Access protected by mutex. + std::vector> futures; + + // For signaling that all partitioning has been completed and futures vector is complete. Uses mutex. + std::condition_variable cv; + + // Number of `quicksort_impl` calls that haven't finished yet. Nonzero value means futures vector may + // still be modified. Access protected by mutex. + int inflight_spawns = 1; + + // Root task. + quicksort_impl(&task_pool, first, last, comp, sort_func, part_func, pivot_func, target_leaf_size, + &futures, &mutex, &cv, &inflight_spawns); + + // Wait for all partitioning to finish. + { + std::unique_lock lock(mutex); + cv.wait(lock, [&] { return inflight_spawns == 0; }); + } + + // Wait on all the parallel tasks. + get_futures(futures); + } } } diff --git a/include/poolstl/internal/utils.hpp b/include/poolstl/internal/utils.hpp index d02f105..f116883 100644 --- a/include/poolstl/internal/utils.hpp +++ b/include/poolstl/internal/utils.hpp @@ -101,6 +101,28 @@ namespace poolstl { } } + /** + * Identify a pivot element for quicksort. Chooses the middle element of the range. + */ + template + typename std::iterator_traits::value_type quicksort_pivot(Iterator first, Iterator last) { + return *(std::next(first, std::distance(first, last) / 2)); + } + + /** + * Predicate for std::partition (for quicksort) + */ + template + struct pivot_predicate { + pivot_predicate(Compare comp, const T& pivot) : comp(comp), pivot(pivot) {} + + bool operator()(const T& em) { + return comp(em, pivot); + } + Compare comp; + const T pivot; + }; + /* * Some methods are only available with C++17 and up. Reimplement on older standards. */ diff --git a/tests/poolstl_test.cpp b/tests/poolstl_test.cpp index 4114b80..47b3ee4 100644 --- a/tests/poolstl_test.cpp +++ b/tests/poolstl_test.cpp @@ -334,34 +334,53 @@ TEST_CASE("sort", "[alg][algorithm]") { default: break; } - for (auto which_impl : {0, 1, 2, 3, 4, 5}) { - std::vector dest1(source); - std::vector dest2(source); - - std::sort(dest1.begin(), dest1.end()); - switch (which_impl) { - case 0: - std::sort(poolstl::par_if(false), dest2.begin(), dest2.end()); - break; - case 1: - std::sort(poolstl::par.on(pool), dest2.begin(), dest2.end()); - break; - case 2: - poolstl::pluggable_sort(poolstl::par_if(false), dest2.begin(), dest2.end(), std::sort, std::inplace_merge); - break; - case 3: - poolstl::pluggable_sort(poolstl::par.on(pool), dest2.begin(), dest2.end(), pdqsort); - break; - case 4: - poolstl::pluggable_sort(poolstl::par.on(pool), dest2.begin(), dest2.end(), std::less(), pdqsort_branchless); - break; - case 5: - poolstl::pluggable_sort(poolstl::par.on(pool), dest2.begin(), dest2.end(), std::less(), pdqsort_branchless, adapted_pipm_inplace_merge); - default: - break; - } + std::vector dest1(source); + std::sort(dest1.begin(), dest1.end()); - REQUIRE(dest1 == dest2); + { + std::vector work(source); + std::sort(poolstl::par_if(false), work.begin(), work.end()); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + std::sort(poolstl::par.on(pool), work.begin(), work.end()); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + poolstl::pluggable_sort(poolstl::par_if(false), work.begin(), work.end(), pdqsort); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + poolstl::pluggable_sort(poolstl::par.on(pool), work.begin(), work.end(), pdqsort); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + poolstl::pluggable_mergesort(poolstl::par_if(false), work.begin(), work.end(), std::sort, std::inplace_merge); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + poolstl::pluggable_mergesort(poolstl::par.on(pool), work.begin(), work.end(), std::less(), pdqsort_branchless); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + poolstl::pluggable_mergesort(poolstl::par.on(pool), work.begin(), work.end(), std::less(), pdqsort_branchless, adapted_pipm_inplace_merge); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + poolstl::pluggable_quicksort(poolstl::par_if(false), work.begin(), work.end(), std::sort, std::partition); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + poolstl::pluggable_quicksort(poolstl::par.on(pool), work.begin(), work.end(), std::less(), pdqsort_branchless); + REQUIRE(dest1 == work); } } } @@ -386,23 +405,23 @@ TEST_CASE("stable_sort", "[alg][algorithm]") { default: break; } - for (auto which_impl : {0, 1}) { - std::vector dest1(source); - std::vector dest2(source); - - std::sort(dest1.begin(), dest1.end()); - switch (which_impl) { - case 0: - std::stable_sort(poolstl::par_if(false), dest2.begin(), dest2.end()); - break; - case 1: - std::stable_sort(poolstl::par.on(pool), dest2.begin(), dest2.end()); - break; - default: - break; - } + std::vector dest1(source); + std::stable_sort(dest1.begin(), dest1.end()); - REQUIRE(dest1 == dest2); + { + std::vector work(source); + std::stable_sort(poolstl::par_if(false), work.begin(), work.end()); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + std::stable_sort(poolstl::par.on(pool), work.begin(), work.end()); + REQUIRE(dest1 == work); + } + { + std::vector work(source); + poolstl::pluggable_quicksort(poolstl::par.on(pool), work.begin(), work.end(), std::stable_sort, std::stable_partition); + REQUIRE(dest1 == work); } } }